1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 2001,2019
4 * Author: Sungwoo Park, Daniel Gröber
5 *
6 * Generalised profiling heap traversal.
7 *
8 * ---------------------------------------------------------------------------*/
9
10 #if defined(PROFILING)
11
12 #include "PosixSource.h"
13 #include "Rts.h"
14 #include "sm/Storage.h"
15
16 #include "TraverseHeap.h"
17
18 /** Note [Profiling heap traversal visited bit]
19 *
20 * If the RTS is compiled with profiling enabled StgProfHeader can be used by
21 * profiling code to store per-heap object information.
22 *
23 * The generic heap traversal code reserves the least significant bit of the
24 * largest members of the 'trav' union to decide whether we've already visited a
25 * given closure in the current pass or not. The rest of the field is free to be
26 * used by the calling profiler.
27 *
28 * By doing things this way we implicitly assume that the LSB of the largest
29 * field in the 'trav' union is insignificant. This is true at least for the
30 * word aligned pointers which the retainer profiler currently stores there and
31 * should be maintained by new users of the 'trav' union for example by shifting
32 * the real data up by one bit.
33 *
34 * Since we don't want to have to scan the entire heap a second time just to
35 * reset the per-object visitied bit before/after the real traversal we make the
36 * interpretation of this bit dependent on the value of a global variable,
37 * 'flip'.
38 *
39 * When the 'trav' bit is equal to the value of 'flip' the closure data is
40 * valid otherwise not (see isTravDataValid). We then invert the value of 'flip'
41 * on each heap traversal (see traverseWorkStack), in effect marking all
42 * closure's data as invalid at once.
43 *
44 * There are some complications with this approach, namely: static objects and
45 * mutable data. There we do just go over all existing objects to reset the bit
46 * manually. See 'resetStaticObjectForProfiling' and 'resetMutableObjects'.
47 */
48 StgWord flip = 0;
49
50 #define setTravDataToZero(c) \
51 (c)->header.prof.hp.trav.lsb = flip
52
53 typedef enum {
54 // Object with fixed layout. Keeps an information about that
55 // element was processed. (stackPos.next.step)
56 posTypeStep,
57 // Description of the pointers-first heap object. Keeps information
58 // about layout. (stackPos.next.ptrs)
59 posTypePtrs,
60 // Keeps SRT bitmap (stackPos.next.srt)
61 posTypeSRT,
62 // Keeps a new object that was not inspected yet. Keeps a parent
63 // element (stackPos.next.parent)
64 posTypeFresh
65 } nextPosType;
66
67 typedef union {
68 // fixed layout or layout specified by a field in the closure
69 StgWord step;
70
71 // layout.payload
72 struct {
73 // See StgClosureInfo in InfoTables.h
74 StgHalfWord pos;
75 StgHalfWord ptrs;
76 StgPtr payload;
77 } ptrs;
78
79 // SRT
80 struct {
81 StgClosure *srt;
82 } srt;
83
84 // parent of the current closure, used only when posTypeFresh is set
85 StgClosure *cp;
86 } nextPos;
87
88 /**
89 * Position pointer into a closure. Determines what the next element to return
90 * for a stackElement is.
91 */
92 typedef struct {
93 nextPosType type;
94 nextPos next;
95 } stackPos;
96
97 /**
98 * An element of the traversal work-stack. Besides the closure itself this also
99 * stores it's parent and associated data.
100 *
101 * When 'info.type == posTypeFresh' a 'stackElement' represents just one
102 * closure, namely 'c' and 'cp' being it's parent. Otherwise 'info' specifies an
103 * offset into the children of 'c'. This is to support returning a closure's
104 * children one-by-one without pushing one element per child onto the stack. See
105 * traversePushChildren() and traversePop().
106 *
107 */
108 typedef struct stackElement_ {
109 stackPos info;
110 StgClosure *c;
111 stackData data;
112 } stackElement;
113
114
115 #if defined(DEBUG)
116 unsigned int g_traversalDebugLevel = 0;
debug(const char * s,...)117 static inline void debug(const char *s, ...)
118 {
119 va_list ap;
120
121 if(g_traversalDebugLevel == 0)
122 return;
123
124 va_start(ap,s);
125 vdebugBelch(s, ap);
126 va_end(ap);
127 }
128 #else
129 #define debug(...)
130 #endif
131
132 // number of blocks allocated for one stack
133 #define BLOCKS_IN_STACK 1
134
135 /* -----------------------------------------------------------------------------
136 * Add a new block group to the stack.
137 * Invariants:
138 * currentStack->link == s.
139 * -------------------------------------------------------------------------- */
140 STATIC_INLINE void
newStackBlock(traverseState * ts,bdescr * bd)141 newStackBlock( traverseState *ts, bdescr *bd )
142 {
143 ts->currentStack = bd;
144 ts->stackTop = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
145 ts->stackBottom = (stackElement *)bd->start;
146 ts->stackLimit = (stackElement *)ts->stackTop;
147 bd->free = (StgPtr)ts->stackLimit;
148 }
149
150 /* -----------------------------------------------------------------------------
151 * Return to the previous block group.
152 * Invariants:
153 * s->link == currentStack.
154 * -------------------------------------------------------------------------- */
155 STATIC_INLINE void
returnToOldStack(traverseState * ts,bdescr * bd)156 returnToOldStack( traverseState *ts, bdescr *bd )
157 {
158 ts->currentStack = bd;
159 ts->stackTop = (stackElement *)bd->free;
160 ts->stackBottom = (stackElement *)bd->start;
161 ts->stackLimit = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
162 bd->free = (StgPtr)ts->stackLimit;
163 }
164
165 /**
166 * Initializes the traversal work-stack.
167 */
168 void
initializeTraverseStack(traverseState * ts)169 initializeTraverseStack( traverseState *ts )
170 {
171 if (ts->firstStack != NULL) {
172 freeChain(ts->firstStack);
173 }
174
175 ts->firstStack = allocGroup(BLOCKS_IN_STACK);
176 ts->firstStack->link = NULL;
177 ts->firstStack->u.back = NULL;
178
179 ts->stackSize = 0;
180 ts->maxStackSize = 0;
181
182 newStackBlock(ts, ts->firstStack);
183 }
184
185 /**
186 * Frees all the block groups in the traversal works-stack.
187 *
188 * Invariants:
189 * firstStack != NULL
190 */
191 void
closeTraverseStack(traverseState * ts)192 closeTraverseStack( traverseState *ts )
193 {
194 freeChain(ts->firstStack);
195 ts->firstStack = NULL;
196 }
197
198 /**
199 * Returns the largest stack size encountered during the traversal.
200 */
201 int
getTraverseStackMaxSize(traverseState * ts)202 getTraverseStackMaxSize(traverseState *ts)
203 {
204 return ts->maxStackSize;
205 }
206
207 /**
208 * Returns true if the whole stack is empty.
209 **/
210 STATIC_INLINE bool
isEmptyWorkStack(traverseState * ts)211 isEmptyWorkStack( traverseState *ts )
212 {
213 return (ts->firstStack == ts->currentStack) && ts->stackTop == ts->stackLimit;
214 }
215
216 /**
217 * Returns size of stack
218 */
219 W_
traverseWorkStackBlocks(traverseState * ts)220 traverseWorkStackBlocks(traverseState *ts)
221 {
222 bdescr* bd;
223 W_ res = 0;
224
225 for (bd = ts->firstStack; bd != NULL; bd = bd->link)
226 res += bd->blocks;
227
228 return res;
229 }
230
231 /**
232 * Initializes *info from ptrs and payload.
233 *
234 * Invariants:
235 *
236 * payload[] begins with ptrs pointers followed by non-pointers.
237 */
238 STATIC_INLINE void
init_ptrs(stackPos * info,uint32_t ptrs,StgPtr payload)239 init_ptrs( stackPos *info, uint32_t ptrs, StgPtr payload )
240 {
241 info->type = posTypePtrs;
242 info->next.ptrs.pos = 0;
243 info->next.ptrs.ptrs = ptrs;
244 info->next.ptrs.payload = payload;
245 }
246
247 /**
248 * Find the next object from *info.
249 */
250 STATIC_INLINE StgClosure *
find_ptrs(stackPos * info)251 find_ptrs( stackPos *info )
252 {
253 if (info->next.ptrs.pos < info->next.ptrs.ptrs) {
254 return (StgClosure *)info->next.ptrs.payload[info->next.ptrs.pos++];
255 } else {
256 return NULL;
257 }
258 }
259
260 /**
261 * Initializes *info from SRT information stored in *infoTable.
262 */
263 STATIC_INLINE void
init_srt_fun(stackPos * info,const StgFunInfoTable * infoTable)264 init_srt_fun( stackPos *info, const StgFunInfoTable *infoTable )
265 {
266 info->type = posTypeSRT;
267 if (infoTable->i.srt) {
268 info->next.srt.srt = (StgClosure*)GET_FUN_SRT(infoTable);
269 } else {
270 info->next.srt.srt = NULL;
271 }
272 }
273
274 STATIC_INLINE void
init_srt_thunk(stackPos * info,const StgThunkInfoTable * infoTable)275 init_srt_thunk( stackPos *info, const StgThunkInfoTable *infoTable )
276 {
277 info->type = posTypeSRT;
278 if (infoTable->i.srt) {
279 info->next.srt.srt = (StgClosure*)GET_SRT(infoTable);
280 } else {
281 info->next.srt.srt = NULL;
282 }
283 }
284
285 /**
286 * Find the next object from *info.
287 */
288 STATIC_INLINE StgClosure *
find_srt(stackPos * info)289 find_srt( stackPos *info )
290 {
291 StgClosure *c;
292 if (info->type == posTypeSRT) {
293 c = info->next.srt.srt;
294 info->next.srt.srt = NULL;
295 return c;
296 }
297
298 return NULL;
299 }
300
301 /**
302 * Push a set of closures, represented by a single 'stackElement', onto the
303 * traversal work-stack.
304 */
305 static void
pushStackElement(traverseState * ts,const stackElement se)306 pushStackElement(traverseState *ts, const stackElement se)
307 {
308 bdescr *nbd; // Next Block Descriptor
309 if (ts->stackTop - 1 < ts->stackBottom) {
310 debug("pushStackElement() to the next stack.\n");
311
312 // currentStack->free is updated when the active stack is switched
313 // to the next stack.
314 ts->currentStack->free = (StgPtr)ts->stackTop;
315
316 if (ts->currentStack->link == NULL) {
317 nbd = allocGroup(BLOCKS_IN_STACK);
318 nbd->link = NULL;
319 nbd->u.back = ts->currentStack;
320 ts->currentStack->link = nbd;
321 } else
322 nbd = ts->currentStack->link;
323
324 newStackBlock(ts, nbd);
325 }
326
327 // adjust stackTop (acutal push)
328 ts->stackTop--;
329 // If the size of stackElement was huge, we would better replace the
330 // following statement by either a memcpy() call or a switch statement
331 // on the type of the element. Currently, the size of stackElement is
332 // small enough (5 words) that this direct assignment seems to be enough.
333 *ts->stackTop = se;
334
335 ts->stackSize++;
336 if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize;
337 ASSERT(ts->stackSize >= 0);
338 debug("stackSize = %d\n", ts->stackSize);
339 }
340
341 /**
342 * Push a single closure onto the traversal work-stack.
343 *
344 * cp - object's parent
345 * c - closure
346 * data - data associated with closure.
347 */
348 inline void
traversePushClosure(traverseState * ts,StgClosure * c,StgClosure * cp,stackData data)349 traversePushClosure(traverseState *ts, StgClosure *c, StgClosure *cp, stackData data) {
350 stackElement se;
351
352 se.c = c;
353 se.info.next.cp = cp;
354 se.data = data;
355 se.info.type = posTypeFresh;
356
357 pushStackElement(ts, se);
358 };
359
360 /**
361 * traversePushChildren() extracts the first child of 'c' in 'first_child' and
362 * conceptually pushes all remaining children of 'c' onto the traversal stack
363 * while associating 'data' with the pushed elements to be returned upon poping.
364 *
365 * If 'c' has no children, 'first_child' is set to NULL and nothing is pushed
366 * onto the stack.
367 *
368 * If 'c' has only one child, 'first_child' is set to that child and nothing is
369 * pushed onto the stack.
370 *
371 * Invariants:
372 *
373 * - 'c' is not any of TSO, AP, PAP, AP_STACK, which means that there cannot
374 * be any stack objects.
375 *
376 * Note: SRTs are considered to be children as well.
377 *
378 * Note: When pushing onto the stack we only really push one 'stackElement'
379 * representing all children onto the stack. See traversePop()
380 */
381 STATIC_INLINE void
traversePushChildren(traverseState * ts,StgClosure * c,stackData data,StgClosure ** first_child)382 traversePushChildren(traverseState *ts, StgClosure *c, stackData data, StgClosure **first_child)
383 {
384 stackElement se;
385
386 debug("traversePushChildren(): stackTop = 0x%x\n", ts->stackTop);
387
388 ASSERT(get_itbl(c)->type != TSO);
389 ASSERT(get_itbl(c)->type != AP_STACK);
390
391 //
392 // fill in se
393 //
394
395 se.c = c;
396 se.data = data;
397
398 // fill in se.info
399 switch (get_itbl(c)->type) {
400 // no child, no SRT
401 case CONSTR_0_1:
402 case CONSTR_0_2:
403 case ARR_WORDS:
404 case COMPACT_NFDATA:
405 *first_child = NULL;
406 return;
407
408 // one child (fixed), no SRT
409 case MUT_VAR_CLEAN:
410 case MUT_VAR_DIRTY:
411 *first_child = ((StgMutVar *)c)->var;
412 return;
413 case THUNK_SELECTOR:
414 *first_child = ((StgSelector *)c)->selectee;
415 return;
416 case BLACKHOLE:
417 *first_child = ((StgInd *)c)->indirectee;
418 return;
419 case CONSTR_1_0:
420 case CONSTR_1_1:
421 *first_child = c->payload[0];
422 return;
423
424 // For CONSTR_2_0 and MVAR, we use se.info.step to record the position
425 // of the next child. We do not write a separate initialization code.
426 // Also we do not have to initialize info.type;
427
428 // two children (fixed), no SRT
429 // need to push a stackElement, but nothing to store in se.info
430 case CONSTR_2_0:
431 *first_child = c->payload[0]; // return the first pointer
432 se.info.type = posTypeStep;
433 se.info.next.step = 2; // 2 = second
434 break;
435
436 // three children (fixed), no SRT
437 // need to push a stackElement
438 case MVAR_CLEAN:
439 case MVAR_DIRTY:
440 // head must be TSO and the head of a linked list of TSOs.
441 // Shoule it be a child? Seems to be yes.
442 *first_child = (StgClosure *)((StgMVar *)c)->head;
443 se.info.type = posTypeStep;
444 se.info.next.step = 2; // 2 = second
445 break;
446
447 // three children (fixed), no SRT
448 case WEAK:
449 *first_child = ((StgWeak *)c)->key;
450 se.info.type = posTypeStep;
451 se.info.next.step = 2;
452 break;
453
454 // layout.payload.ptrs, no SRT
455 case TVAR:
456 case CONSTR:
457 case CONSTR_NOCAF:
458 case PRIM:
459 case MUT_PRIM:
460 case BCO:
461 init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
462 (StgPtr)c->payload);
463 *first_child = find_ptrs(&se.info);
464 if (*first_child == NULL)
465 return; // no child
466 break;
467
468 // StgMutArrPtr.ptrs, no SRT
469 case MUT_ARR_PTRS_CLEAN:
470 case MUT_ARR_PTRS_DIRTY:
471 case MUT_ARR_PTRS_FROZEN_CLEAN:
472 case MUT_ARR_PTRS_FROZEN_DIRTY:
473 init_ptrs(&se.info, ((StgMutArrPtrs *)c)->ptrs,
474 (StgPtr)(((StgMutArrPtrs *)c)->payload));
475 *first_child = find_ptrs(&se.info);
476 if (*first_child == NULL)
477 return;
478 break;
479
480 // StgMutArrPtr.ptrs, no SRT
481 case SMALL_MUT_ARR_PTRS_CLEAN:
482 case SMALL_MUT_ARR_PTRS_DIRTY:
483 case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
484 case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
485 init_ptrs(&se.info, ((StgSmallMutArrPtrs *)c)->ptrs,
486 (StgPtr)(((StgSmallMutArrPtrs *)c)->payload));
487 *first_child = find_ptrs(&se.info);
488 if (*first_child == NULL)
489 return;
490 break;
491
492 // layout.payload.ptrs, SRT
493 case FUN_STATIC:
494 case FUN: // *c is a heap object.
495 case FUN_2_0:
496 init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, (StgPtr)c->payload);
497 *first_child = find_ptrs(&se.info);
498 if (*first_child == NULL)
499 // no child from ptrs, so check SRT
500 goto fun_srt_only;
501 break;
502
503 case THUNK:
504 case THUNK_2_0:
505 init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
506 (StgPtr)((StgThunk *)c)->payload);
507 *first_child = find_ptrs(&se.info);
508 if (*first_child == NULL)
509 // no child from ptrs, so check SRT
510 goto thunk_srt_only;
511 break;
512
513 // 1 fixed child, SRT
514 case FUN_1_0:
515 case FUN_1_1:
516 *first_child = c->payload[0];
517 ASSERT(*first_child != NULL);
518 init_srt_fun(&se.info, get_fun_itbl(c));
519 break;
520
521 case THUNK_1_0:
522 case THUNK_1_1:
523 *first_child = ((StgThunk *)c)->payload[0];
524 ASSERT(*first_child != NULL);
525 init_srt_thunk(&se.info, get_thunk_itbl(c));
526 break;
527
528 case FUN_0_1: // *c is a heap object.
529 case FUN_0_2:
530 fun_srt_only:
531 init_srt_fun(&se.info, get_fun_itbl(c));
532 *first_child = find_srt(&se.info);
533 if (*first_child == NULL)
534 return; // no child
535 break;
536
537 // SRT only
538 case THUNK_STATIC:
539 ASSERT(get_itbl(c)->srt != 0);
540 /* fall-thru */
541 case THUNK_0_1:
542 case THUNK_0_2:
543 thunk_srt_only:
544 init_srt_thunk(&se.info, get_thunk_itbl(c));
545 *first_child = find_srt(&se.info);
546 if (*first_child == NULL)
547 return; // no child
548 break;
549
550 case TREC_CHUNK:
551 *first_child = (StgClosure *)((StgTRecChunk *)c)->prev_chunk;
552 se.info.type = posTypeStep;
553 se.info.next.step = 0; // entry no.
554 break;
555
556 // cannot appear
557 case PAP:
558 case AP:
559 case AP_STACK:
560 case TSO:
561 case STACK:
562 case IND_STATIC:
563 // stack objects
564 case UPDATE_FRAME:
565 case CATCH_FRAME:
566 case UNDERFLOW_FRAME:
567 case STOP_FRAME:
568 case RET_BCO:
569 case RET_SMALL:
570 case RET_BIG:
571 // invalid objects
572 case IND:
573 case INVALID_OBJECT:
574 default:
575 barf("Invalid object *c in push(): %d", get_itbl(c)->type);
576 return;
577 }
578
579 // se.info.next.cp has to be initialized when type==posTypeFresh. We don't
580 // do that here though. So type must be !=posTypeFresh.
581 ASSERT(se.info.type != posTypeFresh);
582
583 pushStackElement(ts, se);
584 }
585
586 /**
587 * popStackElement(): Remove a depleted stackElement from the top of the
588 * traversal work-stack.
589 *
590 * Invariants:
591 * stackTop cannot be equal to stackLimit unless the whole stack is
592 * empty, in which case popStackElement() is not allowed.
593 */
594 static void
popStackElement(traverseState * ts)595 popStackElement(traverseState *ts) {
596 debug("popStackElement(): stackTop = 0x%x\n", ts->stackTop);
597
598 ASSERT(ts->stackTop != ts->stackLimit);
599 ASSERT(!isEmptyWorkStack(ts));
600
601 // <= (instead of <) is wrong!
602 if (ts->stackTop + 1 < ts->stackLimit) {
603 ts->stackTop++;
604
605 ts->stackSize--;
606 if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize;
607 ASSERT(ts->stackSize >= 0);
608 debug("stackSize = (--) %d\n", ts->stackSize);
609
610 return;
611 }
612
613 bdescr *pbd; // Previous Block Descriptor
614
615 debug("popStackElement() to the previous stack.\n");
616
617 ASSERT(ts->stackTop + 1 == ts->stackLimit);
618 ASSERT(ts->stackBottom == (stackElement *)ts->currentStack->start);
619
620 if (ts->firstStack == ts->currentStack) {
621 // The stack is completely empty.
622 ts->stackTop++;
623 ASSERT(ts->stackTop == ts->stackLimit);
624
625 ts->stackSize--;
626 if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize;
627 ASSERT(ts->stackSize >= 0);
628 debug("stackSize = %d\n", ts->stackSize);
629
630 return;
631 }
632
633 // currentStack->free is updated when the active stack is switched back
634 // to the previous stack.
635 ts->currentStack->free = (StgPtr)ts->stackLimit;
636
637 // find the previous block descriptor
638 pbd = ts->currentStack->u.back;
639 ASSERT(pbd != NULL);
640
641 returnToOldStack(ts, pbd);
642
643 ts->stackSize--;
644 if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize;
645 ASSERT(ts->stackSize >= 0);
646 debug("stackSize = %d\n", ts->stackSize);
647 }
648
649 /**
650 * Finds the next object to be considered for retainer profiling and store
651 * its pointer to *c.
652 *
653 * If the unprocessed object was stored in the stack (posTypeFresh), the
654 * this object is returned as-is. Otherwise Test if the topmost stack
655 * element indicates that more objects are left,
656 * and if so, retrieve the first object and store its pointer to *c. Also,
657 * set *cp and *data appropriately, both of which are stored in the stack
658 * element. The topmost stack element then is overwritten so as for it to now
659 * denote the next object.
660 *
661 * If the topmost stack element indicates no more objects are left, pop
662 * off the stack element until either an object can be retrieved or
663 * the work-stack becomes empty, indicated by true returned by
664 * isEmptyWorkStack(), in which case *c is set to NULL.
665 *
666 * Note:
667 *
668 * It is okay to call this function even when the work-stack is empty.
669 */
670 STATIC_INLINE void
traversePop(traverseState * ts,StgClosure ** c,StgClosure ** cp,stackData * data)671 traversePop(traverseState *ts, StgClosure **c, StgClosure **cp, stackData *data)
672 {
673 stackElement *se;
674
675 debug("traversePop(): stackTop = 0x%x\n", ts->stackTop);
676
677 // Is this the last internal element? If so instead of modifying the current
678 // stackElement in place we actually remove it from the stack.
679 bool last = false;
680
681 do {
682 if (isEmptyWorkStack(ts)) {
683 *c = NULL;
684 return;
685 }
686
687 // Note: Below every `break`, where the loop condition is true, must be
688 // accompanied by a popStackElement() otherwise this is an infinite
689 // loop.
690 se = ts->stackTop;
691
692 // If this is a top-level element, you should pop that out.
693 if (se->info.type == posTypeFresh) {
694 *cp = se->info.next.cp;
695 *c = se->c;
696 *data = se->data;
697 popStackElement(ts);
698 return;
699 }
700
701 // Note: The first ptr of all of these was already returned as
702 // *fist_child in push(), so we always start with the second field.
703 switch (get_itbl(se->c)->type) {
704 // two children (fixed), no SRT
705 // nothing in se.info
706 case CONSTR_2_0:
707 *c = se->c->payload[1];
708 last = true;
709 goto out;
710
711 // three children (fixed), no SRT
712 // need to push a stackElement
713 case MVAR_CLEAN:
714 case MVAR_DIRTY:
715 if (se->info.next.step == 2) {
716 *c = (StgClosure *)((StgMVar *)se->c)->tail;
717 se->info.next.step++; // move to the next step
718 // no popStackElement
719 } else {
720 *c = ((StgMVar *)se->c)->value;
721 last = true;
722 }
723 goto out;
724
725 // three children (fixed), no SRT
726 case WEAK:
727 if (se->info.next.step == 2) {
728 *c = ((StgWeak *)se->c)->value;
729 se->info.next.step++;
730 // no popStackElement
731 } else {
732 *c = ((StgWeak *)se->c)->finalizer;
733 last = true;
734 }
735 goto out;
736
737 case TREC_CHUNK: {
738 // These are pretty complicated: we have N entries, each
739 // of which contains 3 fields that we want to follow. So
740 // we divide the step counter: the 2 low bits indicate
741 // which field, and the rest of the bits indicate the
742 // entry number (starting from zero).
743 TRecEntry *entry;
744 uint32_t entry_no = se->info.next.step >> 2;
745 uint32_t field_no = se->info.next.step & 3;
746 if (entry_no == ((StgTRecChunk *)se->c)->next_entry_idx) {
747 *c = NULL;
748 popStackElement(ts);
749 break; // this breaks out of the switch not the loop
750 }
751 entry = &((StgTRecChunk *)se->c)->entries[entry_no];
752 if (field_no == 0) {
753 *c = (StgClosure *)entry->tvar;
754 } else if (field_no == 1) {
755 *c = entry->expected_value;
756 } else {
757 *c = entry->new_value;
758 }
759 se->info.next.step++;
760 goto out;
761 }
762
763 case TVAR:
764 case CONSTR:
765 case PRIM:
766 case MUT_PRIM:
767 case BCO:
768 // StgMutArrPtr.ptrs, no SRT
769 case MUT_ARR_PTRS_CLEAN:
770 case MUT_ARR_PTRS_DIRTY:
771 case MUT_ARR_PTRS_FROZEN_CLEAN:
772 case MUT_ARR_PTRS_FROZEN_DIRTY:
773 case SMALL_MUT_ARR_PTRS_CLEAN:
774 case SMALL_MUT_ARR_PTRS_DIRTY:
775 case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
776 case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
777 *c = find_ptrs(&se->info);
778 if (*c == NULL) {
779 popStackElement(ts);
780 break; // this breaks out of the switch not the loop
781 }
782 goto out;
783
784 // layout.payload.ptrs, SRT
785 case FUN: // always a heap object
786 case FUN_STATIC:
787 case FUN_2_0:
788 if (se->info.type == posTypePtrs) {
789 *c = find_ptrs(&se->info);
790 if (*c != NULL) {
791 goto out;
792 }
793 init_srt_fun(&se->info, get_fun_itbl(se->c));
794 }
795 goto do_srt;
796
797 case THUNK:
798 case THUNK_2_0:
799 if (se->info.type == posTypePtrs) {
800 *c = find_ptrs(&se->info);
801 if (*c != NULL) {
802 goto out;
803 }
804 init_srt_thunk(&se->info, get_thunk_itbl(se->c));
805 }
806 goto do_srt;
807
808 // SRT
809 do_srt:
810 case THUNK_STATIC:
811 case FUN_0_1:
812 case FUN_0_2:
813 case THUNK_0_1:
814 case THUNK_0_2:
815 case FUN_1_0:
816 case FUN_1_1:
817 case THUNK_1_0:
818 case THUNK_1_1:
819 *c = find_srt(&se->info);
820 if(*c == NULL) {
821 popStackElement(ts);
822 break; // this breaks out of the switch not the loop
823 }
824 goto out;
825
826 // no child (fixed), no SRT
827 case CONSTR_0_1:
828 case CONSTR_0_2:
829 case ARR_WORDS:
830 // one child (fixed), no SRT
831 case MUT_VAR_CLEAN:
832 case MUT_VAR_DIRTY:
833 case THUNK_SELECTOR:
834 case CONSTR_1_1:
835 // cannot appear
836 case PAP:
837 case AP:
838 case AP_STACK:
839 case TSO:
840 case STACK:
841 case IND_STATIC:
842 case CONSTR_NOCAF:
843 // stack objects
844 case UPDATE_FRAME:
845 case CATCH_FRAME:
846 case UNDERFLOW_FRAME:
847 case STOP_FRAME:
848 case RET_BCO:
849 case RET_SMALL:
850 case RET_BIG:
851 // invalid objects
852 case IND:
853 case INVALID_OBJECT:
854 default:
855 barf("Invalid object *c in traversePop(): %d", get_itbl(se->c)->type);
856 return;
857 }
858 } while (*c == NULL);
859
860 out:
861
862 ASSERT(*c != NULL);
863
864 *cp = se->c;
865 *data = se->data;
866
867 if(last)
868 popStackElement(ts);
869
870 return;
871
872 }
873
874 /**
875 * Make sure a closure's profiling data is initialized to zero if it does not
876 * conform to the current value of the flip bit, returns true in this case.
877 *
878 * See Note [Profiling heap traversal visited bit].
879 */
880 bool
traverseMaybeInitClosureData(StgClosure * c)881 traverseMaybeInitClosureData(StgClosure *c)
882 {
883 if (!isTravDataValid(c)) {
884 setTravDataToZero(c);
885 return true;
886 }
887 return false;
888 }
889
890 /**
891 * Call traversePushClosure for each of the closures covered by a large bitmap.
892 */
893 static void
traverseLargeBitmap(traverseState * ts,StgPtr p,StgLargeBitmap * large_bitmap,uint32_t size,StgClosure * c,stackData data)894 traverseLargeBitmap (traverseState *ts, StgPtr p, StgLargeBitmap *large_bitmap,
895 uint32_t size, StgClosure *c, stackData data)
896 {
897 uint32_t i, b;
898 StgWord bitmap;
899
900 b = 0;
901 bitmap = large_bitmap->bitmap[b];
902 for (i = 0; i < size; ) {
903 if ((bitmap & 1) == 0) {
904 traversePushClosure(ts, (StgClosure *)*p, c, data);
905 }
906 i++;
907 p++;
908 if (i % BITS_IN(W_) == 0) {
909 b++;
910 bitmap = large_bitmap->bitmap[b];
911 } else {
912 bitmap = bitmap >> 1;
913 }
914 }
915 }
916
917 STATIC_INLINE StgPtr
traverseSmallBitmap(traverseState * ts,StgPtr p,uint32_t size,StgWord bitmap,StgClosure * c,stackData data)918 traverseSmallBitmap (traverseState *ts, StgPtr p, uint32_t size, StgWord bitmap,
919 StgClosure *c, stackData data)
920 {
921 while (size > 0) {
922 if ((bitmap & 1) == 0) {
923 traversePushClosure(ts, (StgClosure *)*p, c, data);
924 }
925 p++;
926 bitmap = bitmap >> 1;
927 size--;
928 }
929 return p;
930 }
931
932 /**
933 * traversePushStack(ts, cp, data, stackStart, stackEnd) pushes all the objects
934 * in the STG stack-chunk from stackStart to stackEnd onto the traversal
935 * work-stack with 'c' and 'data' being their parent and associated data,
936 * respectively.
937 *
938 * Invariants:
939 *
940 * *cp is one of the following: TSO, AP_STACK.
941 *
942 * stackStart < stackEnd.
943 *
944 * If *c is TSO, its state is not ThreadComplete,or ThreadKilled,
945 * which means that its stack is ready to process.
946 *
947 * Note:
948 *
949 * This code was almost plagiarzied from GC.c! For each pointer,
950 * traversePushClosure() is invoked instead of evacuate().
951 */
952 static void
traversePushStack(traverseState * ts,StgClosure * cp,stackData data,StgPtr stackStart,StgPtr stackEnd)953 traversePushStack(traverseState *ts, StgClosure *cp, stackData data,
954 StgPtr stackStart, StgPtr stackEnd)
955 {
956 StgPtr p;
957 const StgRetInfoTable *info;
958 StgWord bitmap;
959 uint32_t size;
960
961 ASSERT(get_itbl(cp)->type == STACK);
962
963 p = stackStart;
964 while (p < stackEnd) {
965 info = get_ret_itbl((StgClosure *)p);
966
967 switch(info->i.type) {
968
969 case UPDATE_FRAME:
970 traversePushClosure(ts, ((StgUpdateFrame *)p)->updatee, cp, data);
971 p += sizeofW(StgUpdateFrame);
972 continue;
973
974 case UNDERFLOW_FRAME:
975 case STOP_FRAME:
976 case CATCH_FRAME:
977 case CATCH_STM_FRAME:
978 case CATCH_RETRY_FRAME:
979 case ATOMICALLY_FRAME:
980 case RET_SMALL:
981 bitmap = BITMAP_BITS(info->i.layout.bitmap);
982 size = BITMAP_SIZE(info->i.layout.bitmap);
983 p++;
984 p = traverseSmallBitmap(ts, p, size, bitmap, cp, data);
985
986 follow_srt:
987 if (info->i.srt) {
988 traversePushClosure(ts, GET_SRT(info), cp, data);
989 }
990 continue;
991
992 case RET_BCO: {
993 StgBCO *bco;
994
995 p++;
996 traversePushClosure(ts, (StgClosure*)*p, cp, data);
997 bco = (StgBCO *)*p;
998 p++;
999 size = BCO_BITMAP_SIZE(bco);
1000 traverseLargeBitmap(ts, p, BCO_BITMAP(bco), size, cp, data);
1001 p += size;
1002 continue;
1003 }
1004
1005 // large bitmap (> 32 entries, or > 64 on a 64-bit machine)
1006 case RET_BIG:
1007 size = GET_LARGE_BITMAP(&info->i)->size;
1008 p++;
1009 traverseLargeBitmap(ts, p, GET_LARGE_BITMAP(&info->i),
1010 size, cp, data);
1011 p += size;
1012 // and don't forget to follow the SRT
1013 goto follow_srt;
1014
1015 case RET_FUN: {
1016 StgRetFun *ret_fun = (StgRetFun *)p;
1017 const StgFunInfoTable *fun_info;
1018
1019 traversePushClosure(ts, ret_fun->fun, cp, data);
1020 fun_info = get_fun_itbl(UNTAG_CONST_CLOSURE(ret_fun->fun));
1021
1022 p = (P_)&ret_fun->payload;
1023 switch (fun_info->f.fun_type) {
1024 case ARG_GEN:
1025 bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
1026 size = BITMAP_SIZE(fun_info->f.b.bitmap);
1027 p = traverseSmallBitmap(ts, p, size, bitmap, cp, data);
1028 break;
1029 case ARG_GEN_BIG:
1030 size = GET_FUN_LARGE_BITMAP(fun_info)->size;
1031 traverseLargeBitmap(ts, p, GET_FUN_LARGE_BITMAP(fun_info),
1032 size, cp, data);
1033 p += size;
1034 break;
1035 default:
1036 bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
1037 size = BITMAP_SIZE(stg_arg_bitmaps[fun_info->f.fun_type]);
1038 p = traverseSmallBitmap(ts, p, size, bitmap, cp, data);
1039 break;
1040 }
1041 goto follow_srt;
1042 }
1043
1044 default:
1045 barf("Invalid object found in traversePushStack(): %d",
1046 (int)(info->i.type));
1047 }
1048 }
1049 }
1050
1051 /**
1052 * Call traversePushClosure for each of the children of a PAP/AP
1053 */
1054 STATIC_INLINE StgPtr
traversePAP(traverseState * ts,StgClosure * pap,stackData data,StgClosure * fun,StgClosure ** payload,StgWord n_args)1055 traversePAP (traverseState *ts,
1056 StgClosure *pap, /* NOT tagged */
1057 stackData data,
1058 StgClosure *fun, /* tagged */
1059 StgClosure** payload, StgWord n_args)
1060 {
1061 StgPtr p;
1062 StgWord bitmap;
1063 const StgFunInfoTable *fun_info;
1064
1065 traversePushClosure(ts, fun, pap, data);
1066 fun = UNTAG_CLOSURE(fun);
1067 fun_info = get_fun_itbl(fun);
1068 ASSERT(fun_info->i.type != PAP);
1069
1070 p = (StgPtr)payload;
1071
1072 switch (fun_info->f.fun_type) {
1073 case ARG_GEN:
1074 bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
1075 p = traverseSmallBitmap(ts, p, n_args, bitmap,
1076 pap, data);
1077 break;
1078 case ARG_GEN_BIG:
1079 traverseLargeBitmap(ts, p, GET_FUN_LARGE_BITMAP(fun_info),
1080 n_args, pap, data);
1081 p += n_args;
1082 break;
1083 case ARG_BCO:
1084 traverseLargeBitmap(ts, (StgPtr)payload, BCO_BITMAP(fun),
1085 n_args, pap, data);
1086 p += n_args;
1087 break;
1088 default:
1089 bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
1090 p = traverseSmallBitmap(ts, p, n_args, bitmap, pap, data);
1091 break;
1092 }
1093 return p;
1094 }
1095
1096 static void
resetMutableObjects(void)1097 resetMutableObjects(void)
1098 {
1099 uint32_t g, n;
1100 bdescr *bd;
1101 StgPtr ml;
1102
1103 // The following code resets the 'trav' field of each unvisited mutable
1104 // object.
1105 for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1106 // NOT true: even G0 has a block on its mutable list
1107 // ASSERT(g != 0 || (generations[g].mut_list == NULL));
1108
1109 // Traversing through mut_list is necessary
1110 // because we can find MUT_VAR objects which have not been
1111 // visited during heap traversal.
1112 for (n = 0; n < n_capabilities; n++) {
1113 for (bd = capabilities[n]->mut_lists[g]; bd != NULL; bd = bd->link) {
1114 for (ml = bd->start; ml < bd->free; ml++) {
1115
1116 traverseMaybeInitClosureData((StgClosure *)*ml);
1117 }
1118 }
1119 }
1120 }
1121 }
1122
1123 /**
1124 * Traverse all closures on the traversal work-stack, calling 'visit_cb' on each
1125 * closure. See 'visitClosure_cb' for details. This function flips the 'flip'
1126 * bit and hence every closure's profiling data will be reset to zero upon
1127 * visiting. See Note [Profiling heap traversal visited bit].
1128 */
1129 void
traverseWorkStack(traverseState * ts,visitClosure_cb visit_cb)1130 traverseWorkStack(traverseState *ts, visitClosure_cb visit_cb)
1131 {
1132 // first_child = first child of c
1133 StgClosure *c, *cp, *first_child;
1134 stackData data, child_data;
1135 StgWord typeOfc;
1136
1137 // Now we flip the flip bit.
1138 flip = flip ^ 1;
1139
1140 // c = Current closure (possibly tagged)
1141 // cp = Current closure's Parent (NOT tagged)
1142 // data = current closures' associated data (NOT tagged)
1143 // data_out = data to associate with current closure's children
1144
1145 loop:
1146 traversePop(ts, &c, &cp, &data);
1147
1148 if (c == NULL) {
1149 debug("maxStackSize= %d\n", ts->maxStackSize);
1150 resetMutableObjects();
1151 return;
1152 }
1153 inner_loop:
1154 c = UNTAG_CLOSURE(c);
1155
1156 typeOfc = get_itbl(c)->type;
1157
1158 // special cases
1159 switch (typeOfc) {
1160 case TSO:
1161 if (((StgTSO *)c)->what_next == ThreadComplete ||
1162 ((StgTSO *)c)->what_next == ThreadKilled) {
1163 debug("ThreadComplete or ThreadKilled encountered in traverseWorkStack()\n");
1164 goto loop;
1165 }
1166 break;
1167
1168 case IND_STATIC:
1169 // We just skip IND_STATIC, so it's never visited.
1170 c = ((StgIndStatic *)c)->indirectee;
1171 goto inner_loop;
1172
1173 case CONSTR_NOCAF:
1174 // static objects with no pointers out, so goto loop.
1175
1176 // It is not just enough not to visit *c; it is
1177 // mandatory because CONSTR_NOCAF are not reachable from
1178 // scavenged_static_objects, the list from which is assumed to traverse
1179 // all static objects after major garbage collections.
1180 goto loop;
1181
1182 case THUNK_STATIC:
1183 if (get_itbl(c)->srt == 0) {
1184 // No need to visit *c; no dynamic objects are reachable from it.
1185 //
1186 // Static objects: if we traverse all the live closures,
1187 // including static closures, during each heap census then
1188 // we will observe that some static closures appear and
1189 // disappear. eg. a closure may contain a pointer to a
1190 // static function 'f' which is not otherwise reachable
1191 // (it doesn't indirectly point to any CAFs, so it doesn't
1192 // appear in any SRTs), so we would find 'f' during
1193 // traversal. However on the next sweep there may be no
1194 // closures pointing to 'f'.
1195 //
1196 // We must therefore ignore static closures whose SRT is
1197 // empty, because these are exactly the closures that may
1198 // "appear". A closure with a non-empty SRT, and which is
1199 // still required, will always be reachable.
1200 //
1201 // But what about CONSTR? Surely these may be able
1202 // to appear, and they don't have SRTs, so we can't
1203 // check. So for now, we're calling
1204 // resetStaticObjectForProfiling() from the
1205 // garbage collector to reset the retainer sets in all the
1206 // reachable static objects.
1207 goto loop;
1208 }
1209 /* fall-thru */
1210
1211 case FUN_STATIC: {
1212 const StgInfoTable *info = get_itbl(c);
1213 if (info->srt == 0 && info->layout.payload.ptrs == 0) {
1214 goto loop;
1215 } else {
1216 break;
1217 }
1218 }
1219
1220 default:
1221 break;
1222 }
1223
1224 // If this is the first visit to c, initialize its data.
1225 bool first_visit = traverseMaybeInitClosureData(c);
1226 bool traverse_children
1227 = visit_cb(c, cp, data, first_visit, (stackData*)&child_data);
1228 if(!traverse_children)
1229 goto loop;
1230
1231 // process child
1232
1233 // Special case closures: we process these all in one go rather
1234 // than attempting to save the current position, because doing so
1235 // would be hard.
1236 switch (typeOfc) {
1237 case STACK:
1238 traversePushStack(ts, c, child_data,
1239 ((StgStack *)c)->sp,
1240 ((StgStack *)c)->stack + ((StgStack *)c)->stack_size);
1241 goto loop;
1242
1243 case TSO:
1244 {
1245 StgTSO *tso = (StgTSO *)c;
1246
1247 traversePushClosure(ts, (StgClosure *) tso->stackobj, c, child_data);
1248 traversePushClosure(ts, (StgClosure *) tso->blocked_exceptions, c, child_data);
1249 traversePushClosure(ts, (StgClosure *) tso->bq, c, child_data);
1250 traversePushClosure(ts, (StgClosure *) tso->trec, c, child_data);
1251 if ( tso->why_blocked == BlockedOnMVar
1252 || tso->why_blocked == BlockedOnMVarRead
1253 || tso->why_blocked == BlockedOnBlackHole
1254 || tso->why_blocked == BlockedOnMsgThrowTo
1255 ) {
1256 traversePushClosure(ts, tso->block_info.closure, c, child_data);
1257 }
1258 goto loop;
1259 }
1260
1261 case BLOCKING_QUEUE:
1262 {
1263 StgBlockingQueue *bq = (StgBlockingQueue *)c;
1264 traversePushClosure(ts, (StgClosure *) bq->link, c, child_data);
1265 traversePushClosure(ts, (StgClosure *) bq->bh, c, child_data);
1266 traversePushClosure(ts, (StgClosure *) bq->owner, c, child_data);
1267 goto loop;
1268 }
1269
1270 case PAP:
1271 {
1272 StgPAP *pap = (StgPAP *)c;
1273 traversePAP(ts, c, child_data, pap->fun, pap->payload, pap->n_args);
1274 goto loop;
1275 }
1276
1277 case AP:
1278 {
1279 StgAP *ap = (StgAP *)c;
1280 traversePAP(ts, c, child_data, ap->fun, ap->payload, ap->n_args);
1281 goto loop;
1282 }
1283
1284 case AP_STACK:
1285 traversePushClosure(ts, ((StgAP_STACK *)c)->fun, c, child_data);
1286 traversePushStack(ts, c, child_data,
1287 (StgPtr)((StgAP_STACK *)c)->payload,
1288 (StgPtr)((StgAP_STACK *)c)->payload +
1289 ((StgAP_STACK *)c)->size);
1290 goto loop;
1291 }
1292
1293 traversePushChildren(ts, c, child_data, &first_child);
1294
1295 // If first_child is null, c has no child.
1296 // If first_child is not null, the top stack element points to the next
1297 // object. traversePushChildren() may or may not push a stackElement on the
1298 // stack.
1299 if (first_child == NULL)
1300 goto loop;
1301
1302 // (c, cp, data) = (first_child, c, child_data)
1303 data = child_data;
1304 cp = c;
1305 c = first_child;
1306 goto inner_loop;
1307 }
1308
1309 /**
1310 * Traverse all static objects for which we compute retainer sets,
1311 * and reset their rs fields to NULL, which is accomplished by
1312 * invoking traverseMaybeInitClosureData(). This function must be called
1313 * before zeroing all objects reachable from scavenged_static_objects
1314 * in the case of major garbage collections. See GarbageCollect() in
1315 * GC.c.
1316 * Note:
1317 * The mut_once_list of the oldest generation must also be traversed?
1318 * Why? Because if the evacuation of an object pointed to by a static
1319 * indirection object fails, it is put back to the mut_once_list of
1320 * the oldest generation.
1321 * However, this is not necessary because any static indirection objects
1322 * are just traversed through to reach dynamic objects. In other words,
1323 * they are not taken into consideration in computing retainer sets.
1324 *
1325 * SDM (20/7/2011): I don't think this is doing anything sensible,
1326 * because it happens before retainerProfile() and at the beginning of
1327 * retainerProfil() we change the sense of 'flip'. So all of the
1328 * calls to traverseMaybeInitClosureData() here are initialising retainer sets
1329 * with the wrong flip. Also, I don't see why this is necessary. I
1330 * added a traverseMaybeInitClosureData() call to retainRoot(), and that seems
1331 * to have fixed the assertion failure in retainerSetOf() I was
1332 * encountering.
1333 */
1334 void
resetStaticObjectForProfiling(StgClosure * static_objects)1335 resetStaticObjectForProfiling( StgClosure *static_objects )
1336 {
1337 uint32_t count = 0;
1338 StgClosure *p;
1339
1340 p = static_objects;
1341 while (p != END_OF_STATIC_OBJECT_LIST) {
1342 p = UNTAG_STATIC_LIST_PTR(p);
1343 count++;
1344
1345 switch (get_itbl(p)->type) {
1346 case IND_STATIC:
1347 // Since we do not compute the retainer set of any
1348 // IND_STATIC object, we don't have to reset its retainer
1349 // field.
1350 p = (StgClosure*)*IND_STATIC_LINK(p);
1351 break;
1352 case THUNK_STATIC:
1353 traverseMaybeInitClosureData(p);
1354 p = (StgClosure*)*THUNK_STATIC_LINK(p);
1355 break;
1356 case FUN_STATIC:
1357 case CONSTR:
1358 case CONSTR_1_0:
1359 case CONSTR_2_0:
1360 case CONSTR_1_1:
1361 case CONSTR_NOCAF:
1362 traverseMaybeInitClosureData(p);
1363 p = (StgClosure*)*STATIC_LINK(get_itbl(p), p);
1364 break;
1365 default:
1366 barf("resetStaticObjectForProfiling: %p (%lu)",
1367 p, (unsigned long)get_itbl(p)->type);
1368 break;
1369 }
1370 }
1371
1372 debug("count in scavenged_static_objects = %d\n", count);
1373 }
1374
1375 #endif /* PROFILING */
1376