1 /* -----------------------------------------------------------------------------
2  *
3  * (c) The GHC Team, 2001,2019
4  * Author: Sungwoo Park, Daniel Gröber
5  *
6  * Generalised profiling heap traversal.
7  *
8  * ---------------------------------------------------------------------------*/
9 
10 #if defined(PROFILING)
11 
12 #include "PosixSource.h"
13 #include "Rts.h"
14 #include "sm/Storage.h"
15 
16 #include "TraverseHeap.h"
17 
18 /** Note [Profiling heap traversal visited bit]
19  *
20  * If the RTS is compiled with profiling enabled StgProfHeader can be used by
21  * profiling code to store per-heap object information.
22  *
23  * The generic heap traversal code reserves the least significant bit of the
24  * largest members of the 'trav' union to decide whether we've already visited a
25  * given closure in the current pass or not. The rest of the field is free to be
26  * used by the calling profiler.
27  *
28  * By doing things this way we implicitly assume that the LSB of the largest
29  * field in the 'trav' union is insignificant. This is true at least for the
30  * word aligned pointers which the retainer profiler currently stores there and
31  * should be maintained by new users of the 'trav' union for example by shifting
32  * the real data up by one bit.
33  *
34  * Since we don't want to have to scan the entire heap a second time just to
35  * reset the per-object visitied bit before/after the real traversal we make the
36  * interpretation of this bit dependent on the value of a global variable,
37  * 'flip'.
38  *
39  * When the 'trav' bit is equal to the value of 'flip' the closure data is
40  * valid otherwise not (see isTravDataValid). We then invert the value of 'flip'
41  * on each heap traversal (see traverseWorkStack), in effect marking all
42  * closure's data as invalid at once.
43  *
44  * There are some complications with this approach, namely: static objects and
45  * mutable data. There we do just go over all existing objects to reset the bit
46  * manually. See 'resetStaticObjectForProfiling' and 'resetMutableObjects'.
47  */
48 StgWord flip = 0;
49 
50 #define setTravDataToZero(c) \
51   (c)->header.prof.hp.trav.lsb = flip
52 
53 typedef enum {
54     // Object with fixed layout. Keeps an information about that
55     // element was processed. (stackPos.next.step)
56     posTypeStep,
57     // Description of the pointers-first heap object. Keeps information
58     // about layout. (stackPos.next.ptrs)
59     posTypePtrs,
60     // Keeps SRT bitmap (stackPos.next.srt)
61     posTypeSRT,
62     // Keeps a new object that was not inspected yet. Keeps a parent
63     // element (stackPos.next.parent)
64     posTypeFresh
65 } nextPosType;
66 
67 typedef union {
68     // fixed layout or layout specified by a field in the closure
69     StgWord step;
70 
71     // layout.payload
72     struct {
73         // See StgClosureInfo in InfoTables.h
74         StgHalfWord pos;
75         StgHalfWord ptrs;
76         StgPtr payload;
77     } ptrs;
78 
79     // SRT
80     struct {
81         StgClosure *srt;
82     } srt;
83 
84     // parent of the current closure, used only when posTypeFresh is set
85     StgClosure *cp;
86 } nextPos;
87 
88 /**
89  * Position pointer into a closure. Determines what the next element to return
90  * for a stackElement is.
91  */
92 typedef struct {
93     nextPosType type;
94     nextPos next;
95 } stackPos;
96 
97 /**
98  * An element of the traversal work-stack. Besides the closure itself this also
99  * stores it's parent and associated data.
100  *
101  * When 'info.type == posTypeFresh' a 'stackElement' represents just one
102  * closure, namely 'c' and 'cp' being it's parent. Otherwise 'info' specifies an
103  * offset into the children of 'c'. This is to support returning a closure's
104  * children one-by-one without pushing one element per child onto the stack. See
105  * traversePushChildren() and traversePop().
106  *
107  */
108 typedef struct stackElement_ {
109     stackPos info;
110     StgClosure *c;
111     stackData data;
112 } stackElement;
113 
114 
115 #if defined(DEBUG)
116 unsigned int g_traversalDebugLevel = 0;
debug(const char * s,...)117 static inline void debug(const char *s, ...)
118 {
119     va_list ap;
120 
121     if(g_traversalDebugLevel == 0)
122         return;
123 
124     va_start(ap,s);
125     vdebugBelch(s, ap);
126     va_end(ap);
127 }
128 #else
129 #define debug(...)
130 #endif
131 
132 // number of blocks allocated for one stack
133 #define BLOCKS_IN_STACK 1
134 
135 /* -----------------------------------------------------------------------------
136  * Add a new block group to the stack.
137  * Invariants:
138  *  currentStack->link == s.
139  * -------------------------------------------------------------------------- */
140 STATIC_INLINE void
newStackBlock(traverseState * ts,bdescr * bd)141 newStackBlock( traverseState *ts, bdescr *bd )
142 {
143     ts->currentStack = bd;
144     ts->stackTop     = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
145     ts->stackBottom  = (stackElement *)bd->start;
146     ts->stackLimit   = (stackElement *)ts->stackTop;
147     bd->free     = (StgPtr)ts->stackLimit;
148 }
149 
150 /* -----------------------------------------------------------------------------
151  * Return to the previous block group.
152  * Invariants:
153  *   s->link == currentStack.
154  * -------------------------------------------------------------------------- */
155 STATIC_INLINE void
returnToOldStack(traverseState * ts,bdescr * bd)156 returnToOldStack( traverseState *ts, bdescr *bd )
157 {
158     ts->currentStack = bd;
159     ts->stackTop = (stackElement *)bd->free;
160     ts->stackBottom = (stackElement *)bd->start;
161     ts->stackLimit = (stackElement *)(bd->start + BLOCK_SIZE_W * bd->blocks);
162     bd->free = (StgPtr)ts->stackLimit;
163 }
164 
165 /**
166  *  Initializes the traversal work-stack.
167  */
168 void
initializeTraverseStack(traverseState * ts)169 initializeTraverseStack( traverseState *ts )
170 {
171     if (ts->firstStack != NULL) {
172         freeChain(ts->firstStack);
173     }
174 
175     ts->firstStack = allocGroup(BLOCKS_IN_STACK);
176     ts->firstStack->link = NULL;
177     ts->firstStack->u.back = NULL;
178 
179     ts->stackSize = 0;
180     ts->maxStackSize = 0;
181 
182     newStackBlock(ts, ts->firstStack);
183 }
184 
185 /**
186  * Frees all the block groups in the traversal works-stack.
187  *
188  * Invariants:
189  *   firstStack != NULL
190  */
191 void
closeTraverseStack(traverseState * ts)192 closeTraverseStack( traverseState *ts )
193 {
194     freeChain(ts->firstStack);
195     ts->firstStack = NULL;
196 }
197 
198 /**
199  * Returns the largest stack size encountered during the traversal.
200  */
201 int
getTraverseStackMaxSize(traverseState * ts)202 getTraverseStackMaxSize(traverseState *ts)
203 {
204     return ts->maxStackSize;
205 }
206 
207 /**
208  * Returns true if the whole stack is empty.
209  **/
210 STATIC_INLINE bool
isEmptyWorkStack(traverseState * ts)211 isEmptyWorkStack( traverseState *ts )
212 {
213     return (ts->firstStack == ts->currentStack) && ts->stackTop == ts->stackLimit;
214 }
215 
216 /**
217  * Returns size of stack
218  */
219 W_
traverseWorkStackBlocks(traverseState * ts)220 traverseWorkStackBlocks(traverseState *ts)
221 {
222     bdescr* bd;
223     W_ res = 0;
224 
225     for (bd = ts->firstStack; bd != NULL; bd = bd->link)
226       res += bd->blocks;
227 
228     return res;
229 }
230 
231 /**
232  * Initializes *info from ptrs and payload.
233  *
234  * Invariants:
235  *
236  *   payload[] begins with ptrs pointers followed by non-pointers.
237  */
238 STATIC_INLINE void
init_ptrs(stackPos * info,uint32_t ptrs,StgPtr payload)239 init_ptrs( stackPos *info, uint32_t ptrs, StgPtr payload )
240 {
241     info->type              = posTypePtrs;
242     info->next.ptrs.pos     = 0;
243     info->next.ptrs.ptrs    = ptrs;
244     info->next.ptrs.payload = payload;
245 }
246 
247 /**
248  * Find the next object from *info.
249  */
250 STATIC_INLINE StgClosure *
find_ptrs(stackPos * info)251 find_ptrs( stackPos *info )
252 {
253     if (info->next.ptrs.pos < info->next.ptrs.ptrs) {
254         return (StgClosure *)info->next.ptrs.payload[info->next.ptrs.pos++];
255     } else {
256         return NULL;
257     }
258 }
259 
260 /**
261  *  Initializes *info from SRT information stored in *infoTable.
262  */
263 STATIC_INLINE void
init_srt_fun(stackPos * info,const StgFunInfoTable * infoTable)264 init_srt_fun( stackPos *info, const StgFunInfoTable *infoTable )
265 {
266     info->type = posTypeSRT;
267     if (infoTable->i.srt) {
268         info->next.srt.srt = (StgClosure*)GET_FUN_SRT(infoTable);
269     } else {
270         info->next.srt.srt = NULL;
271     }
272 }
273 
274 STATIC_INLINE void
init_srt_thunk(stackPos * info,const StgThunkInfoTable * infoTable)275 init_srt_thunk( stackPos *info, const StgThunkInfoTable *infoTable )
276 {
277     info->type = posTypeSRT;
278     if (infoTable->i.srt) {
279         info->next.srt.srt = (StgClosure*)GET_SRT(infoTable);
280     } else {
281         info->next.srt.srt = NULL;
282     }
283 }
284 
285 /**
286  * Find the next object from *info.
287  */
288 STATIC_INLINE StgClosure *
find_srt(stackPos * info)289 find_srt( stackPos *info )
290 {
291     StgClosure *c;
292     if (info->type == posTypeSRT) {
293         c = info->next.srt.srt;
294         info->next.srt.srt = NULL;
295         return c;
296     }
297 
298     return NULL;
299 }
300 
301 /**
302  * Push a set of closures, represented by a single 'stackElement', onto the
303  * traversal work-stack.
304  */
305 static void
pushStackElement(traverseState * ts,const stackElement se)306 pushStackElement(traverseState *ts, const stackElement se)
307 {
308     bdescr *nbd;      // Next Block Descriptor
309     if (ts->stackTop - 1 < ts->stackBottom) {
310         debug("pushStackElement() to the next stack.\n");
311 
312         // currentStack->free is updated when the active stack is switched
313         // to the next stack.
314         ts->currentStack->free = (StgPtr)ts->stackTop;
315 
316         if (ts->currentStack->link == NULL) {
317             nbd = allocGroup(BLOCKS_IN_STACK);
318             nbd->link = NULL;
319             nbd->u.back = ts->currentStack;
320             ts->currentStack->link = nbd;
321         } else
322             nbd = ts->currentStack->link;
323 
324         newStackBlock(ts, nbd);
325     }
326 
327     // adjust stackTop (acutal push)
328     ts->stackTop--;
329     // If the size of stackElement was huge, we would better replace the
330     // following statement by either a memcpy() call or a switch statement
331     // on the type of the element. Currently, the size of stackElement is
332     // small enough (5 words) that this direct assignment seems to be enough.
333     *ts->stackTop = se;
334 
335     ts->stackSize++;
336     if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize;
337     ASSERT(ts->stackSize >= 0);
338     debug("stackSize = %d\n", ts->stackSize);
339 }
340 
341 /**
342  * Push a single closure onto the traversal work-stack.
343  *
344  *  cp   - object's parent
345  *  c    - closure
346  *  data - data associated with closure.
347  */
348 inline void
traversePushClosure(traverseState * ts,StgClosure * c,StgClosure * cp,stackData data)349 traversePushClosure(traverseState *ts, StgClosure *c, StgClosure *cp, stackData data) {
350     stackElement se;
351 
352     se.c = c;
353     se.info.next.cp = cp;
354     se.data = data;
355     se.info.type = posTypeFresh;
356 
357     pushStackElement(ts, se);
358 };
359 
360 /**
361  * traversePushChildren() extracts the first child of 'c' in 'first_child' and
362  * conceptually pushes all remaining children of 'c' onto the traversal stack
363  * while associating 'data' with the pushed elements to be returned upon poping.
364  *
365  * If 'c' has no children, 'first_child' is set to NULL and nothing is pushed
366  * onto the stack.
367  *
368  * If 'c' has only one child, 'first_child' is set to that child and nothing is
369  * pushed onto the stack.
370  *
371  * Invariants:
372  *
373  *  - 'c' is not any of TSO, AP, PAP, AP_STACK, which means that there cannot
374  *       be any stack objects.
375  *
376  * Note: SRTs are considered to be children as well.
377  *
378  * Note: When pushing onto the stack we only really push one 'stackElement'
379  * representing all children onto the stack. See traversePop()
380  */
381 STATIC_INLINE void
traversePushChildren(traverseState * ts,StgClosure * c,stackData data,StgClosure ** first_child)382 traversePushChildren(traverseState *ts, StgClosure *c, stackData data, StgClosure **first_child)
383 {
384     stackElement se;
385 
386     debug("traversePushChildren(): stackTop = 0x%x\n", ts->stackTop);
387 
388     ASSERT(get_itbl(c)->type != TSO);
389     ASSERT(get_itbl(c)->type != AP_STACK);
390 
391     //
392     // fill in se
393     //
394 
395     se.c = c;
396     se.data = data;
397 
398     // fill in se.info
399     switch (get_itbl(c)->type) {
400         // no child, no SRT
401     case CONSTR_0_1:
402     case CONSTR_0_2:
403     case ARR_WORDS:
404     case COMPACT_NFDATA:
405         *first_child = NULL;
406         return;
407 
408         // one child (fixed), no SRT
409     case MUT_VAR_CLEAN:
410     case MUT_VAR_DIRTY:
411         *first_child = ((StgMutVar *)c)->var;
412         return;
413     case THUNK_SELECTOR:
414         *first_child = ((StgSelector *)c)->selectee;
415         return;
416     case BLACKHOLE:
417         *first_child = ((StgInd *)c)->indirectee;
418         return;
419     case CONSTR_1_0:
420     case CONSTR_1_1:
421         *first_child = c->payload[0];
422         return;
423 
424         // For CONSTR_2_0 and MVAR, we use se.info.step to record the position
425         // of the next child. We do not write a separate initialization code.
426         // Also we do not have to initialize info.type;
427 
428         // two children (fixed), no SRT
429         // need to push a stackElement, but nothing to store in se.info
430     case CONSTR_2_0:
431         *first_child = c->payload[0];         // return the first pointer
432         se.info.type = posTypeStep;
433         se.info.next.step = 2;            // 2 = second
434         break;
435 
436         // three children (fixed), no SRT
437         // need to push a stackElement
438     case MVAR_CLEAN:
439     case MVAR_DIRTY:
440         // head must be TSO and the head of a linked list of TSOs.
441         // Shoule it be a child? Seems to be yes.
442         *first_child = (StgClosure *)((StgMVar *)c)->head;
443         se.info.type = posTypeStep;
444         se.info.next.step = 2;            // 2 = second
445         break;
446 
447         // three children (fixed), no SRT
448     case WEAK:
449         *first_child = ((StgWeak *)c)->key;
450         se.info.type = posTypeStep;
451         se.info.next.step = 2;
452         break;
453 
454         // layout.payload.ptrs, no SRT
455     case TVAR:
456     case CONSTR:
457     case CONSTR_NOCAF:
458     case PRIM:
459     case MUT_PRIM:
460     case BCO:
461         init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
462                   (StgPtr)c->payload);
463         *first_child = find_ptrs(&se.info);
464         if (*first_child == NULL)
465             return;   // no child
466         break;
467 
468         // StgMutArrPtr.ptrs, no SRT
469     case MUT_ARR_PTRS_CLEAN:
470     case MUT_ARR_PTRS_DIRTY:
471     case MUT_ARR_PTRS_FROZEN_CLEAN:
472     case MUT_ARR_PTRS_FROZEN_DIRTY:
473         init_ptrs(&se.info, ((StgMutArrPtrs *)c)->ptrs,
474                   (StgPtr)(((StgMutArrPtrs *)c)->payload));
475         *first_child = find_ptrs(&se.info);
476         if (*first_child == NULL)
477             return;
478         break;
479 
480         // StgMutArrPtr.ptrs, no SRT
481     case SMALL_MUT_ARR_PTRS_CLEAN:
482     case SMALL_MUT_ARR_PTRS_DIRTY:
483     case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
484     case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
485         init_ptrs(&se.info, ((StgSmallMutArrPtrs *)c)->ptrs,
486                   (StgPtr)(((StgSmallMutArrPtrs *)c)->payload));
487         *first_child = find_ptrs(&se.info);
488         if (*first_child == NULL)
489             return;
490         break;
491 
492     // layout.payload.ptrs, SRT
493     case FUN_STATIC:
494     case FUN:           // *c is a heap object.
495     case FUN_2_0:
496         init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs, (StgPtr)c->payload);
497         *first_child = find_ptrs(&se.info);
498         if (*first_child == NULL)
499             // no child from ptrs, so check SRT
500             goto fun_srt_only;
501         break;
502 
503     case THUNK:
504     case THUNK_2_0:
505         init_ptrs(&se.info, get_itbl(c)->layout.payload.ptrs,
506                   (StgPtr)((StgThunk *)c)->payload);
507         *first_child = find_ptrs(&se.info);
508         if (*first_child == NULL)
509             // no child from ptrs, so check SRT
510             goto thunk_srt_only;
511         break;
512 
513         // 1 fixed child, SRT
514     case FUN_1_0:
515     case FUN_1_1:
516         *first_child = c->payload[0];
517         ASSERT(*first_child != NULL);
518         init_srt_fun(&se.info, get_fun_itbl(c));
519         break;
520 
521     case THUNK_1_0:
522     case THUNK_1_1:
523         *first_child = ((StgThunk *)c)->payload[0];
524         ASSERT(*first_child != NULL);
525         init_srt_thunk(&se.info, get_thunk_itbl(c));
526         break;
527 
528     case FUN_0_1:      // *c is a heap object.
529     case FUN_0_2:
530     fun_srt_only:
531         init_srt_fun(&se.info, get_fun_itbl(c));
532         *first_child = find_srt(&se.info);
533         if (*first_child == NULL)
534             return;     // no child
535         break;
536 
537     // SRT only
538     case THUNK_STATIC:
539         ASSERT(get_itbl(c)->srt != 0);
540         /* fall-thru */
541     case THUNK_0_1:
542     case THUNK_0_2:
543     thunk_srt_only:
544         init_srt_thunk(&se.info, get_thunk_itbl(c));
545         *first_child = find_srt(&se.info);
546         if (*first_child == NULL)
547             return;     // no child
548         break;
549 
550     case TREC_CHUNK:
551         *first_child = (StgClosure *)((StgTRecChunk *)c)->prev_chunk;
552         se.info.type = posTypeStep;
553         se.info.next.step = 0;  // entry no.
554         break;
555 
556         // cannot appear
557     case PAP:
558     case AP:
559     case AP_STACK:
560     case TSO:
561     case STACK:
562     case IND_STATIC:
563         // stack objects
564     case UPDATE_FRAME:
565     case CATCH_FRAME:
566     case UNDERFLOW_FRAME:
567     case STOP_FRAME:
568     case RET_BCO:
569     case RET_SMALL:
570     case RET_BIG:
571         // invalid objects
572     case IND:
573     case INVALID_OBJECT:
574     default:
575         barf("Invalid object *c in push(): %d", get_itbl(c)->type);
576         return;
577     }
578 
579     // se.info.next.cp has to be initialized when type==posTypeFresh. We don't
580     // do that here though. So type must be !=posTypeFresh.
581     ASSERT(se.info.type != posTypeFresh);
582 
583     pushStackElement(ts, se);
584 }
585 
586 /**
587  *  popStackElement(): Remove a depleted stackElement from the top of the
588  *  traversal work-stack.
589  *
590  *  Invariants:
591  *    stackTop cannot be equal to stackLimit unless the whole stack is
592  *    empty, in which case popStackElement() is not allowed.
593  */
594 static void
popStackElement(traverseState * ts)595 popStackElement(traverseState *ts) {
596     debug("popStackElement(): stackTop = 0x%x\n", ts->stackTop);
597 
598     ASSERT(ts->stackTop != ts->stackLimit);
599     ASSERT(!isEmptyWorkStack(ts));
600 
601     // <= (instead of <) is wrong!
602     if (ts->stackTop + 1 < ts->stackLimit) {
603         ts->stackTop++;
604 
605         ts->stackSize--;
606         if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize;
607         ASSERT(ts->stackSize >= 0);
608         debug("stackSize = (--) %d\n", ts->stackSize);
609 
610         return;
611     }
612 
613     bdescr *pbd;    // Previous Block Descriptor
614 
615     debug("popStackElement() to the previous stack.\n");
616 
617     ASSERT(ts->stackTop + 1 == ts->stackLimit);
618     ASSERT(ts->stackBottom == (stackElement *)ts->currentStack->start);
619 
620     if (ts->firstStack == ts->currentStack) {
621         // The stack is completely empty.
622         ts->stackTop++;
623         ASSERT(ts->stackTop == ts->stackLimit);
624 
625         ts->stackSize--;
626         if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize;
627         ASSERT(ts->stackSize >= 0);
628         debug("stackSize = %d\n", ts->stackSize);
629 
630         return;
631     }
632 
633     // currentStack->free is updated when the active stack is switched back
634     // to the previous stack.
635     ts->currentStack->free = (StgPtr)ts->stackLimit;
636 
637     // find the previous block descriptor
638     pbd = ts->currentStack->u.back;
639     ASSERT(pbd != NULL);
640 
641     returnToOldStack(ts, pbd);
642 
643     ts->stackSize--;
644     if (ts->stackSize > ts->maxStackSize) ts->maxStackSize = ts->stackSize;
645     ASSERT(ts->stackSize >= 0);
646     debug("stackSize = %d\n", ts->stackSize);
647 }
648 
649 /**
650  *  Finds the next object to be considered for retainer profiling and store
651  *  its pointer to *c.
652  *
653  *  If the unprocessed object was stored in the stack (posTypeFresh), the
654  *  this object is returned as-is. Otherwise Test if the topmost stack
655  *  element indicates that more objects are left,
656  *  and if so, retrieve the first object and store its pointer to *c. Also,
657  *  set *cp and *data appropriately, both of which are stored in the stack
658  *  element.  The topmost stack element then is overwritten so as for it to now
659  *  denote the next object.
660  *
661  *  If the topmost stack element indicates no more objects are left, pop
662  *  off the stack element until either an object can be retrieved or
663  *  the work-stack becomes empty, indicated by true returned by
664  *  isEmptyWorkStack(), in which case *c is set to NULL.
665  *
666  *  Note:
667  *
668  *    It is okay to call this function even when the work-stack is empty.
669  */
670 STATIC_INLINE void
traversePop(traverseState * ts,StgClosure ** c,StgClosure ** cp,stackData * data)671 traversePop(traverseState *ts, StgClosure **c, StgClosure **cp, stackData *data)
672 {
673     stackElement *se;
674 
675     debug("traversePop(): stackTop = 0x%x\n", ts->stackTop);
676 
677     // Is this the last internal element? If so instead of modifying the current
678     // stackElement in place we actually remove it from the stack.
679     bool last = false;
680 
681     do {
682         if (isEmptyWorkStack(ts)) {
683             *c = NULL;
684             return;
685         }
686 
687         // Note: Below every `break`, where the loop condition is true, must be
688         // accompanied by a popStackElement() otherwise this is an infinite
689         // loop.
690         se = ts->stackTop;
691 
692         // If this is a top-level element, you should pop that out.
693         if (se->info.type == posTypeFresh) {
694             *cp = se->info.next.cp;
695             *c = se->c;
696             *data = se->data;
697             popStackElement(ts);
698             return;
699         }
700 
701         // Note: The first ptr of all of these was already returned as
702         // *fist_child in push(), so we always start with the second field.
703         switch (get_itbl(se->c)->type) {
704             // two children (fixed), no SRT
705             // nothing in se.info
706         case CONSTR_2_0:
707             *c = se->c->payload[1];
708             last = true;
709             goto out;
710 
711             // three children (fixed), no SRT
712             // need to push a stackElement
713         case MVAR_CLEAN:
714         case MVAR_DIRTY:
715             if (se->info.next.step == 2) {
716                 *c = (StgClosure *)((StgMVar *)se->c)->tail;
717                 se->info.next.step++;             // move to the next step
718                 // no popStackElement
719             } else {
720                 *c = ((StgMVar *)se->c)->value;
721                 last = true;
722             }
723             goto out;
724 
725             // three children (fixed), no SRT
726         case WEAK:
727             if (se->info.next.step == 2) {
728                 *c = ((StgWeak *)se->c)->value;
729                 se->info.next.step++;
730                 // no popStackElement
731             } else {
732                 *c = ((StgWeak *)se->c)->finalizer;
733                 last = true;
734             }
735             goto out;
736 
737         case TREC_CHUNK: {
738             // These are pretty complicated: we have N entries, each
739             // of which contains 3 fields that we want to follow.  So
740             // we divide the step counter: the 2 low bits indicate
741             // which field, and the rest of the bits indicate the
742             // entry number (starting from zero).
743             TRecEntry *entry;
744             uint32_t entry_no = se->info.next.step >> 2;
745             uint32_t field_no = se->info.next.step & 3;
746             if (entry_no == ((StgTRecChunk *)se->c)->next_entry_idx) {
747                 *c = NULL;
748                 popStackElement(ts);
749                 break; // this breaks out of the switch not the loop
750             }
751             entry = &((StgTRecChunk *)se->c)->entries[entry_no];
752             if (field_no == 0) {
753                 *c = (StgClosure *)entry->tvar;
754             } else if (field_no == 1) {
755                 *c = entry->expected_value;
756             } else {
757                 *c = entry->new_value;
758             }
759             se->info.next.step++;
760             goto out;
761         }
762 
763         case TVAR:
764         case CONSTR:
765         case PRIM:
766         case MUT_PRIM:
767         case BCO:
768             // StgMutArrPtr.ptrs, no SRT
769         case MUT_ARR_PTRS_CLEAN:
770         case MUT_ARR_PTRS_DIRTY:
771         case MUT_ARR_PTRS_FROZEN_CLEAN:
772         case MUT_ARR_PTRS_FROZEN_DIRTY:
773         case SMALL_MUT_ARR_PTRS_CLEAN:
774         case SMALL_MUT_ARR_PTRS_DIRTY:
775         case SMALL_MUT_ARR_PTRS_FROZEN_CLEAN:
776         case SMALL_MUT_ARR_PTRS_FROZEN_DIRTY:
777             *c = find_ptrs(&se->info);
778             if (*c == NULL) {
779                 popStackElement(ts);
780                 break; // this breaks out of the switch not the loop
781             }
782             goto out;
783 
784             // layout.payload.ptrs, SRT
785         case FUN:         // always a heap object
786         case FUN_STATIC:
787         case FUN_2_0:
788             if (se->info.type == posTypePtrs) {
789                 *c = find_ptrs(&se->info);
790                 if (*c != NULL) {
791                     goto out;
792                 }
793                 init_srt_fun(&se->info, get_fun_itbl(se->c));
794             }
795             goto do_srt;
796 
797         case THUNK:
798         case THUNK_2_0:
799             if (se->info.type == posTypePtrs) {
800                 *c = find_ptrs(&se->info);
801                 if (*c != NULL) {
802                     goto out;
803                 }
804                 init_srt_thunk(&se->info, get_thunk_itbl(se->c));
805             }
806             goto do_srt;
807 
808             // SRT
809         do_srt:
810         case THUNK_STATIC:
811         case FUN_0_1:
812         case FUN_0_2:
813         case THUNK_0_1:
814         case THUNK_0_2:
815         case FUN_1_0:
816         case FUN_1_1:
817         case THUNK_1_0:
818         case THUNK_1_1:
819             *c = find_srt(&se->info);
820             if(*c == NULL) {
821                 popStackElement(ts);
822                 break; // this breaks out of the switch not the loop
823             }
824             goto out;
825 
826             // no child (fixed), no SRT
827         case CONSTR_0_1:
828         case CONSTR_0_2:
829         case ARR_WORDS:
830             // one child (fixed), no SRT
831         case MUT_VAR_CLEAN:
832         case MUT_VAR_DIRTY:
833         case THUNK_SELECTOR:
834         case CONSTR_1_1:
835             // cannot appear
836         case PAP:
837         case AP:
838         case AP_STACK:
839         case TSO:
840         case STACK:
841         case IND_STATIC:
842         case CONSTR_NOCAF:
843             // stack objects
844         case UPDATE_FRAME:
845         case CATCH_FRAME:
846         case UNDERFLOW_FRAME:
847         case STOP_FRAME:
848         case RET_BCO:
849         case RET_SMALL:
850         case RET_BIG:
851             // invalid objects
852         case IND:
853         case INVALID_OBJECT:
854         default:
855             barf("Invalid object *c in traversePop(): %d", get_itbl(se->c)->type);
856             return;
857         }
858     } while (*c == NULL);
859 
860 out:
861 
862     ASSERT(*c != NULL);
863 
864     *cp = se->c;
865     *data = se->data;
866 
867     if(last)
868         popStackElement(ts);
869 
870     return;
871 
872 }
873 
874 /**
875  * Make sure a closure's profiling data is initialized to zero if it does not
876  * conform to the current value of the flip bit, returns true in this case.
877  *
878  * See Note [Profiling heap traversal visited bit].
879  */
880 bool
traverseMaybeInitClosureData(StgClosure * c)881 traverseMaybeInitClosureData(StgClosure *c)
882 {
883     if (!isTravDataValid(c)) {
884         setTravDataToZero(c);
885         return true;
886     }
887     return false;
888 }
889 
890 /**
891  * Call traversePushClosure for each of the closures covered by a large bitmap.
892  */
893 static void
traverseLargeBitmap(traverseState * ts,StgPtr p,StgLargeBitmap * large_bitmap,uint32_t size,StgClosure * c,stackData data)894 traverseLargeBitmap (traverseState *ts, StgPtr p, StgLargeBitmap *large_bitmap,
895                      uint32_t size, StgClosure *c, stackData data)
896 {
897     uint32_t i, b;
898     StgWord bitmap;
899 
900     b = 0;
901     bitmap = large_bitmap->bitmap[b];
902     for (i = 0; i < size; ) {
903         if ((bitmap & 1) == 0) {
904             traversePushClosure(ts, (StgClosure *)*p, c, data);
905         }
906         i++;
907         p++;
908         if (i % BITS_IN(W_) == 0) {
909             b++;
910             bitmap = large_bitmap->bitmap[b];
911         } else {
912             bitmap = bitmap >> 1;
913         }
914     }
915 }
916 
917 STATIC_INLINE StgPtr
traverseSmallBitmap(traverseState * ts,StgPtr p,uint32_t size,StgWord bitmap,StgClosure * c,stackData data)918 traverseSmallBitmap (traverseState *ts, StgPtr p, uint32_t size, StgWord bitmap,
919                      StgClosure *c, stackData data)
920 {
921     while (size > 0) {
922         if ((bitmap & 1) == 0) {
923             traversePushClosure(ts, (StgClosure *)*p, c, data);
924         }
925         p++;
926         bitmap = bitmap >> 1;
927         size--;
928     }
929     return p;
930 }
931 
932 /**
933  *  traversePushStack(ts, cp, data, stackStart, stackEnd) pushes all the objects
934  *  in the STG stack-chunk from stackStart to stackEnd onto the traversal
935  *  work-stack with 'c' and 'data' being their parent and associated data,
936  *  respectively.
937  *
938  *  Invariants:
939  *
940  *    *cp is one of the following: TSO, AP_STACK.
941  *
942  *    stackStart < stackEnd.
943  *
944  *    If *c is TSO, its state is not ThreadComplete,or ThreadKilled,
945  *    which means that its stack is ready to process.
946  *
947  *  Note:
948  *
949  *    This code was almost plagiarzied from GC.c! For each pointer,
950  *    traversePushClosure() is invoked instead of evacuate().
951  */
952 static void
traversePushStack(traverseState * ts,StgClosure * cp,stackData data,StgPtr stackStart,StgPtr stackEnd)953 traversePushStack(traverseState *ts, StgClosure *cp, stackData data,
954                   StgPtr stackStart, StgPtr stackEnd)
955 {
956     StgPtr p;
957     const StgRetInfoTable *info;
958     StgWord bitmap;
959     uint32_t size;
960 
961     ASSERT(get_itbl(cp)->type == STACK);
962 
963     p = stackStart;
964     while (p < stackEnd) {
965         info = get_ret_itbl((StgClosure *)p);
966 
967         switch(info->i.type) {
968 
969         case UPDATE_FRAME:
970             traversePushClosure(ts, ((StgUpdateFrame *)p)->updatee, cp, data);
971             p += sizeofW(StgUpdateFrame);
972             continue;
973 
974         case UNDERFLOW_FRAME:
975         case STOP_FRAME:
976         case CATCH_FRAME:
977         case CATCH_STM_FRAME:
978         case CATCH_RETRY_FRAME:
979         case ATOMICALLY_FRAME:
980         case RET_SMALL:
981             bitmap = BITMAP_BITS(info->i.layout.bitmap);
982             size   = BITMAP_SIZE(info->i.layout.bitmap);
983             p++;
984             p = traverseSmallBitmap(ts, p, size, bitmap, cp, data);
985 
986         follow_srt:
987             if (info->i.srt) {
988                 traversePushClosure(ts, GET_SRT(info), cp, data);
989             }
990             continue;
991 
992         case RET_BCO: {
993             StgBCO *bco;
994 
995             p++;
996             traversePushClosure(ts, (StgClosure*)*p, cp, data);
997             bco = (StgBCO *)*p;
998             p++;
999             size = BCO_BITMAP_SIZE(bco);
1000             traverseLargeBitmap(ts, p, BCO_BITMAP(bco), size, cp, data);
1001             p += size;
1002             continue;
1003         }
1004 
1005             // large bitmap (> 32 entries, or > 64 on a 64-bit machine)
1006         case RET_BIG:
1007             size = GET_LARGE_BITMAP(&info->i)->size;
1008             p++;
1009             traverseLargeBitmap(ts, p, GET_LARGE_BITMAP(&info->i),
1010                                 size, cp, data);
1011             p += size;
1012             // and don't forget to follow the SRT
1013             goto follow_srt;
1014 
1015         case RET_FUN: {
1016             StgRetFun *ret_fun = (StgRetFun *)p;
1017             const StgFunInfoTable *fun_info;
1018 
1019             traversePushClosure(ts, ret_fun->fun, cp, data);
1020             fun_info = get_fun_itbl(UNTAG_CONST_CLOSURE(ret_fun->fun));
1021 
1022             p = (P_)&ret_fun->payload;
1023             switch (fun_info->f.fun_type) {
1024             case ARG_GEN:
1025                 bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
1026                 size = BITMAP_SIZE(fun_info->f.b.bitmap);
1027                 p = traverseSmallBitmap(ts, p, size, bitmap, cp, data);
1028                 break;
1029             case ARG_GEN_BIG:
1030                 size = GET_FUN_LARGE_BITMAP(fun_info)->size;
1031                 traverseLargeBitmap(ts, p, GET_FUN_LARGE_BITMAP(fun_info),
1032                                     size, cp, data);
1033                 p += size;
1034                 break;
1035             default:
1036                 bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
1037                 size = BITMAP_SIZE(stg_arg_bitmaps[fun_info->f.fun_type]);
1038                 p = traverseSmallBitmap(ts, p, size, bitmap, cp, data);
1039                 break;
1040             }
1041             goto follow_srt;
1042         }
1043 
1044         default:
1045             barf("Invalid object found in traversePushStack(): %d",
1046                  (int)(info->i.type));
1047         }
1048     }
1049 }
1050 
1051 /**
1052  * Call traversePushClosure for each of the children of a PAP/AP
1053  */
1054 STATIC_INLINE StgPtr
traversePAP(traverseState * ts,StgClosure * pap,stackData data,StgClosure * fun,StgClosure ** payload,StgWord n_args)1055 traversePAP (traverseState *ts,
1056                     StgClosure *pap,    /* NOT tagged */
1057                     stackData data,
1058                     StgClosure *fun,    /* tagged */
1059                     StgClosure** payload, StgWord n_args)
1060 {
1061     StgPtr p;
1062     StgWord bitmap;
1063     const StgFunInfoTable *fun_info;
1064 
1065     traversePushClosure(ts, fun, pap, data);
1066     fun = UNTAG_CLOSURE(fun);
1067     fun_info = get_fun_itbl(fun);
1068     ASSERT(fun_info->i.type != PAP);
1069 
1070     p = (StgPtr)payload;
1071 
1072     switch (fun_info->f.fun_type) {
1073     case ARG_GEN:
1074         bitmap = BITMAP_BITS(fun_info->f.b.bitmap);
1075         p = traverseSmallBitmap(ts, p, n_args, bitmap,
1076                                 pap, data);
1077         break;
1078     case ARG_GEN_BIG:
1079         traverseLargeBitmap(ts, p, GET_FUN_LARGE_BITMAP(fun_info),
1080                             n_args, pap, data);
1081         p += n_args;
1082         break;
1083     case ARG_BCO:
1084         traverseLargeBitmap(ts, (StgPtr)payload, BCO_BITMAP(fun),
1085                             n_args, pap, data);
1086         p += n_args;
1087         break;
1088     default:
1089         bitmap = BITMAP_BITS(stg_arg_bitmaps[fun_info->f.fun_type]);
1090         p = traverseSmallBitmap(ts, p, n_args, bitmap, pap, data);
1091         break;
1092     }
1093     return p;
1094 }
1095 
1096 static void
resetMutableObjects(void)1097 resetMutableObjects(void)
1098 {
1099     uint32_t g, n;
1100     bdescr *bd;
1101     StgPtr ml;
1102 
1103     // The following code resets the 'trav' field of each unvisited mutable
1104     // object.
1105     for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1106         // NOT true: even G0 has a block on its mutable list
1107         // ASSERT(g != 0 || (generations[g].mut_list == NULL));
1108 
1109         // Traversing through mut_list is necessary
1110         // because we can find MUT_VAR objects which have not been
1111         // visited during heap traversal.
1112         for (n = 0; n < n_capabilities; n++) {
1113           for (bd = capabilities[n]->mut_lists[g]; bd != NULL; bd = bd->link) {
1114             for (ml = bd->start; ml < bd->free; ml++) {
1115 
1116                 traverseMaybeInitClosureData((StgClosure *)*ml);
1117             }
1118           }
1119         }
1120     }
1121 }
1122 
1123 /**
1124  * Traverse all closures on the traversal work-stack, calling 'visit_cb' on each
1125  * closure. See 'visitClosure_cb' for details. This function flips the 'flip'
1126  * bit and hence every closure's profiling data will be reset to zero upon
1127  * visiting. See Note [Profiling heap traversal visited bit].
1128  */
1129 void
traverseWorkStack(traverseState * ts,visitClosure_cb visit_cb)1130 traverseWorkStack(traverseState *ts, visitClosure_cb visit_cb)
1131 {
1132     // first_child = first child of c
1133     StgClosure *c, *cp, *first_child;
1134     stackData data, child_data;
1135     StgWord typeOfc;
1136 
1137     // Now we flip the flip bit.
1138     flip = flip ^ 1;
1139 
1140     // c = Current closure                           (possibly tagged)
1141     // cp = Current closure's Parent                 (NOT tagged)
1142     // data = current closures' associated data      (NOT tagged)
1143     // data_out = data to associate with current closure's children
1144 
1145 loop:
1146     traversePop(ts, &c, &cp, &data);
1147 
1148     if (c == NULL) {
1149         debug("maxStackSize= %d\n", ts->maxStackSize);
1150         resetMutableObjects();
1151         return;
1152     }
1153 inner_loop:
1154     c = UNTAG_CLOSURE(c);
1155 
1156     typeOfc = get_itbl(c)->type;
1157 
1158     // special cases
1159     switch (typeOfc) {
1160     case TSO:
1161         if (((StgTSO *)c)->what_next == ThreadComplete ||
1162             ((StgTSO *)c)->what_next == ThreadKilled) {
1163             debug("ThreadComplete or ThreadKilled encountered in traverseWorkStack()\n");
1164             goto loop;
1165         }
1166         break;
1167 
1168     case IND_STATIC:
1169         // We just skip IND_STATIC, so it's never visited.
1170         c = ((StgIndStatic *)c)->indirectee;
1171         goto inner_loop;
1172 
1173     case CONSTR_NOCAF:
1174         // static objects with no pointers out, so goto loop.
1175 
1176         // It is not just enough not to visit *c; it is
1177         // mandatory because CONSTR_NOCAF are not reachable from
1178         // scavenged_static_objects, the list from which is assumed to traverse
1179         // all static objects after major garbage collections.
1180         goto loop;
1181 
1182     case THUNK_STATIC:
1183         if (get_itbl(c)->srt == 0) {
1184             // No need to visit *c; no dynamic objects are reachable from it.
1185             //
1186             // Static objects: if we traverse all the live closures,
1187             // including static closures, during each heap census then
1188             // we will observe that some static closures appear and
1189             // disappear.  eg. a closure may contain a pointer to a
1190             // static function 'f' which is not otherwise reachable
1191             // (it doesn't indirectly point to any CAFs, so it doesn't
1192             // appear in any SRTs), so we would find 'f' during
1193             // traversal.  However on the next sweep there may be no
1194             // closures pointing to 'f'.
1195             //
1196             // We must therefore ignore static closures whose SRT is
1197             // empty, because these are exactly the closures that may
1198             // "appear".  A closure with a non-empty SRT, and which is
1199             // still required, will always be reachable.
1200             //
1201             // But what about CONSTR?  Surely these may be able
1202             // to appear, and they don't have SRTs, so we can't
1203             // check.  So for now, we're calling
1204             // resetStaticObjectForProfiling() from the
1205             // garbage collector to reset the retainer sets in all the
1206             // reachable static objects.
1207             goto loop;
1208         }
1209         /* fall-thru */
1210 
1211     case FUN_STATIC: {
1212         const StgInfoTable *info = get_itbl(c);
1213         if (info->srt == 0 && info->layout.payload.ptrs == 0) {
1214             goto loop;
1215         } else {
1216             break;
1217         }
1218     }
1219 
1220     default:
1221         break;
1222     }
1223 
1224     // If this is the first visit to c, initialize its data.
1225     bool first_visit = traverseMaybeInitClosureData(c);
1226     bool traverse_children
1227         = visit_cb(c, cp, data, first_visit, (stackData*)&child_data);
1228     if(!traverse_children)
1229         goto loop;
1230 
1231     // process child
1232 
1233     // Special case closures: we process these all in one go rather
1234     // than attempting to save the current position, because doing so
1235     // would be hard.
1236     switch (typeOfc) {
1237     case STACK:
1238         traversePushStack(ts, c, child_data,
1239                     ((StgStack *)c)->sp,
1240                     ((StgStack *)c)->stack + ((StgStack *)c)->stack_size);
1241         goto loop;
1242 
1243     case TSO:
1244     {
1245         StgTSO *tso = (StgTSO *)c;
1246 
1247         traversePushClosure(ts, (StgClosure *) tso->stackobj, c, child_data);
1248         traversePushClosure(ts, (StgClosure *) tso->blocked_exceptions, c, child_data);
1249         traversePushClosure(ts, (StgClosure *) tso->bq, c, child_data);
1250         traversePushClosure(ts, (StgClosure *) tso->trec, c, child_data);
1251         if (   tso->why_blocked == BlockedOnMVar
1252                || tso->why_blocked == BlockedOnMVarRead
1253                || tso->why_blocked == BlockedOnBlackHole
1254                || tso->why_blocked == BlockedOnMsgThrowTo
1255             ) {
1256             traversePushClosure(ts, tso->block_info.closure, c, child_data);
1257         }
1258         goto loop;
1259     }
1260 
1261     case BLOCKING_QUEUE:
1262     {
1263         StgBlockingQueue *bq = (StgBlockingQueue *)c;
1264         traversePushClosure(ts, (StgClosure *) bq->link,  c, child_data);
1265         traversePushClosure(ts, (StgClosure *) bq->bh,    c, child_data);
1266         traversePushClosure(ts, (StgClosure *) bq->owner, c, child_data);
1267         goto loop;
1268     }
1269 
1270     case PAP:
1271     {
1272         StgPAP *pap = (StgPAP *)c;
1273         traversePAP(ts, c, child_data, pap->fun, pap->payload, pap->n_args);
1274         goto loop;
1275     }
1276 
1277     case AP:
1278     {
1279         StgAP *ap = (StgAP *)c;
1280         traversePAP(ts, c, child_data, ap->fun, ap->payload, ap->n_args);
1281         goto loop;
1282     }
1283 
1284     case AP_STACK:
1285         traversePushClosure(ts, ((StgAP_STACK *)c)->fun, c, child_data);
1286         traversePushStack(ts, c, child_data,
1287                     (StgPtr)((StgAP_STACK *)c)->payload,
1288                     (StgPtr)((StgAP_STACK *)c)->payload +
1289                              ((StgAP_STACK *)c)->size);
1290         goto loop;
1291     }
1292 
1293     traversePushChildren(ts, c, child_data, &first_child);
1294 
1295     // If first_child is null, c has no child.
1296     // If first_child is not null, the top stack element points to the next
1297     // object. traversePushChildren() may or may not push a stackElement on the
1298     // stack.
1299     if (first_child == NULL)
1300         goto loop;
1301 
1302     // (c, cp, data) = (first_child, c, child_data)
1303     data = child_data;
1304     cp = c;
1305     c = first_child;
1306     goto inner_loop;
1307 }
1308 
1309 /**
1310  *  Traverse all static objects for which we compute retainer sets,
1311  *  and reset their rs fields to NULL, which is accomplished by
1312  *  invoking traverseMaybeInitClosureData(). This function must be called
1313  *  before zeroing all objects reachable from scavenged_static_objects
1314  *  in the case of major garbage collections. See GarbageCollect() in
1315  *  GC.c.
1316  *  Note:
1317  *    The mut_once_list of the oldest generation must also be traversed?
1318  *    Why? Because if the evacuation of an object pointed to by a static
1319  *    indirection object fails, it is put back to the mut_once_list of
1320  *    the oldest generation.
1321  *    However, this is not necessary because any static indirection objects
1322  *    are just traversed through to reach dynamic objects. In other words,
1323  *    they are not taken into consideration in computing retainer sets.
1324  *
1325  * SDM (20/7/2011): I don't think this is doing anything sensible,
1326  * because it happens before retainerProfile() and at the beginning of
1327  * retainerProfil() we change the sense of 'flip'.  So all of the
1328  * calls to traverseMaybeInitClosureData() here are initialising retainer sets
1329  * with the wrong flip.  Also, I don't see why this is necessary.  I
1330  * added a traverseMaybeInitClosureData() call to retainRoot(), and that seems
1331  * to have fixed the assertion failure in retainerSetOf() I was
1332  * encountering.
1333  */
1334 void
resetStaticObjectForProfiling(StgClosure * static_objects)1335 resetStaticObjectForProfiling( StgClosure *static_objects )
1336 {
1337     uint32_t count = 0;
1338     StgClosure *p;
1339 
1340     p = static_objects;
1341     while (p != END_OF_STATIC_OBJECT_LIST) {
1342         p = UNTAG_STATIC_LIST_PTR(p);
1343         count++;
1344 
1345         switch (get_itbl(p)->type) {
1346         case IND_STATIC:
1347             // Since we do not compute the retainer set of any
1348             // IND_STATIC object, we don't have to reset its retainer
1349             // field.
1350             p = (StgClosure*)*IND_STATIC_LINK(p);
1351             break;
1352         case THUNK_STATIC:
1353             traverseMaybeInitClosureData(p);
1354             p = (StgClosure*)*THUNK_STATIC_LINK(p);
1355             break;
1356         case FUN_STATIC:
1357         case CONSTR:
1358         case CONSTR_1_0:
1359         case CONSTR_2_0:
1360         case CONSTR_1_1:
1361         case CONSTR_NOCAF:
1362             traverseMaybeInitClosureData(p);
1363             p = (StgClosure*)*STATIC_LINK(get_itbl(p), p);
1364             break;
1365         default:
1366             barf("resetStaticObjectForProfiling: %p (%lu)",
1367                  p, (unsigned long)get_itbl(p)->type);
1368             break;
1369         }
1370     }
1371 
1372     debug("count in scavenged_static_objects = %d\n", count);
1373 }
1374 
1375 #endif /* PROFILING */
1376