1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // Garbage collector.
6
7 #include <unistd.h>
8
9 #include "runtime.h"
10 #include "arch.h"
11 #include "malloc.h"
12 #include "mgc0.h"
13 #include "race.h"
14 #include "go-type.h"
15
16 // Map gccgo field names to gc field names.
17 // Slice aka __go_open_array.
18 #define array __values
19 #define cap __capacity
20 // Iface aka __go_interface
21 #define tab __methods
22 // Eface aka __go_empty_interface.
23 #define type __type_descriptor
24 // Hmap aka __go_map
25 typedef struct __go_map Hmap;
26 // Type aka __go_type_descriptor
27 #define kind __code
28 #define string __reflection
29 #define KindPtr GO_PTR
30 #define KindNoPointers GO_NO_POINTERS
31 // PtrType aka __go_ptr_type
32 #define elem __element_type
33
34 #ifdef USING_SPLIT_STACK
35
36 extern void * __splitstack_find (void *, void *, size_t *, void **, void **,
37 void **);
38
39 extern void * __splitstack_find_context (void *context[10], size_t *, void **,
40 void **, void **);
41
42 #endif
43
44 enum {
45 Debug = 0,
46 DebugMark = 0, // run second pass to check mark
47 CollectStats = 0,
48 ScanStackByFrames = 0,
49 IgnorePreciseGC = 0,
50
51 // Four bits per word (see #defines below).
52 wordsPerBitmapWord = sizeof(void*)*8/4,
53 bitShift = sizeof(void*)*8/4,
54
55 handoffThreshold = 4,
56 IntermediateBufferCapacity = 64,
57
58 // Bits in type information
59 PRECISE = 1,
60 LOOP = 2,
61 PC_BITS = PRECISE | LOOP,
62 };
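// A type-info word (ti) carries a pointer to a GC program in its upper bits
// and the PRECISE/LOOP flags in its low bits; scanblock recovers the program
// with (uintptr*)(ti & ~(uintptr)PC_BITS) and then checks (ti & PRECISE) and
// (ti & LOOP) to decide how to interpret it.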
63
64 // Bits in per-word bitmap.
65 // #defines because enum might not be able to hold the values.
66 //
67 // Each word in the bitmap describes wordsPerBitmapWord words
68 // of heap memory. There are 4 bitmap bits dedicated to each heap word,
69 // so on a 64-bit system there is one bitmap word per 16 heap words.
70 // The bits in the word are packed together by type first, then by
71 // heap location, so each 64-bit bitmap word consists of, from top to bottom,
72 // the 16 bitSpecial bits for the corresponding heap words, then the 16 bitMarked bits,
73 // then the 16 bitNoPointers/bitBlockBoundary bits, then the 16 bitAllocated bits.
74 // This layout makes it easier to iterate over the bits of a given type.
75 //
76 // The bitmap starts at mheap.arena_start and extends *backward* from
77 // there. On a 64-bit system the off'th word in the arena is tracked by
78 // the off/16+1'th word before mheap.arena_start. (On a 32-bit system,
79 // the only difference is that the divisor is 8.)
80 //
81 // To pull out the bits corresponding to a given pointer p, we use:
82 //
83 // off = p - (uintptr*)mheap.arena_start; // word offset
84 // b = (uintptr*)mheap.arena_start - off/wordsPerBitmapWord - 1;
85 // shift = off % wordsPerBitmapWord
86 // bits = *b >> shift;
87 // /* then test bits & bitAllocated, bits & bitMarked, etc. */
88 //
89 #define bitAllocated ((uintptr)1<<(bitShift*0))
90 #define bitNoPointers ((uintptr)1<<(bitShift*1)) /* when bitAllocated is set */
91 #define bitMarked ((uintptr)1<<(bitShift*2)) /* when bitAllocated is set */
92 #define bitSpecial ((uintptr)1<<(bitShift*3)) /* when bitAllocated is set - has finalizer or being profiled */
93 #define bitBlockBoundary ((uintptr)1<<(bitShift*1)) /* when bitAllocated is NOT set */
94
95 #define bitMask (bitBlockBoundary | bitAllocated | bitMarked | bitSpecial)
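//
// Worked example on a 64-bit system (wordsPerBitmapWord = bitShift = 16),
// for the heap word at word offset off = 37 from arena_start:
//
//	b = (uintptr*)mheap.arena_start - 37/16 - 1;  // third bitmap word below arena_start
//	shift = 37 % 16;                              // = 5
//	marked = (*b >> shift) & bitMarked;           // tests bit 2*16+5 = 37 of *b
//
// so the four bits describing one heap word sit at shift, shift+16,
// shift+32 and shift+48 within the same bitmap word.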
96
97 // Holding worldsema grants an M the right to try to stop the world.
98 // The procedure is:
99 //
100 // runtime_semacquire(&runtime_worldsema);
101 // m->gcing = 1;
102 // runtime_stoptheworld();
103 //
104 // ... do stuff ...
105 //
106 // m->gcing = 0;
107 // runtime_semrelease(&runtime_worldsema);
108 // runtime_starttheworld();
109 //
110 uint32 runtime_worldsema = 1;
111
112 static int32 gctrace;
113
114 // The size of Workbuf is N*PageSize (N is currently 2).
115 typedef struct Workbuf Workbuf;
116 struct Workbuf
117 {
118 #define SIZE (2*PageSize-sizeof(LFNode)-sizeof(uintptr))
119 LFNode node; // must be first
120 uintptr nobj;
121 Obj obj[SIZE/sizeof(Obj) - 1];
122 uint8 _padding[SIZE%sizeof(Obj) + sizeof(Obj)];
123 #undef SIZE
124 };
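// The layout above makes sizeof(Workbuf) exactly 2*PageSize (assuming the
// compiler adds no extra padding): node and nobj account for
// sizeof(LFNode)+sizeof(uintptr), and obj[] plus _padding[] together cover
// the remaining SIZE bytes. scanblock throws if sizeof(Workbuf) is not a
// multiple of PageSize.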
125
126 typedef struct Finalizer Finalizer;
127 struct Finalizer
128 {
129 FuncVal *fn;
130 void *arg;
131 const struct __go_func_type *ft;
132 };
133
134 typedef struct FinBlock FinBlock;
135 struct FinBlock
136 {
137 FinBlock *alllink;
138 FinBlock *next;
139 int32 cnt;
140 int32 cap;
141 Finalizer fin[1];
142 };
143
144 static G *fing;
145 static FinBlock *finq; // list of finalizers that are to be executed
146 static FinBlock *finc; // cache of free blocks
147 static FinBlock *allfin; // list of all blocks
148 static Lock finlock;
149 static int32 fingwait;
150
151 static void runfinq(void*);
152 static Workbuf* getempty(Workbuf*);
153 static Workbuf* getfull(Workbuf*);
154 static void putempty(Workbuf*);
155 static Workbuf* handoff(Workbuf*);
156 static void gchelperstart(void);
157
158 static struct {
159 uint64 full; // lock-free list of full blocks
160 uint64 empty; // lock-free list of empty blocks
161 byte pad0[CacheLineSize]; // prevents false-sharing between full/empty and nproc/nwait
162 uint32 nproc;
163 volatile uint32 nwait;
164 volatile uint32 ndone;
165 volatile uint32 debugmarkdone;
166 Note alldone;
167 ParFor *markfor;
168 ParFor *sweepfor;
169
170 Lock;
171 byte *chunk;
172 uintptr nchunk;
173
174 Obj *roots;
175 uint32 nroot;
176 uint32 rootcap;
177 } work __attribute__((aligned(8)));
178
179 enum {
180 GC_DEFAULT_PTR = GC_NUM_INSTR,
181 GC_MAP_NEXT,
182 GC_CHAN,
183
184 GC_NUM_INSTR2
185 };
186
187 static struct {
188 struct {
189 uint64 sum;
190 uint64 cnt;
191 } ptr;
192 uint64 nbytes;
193 struct {
194 uint64 sum;
195 uint64 cnt;
196 uint64 notype;
197 uint64 typelookup;
198 } obj;
199 uint64 rescan;
200 uint64 rescanbytes;
201 uint64 instr[GC_NUM_INSTR2];
202 uint64 putempty;
203 uint64 getfull;
204 } gcstats;
205
206 // markonly marks an object. It returns true if the object
207 // has been marked by this function, false otherwise.
208 // This function doesn't append the object to any buffer.
209 static bool
210 markonly(void *obj)
211 {
212 byte *p;
213 uintptr *bitp, bits, shift, x, xbits, off;
214 MSpan *s;
215 PageID k;
216
217 // Words outside the arena cannot be pointers.
218 if((byte*)obj < runtime_mheap->arena_start || (byte*)obj >= runtime_mheap->arena_used)
219 return false;
220
221 // obj may be a pointer to a live object.
222 // Try to find the beginning of the object.
223
224 // Round down to word boundary.
225 obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
226
227 // Find bits for this word.
228 off = (uintptr*)obj - (uintptr*)runtime_mheap->arena_start;
229 bitp = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
230 shift = off % wordsPerBitmapWord;
231 xbits = *bitp;
232 bits = xbits >> shift;
233
234 // Pointing at the beginning of a block?
235 if((bits & (bitAllocated|bitBlockBoundary)) != 0)
236 goto found;
237
238 // Otherwise consult span table to find beginning.
239 // (Manually inlined copy of MHeap_LookupMaybe.)
240 k = (uintptr)obj>>PageShift;
241 x = k;
242 x -= (uintptr)runtime_mheap->arena_start>>PageShift;
243 s = runtime_mheap->map[x];
244 if(s == nil || k < s->start || k - s->start >= s->npages || s->state != MSpanInUse)
245 return false;
246 p = (byte*)((uintptr)s->start<<PageShift);
247 if(s->sizeclass == 0) {
248 obj = p;
249 } else {
250 if((byte*)obj >= (byte*)s->limit)
251 return false;
252 uintptr size = s->elemsize;
253 int32 i = ((byte*)obj - p)/size;
254 obj = p+i*size;
255 }
256
257 // Now that we know the object header, reload bits.
258 off = (uintptr*)obj - (uintptr*)runtime_mheap->arena_start;
259 bitp = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
260 shift = off % wordsPerBitmapWord;
261 xbits = *bitp;
262 bits = xbits >> shift;
263
264 found:
265 // Now we have bits, bitp, and shift correct for
266 // obj pointing at the base of the object.
267 // Only care about allocated and not marked.
268 if((bits & (bitAllocated|bitMarked)) != bitAllocated)
269 return false;
270 if(work.nproc == 1)
271 *bitp |= bitMarked<<shift;
272 else {
273 for(;;) {
274 x = *bitp;
275 if(x & (bitMarked<<shift))
276 return false;
277 if(runtime_casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift))))
278 break;
279 }
280 }
281
282 // The object is now marked
283 return true;
284 }
285
286 // PtrTarget is a structure used by intermediate buffers.
287 // The intermediate buffers hold GC data before it
288 // is moved/flushed to the work buffer (Workbuf).
289 // The size of an intermediate buffer is very small,
290 // such as 32 or 64 elements.
291 typedef struct PtrTarget PtrTarget;
292 struct PtrTarget
293 {
294 void *p;
295 uintptr ti;
296 };
297
298 typedef struct BufferList BufferList;
299 struct BufferList
300 {
301 PtrTarget ptrtarget[IntermediateBufferCapacity];
302 Obj obj[IntermediateBufferCapacity];
303 uint32 busy;
304 byte pad[CacheLineSize];
305 };
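// Each GC helper uses its own BufferList, indexed by m->helpgc (see the
// "Allocate ptrbuf" block in scanblock), so the intermediate buffers need no
// locking; the trailing pad keeps entries on separate cache lines.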
306 static BufferList bufferList[MaxGcproc];
307
308 static Type *itabtype;
309
310 static void enqueue(Obj obj, Workbuf **_wbuf, Obj **_wp, uintptr *_nobj);
311
312 // flushptrbuf moves data from the PtrTarget buffer to the work buffer.
313 // The PtrTarget buffer contains blocks irrespective of whether the blocks have been marked or scanned,
314 // while the work buffer contains blocks which have been marked
315 // and are prepared to be scanned by the garbage collector.
316 //
317 // _wp, _wbuf, _nobj are input/output parameters specifying the work buffer.
318 //
319 // A simplified drawing explaining how the todo list moves from one structure to another:
320 //
321 // scanblock
322 // (find pointers)
323 // Obj ------> PtrTarget (pointer targets)
324 // ↑ |
325 // | |
326 // `----------'
327 // flushptrbuf
328 // (find block start, mark and enqueue)
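//
// Typical use in scanblock: pointers found during a scan are appended to the
// per-proc ptrbuf and flushed once it fills (or when the work list drains):
//
//	*ptrbufpos++ = (struct PtrTarget){obj, objti};
//	if(ptrbufpos == ptrbuf_end)
//		flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
//
// flushptrbuf resets *ptrbufpos back to the start of the buffer.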
329 static void
330 flushptrbuf(PtrTarget *ptrbuf, PtrTarget **ptrbufpos, Obj **_wp, Workbuf **_wbuf, uintptr *_nobj)
331 {
332 byte *p, *arena_start, *obj;
333 uintptr size, *bitp, bits, shift, j, x, xbits, off, nobj, ti, n;
334 MSpan *s;
335 PageID k;
336 Obj *wp;
337 Workbuf *wbuf;
338 PtrTarget *ptrbuf_end;
339
340 arena_start = runtime_mheap->arena_start;
341
342 wp = *_wp;
343 wbuf = *_wbuf;
344 nobj = *_nobj;
345
346 ptrbuf_end = *ptrbufpos;
347 n = ptrbuf_end - ptrbuf;
348 *ptrbufpos = ptrbuf;
349
350 if(CollectStats) {
351 runtime_xadd64(&gcstats.ptr.sum, n);
352 runtime_xadd64(&gcstats.ptr.cnt, 1);
353 }
354
355 // If buffer is nearly full, get a new one.
356 if(wbuf == nil || nobj+n >= nelem(wbuf->obj)) {
357 if(wbuf != nil)
358 wbuf->nobj = nobj;
359 wbuf = getempty(wbuf);
360 wp = wbuf->obj;
361 nobj = 0;
362
363 if(n >= nelem(wbuf->obj))
364 runtime_throw("ptrbuf has to be smaller than WorkBuf");
365 }
366
367 // TODO(atom): This block is a branch of an if-then-else statement.
368 // The single-threaded branch may be added in a later CL.
369 {
370 // Multi-threaded version.
371
372 while(ptrbuf < ptrbuf_end) {
373 obj = ptrbuf->p;
374 ti = ptrbuf->ti;
375 ptrbuf++;
376
377 // obj belongs to interval [mheap.arena_start, mheap.arena_used).
378 if(Debug > 1) {
379 if(obj < runtime_mheap->arena_start || obj >= runtime_mheap->arena_used)
380 runtime_throw("object is outside of mheap");
381 }
382
383 // obj may be a pointer to a live object.
384 // Try to find the beginning of the object.
385
386 // Round down to word boundary.
387 if(((uintptr)obj & ((uintptr)PtrSize-1)) != 0) {
388 obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
389 ti = 0;
390 }
391
392 // Find bits for this word.
393 off = (uintptr*)obj - (uintptr*)arena_start;
394 bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
395 shift = off % wordsPerBitmapWord;
396 xbits = *bitp;
397 bits = xbits >> shift;
398
399 // Pointing at the beginning of a block?
400 if((bits & (bitAllocated|bitBlockBoundary)) != 0)
401 goto found;
402
403 ti = 0;
404
405 // Pointing just past the beginning?
406 // Scan backward a little to find a block boundary.
407 for(j=shift; j-->0; ) {
408 if(((xbits>>j) & (bitAllocated|bitBlockBoundary)) != 0) {
409 obj = (byte*)obj - (shift-j)*PtrSize;
410 shift = j;
411 bits = xbits>>shift;
412 goto found;
413 }
414 }
415
416 // Otherwise consult span table to find beginning.
417 // (Manually inlined copy of MHeap_LookupMaybe.)
418 k = (uintptr)obj>>PageShift;
419 x = k;
420 x -= (uintptr)arena_start>>PageShift;
421 s = runtime_mheap->map[x];
422 if(s == nil || k < s->start || k - s->start >= s->npages || s->state != MSpanInUse)
423 continue;
424 p = (byte*)((uintptr)s->start<<PageShift);
425 if(s->sizeclass == 0) {
426 obj = p;
427 } else {
428 if((byte*)obj >= (byte*)s->limit)
429 continue;
430 size = s->elemsize;
431 int32 i = ((byte*)obj - p)/size;
432 obj = p+i*size;
433 }
434
435 // Now that we know the object header, reload bits.
436 off = (uintptr*)obj - (uintptr*)arena_start;
437 bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
438 shift = off % wordsPerBitmapWord;
439 xbits = *bitp;
440 bits = xbits >> shift;
441
442 found:
443 // Now we have bits, bitp, and shift correct for
444 // obj pointing at the base of the object.
445 // Only care about allocated and not marked.
446 if((bits & (bitAllocated|bitMarked)) != bitAllocated)
447 continue;
448 if(work.nproc == 1)
449 *bitp |= bitMarked<<shift;
450 else {
451 for(;;) {
452 x = *bitp;
453 if(x & (bitMarked<<shift))
454 goto continue_obj;
455 if(runtime_casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift))))
456 break;
457 }
458 }
459
460 // If object has no pointers, don't need to scan further.
461 if((bits & bitNoPointers) != 0)
462 continue;
463
464 // Ask span about size class.
465 // (Manually inlined copy of MHeap_Lookup.)
466 x = (uintptr)obj >> PageShift;
467 x -= (uintptr)arena_start>>PageShift;
468 s = runtime_mheap->map[x];
469
470 PREFETCH(obj);
471
472 *wp = (Obj){obj, s->elemsize, ti};
473 wp++;
474 nobj++;
475 continue_obj:;
476 }
477
478 // If another proc wants a pointer, give it some.
479 if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) {
480 wbuf->nobj = nobj;
481 wbuf = handoff(wbuf);
482 nobj = wbuf->nobj;
483 wp = wbuf->obj + nobj;
484 }
485 }
486
487 *_wp = wp;
488 *_wbuf = wbuf;
489 *_nobj = nobj;
490 }
491
492 static void
493 flushobjbuf(Obj *objbuf, Obj **objbufpos, Obj **_wp, Workbuf **_wbuf, uintptr *_nobj)
494 {
495 uintptr nobj, off;
496 Obj *wp, obj;
497 Workbuf *wbuf;
498 Obj *objbuf_end;
499
500 wp = *_wp;
501 wbuf = *_wbuf;
502 nobj = *_nobj;
503
504 objbuf_end = *objbufpos;
505 *objbufpos = objbuf;
506
507 while(objbuf < objbuf_end) {
508 obj = *objbuf++;
509
510 // Align obj.b to a word boundary.
511 off = (uintptr)obj.p & (PtrSize-1);
512 if(off != 0) {
513 obj.p += PtrSize - off;
514 obj.n -= PtrSize - off;
515 obj.ti = 0;
516 }
517
518 if(obj.p == nil || obj.n == 0)
519 continue;
520
521 // If buffer is full, get a new one.
522 if(wbuf == nil || nobj >= nelem(wbuf->obj)) {
523 if(wbuf != nil)
524 wbuf->nobj = nobj;
525 wbuf = getempty(wbuf);
526 wp = wbuf->obj;
527 nobj = 0;
528 }
529
530 *wp = obj;
531 wp++;
532 nobj++;
533 }
534
535 // If another proc wants a pointer, give it some.
536 if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) {
537 wbuf->nobj = nobj;
538 wbuf = handoff(wbuf);
539 nobj = wbuf->nobj;
540 wp = wbuf->obj + nobj;
541 }
542
543 *_wp = wp;
544 *_wbuf = wbuf;
545 *_nobj = nobj;
546 }
547
548 // Program that scans the whole block and treats every block element as a potential pointer
549 static uintptr defaultProg[2] = {PtrSize, GC_DEFAULT_PTR};
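// A GC program is an array of uintptrs: word 0 holds the element size and the
// following words hold GC_* instructions and their operands. scanblock reads
// pc[0] into stack_top.elemsize, advances pc, and then interprets the
// instruction stream (the switch on pc[0] below).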
550
551 #if 0
552 // Hashmap iterator program
553 static uintptr mapProg[2] = {0, GC_MAP_NEXT};
554
555 // Hchan program
556 static uintptr chanProg[2] = {0, GC_CHAN};
557 #endif
558
559 // Local variables of a program fragment or loop
560 typedef struct Frame Frame;
561 struct Frame {
562 uintptr count, elemsize, b;
563 uintptr *loop_or_ret;
564 };
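// b is the address currently being scanned, count the number of remaining
// iterations (0 meaning unbounded; the loop then ends when b runs past the
// block), elemsize the stride added to b on each iteration, and loop_or_ret
// the pc to loop back to at GC_END/GC_ARRAY_NEXT or to return to after a
// GC_CALL.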
565
566 // Sanity check for the derived type info objti.
567 static void
568 checkptr(void *obj, uintptr objti)
569 {
570 uintptr type, tisize, i, x;
571 byte *objstart;
572 Type *t;
573 MSpan *s;
574
575 if(!Debug)
576 runtime_throw("checkptr is debug only");
577
578 if((byte*)obj < runtime_mheap->arena_start || (byte*)obj >= runtime_mheap->arena_used)
579 return;
580 type = runtime_gettype(obj);
581 t = (Type*)(type & ~(uintptr)(PtrSize-1));
582 if(t == nil)
583 return;
584 x = (uintptr)obj >> PageShift;
585 x -= (uintptr)(runtime_mheap->arena_start)>>PageShift;
586 s = runtime_mheap->map[x];
587 objstart = (byte*)((uintptr)s->start<<PageShift);
588 if(s->sizeclass != 0) {
589 i = ((byte*)obj - objstart)/s->elemsize;
590 objstart += i*s->elemsize;
591 }
592 tisize = *(uintptr*)objti;
593 // Sanity check for object size: it should fit into the memory block.
594 if((byte*)obj + tisize > objstart + s->elemsize)
595 runtime_throw("invalid gc type info");
596 if(obj != objstart)
597 return;
598 // If obj points to the beginning of the memory block,
599 // check type info as well.
600 if(t->string == nil ||
601 // Gob allocates unsafe pointers for indirection.
602 (runtime_strcmp((const char *)t->string->str, (const char*)"unsafe.Pointer") &&
603 // Runtime and gc think differently about closures.
604 runtime_strstr((const char *)t->string->str, (const char*)"struct { F uintptr") != (const char *)t->string->str)) {
605 #if 0
606 pc1 = (uintptr*)objti;
607 pc2 = (uintptr*)t->gc;
608 // A simple best-effort check until first GC_END.
609 for(j = 1; pc1[j] != GC_END && pc2[j] != GC_END; j++) {
610 if(pc1[j] != pc2[j]) {
611 runtime_printf("invalid gc type info for '%s' at %p, type info %p, block info %p\n",
612 t->string ? (const int8*)t->string->str : (const int8*)"?", j, pc1[j], pc2[j]);
613 runtime_throw("invalid gc type info");
614 }
615 }
616 #endif
617 }
618 }
619
620 // scanblock scans a block of n bytes starting at pointer b for references
621 // to other objects, scanning any it finds recursively until there are no
622 // unscanned objects left. Instead of using an explicit recursion, it keeps
623 // a work list in the Workbuf* structures and loops in the main function
624 // body. Keeping an explicit work list is easier on the stack allocator and
625 // more efficient.
626 //
627 // wbuf: current work buffer
628 // wp: storage for next queued pointer (write pointer)
629 // nobj: number of queued objects
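//
// The function jumps straight to the next_block label: the wbuf/wp/nobj
// passed in (possibly empty) provide the first block to scan. When the local
// buffers drain, the loop either returns (keepworking == false, as used by
// markroot) or pulls more work from the global full list via getfull.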
630 static void
631 scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
632 {
633 byte *b, *arena_start, *arena_used;
634 uintptr n, i, end_b, elemsize, size, ti, objti, count /* , type */;
635 uintptr *pc, precise_type, nominal_size;
636 #if 0
637 uintptr *map_ret, mapkey_size, mapval_size, mapkey_ti, mapval_ti, *chan_ret, chancap;
638 #endif
639 void *obj;
640 const Type *t;
641 Slice *sliceptr;
642 Frame *stack_ptr, stack_top, stack[GC_STACK_CAPACITY+4];
643 BufferList *scanbuffers;
644 PtrTarget *ptrbuf, *ptrbuf_end, *ptrbufpos;
645 Obj *objbuf, *objbuf_end, *objbufpos;
646 Eface *eface;
647 Iface *iface;
648 #if 0
649 Hmap *hmap;
650 MapType *maptype;
651 bool mapkey_kind, mapval_kind;
652 struct hash_gciter map_iter;
653 struct hash_gciter_data d;
654 Hchan *chan;
655 ChanType *chantype;
656 #endif
657
658 if(sizeof(Workbuf) % PageSize != 0)
659 runtime_throw("scanblock: size of Workbuf is suboptimal");
660
661 // Memory arena parameters.
662 arena_start = runtime_mheap->arena_start;
663 arena_used = runtime_mheap->arena_used;
664
665 stack_ptr = stack+nelem(stack)-1;
666
667 precise_type = false;
668 nominal_size = 0;
669
670 // Allocate ptrbuf
671 {
672 scanbuffers = &bufferList[runtime_m()->helpgc];
673 ptrbuf = &scanbuffers->ptrtarget[0];
674 ptrbuf_end = &scanbuffers->ptrtarget[0] + nelem(scanbuffers->ptrtarget);
675 objbuf = &scanbuffers->obj[0];
676 objbuf_end = &scanbuffers->obj[0] + nelem(scanbuffers->obj);
677 }
678
679 ptrbufpos = ptrbuf;
680 objbufpos = objbuf;
681
682 // (Silence the compiler)
683 #if 0
684 map_ret = nil;
685 mapkey_size = mapval_size = 0;
686 mapkey_kind = mapval_kind = false;
687 mapkey_ti = mapval_ti = 0;
688 chan = nil;
689 chantype = nil;
690 chan_ret = nil;
691 #endif
692
693 goto next_block;
694
695 for(;;) {
696 // Each iteration scans the block b of length n, queueing pointers in
697 // the work buffer.
698 if(Debug > 1) {
699 runtime_printf("scanblock %p %D\n", b, (int64)n);
700 }
701
702 if(CollectStats) {
703 runtime_xadd64(&gcstats.nbytes, n);
704 runtime_xadd64(&gcstats.obj.sum, nobj);
705 runtime_xadd64(&gcstats.obj.cnt, 1);
706 }
707
708 if(ti != 0 && false) {
709 pc = (uintptr*)(ti & ~(uintptr)PC_BITS);
710 precise_type = (ti & PRECISE);
711 stack_top.elemsize = pc[0];
712 if(!precise_type)
713 nominal_size = pc[0];
714 if(ti & LOOP) {
715 stack_top.count = 0; // 0 means an infinite number of iterations
716 stack_top.loop_or_ret = pc+1;
717 } else {
718 stack_top.count = 1;
719 }
720 if(Debug) {
721 // Simple sanity check for provided type info ti:
722 // The declared size of the object must not be larger than the actual size
723 // (it can be smaller due to inferior pointers).
724 // It's difficult to make a comprehensive check due to inferior pointers,
725 // reflection, gob, etc.
726 if(pc[0] > n) {
727 runtime_printf("invalid gc type info: type info size %p, block size %p\n", pc[0], n);
728 runtime_throw("invalid gc type info");
729 }
730 }
731 } else if(UseSpanType && false) {
732 if(CollectStats)
733 runtime_xadd64(&gcstats.obj.notype, 1);
734
735 #if 0
736 type = runtime_gettype(b);
737 if(type != 0) {
738 if(CollectStats)
739 runtime_xadd64(&gcstats.obj.typelookup, 1);
740
741 t = (Type*)(type & ~(uintptr)(PtrSize-1));
742 switch(type & (PtrSize-1)) {
743 case TypeInfo_SingleObject:
744 pc = (uintptr*)t->gc;
745 precise_type = true; // type information about 'b' is precise
746 stack_top.count = 1;
747 stack_top.elemsize = pc[0];
748 break;
749 case TypeInfo_Array:
750 pc = (uintptr*)t->gc;
751 if(pc[0] == 0)
752 goto next_block;
753 precise_type = true; // type information about 'b' is precise
754 stack_top.count = 0; // 0 means an infinite number of iterations
755 stack_top.elemsize = pc[0];
756 stack_top.loop_or_ret = pc+1;
757 break;
758 case TypeInfo_Map:
759 hmap = (Hmap*)b;
760 maptype = (MapType*)t;
761 if(hash_gciter_init(hmap, &map_iter)) {
762 mapkey_size = maptype->key->size;
763 mapkey_kind = maptype->key->kind;
764 mapkey_ti = (uintptr)maptype->key->gc | PRECISE;
765 mapval_size = maptype->elem->size;
766 mapval_kind = maptype->elem->kind;
767 mapval_ti = (uintptr)maptype->elem->gc | PRECISE;
768
769 map_ret = nil;
770 pc = mapProg;
771 } else {
772 goto next_block;
773 }
774 break;
775 case TypeInfo_Chan:
776 chan = (Hchan*)b;
777 chantype = (ChanType*)t;
778 chan_ret = nil;
779 pc = chanProg;
780 break;
781 default:
782 runtime_throw("scanblock: invalid type");
783 return;
784 }
785 } else {
786 pc = defaultProg;
787 }
788 #endif
789 } else {
790 pc = defaultProg;
791 }
792
793 if(IgnorePreciseGC)
794 pc = defaultProg;
795
796 pc++;
797 stack_top.b = (uintptr)b;
798
799 end_b = (uintptr)b + n - PtrSize;
800
801 for(;;) {
802 if(CollectStats)
803 runtime_xadd64(&gcstats.instr[pc[0]], 1);
804
805 obj = nil;
806 objti = 0;
807 switch(pc[0]) {
808 case GC_PTR:
809 obj = *(void**)(stack_top.b + pc[1]);
810 objti = pc[2];
811 pc += 3;
812 if(Debug)
813 checkptr(obj, objti);
814 break;
815
816 case GC_SLICE:
817 sliceptr = (Slice*)(stack_top.b + pc[1]);
818 if(sliceptr->cap != 0) {
819 obj = sliceptr->array;
820 // Can't use slice element type for scanning,
821 // because if it points to an array embedded
822 // in the beginning of a struct,
823 // we will scan the whole struct as the slice.
824 // So just obtain type info from heap.
825 }
826 pc += 3;
827 break;
828
829 case GC_APTR:
830 obj = *(void**)(stack_top.b + pc[1]);
831 pc += 2;
832 break;
833
834 case GC_STRING:
835 obj = *(void**)(stack_top.b + pc[1]);
836 markonly(obj);
837 pc += 2;
838 continue;
839
840 case GC_EFACE:
841 eface = (Eface*)(stack_top.b + pc[1]);
842 pc += 2;
843 if(eface->type == nil)
844 continue;
845
846 // eface->type
847 t = eface->type;
848 if((const byte*)t >= arena_start && (const byte*)t < arena_used) {
849 union { const Type *tc; Type *tr; } u;
850 u.tc = t;
851 *ptrbufpos++ = (struct PtrTarget){(void*)u.tr, 0};
852 if(ptrbufpos == ptrbuf_end)
853 flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
854 }
855
856 // eface->__object
857 if((byte*)eface->__object >= arena_start && (byte*)eface->__object < arena_used) {
858 if(t->__size <= sizeof(void*)) {
859 if((t->kind & KindNoPointers))
860 continue;
861
862 obj = eface->__object;
863 if((t->kind & ~KindNoPointers) == KindPtr)
864 // objti = (uintptr)((PtrType*)t)->elem->gc;
865 objti = 0;
866 } else {
867 obj = eface->__object;
868 // objti = (uintptr)t->gc;
869 objti = 0;
870 }
871 }
872 break;
873
874 case GC_IFACE:
875 iface = (Iface*)(stack_top.b + pc[1]);
876 pc += 2;
877 if(iface->tab == nil)
878 continue;
879
880 // iface->tab
881 if((byte*)iface->tab >= arena_start && (byte*)iface->tab < arena_used) {
882 // *ptrbufpos++ = (struct PtrTarget){iface->tab, (uintptr)itabtype->gc};
883 *ptrbufpos++ = (struct PtrTarget){iface->tab, 0};
884 if(ptrbufpos == ptrbuf_end)
885 flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
886 }
887
888 // iface->data
889 if((byte*)iface->__object >= arena_start && (byte*)iface->__object < arena_used) {
890 // t = iface->tab->type;
891 t = nil;
892 if(t->__size <= sizeof(void*)) {
893 if((t->kind & KindNoPointers))
894 continue;
895
896 obj = iface->__object;
897 if((t->kind & ~KindNoPointers) == KindPtr)
898 // objti = (uintptr)((const PtrType*)t)->elem->gc;
899 objti = 0;
900 } else {
901 obj = iface->__object;
902 // objti = (uintptr)t->gc;
903 objti = 0;
904 }
905 }
906 break;
907
908 case GC_DEFAULT_PTR:
909 while(stack_top.b <= end_b) {
910 obj = *(byte**)stack_top.b;
911 stack_top.b += PtrSize;
912 if((byte*)obj >= arena_start && (byte*)obj < arena_used) {
913 *ptrbufpos++ = (struct PtrTarget){obj, 0};
914 if(ptrbufpos == ptrbuf_end)
915 flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
916 }
917 }
918 goto next_block;
919
920 case GC_END:
921 if(--stack_top.count != 0) {
922 // Next iteration of a loop if possible.
923 stack_top.b += stack_top.elemsize;
924 if(stack_top.b + stack_top.elemsize <= end_b+PtrSize) {
925 pc = stack_top.loop_or_ret;
926 continue;
927 }
928 i = stack_top.b;
929 } else {
930 // Stack pop if possible.
931 if(stack_ptr+1 < stack+nelem(stack)) {
932 pc = stack_top.loop_or_ret;
933 stack_top = *(++stack_ptr);
934 continue;
935 }
936 i = (uintptr)b + nominal_size;
937 }
938 if(!precise_type) {
939 // Quickly scan [b+i,b+n) for possible pointers.
940 for(; i<=end_b; i+=PtrSize) {
941 if(*(byte**)i != nil) {
942 // Found a value that may be a pointer.
943 // Do a rescan of the entire block.
944 enqueue((Obj){b, n, 0}, &wbuf, &wp, &nobj);
945 if(CollectStats) {
946 runtime_xadd64(&gcstats.rescan, 1);
947 runtime_xadd64(&gcstats.rescanbytes, n);
948 }
949 break;
950 }
951 }
952 }
953 goto next_block;
954
955 case GC_ARRAY_START:
956 i = stack_top.b + pc[1];
957 count = pc[2];
958 elemsize = pc[3];
959 pc += 4;
960
961 // Stack push.
962 *stack_ptr-- = stack_top;
963 stack_top = (Frame){count, elemsize, i, pc};
964 continue;
965
966 case GC_ARRAY_NEXT:
967 if(--stack_top.count != 0) {
968 stack_top.b += stack_top.elemsize;
969 pc = stack_top.loop_or_ret;
970 } else {
971 // Stack pop.
972 stack_top = *(++stack_ptr);
973 pc += 1;
974 }
975 continue;
976
977 case GC_CALL:
978 // Stack push.
979 *stack_ptr-- = stack_top;
980 stack_top = (Frame){1, 0, stack_top.b + pc[1], pc+3 /*return address*/};
981 pc = (uintptr*)((byte*)pc + *(int32*)(pc+2)); // target of the CALL instruction
982 continue;
983
984 #if 0
985 case GC_MAP_PTR:
986 hmap = *(Hmap**)(stack_top.b + pc[1]);
987 if(hmap == nil) {
988 pc += 3;
989 continue;
990 }
991 if(markonly(hmap)) {
992 maptype = (MapType*)pc[2];
993 if(hash_gciter_init(hmap, &map_iter)) {
994 mapkey_size = maptype->key->size;
995 mapkey_kind = maptype->key->kind;
996 mapkey_ti = (uintptr)maptype->key->gc | PRECISE;
997 mapval_size = maptype->elem->size;
998 mapval_kind = maptype->elem->kind;
999 mapval_ti = (uintptr)maptype->elem->gc | PRECISE;
1000
1001 // Start mapProg.
1002 map_ret = pc+3;
1003 pc = mapProg+1;
1004 } else {
1005 pc += 3;
1006 }
1007 } else {
1008 pc += 3;
1009 }
1010 continue;
1011
1012 case GC_MAP_NEXT:
1013 // Add all keys and values to buffers, mark all subtables.
1014 while(hash_gciter_next(&map_iter, &d)) {
1015 // buffers: reserve space for 2 objects.
1016 if(ptrbufpos+2 >= ptrbuf_end)
1017 flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
1018 if(objbufpos+2 >= objbuf_end)
1019 flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj);
1020
1021 if(d.st != nil)
1022 markonly(d.st);
1023
1024 if(d.key_data != nil) {
1025 if(!(mapkey_kind & KindNoPointers) || d.indirectkey) {
1026 if(!d.indirectkey)
1027 *objbufpos++ = (Obj){d.key_data, mapkey_size, mapkey_ti};
1028 else {
1029 if(Debug) {
1030 obj = *(void**)d.key_data;
1031 if(!(arena_start <= obj && obj < arena_used))
1032 runtime_throw("scanblock: inconsistent hashmap");
1033 }
1034 *ptrbufpos++ = (struct PtrTarget){*(void**)d.key_data, mapkey_ti};
1035 }
1036 }
1037 if(!(mapval_kind & KindNoPointers) || d.indirectval) {
1038 if(!d.indirectval)
1039 *objbufpos++ = (Obj){d.val_data, mapval_size, mapval_ti};
1040 else {
1041 if(Debug) {
1042 obj = *(void**)d.val_data;
1043 if(!(arena_start <= obj && obj < arena_used))
1044 runtime_throw("scanblock: inconsistent hashmap");
1045 }
1046 *ptrbufpos++ = (struct PtrTarget){*(void**)d.val_data, mapval_ti};
1047 }
1048 }
1049 }
1050 }
1051 if(map_ret == nil)
1052 goto next_block;
1053 pc = map_ret;
1054 continue;
1055 #endif
1056
1057 case GC_REGION:
1058 obj = (void*)(stack_top.b + pc[1]);
1059 size = pc[2];
1060 objti = pc[3];
1061 pc += 4;
1062
1063 *objbufpos++ = (Obj){obj, size, objti};
1064 if(objbufpos == objbuf_end)
1065 flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj);
1066 continue;
1067
1068 #if 0
1069 case GC_CHAN_PTR:
1070 // Similar to GC_MAP_PTR
1071 chan = *(Hchan**)(stack_top.b + pc[1]);
1072 if(chan == nil) {
1073 pc += 3;
1074 continue;
1075 }
1076 if(markonly(chan)) {
1077 chantype = (ChanType*)pc[2];
1078 if(!(chantype->elem->kind & KindNoPointers)) {
1079 // Start chanProg.
1080 chan_ret = pc+3;
1081 pc = chanProg+1;
1082 continue;
1083 }
1084 }
1085 pc += 3;
1086 continue;
1087
1088 case GC_CHAN:
1089 // There are no heap pointers in struct Hchan,
1090 // so we can ignore the leading sizeof(Hchan) bytes.
1091 if(!(chantype->elem->kind & KindNoPointers)) {
1092 // Channel's buffer follows Hchan immediately in memory.
1093 // Size of buffer (cap(c)) is second int in the chan struct.
1094 chancap = ((uintgo*)chan)[1];
1095 if(chancap > 0) {
1096 // TODO(atom): split into two chunks so that only the
1097 // in-use part of the circular buffer is scanned.
1098 // (Channel routines zero the unused part, so the current
1099 // code does not lead to leaks, it's just a little inefficient.)
1100 *objbufpos++ = (Obj){(byte*)chan+runtime_Hchansize, chancap*chantype->elem->size,
1101 (uintptr)chantype->elem->gc | PRECISE | LOOP};
1102 if(objbufpos == objbuf_end)
1103 flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj);
1104 }
1105 }
1106 if(chan_ret == nil)
1107 goto next_block;
1108 pc = chan_ret;
1109 continue;
1110 #endif
1111
1112 default:
1113 runtime_throw("scanblock: invalid GC instruction");
1114 return;
1115 }
1116
1117 if((byte*)obj >= arena_start && (byte*)obj < arena_used) {
1118 *ptrbufpos++ = (struct PtrTarget){obj, objti};
1119 if(ptrbufpos == ptrbuf_end)
1120 flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
1121 }
1122 }
1123
1124 next_block:
1125 // Done scanning [b, b+n). Prepare for the next iteration of
1126 // the loop by setting b, n, ti to the parameters for the next block.
1127
1128 if(nobj == 0) {
1129 flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
1130 flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj);
1131
1132 if(nobj == 0) {
1133 if(!keepworking) {
1134 if(wbuf)
1135 putempty(wbuf);
1136 goto endscan;
1137 }
1138 // Emptied our buffer: refill.
1139 wbuf = getfull(wbuf);
1140 if(wbuf == nil)
1141 goto endscan;
1142 nobj = wbuf->nobj;
1143 wp = wbuf->obj + wbuf->nobj;
1144 }
1145 }
1146
1147 // Fetch b from the work buffer.
1148 --wp;
1149 b = wp->p;
1150 n = wp->n;
1151 ti = wp->ti;
1152 nobj--;
1153 }
1154
1155 endscan:;
1156 }
1157
1158 // debug_scanblock is the debug copy of scanblock.
1159 // It is simpler, slower, single-threaded, recursive,
1160 // and uses bitSpecial as the mark bit.
1161 static void
1162 debug_scanblock(byte *b, uintptr n)
1163 {
1164 byte *obj, *p;
1165 void **vp;
1166 uintptr size, *bitp, bits, shift, i, xbits, off;
1167 MSpan *s;
1168
1169 if(!DebugMark)
1170 runtime_throw("debug_scanblock without DebugMark");
1171
1172 if((intptr)n < 0) {
1173 runtime_printf("debug_scanblock %p %D\n", b, (int64)n);
1174 runtime_throw("debug_scanblock");
1175 }
1176
1177 // Align b to a word boundary.
1178 off = (uintptr)b & (PtrSize-1);
1179 if(off != 0) {
1180 b += PtrSize - off;
1181 n -= PtrSize - off;
1182 }
1183
1184 vp = (void**)b;
1185 n /= PtrSize;
1186 for(i=0; i<(uintptr)n; i++) {
1187 obj = (byte*)vp[i];
1188
1189 // Words outside the arena cannot be pointers.
1190 if((byte*)obj < runtime_mheap->arena_start || (byte*)obj >= runtime_mheap->arena_used)
1191 continue;
1192
1193 // Round down to word boundary.
1194 obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
1195
1196 // Consult span table to find beginning.
1197 s = runtime_MHeap_LookupMaybe(runtime_mheap, obj);
1198 if(s == nil)
1199 continue;
1200
1201 p = (byte*)((uintptr)s->start<<PageShift);
1202 size = s->elemsize;
1203 if(s->sizeclass == 0) {
1204 obj = p;
1205 } else {
1206 if((byte*)obj >= (byte*)s->limit)
1207 continue;
1208 int32 i = ((byte*)obj - p)/size;
1209 obj = p+i*size;
1210 }
1211
1212 // Now that we know the object header, reload bits.
1213 off = (uintptr*)obj - (uintptr*)runtime_mheap->arena_start;
1214 bitp = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
1215 shift = off % wordsPerBitmapWord;
1216 xbits = *bitp;
1217 bits = xbits >> shift;
1218
1219 // Now we have bits, bitp, and shift correct for
1220 // obj pointing at the base of the object.
1221 // If not allocated or already marked, done.
1222 if((bits & bitAllocated) == 0 || (bits & bitSpecial) != 0) // NOTE: bitSpecial not bitMarked
1223 continue;
1224 *bitp |= bitSpecial<<shift;
1225 if(!(bits & bitMarked))
1226 runtime_printf("found unmarked block %p in %p\n", obj, vp+i);
1227
1228 // If object has no pointers, don't need to scan further.
1229 if((bits & bitNoPointers) != 0)
1230 continue;
1231
1232 debug_scanblock(obj, size);
1233 }
1234 }
1235
1236 // Append obj to the work buffer.
1237 // _wbuf, _wp, _nobj are input/output parameters specifying the work buffer.
1238 static void
1239 enqueue(Obj obj, Workbuf **_wbuf, Obj **_wp, uintptr *_nobj)
1240 {
1241 uintptr nobj, off;
1242 Obj *wp;
1243 Workbuf *wbuf;
1244
1245 if(Debug > 1)
1246 runtime_printf("append obj(%p %D %p)\n", obj.p, (int64)obj.n, obj.ti);
1247
1248 // Align obj.b to a word boundary.
1249 off = (uintptr)obj.p & (PtrSize-1);
1250 if(off != 0) {
1251 obj.p += PtrSize - off;
1252 obj.n -= PtrSize - off;
1253 obj.ti = 0;
1254 }
1255
1256 if(obj.p == nil || obj.n == 0)
1257 return;
1258
1259 // Load work buffer state
1260 wp = *_wp;
1261 wbuf = *_wbuf;
1262 nobj = *_nobj;
1263
1264 // If another proc wants a pointer, give it some.
1265 if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) {
1266 wbuf->nobj = nobj;
1267 wbuf = handoff(wbuf);
1268 nobj = wbuf->nobj;
1269 wp = wbuf->obj + nobj;
1270 }
1271
1272 // If buffer is full, get a new one.
1273 if(wbuf == nil || nobj >= nelem(wbuf->obj)) {
1274 if(wbuf != nil)
1275 wbuf->nobj = nobj;
1276 wbuf = getempty(wbuf);
1277 wp = wbuf->obj;
1278 nobj = 0;
1279 }
1280
1281 *wp = obj;
1282 wp++;
1283 nobj++;
1284
1285 // Save work buffer state
1286 *_wp = wp;
1287 *_wbuf = wbuf;
1288 *_nobj = nobj;
1289 }
1290
1291 static void
1292 markroot(ParFor *desc, uint32 i)
1293 {
1294 Obj *wp;
1295 Workbuf *wbuf;
1296 uintptr nobj;
1297
1298 USED(&desc);
1299 wp = nil;
1300 wbuf = nil;
1301 nobj = 0;
1302 enqueue(work.roots[i], &wbuf, &wp, &nobj);
1303 scanblock(wbuf, wp, nobj, false);
1304 }
1305
1306 // Get an empty work buffer off the work.empty list,
1307 // allocating new buffers as needed.
1308 static Workbuf*
1309 getempty(Workbuf *b)
1310 {
1311 if(b != nil)
1312 runtime_lfstackpush(&work.full, &b->node);
1313 b = (Workbuf*)runtime_lfstackpop(&work.empty);
1314 if(b == nil) {
1315 // Need to allocate.
1316 runtime_lock(&work);
1317 if(work.nchunk < sizeof *b) {
1318 work.nchunk = 1<<20;
1319 work.chunk = runtime_SysAlloc(work.nchunk);
1320 if(work.chunk == nil)
1321 runtime_throw("runtime: cannot allocate memory");
1322 }
1323 b = (Workbuf*)work.chunk;
1324 work.chunk += sizeof *b;
1325 work.nchunk -= sizeof *b;
1326 runtime_unlock(&work);
1327 }
1328 b->nobj = 0;
1329 return b;
1330 }
1331
1332 static void
1333 putempty(Workbuf *b)
1334 {
1335 if(CollectStats)
1336 runtime_xadd64(&gcstats.putempty, 1);
1337
1338 runtime_lfstackpush(&work.empty, &b->node);
1339 }
1340
1341 // Get a full work buffer off the work.full list, or return nil.
1342 static Workbuf*
1343 getfull(Workbuf *b)
1344 {
1345 M *m;
1346 int32 i;
1347
1348 if(CollectStats)
1349 runtime_xadd64(&gcstats.getfull, 1);
1350
1351 if(b != nil)
1352 runtime_lfstackpush(&work.empty, &b->node);
1353 b = (Workbuf*)runtime_lfstackpop(&work.full);
1354 if(b != nil || work.nproc == 1)
1355 return b;
1356
1357 m = runtime_m();
1358 runtime_xadd(&work.nwait, +1);
1359 for(i=0;; i++) {
1360 if(work.full != 0) {
1361 runtime_xadd(&work.nwait, -1);
1362 b = (Workbuf*)runtime_lfstackpop(&work.full);
1363 if(b != nil)
1364 return b;
1365 runtime_xadd(&work.nwait, +1);
1366 }
1367 if(work.nwait == work.nproc)
1368 return nil;
1369 if(i < 10) {
1370 m->gcstats.nprocyield++;
1371 runtime_procyield(20);
1372 } else if(i < 20) {
1373 m->gcstats.nosyield++;
1374 runtime_osyield();
1375 } else {
1376 m->gcstats.nsleep++;
1377 runtime_usleep(100);
1378 }
1379 }
1380 }
1381
1382 static Workbuf*
1383 handoff(Workbuf *b)
1384 {
1385 M *m;
1386 int32 n;
1387 Workbuf *b1;
1388
1389 m = runtime_m();
1390
1391 // Make new buffer with half of b's pointers.
1392 b1 = getempty(nil);
1393 n = b->nobj/2;
1394 b->nobj -= n;
1395 b1->nobj = n;
1396 runtime_memmove(b1->obj, b->obj+b->nobj, n*sizeof b1->obj[0]);
1397 m->gcstats.nhandoff++;
1398 m->gcstats.nhandoffcnt += n;
1399
1400 // Put b on full list - let first half of b get stolen.
1401 runtime_lfstackpush(&work.full, &b->node);
1402 return b1;
1403 }
1404
1405 static void
1406 addroot(Obj obj)
1407 {
1408 uint32 cap;
1409 Obj *new;
1410
1411 if(work.nroot >= work.rootcap) {
1412 cap = PageSize/sizeof(Obj);
1413 if(cap < 2*work.rootcap)
1414 cap = 2*work.rootcap;
1415 new = (Obj*)runtime_SysAlloc(cap*sizeof(Obj));
1416 if(new == nil)
1417 runtime_throw("runtime: cannot allocate memory");
1418 if(work.roots != nil) {
1419 runtime_memmove(new, work.roots, work.rootcap*sizeof(Obj));
1420 runtime_SysFree(work.roots, work.rootcap*sizeof(Obj));
1421 }
1422 work.roots = new;
1423 work.rootcap = cap;
1424 }
1425 work.roots[work.nroot] = obj;
1426 work.nroot++;
1427 }
1428
1429 static void
1430 addstackroots(G *gp)
1431 {
1432 #ifdef USING_SPLIT_STACK
1433 M *mp;
1434 void* sp;
1435 size_t spsize;
1436 void* next_segment;
1437 void* next_sp;
1438 void* initial_sp;
1439
1440 if(gp == runtime_g()) {
1441 // Scanning our own stack.
1442 sp = __splitstack_find(nil, nil, &spsize, &next_segment,
1443 &next_sp, &initial_sp);
1444 } else if((mp = gp->m) != nil && mp->helpgc) {
1445 // gchelper's stack is in active use and has no interesting pointers.
1446 return;
1447 } else {
1448 // Scanning another goroutine's stack.
1449 // The goroutine is usually asleep (the world is stopped).
1450
1451 // The exception is that if the goroutine is about to enter or might
1452 // have just exited a system call, it may be executing code such
1453 // as schedlock and may have needed to start a new stack segment.
1454 // Use the stack segment and stack pointer at the time of
1455 // the system call instead, since that won't change underfoot.
1456 if(gp->gcstack != nil) {
1457 sp = gp->gcstack;
1458 spsize = gp->gcstack_size;
1459 next_segment = gp->gcnext_segment;
1460 next_sp = gp->gcnext_sp;
1461 initial_sp = gp->gcinitial_sp;
1462 } else {
1463 sp = __splitstack_find_context(&gp->stack_context[0],
1464 &spsize, &next_segment,
1465 &next_sp, &initial_sp);
1466 }
1467 }
1468 if(sp != nil) {
1469 addroot((Obj){sp, spsize, 0});
1470 while((sp = __splitstack_find(next_segment, next_sp,
1471 &spsize, &next_segment,
1472 &next_sp, &initial_sp)) != nil)
1473 addroot((Obj){sp, spsize, 0});
1474 }
1475 #else
1476 M *mp;
1477 byte* bottom;
1478 byte* top;
1479
1480 if(gp == runtime_g()) {
1481 // Scanning our own stack.
1482 bottom = (byte*)&gp;
1483 } else if((mp = gp->m) != nil && mp->helpgc) {
1484 // gchelper's stack is in active use and has no interesting pointers.
1485 return;
1486 } else {
1487 // Scanning another goroutine's stack.
1488 // The goroutine is usually asleep (the world is stopped).
1489 bottom = (byte*)gp->gcnext_sp;
1490 if(bottom == nil)
1491 return;
1492 }
1493 top = (byte*)gp->gcinitial_sp + gp->gcstack_size;
1494 if(top > bottom)
1495 addroot((Obj){bottom, top - bottom, 0});
1496 else
1497 addroot((Obj){top, bottom - top, 0});
1498 #endif
1499 }
1500
1501 static void
1502 addfinroots(void *v)
1503 {
1504 uintptr size;
1505 void *base;
1506
1507 size = 0;
1508 if(!runtime_mlookup(v, (byte**)&base, &size, nil) || !runtime_blockspecial(base))
1509 runtime_throw("mark - finalizer inconsistency");
1510
1511 // do not mark the finalizer block itself. just mark the things it points at.
1512 addroot((Obj){base, size, 0});
1513 }
1514
1515 static struct root_list* roots;
1516
1517 void
1518 __go_register_gc_roots (struct root_list* r)
1519 {
1520 // FIXME: This needs locking if multiple goroutines can call
1521 // dlopen simultaneously.
1522 r->next = roots;
1523 roots = r;
1524 }
1525
1526 static void
1527 addroots(void)
1528 {
1529 struct root_list *pl;
1530 G *gp;
1531 FinBlock *fb;
1532 MSpan *s, **allspans;
1533 uint32 spanidx;
1534
1535 work.nroot = 0;
1536
1537 // mark data+bss.
1538 for(pl = roots; pl != nil; pl = pl->next) {
1539 struct root* pr = &pl->roots[0];
1540 while(1) {
1541 void *decl = pr->decl;
1542 if(decl == nil)
1543 break;
1544 addroot((Obj){decl, pr->size, 0});
1545 pr++;
1546 }
1547 }
1548
1549 addroot((Obj){(byte*)&runtime_m0, sizeof runtime_m0, 0});
1550 addroot((Obj){(byte*)&runtime_g0, sizeof runtime_g0, 0});
1551 addroot((Obj){(byte*)&runtime_allg, sizeof runtime_allg, 0});
1552 addroot((Obj){(byte*)&runtime_allm, sizeof runtime_allm, 0});
1553 addroot((Obj){(byte*)&runtime_allp, sizeof runtime_allp, 0});
1554 runtime_proc_scan(addroot);
1555 runtime_MProf_Mark(addroot);
1556 runtime_time_scan(addroot);
1557
1558 // MSpan.types
1559 allspans = runtime_mheap->allspans;
1560 for(spanidx=0; spanidx<runtime_mheap->nspan; spanidx++) {
1561 s = allspans[spanidx];
1562 if(s->state == MSpanInUse) {
1563 // The garbage collector ignores type pointers stored in MSpan.types:
1564 // - Compiler-generated types are stored outside of heap.
1565 // - The reflect package has runtime-generated types cached in its data structures.
1566 // The garbage collector relies on finding the references via that cache.
1567 switch(s->types.compression) {
1568 case MTypes_Empty:
1569 case MTypes_Single:
1570 break;
1571 case MTypes_Words:
1572 case MTypes_Bytes:
1573 markonly((byte*)s->types.data);
1574 break;
1575 }
1576 }
1577 }
1578
1579 // stacks
1580 for(gp=runtime_allg; gp!=nil; gp=gp->alllink) {
1581 switch(gp->status){
1582 default:
1583 runtime_printf("unexpected G.status %d\n", gp->status);
1584 runtime_throw("mark - bad status");
1585 case Gdead:
1586 break;
1587 case Grunning:
1588 if(gp != runtime_g())
1589 runtime_throw("mark - world not stopped");
1590 addstackroots(gp);
1591 break;
1592 case Grunnable:
1593 case Gsyscall:
1594 case Gwaiting:
1595 addstackroots(gp);
1596 break;
1597 }
1598 }
1599
1600 runtime_walkfintab(addfinroots, addroot);
1601
1602 for(fb=allfin; fb; fb=fb->alllink)
1603 addroot((Obj){(byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]), 0});
1604
1605 addroot((Obj){(byte*)&work, sizeof work, 0});
1606 }
1607
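// handlespecial is called by sweepspan for unmarked blocks that have the
// bitSpecial bit set. A true return means the block has a finalizer that has
// now been queued, so the block must be kept alive until the finalizer runs
// and sweepspan skips freeing it; false means it had no finalizer (only, at
// most, a profiling record, released here) and the block can be reclaimed.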
1608 static bool
1609 handlespecial(byte *p, uintptr size)
1610 {
1611 FuncVal *fn;
1612 const struct __go_func_type *ft;
1613 FinBlock *block;
1614 Finalizer *f;
1615
1616 if(!runtime_getfinalizer(p, true, &fn, &ft)) {
1617 runtime_setblockspecial(p, false);
1618 runtime_MProf_Free(p, size);
1619 return false;
1620 }
1621
1622 runtime_lock(&finlock);
1623 if(finq == nil || finq->cnt == finq->cap) {
1624 if(finc == nil) {
1625 finc = runtime_SysAlloc(PageSize);
1626 if(finc == nil)
1627 runtime_throw("runtime: cannot allocate memory");
1628 finc->cap = (PageSize - sizeof(FinBlock)) / sizeof(Finalizer) + 1;
1629 finc->alllink = allfin;
1630 allfin = finc;
1631 }
1632 block = finc;
1633 finc = block->next;
1634 block->next = finq;
1635 finq = block;
1636 }
1637 f = &finq->fin[finq->cnt];
1638 finq->cnt++;
1639 f->fn = fn;
1640 f->ft = ft;
1641 f->arg = p;
1642 runtime_unlock(&finlock);
1643 return true;
1644 }
1645
1646 // Sweep frees or collects finalizers for blocks not marked in the mark phase.
1647 // It clears the mark bits in preparation for the next GC round.
1648 static void
1649 sweepspan(ParFor *desc, uint32 idx)
1650 {
1651 M *m;
1652 int32 cl, n, npages;
1653 uintptr size;
1654 byte *p;
1655 MCache *c;
1656 byte *arena_start;
1657 MLink head, *end;
1658 int32 nfree;
1659 byte *type_data;
1660 byte compression;
1661 uintptr type_data_inc;
1662 MSpan *s;
1663
1664 m = runtime_m();
1665
1666 USED(&desc);
1667 s = runtime_mheap->allspans[idx];
1668 if(s->state != MSpanInUse)
1669 return;
1670 arena_start = runtime_mheap->arena_start;
1671 p = (byte*)(s->start << PageShift);
1672 cl = s->sizeclass;
1673 size = s->elemsize;
1674 if(cl == 0) {
1675 n = 1;
1676 } else {
1677 // Chunk full of small blocks.
1678 npages = runtime_class_to_allocnpages[cl];
1679 n = (npages << PageShift) / size;
1680 }
1681 nfree = 0;
1682 end = &head;
1683 c = m->mcache;
1684
1685 type_data = (byte*)s->types.data;
1686 type_data_inc = sizeof(uintptr);
1687 compression = s->types.compression;
1688 switch(compression) {
1689 case MTypes_Bytes:
1690 type_data += 8*sizeof(uintptr);
1691 type_data_inc = 1;
1692 break;
1693 }
1694
1695 // Sweep through n objects of given size starting at p.
1696 // This thread owns the span now, so it can manipulate
1697 // the block bitmap without atomic operations.
1698 for(; n > 0; n--, p += size, type_data+=type_data_inc) {
1699 uintptr off, *bitp, shift, bits;
1700
1701 off = (uintptr*)p - (uintptr*)arena_start;
1702 bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
1703 shift = off % wordsPerBitmapWord;
1704 bits = *bitp>>shift;
1705
1706 if((bits & bitAllocated) == 0)
1707 continue;
1708
1709 if((bits & bitMarked) != 0) {
1710 if(DebugMark) {
1711 if(!(bits & bitSpecial))
1712 runtime_printf("found spurious mark on %p\n", p);
1713 *bitp &= ~(bitSpecial<<shift);
1714 }
1715 *bitp &= ~(bitMarked<<shift);
1716 continue;
1717 }
1718
1719 // Special means it has a finalizer or is being profiled.
1720 // In DebugMark mode, the bit has been coopted so
1721 // we have to assume all blocks are special.
1722 if(DebugMark || (bits & bitSpecial) != 0) {
1723 if(handlespecial(p, size))
1724 continue;
1725 }
1726
1727 // Mark freed; restore block boundary bit.
1728 *bitp = (*bitp & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
1729
1730 if(cl == 0) {
1731 // Free large span.
1732 runtime_unmarkspan(p, 1<<PageShift);
1733 *(uintptr*)p = (uintptr)0xdeaddeaddeaddeadll; // needs zeroing
1734 runtime_MHeap_Free(runtime_mheap, s, 1);
1735 c->local_alloc -= size;
1736 c->local_nfree++;
1737 } else {
1738 // Free small object.
1739 switch(compression) {
1740 case MTypes_Words:
1741 *(uintptr*)type_data = 0;
1742 break;
1743 case MTypes_Bytes:
1744 *(byte*)type_data = 0;
1745 break;
1746 }
1747 if(size > sizeof(uintptr))
1748 ((uintptr*)p)[1] = (uintptr)0xdeaddeaddeaddeadll; // mark as "needs to be zeroed"
1749
1750 end->next = (MLink*)p;
1751 end = (MLink*)p;
1752 nfree++;
1753 }
1754 }
1755
1756 if(nfree) {
1757 c->local_by_size[cl].nfree += nfree;
1758 c->local_alloc -= size * nfree;
1759 c->local_nfree += nfree;
1760 c->local_cachealloc -= nfree * size;
1761 c->local_objects -= nfree;
1762 runtime_MCentral_FreeSpan(&runtime_mheap->central[cl], s, nfree, head.next, end);
1763 }
1764 }
1765
1766 static void
1767 dumpspan(uint32 idx)
1768 {
1769 int32 sizeclass, n, npages, i, column;
1770 uintptr size;
1771 byte *p;
1772 byte *arena_start;
1773 MSpan *s;
1774 bool allocated, special;
1775
1776 s = runtime_mheap->allspans[idx];
1777 if(s->state != MSpanInUse)
1778 return;
1779 arena_start = runtime_mheap->arena_start;
1780 p = (byte*)(s->start << PageShift);
1781 sizeclass = s->sizeclass;
1782 size = s->elemsize;
1783 if(sizeclass == 0) {
1784 n = 1;
1785 } else {
1786 npages = runtime_class_to_allocnpages[sizeclass];
1787 n = (npages << PageShift) / size;
1788 }
1789
1790 runtime_printf("%p .. %p:\n", p, p+n*size);
1791 column = 0;
1792 for(; n>0; n--, p+=size) {
1793 uintptr off, *bitp, shift, bits;
1794
1795 off = (uintptr*)p - (uintptr*)arena_start;
1796 bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
1797 shift = off % wordsPerBitmapWord;
1798 bits = *bitp>>shift;
1799
1800 allocated = ((bits & bitAllocated) != 0);
1801 special = ((bits & bitSpecial) != 0);
1802
1803 for(i=0; (uint32)i<size; i+=sizeof(void*)) {
1804 if(column == 0) {
1805 runtime_printf("\t");
1806 }
1807 if(i == 0) {
1808 runtime_printf(allocated ? "(" : "[");
1809 runtime_printf(special ? "@" : "");
1810 runtime_printf("%p: ", p+i);
1811 } else {
1812 runtime_printf(" ");
1813 }
1814
1815 runtime_printf("%p", *(void**)(p+i));
1816
1817 if(i+sizeof(void*) >= size) {
1818 runtime_printf(allocated ? ") " : "] ");
1819 }
1820
1821 column++;
1822 if(column == 8) {
1823 runtime_printf("\n");
1824 column = 0;
1825 }
1826 }
1827 }
1828 runtime_printf("\n");
1829 }
1830
1831 // A debugging function to dump the contents of memory
1832 void
1833 runtime_memorydump(void)
1834 {
1835 uint32 spanidx;
1836
1837 for(spanidx=0; spanidx<runtime_mheap->nspan; spanidx++) {
1838 dumpspan(spanidx);
1839 }
1840 }
1841
1842 void
1843 runtime_gchelper(void)
1844 {
1845 gchelperstart();
1846
1847 // parallel mark over the gc roots
1848 runtime_parfordo(work.markfor);
1849
1850 // help other threads scan secondary blocks
1851 scanblock(nil, nil, 0, true);
1852
1853 if(DebugMark) {
1854 // wait while the main thread executes mark(debug_scanblock)
1855 while(runtime_atomicload(&work.debugmarkdone) == 0)
1856 runtime_usleep(10);
1857 }
1858
1859 runtime_parfordo(work.sweepfor);
1860 bufferList[runtime_m()->helpgc].busy = 0;
1861 if(runtime_xadd(&work.ndone, +1) == work.nproc-1)
1862 runtime_notewakeup(&work.alldone);
1863 }
1864
1865 #define GcpercentUnknown (-2)
1866
1867 // Initialized from $GOGC. GOGC=off means no gc.
1868 //
1869 // Next gc is after we've allocated an extra amount of
1870 // memory proportional to the amount already in use.
1871 // If gcpercent=100 and we're using 4M, we'll gc again
1872 // when we get to 8M. This keeps the gc cost in linear
1873 // proportion to the allocation cost. Adjusting gcpercent
1874 // just changes the linear constant (and also the amount of
1875 // extra memory used).
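// Concretely, gc() sets
//	mstats.next_gc = mstats.heap_alloc + (mstats.heap_alloc - runtime_stacks_sys)*gcpercent/100;
// so with the default gcpercent of 100 the heap is allowed to roughly
// double before the next collection.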
1876 static int32 gcpercent = GcpercentUnknown;
1877
1878 static void
1879 cachestats(GCStats *stats)
1880 {
1881 M *mp;
1882 MCache *c;
1883 P *p, **pp;
1884 uint32 i;
1885 uint64 stacks_inuse;
1886 uint64 *src, *dst;
1887
1888 if(stats)
1889 runtime_memclr((byte*)stats, sizeof(*stats));
1890 stacks_inuse = 0;
1891 for(mp=runtime_allm; mp; mp=mp->alllink) {
1892 //stacks_inuse += mp->stackinuse*FixedStack;
1893 if(stats) {
1894 src = (uint64*)&mp->gcstats;
1895 dst = (uint64*)stats;
1896 for(i=0; i<sizeof(*stats)/sizeof(uint64); i++)
1897 dst[i] += src[i];
1898 runtime_memclr((byte*)&mp->gcstats, sizeof(mp->gcstats));
1899 }
1900 }
1901 for(pp=runtime_allp; (p=*pp) != nil; pp++) {
1902 c = p->mcache;
1903 if(c==nil)
1904 continue;
1905 runtime_purgecachedstats(c);
1906 for(i=0; i<nelem(c->local_by_size); i++) {
1907 mstats.by_size[i].nmalloc += c->local_by_size[i].nmalloc;
1908 c->local_by_size[i].nmalloc = 0;
1909 mstats.by_size[i].nfree += c->local_by_size[i].nfree;
1910 c->local_by_size[i].nfree = 0;
1911 }
1912 }
1913 mstats.stacks_inuse = stacks_inuse;
1914 }
1915
1916 // Structure of arguments passed to function gc().
1917 // This allows the arguments to be passed via reflect_call.
1918 struct gc_args
1919 {
1920 int32 force;
1921 };
1922
1923 static void gc(struct gc_args *args);
1924
1925 static int32
1926 readgogc(void)
1927 {
1928 const byte *p;
1929
1930 p = runtime_getenv("GOGC");
1931 if(p == nil || p[0] == '\0')
1932 return 100;
1933 if(runtime_strcmp((const char *)p, "off") == 0)
1934 return -1;
1935 return runtime_atoi(p);
1936 }
1937
1938 void
1939 runtime_gc(int32 force)
1940 {
1941 M *m;
1942 const byte *p;
1943 struct gc_args a, *ap;
1944
1945 // The atomic operations are not atomic if the uint64s
1946 // are not aligned on uint64 boundaries. This has been
1947 // a problem in the past.
1948 if((((uintptr)&work.empty) & 7) != 0)
1949 runtime_throw("runtime: gc work buffer is misaligned");
1950 if((((uintptr)&work.full) & 7) != 0)
1951 runtime_throw("runtime: gc work buffer is misaligned");
1952
1953 // Make sure all registers are saved on stack so that
1954 // scanstack sees them.
1955 __builtin_unwind_init();
1956
1957 // The gc is turned off (via enablegc) until
1958 // the bootstrap has completed.
1959 // Also, malloc gets called in the guts
1960 // of a number of libraries that might be
1961 // holding locks. To avoid priority inversion
1962 // problems, don't bother trying to run gc
1963 // while holding a lock. The next mallocgc
1964 // without a lock will do the gc instead.
1965 m = runtime_m();
1966 if(!mstats.enablegc || m->locks > 0 || runtime_panicking)
1967 return;
1968
1969 if(gcpercent == GcpercentUnknown) { // first time through
1970 gcpercent = readgogc();
1971
1972 p = runtime_getenv("GOGCTRACE");
1973 if(p != nil)
1974 gctrace = runtime_atoi(p);
1975 }
1976 if(gcpercent < 0)
1977 return;
1978
1979 // Run gc on a bigger stack to eliminate
1980 // a potentially large number of calls to runtime_morestack.
1981 // But not when using gccgo.
1982 a.force = force;
1983 ap = &a;
1984 gc(ap);
1985
1986 if(gctrace > 1 && !force) {
1987 a.force = 1;
1988 gc(&a);
1989 }
1990 }
1991
1992 static void
1993 gc(struct gc_args *args)
1994 {
1995 M *m;
1996 int64 t0, t1, t2, t3, t4;
1997 uint64 heap0, heap1, obj0, obj1, ninstr;
1998 GCStats stats;
1999 M *mp;
2000 uint32 i;
2001 // Eface eface;
2002
2003 runtime_semacquire(&runtime_worldsema);
2004 if(!args->force && mstats.heap_alloc < mstats.next_gc) {
2005 runtime_semrelease(&runtime_worldsema);
2006 return;
2007 }
2008
2009 m = runtime_m();
2010
2011 t0 = runtime_nanotime();
2012
2013 m->gcing = 1;
2014 runtime_stoptheworld();
2015
2016 if(CollectStats)
2017 runtime_memclr((byte*)&gcstats, sizeof(gcstats));
2018
2019 for(mp=runtime_allm; mp; mp=mp->alllink)
2020 runtime_settype_flush(mp, false);
2021
2022 heap0 = 0;
2023 obj0 = 0;
2024 if(gctrace) {
2025 cachestats(nil);
2026 heap0 = mstats.heap_alloc;
2027 obj0 = mstats.nmalloc - mstats.nfree;
2028 }
2029
2030 m->locks++; // disable gc during mallocs in parforalloc
2031 if(work.markfor == nil)
2032 work.markfor = runtime_parforalloc(MaxGcproc);
2033 if(work.sweepfor == nil)
2034 work.sweepfor = runtime_parforalloc(MaxGcproc);
2035 m->locks--;
2036
2037 if(itabtype == nil) {
2038 // get C pointer to the Go type "itab"
2039 // runtime_gc_itab_ptr(&eface);
2040 // itabtype = ((PtrType*)eface.type)->elem;
2041 }
2042
2043 work.nwait = 0;
2044 work.ndone = 0;
2045 work.debugmarkdone = 0;
2046 work.nproc = runtime_gcprocs();
2047 addroots();
2048 runtime_parforsetup(work.markfor, work.nproc, work.nroot, nil, false, markroot);
2049 runtime_parforsetup(work.sweepfor, work.nproc, runtime_mheap->nspan, nil, true, sweepspan);
2050 if(work.nproc > 1) {
2051 runtime_noteclear(&work.alldone);
2052 runtime_helpgc(work.nproc);
2053 }
2054
2055 t1 = runtime_nanotime();
2056
2057 gchelperstart();
2058 runtime_parfordo(work.markfor);
2059 scanblock(nil, nil, 0, true);
2060
2061 if(DebugMark) {
2062 for(i=0; i<work.nroot; i++)
2063 debug_scanblock(work.roots[i].p, work.roots[i].n);
2064 runtime_atomicstore(&work.debugmarkdone, 1);
2065 }
2066 t2 = runtime_nanotime();
2067
2068 runtime_parfordo(work.sweepfor);
2069 bufferList[m->helpgc].busy = 0;
2070 t3 = runtime_nanotime();
2071
2072 if(work.nproc > 1)
2073 runtime_notesleep(&work.alldone);
2074
2075 cachestats(&stats);
2076
2077 stats.nprocyield += work.sweepfor->nprocyield;
2078 stats.nosyield += work.sweepfor->nosyield;
2079 stats.nsleep += work.sweepfor->nsleep;
2080
2081 mstats.next_gc = mstats.heap_alloc+(mstats.heap_alloc-runtime_stacks_sys)*gcpercent/100;
2082 m->gcing = 0;
2083
2084 if(finq != nil) {
2085 m->locks++; // disable gc during the mallocs in newproc
2086 // kick off or wake up goroutine to run queued finalizers
2087 if(fing == nil)
2088 fing = __go_go(runfinq, nil);
2089 else if(fingwait) {
2090 fingwait = 0;
2091 runtime_ready(fing);
2092 }
2093 m->locks--;
2094 }
2095
2096 heap1 = mstats.heap_alloc;
2097 obj1 = mstats.nmalloc - mstats.nfree;
2098
2099 t4 = runtime_nanotime();
2100 mstats.last_gc = t4;
2101 mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t4 - t0;
2102 mstats.pause_total_ns += t4 - t0;
2103 mstats.numgc++;
2104 if(mstats.debuggc)
2105 runtime_printf("pause %D\n", t4-t0);
2106
2107 if(gctrace) {
2108 runtime_printf("gc%d(%d): %D+%D+%D ms, %D -> %D MB %D -> %D (%D-%D) objects,"
2109 " %D(%D) handoff, %D(%D) steal, %D/%D/%D yields\n",
2110 mstats.numgc, work.nproc, (t2-t1)/1000000, (t3-t2)/1000000, (t1-t0+t4-t3)/1000000,
2111 heap0>>20, heap1>>20, obj0, obj1,
2112 mstats.nmalloc, mstats.nfree,
2113 stats.nhandoff, stats.nhandoffcnt,
2114 work.sweepfor->nsteal, work.sweepfor->nstealcnt,
2115 stats.nprocyield, stats.nosyield, stats.nsleep);
2116 if(CollectStats) {
2117 runtime_printf("scan: %D bytes, %D objects, %D untyped, %D types from MSpan\n",
2118 gcstats.nbytes, gcstats.obj.cnt, gcstats.obj.notype, gcstats.obj.typelookup);
2119 if(gcstats.ptr.cnt != 0)
2120 runtime_printf("avg ptrbufsize: %D (%D/%D)\n",
2121 gcstats.ptr.sum/gcstats.ptr.cnt, gcstats.ptr.sum, gcstats.ptr.cnt);
2122 if(gcstats.obj.cnt != 0)
2123 runtime_printf("avg nobj: %D (%D/%D)\n",
2124 gcstats.obj.sum/gcstats.obj.cnt, gcstats.obj.sum, gcstats.obj.cnt);
2125 runtime_printf("rescans: %D, %D bytes\n", gcstats.rescan, gcstats.rescanbytes);
2126
2127 runtime_printf("instruction counts:\n");
2128 ninstr = 0;
2129 for(i=0; i<nelem(gcstats.instr); i++) {
2130 runtime_printf("\t%d:\t%D\n", i, gcstats.instr[i]);
2131 ninstr += gcstats.instr[i];
2132 }
2133 runtime_printf("\ttotal:\t%D\n", ninstr);
2134
2135 runtime_printf("putempty: %D, getfull: %D\n", gcstats.putempty, gcstats.getfull);
2136 }
2137 }
2138
2139 runtime_MProf_GC();
2140 runtime_semrelease(&runtime_worldsema);
2141 runtime_starttheworld();
2142
2143 // give the queued finalizers, if any, a chance to run
2144 if(finq != nil)
2145 runtime_gosched();
2146 }
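
// Example of the GOGCTRACE=1 line printed above, with made-up values
// (the fields follow the format string exactly):
//	gc5(2): 3+1+0 ms, 4 -> 2 MB 60000 -> 25000 (180000-155000) objects,
//	 12(340) handoff, 5(90) steal, 20/10/3 yields
// i.e. mark, sweep and stop/start milliseconds, heap size before -> after,
// live object counts before -> after (cumulative mallocs-frees), then the
// work handoff, steal and yield statistics from the parallel helpers.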

void runtime_ReadMemStats(MStats *)
  __asm__ (GOSYM_PREFIX "runtime.ReadMemStats");

void
runtime_ReadMemStats(MStats *stats)
{
	M *m;

	// Have to acquire worldsema to stop the world,
	// because stoptheworld can only be used by
	// one goroutine at a time, and there might be
	// a pending garbage collection already calling it.
	runtime_semacquire(&runtime_worldsema);
	m = runtime_m();
	m->gcing = 1;
	runtime_stoptheworld();
	cachestats(nil);
	*stats = mstats;
	m->gcing = 0;
	runtime_semrelease(&runtime_worldsema);
	runtime_starttheworld();
}

void runtime_debug_readGCStats(Slice*)
  __asm__("runtime_debug.readGCStats");

void
runtime_debug_readGCStats(Slice *pauses)
{
	uint64 *p;
	uint32 i, n;

	// Calling code in runtime/debug should make the slice large enough.
	if((size_t)pauses->cap < nelem(mstats.pause_ns)+3)
		runtime_throw("runtime: short slice passed to readGCStats");

	// Pass back: pauses, last gc (absolute time), number of gc, total pause ns.
	p = (uint64*)pauses->array;
	runtime_lock(runtime_mheap);
	n = mstats.numgc;
	if(n > nelem(mstats.pause_ns))
		n = nelem(mstats.pause_ns);

	// The pause buffer is circular. The most recent pause is at
	// pause_ns[(numgc-1)%nelem(pause_ns)], and then backward
	// from there to go back farther in time. We deliver the times
	// most recent first (in p[0]).
	for(i=0; i<n; i++)
		p[i] = mstats.pause_ns[(mstats.numgc-1-i)%nelem(mstats.pause_ns)];

	p[n] = mstats.last_gc;
	p[n+1] = mstats.numgc;
	p[n+2] = mstats.pause_total_ns;
	runtime_unlock(runtime_mheap);
	pauses->__count = n+3;
}
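
// Worked example of the indexing above (assuming the usual 256-entry
// pause_ns buffer): after numgc=260 collections, the most recent pause is
// pause_ns[259%256] = pause_ns[3], so the loop copies p[0]=pause_ns[3],
// p[1]=pause_ns[2], ..., wrapping around to the oldest retained entry,
// and p[n], p[n+1], p[n+2] carry last_gc, numgc and pause_total_ns.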

intgo runtime_debug_setGCPercent(intgo)
  __asm__("runtime_debug.setGCPercent");

intgo
runtime_debug_setGCPercent(intgo in)
{
	intgo out;

	runtime_lock(runtime_mheap);
	if(gcpercent == GcpercentUnknown)
		gcpercent = readgogc();
	out = gcpercent;
	if(in < 0)
		in = -1;
	gcpercent = in;
	runtime_unlock(runtime_mheap);
	return out;
}
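
// This backs runtime/debug.SetGCPercent. A typical (illustrative) use from
// Go code:
//
//	old := debug.SetGCPercent(-1)	// disable collection, like GOGC=off
//	...
//	debug.SetGCPercent(old)	// restore the previous ratio
//
// The return value is the previous setting, read lazily from GOGC on the
// first call via readgogc above.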

static void
gchelperstart(void)
{
	M *m;

	m = runtime_m();
	if(m->helpgc < 0 || m->helpgc >= MaxGcproc)
		runtime_throw("gchelperstart: bad m->helpgc");
	if(runtime_xchg(&bufferList[m->helpgc].busy, 1))
		runtime_throw("gchelperstart: already busy");
}

static void
runfinq(void* dummy __attribute__ ((unused)))
{
	Finalizer *f;
	FinBlock *fb, *next;
	uint32 i;

	for(;;) {
		// There's no need for a lock in this section
		// because it only conflicts with the garbage
		// collector, and the garbage collector only
		// runs when everyone else is stopped, and
		// runfinq only stops at the gosched() or
		// during the calls in the for loop.
		fb = finq;
		finq = nil;
		if(fb == nil) {
			fingwait = 1;
			runtime_park(nil, nil, "finalizer wait");
			continue;
		}
		if(raceenabled)
			runtime_racefingo();
		for(; fb; fb=next) {
			next = fb->next;
			for(i=0; i<(uint32)fb->cnt; i++) {
				void *param;

				f = &fb->fin[i];
				param = &f->arg;
				reflect_call(f->ft, f->fn, 0, 0, &param, nil);
				f->fn = nil;
				f->arg = nil;
			}
			fb->cnt = 0;
			fb->next = finc;
			finc = fb;
		}
		runtime_gc(1);	// trigger another gc to clean up the finalized objects, if possible
	}
}
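
// Note: the finalizers executed above are the ones queued during sweeping
// for objects registered with runtime.SetFinalizer; each is invoked through
// reflect_call with a pointer to its single argument, and the drained
// FinBlocks are threaded onto finc for reuse by later cycles.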

// mark the block at v of size n as allocated.
// If noptr is true, mark it as having no pointers.
void
runtime_markallocated(void *v, uintptr n, bool noptr)
{
	uintptr *b, obits, bits, off, shift;

	if(0)
		runtime_printf("markallocated %p+%p\n", v, n);

	if((byte*)v+n > (byte*)runtime_mheap->arena_used || (byte*)v < runtime_mheap->arena_start)
		runtime_throw("markallocated: bad pointer");

	off = (uintptr*)v - (uintptr*)runtime_mheap->arena_start;	// word offset
	b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;

	for(;;) {
		obits = *b;
		bits = (obits & ~(bitMask<<shift)) | (bitAllocated<<shift);
		if(noptr)
			bits |= bitNoPointers<<shift;
		if(runtime_singleproc) {
			*b = bits;
			break;
		} else {
			// more than one goroutine is potentially running: use atomic op
			if(runtime_casp((void**)b, (void*)obits, (void*)bits))
				break;
		}
	}
}
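
// Note on the retry loop above: wordsPerBitmapWord heap words share a single
// bitmap word, so two goroutines marking neighbouring blocks can race on the
// same word; the compare-and-swap retry keeps their read-modify-write
// updates from losing each other's bits. With only one proc running, a
// plain store suffices.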

// mark the block at v of size n as freed.
void
runtime_markfreed(void *v, uintptr n)
{
	uintptr *b, obits, bits, off, shift;

	if(0)
		runtime_printf("markfreed %p+%p\n", v, n);

	if((byte*)v+n > (byte*)runtime_mheap->arena_used || (byte*)v < runtime_mheap->arena_start)
		runtime_throw("markfreed: bad pointer");

	off = (uintptr*)v - (uintptr*)runtime_mheap->arena_start;	// word offset
	b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;

	for(;;) {
		obits = *b;
		bits = (obits & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
		if(runtime_singleproc) {
			*b = bits;
			break;
		} else {
			// more than one goroutine is potentially running: use atomic op
			if(runtime_casp((void**)b, (void*)obits, (void*)bits))
				break;
		}
	}
}

// check that the block at v of size n is marked freed.
void
runtime_checkfreed(void *v, uintptr n)
{
	uintptr *b, bits, off, shift;

	if(!runtime_checking)
		return;

	if((byte*)v+n > (byte*)runtime_mheap->arena_used || (byte*)v < runtime_mheap->arena_start)
		return;	// not allocated, so okay

	off = (uintptr*)v - (uintptr*)runtime_mheap->arena_start;	// word offset
	b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;

	bits = *b>>shift;
	if((bits & bitAllocated) != 0) {
		runtime_printf("checkfreed %p+%p: off=%p have=%p\n",
			v, n, off, bits & bitMask);
		runtime_throw("checkfreed: not freed");
	}
}

// mark the span of memory at v as having n blocks of the given size.
// if leftover is true, there is left over space at the end of the span.
void
runtime_markspan(void *v, uintptr size, uintptr n, bool leftover)
{
	uintptr *b, off, shift;
	byte *p;

	if((byte*)v+size*n > (byte*)runtime_mheap->arena_used || (byte*)v < runtime_mheap->arena_start)
		runtime_throw("markspan: bad pointer");

	p = v;
	if(leftover)	// mark a boundary just past end of last block too
		n++;
	for(; n-- > 0; p += size) {
		// Okay to use non-atomic ops here, because we control
		// the entire span, and each bitmap word has bits for only
		// one span, so no other goroutines are changing these
		// bitmap words.
		off = (uintptr*)p - (uintptr*)runtime_mheap->arena_start;	// word offset
		b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
		shift = off % wordsPerBitmapWord;
		*b = (*b & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
	}
}
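
// Illustrative example (made-up numbers): for a span at v holding n=3
// blocks of size=32 bytes with leftover space at the end, the loop above
// sets the bitBlockBoundary bit for the words at v, v+32 and v+64, plus
// one more at v+96 to mark the boundary just past the last block.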

// unmark the span of memory at v of length n bytes.
void
runtime_unmarkspan(void *v, uintptr n)
{
	uintptr *p, *b, off;

	if((byte*)v+n > (byte*)runtime_mheap->arena_used || (byte*)v < runtime_mheap->arena_start)
		runtime_throw("unmarkspan: bad pointer");

	p = v;
	off = p - (uintptr*)runtime_mheap->arena_start;	// word offset
	if(off % wordsPerBitmapWord != 0)
		runtime_throw("unmarkspan: unaligned pointer");
	b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
	n /= PtrSize;
	if(n%wordsPerBitmapWord != 0)
		runtime_throw("unmarkspan: unaligned length");
	// Okay to use non-atomic ops here, because we control
	// the entire span, and each bitmap word has bits for only
	// one span, so no other goroutines are changing these
	// bitmap words.
	n /= wordsPerBitmapWord;
	while(n-- > 0)
		*b-- = 0;
}

bool
runtime_blockspecial(void *v)
{
	uintptr *b, off, shift;

	if(DebugMark)
		return true;

	off = (uintptr*)v - (uintptr*)runtime_mheap->arena_start;
	b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;

	return (*b & (bitSpecial<<shift)) != 0;
}

void
runtime_setblockspecial(void *v, bool s)
{
	uintptr *b, off, shift, bits, obits;

	if(DebugMark)
		return;

	off = (uintptr*)v - (uintptr*)runtime_mheap->arena_start;
	b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
	shift = off % wordsPerBitmapWord;

	for(;;) {
		obits = *b;
		if(s)
			bits = obits | (bitSpecial<<shift);
		else
			bits = obits & ~(bitSpecial<<shift);
		if(runtime_singleproc) {
			*b = bits;
			break;
		} else {
			// more than one goroutine is potentially running: use atomic op
			if(runtime_casp((void**)b, (void*)obits, (void*)bits))
				break;
		}
	}
}

void
runtime_MHeap_MapBits(MHeap *h)
{
	size_t page_size;

	// Caller has added extra mappings to the arena.
	// Add extra mappings of bitmap words as needed.
	// We allocate extra bitmap pieces in chunks of bitmapChunk.
	enum {
		bitmapChunk = 8192
	};
	uintptr n;

	n = (h->arena_used - h->arena_start) / wordsPerBitmapWord;
	n = (n+bitmapChunk-1) & ~(bitmapChunk-1);
	if(h->bitmap_mapped >= n)
		return;

	page_size = getpagesize();
	n = (n+page_size-1) & ~(page_size-1);

	runtime_SysMap(h->arena_start - n, n - h->bitmap_mapped);
	h->bitmap_mapped = n;
}
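
// Worked example of the sizing above (made-up numbers, 64-bit layout with
// wordsPerBitmapWord=16): if arena_used - arena_start is 1 MB, the bitmap
// needs 1 MB/16 = 64 KB, already a multiple of the 8 KB bitmapChunk; after
// page rounding, SysMap extends the mapping that grows downward from
// arena_start by whatever part bitmap_mapped does not yet cover.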