1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 // Garbage collector.
6 
7 #include <unistd.h>
8 
9 #include "runtime.h"
10 #include "arch.h"
11 #include "malloc.h"
12 #include "mgc0.h"
13 #include "race.h"
14 #include "go-type.h"
15 
16 // Map gccgo field names to gc field names.
17 // Slice aka __go_open_array.
18 #define array __values
19 #define cap __capacity
20 // Iface aka __go_interface
21 #define tab __methods
22 // Eface aka __go_empty_interface.
23 #define type __type_descriptor
24 // Hmap aka __go_map
25 typedef struct __go_map Hmap;
26 // Type aka __go_type_descriptor
27 #define kind __code
28 #define string __reflection
29 #define KindPtr GO_PTR
30 #define KindNoPointers GO_NO_POINTERS
31 // PtrType aka __go_ptr_type
32 #define elem __element_type
33 
34 #ifdef USING_SPLIT_STACK
35 
36 extern void * __splitstack_find (void *, void *, size_t *, void **, void **,
37 				 void **);
38 
39 extern void * __splitstack_find_context (void *context[10], size_t *, void **,
40 					 void **, void **);
41 
42 #endif
43 
44 enum {
45 	Debug = 0,
46 	DebugMark = 0,  // run second pass to check mark
47 	CollectStats = 0,
48 	ScanStackByFrames = 0,
49 	IgnorePreciseGC = 0,
50 
51 	// Four bits per word (see #defines below).
52 	wordsPerBitmapWord = sizeof(void*)*8/4,
53 	bitShift = sizeof(void*)*8/4,
54 
55 	handoffThreshold = 4,
56 	IntermediateBufferCapacity = 64,
57 
58 	// Bits in type information
59 	PRECISE = 1,
60 	LOOP = 2,
61 	PC_BITS = PRECISE | LOOP,
62 };
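
// On a 64-bit system, for example, sizeof(void*) == 8, so
// wordsPerBitmapWord = bitShift = 8*8/4 = 16: one bitmap word describes
// 16 heap words, and the four per-word bit groups are 16 bits apart
// (see the bitmap layout comment below).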
63 
64 // Bits in per-word bitmap.
65 // #defines because enum might not be able to hold the values.
66 //
67 // Each word in the bitmap describes wordsPerBitmapWord words
68 // of heap memory.  There are 4 bitmap bits dedicated to each heap word,
69 // so on a 64-bit system there is one bitmap word per 16 heap words.
70 // The bits in the word are packed together by type first, then by
71 // heap location, so each 64-bit bitmap word consists of, from top to bottom,
72 // the 16 bitSpecial bits for the corresponding heap words, then the 16 bitMarked bits,
73 // then the 16 bitNoPointers/bitBlockBoundary bits, then the 16 bitAllocated bits.
74 // This layout makes it easier to iterate over the bits of a given type.
75 //
76 // The bitmap starts at mheap.arena_start and extends *backward* from
77 // there.  On a 64-bit system the off'th word in the arena is tracked by
78 // the off/16+1'th word before mheap.arena_start.  (On a 32-bit system,
79 // the only difference is that the divisor is 8.)
80 //
81 // To pull out the bits corresponding to a given pointer p, we use:
82 //
83 //	off = p - (uintptr*)mheap.arena_start;  // word offset
84 //	b = (uintptr*)mheap.arena_start - off/wordsPerBitmapWord - 1;
85 //	shift = off % wordsPerBitmapWord
86 //	bits = *b >> shift;
87 //	/* then test bits & bitAllocated, bits & bitMarked, etc. */
88 //
89 #define bitAllocated		((uintptr)1<<(bitShift*0))
90 #define bitNoPointers		((uintptr)1<<(bitShift*1))	/* when bitAllocated is set */
91 #define bitMarked		((uintptr)1<<(bitShift*2))	/* when bitAllocated is set */
92 #define bitSpecial		((uintptr)1<<(bitShift*3))	/* when bitAllocated is set - has finalizer or being profiled */
93 #define bitBlockBoundary	((uintptr)1<<(bitShift*1))	/* when bitAllocated is NOT set */
94 
95 #define bitMask (bitBlockBoundary | bitAllocated | bitMarked | bitSpecial)
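
// A minimal sketch (not part of the collector) of marking an object with
// these masks, using the recipe above and assuming p points at the base of
// an allocated object in the arena; markonly and flushptrbuf below do the
// same thing, switching to runtime_casp when more than one proc is marking:
//
//	bits = *b >> shift;
//	if((bits & (bitAllocated|bitMarked)) == bitAllocated)
//		*b |= bitMarked<<shift;	/* allocated and not yet marked: mark it */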
96 
97 // Holding worldsema grants an M the right to try to stop the world.
98 // The procedure is:
99 //
100 //	runtime_semacquire(&runtime_worldsema);
101 //	m->gcing = 1;
102 //	runtime_stoptheworld();
103 //
104 //	... do stuff ...
105 //
106 //	m->gcing = 0;
107 //	runtime_semrelease(&runtime_worldsema);
108 //	runtime_starttheworld();
109 //
110 uint32 runtime_worldsema = 1;
111 
112 static int32 gctrace;
113 
114 // The size of Workbuf is N*PageSize.
115 typedef struct Workbuf Workbuf;
116 struct Workbuf
117 {
118 #define SIZE (2*PageSize-sizeof(LFNode)-sizeof(uintptr))
119 	LFNode  node; // must be first
120 	uintptr nobj;
121 	Obj     obj[SIZE/sizeof(Obj) - 1];
122 	uint8   _padding[SIZE%sizeof(Obj) + sizeof(Obj)];
123 #undef SIZE
124 };
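
// A sketch of the arithmetic behind the SIZE macro above (ignoring any
// trailing alignment padding the compiler may add):
//
//	sizeof(node) + sizeof(nobj) = 2*PageSize - SIZE
//	sizeof(obj) + sizeof(_padding)
//	    = (SIZE/sizeof(Obj) - 1)*sizeof(Obj) + SIZE%sizeof(Obj) + sizeof(Obj)
//	    = SIZE
//
// so sizeof(Workbuf) comes out to 2*PageSize; scanblock checks that it is
// a multiple of PageSize.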
125 
126 typedef struct Finalizer Finalizer;
127 struct Finalizer
128 {
129 	FuncVal *fn;
130 	void *arg;
131 	const struct __go_func_type *ft;
132 };
133 
134 typedef struct FinBlock FinBlock;
135 struct FinBlock
136 {
137 	FinBlock *alllink;
138 	FinBlock *next;
139 	int32 cnt;
140 	int32 cap;
141 	Finalizer fin[1];
142 };
143 
144 static G *fing;
145 static FinBlock *finq; // list of finalizers that are to be executed
146 static FinBlock *finc; // cache of free blocks
147 static FinBlock *allfin; // list of all blocks
148 static Lock finlock;
149 static int32 fingwait;
150 
151 static void runfinq(void*);
152 static Workbuf* getempty(Workbuf*);
153 static Workbuf* getfull(Workbuf*);
154 static void	putempty(Workbuf*);
155 static Workbuf* handoff(Workbuf*);
156 static void	gchelperstart(void);
157 
158 static struct {
159 	uint64	full;  // lock-free list of full blocks
160 	uint64	empty; // lock-free list of empty blocks
161 	byte	pad0[CacheLineSize]; // prevents false-sharing between full/empty and nproc/nwait
162 	uint32	nproc;
163 	volatile uint32	nwait;
164 	volatile uint32	ndone;
165 	volatile uint32 debugmarkdone;
166 	Note	alldone;
167 	ParFor	*markfor;
168 	ParFor	*sweepfor;
169 
170 	Lock;
171 	byte	*chunk;
172 	uintptr	nchunk;
173 
174 	Obj	*roots;
175 	uint32	nroot;
176 	uint32	rootcap;
177 } work __attribute__((aligned(8)));
178 
179 enum {
180 	GC_DEFAULT_PTR = GC_NUM_INSTR,
181 	GC_MAP_NEXT,
182 	GC_CHAN,
183 
184 	GC_NUM_INSTR2
185 };
186 
187 static struct {
188 	struct {
189 		uint64 sum;
190 		uint64 cnt;
191 	} ptr;
192 	uint64 nbytes;
193 	struct {
194 		uint64 sum;
195 		uint64 cnt;
196 		uint64 notype;
197 		uint64 typelookup;
198 	} obj;
199 	uint64 rescan;
200 	uint64 rescanbytes;
201 	uint64 instr[GC_NUM_INSTR2];
202 	uint64 putempty;
203 	uint64 getfull;
204 } gcstats;
205 
206 // markonly marks an object. It returns true if the object
207 // has been marked by this function, false otherwise.
208 // This function doesn't append the object to any buffer.
209 static bool
210 markonly(void *obj)
211 {
212 	byte *p;
213 	uintptr *bitp, bits, shift, x, xbits, off;
214 	MSpan *s;
215 	PageID k;
216 
217 	// Words outside the arena cannot be pointers.
218 	if((byte*)obj < runtime_mheap->arena_start || (byte*)obj >= runtime_mheap->arena_used)
219 		return false;
220 
221 	// obj may be a pointer to a live object.
222 	// Try to find the beginning of the object.
223 
224 	// Round down to word boundary.
225 	obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
226 
227 	// Find bits for this word.
228 	off = (uintptr*)obj - (uintptr*)runtime_mheap->arena_start;
229 	bitp = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
230 	shift = off % wordsPerBitmapWord;
231 	xbits = *bitp;
232 	bits = xbits >> shift;
233 
234 	// Pointing at the beginning of a block?
235 	if((bits & (bitAllocated|bitBlockBoundary)) != 0)
236 		goto found;
237 
238 	// Otherwise consult span table to find beginning.
239 	// (Manually inlined copy of MHeap_LookupMaybe.)
240 	k = (uintptr)obj>>PageShift;
241 	x = k;
242 	x -= (uintptr)runtime_mheap->arena_start>>PageShift;
243 	s = runtime_mheap->map[x];
244 	if(s == nil || k < s->start || k - s->start >= s->npages || s->state != MSpanInUse)
245 		return false;
246 	p = (byte*)((uintptr)s->start<<PageShift);
247 	if(s->sizeclass == 0) {
248 		obj = p;
249 	} else {
250 		if((byte*)obj >= (byte*)s->limit)
251 			return false;
252 		uintptr size = s->elemsize;
253 		int32 i = ((byte*)obj - p)/size;
254 		obj = p+i*size;
255 	}
256 
257 	// Now that we know the object header, reload bits.
258 	off = (uintptr*)obj - (uintptr*)runtime_mheap->arena_start;
259 	bitp = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
260 	shift = off % wordsPerBitmapWord;
261 	xbits = *bitp;
262 	bits = xbits >> shift;
263 
264 found:
265 	// Now we have bits, bitp, and shift correct for
266 	// obj pointing at the base of the object.
267 	// Only care about allocated and not marked.
268 	if((bits & (bitAllocated|bitMarked)) != bitAllocated)
269 		return false;
270 	if(work.nproc == 1)
271 		*bitp |= bitMarked<<shift;
272 	else {
273 		for(;;) {
274 			x = *bitp;
275 			if(x & (bitMarked<<shift))
276 				return false;
277 			if(runtime_casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift))))
278 				break;
279 		}
280 	}
281 
282 	// The object is now marked
283 	return true;
284 }
285 
286 // PtrTarget is a structure used by intermediate buffers.
287 // The intermediate buffers hold GC data before it
288 // is moved/flushed to the work buffer (Workbuf).
289 // The size of an intermediate buffer is very small,
290 // such as 32 or 64 elements.
291 typedef struct PtrTarget PtrTarget;
292 struct PtrTarget
293 {
294 	void *p;
295 	uintptr ti;
296 };
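
// The typical producer pattern, as used by scanblock below: append to the
// per-proc intermediate buffer and flush it into the shared work buffer
// once it fills up.
//
//	*ptrbufpos++ = (struct PtrTarget){obj, objti};
//	if(ptrbufpos == ptrbuf_end)
//		flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);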
297 
298 typedef struct BufferList BufferList;
299 struct BufferList
300 {
301 	PtrTarget ptrtarget[IntermediateBufferCapacity];
302 	Obj obj[IntermediateBufferCapacity];
303 	uint32 busy;
304 	byte pad[CacheLineSize];
305 };
306 static BufferList bufferList[MaxGcproc];
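// One BufferList per GC helper thread, indexed by m->helpgc, so the
// intermediate buffers need no locking: scanblock picks
// &bufferList[runtime_m()->helpgc], and runtime_gchelper clears the busy
// flag when that helper finishes its sweep work.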
307 
308 static Type *itabtype;
309 
310 static void enqueue(Obj obj, Workbuf **_wbuf, Obj **_wp, uintptr *_nobj);
311 
312 // flushptrbuf moves data from the PtrTarget buffer to the work buffer.
313 // The PtrTarget buffer contains blocks irrespective of whether the blocks have been marked or scanned,
314 // while the work buffer contains blocks which have been marked
315 // and are prepared to be scanned by the garbage collector.
316 //
317 // _wp, _wbuf, _nobj are input/output parameters specifying the work buffer.
318 //
319 // A simplified drawing explaining how the todo-list moves from one structure to another:
320 //
321 //     scanblock
322 //  (find pointers)
323 //    Obj ------> PtrTarget (pointer targets)
324 //     ↑          |
325 //     |          |
326 //     `----------'
327 //     flushptrbuf
328 //  (find block start, mark and enqueue)
329 static void
330 flushptrbuf(PtrTarget *ptrbuf, PtrTarget **ptrbufpos, Obj **_wp, Workbuf **_wbuf, uintptr *_nobj)
331 {
332 	byte *p, *arena_start, *obj;
333 	uintptr size, *bitp, bits, shift, j, x, xbits, off, nobj, ti, n;
334 	MSpan *s;
335 	PageID k;
336 	Obj *wp;
337 	Workbuf *wbuf;
338 	PtrTarget *ptrbuf_end;
339 
340 	arena_start = runtime_mheap->arena_start;
341 
342 	wp = *_wp;
343 	wbuf = *_wbuf;
344 	nobj = *_nobj;
345 
346 	ptrbuf_end = *ptrbufpos;
347 	n = ptrbuf_end - ptrbuf;
348 	*ptrbufpos = ptrbuf;
349 
350 	if(CollectStats) {
351 		runtime_xadd64(&gcstats.ptr.sum, n);
352 		runtime_xadd64(&gcstats.ptr.cnt, 1);
353 	}
354 
355 	// If buffer is nearly full, get a new one.
356 	if(wbuf == nil || nobj+n >= nelem(wbuf->obj)) {
357 		if(wbuf != nil)
358 			wbuf->nobj = nobj;
359 		wbuf = getempty(wbuf);
360 		wp = wbuf->obj;
361 		nobj = 0;
362 
363 		if(n >= nelem(wbuf->obj))
364 			runtime_throw("ptrbuf has to be smaller than WorkBuf");
365 	}
366 
367 	// TODO(atom): This block is a branch of an if-then-else statement.
368 	//             The single-threaded branch may be added in a later CL.
369 	{
370 		// Multi-threaded version.
371 
372 		while(ptrbuf < ptrbuf_end) {
373 			obj = ptrbuf->p;
374 			ti = ptrbuf->ti;
375 			ptrbuf++;
376 
377 			// obj belongs to interval [mheap.arena_start, mheap.arena_used).
378 			if(Debug > 1) {
379 				if(obj < runtime_mheap->arena_start || obj >= runtime_mheap->arena_used)
380 					runtime_throw("object is outside of mheap");
381 			}
382 
383 			// obj may be a pointer to a live object.
384 			// Try to find the beginning of the object.
385 
386 			// Round down to word boundary.
387 			if(((uintptr)obj & ((uintptr)PtrSize-1)) != 0) {
388 				obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
389 				ti = 0;
390 			}
391 
392 			// Find bits for this word.
393 			off = (uintptr*)obj - (uintptr*)arena_start;
394 			bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
395 			shift = off % wordsPerBitmapWord;
396 			xbits = *bitp;
397 			bits = xbits >> shift;
398 
399 			// Pointing at the beginning of a block?
400 			if((bits & (bitAllocated|bitBlockBoundary)) != 0)
401 				goto found;
402 
403 			ti = 0;
404 
405 			// Pointing just past the beginning?
406 			// Scan backward a little to find a block boundary.
407 			for(j=shift; j-->0; ) {
408 				if(((xbits>>j) & (bitAllocated|bitBlockBoundary)) != 0) {
409 					obj = (byte*)obj - (shift-j)*PtrSize;
410 					shift = j;
411 					bits = xbits>>shift;
412 					goto found;
413 				}
414 			}
415 
416 			// Otherwise consult span table to find beginning.
417 			// (Manually inlined copy of MHeap_LookupMaybe.)
418 			k = (uintptr)obj>>PageShift;
419 			x = k;
420 			x -= (uintptr)arena_start>>PageShift;
421 			s = runtime_mheap->map[x];
422 			if(s == nil || k < s->start || k - s->start >= s->npages || s->state != MSpanInUse)
423 				continue;
424 			p = (byte*)((uintptr)s->start<<PageShift);
425 			if(s->sizeclass == 0) {
426 				obj = p;
427 			} else {
428 				if((byte*)obj >= (byte*)s->limit)
429 					continue;
430 				size = s->elemsize;
431 				int32 i = ((byte*)obj - p)/size;
432 				obj = p+i*size;
433 			}
434 
435 			// Now that we know the object header, reload bits.
436 			off = (uintptr*)obj - (uintptr*)arena_start;
437 			bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
438 			shift = off % wordsPerBitmapWord;
439 			xbits = *bitp;
440 			bits = xbits >> shift;
441 
442 		found:
443 			// Now we have bits, bitp, and shift correct for
444 			// obj pointing at the base of the object.
445 			// Only care about allocated and not marked.
446 			if((bits & (bitAllocated|bitMarked)) != bitAllocated)
447 				continue;
448 			if(work.nproc == 1)
449 				*bitp |= bitMarked<<shift;
450 			else {
451 				for(;;) {
452 					x = *bitp;
453 					if(x & (bitMarked<<shift))
454 						goto continue_obj;
455 					if(runtime_casp((void**)bitp, (void*)x, (void*)(x|(bitMarked<<shift))))
456 						break;
457 				}
458 			}
459 
460 			// If object has no pointers, don't need to scan further.
461 			if((bits & bitNoPointers) != 0)
462 				continue;
463 
464 			// Ask span about size class.
465 			// (Manually inlined copy of MHeap_Lookup.)
466 			x = (uintptr)obj >> PageShift;
467 			x -= (uintptr)arena_start>>PageShift;
468 			s = runtime_mheap->map[x];
469 
470 			PREFETCH(obj);
471 
472 			*wp = (Obj){obj, s->elemsize, ti};
473 			wp++;
474 			nobj++;
475 		continue_obj:;
476 		}
477 
478 		// If another proc wants a pointer, give it some.
479 		if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) {
480 			wbuf->nobj = nobj;
481 			wbuf = handoff(wbuf);
482 			nobj = wbuf->nobj;
483 			wp = wbuf->obj + nobj;
484 		}
485 	}
486 
487 	*_wp = wp;
488 	*_wbuf = wbuf;
489 	*_nobj = nobj;
490 }
491 
492 static void
493 flushobjbuf(Obj *objbuf, Obj **objbufpos, Obj **_wp, Workbuf **_wbuf, uintptr *_nobj)
494 {
495 	uintptr nobj, off;
496 	Obj *wp, obj;
497 	Workbuf *wbuf;
498 	Obj *objbuf_end;
499 
500 	wp = *_wp;
501 	wbuf = *_wbuf;
502 	nobj = *_nobj;
503 
504 	objbuf_end = *objbufpos;
505 	*objbufpos = objbuf;
506 
507 	while(objbuf < objbuf_end) {
508 		obj = *objbuf++;
509 
510 		// Align obj.b to a word boundary.
511 		off = (uintptr)obj.p & (PtrSize-1);
512 		if(off != 0) {
513 			obj.p += PtrSize - off;
514 			obj.n -= PtrSize - off;
515 			obj.ti = 0;
516 		}
517 
518 		if(obj.p == nil || obj.n == 0)
519 			continue;
520 
521 		// If buffer is full, get a new one.
522 		if(wbuf == nil || nobj >= nelem(wbuf->obj)) {
523 			if(wbuf != nil)
524 				wbuf->nobj = nobj;
525 			wbuf = getempty(wbuf);
526 			wp = wbuf->obj;
527 			nobj = 0;
528 		}
529 
530 		*wp = obj;
531 		wp++;
532 		nobj++;
533 	}
534 
535 	// If another proc wants a pointer, give it some.
536 	if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) {
537 		wbuf->nobj = nobj;
538 		wbuf = handoff(wbuf);
539 		nobj = wbuf->nobj;
540 		wp = wbuf->obj + nobj;
541 	}
542 
543 	*_wp = wp;
544 	*_wbuf = wbuf;
545 	*_nobj = nobj;
546 }
547 
548 // Program that scans the whole block and treats every block element as a potential pointer
549 static uintptr defaultProg[2] = {PtrSize, GC_DEFAULT_PTR};
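// A GC "program" here is just a uintptr array: word 0 holds the element
// size and the words after it are GC_* instructions interpreted by the
// pc[0] switch in scanblock below.  defaultProg therefore says: element
// size PtrSize, then GC_DEFAULT_PTR, i.e. conservatively treat every word
// of the block as a potential pointer.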
550 
551 #if 0
552 // Hashmap iterator program
553 static uintptr mapProg[2] = {0, GC_MAP_NEXT};
554 
555 // Hchan program
556 static uintptr chanProg[2] = {0, GC_CHAN};
557 #endif
558 
559 // Local variables of a program fragment or loop
560 typedef struct Frame Frame;
561 struct Frame {
562 	uintptr count, elemsize, b;
563 	uintptr *loop_or_ret;
564 };
565 
566 // Sanity check for the derived type info objti.
567 static void
568 checkptr(void *obj, uintptr objti)
569 {
570 	uintptr type, tisize, i, x;
571 	byte *objstart;
572 	Type *t;
573 	MSpan *s;
574 
575 	if(!Debug)
576 		runtime_throw("checkptr is debug only");
577 
578 	if((byte*)obj < runtime_mheap->arena_start || (byte*)obj >= runtime_mheap->arena_used)
579 		return;
580 	type = runtime_gettype(obj);
581 	t = (Type*)(type & ~(uintptr)(PtrSize-1));
582 	if(t == nil)
583 		return;
584 	x = (uintptr)obj >> PageShift;
585 	x -= (uintptr)(runtime_mheap->arena_start)>>PageShift;
586 	s = runtime_mheap->map[x];
587 	objstart = (byte*)((uintptr)s->start<<PageShift);
588 	if(s->sizeclass != 0) {
589 		i = ((byte*)obj - objstart)/s->elemsize;
590 		objstart += i*s->elemsize;
591 	}
592 	tisize = *(uintptr*)objti;
593 	// Sanity check for object size: it should fit into the memory block.
594 	if((byte*)obj + tisize > objstart + s->elemsize)
595 		runtime_throw("invalid gc type info");
596 	if(obj != objstart)
597 		return;
598 	// If obj points to the beginning of the memory block,
599 	// check type info as well.
600 	if(t->string == nil ||
601 		// Gob allocates unsafe pointers for indirection.
602 		(runtime_strcmp((const char *)t->string->str, (const char*)"unsafe.Pointer") &&
603 		// Runtime and gc think differently about closures.
604 		 runtime_strstr((const char *)t->string->str, (const char*)"struct { F uintptr") != (const char *)t->string->str)) {
605 #if 0
606 		pc1 = (uintptr*)objti;
607 		pc2 = (uintptr*)t->gc;
608 		// A simple best-effort check until first GC_END.
609 		for(j = 1; pc1[j] != GC_END && pc2[j] != GC_END; j++) {
610 			if(pc1[j] != pc2[j]) {
611 				runtime_printf("invalid gc type info for '%s' at %p, type info %p, block info %p\n",
612 					t->string ? (const int8*)t->string->str : (const int8*)"?", j, pc1[j], pc2[j]);
613 				runtime_throw("invalid gc type info");
614 			}
615 		}
616 #endif
617 	}
618 }
619 
620 // scanblock scans a block of n bytes starting at pointer b for references
621 // to other objects, scanning any it finds recursively until there are no
622 // unscanned objects left.  Instead of using an explicit recursion, it keeps
623 // a work list in the Workbuf* structures and loops in the main function
624 // body.  Keeping an explicit work list is easier on the stack allocator and
625 // more efficient.
626 //
627 // wbuf: current work buffer
628 // wp:   storage for next queued pointer (write pointer)
629 // nobj: number of queued objects
630 static void
631 scanblock(Workbuf *wbuf, Obj *wp, uintptr nobj, bool keepworking)
632 {
633 	byte *b, *arena_start, *arena_used;
634 	uintptr n, i, end_b, elemsize, size, ti, objti, count /* , type */;
635 	uintptr *pc, precise_type, nominal_size;
636 #if 0
637 	uintptr *map_ret, mapkey_size, mapval_size, mapkey_ti, mapval_ti, *chan_ret, chancap;
638 #endif
639 	void *obj;
640 	const Type *t;
641 	Slice *sliceptr;
642 	Frame *stack_ptr, stack_top, stack[GC_STACK_CAPACITY+4];
643 	BufferList *scanbuffers;
644 	PtrTarget *ptrbuf, *ptrbuf_end, *ptrbufpos;
645 	Obj *objbuf, *objbuf_end, *objbufpos;
646 	Eface *eface;
647 	Iface *iface;
648 #if 0
649 	Hmap *hmap;
650 	MapType *maptype;
651 	bool mapkey_kind, mapval_kind;
652 	struct hash_gciter map_iter;
653 	struct hash_gciter_data d;
654 	Hchan *chan;
655 	ChanType *chantype;
656 #endif
657 
658 	if(sizeof(Workbuf) % PageSize != 0)
659 		runtime_throw("scanblock: size of Workbuf is suboptimal");
660 
661 	// Memory arena parameters.
662 	arena_start = runtime_mheap->arena_start;
663 	arena_used = runtime_mheap->arena_used;
664 
665 	stack_ptr = stack+nelem(stack)-1;
666 
667 	precise_type = false;
668 	nominal_size = 0;
669 
670 	// Allocate ptrbuf
671 	{
672 		scanbuffers = &bufferList[runtime_m()->helpgc];
673 		ptrbuf = &scanbuffers->ptrtarget[0];
674 		ptrbuf_end = &scanbuffers->ptrtarget[0] + nelem(scanbuffers->ptrtarget);
675 		objbuf = &scanbuffers->obj[0];
676 		objbuf_end = &scanbuffers->obj[0] + nelem(scanbuffers->obj);
677 	}
678 
679 	ptrbufpos = ptrbuf;
680 	objbufpos = objbuf;
681 
682 	// (Silence the compiler)
683 #if 0
684 	map_ret = nil;
685 	mapkey_size = mapval_size = 0;
686 	mapkey_kind = mapval_kind = false;
687 	mapkey_ti = mapval_ti = 0;
688 	chan = nil;
689 	chantype = nil;
690 	chan_ret = nil;
691 #endif
692 
693 	goto next_block;
694 
695 	for(;;) {
696 		// Each iteration scans the block b of length n, queueing pointers in
697 		// the work buffer.
698 		if(Debug > 1) {
699 			runtime_printf("scanblock %p %D\n", b, (int64)n);
700 		}
701 
702 		if(CollectStats) {
703 			runtime_xadd64(&gcstats.nbytes, n);
704 			runtime_xadd64(&gcstats.obj.sum, nobj);
705 			runtime_xadd64(&gcstats.obj.cnt, 1);
706 		}
707 
708 		if(ti != 0 && false) {
709 			pc = (uintptr*)(ti & ~(uintptr)PC_BITS);
710 			precise_type = (ti & PRECISE);
711 			stack_top.elemsize = pc[0];
712 			if(!precise_type)
713 				nominal_size = pc[0];
714 			if(ti & LOOP) {
715 				stack_top.count = 0;	// 0 means an infinite number of iterations
716 				stack_top.loop_or_ret = pc+1;
717 			} else {
718 				stack_top.count = 1;
719 			}
720 			if(Debug) {
721 				// Simple sanity check for provided type info ti:
722 				// The declared size of the object must not be larger than the actual size
723 				// (it can be smaller due to inferior pointers).
724 				// It's difficult to make a comprehensive check due to inferior pointers,
725 				// reflection, gob, etc.
726 				if(pc[0] > n) {
727 					runtime_printf("invalid gc type info: type info size %p, block size %p\n", pc[0], n);
728 					runtime_throw("invalid gc type info");
729 				}
730 			}
731 		} else if(UseSpanType && false) {
732 			if(CollectStats)
733 				runtime_xadd64(&gcstats.obj.notype, 1);
734 
735 #if 0
736 			type = runtime_gettype(b);
737 			if(type != 0) {
738 				if(CollectStats)
739 					runtime_xadd64(&gcstats.obj.typelookup, 1);
740 
741 				t = (Type*)(type & ~(uintptr)(PtrSize-1));
742 				switch(type & (PtrSize-1)) {
743 				case TypeInfo_SingleObject:
744 					pc = (uintptr*)t->gc;
745 					precise_type = true;  // type information about 'b' is precise
746 					stack_top.count = 1;
747 					stack_top.elemsize = pc[0];
748 					break;
749 				case TypeInfo_Array:
750 					pc = (uintptr*)t->gc;
751 					if(pc[0] == 0)
752 						goto next_block;
753 					precise_type = true;  // type information about 'b' is precise
754 					stack_top.count = 0;  // 0 means an infinite number of iterations
755 					stack_top.elemsize = pc[0];
756 					stack_top.loop_or_ret = pc+1;
757 					break;
758 				case TypeInfo_Map:
759 					hmap = (Hmap*)b;
760 					maptype = (MapType*)t;
761 					if(hash_gciter_init(hmap, &map_iter)) {
762 						mapkey_size = maptype->key->size;
763 						mapkey_kind = maptype->key->kind;
764 						mapkey_ti   = (uintptr)maptype->key->gc | PRECISE;
765 						mapval_size = maptype->elem->size;
766 						mapval_kind = maptype->elem->kind;
767 						mapval_ti   = (uintptr)maptype->elem->gc | PRECISE;
768 
769 						map_ret = nil;
770 						pc = mapProg;
771 					} else {
772 						goto next_block;
773 					}
774 					break;
775 				case TypeInfo_Chan:
776 					chan = (Hchan*)b;
777 					chantype = (ChanType*)t;
778 					chan_ret = nil;
779 					pc = chanProg;
780 					break;
781 				default:
782 					runtime_throw("scanblock: invalid type");
783 					return;
784 				}
785 			} else {
786 				pc = defaultProg;
787 			}
788 #endif
789 		} else {
790 			pc = defaultProg;
791 		}
792 
793 		if(IgnorePreciseGC)
794 			pc = defaultProg;
795 
796 		pc++;
797 		stack_top.b = (uintptr)b;
798 
799 		end_b = (uintptr)b + n - PtrSize;
800 
801 	for(;;) {
802 		if(CollectStats)
803 			runtime_xadd64(&gcstats.instr[pc[0]], 1);
804 
805 		obj = nil;
806 		objti = 0;
807 		switch(pc[0]) {
808 		case GC_PTR:
809 			obj = *(void**)(stack_top.b + pc[1]);
810 			objti = pc[2];
811 			pc += 3;
812 			if(Debug)
813 				checkptr(obj, objti);
814 			break;
815 
816 		case GC_SLICE:
817 			sliceptr = (Slice*)(stack_top.b + pc[1]);
818 			if(sliceptr->cap != 0) {
819 				obj = sliceptr->array;
820 				// Can't use slice element type for scanning,
821 				// because if it points to an array embedded
822 				// in the beginning of a struct,
823 				// we will scan the whole struct as the slice.
824 				// So just obtain type info from heap.
825 			}
826 			pc += 3;
827 			break;
828 
829 		case GC_APTR:
830 			obj = *(void**)(stack_top.b + pc[1]);
831 			pc += 2;
832 			break;
833 
834 		case GC_STRING:
835 			obj = *(void**)(stack_top.b + pc[1]);
836 			markonly(obj);
837 			pc += 2;
838 			continue;
839 
840 		case GC_EFACE:
841 			eface = (Eface*)(stack_top.b + pc[1]);
842 			pc += 2;
843 			if(eface->type == nil)
844 				continue;
845 
846 			// eface->type
847 			t = eface->type;
848 			if((const byte*)t >= arena_start && (const byte*)t < arena_used) {
849 				union { const Type *tc; Type *tr; } u;
850 				u.tc = t;
851 				*ptrbufpos++ = (struct PtrTarget){(void*)u.tr, 0};
852 				if(ptrbufpos == ptrbuf_end)
853 					flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
854 			}
855 
856 			// eface->__object
857 			if((byte*)eface->__object >= arena_start && (byte*)eface->__object < arena_used) {
858 				if(t->__size <= sizeof(void*)) {
859 					if((t->kind & KindNoPointers))
860 						continue;
861 
862 					obj = eface->__object;
863 					if((t->kind & ~KindNoPointers) == KindPtr)
864 						// objti = (uintptr)((PtrType*)t)->elem->gc;
865 						objti = 0;
866 				} else {
867 					obj = eface->__object;
868 					// objti = (uintptr)t->gc;
869 					objti = 0;
870 				}
871 			}
872 			break;
873 
874 		case GC_IFACE:
875 			iface = (Iface*)(stack_top.b + pc[1]);
876 			pc += 2;
877 			if(iface->tab == nil)
878 				continue;
879 
880 			// iface->tab
881 			if((byte*)iface->tab >= arena_start && (byte*)iface->tab < arena_used) {
882 				// *ptrbufpos++ = (struct PtrTarget){iface->tab, (uintptr)itabtype->gc};
883 				*ptrbufpos++ = (struct PtrTarget){iface->tab, 0};
884 				if(ptrbufpos == ptrbuf_end)
885 					flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
886 			}
887 
888 			// iface->data
889 			if((byte*)iface->__object >= arena_start && (byte*)iface->__object < arena_used) {
890 				// t = iface->tab->type;
891 				t = nil;
892 				if(t->__size <= sizeof(void*)) {
893 					if((t->kind & KindNoPointers))
894 						continue;
895 
896 					obj = iface->__object;
897 					if((t->kind & ~KindNoPointers) == KindPtr)
898 						// objti = (uintptr)((const PtrType*)t)->elem->gc;
899 						objti = 0;
900 				} else {
901 					obj = iface->__object;
902 					// objti = (uintptr)t->gc;
903 					objti = 0;
904 				}
905 			}
906 			break;
907 
908 		case GC_DEFAULT_PTR:
909 			while(stack_top.b <= end_b) {
910 				obj = *(byte**)stack_top.b;
911 				stack_top.b += PtrSize;
912 				if((byte*)obj >= arena_start && (byte*)obj < arena_used) {
913 					*ptrbufpos++ = (struct PtrTarget){obj, 0};
914 					if(ptrbufpos == ptrbuf_end)
915 						flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
916 				}
917 			}
918 			goto next_block;
919 
920 		case GC_END:
921 			if(--stack_top.count != 0) {
922 				// Next iteration of a loop if possible.
923 				stack_top.b += stack_top.elemsize;
924 				if(stack_top.b + stack_top.elemsize <= end_b+PtrSize) {
925 					pc = stack_top.loop_or_ret;
926 					continue;
927 				}
928 				i = stack_top.b;
929 			} else {
930 				// Stack pop if possible.
931 				if(stack_ptr+1 < stack+nelem(stack)) {
932 					pc = stack_top.loop_or_ret;
933 					stack_top = *(++stack_ptr);
934 					continue;
935 				}
936 				i = (uintptr)b + nominal_size;
937 			}
938 			if(!precise_type) {
939 				// Quickly scan [b+i,b+n) for possible pointers.
940 				for(; i<=end_b; i+=PtrSize) {
941 					if(*(byte**)i != nil) {
942 						// Found a value that may be a pointer.
943 						// Do a rescan of the entire block.
944 						enqueue((Obj){b, n, 0}, &wbuf, &wp, &nobj);
945 						if(CollectStats) {
946 							runtime_xadd64(&gcstats.rescan, 1);
947 							runtime_xadd64(&gcstats.rescanbytes, n);
948 						}
949 						break;
950 					}
951 				}
952 			}
953 			goto next_block;
954 
955 		case GC_ARRAY_START:
956 			i = stack_top.b + pc[1];
957 			count = pc[2];
958 			elemsize = pc[3];
959 			pc += 4;
960 
961 			// Stack push.
962 			*stack_ptr-- = stack_top;
963 			stack_top = (Frame){count, elemsize, i, pc};
964 			continue;
965 
966 		case GC_ARRAY_NEXT:
967 			if(--stack_top.count != 0) {
968 				stack_top.b += stack_top.elemsize;
969 				pc = stack_top.loop_or_ret;
970 			} else {
971 				// Stack pop.
972 				stack_top = *(++stack_ptr);
973 				pc += 1;
974 			}
975 			continue;
976 
977 		case GC_CALL:
978 			// Stack push.
979 			*stack_ptr-- = stack_top;
980 			stack_top = (Frame){1, 0, stack_top.b + pc[1], pc+3 /*return address*/};
981 			pc = (uintptr*)((byte*)pc + *(int32*)(pc+2));  // target of the CALL instruction
982 			continue;
983 
984 #if 0
985 		case GC_MAP_PTR:
986 			hmap = *(Hmap**)(stack_top.b + pc[1]);
987 			if(hmap == nil) {
988 				pc += 3;
989 				continue;
990 			}
991 			if(markonly(hmap)) {
992 				maptype = (MapType*)pc[2];
993 				if(hash_gciter_init(hmap, &map_iter)) {
994 					mapkey_size = maptype->key->size;
995 					mapkey_kind = maptype->key->kind;
996 					mapkey_ti   = (uintptr)maptype->key->gc | PRECISE;
997 					mapval_size = maptype->elem->size;
998 					mapval_kind = maptype->elem->kind;
999 					mapval_ti   = (uintptr)maptype->elem->gc | PRECISE;
1000 
1001 					// Start mapProg.
1002 					map_ret = pc+3;
1003 					pc = mapProg+1;
1004 				} else {
1005 					pc += 3;
1006 				}
1007 			} else {
1008 				pc += 3;
1009 			}
1010 			continue;
1011 
1012 		case GC_MAP_NEXT:
1013 			// Add all keys and values to buffers, mark all subtables.
1014 			while(hash_gciter_next(&map_iter, &d)) {
1015 				// buffers: reserve space for 2 objects.
1016 				if(ptrbufpos+2 >= ptrbuf_end)
1017 					flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
1018 				if(objbufpos+2 >= objbuf_end)
1019 					flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj);
1020 
1021 				if(d.st != nil)
1022 					markonly(d.st);
1023 
1024 				if(d.key_data != nil) {
1025 					if(!(mapkey_kind & KindNoPointers) || d.indirectkey) {
1026 						if(!d.indirectkey)
1027 							*objbufpos++ = (Obj){d.key_data, mapkey_size, mapkey_ti};
1028 						else {
1029 							if(Debug) {
1030 								obj = *(void**)d.key_data;
1031 								if(!(arena_start <= obj && obj < arena_used))
1032 									runtime_throw("scanblock: inconsistent hashmap");
1033 							}
1034 							*ptrbufpos++ = (struct PtrTarget){*(void**)d.key_data, mapkey_ti};
1035 						}
1036 					}
1037 					if(!(mapval_kind & KindNoPointers) || d.indirectval) {
1038 						if(!d.indirectval)
1039 							*objbufpos++ = (Obj){d.val_data, mapval_size, mapval_ti};
1040 						else {
1041 							if(Debug) {
1042 								obj = *(void**)d.val_data;
1043 								if(!(arena_start <= obj && obj < arena_used))
1044 									runtime_throw("scanblock: inconsistent hashmap");
1045 							}
1046 							*ptrbufpos++ = (struct PtrTarget){*(void**)d.val_data, mapval_ti};
1047 						}
1048 					}
1049 				}
1050 			}
1051 			if(map_ret == nil)
1052 				goto next_block;
1053 			pc = map_ret;
1054 			continue;
1055 #endif
1056 
1057 		case GC_REGION:
1058 			obj = (void*)(stack_top.b + pc[1]);
1059 			size = pc[2];
1060 			objti = pc[3];
1061 			pc += 4;
1062 
1063 			*objbufpos++ = (Obj){obj, size, objti};
1064 			if(objbufpos == objbuf_end)
1065 				flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj);
1066 			continue;
1067 
1068 #if 0
1069 		case GC_CHAN_PTR:
1070 			// Similar to GC_MAP_PTR
1071 			chan = *(Hchan**)(stack_top.b + pc[1]);
1072 			if(chan == nil) {
1073 				pc += 3;
1074 				continue;
1075 			}
1076 			if(markonly(chan)) {
1077 				chantype = (ChanType*)pc[2];
1078 				if(!(chantype->elem->kind & KindNoPointers)) {
1079 					// Start chanProg.
1080 					chan_ret = pc+3;
1081 					pc = chanProg+1;
1082 					continue;
1083 				}
1084 			}
1085 			pc += 3;
1086 			continue;
1087 
1088 		case GC_CHAN:
1089 			// There are no heap pointers in struct Hchan,
1090 			// so we can ignore the leading sizeof(Hchan) bytes.
1091 			if(!(chantype->elem->kind & KindNoPointers)) {
1092 				// Channel's buffer follows Hchan immediately in memory.
1093 				// Size of buffer (cap(c)) is second int in the chan struct.
1094 				chancap = ((uintgo*)chan)[1];
1095 				if(chancap > 0) {
1096 					// TODO(atom): split into two chunks so that only the
1097 					// in-use part of the circular buffer is scanned.
1098 					// (Channel routines zero the unused part, so the current
1099 					// code does not lead to leaks, it's just a little inefficient.)
1100 					*objbufpos++ = (Obj){(byte*)chan+runtime_Hchansize, chancap*chantype->elem->size,
1101 						(uintptr)chantype->elem->gc | PRECISE | LOOP};
1102 					if(objbufpos == objbuf_end)
1103 						flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj);
1104 				}
1105 			}
1106 			if(chan_ret == nil)
1107 				goto next_block;
1108 			pc = chan_ret;
1109 			continue;
1110 #endif
1111 
1112 		default:
1113 			runtime_throw("scanblock: invalid GC instruction");
1114 			return;
1115 		}
1116 
1117 		if((byte*)obj >= arena_start && (byte*)obj < arena_used) {
1118 			*ptrbufpos++ = (struct PtrTarget){obj, objti};
1119 			if(ptrbufpos == ptrbuf_end)
1120 				flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
1121 		}
1122 	}
1123 
1124 	next_block:
1125 		// Done scanning [b, b+n).  Prepare for the next iteration of
1126 		// the loop by setting b, n, ti to the parameters for the next block.
1127 
1128 		if(nobj == 0) {
1129 			flushptrbuf(ptrbuf, &ptrbufpos, &wp, &wbuf, &nobj);
1130 			flushobjbuf(objbuf, &objbufpos, &wp, &wbuf, &nobj);
1131 
1132 			if(nobj == 0) {
1133 				if(!keepworking) {
1134 					if(wbuf)
1135 						putempty(wbuf);
1136 					goto endscan;
1137 				}
1138 				// Emptied our buffer: refill.
1139 				wbuf = getfull(wbuf);
1140 				if(wbuf == nil)
1141 					goto endscan;
1142 				nobj = wbuf->nobj;
1143 				wp = wbuf->obj + wbuf->nobj;
1144 			}
1145 		}
1146 
1147 		// Fetch b from the work buffer.
1148 		--wp;
1149 		b = wp->p;
1150 		n = wp->n;
1151 		ti = wp->ti;
1152 		nobj--;
1153 	}
1154 
1155 endscan:;
1156 }
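
// Typical callers: markroot enqueues a single root and runs
// scanblock(wbuf, wp, nobj, false), so it stops once its own work drains,
// while runtime_gchelper runs scanblock(nil, nil, 0, true) and keeps
// pulling shared buffers off work.full until all procs are idle.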
1157 
1158 // debug_scanblock is the debug copy of scanblock.
1159 // It is simpler, slower, single-threaded, recursive,
1160 // and uses bitSpecial as the mark bit.
1161 static void
1162 debug_scanblock(byte *b, uintptr n)
1163 {
1164 	byte *obj, *p;
1165 	void **vp;
1166 	uintptr size, *bitp, bits, shift, i, xbits, off;
1167 	MSpan *s;
1168 
1169 	if(!DebugMark)
1170 		runtime_throw("debug_scanblock without DebugMark");
1171 
1172 	if((intptr)n < 0) {
1173 		runtime_printf("debug_scanblock %p %D\n", b, (int64)n);
1174 		runtime_throw("debug_scanblock");
1175 	}
1176 
1177 	// Align b to a word boundary.
1178 	off = (uintptr)b & (PtrSize-1);
1179 	if(off != 0) {
1180 		b += PtrSize - off;
1181 		n -= PtrSize - off;
1182 	}
1183 
1184 	vp = (void**)b;
1185 	n /= PtrSize;
1186 	for(i=0; i<(uintptr)n; i++) {
1187 		obj = (byte*)vp[i];
1188 
1189 		// Words outside the arena cannot be pointers.
1190 		if((byte*)obj < runtime_mheap->arena_start || (byte*)obj >= runtime_mheap->arena_used)
1191 			continue;
1192 
1193 		// Round down to word boundary.
1194 		obj = (void*)((uintptr)obj & ~((uintptr)PtrSize-1));
1195 
1196 		// Consult span table to find beginning.
1197 		s = runtime_MHeap_LookupMaybe(runtime_mheap, obj);
1198 		if(s == nil)
1199 			continue;
1200 
1201 		p =  (byte*)((uintptr)s->start<<PageShift);
1202 		size = s->elemsize;
1203 		if(s->sizeclass == 0) {
1204 			obj = p;
1205 		} else {
1206 			if((byte*)obj >= (byte*)s->limit)
1207 				continue;
1208 			int32 i = ((byte*)obj - p)/size;
1209 			obj = p+i*size;
1210 		}
1211 
1212 		// Now that we know the object header, reload bits.
1213 		off = (uintptr*)obj - (uintptr*)runtime_mheap->arena_start;
1214 		bitp = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
1215 		shift = off % wordsPerBitmapWord;
1216 		xbits = *bitp;
1217 		bits = xbits >> shift;
1218 
1219 		// Now we have bits, bitp, and shift correct for
1220 		// obj pointing at the base of the object.
1221 		// If not allocated or already marked, done.
1222 		if((bits & bitAllocated) == 0 || (bits & bitSpecial) != 0)  // NOTE: bitSpecial not bitMarked
1223 			continue;
1224 		*bitp |= bitSpecial<<shift;
1225 		if(!(bits & bitMarked))
1226 			runtime_printf("found unmarked block %p in %p\n", obj, vp+i);
1227 
1228 		// If object has no pointers, don't need to scan further.
1229 		if((bits & bitNoPointers) != 0)
1230 			continue;
1231 
1232 		debug_scanblock(obj, size);
1233 	}
1234 }
1235 
1236 // Append obj to the work buffer.
1237 // _wbuf, _wp, _nobj are input/output parameters specifying the work buffer.
1238 static void
1239 enqueue(Obj obj, Workbuf **_wbuf, Obj **_wp, uintptr *_nobj)
1240 {
1241 	uintptr nobj, off;
1242 	Obj *wp;
1243 	Workbuf *wbuf;
1244 
1245 	if(Debug > 1)
1246 		runtime_printf("append obj(%p %D %p)\n", obj.p, (int64)obj.n, obj.ti);
1247 
1248 	// Align obj.b to a word boundary.
1249 	off = (uintptr)obj.p & (PtrSize-1);
1250 	if(off != 0) {
1251 		obj.p += PtrSize - off;
1252 		obj.n -= PtrSize - off;
1253 		obj.ti = 0;
1254 	}
1255 
1256 	if(obj.p == nil || obj.n == 0)
1257 		return;
1258 
1259 	// Load work buffer state
1260 	wp = *_wp;
1261 	wbuf = *_wbuf;
1262 	nobj = *_nobj;
1263 
1264 	// If another proc wants a pointer, give it some.
1265 	if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) {
1266 		wbuf->nobj = nobj;
1267 		wbuf = handoff(wbuf);
1268 		nobj = wbuf->nobj;
1269 		wp = wbuf->obj + nobj;
1270 	}
1271 
1272 	// If buffer is full, get a new one.
1273 	if(wbuf == nil || nobj >= nelem(wbuf->obj)) {
1274 		if(wbuf != nil)
1275 			wbuf->nobj = nobj;
1276 		wbuf = getempty(wbuf);
1277 		wp = wbuf->obj;
1278 		nobj = 0;
1279 	}
1280 
1281 	*wp = obj;
1282 	wp++;
1283 	nobj++;
1284 
1285 	// Save work buffer state
1286 	*_wp = wp;
1287 	*_wbuf = wbuf;
1288 	*_nobj = nobj;
1289 }
1290 
1291 static void
1292 markroot(ParFor *desc, uint32 i)
1293 {
1294 	Obj *wp;
1295 	Workbuf *wbuf;
1296 	uintptr nobj;
1297 
1298 	USED(&desc);
1299 	wp = nil;
1300 	wbuf = nil;
1301 	nobj = 0;
1302 	enqueue(work.roots[i], &wbuf, &wp, &nobj);
1303 	scanblock(wbuf, wp, nobj, false);
1304 }
1305 
1306 // Get an empty work buffer off the work.empty list,
1307 // allocating new buffers as needed.
1308 static Workbuf*
1309 getempty(Workbuf *b)
1310 {
1311 	if(b != nil)
1312 		runtime_lfstackpush(&work.full, &b->node);
1313 	b = (Workbuf*)runtime_lfstackpop(&work.empty);
1314 	if(b == nil) {
1315 		// Need to allocate.
1316 		runtime_lock(&work);
1317 		if(work.nchunk < sizeof *b) {
1318 			work.nchunk = 1<<20;
1319 			work.chunk = runtime_SysAlloc(work.nchunk);
1320 			if(work.chunk == nil)
1321 				runtime_throw("runtime: cannot allocate memory");
1322 		}
1323 		b = (Workbuf*)work.chunk;
1324 		work.chunk += sizeof *b;
1325 		work.nchunk -= sizeof *b;
1326 		runtime_unlock(&work);
1327 	}
1328 	b->nobj = 0;
1329 	return b;
1330 }
1331 
1332 static void
1333 putempty(Workbuf *b)
1334 {
1335 	if(CollectStats)
1336 		runtime_xadd64(&gcstats.putempty, 1);
1337 
1338 	runtime_lfstackpush(&work.empty, &b->node);
1339 }
1340 
1341 // Get a full work buffer off the work.full list, or return nil.
1342 static Workbuf*
1343 getfull(Workbuf *b)
1344 {
1345 	M *m;
1346 	int32 i;
1347 
1348 	if(CollectStats)
1349 		runtime_xadd64(&gcstats.getfull, 1);
1350 
1351 	if(b != nil)
1352 		runtime_lfstackpush(&work.empty, &b->node);
1353 	b = (Workbuf*)runtime_lfstackpop(&work.full);
1354 	if(b != nil || work.nproc == 1)
1355 		return b;
1356 
1357 	m = runtime_m();
1358 	runtime_xadd(&work.nwait, +1);
1359 	for(i=0;; i++) {
1360 		if(work.full != 0) {
1361 			runtime_xadd(&work.nwait, -1);
1362 			b = (Workbuf*)runtime_lfstackpop(&work.full);
1363 			if(b != nil)
1364 				return b;
1365 			runtime_xadd(&work.nwait, +1);
1366 		}
1367 		if(work.nwait == work.nproc)
1368 			return nil;
1369 		if(i < 10) {
1370 			m->gcstats.nprocyield++;
1371 			runtime_procyield(20);
1372 		} else if(i < 20) {
1373 			m->gcstats.nosyield++;
1374 			runtime_osyield();
1375 		} else {
1376 			m->gcstats.nsleep++;
1377 			runtime_usleep(100);
1378 		}
1379 	}
1380 }
1381 
1382 static Workbuf*
1383 handoff(Workbuf *b)
1384 {
1385 	M *m;
1386 	int32 n;
1387 	Workbuf *b1;
1388 
1389 	m = runtime_m();
1390 
1391 	// Make new buffer with half of b's pointers.
1392 	b1 = getempty(nil);
1393 	n = b->nobj/2;
1394 	b->nobj -= n;
1395 	b1->nobj = n;
1396 	runtime_memmove(b1->obj, b->obj+b->nobj, n*sizeof b1->obj[0]);
1397 	m->gcstats.nhandoff++;
1398 	m->gcstats.nhandoffcnt += n;
1399 
1400 	// Put b on full list - let first half of b get stolen.
1401 	runtime_lfstackpush(&work.full, &b->node);
1402 	return b1;
1403 }
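
// handoff is invoked from the enqueue/flush paths whenever other procs are
// idle and this proc holds more than handoffThreshold queued objects, e.g.:
//
//	if(work.nwait > 0 && nobj > handoffThreshold && work.full == 0) {
//		wbuf->nobj = nobj;
//		wbuf = handoff(wbuf);
//		nobj = wbuf->nobj;
//		wp = wbuf->obj + nobj;
//	}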
1404 
1405 static void
1406 addroot(Obj obj)
1407 {
1408 	uint32 cap;
1409 	Obj *new;
1410 
1411 	if(work.nroot >= work.rootcap) {
1412 		cap = PageSize/sizeof(Obj);
1413 		if(cap < 2*work.rootcap)
1414 			cap = 2*work.rootcap;
1415 		new = (Obj*)runtime_SysAlloc(cap*sizeof(Obj));
1416 		if(new == nil)
1417 			runtime_throw("runtime: cannot allocate memory");
1418 		if(work.roots != nil) {
1419 			runtime_memmove(new, work.roots, work.rootcap*sizeof(Obj));
1420 			runtime_SysFree(work.roots, work.rootcap*sizeof(Obj));
1421 		}
1422 		work.roots = new;
1423 		work.rootcap = cap;
1424 	}
1425 	work.roots[work.nroot] = obj;
1426 	work.nroot++;
1427 }
1428 
1429 static void
1430 addstackroots(G *gp)
1431 {
1432 #ifdef USING_SPLIT_STACK
1433 	M *mp;
1434 	void* sp;
1435 	size_t spsize;
1436 	void* next_segment;
1437 	void* next_sp;
1438 	void* initial_sp;
1439 
1440 	if(gp == runtime_g()) {
1441 		// Scanning our own stack.
1442 		sp = __splitstack_find(nil, nil, &spsize, &next_segment,
1443 				       &next_sp, &initial_sp);
1444 	} else if((mp = gp->m) != nil && mp->helpgc) {
1445 		// gchelper's stack is in active use and has no interesting pointers.
1446 		return;
1447 	} else {
1448 		// Scanning another goroutine's stack.
1449 		// The goroutine is usually asleep (the world is stopped).
1450 
1451 		// The exception is that if the goroutine is about to enter or might
1452 		// have just exited a system call, it may be executing code such
1453 		// as schedlock and may have needed to start a new stack segment.
1454 		// Use the stack segment and stack pointer at the time of
1455 		// the system call instead, since that won't change underfoot.
1456 		if(gp->gcstack != nil) {
1457 			sp = gp->gcstack;
1458 			spsize = gp->gcstack_size;
1459 			next_segment = gp->gcnext_segment;
1460 			next_sp = gp->gcnext_sp;
1461 			initial_sp = gp->gcinitial_sp;
1462 		} else {
1463 			sp = __splitstack_find_context(&gp->stack_context[0],
1464 						       &spsize, &next_segment,
1465 						       &next_sp, &initial_sp);
1466 		}
1467 	}
1468 	if(sp != nil) {
1469 		addroot((Obj){sp, spsize, 0});
1470 		while((sp = __splitstack_find(next_segment, next_sp,
1471 					      &spsize, &next_segment,
1472 					      &next_sp, &initial_sp)) != nil)
1473 			addroot((Obj){sp, spsize, 0});
1474 	}
1475 #else
1476 	M *mp;
1477 	byte* bottom;
1478 	byte* top;
1479 
1480 	if(gp == runtime_g()) {
1481 		// Scanning our own stack.
1482 		bottom = (byte*)&gp;
1483 	} else if((mp = gp->m) != nil && mp->helpgc) {
1484 		// gchelper's stack is in active use and has no interesting pointers.
1485 		return;
1486 	} else {
1487 		// Scanning another goroutine's stack.
1488 		// The goroutine is usually asleep (the world is stopped).
1489 		bottom = (byte*)gp->gcnext_sp;
1490 		if(bottom == nil)
1491 			return;
1492 	}
1493 	top = (byte*)gp->gcinitial_sp + gp->gcstack_size;
1494 	if(top > bottom)
1495 		addroot((Obj){bottom, top - bottom, 0});
1496 	else
1497 		addroot((Obj){top, bottom - top, 0});
1498 #endif
1499 }
1500 
1501 static void
1502 addfinroots(void *v)
1503 {
1504 	uintptr size;
1505 	void *base;
1506 
1507 	size = 0;
1508 	if(!runtime_mlookup(v, (byte**)&base, &size, nil) || !runtime_blockspecial(base))
1509 		runtime_throw("mark - finalizer inconsistency");
1510 
1511 	// do not mark the finalizer block itself.  just mark the things it points at.
1512 	addroot((Obj){base, size, 0});
1513 }
1514 
1515 static struct root_list* roots;
1516 
1517 void
1518 __go_register_gc_roots (struct root_list* r)
1519 {
1520 	// FIXME: This needs locking if multiple goroutines can call
1521 	// dlopen simultaneously.
1522 	r->next = roots;
1523 	roots = r;
1524 }
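
// A registered root_list enumerates global roots: each entry in roots[]
// gives an address (decl) and size, and the array is terminated by a nil
// decl.  addroots below walks the chain of registered lists to mark
// data+bss.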
1525 
1526 static void
1527 addroots(void)
1528 {
1529 	struct root_list *pl;
1530 	G *gp;
1531 	FinBlock *fb;
1532 	MSpan *s, **allspans;
1533 	uint32 spanidx;
1534 
1535 	work.nroot = 0;
1536 
1537 	// mark data+bss.
1538 	for(pl = roots; pl != nil; pl = pl->next) {
1539 		struct root* pr = &pl->roots[0];
1540 		while(1) {
1541 			void *decl = pr->decl;
1542 			if(decl == nil)
1543 				break;
1544 			addroot((Obj){decl, pr->size, 0});
1545 			pr++;
1546 		}
1547 	}
1548 
1549 	addroot((Obj){(byte*)&runtime_m0, sizeof runtime_m0, 0});
1550 	addroot((Obj){(byte*)&runtime_g0, sizeof runtime_g0, 0});
1551 	addroot((Obj){(byte*)&runtime_allg, sizeof runtime_allg, 0});
1552 	addroot((Obj){(byte*)&runtime_allm, sizeof runtime_allm, 0});
1553 	addroot((Obj){(byte*)&runtime_allp, sizeof runtime_allp, 0});
1554 	runtime_proc_scan(addroot);
1555 	runtime_MProf_Mark(addroot);
1556 	runtime_time_scan(addroot);
1557 
1558 	// MSpan.types
1559 	allspans = runtime_mheap->allspans;
1560 	for(spanidx=0; spanidx<runtime_mheap->nspan; spanidx++) {
1561 		s = allspans[spanidx];
1562 		if(s->state == MSpanInUse) {
1563 			// The garbage collector ignores type pointers stored in MSpan.types:
1564 			//  - Compiler-generated types are stored outside of heap.
1565 			//  - The reflect package has runtime-generated types cached in its data structures.
1566 			//    The garbage collector relies on finding the references via that cache.
1567 			switch(s->types.compression) {
1568 			case MTypes_Empty:
1569 			case MTypes_Single:
1570 				break;
1571 			case MTypes_Words:
1572 			case MTypes_Bytes:
1573 				markonly((byte*)s->types.data);
1574 				break;
1575 			}
1576 		}
1577 	}
1578 
1579 	// stacks
1580 	for(gp=runtime_allg; gp!=nil; gp=gp->alllink) {
1581 		switch(gp->status){
1582 		default:
1583 			runtime_printf("unexpected G.status %d\n", gp->status);
1584 			runtime_throw("mark - bad status");
1585 		case Gdead:
1586 			break;
1587 		case Grunning:
1588 			if(gp != runtime_g())
1589 				runtime_throw("mark - world not stopped");
1590 			addstackroots(gp);
1591 			break;
1592 		case Grunnable:
1593 		case Gsyscall:
1594 		case Gwaiting:
1595 			addstackroots(gp);
1596 			break;
1597 		}
1598 	}
1599 
1600 	runtime_walkfintab(addfinroots, addroot);
1601 
1602 	for(fb=allfin; fb; fb=fb->alllink)
1603 		addroot((Obj){(byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]), 0});
1604 
1605 	addroot((Obj){(byte*)&work, sizeof work, 0});
1606 }
1607 
1608 static bool
1609 handlespecial(byte *p, uintptr size)
1610 {
1611 	FuncVal *fn;
1612 	const struct __go_func_type *ft;
1613 	FinBlock *block;
1614 	Finalizer *f;
1615 
1616 	if(!runtime_getfinalizer(p, true, &fn, &ft)) {
1617 		runtime_setblockspecial(p, false);
1618 		runtime_MProf_Free(p, size);
1619 		return false;
1620 	}
1621 
1622 	runtime_lock(&finlock);
1623 	if(finq == nil || finq->cnt == finq->cap) {
1624 		if(finc == nil) {
1625 			finc = runtime_SysAlloc(PageSize);
1626 			if(finc == nil)
1627 				runtime_throw("runtime: cannot allocate memory");
1628 			finc->cap = (PageSize - sizeof(FinBlock)) / sizeof(Finalizer) + 1;
1629 			finc->alllink = allfin;
1630 			allfin = finc;
1631 		}
1632 		block = finc;
1633 		finc = block->next;
1634 		block->next = finq;
1635 		finq = block;
1636 	}
1637 	f = &finq->fin[finq->cnt];
1638 	finq->cnt++;
1639 	f->fn = fn;
1640 	f->ft = ft;
1641 	f->arg = p;
1642 	runtime_unlock(&finlock);
1643 	return true;
1644 }
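
// sweepspan below calls handlespecial only for unmarked blocks whose
// bitSpecial bit is set (or for every unmarked block in DebugMark mode):
// if a finalizer was queued the block survives this cycle, otherwise it is
// freed normally.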
1645 
1646 // Sweep frees or collects finalizers for blocks not marked in the mark phase.
1647 // It clears the mark bits in preparation for the next GC round.
1648 static void
1649 sweepspan(ParFor *desc, uint32 idx)
1650 {
1651 	M *m;
1652 	int32 cl, n, npages;
1653 	uintptr size;
1654 	byte *p;
1655 	MCache *c;
1656 	byte *arena_start;
1657 	MLink head, *end;
1658 	int32 nfree;
1659 	byte *type_data;
1660 	byte compression;
1661 	uintptr type_data_inc;
1662 	MSpan *s;
1663 
1664 	m = runtime_m();
1665 
1666 	USED(&desc);
1667 	s = runtime_mheap->allspans[idx];
1668 	if(s->state != MSpanInUse)
1669 		return;
1670 	arena_start = runtime_mheap->arena_start;
1671 	p = (byte*)(s->start << PageShift);
1672 	cl = s->sizeclass;
1673 	size = s->elemsize;
1674 	if(cl == 0) {
1675 		n = 1;
1676 	} else {
1677 		// Chunk full of small blocks.
1678 		npages = runtime_class_to_allocnpages[cl];
1679 		n = (npages << PageShift) / size;
1680 	}
1681 	nfree = 0;
1682 	end = &head;
1683 	c = m->mcache;
1684 
1685 	type_data = (byte*)s->types.data;
1686 	type_data_inc = sizeof(uintptr);
1687 	compression = s->types.compression;
1688 	switch(compression) {
1689 	case MTypes_Bytes:
1690 		type_data += 8*sizeof(uintptr);
1691 		type_data_inc = 1;
1692 		break;
1693 	}
1694 
1695 	// Sweep through n objects of given size starting at p.
1696 	// This thread owns the span now, so it can manipulate
1697 	// the block bitmap without atomic operations.
1698 	for(; n > 0; n--, p += size, type_data+=type_data_inc) {
1699 		uintptr off, *bitp, shift, bits;
1700 
1701 		off = (uintptr*)p - (uintptr*)arena_start;
1702 		bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
1703 		shift = off % wordsPerBitmapWord;
1704 		bits = *bitp>>shift;
1705 
1706 		if((bits & bitAllocated) == 0)
1707 			continue;
1708 
1709 		if((bits & bitMarked) != 0) {
1710 			if(DebugMark) {
1711 				if(!(bits & bitSpecial))
1712 					runtime_printf("found spurious mark on %p\n", p);
1713 				*bitp &= ~(bitSpecial<<shift);
1714 			}
1715 			*bitp &= ~(bitMarked<<shift);
1716 			continue;
1717 		}
1718 
1719 		// Special means it has a finalizer or is being profiled.
1720 		// In DebugMark mode, the bit has been coopted so
1721 		// we have to assume all blocks are special.
1722 		if(DebugMark || (bits & bitSpecial) != 0) {
1723 			if(handlespecial(p, size))
1724 				continue;
1725 		}
1726 
1727 		// Mark freed; restore block boundary bit.
1728 		*bitp = (*bitp & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
1729 
1730 		if(cl == 0) {
1731 			// Free large span.
1732 			runtime_unmarkspan(p, 1<<PageShift);
1733 			*(uintptr*)p = (uintptr)0xdeaddeaddeaddeadll;	// needs zeroing
1734 			runtime_MHeap_Free(runtime_mheap, s, 1);
1735 			c->local_alloc -= size;
1736 			c->local_nfree++;
1737 		} else {
1738 			// Free small object.
1739 			switch(compression) {
1740 			case MTypes_Words:
1741 				*(uintptr*)type_data = 0;
1742 				break;
1743 			case MTypes_Bytes:
1744 				*(byte*)type_data = 0;
1745 				break;
1746 			}
1747 			if(size > sizeof(uintptr))
1748 				((uintptr*)p)[1] = (uintptr)0xdeaddeaddeaddeadll;	// mark as "needs to be zeroed"
1749 
1750 			end->next = (MLink*)p;
1751 			end = (MLink*)p;
1752 			nfree++;
1753 		}
1754 	}
1755 
1756 	if(nfree) {
1757 		c->local_by_size[cl].nfree += nfree;
1758 		c->local_alloc -= size * nfree;
1759 		c->local_nfree += nfree;
1760 		c->local_cachealloc -= nfree * size;
1761 		c->local_objects -= nfree;
1762 		runtime_MCentral_FreeSpan(&runtime_mheap->central[cl], s, nfree, head.next, end);
1763 	}
1764 }
1765 
1766 static void
1767 dumpspan(uint32 idx)
1768 {
1769 	int32 sizeclass, n, npages, i, column;
1770 	uintptr size;
1771 	byte *p;
1772 	byte *arena_start;
1773 	MSpan *s;
1774 	bool allocated, special;
1775 
1776 	s = runtime_mheap->allspans[idx];
1777 	if(s->state != MSpanInUse)
1778 		return;
1779 	arena_start = runtime_mheap->arena_start;
1780 	p = (byte*)(s->start << PageShift);
1781 	sizeclass = s->sizeclass;
1782 	size = s->elemsize;
1783 	if(sizeclass == 0) {
1784 		n = 1;
1785 	} else {
1786 		npages = runtime_class_to_allocnpages[sizeclass];
1787 		n = (npages << PageShift) / size;
1788 	}
1789 
1790 	runtime_printf("%p .. %p:\n", p, p+n*size);
1791 	column = 0;
1792 	for(; n>0; n--, p+=size) {
1793 		uintptr off, *bitp, shift, bits;
1794 
1795 		off = (uintptr*)p - (uintptr*)arena_start;
1796 		bitp = (uintptr*)arena_start - off/wordsPerBitmapWord - 1;
1797 		shift = off % wordsPerBitmapWord;
1798 		bits = *bitp>>shift;
1799 
1800 		allocated = ((bits & bitAllocated) != 0);
1801 		special = ((bits & bitSpecial) != 0);
1802 
1803 		for(i=0; (uint32)i<size; i+=sizeof(void*)) {
1804 			if(column == 0) {
1805 				runtime_printf("\t");
1806 			}
1807 			if(i == 0) {
1808 				runtime_printf(allocated ? "(" : "[");
1809 				runtime_printf(special ? "@" : "");
1810 				runtime_printf("%p: ", p+i);
1811 			} else {
1812 				runtime_printf(" ");
1813 			}
1814 
1815 			runtime_printf("%p", *(void**)(p+i));
1816 
1817 			if(i+sizeof(void*) >= size) {
1818 				runtime_printf(allocated ? ") " : "] ");
1819 			}
1820 
1821 			column++;
1822 			if(column == 8) {
1823 				runtime_printf("\n");
1824 				column = 0;
1825 			}
1826 		}
1827 	}
1828 	runtime_printf("\n");
1829 }
1830 
1831 // A debugging function to dump the contents of memory
1832 void
1833 runtime_memorydump(void)
1834 {
1835 	uint32 spanidx;
1836 
1837 	for(spanidx=0; spanidx<runtime_mheap->nspan; spanidx++) {
1838 		dumpspan(spanidx);
1839 	}
1840 }
1841 
1842 void
1843 runtime_gchelper(void)
1844 {
1845 	gchelperstart();
1846 
1847 	// parallel mark over gc roots
1848 	runtime_parfordo(work.markfor);
1849 
1850 	// help other threads scan secondary blocks
1851 	scanblock(nil, nil, 0, true);
1852 
1853 	if(DebugMark) {
1854 		// wait while the main thread executes mark(debug_scanblock)
1855 		while(runtime_atomicload(&work.debugmarkdone) == 0)
1856 			runtime_usleep(10);
1857 	}
1858 
1859 	runtime_parfordo(work.sweepfor);
1860 	bufferList[runtime_m()->helpgc].busy = 0;
1861 	if(runtime_xadd(&work.ndone, +1) == work.nproc-1)
1862 		runtime_notewakeup(&work.alldone);
1863 }
1864 
1865 #define GcpercentUnknown (-2)
1866 
1867 // Initialized from $GOGC.  GOGC=off means no gc.
1868 //
1869 // Next gc is after we've allocated an extra amount of
1870 // memory proportional to the amount already in use.
1871 // If gcpercent=100 and we're using 4M, we'll gc again
1872 // when we get to 8M.  This keeps the gc cost in linear
1873 // proportion to the allocation cost.  Adjusting gcpercent
1874 // just changes the linear constant (and also the amount of
1875 // extra memory used).
1876 static int32 gcpercent = GcpercentUnknown;
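
// As a sketch of the policy described above (the exact bookkeeping is done
// at the end of gc(), outside this excerpt):
//
//	next_gc = heap_alloc + heap_alloc*gcpercent/100
//
// so gcpercent=100 with 4M in use schedules the next collection around 8M.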
1877 
1878 static void
1879 cachestats(GCStats *stats)
1880 {
1881 	M *mp;
1882 	MCache *c;
1883 	P *p, **pp;
1884 	uint32 i;
1885 	uint64 stacks_inuse;
1886 	uint64 *src, *dst;
1887 
1888 	if(stats)
1889 		runtime_memclr((byte*)stats, sizeof(*stats));
1890 	stacks_inuse = 0;
1891 	for(mp=runtime_allm; mp; mp=mp->alllink) {
1892 		//stacks_inuse += mp->stackinuse*FixedStack;
1893 		if(stats) {
1894 			src = (uint64*)&mp->gcstats;
1895 			dst = (uint64*)stats;
1896 			for(i=0; i<sizeof(*stats)/sizeof(uint64); i++)
1897 				dst[i] += src[i];
1898 			runtime_memclr((byte*)&mp->gcstats, sizeof(mp->gcstats));
1899 		}
1900 	}
1901 	for(pp=runtime_allp; (p=*pp) != nil; pp++) {
1902 		c = p->mcache;
1903 		if(c==nil)
1904 			continue;
1905 		runtime_purgecachedstats(c);
1906 		for(i=0; i<nelem(c->local_by_size); i++) {
1907 			mstats.by_size[i].nmalloc += c->local_by_size[i].nmalloc;
1908 			c->local_by_size[i].nmalloc = 0;
1909 			mstats.by_size[i].nfree += c->local_by_size[i].nfree;
1910 			c->local_by_size[i].nfree = 0;
1911 		}
1912 	}
1913 	mstats.stacks_inuse = stacks_inuse;
1914 }
1915 
1916 // Structure of arguments passed to function gc().
1917 // This allows the arguments to be passed via reflect_call.
1918 struct gc_args
1919 {
1920 	int32 force;
1921 };
1922 
1923 static void gc(struct gc_args *args);
1924 
1925 static int32
1926 readgogc(void)
1927 {
1928 	const byte *p;
1929 
1930 	p = runtime_getenv("GOGC");
1931 	if(p == nil || p[0] == '\0')
1932 		return 100;
1933 	if(runtime_strcmp((const char *)p, "off") == 0)
1934 		return -1;
1935 	return runtime_atoi(p);
1936 }
1937 
1938 void
1939 runtime_gc(int32 force)
1940 {
1941 	M *m;
1942 	const byte *p;
1943 	struct gc_args a, *ap;
1944 
1945 	// The atomic operations are not atomic if the uint64s
1946 	// are not aligned on uint64 boundaries. This has been
1947 	// a problem in the past.
1948 	if((((uintptr)&work.empty) & 7) != 0)
1949 		runtime_throw("runtime: gc work buffer is misaligned");
1950 	if((((uintptr)&work.full) & 7) != 0)
1951 		runtime_throw("runtime: gc work buffer is misaligned");
1952 
1953 	// Make sure all registers are saved on stack so that
1954 	// scanstack sees them.
1955 	__builtin_unwind_init();
1956 
1957 	// The gc is turned off (via enablegc) until
1958 	// the bootstrap has completed.
1959 	// Also, malloc gets called in the guts
1960 	// of a number of libraries that might be
1961 	// holding locks.  To avoid priority inversion
1962 	// problems, don't bother trying to run gc
1963 	// while holding a lock.  The next mallocgc
1964 	// without a lock will do the gc instead.
1965 	m = runtime_m();
1966 	if(!mstats.enablegc || m->locks > 0 || runtime_panicking)
1967 		return;
1968 
1969 	if(gcpercent == GcpercentUnknown) {	// first time through
1970 		gcpercent = readgogc();
1971 
1972 		p = runtime_getenv("GOGCTRACE");
1973 		if(p != nil)
1974 			gctrace = runtime_atoi(p);
1975 	}
1976 	if(gcpercent < 0)
1977 		return;
1978 
1979 	// Run gc on a bigger stack to eliminate
1980 	// a potentially large number of calls to runtime_morestack.
1981 	// But not when using gccgo.
1982 	a.force = force;
1983 	ap = &a;
1984 	gc(ap);
1985 
1986 	if(gctrace > 1 && !force) {
1987 		a.force = 1;
1988 		gc(&a);
1989 	}
1990 }
1991 
1992 static void
1993 gc(struct gc_args *args)
1994 {
1995 	M *m;
1996 	int64 t0, t1, t2, t3, t4;
1997 	uint64 heap0, heap1, obj0, obj1, ninstr;
1998 	GCStats stats;
1999 	M *mp;
2000 	uint32 i;
2001 	// Eface eface;
2002 
2003 	runtime_semacquire(&runtime_worldsema);
2004 	if(!args->force && mstats.heap_alloc < mstats.next_gc) {
2005 		runtime_semrelease(&runtime_worldsema);
2006 		return;
2007 	}
2008 
2009 	m = runtime_m();
2010 
2011 	t0 = runtime_nanotime();
2012 
2013 	m->gcing = 1;
2014 	runtime_stoptheworld();
2015 
2016 	if(CollectStats)
2017 		runtime_memclr((byte*)&gcstats, sizeof(gcstats));
2018 
2019 	for(mp=runtime_allm; mp; mp=mp->alllink)
2020 		runtime_settype_flush(mp, false);
2021 
2022 	heap0 = 0;
2023 	obj0 = 0;
2024 	if(gctrace) {
2025 		cachestats(nil);
2026 		heap0 = mstats.heap_alloc;
2027 		obj0 = mstats.nmalloc - mstats.nfree;
2028 	}
2029 
2030 	m->locks++;	// disable gc during mallocs in parforalloc
2031 	if(work.markfor == nil)
2032 		work.markfor = runtime_parforalloc(MaxGcproc);
2033 	if(work.sweepfor == nil)
2034 		work.sweepfor = runtime_parforalloc(MaxGcproc);
2035 	m->locks--;
2036 
2037 	if(itabtype == nil) {
2038 		// get C pointer to the Go type "itab"
2039 		// runtime_gc_itab_ptr(&eface);
2040 		// itabtype = ((PtrType*)eface.type)->elem;
2041 	}
2042 
2043 	work.nwait = 0;
2044 	work.ndone = 0;
2045 	work.debugmarkdone = 0;
2046 	work.nproc = runtime_gcprocs();
2047 	addroots();
2048 	runtime_parforsetup(work.markfor, work.nproc, work.nroot, nil, false, markroot);
2049 	runtime_parforsetup(work.sweepfor, work.nproc, runtime_mheap->nspan, nil, true, sweepspan);
2050 	if(work.nproc > 1) {
2051 		runtime_noteclear(&work.alldone);
2052 		runtime_helpgc(work.nproc);
2053 	}
2054 
2055 	t1 = runtime_nanotime();
2056 
2057 	gchelperstart();
2058 	runtime_parfordo(work.markfor);
2059 	scanblock(nil, nil, 0, true);
2060 
2061 	if(DebugMark) {
2062 		for(i=0; i<work.nroot; i++)
2063 			debug_scanblock(work.roots[i].p, work.roots[i].n);
2064 		runtime_atomicstore(&work.debugmarkdone, 1);
2065 	}
2066 	t2 = runtime_nanotime();
2067 
2068 	runtime_parfordo(work.sweepfor);
2069 	bufferList[m->helpgc].busy = 0;
2070 	t3 = runtime_nanotime();
2071 
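	// If helper threads were started by runtime_helpgc above, wait for
	// the last of them to check in through work.ndone and wake
	// work.alldone (see runtime_gchelper).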
2072 	if(work.nproc > 1)
2073 		runtime_notesleep(&work.alldone);
2074 
2075 	cachestats(&stats);
2076 
2077 	stats.nprocyield += work.sweepfor->nprocyield;
2078 	stats.nosyield += work.sweepfor->nosyield;
2079 	stats.nsleep += work.sweepfor->nsleep;
2080 
2081 	mstats.next_gc = mstats.heap_alloc+(mstats.heap_alloc-runtime_stacks_sys)*gcpercent/100;
2082 	m->gcing = 0;
2083 
2084 	if(finq != nil) {
2085 		m->locks++;	// disable gc during the mallocs in newproc
2086 		// kick off or wake up goroutine to run queued finalizers
2087 		if(fing == nil)
2088 			fing = __go_go(runfinq, nil);
2089 		else if(fingwait) {
2090 			fingwait = 0;
2091 			runtime_ready(fing);
2092 		}
2093 		m->locks--;
2094 	}
2095 
2096 	heap1 = mstats.heap_alloc;
2097 	obj1 = mstats.nmalloc - mstats.nfree;
2098 
2099 	t4 = runtime_nanotime();
2100 	mstats.last_gc = t4;
2101 	mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t4 - t0;
2102 	mstats.pause_total_ns += t4 - t0;
2103 	mstats.numgc++;
2104 	if(mstats.debuggc)
2105 		runtime_printf("pause %D\n", t4-t0);
2106 
2107 	if(gctrace) {
2108 		runtime_printf("gc%d(%d): %D+%D+%D ms, %D -> %D MB %D -> %D (%D-%D) objects,"
2109 				" %D(%D) handoff, %D(%D) steal, %D/%D/%D yields\n",
2110 			mstats.numgc, work.nproc, (t2-t1)/1000000, (t3-t2)/1000000, (t1-t0+t4-t3)/1000000,
2111 			heap0>>20, heap1>>20, obj0, obj1,
2112 			mstats.nmalloc, mstats.nfree,
2113 			stats.nhandoff, stats.nhandoffcnt,
2114 			work.sweepfor->nsteal, work.sweepfor->nstealcnt,
2115 			stats.nprocyield, stats.nosyield, stats.nsleep);
2116 		if(CollectStats) {
2117 			runtime_printf("scan: %D bytes, %D objects, %D untyped, %D types from MSpan\n",
2118 				gcstats.nbytes, gcstats.obj.cnt, gcstats.obj.notype, gcstats.obj.typelookup);
2119 			if(gcstats.ptr.cnt != 0)
2120 				runtime_printf("avg ptrbufsize: %D (%D/%D)\n",
2121 					gcstats.ptr.sum/gcstats.ptr.cnt, gcstats.ptr.sum, gcstats.ptr.cnt);
2122 			if(gcstats.obj.cnt != 0)
2123 				runtime_printf("avg nobj: %D (%D/%D)\n",
2124 					gcstats.obj.sum/gcstats.obj.cnt, gcstats.obj.sum, gcstats.obj.cnt);
2125 			runtime_printf("rescans: %D, %D bytes\n", gcstats.rescan, gcstats.rescanbytes);
2126 
2127 			runtime_printf("instruction counts:\n");
2128 			ninstr = 0;
2129 			for(i=0; i<nelem(gcstats.instr); i++) {
2130 				runtime_printf("\t%d:\t%D\n", i, gcstats.instr[i]);
2131 				ninstr += gcstats.instr[i];
2132 			}
2133 			runtime_printf("\ttotal:\t%D\n", ninstr);
2134 
2135 			runtime_printf("putempty: %D, getfull: %D\n", gcstats.putempty, gcstats.getfull);
2136 		}
2137 	}
2138 
2139 	runtime_MProf_GC();
2140 	runtime_semrelease(&runtime_worldsema);
2141 	runtime_starttheworld();
2142 
2143 	// give the queued finalizers, if any, a chance to run
2144 	if(finq != nil)
2145 		runtime_gosched();
2146 }
2147 
2148 void runtime_ReadMemStats(MStats *)
2149   __asm__ (GOSYM_PREFIX "runtime.ReadMemStats");
2150 
2151 void
2152 runtime_ReadMemStats(MStats *stats)
2153 {
2154 	M *m;
2155 
2156 	// Have to acquire worldsema to stop the world,
2157 	// because stoptheworld can only be used by
2158 	// one goroutine at a time, and there might be
2159 	// a pending garbage collection already calling it.
2160 	runtime_semacquire(&runtime_worldsema);
2161 	m = runtime_m();
2162 	m->gcing = 1;
2163 	runtime_stoptheworld();
2164 	cachestats(nil);
2165 	*stats = mstats;
2166 	m->gcing = 0;
2167 	runtime_semrelease(&runtime_worldsema);
2168 	runtime_starttheworld();
2169 }
2170 
2171 void runtime_debug_readGCStats(Slice*)
2172   __asm__("runtime_debug.readGCStats");
2173 
2174 void
2175 runtime_debug_readGCStats(Slice *pauses)
2176 {
2177 	uint64 *p;
2178 	uint32 i, n;
2179 
2180 	// Calling code in runtime/debug should make the slice large enough.
2181 	if((size_t)pauses->cap < nelem(mstats.pause_ns)+3)
2182 		runtime_throw("runtime: short slice passed to readGCStats");
2183 
2184 	// Pass back: pauses, last gc (absolute time), number of gc, total pause ns.
2185 	p = (uint64*)pauses->array;
2186 	runtime_lock(runtime_mheap);
2187 	n = mstats.numgc;
2188 	if(n > nelem(mstats.pause_ns))
2189 		n = nelem(mstats.pause_ns);
2190 
2191 	// The pause buffer is circular. The most recent pause is at
2192 	// pause_ns[(numgc-1)%nelem(pause_ns)], and then backward
2193 	// from there to go back farther in time. We deliver the times
2194 	// most recent first (in p[0]).
2195 	for(i=0; i<n; i++)
2196 		p[i] = mstats.pause_ns[(mstats.numgc-1-i)%nelem(mstats.pause_ns)];
2197 
2198 	p[n] = mstats.last_gc;
2199 	p[n+1] = mstats.numgc;
2200 	p[n+2] = mstats.pause_total_ns;
2201 	runtime_unlock(runtime_mheap);
2202 	pauses->__count = n+3;
2203 }
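
// Layout of the slice filled in above (editor's note): p[0..n-1] hold the
// pause times in nanoseconds, most recent first; then come three trailing
// values:
//
//	p[n]   = mstats.last_gc        // absolute time of the last gc
//	p[n+1] = mstats.numgc          // collections so far
//	p[n+2] = mstats.pause_total_ns // total pause time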
2204 
2205 intgo runtime_debug_setGCPercent(intgo)
2206   __asm__("runtime_debug.setGCPercent");
2207 
2208 intgo
2209 runtime_debug_setGCPercent(intgo in)
2210 {
2211 	intgo out;
2212 
2213 	runtime_lock(runtime_mheap);
2214 	if(gcpercent == GcpercentUnknown)
2215 		gcpercent = readgogc();
2216 	out = gcpercent;
2217 	if(in < 0)
2218 		in = -1;
2219 	gcpercent = in;
2220 	runtime_unlock(runtime_mheap);
2221 	return out;
2222 }
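
// Hypothetical usage sketch (editor's illustration, not runtime code):
// a caller that reaches this entry point via runtime/debug can turn the
// collector off for an allocation-heavy phase and then restore the old
// setting, since the previous value is returned:
//
//	intgo old = runtime_debug_setGCPercent(-1); // any negative input means off
//	/* ... allocation-heavy phase ... */
//	runtime_debug_setGCPercent(old);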
2223 
2224 static void
2225 gchelperstart(void)
2226 {
2227 	M *m;
2228 
2229 	m = runtime_m();
2230 	if(m->helpgc < 0 || m->helpgc >= MaxGcproc)
2231 		runtime_throw("gchelperstart: bad m->helpgc");
2232 	if(runtime_xchg(&bufferList[m->helpgc].busy, 1))
2233 		runtime_throw("gchelperstart: already busy");
2234 }
2235 
2236 static void
2237 runfinq(void* dummy __attribute__ ((unused)))
2238 {
2239 	Finalizer *f;
2240 	FinBlock *fb, *next;
2241 	uint32 i;
2242 
2243 	for(;;) {
2244 		// There's no need for a lock in this section
2245 		// because it only conflicts with the garbage
2246 		// collector, and the garbage collector only
2247 		// runs when everyone else is stopped, and
2248 		// runfinq only stops when it parks (in runtime_park) or
2249 		// during the calls in the for loop.
2250 		fb = finq;
2251 		finq = nil;
2252 		if(fb == nil) {
2253 			fingwait = 1;
2254 			runtime_park(nil, nil, "finalizer wait");
2255 			continue;
2256 		}
2257 		if(raceenabled)
2258 			runtime_racefingo();
2259 		for(; fb; fb=next) {
2260 			next = fb->next;
2261 			for(i=0; i<(uint32)fb->cnt; i++) {
2262 				void *param;
2263 
2264 				f = &fb->fin[i];
2265 				param = &f->arg;
2266 				reflect_call(f->ft, f->fn, 0, 0, &param, nil);
2267 				f->fn = nil;
2268 				f->arg = nil;
2269 			}
2270 			fb->cnt = 0;
2271 			fb->next = finc;
2272 			finc = fb;
2273 		}
2274 		runtime_gc(1);	// trigger another gc to clean up the finalized objects, if possible
2275 	}
2276 }
2277 
2278 // mark the block at v of size n as allocated.
2279 // If noptr is true, mark it as having no pointers.
2280 void
2281 runtime_markallocated(void *v, uintptr n, bool noptr)
2282 {
2283 	uintptr *b, obits, bits, off, shift;
2284 
2285 	if(0)
2286 		runtime_printf("markallocated %p+%p\n", v, n);
2287 
2288 	if((byte*)v+n > (byte*)runtime_mheap->arena_used || (byte*)v < runtime_mheap->arena_start)
2289 		runtime_throw("markallocated: bad pointer");
2290 
2291 	off = (uintptr*)v - (uintptr*)runtime_mheap->arena_start;  // word offset
2292 	b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
2293 	shift = off % wordsPerBitmapWord;
2294 
2295 	for(;;) {
2296 		obits = *b;
2297 		bits = (obits & ~(bitMask<<shift)) | (bitAllocated<<shift);
2298 		if(noptr)
2299 			bits |= bitNoPointers<<shift;
2300 		if(runtime_singleproc) {
2301 			*b = bits;
2302 			break;
2303 		} else {
2304 			// more than one goroutine is potentially running: use atomic op
2305 			if(runtime_casp((void**)b, (void*)obits, (void*)bits))
2306 				break;
2307 		}
2308 	}
2309 }
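
// Worked example for the index math above (editor's sketch, assuming a
// 64-bit target, so wordsPerBitmapWord is 16): for a block 4096 bytes
// into the arena,
//
//	off   = 4096/sizeof(void*) = 512
//	b     = (uintptr*)arena_start - 512/16 - 1   (33 words below arena_start)
//	shift = 512 % 16 = 0
//
// so bitAllocated is set in the low bit of *b, and for a noptr block
// bitNoPointers is set one bitShift (16 bits) above it.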
2310 
2311 // mark the block at v of size n as freed.
2312 void
2313 runtime_markfreed(void *v, uintptr n)
2314 {
2315 	uintptr *b, obits, bits, off, shift;
2316 
2317 	if(0)
2318 		runtime_printf("markfreed %p+%p\n", v, n);
2319 
2320 	if((byte*)v+n > (byte*)runtime_mheap->arena_used || (byte*)v < runtime_mheap->arena_start)
2321 		runtime_throw("markfreed: bad pointer");
2322 
2323 	off = (uintptr*)v - (uintptr*)runtime_mheap->arena_start;  // word offset
2324 	b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
2325 	shift = off % wordsPerBitmapWord;
2326 
2327 	for(;;) {
2328 		obits = *b;
2329 		bits = (obits & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
2330 		if(runtime_singleproc) {
2331 			*b = bits;
2332 			break;
2333 		} else {
2334 			// more than one goroutine is potentially running: use atomic op
2335 			if(runtime_casp((void**)b, (void*)obits, (void*)bits))
2336 				break;
2337 		}
2338 	}
2339 }
2340 
2341 // check that the block at v of size n is marked freed.
2342 void
2343 runtime_checkfreed(void *v, uintptr n)
2344 {
2345 	uintptr *b, bits, off, shift;
2346 
2347 	if(!runtime_checking)
2348 		return;
2349 
2350 	if((byte*)v+n > (byte*)runtime_mheap->arena_used || (byte*)v < runtime_mheap->arena_start)
2351 		return;	// not allocated, so okay
2352 
2353 	off = (uintptr*)v - (uintptr*)runtime_mheap->arena_start;  // word offset
2354 	b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
2355 	shift = off % wordsPerBitmapWord;
2356 
2357 	bits = *b>>shift;
2358 	if((bits & bitAllocated) != 0) {
2359 		runtime_printf("checkfreed %p+%p: off=%p have=%p\n",
2360 			v, n, off, bits & bitMask);
2361 		runtime_throw("checkfreed: not freed");
2362 	}
2363 }
2364 
2365 // mark the span of memory at v as having n blocks of the given size.
2366 // if leftover is true, there is left over space at the end of the span.
2367 void
2368 runtime_markspan(void *v, uintptr size, uintptr n, bool leftover)
2369 {
2370 	uintptr *b, off, shift;
2371 	byte *p;
2372 
2373 	if((byte*)v+size*n > (byte*)runtime_mheap->arena_used || (byte*)v < runtime_mheap->arena_start)
2374 		runtime_throw("markspan: bad pointer");
2375 
2376 	p = v;
2377 	if(leftover)	// mark a boundary just past end of last block too
2378 		n++;
2379 	for(; n-- > 0; p += size) {
2380 		// Okay to use non-atomic ops here, because we control
2381 		// the entire span, and each bitmap word has bits for only
2382 		// one span, so no other goroutines are changing these
2383 		// bitmap words.
2384 		off = (uintptr*)p - (uintptr*)runtime_mheap->arena_start;  // word offset
2385 		b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
2386 		shift = off % wordsPerBitmapWord;
2387 		*b = (*b & ~(bitMask<<shift)) | (bitBlockBoundary<<shift);
2388 	}
2389 }
2390 
2391 // unmark the span of memory at v of length n bytes.
2392 void
2393 runtime_unmarkspan(void *v, uintptr n)
2394 {
2395 	uintptr *p, *b, off;
2396 
2397 	if((byte*)v+n > (byte*)runtime_mheap->arena_used || (byte*)v < runtime_mheap->arena_start)
2398 		runtime_throw("markspan: bad pointer");
2399 
2400 	p = v;
2401 	off = p - (uintptr*)runtime_mheap->arena_start;  // word offset
2402 	if(off % wordsPerBitmapWord != 0)
2403 		runtime_throw("markspan: unaligned pointer");
2404 	b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
2405 	n /= PtrSize;
2406 	if(n%wordsPerBitmapWord != 0)
2407 		runtime_throw("unmarkspan: unaligned length");
2408 	// Okay to use non-atomic ops here, because we control
2409 	// the entire span, and each bitmap word has bits for only
2410 	// one span, so no other goroutines are changing these
2411 	// bitmap words.
2412 	n /= wordsPerBitmapWord;
2413 	while(n-- > 0)
2414 		*b-- = 0;
2415 }
2416 
2417 bool
2418 runtime_blockspecial(void *v)
2419 {
2420 	uintptr *b, off, shift;
2421 
2422 	if(DebugMark)
2423 		return true;
2424 
2425 	off = (uintptr*)v - (uintptr*)runtime_mheap->arena_start;
2426 	b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
2427 	shift = off % wordsPerBitmapWord;
2428 
2429 	return (*b & (bitSpecial<<shift)) != 0;
2430 }
2431 
2432 void
2433 runtime_setblockspecial(void *v, bool s)
2434 {
2435 	uintptr *b, off, shift, bits, obits;
2436 
2437 	if(DebugMark)
2438 		return;
2439 
2440 	off = (uintptr*)v - (uintptr*)runtime_mheap->arena_start;
2441 	b = (uintptr*)runtime_mheap->arena_start - off/wordsPerBitmapWord - 1;
2442 	shift = off % wordsPerBitmapWord;
2443 
2444 	for(;;) {
2445 		obits = *b;
2446 		if(s)
2447 			bits = obits | (bitSpecial<<shift);
2448 		else
2449 			bits = obits & ~(bitSpecial<<shift);
2450 		if(runtime_singleproc) {
2451 			*b = bits;
2452 			break;
2453 		} else {
2454 			// more than one goroutine is potentially running: use atomic op
2455 			if(runtime_casp((void**)b, (void*)obits, (void*)bits))
2456 				break;
2457 		}
2458 	}
2459 }
2460 
2461 void
2462 runtime_MHeap_MapBits(MHeap *h)
2463 {
2464 	size_t page_size;
2465 
2466 	// Caller has added extra mappings to the arena.
2467 	// Add extra mappings of bitmap words as needed.
2468 	// We allocate extra bitmap pieces in chunks of bitmapChunk.
2469 	enum {
2470 		bitmapChunk = 8192
2471 	};
2472 	uintptr n;
2473 
2474 	n = (h->arena_used - h->arena_start) / wordsPerBitmapWord;
2475 	n = (n+bitmapChunk-1) & ~(bitmapChunk-1);
2476 	if(h->bitmap_mapped >= n)
2477 		return;
2478 
2479 	page_size = getpagesize();
2480 	n = (n+page_size-1) & ~(page_size-1);
2481 
2482 	runtime_SysMap(h->arena_start - n, n - h->bitmap_mapped);
2483 	h->bitmap_mapped = n;
2484 }
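
// Sizing sketch for the mapping above (editor's illustration, assuming a
// 64-bit target, wordsPerBitmapWord = 16, 4096-byte pages): if the arena
// currently spans 1<<20 bytes, the bitmap it needs is
//
//	n = (1<<20)/16 = 65536 bytes
//
// already a multiple of bitmapChunk (8192) and of the page size, so
// SysMap extends the bitmap, which grows downward from arena_start, by
// the missing n - bitmap_mapped bytes.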
2485