// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package runtime

import (
	"runtime/internal/atomic"
	"runtime/internal/sys"
	"unsafe"
)

const (
	_WorkbufSize = 2048 // in bytes; larger values result in less contention

	// workbufAlloc is the number of bytes to allocate at a time
	// for new workbufs. This must be a multiple of pageSize and
	// should be a multiple of _WorkbufSize.
	//
	// Larger values reduce workbuf allocation overhead. Smaller
	// values reduce heap fragmentation.
	workbufAlloc = 32 << 10
)

// throwOnGCWork causes any operations that add pointers to a gcWork
// buffer to throw.
//
// TODO(austin): This is a temporary debugging measure for issue
// #27993. To be removed before release.
var throwOnGCWork bool

// init sanity-checks the sizing constants above at startup.
func init() {
	if workbufAlloc%pageSize != 0 || workbufAlloc%_WorkbufSize != 0 {
		throw("bad workbufAlloc")
	}
}

// Garbage collector work pool abstraction.
//
// This implements a producer/consumer model for pointers to grey
// objects. A grey object is one that is marked and on a work
// queue. A black object is marked and not on a work queue.
//
// Write barriers, root discovery, stack scanning, and object scanning
// produce pointers to grey objects. Scanning consumes pointers to
// grey objects, thus blackening them, and then scans them,
// potentially producing new pointers to grey objects.

// A gcWork provides the interface to produce and consume work for the
// garbage collector.
//
// A gcWork can be used on the stack as follows:
//
//	(preemption must be disabled)
//	gcw := &getg().m.p.ptr().gcw
//	.. call gcw.put() to produce and gcw.tryGet() to consume ..
//
// It's important that any use of gcWork during the mark phase prevent
// the garbage collector from transitioning to mark termination since
// gcWork may locally hold GC work buffers. This can be done by
// disabling preemption (systemstack or acquirem).
type gcWork struct {
	// wbuf1 and wbuf2 are the primary and secondary work buffers.
	//
	// This can be thought of as a stack of both work buffers'
	// pointers concatenated. When we pop the last pointer, we
	// shift the stack up by one work buffer by bringing in a new
	// full buffer and discarding an empty one. When we fill both
	// buffers, we shift the stack down by one work buffer by
	// bringing in a new empty buffer and discarding a full one.
	// This way we have one buffer's worth of hysteresis, which
	// amortizes the cost of getting or putting a work buffer over
	// at least one buffer of work and reduces contention on the
	// global work lists.
	//
	// wbuf1 is always the buffer we're currently pushing to and
	// popping from and wbuf2 is the buffer that will be discarded
	// next.
	//
	// Invariant: Both wbuf1 and wbuf2 are nil or neither are.
	wbuf1, wbuf2 *workbuf

	// Bytes marked (blackened) on this gcWork. This is aggregated
	// into work.bytesMarked by dispose.
	bytesMarked uint64

	// Scan work performed on this gcWork. This is aggregated into
	// gcController by dispose and may also be flushed by callers.
	scanWork int64

	// flushedWork indicates that a non-empty work buffer was
	// flushed to the global work list since the last gcMarkDone
	// termination check. Specifically, this indicates that this
	// gcWork may have communicated work to another gcWork.
	flushedWork bool

	// pauseGen causes put operations to spin while pauseGen ==
	// gcWorkPauseGen if debugCachedWork is true.
	pauseGen uint32

	// putGen is the pauseGen of the last putGen.
	putGen uint32

	// pauseStack is the stack at which this P was paused if
	// debugCachedWork is true.
	pauseStack [16]uintptr
}

// Most of the methods of gcWork are go:nowritebarrierrec because the
// write barrier itself can invoke gcWork methods but the methods are
// not generally re-entrant. Hence, if a gcWork method invoked the
// write barrier while the gcWork was in an inconsistent state, and
// the write barrier in turn invoked a gcWork method, it could
// permanently corrupt the gcWork.

// init sets up w's buffer pair, establishing the invariant that
// wbuf1 and wbuf2 are both non-nil. It seeds wbuf2 from the global
// full list when possible so a subsequent tryGet can find work
// without going back to the global lists.
func (w *gcWork) init() {
	w.wbuf1 = getempty()
	wbuf2 := trygetfull()
	if wbuf2 == nil {
		wbuf2 = getempty()
	}
	w.wbuf2 = wbuf2
}

// checkPut is a debugging hook for issue #27993 that is a no-op
// unless debugCachedWork is true. It records the pause generation of
// this put, spins while this P is paused by gcMarkDone (when it is
// safe to do so), and, if throwOnGCWork is set, dumps the offending
// pointer(s) and the stack at which the P was paused before throwing.
func (w *gcWork) checkPut(ptr uintptr, ptrs []uintptr) {
	if debugCachedWork {
		alreadyFailed := w.putGen == w.pauseGen
		w.putGen = w.pauseGen
		if !canPreemptM(getg().m) {
			// If we were to spin, the runtime may
			// deadlock. Since we can't be preempted, the
			// spin could prevent gcMarkDone from
			// finishing the ragged barrier, which is what
			// releases us from the spin.
			return
		}
		for atomic.Load(&gcWorkPauseGen) == w.pauseGen {
		}
		if throwOnGCWork {
			printlock()
			if alreadyFailed {
				println("runtime: checkPut already failed at this generation")
			}
			println("runtime: late gcWork put")
			if ptr != 0 {
				gcDumpObject("ptr", ptr, ^uintptr(0))
			}
			for _, ptr := range ptrs {
				gcDumpObject("ptrs", ptr, ^uintptr(0))
			}
			println("runtime: paused at")
			for _, pc := range w.pauseStack {
				if pc == 0 {
					break
				}
				f := findfunc(pc)
				if f.valid() {
					// Obviously this doesn't
					// relate to ancestor
					// tracebacks, but this
					// function prints what we
					// want.
					printAncestorTracebackFuncInfo(f, pc)
				} else {
					println("\tunknown PC ", hex(pc), "\n")
				}
			}
			throw("throwOnGCWork")
		}
	}
}

// put enqueues a pointer for the garbage collector to trace.
// obj must point to the beginning of a heap object or an oblet.
//go:nowritebarrierrec
func (w *gcWork) put(obj uintptr) {
	w.checkPut(obj, nil)

	flushed := false
	wbuf := w.wbuf1
	if wbuf == nil {
		w.init()
		wbuf = w.wbuf1
		// wbuf is empty at this point.
	} else if wbuf.nobj == len(wbuf.obj) {
		// wbuf1 is full; swap with wbuf2 and retry there.
		w.wbuf1, w.wbuf2 = w.wbuf2, w.wbuf1
		wbuf = w.wbuf1
		if wbuf.nobj == len(wbuf.obj) {
			// Both buffers are full; flush one to the
			// global full list and replace it with an
			// empty buffer.
			putfull(wbuf)
			w.flushedWork = true
			wbuf = getempty()
			w.wbuf1 = wbuf
			flushed = true
		}
	}

	wbuf.obj[wbuf.nobj] = obj
	wbuf.nobj++

	// If we put a buffer on full, let the GC controller know so
	// it can encourage more workers to run. We delay this until
	// the end of put so that w is in a consistent state, since
	// enlistWorker may itself manipulate w.
	if flushed && gcphase == _GCmark {
		gcController.enlistWorker()
	}
}

// putFast does a put and reports whether it can be done quickly
// otherwise it returns false and the caller needs to call put.
//go:nowritebarrierrec
func (w *gcWork) putFast(obj uintptr) bool {
	w.checkPut(obj, nil)

	wbuf := w.wbuf1
	if wbuf == nil {
		return false
	} else if wbuf.nobj == len(wbuf.obj) {
		return false
	}

	wbuf.obj[wbuf.nobj] = obj
	wbuf.nobj++
	return true
}

// putBatch performs a put on every pointer in obj. See put for
// constraints on these pointers.
//
//go:nowritebarrierrec
func (w *gcWork) putBatch(obj []uintptr) {
	if len(obj) == 0 {
		return
	}

	w.checkPut(0, obj)

	flushed := false
	wbuf := w.wbuf1
	if wbuf == nil {
		w.init()
		wbuf = w.wbuf1
	}

	// Copy obj into the buffers in chunks, flushing full buffers
	// to the global list as needed.
	for len(obj) > 0 {
		for wbuf.nobj == len(wbuf.obj) {
			putfull(wbuf)
			w.flushedWork = true
			w.wbuf1, w.wbuf2 = w.wbuf2, getempty()
			wbuf = w.wbuf1
			flushed = true
		}
		n := copy(wbuf.obj[wbuf.nobj:], obj)
		wbuf.nobj += n
		obj = obj[n:]
	}

	if flushed && gcphase == _GCmark {
		gcController.enlistWorker()
	}
}

// tryGet dequeues a pointer for the garbage collector to trace.
//
// If there are no pointers remaining in this gcWork or in the global
// queue, tryGet returns 0. Note that there may still be pointers in
// other gcWork instances or other caches.
//go:nowritebarrierrec
func (w *gcWork) tryGet() uintptr {
	wbuf := w.wbuf1
	if wbuf == nil {
		w.init()
		wbuf = w.wbuf1
		// wbuf is empty at this point.
	}
	if wbuf.nobj == 0 {
		// wbuf1 is empty; swap with wbuf2 and retry there.
		w.wbuf1, w.wbuf2 = w.wbuf2, w.wbuf1
		wbuf = w.wbuf1
		if wbuf.nobj == 0 {
			// Both buffers are empty; try to refill from
			// the global full list, returning the empty
			// buffer in exchange.
			owbuf := wbuf
			wbuf = trygetfull()
			if wbuf == nil {
				return 0
			}
			putempty(owbuf)
			w.wbuf1 = wbuf
		}
	}

	wbuf.nobj--
	return wbuf.obj[wbuf.nobj]
}

// tryGetFast dequeues a pointer for the garbage collector to trace
// if one is readily available. Otherwise it returns 0 and
// the caller is expected to call tryGet().
//go:nowritebarrierrec
func (w *gcWork) tryGetFast() uintptr {
	wbuf := w.wbuf1
	if wbuf == nil {
		return 0
	}
	if wbuf.nobj == 0 {
		return 0
	}

	wbuf.nobj--
	return wbuf.obj[wbuf.nobj]
}

// dispose returns any cached pointers to the global queue.
// The buffers are being put on the full queue so that the
// write barriers will not simply reacquire them before the
// GC can inspect them. This helps reduce the mutator's
// ability to hide pointers during the concurrent mark phase.
//
//go:nowritebarrierrec
func (w *gcWork) dispose() {
	if wbuf := w.wbuf1; wbuf != nil {
		if wbuf.nobj == 0 {
			putempty(wbuf)
		} else {
			putfull(wbuf)
			w.flushedWork = true
		}
		w.wbuf1 = nil

		// By the invariant on wbuf1/wbuf2, wbuf2 is also
		// non-nil here.
		wbuf = w.wbuf2
		if wbuf.nobj == 0 {
			putempty(wbuf)
		} else {
			putfull(wbuf)
			w.flushedWork = true
		}
		w.wbuf2 = nil
	}
	if w.bytesMarked != 0 {
		// dispose happens relatively infrequently. If this
		// atomic becomes a problem, we should first try to
		// dispose less and if necessary aggregate in a per-P
		// counter.
		atomic.Xadd64(&work.bytesMarked, int64(w.bytesMarked))
		w.bytesMarked = 0
	}
	if w.scanWork != 0 {
		atomic.Xaddint64(&gcController.scanWork, w.scanWork)
		w.scanWork = 0
	}
}

// balance moves some work that's cached in this gcWork back on the
// global queue.
//go:nowritebarrierrec
func (w *gcWork) balance() {
	if w.wbuf1 == nil {
		// Not initialized; nothing cached to give back.
		return
	}
	if wbuf := w.wbuf2; wbuf.nobj != 0 {
		w.checkPut(0, wbuf.obj[:wbuf.nobj])
		putfull(wbuf)
		w.flushedWork = true
		w.wbuf2 = getempty()
	} else if wbuf := w.wbuf1; wbuf.nobj > 4 {
		w.checkPut(0, wbuf.obj[:wbuf.nobj])
		w.wbuf1 = handoff(wbuf)
		w.flushedWork = true // handoff did putfull
	} else {
		return
	}
	// We flushed a buffer to the full list, so wake a worker.
	if gcphase == _GCmark {
		gcController.enlistWorker()
	}
}

// empty reports whether w has no mark work available.
//go:nowritebarrierrec
func (w *gcWork) empty() bool {
	return w.wbuf1 == nil || (w.wbuf1.nobj == 0 && w.wbuf2.nobj == 0)
}

// Internally, the GC work pool is kept in arrays in work buffers.
// The gcWork interface caches a work buffer until full (or empty) to
// avoid contending on the global work buffer lists.
type workbufhdr struct {
	node lfnode // must be first
	nobj int    // number of valid entries in obj
}

//go:notinheap
type workbuf struct {
	workbufhdr
	// account for the above fields
	obj [(_WorkbufSize - unsafe.Sizeof(workbufhdr{})) / sys.PtrSize]uintptr
}

// workbuf factory routines. These funcs are used to manage the
// workbufs.
// If the GC asks for some work these are the only routines that
// make wbufs available to the GC.

// checknonempty throws if b holds no objects.
func (b *workbuf) checknonempty() {
	if b.nobj == 0 {
		throw("workbuf is empty")
	}
}

// checkempty throws if b holds any objects.
func (b *workbuf) checkempty() {
	if b.nobj != 0 {
		throw("workbuf is not empty")
	}
}

// getempty pops an empty work buffer off the work.empty list,
// allocating new buffers if none are available.
//go:nowritebarrier
func getempty() *workbuf {
	var b *workbuf
	if work.empty != 0 {
		b = (*workbuf)(work.empty.pop())
		if b != nil {
			b.checkempty()
		}
	}
	if b == nil {
		// Allocate more workbufs.
		var s *mspan
		// The first check of the free list is done without the
		// lock as a fast path; the list is re-read under the
		// lock before anything is removed.
		if work.wbufSpans.free.first != nil {
			lock(&work.wbufSpans.lock)
			s = work.wbufSpans.free.first
			if s != nil {
				work.wbufSpans.free.remove(s)
				work.wbufSpans.busy.insert(s)
			}
			unlock(&work.wbufSpans.lock)
		}
		if s == nil {
			systemstack(func() {
				s = mheap_.allocManual(workbufAlloc/pageSize, &memstats.gc_sys)
			})
			if s == nil {
				throw("out of memory")
			}
			// Record the new span in the busy list.
			lock(&work.wbufSpans.lock)
			work.wbufSpans.busy.insert(s)
			unlock(&work.wbufSpans.lock)
		}
		// Slice up the span into new workbufs. Return one and
		// put the rest on the empty list.
		for i := uintptr(0); i+_WorkbufSize <= workbufAlloc; i += _WorkbufSize {
			newb := (*workbuf)(unsafe.Pointer(s.base() + i))
			newb.nobj = 0
			lfnodeValidate(&newb.node)
			if i == 0 {
				b = newb
			} else {
				putempty(newb)
			}
		}
	}
	return b
}

// putempty puts a workbuf onto the work.empty list.
// Upon entry this go routine owns b. The lfstack.push relinquishes ownership.
//go:nowritebarrier
func putempty(b *workbuf) {
	b.checkempty()
	work.empty.push(&b.node)
}

// putfull puts the workbuf on the work.full list for the GC.
// putfull accepts partially full buffers so the GC can avoid competing
// with the mutators for ownership of partially full buffers.
//go:nowritebarrier
func putfull(b *workbuf) {
	b.checknonempty()
	work.full.push(&b.node)
}

// trygetfull tries to get a full or partially empty workbuffer.
// If one is not immediately available return nil
//go:nowritebarrier
func trygetfull() *workbuf {
	b := (*workbuf)(work.full.pop())
	if b != nil {
		b.checknonempty()
		return b
	}
	return b
}

// handoff splits b's contents roughly in half, keeping the later
// half in a fresh buffer that it returns to the caller and pushing b
// (with the earlier half) onto the global full list for other
// workers to steal.
//go:nowritebarrier
func handoff(b *workbuf) *workbuf {
	// Make new buffer with half of b's pointers.
	b1 := getempty()
	n := b.nobj / 2
	b.nobj -= n
	b1.nobj = n
	memmove(unsafe.Pointer(&b1.obj[0]), unsafe.Pointer(&b.obj[b.nobj]), uintptr(n)*unsafe.Sizeof(b1.obj[0]))

	// Put b on full list - let first half of b get stolen.
	putfull(b)
	return b1
}

// prepareFreeWorkbufs moves busy workbuf spans to free list so they
// can be freed to the heap. This must only be called when all
// workbufs are on the empty list.
func prepareFreeWorkbufs() {
	lock(&work.wbufSpans.lock)
	if work.full != 0 {
		throw("cannot free workbufs when work.full != 0")
	}
	// Since all workbufs are on the empty list, we don't care
	// which ones are in which spans. We can wipe the entire empty
	// list and move all workbuf spans to the free list.
	work.empty = 0
	work.wbufSpans.free.takeAll(&work.wbufSpans.busy)
	unlock(&work.wbufSpans.lock)
}

// freeSomeWbufs frees some workbufs back to the heap and returns
// true if it should be called again to free more.
func freeSomeWbufs(preemptible bool) bool {
	const batchSize = 64 // ~1–2 µs per span.
	lock(&work.wbufSpans.lock)
	if gcphase != _GCoff || work.wbufSpans.free.isEmpty() {
		unlock(&work.wbufSpans.lock)
		return false
	}
	systemstack(func() {
		gp := getg().m.curg
		// Free up to batchSize spans, stopping early if this
		// goroutine is asked to preempt (when permitted).
		for i := 0; i < batchSize && !(preemptible && gp.preempt); i++ {
			span := work.wbufSpans.free.first
			if span == nil {
				break
			}
			work.wbufSpans.free.remove(span)
			mheap_.freeManual(span, &memstats.gc_sys)
		}
	})
	more := !work.wbufSpans.free.isEmpty()
	unlock(&work.wbufSpans.lock)
	return more
}