// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package runtime

import (
	"runtime/internal/atomic"
	"runtime/internal/sys"
	"unsafe"
)

const (
	_WorkbufSize = 2048 // in bytes; larger values result in less contention

	// workbufAlloc is the number of bytes to allocate at a time
	// for new workbufs. This must be a multiple of pageSize and
	// should be a multiple of _WorkbufSize.
	//
	// Larger values reduce workbuf allocation overhead. Smaller
	// values reduce heap fragmentation.
	workbufAlloc = 32 << 10
)

// throwOnGCWork causes any operations that add pointers to a gcWork
// buffer to throw.
//
// TODO(austin): This is a temporary debugging measure for issue
// #27993. To be removed before release.
var throwOnGCWork bool

func init() {
	if workbufAlloc%pageSize != 0 || workbufAlloc%_WorkbufSize != 0 {
		throw("bad workbufAlloc")
	}
}

// Garbage collector work pool abstraction.
//
// This implements a producer/consumer model for pointers to grey
// objects. A grey object is one that is marked and on a work
// queue. A black object is marked and not on a work queue.
//
// Write barriers, root discovery, stack scanning, and object scanning
// produce pointers to grey objects. Scanning consumes pointers to
// grey objects, thus blackening them, and then scans them,
// potentially producing new pointers to grey objects.

// A gcWork provides the interface to produce and consume work for the
// garbage collector.
//
// A gcWork can be used on the stack as follows:
//
//	(preemption must be disabled)
//	gcw := &getg().m.p.ptr().gcw
//	.. call gcw.put() to produce and gcw.tryGet() to consume ..
//
// It's important that any use of gcWork during the mark phase prevent
// the garbage collector from transitioning to mark termination since
// gcWork may locally hold GC work buffers. This can be done by
// disabling preemption (systemstack or acquirem).
type gcWork struct {
	// wbuf1 and wbuf2 are the primary and secondary work buffers.
	//
	// This can be thought of as a stack of both work buffers'
	// pointers concatenated. When we pop the last pointer, we
	// shift the stack up by one work buffer by bringing in a new
	// full buffer and discarding an empty one. When we fill both
	// buffers, we shift the stack down by one work buffer by
	// bringing in a new empty buffer and discarding a full one.
	// This way we have one buffer's worth of hysteresis, which
	// amortizes the cost of getting or putting a work buffer over
	// at least one buffer of work and reduces contention on the
	// global work lists.
	//
	// wbuf1 is always the buffer we're currently pushing to and
	// popping from and wbuf2 is the buffer that will be discarded
	// next.
	//
	// Invariant: Both wbuf1 and wbuf2 are nil or neither are.
	wbuf1, wbuf2 *workbuf

	// Bytes marked (blackened) on this gcWork. This is aggregated
	// into work.bytesMarked by dispose.
	bytesMarked uint64

	// Scan work performed on this gcWork. This is aggregated into
	// gcController by dispose and may also be flushed by callers.
	scanWork int64

	// flushedWork indicates that a non-empty work buffer was
	// flushed to the global work list since the last gcMarkDone
	// termination check. Specifically, this indicates that this
	// gcWork may have communicated work to another gcWork.
	flushedWork bool

	// pauseGen causes put operations to spin while pauseGen ==
	// gcWorkPauseGen if debugCachedWork is true.
	pauseGen uint32

	// putGen is the pauseGen of the last put.
	putGen uint32

	// pauseStack is the stack at which this P was paused if
	// debugCachedWork is true.
	pauseStack [16]location
}
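
// As a minimal sketch of the hysteresis described above (the real
// logic is in put and tryGet below):
//
//	put:  if wbuf1 is full, swap wbuf1 and wbuf2; if wbuf1 is
//	      still full, flush it with putfull and replace it via
//	      getempty.
//	get:  if wbuf1 is empty, swap wbuf1 and wbuf2; if wbuf1 is
//	      still empty, return it with putempty and refill via
//	      trygetfull.
//
// After either shift there is at least one buffer's worth of slack
// before the global lists must be touched again.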

// Most of the methods of gcWork are go:nowritebarrierrec because the
// write barrier itself can invoke gcWork methods but the methods are
// not generally re-entrant. Hence, if a gcWork method invoked the
// write barrier while the gcWork was in an inconsistent state, and
// the write barrier in turn invoked a gcWork method, it could
// permanently corrupt the gcWork.

func (w *gcWork) init() {
	w.wbuf1 = getempty()
	wbuf2 := trygetfull()
	if wbuf2 == nil {
		wbuf2 = getempty()
	}
	w.wbuf2 = wbuf2
}

func (w *gcWork) checkPut(ptr uintptr, ptrs []uintptr) {
	if debugCachedWork {
		alreadyFailed := w.putGen == w.pauseGen
		w.putGen = w.pauseGen
		if !canPreemptM(getg().m) {
			// If we were to spin, the runtime may
			// deadlock. Since we can't be preempted, the
			// spin could prevent gcMarkDone from
			// finishing the ragged barrier, which is what
			// releases us from the spin.
			return
		}
		for atomic.Load(&gcWorkPauseGen) == w.pauseGen {
		}
		if throwOnGCWork {
			printlock()
			if alreadyFailed {
				println("runtime: checkPut already failed at this generation")
			}
			println("runtime: late gcWork put")
			if ptr != 0 {
				gcDumpObject("ptr", ptr, ^uintptr(0))
			}
			for _, ptr := range ptrs {
				gcDumpObject("ptrs", ptr, ^uintptr(0))
			}
			println("runtime: paused at")
			for _, loc := range w.pauseStack {
				if loc.pc == 0 {
					break
				}
				if loc.function != "" {
					// Obviously this doesn't
					// relate to ancestor
					// tracebacks, but this
					// function prints what we
					// want.
					printAncestorTracebackFuncInfo(loc.function, loc.filename, loc.lineno, loc.pc)
				} else {
					println("\tunknown PC ", hex(loc.pc), "\n")
				}
			}
			throw("throwOnGCWork")
		}
	}
}

// put enqueues a pointer for the garbage collector to trace.
// obj must point to the beginning of a heap object or an oblet.
//go:nowritebarrierrec
func (w *gcWork) put(obj uintptr) {
	w.checkPut(obj, nil)

	flushed := false
	wbuf := w.wbuf1
	if wbuf == nil {
		w.init()
		wbuf = w.wbuf1
		// wbuf is empty at this point.
	} else if wbuf.nobj == len(wbuf.obj) {
		w.wbuf1, w.wbuf2 = w.wbuf2, w.wbuf1
		wbuf = w.wbuf1
		if wbuf.nobj == len(wbuf.obj) {
			putfull(wbuf)
			w.flushedWork = true
			wbuf = getempty()
			w.wbuf1 = wbuf
			flushed = true
		}
	}

	wbuf.obj[wbuf.nobj] = obj
	wbuf.nobj++

	// If we put a buffer on full, let the GC controller know so
	// it can encourage more workers to run. We delay this until
	// the end of put so that w is in a consistent state, since
	// enlistWorker may itself manipulate w.
	if flushed && gcphase == _GCmark {
		gcController.enlistWorker()
	}
}

// putFast does a put and reports whether it can be done quickly.
// Otherwise it returns false and the caller needs to call put.
//go:nowritebarrierrec
func (w *gcWork) putFast(obj uintptr) bool {
	w.checkPut(obj, nil)

	wbuf := w.wbuf1
	if wbuf == nil {
		return false
	} else if wbuf.nobj == len(wbuf.obj) {
		return false
	}

	wbuf.obj[wbuf.nobj] = obj
	wbuf.nobj++
	return true
}
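
// A caller typically tries putFast first and falls back to put, e.g.
// (illustrative sketch of the intended pattern):
//
//	if !gcw.putFast(obj) {
//		gcw.put(obj)
//	}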

// putBatch performs a put on every pointer in obj. See put for
// constraints on these pointers.
//
//go:nowritebarrierrec
func (w *gcWork) putBatch(obj []uintptr) {
	if len(obj) == 0 {
		return
	}

	w.checkPut(0, obj)

	flushed := false
	wbuf := w.wbuf1
	if wbuf == nil {
		w.init()
		wbuf = w.wbuf1
	}

	for len(obj) > 0 {
		for wbuf.nobj == len(wbuf.obj) {
			putfull(wbuf)
			w.flushedWork = true
			w.wbuf1, w.wbuf2 = w.wbuf2, getempty()
			wbuf = w.wbuf1
			flushed = true
		}
		n := copy(wbuf.obj[wbuf.nobj:], obj)
		wbuf.nobj += n
		obj = obj[n:]
	}

	if flushed && gcphase == _GCmark {
		gcController.enlistWorker()
	}
}

// tryGet dequeues a pointer for the garbage collector to trace.
//
// If there are no pointers remaining in this gcWork or in the global
// queue, tryGet returns 0. Note that there may still be pointers in
// other gcWork instances or other caches.
//go:nowritebarrierrec
func (w *gcWork) tryGet() uintptr {
	wbuf := w.wbuf1
	if wbuf == nil {
		w.init()
		wbuf = w.wbuf1
		// wbuf is empty at this point.
	}
	if wbuf.nobj == 0 {
		w.wbuf1, w.wbuf2 = w.wbuf2, w.wbuf1
		wbuf = w.wbuf1
		if wbuf.nobj == 0 {
			owbuf := wbuf
			wbuf = trygetfull()
			if wbuf == nil {
				return 0
			}
			putempty(owbuf)
			w.wbuf1 = wbuf
		}
	}

	wbuf.nobj--
	return wbuf.obj[wbuf.nobj]
}

// tryGetFast dequeues a pointer for the garbage collector to trace
// if one is readily available. Otherwise it returns 0 and
// the caller is expected to call tryGet().
//go:nowritebarrierrec
func (w *gcWork) tryGetFast() uintptr {
	wbuf := w.wbuf1
	if wbuf == nil {
		return 0
	}
	if wbuf.nobj == 0 {
		return 0
	}

	wbuf.nobj--
	return wbuf.obj[wbuf.nobj]
}
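
// tryGetFast pairs with tryGet the same way putFast pairs with put;
// an illustrative sketch of a drain-style consumer:
//
//	b := gcw.tryGetFast()
//	if b == 0 {
//		b = gcw.tryGet()
//	}
//	if b == 0 {
//		// No local or global work remains.
//	}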

// dispose returns any cached pointers to the global queue.
// The buffers are being put on the full queue so that the
// write barriers will not simply reacquire them before the
// GC can inspect them. This helps reduce the mutator's
// ability to hide pointers during the concurrent mark phase.
//
//go:nowritebarrierrec
func (w *gcWork) dispose() {
	if wbuf := w.wbuf1; wbuf != nil {
		if wbuf.nobj == 0 {
			putempty(wbuf)
		} else {
			putfull(wbuf)
			w.flushedWork = true
		}
		w.wbuf1 = nil

		wbuf = w.wbuf2
		if wbuf.nobj == 0 {
			putempty(wbuf)
		} else {
			putfull(wbuf)
			w.flushedWork = true
		}
		w.wbuf2 = nil
	}
	if w.bytesMarked != 0 {
		// dispose happens relatively infrequently. If this
		// atomic becomes a problem, we should first try to
		// dispose less and if necessary aggregate in a per-P
		// counter.
		atomic.Xadd64(&work.bytesMarked, int64(w.bytesMarked))
		w.bytesMarked = 0
	}
	if w.scanWork != 0 {
		atomic.Xaddint64(&gcController.scanWork, w.scanWork)
		w.scanWork = 0
	}
}

// balance moves some work that's cached in this gcWork back on the
// global queue.
//go:nowritebarrierrec
func (w *gcWork) balance() {
	if w.wbuf1 == nil {
		return
	}
	if wbuf := w.wbuf2; wbuf.nobj != 0 {
		w.checkPut(0, wbuf.obj[:wbuf.nobj])
		putfull(wbuf)
		w.flushedWork = true
		w.wbuf2 = getempty()
	} else if wbuf := w.wbuf1; wbuf.nobj > 4 {
		w.checkPut(0, wbuf.obj[:wbuf.nobj])
		w.wbuf1 = handoff(wbuf)
		w.flushedWork = true // handoff did putfull
	} else {
		return
	}
	// We flushed a buffer to the full list, so wake a worker.
	if gcphase == _GCmark {
		gcController.enlistWorker()
	}
}
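
// balance is intended for callers that observe the global full list
// empty while other workers may be starved, e.g. (illustrative; the
// mark drain loop does something like this):
//
//	if work.full == 0 {
//		gcw.balance()
//	}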

// empty reports whether w has no mark work available.
//go:nowritebarrierrec
func (w *gcWork) empty() bool {
	return w.wbuf1 == nil || (w.wbuf1.nobj == 0 && w.wbuf2.nobj == 0)
}

// Internally, the GC work pool is kept in arrays in work buffers.
// The gcWork interface caches a work buffer until full (or empty) to
// avoid contending on the global work buffer lists.

type workbufhdr struct {
	node lfnode // must be first
	nobj int
}

//go:notinheap
type workbuf struct {
	workbufhdr
	// account for the above fields
	obj [(_WorkbufSize - unsafe.Sizeof(workbufhdr{})) / sys.PtrSize]uintptr
}

// workbuf factory routines. These funcs are used to manage the
// workbufs.
// If the GC asks for some work, these are the only routines that
// make wbufs available to the GC.

func (b *workbuf) checknonempty() {
	if b.nobj == 0 {
		throw("workbuf is empty")
	}
}

func (b *workbuf) checkempty() {
	if b.nobj != 0 {
		throw("workbuf is not empty")
	}
}

// getempty pops an empty work buffer off the work.empty list,
// allocating new buffers if none are available.
//go:nowritebarrier
func getempty() *workbuf {
	var b *workbuf
	if work.empty != 0 {
		b = (*workbuf)(work.empty.pop())
		if b != nil {
			b.checkempty()
		}
	}
	if b == nil {
		// Allocate more workbufs.
		var s *mspan
		if work.wbufSpans.free.first != nil {
			lock(&work.wbufSpans.lock)
			s = work.wbufSpans.free.first
			if s != nil {
				work.wbufSpans.free.remove(s)
				work.wbufSpans.busy.insert(s)
			}
			unlock(&work.wbufSpans.lock)
		}
		if s == nil {
			systemstack(func() {
				s = mheap_.allocManual(workbufAlloc/pageSize, &memstats.gc_sys)
			})
			if s == nil {
				throw("out of memory")
			}
			// Record the new span in the busy list.
			lock(&work.wbufSpans.lock)
			work.wbufSpans.busy.insert(s)
			unlock(&work.wbufSpans.lock)
		}
		// Slice up the span into new workbufs. Return one and
		// put the rest on the empty list.
		for i := uintptr(0); i+_WorkbufSize <= workbufAlloc; i += _WorkbufSize {
			newb := (*workbuf)(unsafe.Pointer(s.base() + i))
			newb.nobj = 0
			lfnodeValidate(&newb.node)
			if i == 0 {
				b = newb
			} else {
				putempty(newb)
			}
		}
	}
	return b
}

// putempty puts a workbuf onto the work.empty list.
// Upon entry this goroutine owns b. The lfstack.push relinquishes ownership.
//go:nowritebarrier
func putempty(b *workbuf) {
	b.checkempty()
	work.empty.push(&b.node)
}

// putfull puts the workbuf on the work.full list for the GC.
// putfull accepts partially full buffers so the GC can avoid competing
// with the mutators for ownership of partially full buffers.
//go:nowritebarrier
func putfull(b *workbuf) {
	b.checknonempty()
	work.full.push(&b.node)
}

// trygetfull tries to get a full or partially full workbuf.
// If one is not immediately available, it returns nil.
//go:nowritebarrier
func trygetfull() *workbuf {
	b := (*workbuf)(work.full.pop())
	if b != nil {
		b.checknonempty()
		return b
	}
	return b
}

// handoff splits b in two, returning a new workbuf containing half
// of b's pointers and putting b, which keeps the other half, on the
// full list so other workers can steal it.
//go:nowritebarrier
func handoff(b *workbuf) *workbuf {
	// Make new buffer with half of b's pointers.
	b1 := getempty()
	n := b.nobj / 2
	b.nobj -= n
	b1.nobj = n
	memmove(unsafe.Pointer(&b1.obj[0]), unsafe.Pointer(&b.obj[b.nobj]), uintptr(n)*unsafe.Sizeof(b1.obj[0]))

	// Put b on full list - let first half of b get stolen.
	putfull(b)
	return b1
}

// prepareFreeWorkbufs moves busy workbuf spans to free list so they
// can be freed to the heap. This must only be called when all
// workbufs are on the empty list.
func prepareFreeWorkbufs() {
	lock(&work.wbufSpans.lock)
	if work.full != 0 {
		throw("cannot free workbufs when work.full != 0")
	}
	// Since all workbufs are on the empty list, we don't care
	// which ones are in which spans. We can wipe the entire empty
	// list and move all workbuf spans to the free list.
	work.empty = 0
	work.wbufSpans.free.takeAll(&work.wbufSpans.busy)
	unlock(&work.wbufSpans.lock)
}

// freeSomeWbufs frees some workbufs back to the heap and returns
// true if it should be called again to free more.
func freeSomeWbufs(preemptible bool) bool {
	const batchSize = 64 // ~1–2 µs per span.
	lock(&work.wbufSpans.lock)
	if gcphase != _GCoff || work.wbufSpans.free.isEmpty() {
		unlock(&work.wbufSpans.lock)
		return false
	}
	systemstack(func() {
		gp := getg().m.curg
		for i := 0; i < batchSize && !(preemptible && gp.preempt); i++ {
			span := work.wbufSpans.free.first
			if span == nil {
				break
			}
			work.wbufSpans.free.remove(span)
			mheap_.freeManual(span, &memstats.gc_sys)
		}
	})
	more := !work.wbufSpans.free.isEmpty()
	unlock(&work.wbufSpans.lock)
	return more
}
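
// freeSomeWbufs is designed to be called in a loop so the caller can
// yield between batches, e.g. (illustrative; the background sweeper
// does something like this):
//
//	for freeSomeWbufs(true) {
//		Gosched()
//	}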