// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build aix darwin dragonfly freebsd hurd js,wasm linux netbsd openbsd solaris windows

package runtime

import (
	"runtime/internal/atomic"
	"unsafe"
)

// Export temporarily for gccgo's C code to call:
//go:linkname netpoll

// Integrated network poller (platform-independent part).
// A particular implementation (epoll/kqueue/port/AIX/Windows)
// must define the following functions:
//
// func netpollinit()
//	Initialize the poller. Only called once.
//
// func netpollopen(fd uintptr, pd *pollDesc) int32
//	Arm edge-triggered notifications for fd. The pd argument is to pass
//	back to netpollready when fd is ready. Return an errno value.
//
// func netpoll(delta int64) gList
//	Poll the network. If delta < 0, block indefinitely. If delta == 0,
//	poll without blocking. If delta > 0, block for up to delta nanoseconds.
//	Return a list of goroutines built by calling netpollready.
//
// func netpollBreak()
//	Wake up the network poller, assumed to be blocked in netpoll.
//
// func netpollIsPollDescriptor(fd uintptr) bool
//	Reports whether fd is a file descriptor used by the poller.

// pollDesc contains 2 binary semaphores, rg and wg, to park reader and writer
// goroutines respectively. The semaphore can be in the following states:
// pdReady - io readiness notification is pending;
//           a goroutine consumes the notification by changing the state to nil.
// pdWait - a goroutine prepares to park on the semaphore, but not yet parked;
//          the goroutine commits to park by changing the state to G pointer,
//          or, alternatively, concurrent io notification changes the state to pdReady,
//          or, alternatively, concurrent timeout/close changes the state to nil.
// G pointer - the goroutine is blocked on the semaphore;
//             io notification or timeout/close changes the state to pdReady or nil respectively
//             and unparks the goroutine.
// nil - none of the above.
const (
	pdReady uintptr = 1
	pdWait  uintptr = 2
)
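// For reference, the legal transitions of rg/wg implied by the comment
// above (netpollblock, netpollunblock and the deadline/close paths below
// implement them):
//
//	nil       -> pdWait     netpollblock prepares to park
//	pdWait    -> G pointer  netpollblockcommit parks the goroutine
//	pdWait    -> pdReady    concurrent io readiness (netpollunblock)
//	pdWait    -> nil        concurrent timeout/close (netpollunblock)
//	G pointer -> pdReady    io readiness; the goroutine is unparked
//	G pointer -> nil        timeout/close; the goroutine is unparked
//	pdReady   -> nil        the notification is consumed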
const pollBlockSize = 4 * 1024

// Network poller descriptor.
//
// No heap pointers.
//
//go:notinheap
type pollDesc struct {
	link *pollDesc // in pollcache, protected by pollcache.lock

	// The lock protects pollOpen, pollSetDeadline, pollUnblock and deadlineimpl operations.
	// This fully covers seq, rt and wt variables. fd is constant throughout the PollDesc lifetime.
	// pollReset, pollWait, pollWaitCanceled and runtime·netpollready (IO readiness notification)
	// proceed w/o taking the lock. So closing, everr, rg, rd, wg and wd are manipulated
	// in a lock-free way by all operations.
	// NOTE(dvyukov): the following code uses uintptr to store *g (rg/wg),
	// that will blow up when GC starts moving objects.
	lock    mutex // protects the following fields
	fd      uintptr
	closing bool
	everr   bool    // marks that an event scanning error happened
	user    uint32  // user settable cookie
	rseq    uintptr // protects from stale read timers
	rg      uintptr // pdReady, pdWait, G waiting for read or nil
	rt      timer   // read deadline timer (set if rt.f != nil)
	rd      int64   // read deadline
	wseq    uintptr // protects from stale write timers
	wg      uintptr // pdReady, pdWait, G waiting for write or nil
	wt      timer   // write deadline timer
	wd      int64   // write deadline
}

type pollCache struct {
	lock  mutex
	first *pollDesc
	// PollDesc objects must be type-stable,
	// because we can get ready notification from epoll/kqueue
	// after the descriptor is closed/reused.
	// Stale notifications are detected using the seq variable,
	// which is incremented when deadlines are changed or the descriptor is reused.
}

var (
	netpollInitLock mutex
	netpollInited   uint32

	pollcache      pollCache
	netpollWaiters uint32
)

//go:linkname poll_runtime_pollServerInit internal..z2fpoll.runtime_pollServerInit
func poll_runtime_pollServerInit() {
	netpollGenericInit()
}

func netpollGenericInit() {
	if atomic.Load(&netpollInited) == 0 {
		lock(&netpollInitLock)
		if netpollInited == 0 {
			netpollinit()
			atomic.Store(&netpollInited, 1)
		}
		unlock(&netpollInitLock)
	}
}

func netpollinited() bool {
	return atomic.Load(&netpollInited) != 0
}

//go:linkname poll_runtime_isPollServerDescriptor internal..z2fpoll.runtime_isPollServerDescriptor

// poll_runtime_isPollServerDescriptor reports whether fd is a
// descriptor being used by netpoll.
func poll_runtime_isPollServerDescriptor(fd uintptr) bool {
	return netpollIsPollDescriptor(fd)
}

//go:linkname poll_runtime_pollOpen internal..z2fpoll.runtime_pollOpen
func poll_runtime_pollOpen(fd uintptr) (uintptr, int) {
	pd := pollcache.alloc()
	lock(&pd.lock)
	if pd.wg != 0 && pd.wg != pdReady {
		throw("runtime: blocked write on free polldesc")
	}
	if pd.rg != 0 && pd.rg != pdReady {
		throw("runtime: blocked read on free polldesc")
	}
	pd.fd = fd
	pd.closing = false
	pd.everr = false
	pd.rseq++
	pd.rg = 0
	pd.rd = 0
	pd.wseq++
	pd.wg = 0
	pd.wd = 0
	unlock(&pd.lock)

	errno := netpollopen(fd, pd)
	return uintptr(unsafe.Pointer(pd)), int(errno)
}

//go:linkname poll_runtime_pollClose internal..z2fpoll.runtime_pollClose
func poll_runtime_pollClose(ctx uintptr) {
	pd := (*pollDesc)(unsafe.Pointer(ctx))
	if !pd.closing {
		throw("runtime: close polldesc w/o unblock")
	}
	if pd.wg != 0 && pd.wg != pdReady {
		throw("runtime: blocked write on closing polldesc")
	}
	if pd.rg != 0 && pd.rg != pdReady {
		throw("runtime: blocked read on closing polldesc")
	}
	netpollclose(pd.fd)
	pollcache.free(pd)
}

func (c *pollCache) free(pd *pollDesc) {
	lock(&c.lock)
	pd.link = c.first
	c.first = pd
	unlock(&c.lock)
}

//go:linkname poll_runtime_pollReset internal..z2fpoll.runtime_pollReset
func poll_runtime_pollReset(ctx uintptr, mode int) int {
	pd := (*pollDesc)(unsafe.Pointer(ctx))
	err := netpollcheckerr(pd, int32(mode))
	if err != 0 {
		return err
	}
	if mode == 'r' {
		pd.rg = 0
	} else if mode == 'w' {
		pd.wg = 0
	}
	return 0
}
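// For orientation, internal/poll drives the lifecycle functions above
// roughly as follows (a sketch of the caller side, not verbatim
// internal/poll code):
//
//	runtime_pollServerInit()            // once, guarded by sync.Once
//	ctx, errno := runtime_pollOpen(fd)  // register fd with the poller
//	...                                 // runtime_pollWait / runtime_pollReset per IO
//	runtime_pollUnblock(ctx)            // on close: wake waiters, mark closing
//	runtime_pollClose(ctx)              // then unregister fd and free the descriptor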
//go:linkname poll_runtime_pollWait internal..z2fpoll.runtime_pollWait
func poll_runtime_pollWait(ctx uintptr, mode int) int {
	pd := (*pollDesc)(unsafe.Pointer(ctx))
	err := netpollcheckerr(pd, int32(mode))
	if err != 0 {
		return err
	}
	// As for now only Solaris, illumos, AIX, and Hurd use level-triggered IO.
	if GOOS == "solaris" || GOOS == "illumos" || GOOS == "aix" || GOOS == "hurd" {
		netpollarm(pd, mode)
	}
	for !netpollblock(pd, int32(mode), false) {
		err = netpollcheckerr(pd, int32(mode))
		if err != 0 {
			return err
		}
		// This can happen if the timeout fired and unblocked us,
		// but the timeout was reset before we had a chance to run.
		// Pretend it has not happened and retry.
	}
	return 0
}

//go:linkname poll_runtime_pollWaitCanceled internal..z2fpoll.runtime_pollWaitCanceled
func poll_runtime_pollWaitCanceled(ctx uintptr, mode int) {
	pd := (*pollDesc)(unsafe.Pointer(ctx))
	// This function is used only on windows after a failed attempt to cancel
	// a pending async IO operation. Wait for ioready, ignore closing or timeouts.
	for !netpollblock(pd, int32(mode), true) {
	}
}
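// A typical blocked read travels through this file as follows (a sketch;
// the platform half is epoll on Linux, kqueue on the BSDs, etc.):
//
//	read(2) returns EAGAIN
//	-> runtime_pollWait(ctx, 'r')  (above)
//	-> netpollblock: rg: nil -> pdWait, then gopark via netpollblockcommit
//	... kernel reports readiness ...
//	-> netpoll (platform-specific) -> netpollready -> netpollunblock
//	-> the goroutine is scheduled again and retries read(2)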
//go:linkname poll_runtime_pollSetDeadline internal..z2fpoll.runtime_pollSetDeadline
func poll_runtime_pollSetDeadline(ctx uintptr, d int64, mode int) {
	pd := (*pollDesc)(unsafe.Pointer(ctx))
	lock(&pd.lock)
	if pd.closing {
		unlock(&pd.lock)
		return
	}
	rd0, wd0 := pd.rd, pd.wd
	combo0 := rd0 > 0 && rd0 == wd0
	if d > 0 {
		d += nanotime()
		if d <= 0 {
			// If the user has a deadline in the future, but the delay calculation
			// overflows, then set the deadline to the maximum possible value.
			d = 1<<63 - 1
		}
	}
	if mode == 'r' || mode == 'r'+'w' {
		pd.rd = d
	}
	if mode == 'w' || mode == 'r'+'w' {
		pd.wd = d
	}
	combo := pd.rd > 0 && pd.rd == pd.wd
	rtf := netpollReadDeadline
	if combo {
		rtf = netpollDeadline
	}
	if pd.rt.f == nil {
		if pd.rd > 0 {
			pd.rt.f = rtf
			// Copy current seq into the timer arg.
			// Timer func will check the seq against current descriptor seq,
			// if they differ the descriptor was reused or timers were reset.
			pd.rt.arg = pd
			pd.rt.seq = pd.rseq
			resettimer(&pd.rt, pd.rd)
		}
	} else if pd.rd != rd0 || combo != combo0 {
		pd.rseq++ // invalidate current timers
		if pd.rd > 0 {
			modtimer(&pd.rt, pd.rd, 0, rtf, pd, pd.rseq)
		} else {
			deltimer(&pd.rt)
			pd.rt.f = nil
		}
	}
	if pd.wt.f == nil {
		if pd.wd > 0 && !combo {
			pd.wt.f = netpollWriteDeadline
			pd.wt.arg = pd
			pd.wt.seq = pd.wseq
			resettimer(&pd.wt, pd.wd)
		}
	} else if pd.wd != wd0 || combo != combo0 {
		pd.wseq++ // invalidate current timers
		if pd.wd > 0 && !combo {
			modtimer(&pd.wt, pd.wd, 0, netpollWriteDeadline, pd, pd.wseq)
		} else {
			deltimer(&pd.wt)
			pd.wt.f = nil
		}
	}
	// If we set the new deadline in the past, unblock currently pending IO if any.
	var rg, wg *g
	if pd.rd < 0 || pd.wd < 0 {
		atomic.StorepNoWB(noescape(unsafe.Pointer(&wg)), nil) // full memory barrier between stores to rd/wd and load of rg/wg in netpollunblock
		if pd.rd < 0 {
			rg = netpollunblock(pd, 'r', false)
		}
		if pd.wd < 0 {
			wg = netpollunblock(pd, 'w', false)
		}
	}
	unlock(&pd.lock)
	if rg != nil {
		netpollgoready(rg, 3)
	}
	if wg != nil {
		netpollgoready(wg, 3)
	}
}

//go:linkname poll_runtime_pollUnblock internal..z2fpoll.runtime_pollUnblock
func poll_runtime_pollUnblock(ctx uintptr) {
	pd := (*pollDesc)(unsafe.Pointer(ctx))
	lock(&pd.lock)
	if pd.closing {
		throw("runtime: unblock on closing polldesc")
	}
	pd.closing = true
	pd.rseq++
	pd.wseq++
	var rg, wg *g
	atomic.StorepNoWB(noescape(unsafe.Pointer(&rg)), nil) // full memory barrier between store to closing and read of rg/wg in netpollunblock
	rg = netpollunblock(pd, 'r', false)
	wg = netpollunblock(pd, 'w', false)
	if pd.rt.f != nil {
		deltimer(&pd.rt)
		pd.rt.f = nil
	}
	if pd.wt.f != nil {
		deltimer(&pd.wt)
		pd.wt.f = nil
	}
	unlock(&pd.lock)
	if rg != nil {
		netpollgoready(rg, 3)
	}
	if wg != nil {
		netpollgoready(wg, 3)
	}
}

// netpollready is called by the platform-specific netpoll function.
// It declares that the fd associated with pd is ready for I/O.
// The toRun argument is used to build a list of goroutines to return
// from netpoll. The mode argument is 'r', 'w', or 'r'+'w' to indicate
// whether the fd is ready for reading or writing or both.
//
// This may run while the world is stopped, so write barriers are not allowed.
//go:nowritebarrier
func netpollready(toRun *gList, pd *pollDesc, mode int32) {
	var rg, wg *g
	if mode == 'r' || mode == 'r'+'w' {
		rg = netpollunblock(pd, 'r', true)
	}
	if mode == 'w' || mode == 'r'+'w' {
		wg = netpollunblock(pd, 'w', true)
	}
	if rg != nil {
		toRun.push(rg)
	}
	if wg != nil {
		toRun.push(wg)
	}
}

func netpollcheckerr(pd *pollDesc, mode int32) int {
	if pd.closing {
		return 1 // ErrFileClosing or ErrNetClosing
	}
	if (mode == 'r' && pd.rd < 0) || (mode == 'w' && pd.wd < 0) {
		return 2 // ErrTimeout
	}
	// Report an event scanning error only on a read event.
	// An error on a write event will be captured in a subsequent
	// write call that is able to report a more specific error.
	if mode == 'r' && pd.everr {
		return 3 // ErrNotPollable
	}
	return 0
}
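// Ordering note, summarizing the barrier comments in this file: the
// close/deadline paths do "store to closing/rd/wd; full barrier; load
// rg/wg", while netpollblock does the mirror image: "CAS rg/wg to pdWait;
// recheck closing/rd/wd via netpollcheckerr". Whichever side runs second
// is guaranteed to observe the other's store, so a goroutine cannot park
// and then miss a concurrent timeout or close.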
func netpollblockcommit(gp *g, gpp unsafe.Pointer) bool {
	r := atomic.Casuintptr((*uintptr)(gpp), pdWait, uintptr(unsafe.Pointer(gp)))
	if r {
		// Bump the count of goroutines waiting for the poller.
		// The scheduler uses this to decide whether to block
		// waiting for the poller if there is nothing else to do.
		atomic.Xadd(&netpollWaiters, 1)
	}
	return r
}

func netpollgoready(gp *g, traceskip int) {
	atomic.Xadd(&netpollWaiters, -1)
	goready(gp, traceskip+1)
}

// netpollblock returns true if IO is ready, or false if it timed out or was closed.
// waitio - wait only for completed IO, ignore errors.
func netpollblock(pd *pollDesc, mode int32, waitio bool) bool {
	gpp := &pd.rg
	if mode == 'w' {
		gpp = &pd.wg
	}

	// set the gpp semaphore to pdWait
	for {
		old := *gpp
		if old == pdReady {
			*gpp = 0
			return true
		}
		if old != 0 {
			throw("runtime: double wait")
		}
		if atomic.Casuintptr(gpp, 0, pdWait) {
			break
		}
	}

	// need to recheck error states after setting gpp to pdWait
	// this is necessary because runtime_pollUnblock/runtime_pollSetDeadline/deadlineimpl
	// do the opposite: store to closing/rd/wd, membarrier, load of rg/wg
	if waitio || netpollcheckerr(pd, mode) == 0 {
		gopark(netpollblockcommit, unsafe.Pointer(gpp), waitReasonIOWait, traceEvGoBlockNet, 5)
	}
	// be careful to not lose concurrent pdReady notification
	old := atomic.Xchguintptr(gpp, 0)
	if old > pdWait {
		throw("runtime: corrupted polldesc")
	}
	return old == pdReady
}

func netpollunblock(pd *pollDesc, mode int32, ioready bool) *g {
	gpp := &pd.rg
	if mode == 'w' {
		gpp = &pd.wg
	}

	for {
		old := *gpp
		if old == pdReady {
			return nil
		}
		if old == 0 && !ioready {
			// Only set pdReady for ioready. runtime_pollWait
			// will check for timeout/cancel before waiting.
			return nil
		}
		var new uintptr
		if ioready {
			new = pdReady
		}
		if atomic.Casuintptr(gpp, old, new) {
			if old == pdReady || old == pdWait {
				old = 0
			}
			return (*g)(unsafe.Pointer(old))
		}
	}
}
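// Deadline plumbing, for orientation: runtime_pollSetDeadline above arms
// pd.rt/pd.wt with one of the three netpoll*Deadline functions below and
// captures pd.rseq/pd.wseq in the timer. When a timer fires, the seq it
// carries is compared against the descriptor's current seq; a mismatch
// means the deadline was moved or the descriptor was reused, and the
// firing is ignored.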
func netpolldeadlineimpl(pd *pollDesc, seq uintptr, read, write bool) {
	lock(&pd.lock)
	// Seq arg is seq when the timer was set.
	// If it's stale, ignore the timer event.
	currentSeq := pd.rseq
	if !read {
		currentSeq = pd.wseq
	}
	if seq != currentSeq {
		// The descriptor was reused or timers were reset.
		unlock(&pd.lock)
		return
	}
	var rg *g
	if read {
		if pd.rd <= 0 || pd.rt.f == nil {
			throw("runtime: inconsistent read deadline")
		}
		pd.rd = -1
		atomic.StorepNoWB(unsafe.Pointer(&pd.rt.f), nil) // full memory barrier between store to rd and load of rg in netpollunblock
		rg = netpollunblock(pd, 'r', false)
	}
	var wg *g
	if write {
		if pd.wd <= 0 || pd.wt.f == nil && !read {
			throw("runtime: inconsistent write deadline")
		}
		pd.wd = -1
		atomic.StorepNoWB(unsafe.Pointer(&pd.wt.f), nil) // full memory barrier between store to wd and load of wg in netpollunblock
		wg = netpollunblock(pd, 'w', false)
	}
	unlock(&pd.lock)
	if rg != nil {
		netpollgoready(rg, 0)
	}
	if wg != nil {
		netpollgoready(wg, 0)
	}
}

func netpollDeadline(arg interface{}, seq uintptr) {
	netpolldeadlineimpl(arg.(*pollDesc), seq, true, true)
}

func netpollReadDeadline(arg interface{}, seq uintptr) {
	netpolldeadlineimpl(arg.(*pollDesc), seq, true, false)
}

func netpollWriteDeadline(arg interface{}, seq uintptr) {
	netpolldeadlineimpl(arg.(*pollDesc), seq, false, true)
}

func (c *pollCache) alloc() *pollDesc {
	lock(&c.lock)
	if c.first == nil {
		const pdSize = unsafe.Sizeof(pollDesc{})
		n := pollBlockSize / pdSize
		if n == 0 {
			n = 1
		}
		// Must be in non-GC memory because can be referenced
		// only from epoll/kqueue internals.
		mem := persistentalloc(n*pdSize, 0, &memstats.other_sys)
		for i := uintptr(0); i < n; i++ {
			pd := (*pollDesc)(add(mem, i*pdSize))
			pd.link = c.first
			c.first = pd
		}
	}
	pd := c.first
	c.first = pd.link
	unlock(&c.lock)
	return pd
}
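// For a new platform, the platform-specific half described at the top of
// this file would take roughly this shape (a hedged outline of the
// contract, not a working implementation; some platforms also provide
// netpollclose and netpollarm, used above):
//
//	func netpollinit()                               // create the kernel poller (e.g. an epoll fd)
//	func netpollopen(fd uintptr, pd *pollDesc) int32 // register fd edge-triggered; return an errno
//	func netpoll(delta int64) gList                  // wait up to delta ns; collect goroutines via netpollready
//	func netpollBreak()                              // wake a netpoll blocked in the kernel
//	func netpollIsPollDescriptor(fd uintptr) bool    // report fds owned by the poller itself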