1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// +build darwin dragonfly freebsd linux nacl netbsd openbsd solaris
6
7package net
8
9import (
10	"io"
11	"os"
12	"runtime"
13	"sync/atomic"
14	"syscall"
15	"time"
16)
17
// netFD is the package's network file descriptor. It groups the system
// descriptor with the socket parameters, the endpoint addresses and the
// poller state that the I/O methods in this file operate on.
type netFD struct {
	// locking/lifetime of sysfd + serialize access to Read and Write methods
	fdmu fdMutex

	// immutable until Close
	sysfd       int    // system descriptor passed to the syscall layer; destroy resets it to -1
	family      int    // stored by newFD and copied to descriptors returned by accept
	sotype      int    // stored by newFD and copied to descriptors returned by accept
	isConnected bool   // NOTE(review): never assigned in the code visible here; confirm where it is set
	net         string // network name; forms the prefix of the string built by name
	laddr       Addr   // local address, assigned by setAddr
	raddr       Addr   // remote address, assigned by setAddr

	// wait server
	pd pollDesc
}
35
// sysInit performs no work in this build configuration.
func sysInit() {}
38
39func dial(network string, ra Addr, dialer func(time.Time) (Conn, error), deadline time.Time) (Conn, error) {
40	return dialer(deadline)
41}
42
43func newFD(sysfd, family, sotype int, net string) (*netFD, error) {
44	return &netFD{sysfd: sysfd, family: family, sotype: sotype, net: net}, nil
45}
46
47func (fd *netFD) init() error {
48	if err := fd.pd.Init(fd); err != nil {
49		return err
50	}
51	return nil
52}
53
54func (fd *netFD) setAddr(laddr, raddr Addr) {
55	fd.laddr = laddr
56	fd.raddr = raddr
57	runtime.SetFinalizer(fd, (*netFD).Close)
58}
59
60func (fd *netFD) name() string {
61	var ls, rs string
62	if fd.laddr != nil {
63		ls = fd.laddr.String()
64	}
65	if fd.raddr != nil {
66		rs = fd.raddr.String()
67	}
68	return fd.net + ":" + ls + "->" + rs
69}
70
// connect starts a connection from fd.sysfd to the remote address ra and
// waits until it is established or fails.
//
// The la argument is not used by this implementation. A zero deadline
// means no write deadline is installed while waiting. A nil cancel
// channel disables cancellation.
//
// It returns nil on success, errTimeout if the initial connect call
// succeeded but the deadline had already passed, errCanceled if waiting
// for writability failed and a receive from cancel was ready, and
// otherwise the poller's error or an error built by os.NewSyscallError.
//
// NOTE(review): the deferred close(done) runs before the deferred
// setWriteDeadline(noDeadline). If cancel is ready at that moment the
// helper goroutine's select may still pick the cancel case and call
// setWriteDeadline(aLongTimeAgo) after connect has returned. Also, no
// deadline reset is deferred at all when deadline is zero. Confirm
// whether callers tolerate this.
func (fd *netFD) connect(la, ra syscall.Sockaddr, deadline time.Time, cancel <-chan struct{}) error {
	// Do not need to call fd.writeLock here,
	// because fd is not yet accessible to user,
	// so no concurrent operations are possible.
	switch err := connectFunc(fd.sysfd, ra); err {
	case syscall.EINPROGRESS, syscall.EALREADY, syscall.EINTR:
		// The attempt has not completed yet: leave the switch and
		// wait for the result below.
	case nil, syscall.EISCONN:
		// Connected immediately. Still honor a deadline that has
		// already expired before reporting success.
		if !deadline.IsZero() && deadline.Before(time.Now()) {
			return errTimeout
		}
		if err := fd.init(); err != nil {
			return err
		}
		return nil
	case syscall.EINVAL:
		// On Solaris we can see EINVAL if the socket has
		// already been accepted and closed by the server.
		// Treat this as a successful connection--writes to
		// the socket will see EOF.  For details and a test
		// case in C see https://golang.org/issue/6828.
		if runtime.GOOS == "solaris" {
			return nil
		}
		fallthrough
	default:
		return os.NewSyscallError("connect", err)
	}
	// The poll descriptor must be initialized before it can be waited on.
	if err := fd.init(); err != nil {
		return err
	}
	if !deadline.IsZero() {
		// Bound the wait below and clear the write deadline again
		// when connect returns.
		fd.setWriteDeadline(deadline)
		defer fd.setWriteDeadline(noDeadline)
	}
	if cancel != nil {
		// done tells the helper goroutine to exit once connect returns.
		done := make(chan bool)
		defer close(done)
		go func() {
			select {
			case <-cancel:
				// Force the runtime's poller to immediately give
				// up waiting for writability.
				fd.setWriteDeadline(aLongTimeAgo)
			case <-done:
			}
		}()
	}
	for {
		// Performing multiple connect system calls on a
		// non-blocking socket under Unix variants does not
		// necessarily result in earlier errors being
		// returned. Instead, once runtime-integrated network
		// poller tells us that the socket is ready, get the
		// SO_ERROR socket option to see if the connection
		// succeeded or failed. See issue 7474 for further
		// details.
		if err := fd.pd.WaitWrite(); err != nil {
			// Report cancellation in preference to the poller's
			// error when cancel is ready; the receive is
			// non-blocking thanks to the default case.
			select {
			case <-cancel:
				return errCanceled
			default:
			}
			return err
		}
		nerr, err := getsockoptIntFunc(fd.sysfd, syscall.SOL_SOCKET, syscall.SO_ERROR)
		if err != nil {
			return os.NewSyscallError("getsockopt", err)
		}
		switch err := syscall.Errno(nerr); err {
		case syscall.EINPROGRESS, syscall.EALREADY, syscall.EINTR:
			// Still in progress: loop and wait for writability again.
		case syscall.Errno(0), syscall.EISCONN:
			return nil
		default:
			return os.NewSyscallError("getsockopt", err)
		}
	}
}
148
// destroy releases what fd holds: it closes the poll descriptor, closes
// the system descriptor, marks sysfd invalid (-1) and clears fd's
// finalizer. Within this file it is called when fdmu.Decref or
// fdmu.RWUnlock returns true.
func (fd *netFD) destroy() {
	// Poller may want to unregister fd in readiness notification mechanism,
	// so this must be executed before closeFunc.
	fd.pd.Close()
	closeFunc(fd.sysfd)
	fd.sysfd = -1
	// The descriptor is gone, so the finalizer set by setAddr is no
	// longer needed.
	runtime.SetFinalizer(fd, nil)
}
157
158// Add a reference to this fd.
159// Returns an error if the fd cannot be used.
160func (fd *netFD) incref() error {
161	if !fd.fdmu.Incref() {
162		return errClosing
163	}
164	return nil
165}
166
167// Remove a reference to this FD and close if we've been asked to do so
168// (and there are no references left).
169func (fd *netFD) decref() {
170	if fd.fdmu.Decref() {
171		fd.destroy()
172	}
173}
174
175// Add a reference to this fd and lock for reading.
176// Returns an error if the fd cannot be used.
177func (fd *netFD) readLock() error {
178	if !fd.fdmu.RWLock(true) {
179		return errClosing
180	}
181	return nil
182}
183
184// Unlock for reading and remove a reference to this FD.
185func (fd *netFD) readUnlock() {
186	if fd.fdmu.RWUnlock(true) {
187		fd.destroy()
188	}
189}
190
191// Add a reference to this fd and lock for writing.
192// Returns an error if the fd cannot be used.
193func (fd *netFD) writeLock() error {
194	if !fd.fdmu.RWLock(false) {
195		return errClosing
196	}
197	return nil
198}
199
200// Unlock for writing and remove a reference to this FD.
201func (fd *netFD) writeUnlock() {
202	if fd.fdmu.RWUnlock(false) {
203		fd.destroy()
204	}
205}
206
207func (fd *netFD) Close() error {
208	if !fd.fdmu.IncrefAndClose() {
209		return errClosing
210	}
211	// Unblock any I/O.  Once it all unblocks and returns,
212	// so that it cannot be referring to fd.sysfd anymore,
213	// the final decref will close fd.sysfd.  This should happen
214	// fairly quickly, since all the I/O is non-blocking, and any
215	// attempts to block in the pollDesc will return errClosing.
216	fd.pd.Evict()
217	fd.decref()
218	return nil
219}
220
221func (fd *netFD) shutdown(how int) error {
222	if err := fd.incref(); err != nil {
223		return err
224	}
225	defer fd.decref()
226	return os.NewSyscallError("shutdown", syscall.Shutdown(fd.sysfd, how))
227}
228
229func (fd *netFD) closeRead() error {
230	return fd.shutdown(syscall.SHUT_RD)
231}
232
233func (fd *netFD) closeWrite() error {
234	return fd.shutdown(syscall.SHUT_WR)
235}
236
237func (fd *netFD) Read(p []byte) (n int, err error) {
238	if err := fd.readLock(); err != nil {
239		return 0, err
240	}
241	defer fd.readUnlock()
242	if err := fd.pd.PrepareRead(); err != nil {
243		return 0, err
244	}
245	for {
246		n, err = syscall.Read(fd.sysfd, p)
247		if err != nil {
248			n = 0
249			if err == syscall.EAGAIN {
250				if err = fd.pd.WaitRead(); err == nil {
251					continue
252				}
253			}
254		}
255		err = fd.eofError(n, err)
256		break
257	}
258	if _, ok := err.(syscall.Errno); ok {
259		err = os.NewSyscallError("read", err)
260	}
261	return
262}
263
264func (fd *netFD) readFrom(p []byte) (n int, sa syscall.Sockaddr, err error) {
265	if err := fd.readLock(); err != nil {
266		return 0, nil, err
267	}
268	defer fd.readUnlock()
269	if err := fd.pd.PrepareRead(); err != nil {
270		return 0, nil, err
271	}
272	for {
273		n, sa, err = syscall.Recvfrom(fd.sysfd, p, 0)
274		if err != nil {
275			n = 0
276			if err == syscall.EAGAIN {
277				if err = fd.pd.WaitRead(); err == nil {
278					continue
279				}
280			}
281		}
282		err = fd.eofError(n, err)
283		break
284	}
285	if _, ok := err.(syscall.Errno); ok {
286		err = os.NewSyscallError("recvfrom", err)
287	}
288	return
289}
290
291func (fd *netFD) readMsg(p []byte, oob []byte) (n, oobn, flags int, sa syscall.Sockaddr, err error) {
292	if err := fd.readLock(); err != nil {
293		return 0, 0, 0, nil, err
294	}
295	defer fd.readUnlock()
296	if err := fd.pd.PrepareRead(); err != nil {
297		return 0, 0, 0, nil, err
298	}
299	for {
300		n, oobn, flags, sa, err = syscall.Recvmsg(fd.sysfd, p, oob, 0)
301		if err != nil {
302			// TODO(dfc) should n and oobn be set to 0
303			if err == syscall.EAGAIN {
304				if err = fd.pd.WaitRead(); err == nil {
305					continue
306				}
307			}
308		}
309		err = fd.eofError(n, err)
310		break
311	}
312	if _, ok := err.(syscall.Errno); ok {
313		err = os.NewSyscallError("recvmsg", err)
314	}
315	return
316}
317
318func (fd *netFD) Write(p []byte) (nn int, err error) {
319	if err := fd.writeLock(); err != nil {
320		return 0, err
321	}
322	defer fd.writeUnlock()
323	if err := fd.pd.PrepareWrite(); err != nil {
324		return 0, err
325	}
326	for {
327		var n int
328		n, err = syscall.Write(fd.sysfd, p[nn:])
329		if n > 0 {
330			nn += n
331		}
332		if nn == len(p) {
333			break
334		}
335		if err == syscall.EAGAIN {
336			if err = fd.pd.WaitWrite(); err == nil {
337				continue
338			}
339		}
340		if err != nil {
341			break
342		}
343		if n == 0 {
344			err = io.ErrUnexpectedEOF
345			break
346		}
347	}
348	if _, ok := err.(syscall.Errno); ok {
349		err = os.NewSyscallError("write", err)
350	}
351	return nn, err
352}
353
354func (fd *netFD) writeTo(p []byte, sa syscall.Sockaddr) (n int, err error) {
355	if err := fd.writeLock(); err != nil {
356		return 0, err
357	}
358	defer fd.writeUnlock()
359	if err := fd.pd.PrepareWrite(); err != nil {
360		return 0, err
361	}
362	for {
363		err = syscall.Sendto(fd.sysfd, p, 0, sa)
364		if err == syscall.EAGAIN {
365			if err = fd.pd.WaitWrite(); err == nil {
366				continue
367			}
368		}
369		break
370	}
371	if err == nil {
372		n = len(p)
373	}
374	if _, ok := err.(syscall.Errno); ok {
375		err = os.NewSyscallError("sendto", err)
376	}
377	return
378}
379
380func (fd *netFD) writeMsg(p []byte, oob []byte, sa syscall.Sockaddr) (n int, oobn int, err error) {
381	if err := fd.writeLock(); err != nil {
382		return 0, 0, err
383	}
384	defer fd.writeUnlock()
385	if err := fd.pd.PrepareWrite(); err != nil {
386		return 0, 0, err
387	}
388	for {
389		n, err = syscall.SendmsgN(fd.sysfd, p, oob, sa, 0)
390		if err == syscall.EAGAIN {
391			if err = fd.pd.WaitWrite(); err == nil {
392				continue
393			}
394		}
395		break
396	}
397	if err == nil {
398		oobn = len(oob)
399	}
400	if _, ok := err.(syscall.Errno); ok {
401		err = os.NewSyscallError("sendmsg", err)
402	}
403	return
404}
405
406func (fd *netFD) accept() (netfd *netFD, err error) {
407	if err := fd.readLock(); err != nil {
408		return nil, err
409	}
410	defer fd.readUnlock()
411
412	var s int
413	var rsa syscall.Sockaddr
414	if err = fd.pd.PrepareRead(); err != nil {
415		return nil, err
416	}
417	for {
418		s, rsa, err = accept(fd.sysfd)
419		if err != nil {
420			nerr, ok := err.(*os.SyscallError)
421			if !ok {
422				return nil, err
423			}
424			switch nerr.Err {
425			case syscall.EAGAIN:
426				if err = fd.pd.WaitRead(); err == nil {
427					continue
428				}
429			case syscall.ECONNABORTED:
430				// This means that a socket on the
431				// listen queue was closed before we
432				// Accept()ed it; it's a silly error,
433				// so try again.
434				continue
435			}
436			return nil, err
437		}
438		break
439	}
440
441	if netfd, err = newFD(s, fd.family, fd.sotype, fd.net); err != nil {
442		closeFunc(s)
443		return nil, err
444	}
445	if err = netfd.init(); err != nil {
446		fd.Close()
447		return nil, err
448	}
449	lsa, _ := syscall.Getsockname(netfd.sysfd)
450	netfd.setAddr(netfd.addrFunc()(lsa), netfd.addrFunc()(rsa))
451	return netfd, nil
452}
453
// Use a helper function to call fcntl.  This is defined in C in
// libgo/runtime.  As used by dupCloseOnExec, the first result is the
// call's return value and the second is the errno value.
//extern __go_fcntl_uintptr
func fcntl(uintptr, uintptr, uintptr) (uintptr, uintptr)
458
// tryDupCloexec indicates whether F_DUPFD_CLOEXEC should be used.
// It starts at 1; dupCloseOnExec atomically sets it to 0 once fcntl
// reports EINVAL, i.e. when the kernel doesn't support the command.
var tryDupCloexec = int32(1)
462
// dupCloseOnExec duplicates fd and returns the new descriptor.
//
// While tryDupCloexec is 1 and the platform defines F_DUPFD_CLOEXEC, it
// issues a single fcntl(F_DUPFD_CLOEXEC) call. If that call reports
// EINVAL (or EBADF on darwin, which is mapped to EINVAL), it clears
// tryDupCloexec and falls back to dupCloseOnExecOld for this and all
// later calls. Any other fcntl failure is returned as
// os.NewSyscallError("fcntl", ...) with newfd set to -1.
func dupCloseOnExec(fd int) (newfd int, err error) {
	if atomic.LoadInt32(&tryDupCloexec) == 1 && syscall.F_DUPFD_CLOEXEC != 0 {
		// The fcntl helper is bracketed by Entersyscall/Exitsyscall.
		syscall.Entersyscall()
		r0, errno := fcntl(uintptr(fd), syscall.F_DUPFD_CLOEXEC, 0)
		syscall.Exitsyscall()
		e1 := syscall.Errno(errno)
		if runtime.GOOS == "darwin" && e1 == syscall.EBADF {
			// On OS X 10.6 and below (but we only support
			// >= 10.6), F_DUPFD_CLOEXEC is unsupported
			// and fcntl there falls back (undocumented)
			// to doing an ioctl instead, returning EBADF
			// in this case because fd is not of the
			// expected device fd type.  Treat it as
			// EINVAL instead, so we fall back to the
			// normal dup path.
			// TODO: only do this on 10.6 if we can detect 10.6
			// cheaply.
			e1 = syscall.EINVAL
		}
		switch e1 {
		case 0:
			return int(r0), nil
		case syscall.EINVAL:
			// Old kernel. Fall back to the portable way
			// from now on.
			atomic.StoreInt32(&tryDupCloexec, 0)
		default:
			return -1, os.NewSyscallError("fcntl", e1)
		}
	}
	return dupCloseOnExecOld(fd)
}
495
// dupCloseOnExecOld is the traditional way to dup an fd and mark the
// copy close-on-exec, using two system calls. Both calls are made
// while holding syscall.ForkLock for reading. On failure it returns -1
// and an os.NewSyscallError("dup", ...) error.
func dupCloseOnExecOld(fd int) (newfd int, err error) {
	syscall.ForkLock.RLock()
	defer syscall.ForkLock.RUnlock()

	dupfd, dupErr := syscall.Dup(fd)
	if dupErr != nil {
		return -1, os.NewSyscallError("dup", dupErr)
	}
	syscall.CloseOnExec(dupfd)
	return dupfd, nil
}
508
509func (fd *netFD) dup() (f *os.File, err error) {
510	ns, err := dupCloseOnExec(fd.sysfd)
511	if err != nil {
512		return nil, err
513	}
514
515	// We want blocking mode for the new fd, hence the double negative.
516	// This also puts the old fd into blocking mode, meaning that
517	// I/O will block the thread instead of letting us use the epoll server.
518	// Everything will still work, just with more threads.
519	if err = syscall.SetNonblock(ns, false); err != nil {
520		return nil, os.NewSyscallError("setnonblock", err)
521	}
522
523	return os.NewFile(uintptr(ns), fd.name()), nil
524}
525