1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// +build darwin dragonfly freebsd linux nacl netbsd openbsd solaris
6
7package net
8
9import (
10	"io"
11	"os"
12	"runtime"
13	"sync/atomic"
14	"syscall"
15	"time"
16)
17
// netFD is the network file descriptor used by this package on the
// Unix-like systems named in the build constraint of this file.
type netFD struct {
	// locking/lifetime of sysfd + serialize access to Read and Write methods
	fdmu fdMutex

	// immutable until Close
	sysfd       int // OS-level descriptor; destroy closes it and sets it to -1
	family      int // recorded by newFD and handed on to accepted connections
	sotype      int // recorded by newFD and handed on to accepted connections
	isConnected bool
	net         string // network name; forms the prefix of the string built by name
	laddr       Addr   // local address, assigned by setAddr
	raddr       Addr   // remote address, assigned by setAddr

	// wait server
	pd pollDesc
}
35
// sysInit performs no work on these platforms; its body is intentionally empty.
func sysInit() {}
38
39func dial(network string, ra Addr, dialer func(time.Time) (Conn, error), deadline time.Time) (Conn, error) {
40	return dialer(deadline)
41}
42
43func newFD(sysfd, family, sotype int, net string) (*netFD, error) {
44	return &netFD{sysfd: sysfd, family: family, sotype: sotype, net: net}, nil
45}
46
47func (fd *netFD) init() error {
48	if err := fd.pd.Init(fd); err != nil {
49		return err
50	}
51	return nil
52}
53
54func (fd *netFD) setAddr(laddr, raddr Addr) {
55	fd.laddr = laddr
56	fd.raddr = raddr
57	runtime.SetFinalizer(fd, (*netFD).Close)
58}
59
60func (fd *netFD) name() string {
61	var ls, rs string
62	if fd.laddr != nil {
63		ls = fd.laddr.String()
64	}
65	if fd.raddr != nil {
66		rs = fd.raddr.String()
67	}
68	return fd.net + ":" + ls + "->" + rs
69}
70
// connect starts a connection to ra on fd.sysfd and waits until it
// succeeds, fails, or the write deadline set from deadline expires.
//
// The la argument is not used by this implementation. A zero deadline
// means no deadline is installed on the poll descriptor.
//
// It returns nil on success, errTimeout if the connection completed
// immediately but deadline had already passed, an error from fd.init or
// the poller, or an *os.SyscallError tagged "connect" or "getsockopt".
func (fd *netFD) connect(la, ra syscall.Sockaddr, deadline time.Time) error {
	// Do not need to call fd.writeLock here,
	// because fd is not yet accessible to user,
	// so no concurrent operations are possible.
	switch err := connectFunc(fd.sysfd, ra); err {
	case syscall.EINPROGRESS, syscall.EALREADY, syscall.EINTR:
		// Connection is still being established: leave the switch and
		// wait for writability below.
	case nil, syscall.EISCONN:
		// Connected right away; only the deadline check and poller
		// registration remain.
		if !deadline.IsZero() && deadline.Before(time.Now()) {
			return errTimeout
		}
		if err := fd.init(); err != nil {
			return err
		}
		return nil
	case syscall.EINVAL:
		// On Solaris we can see EINVAL if the socket has
		// already been accepted and closed by the server.
		// Treat this as a successful connection--writes to
		// the socket will see EOF.  For details and a test
		// case in C see https://golang.org/issue/6828.
		// NOTE(review): this path returns without calling fd.init.
		if runtime.GOOS == "solaris" {
			return nil
		}
		fallthrough
	default:
		return os.NewSyscallError("connect", err)
	}
	if err := fd.init(); err != nil {
		return err
	}
	if !deadline.IsZero() {
		// Bound the wait below, and clear the deadline again on return.
		fd.setWriteDeadline(deadline)
		defer fd.setWriteDeadline(noDeadline)
	}
	for {
		// Performing multiple connect system calls on a
		// non-blocking socket under Unix variants does not
		// necessarily result in earlier errors being
		// returned. Instead, once runtime-integrated network
		// poller tells us that the socket is ready, get the
		// SO_ERROR socket option to see if the connection
		// succeeded or failed. See issue 7474 for further
		// details.
		if err := fd.pd.WaitWrite(); err != nil {
			return err
		}
		nerr, err := getsockoptIntFunc(fd.sysfd, syscall.SOL_SOCKET, syscall.SO_ERROR)
		if err != nil {
			return os.NewSyscallError("getsockopt", err)
		}
		switch err := syscall.Errno(nerr); err {
		case syscall.EINPROGRESS, syscall.EALREADY, syscall.EINTR:
			// Still in progress: go around and wait again.
		case syscall.Errno(0), syscall.EISCONN:
			return nil
		default:
			return os.NewSyscallError("getsockopt", err)
		}
	}
}
130
// destroy releases the resources held by fd: it closes the poll
// descriptor, closes the OS descriptor, marks sysfd as -1 and clears the
// finalizer on fd. Within this file it is called once the fdMutex reports
// that the descriptor should be torn down (see decref and the unlock
// helpers).
func (fd *netFD) destroy() {
	// Poller may want to unregister fd in readiness notification mechanism,
	// so this must be executed before closeFunc.
	fd.pd.Close()
	closeFunc(fd.sysfd)
	fd.sysfd = -1
	runtime.SetFinalizer(fd, nil)
}
139
140// Add a reference to this fd.
141// Returns an error if the fd cannot be used.
142func (fd *netFD) incref() error {
143	if !fd.fdmu.Incref() {
144		return errClosing
145	}
146	return nil
147}
148
149// Remove a reference to this FD and close if we've been asked to do so
150// (and there are no references left).
151func (fd *netFD) decref() {
152	if fd.fdmu.Decref() {
153		fd.destroy()
154	}
155}
156
157// Add a reference to this fd and lock for reading.
158// Returns an error if the fd cannot be used.
159func (fd *netFD) readLock() error {
160	if !fd.fdmu.RWLock(true) {
161		return errClosing
162	}
163	return nil
164}
165
166// Unlock for reading and remove a reference to this FD.
167func (fd *netFD) readUnlock() {
168	if fd.fdmu.RWUnlock(true) {
169		fd.destroy()
170	}
171}
172
173// Add a reference to this fd and lock for writing.
174// Returns an error if the fd cannot be used.
175func (fd *netFD) writeLock() error {
176	if !fd.fdmu.RWLock(false) {
177		return errClosing
178	}
179	return nil
180}
181
182// Unlock for writing and remove a reference to this FD.
183func (fd *netFD) writeUnlock() {
184	if fd.fdmu.RWUnlock(false) {
185		fd.destroy()
186	}
187}
188
189func (fd *netFD) Close() error {
190	if !fd.fdmu.IncrefAndClose() {
191		return errClosing
192	}
193	// Unblock any I/O.  Once it all unblocks and returns,
194	// so that it cannot be referring to fd.sysfd anymore,
195	// the final decref will close fd.sysfd.  This should happen
196	// fairly quickly, since all the I/O is non-blocking, and any
197	// attempts to block in the pollDesc will return errClosing.
198	fd.pd.Evict()
199	fd.decref()
200	return nil
201}
202
203func (fd *netFD) shutdown(how int) error {
204	if err := fd.incref(); err != nil {
205		return err
206	}
207	defer fd.decref()
208	return os.NewSyscallError("shutdown", syscall.Shutdown(fd.sysfd, how))
209}
210
// closeRead shuts down the reading side of fd by calling shutdown with
// syscall.SHUT_RD.
func (fd *netFD) closeRead() error {
	return fd.shutdown(syscall.SHUT_RD)
}
214
// closeWrite shuts down the writing side of fd by calling shutdown with
// syscall.SHUT_WR.
func (fd *netFD) closeWrite() error {
	return fd.shutdown(syscall.SHUT_WR)
}
218
219func (fd *netFD) Read(p []byte) (n int, err error) {
220	if err := fd.readLock(); err != nil {
221		return 0, err
222	}
223	defer fd.readUnlock()
224	if err := fd.pd.PrepareRead(); err != nil {
225		return 0, err
226	}
227	for {
228		n, err = syscall.Read(fd.sysfd, p)
229		if err != nil {
230			n = 0
231			if err == syscall.EAGAIN {
232				if err = fd.pd.WaitRead(); err == nil {
233					continue
234				}
235			}
236		}
237		err = fd.eofError(n, err)
238		break
239	}
240	if _, ok := err.(syscall.Errno); ok {
241		err = os.NewSyscallError("read", err)
242	}
243	return
244}
245
246func (fd *netFD) readFrom(p []byte) (n int, sa syscall.Sockaddr, err error) {
247	if err := fd.readLock(); err != nil {
248		return 0, nil, err
249	}
250	defer fd.readUnlock()
251	if err := fd.pd.PrepareRead(); err != nil {
252		return 0, nil, err
253	}
254	for {
255		n, sa, err = syscall.Recvfrom(fd.sysfd, p, 0)
256		if err != nil {
257			n = 0
258			if err == syscall.EAGAIN {
259				if err = fd.pd.WaitRead(); err == nil {
260					continue
261				}
262			}
263		}
264		err = fd.eofError(n, err)
265		break
266	}
267	if _, ok := err.(syscall.Errno); ok {
268		err = os.NewSyscallError("recvfrom", err)
269	}
270	return
271}
272
273func (fd *netFD) readMsg(p []byte, oob []byte) (n, oobn, flags int, sa syscall.Sockaddr, err error) {
274	if err := fd.readLock(); err != nil {
275		return 0, 0, 0, nil, err
276	}
277	defer fd.readUnlock()
278	if err := fd.pd.PrepareRead(); err != nil {
279		return 0, 0, 0, nil, err
280	}
281	for {
282		n, oobn, flags, sa, err = syscall.Recvmsg(fd.sysfd, p, oob, 0)
283		if err != nil {
284			// TODO(dfc) should n and oobn be set to 0
285			if err == syscall.EAGAIN {
286				if err = fd.pd.WaitRead(); err == nil {
287					continue
288				}
289			}
290		}
291		err = fd.eofError(n, err)
292		break
293	}
294	if _, ok := err.(syscall.Errno); ok {
295		err = os.NewSyscallError("recvmsg", err)
296	}
297	return
298}
299
// Write writes p to fd.sysfd, looping until every byte has been written
// or an error stops it. It returns the number of bytes written so far
// (nn) and the error, if any. A raw syscall.Errno is wrapped in an
// os.SyscallError tagged "write".
func (fd *netFD) Write(p []byte) (nn int, err error) {
	if err := fd.writeLock(); err != nil {
		return 0, err
	}
	defer fd.writeUnlock()
	if err := fd.pd.PrepareWrite(); err != nil {
		return 0, err
	}
	for {
		var n int
		// Write the part of p that has not been written yet.
		n, err = syscall.Write(fd.sysfd, p[nn:])
		if n > 0 {
			nn += n
		}
		// Everything written: stop, leaving err as the last call set it.
		if nn == len(p) {
			break
		}
		// Not ready for more data: wait for writability and retry,
		// or fall through with the wait error.
		if err == syscall.EAGAIN {
			if err = fd.pd.WaitWrite(); err == nil {
				continue
			}
		}
		if err != nil {
			break
		}
		// A successful write of zero bytes with data still pending is
		// reported as io.ErrUnexpectedEOF rather than retried.
		if n == 0 {
			err = io.ErrUnexpectedEOF
			break
		}
	}
	if _, ok := err.(syscall.Errno); ok {
		err = os.NewSyscallError("write", err)
	}
	return nn, err
}
335
336func (fd *netFD) writeTo(p []byte, sa syscall.Sockaddr) (n int, err error) {
337	if err := fd.writeLock(); err != nil {
338		return 0, err
339	}
340	defer fd.writeUnlock()
341	if err := fd.pd.PrepareWrite(); err != nil {
342		return 0, err
343	}
344	for {
345		err = syscall.Sendto(fd.sysfd, p, 0, sa)
346		if err == syscall.EAGAIN {
347			if err = fd.pd.WaitWrite(); err == nil {
348				continue
349			}
350		}
351		break
352	}
353	if err == nil {
354		n = len(p)
355	}
356	if _, ok := err.(syscall.Errno); ok {
357		err = os.NewSyscallError("sendto", err)
358	}
359	return
360}
361
362func (fd *netFD) writeMsg(p []byte, oob []byte, sa syscall.Sockaddr) (n int, oobn int, err error) {
363	if err := fd.writeLock(); err != nil {
364		return 0, 0, err
365	}
366	defer fd.writeUnlock()
367	if err := fd.pd.PrepareWrite(); err != nil {
368		return 0, 0, err
369	}
370	for {
371		n, err = syscall.SendmsgN(fd.sysfd, p, oob, sa, 0)
372		if err == syscall.EAGAIN {
373			if err = fd.pd.WaitWrite(); err == nil {
374				continue
375			}
376		}
377		break
378	}
379	if err == nil {
380		oobn = len(oob)
381	}
382	if _, ok := err.(syscall.Errno); ok {
383		err = os.NewSyscallError("sendmsg", err)
384	}
385	return
386}
387
388func (fd *netFD) accept() (netfd *netFD, err error) {
389	if err := fd.readLock(); err != nil {
390		return nil, err
391	}
392	defer fd.readUnlock()
393
394	var s int
395	var rsa syscall.Sockaddr
396	if err = fd.pd.PrepareRead(); err != nil {
397		return nil, err
398	}
399	for {
400		s, rsa, err = accept(fd.sysfd)
401		if err != nil {
402			nerr, ok := err.(*os.SyscallError)
403			if !ok {
404				return nil, err
405			}
406			switch nerr.Err {
407			case syscall.EAGAIN:
408				if err = fd.pd.WaitRead(); err == nil {
409					continue
410				}
411			case syscall.ECONNABORTED:
412				// This means that a socket on the
413				// listen queue was closed before we
414				// Accept()ed it; it's a silly error,
415				// so try again.
416				continue
417			}
418			return nil, err
419		}
420		break
421	}
422
423	if netfd, err = newFD(s, fd.family, fd.sotype, fd.net); err != nil {
424		closeFunc(s)
425		return nil, err
426	}
427	if err = netfd.init(); err != nil {
428		fd.Close()
429		return nil, err
430	}
431	lsa, _ := syscall.Getsockname(netfd.sysfd)
432	netfd.setAddr(netfd.addrFunc()(lsa), netfd.addrFunc()(rsa))
433	return netfd, nil
434}
435
// Use a helper function to call fcntl.  This is defined in C in
// libgo/runtime.
// The three arguments are passed by dupCloseOnExec as the descriptor, the
// fcntl command and its argument; the first result is used there as the
// call's return value and the second as its errno.
//extern __go_fcntl_uintptr
func fcntl(uintptr, uintptr, uintptr) (uintptr, uintptr)
440
// tryDupCloexec indicates whether F_DUPFD_CLOEXEC should be used.
// If the kernel doesn't support it, this is set to 0.
// It is read and written with sync/atomic operations in dupCloseOnExec.
var tryDupCloexec = int32(1)
444
445func dupCloseOnExec(fd int) (newfd int, err error) {
446	if atomic.LoadInt32(&tryDupCloexec) == 1 && syscall.F_DUPFD_CLOEXEC != 0 {
447		syscall.Entersyscall()
448		r0, errno := fcntl(uintptr(fd), syscall.F_DUPFD_CLOEXEC, 0)
449		syscall.Exitsyscall()
450		e1 := syscall.Errno(errno)
451		if runtime.GOOS == "darwin" && e1 == syscall.EBADF {
452			// On OS X 10.6 and below (but we only support
453			// >= 10.6), F_DUPFD_CLOEXEC is unsupported
454			// and fcntl there falls back (undocumented)
455			// to doing an ioctl instead, returning EBADF
456			// in this case because fd is not of the
457			// expected device fd type.  Treat it as
458			// EINVAL instead, so we fall back to the
459			// normal dup path.
460			// TODO: only do this on 10.6 if we can detect 10.6
461			// cheaply.
462			e1 = syscall.EINVAL
463		}
464		switch e1 {
465		case 0:
466			return int(r0), nil
467		case syscall.EINVAL:
468			// Old kernel. Fall back to the portable way
469			// from now on.
470			atomic.StoreInt32(&tryDupCloexec, 0)
471		default:
472			return -1, os.NewSyscallError("fcntl", e1)
473		}
474	}
475	return dupCloseOnExecOld(fd)
476}
477
// dupCloseOnExecOld is the traditional way to dup an fd and set its
// close-on-exec flag, using two system calls. syscall.ForkLock is held
// for reading across both calls. On failure it returns -1 and an
// os.SyscallError tagged "dup".
func dupCloseOnExecOld(fd int) (newfd int, err error) {
	syscall.ForkLock.RLock()
	defer syscall.ForkLock.RUnlock()

	dupfd, dupErr := syscall.Dup(fd)
	if dupErr != nil {
		return -1, os.NewSyscallError("dup", dupErr)
	}
	syscall.CloseOnExec(dupfd)
	return dupfd, nil
}
490
491func (fd *netFD) dup() (f *os.File, err error) {
492	ns, err := dupCloseOnExec(fd.sysfd)
493	if err != nil {
494		return nil, err
495	}
496
497	// We want blocking mode for the new fd, hence the double negative.
498	// This also puts the old fd into blocking mode, meaning that
499	// I/O will block the thread instead of letting us use the epoll server.
500	// Everything will still work, just with more threads.
501	if err = syscall.SetNonblock(ns, false); err != nil {
502		return nil, os.NewSyscallError("setnonblock", err)
503	}
504
505	return os.NewFile(uintptr(ns), fd.name()), nil
506}
507