1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// +build darwin dragonfly freebsd linux netbsd openbsd
6
7package net
8
9import (
10	"io"
11	"os"
12	"runtime"
13	"sync/atomic"
14	"syscall"
15	"time"
16)
17
18// Network file descriptor.
19type netFD struct {
20	// locking/lifetime of sysfd + serialize access to Read and Write methods
21	fdmu fdMutex
22
23	// immutable until Close
24	sysfd       int
25	family      int
26	sotype      int
27	isConnected bool
28	net         string
29	laddr       Addr
30	raddr       Addr
31
32	// wait server
33	pd pollDesc
34}
35
// sysInit is a no-op in this file.
func sysInit() {}
38
39func dial(network string, ra Addr, dialer func(time.Time) (Conn, error), deadline time.Time) (Conn, error) {
40	return dialer(deadline)
41}
42
43func newFD(sysfd, family, sotype int, net string) (*netFD, error) {
44	return &netFD{sysfd: sysfd, family: family, sotype: sotype, net: net}, nil
45}
46
47func (fd *netFD) init() error {
48	if err := fd.pd.Init(fd); err != nil {
49		return err
50	}
51	return nil
52}
53
54func (fd *netFD) setAddr(laddr, raddr Addr) {
55	fd.laddr = laddr
56	fd.raddr = raddr
57	runtime.SetFinalizer(fd, (*netFD).Close)
58}
59
60func (fd *netFD) name() string {
61	var ls, rs string
62	if fd.laddr != nil {
63		ls = fd.laddr.String()
64	}
65	if fd.raddr != nil {
66		rs = fd.raddr.String()
67	}
68	return fd.net + ":" + ls + "->" + rs
69}
70
71func (fd *netFD) connect(la, ra syscall.Sockaddr) error {
72	// Do not need to call fd.writeLock here,
73	// because fd is not yet accessible to user,
74	// so no concurrent operations are possible.
75	if err := fd.pd.PrepareWrite(); err != nil {
76		return err
77	}
78	for {
79		err := syscall.Connect(fd.sysfd, ra)
80		if err == nil || err == syscall.EISCONN {
81			break
82		}
83
84		// On Solaris we can see EINVAL if the socket has
85		// already been accepted and closed by the server.
86		// Treat this as a successful connection--writes to
87		// the socket will see EOF.  For details and a test
88		// case in C see http://golang.org/issue/6828.
89		if runtime.GOOS == "solaris" && err == syscall.EINVAL {
90			break
91		}
92
93		if err != syscall.EINPROGRESS && err != syscall.EALREADY && err != syscall.EINTR {
94			return err
95		}
96		if err = fd.pd.WaitWrite(); err != nil {
97			return err
98		}
99	}
100	return nil
101}
102
// destroy releases what fd holds: it closes the poll descriptor, closes
// the socket fd.sysfd, sets fd.sysfd to -1 and clears the finalizer
// registered on fd.
func (fd *netFD) destroy() {
	// Poller may want to unregister fd in readiness notification mechanism,
	// so this must be executed before closesocket.
	fd.pd.Close()
	closesocket(fd.sysfd) // the error returned by closesocket is discarded
	fd.sysfd = -1
	runtime.SetFinalizer(fd, nil)
}
111
112// Add a reference to this fd.
113// Returns an error if the fd cannot be used.
114func (fd *netFD) incref() error {
115	if !fd.fdmu.Incref() {
116		return errClosing
117	}
118	return nil
119}
120
121// Remove a reference to this FD and close if we've been asked to do so
122// (and there are no references left).
123func (fd *netFD) decref() {
124	if fd.fdmu.Decref() {
125		fd.destroy()
126	}
127}
128
129// Add a reference to this fd and lock for reading.
130// Returns an error if the fd cannot be used.
131func (fd *netFD) readLock() error {
132	if !fd.fdmu.RWLock(true) {
133		return errClosing
134	}
135	return nil
136}
137
138// Unlock for reading and remove a reference to this FD.
139func (fd *netFD) readUnlock() {
140	if fd.fdmu.RWUnlock(true) {
141		fd.destroy()
142	}
143}
144
145// Add a reference to this fd and lock for writing.
146// Returns an error if the fd cannot be used.
147func (fd *netFD) writeLock() error {
148	if !fd.fdmu.RWLock(false) {
149		return errClosing
150	}
151	return nil
152}
153
154// Unlock for writing and remove a reference to this FD.
155func (fd *netFD) writeUnlock() {
156	if fd.fdmu.RWUnlock(false) {
157		fd.destroy()
158	}
159}
160
// Close initiates the shutdown of fd: it calls fd.fdmu.IncrefAndClose,
// evicts fd from the poll descriptor and then drops the reference again
// with decref. It returns errClosing when IncrefAndClose reports false
// (presumably because fd is already closed or closing; confirm against
// fdMutex) and nil otherwise.
func (fd *netFD) Close() error {
	fd.pd.Lock() // held across fdmu.IncrefAndClose and pd.Evict below
	if !fd.fdmu.IncrefAndClose() {
		fd.pd.Unlock()
		return errClosing
	}
	// Unblock any I/O.  Once it all unblocks and returns,
	// so that it cannot be referring to fd.sysfd anymore,
	// the final decref will close fd.sysfd.  This should happen
	// fairly quickly, since all the I/O is non-blocking, and any
	// attempts to block in the pollDesc will return errClosing.
	doWakeup := fd.pd.Evict()
	fd.pd.Unlock()
	fd.decref()
	// The wakeup is issued only after the poll descriptor lock has been
	// released and the reference dropped.
	if doWakeup {
		fd.pd.Wakeup()
	}
	return nil
}
180
181func (fd *netFD) shutdown(how int) error {
182	if err := fd.incref(); err != nil {
183		return err
184	}
185	defer fd.decref()
186	err := syscall.Shutdown(fd.sysfd, how)
187	if err != nil {
188		return &OpError{"shutdown", fd.net, fd.laddr, err}
189	}
190	return nil
191}
192
193func (fd *netFD) CloseRead() error {
194	return fd.shutdown(syscall.SHUT_RD)
195}
196
197func (fd *netFD) CloseWrite() error {
198	return fd.shutdown(syscall.SHUT_WR)
199}
200
201func (fd *netFD) Read(p []byte) (n int, err error) {
202	if err := fd.readLock(); err != nil {
203		return 0, err
204	}
205	defer fd.readUnlock()
206	if err := fd.pd.PrepareRead(); err != nil {
207		return 0, &OpError{"read", fd.net, fd.raddr, err}
208	}
209	for {
210		n, err = syscall.Read(int(fd.sysfd), p)
211		if err != nil {
212			n = 0
213			if err == syscall.EAGAIN {
214				if err = fd.pd.WaitRead(); err == nil {
215					continue
216				}
217			}
218		}
219		err = chkReadErr(n, err, fd)
220		break
221	}
222	if err != nil && err != io.EOF {
223		err = &OpError{"read", fd.net, fd.raddr, err}
224	}
225	return
226}
227
228func (fd *netFD) ReadFrom(p []byte) (n int, sa syscall.Sockaddr, err error) {
229	if err := fd.readLock(); err != nil {
230		return 0, nil, err
231	}
232	defer fd.readUnlock()
233	if err := fd.pd.PrepareRead(); err != nil {
234		return 0, nil, &OpError{"read", fd.net, fd.laddr, err}
235	}
236	for {
237		n, sa, err = syscall.Recvfrom(fd.sysfd, p, 0)
238		if err != nil {
239			n = 0
240			if err == syscall.EAGAIN {
241				if err = fd.pd.WaitRead(); err == nil {
242					continue
243				}
244			}
245		}
246		err = chkReadErr(n, err, fd)
247		break
248	}
249	if err != nil && err != io.EOF {
250		err = &OpError{"read", fd.net, fd.laddr, err}
251	}
252	return
253}
254
255func (fd *netFD) ReadMsg(p []byte, oob []byte) (n, oobn, flags int, sa syscall.Sockaddr, err error) {
256	if err := fd.readLock(); err != nil {
257		return 0, 0, 0, nil, err
258	}
259	defer fd.readUnlock()
260	if err := fd.pd.PrepareRead(); err != nil {
261		return 0, 0, 0, nil, &OpError{"read", fd.net, fd.laddr, err}
262	}
263	for {
264		n, oobn, flags, sa, err = syscall.Recvmsg(fd.sysfd, p, oob, 0)
265		if err != nil {
266			// TODO(dfc) should n and oobn be set to 0
267			if err == syscall.EAGAIN {
268				if err = fd.pd.WaitRead(); err == nil {
269					continue
270				}
271			}
272		}
273		err = chkReadErr(n, err, fd)
274		break
275	}
276	if err != nil && err != io.EOF {
277		err = &OpError{"read", fd.net, fd.laddr, err}
278	}
279	return
280}
281
282func chkReadErr(n int, err error, fd *netFD) error {
283	if n == 0 && err == nil && fd.sotype != syscall.SOCK_DGRAM && fd.sotype != syscall.SOCK_RAW {
284		return io.EOF
285	}
286	return err
287}
288
289func (fd *netFD) Write(p []byte) (nn int, err error) {
290	if err := fd.writeLock(); err != nil {
291		return 0, err
292	}
293	defer fd.writeUnlock()
294	if err := fd.pd.PrepareWrite(); err != nil {
295		return 0, &OpError{"write", fd.net, fd.raddr, err}
296	}
297	for {
298		var n int
299		n, err = syscall.Write(int(fd.sysfd), p[nn:])
300		if n > 0 {
301			nn += n
302		}
303		if nn == len(p) {
304			break
305		}
306		if err == syscall.EAGAIN {
307			if err = fd.pd.WaitWrite(); err == nil {
308				continue
309			}
310		}
311		if err != nil {
312			n = 0
313			break
314		}
315		if n == 0 {
316			err = io.ErrUnexpectedEOF
317			break
318		}
319	}
320	if err != nil {
321		err = &OpError{"write", fd.net, fd.raddr, err}
322	}
323	return nn, err
324}
325
326func (fd *netFD) WriteTo(p []byte, sa syscall.Sockaddr) (n int, err error) {
327	if err := fd.writeLock(); err != nil {
328		return 0, err
329	}
330	defer fd.writeUnlock()
331	if err := fd.pd.PrepareWrite(); err != nil {
332		return 0, &OpError{"write", fd.net, fd.raddr, err}
333	}
334	for {
335		err = syscall.Sendto(fd.sysfd, p, 0, sa)
336		if err == syscall.EAGAIN {
337			if err = fd.pd.WaitWrite(); err == nil {
338				continue
339			}
340		}
341		break
342	}
343	if err == nil {
344		n = len(p)
345	} else {
346		err = &OpError{"write", fd.net, fd.raddr, err}
347	}
348	return
349}
350
351func (fd *netFD) WriteMsg(p []byte, oob []byte, sa syscall.Sockaddr) (n int, oobn int, err error) {
352	if err := fd.writeLock(); err != nil {
353		return 0, 0, err
354	}
355	defer fd.writeUnlock()
356	if err := fd.pd.PrepareWrite(); err != nil {
357		return 0, 0, &OpError{"write", fd.net, fd.raddr, err}
358	}
359	for {
360		err = syscall.Sendmsg(fd.sysfd, p, oob, sa, 0)
361		if err == syscall.EAGAIN {
362			if err = fd.pd.WaitWrite(); err == nil {
363				continue
364			}
365		}
366		break
367	}
368	if err == nil {
369		n = len(p)
370		oobn = len(oob)
371	} else {
372		err = &OpError{"write", fd.net, fd.raddr, err}
373	}
374	return
375}
376
377func (fd *netFD) accept(toAddr func(syscall.Sockaddr) Addr) (netfd *netFD, err error) {
378	if err := fd.readLock(); err != nil {
379		return nil, err
380	}
381	defer fd.readUnlock()
382
383	var s int
384	var rsa syscall.Sockaddr
385	if err = fd.pd.PrepareRead(); err != nil {
386		return nil, &OpError{"accept", fd.net, fd.laddr, err}
387	}
388	for {
389		s, rsa, err = accept(fd.sysfd)
390		if err != nil {
391			if err == syscall.EAGAIN {
392				if err = fd.pd.WaitRead(); err == nil {
393					continue
394				}
395			} else if err == syscall.ECONNABORTED {
396				// This means that a socket on the listen queue was closed
397				// before we Accept()ed it; it's a silly error, so try again.
398				continue
399			}
400			return nil, &OpError{"accept", fd.net, fd.laddr, err}
401		}
402		break
403	}
404
405	if netfd, err = newFD(s, fd.family, fd.sotype, fd.net); err != nil {
406		closesocket(s)
407		return nil, err
408	}
409	if err = netfd.init(); err != nil {
410		fd.Close()
411		return nil, err
412	}
413	lsa, _ := syscall.Getsockname(netfd.sysfd)
414	netfd.setAddr(toAddr(lsa), toAddr(rsa))
415	return netfd, nil
416}
417
// tryDupCloexec reports whether dupCloseOnExec should attempt
// F_DUPFD_CLOEXEC. It starts at 1 and is set to 0 once the kernel
// turns out not to support it.
var tryDupCloexec int32 = 1
421
422func dupCloseOnExec(fd int) (newfd int, err error) {
423	if atomic.LoadInt32(&tryDupCloexec) == 1 && syscall.F_DUPFD_CLOEXEC != 0 {
424		r0, _, e1 := syscall.Syscall(syscall.SYS_FCNTL, uintptr(fd), syscall.F_DUPFD_CLOEXEC, 0)
425		if runtime.GOOS == "darwin" && e1 == syscall.EBADF {
426			// On OS X 10.6 and below (but we only support
427			// >= 10.6), F_DUPFD_CLOEXEC is unsupported
428			// and fcntl there falls back (undocumented)
429			// to doing an ioctl instead, returning EBADF
430			// in this case because fd is not of the
431			// expected device fd type.  Treat it as
432			// EINVAL instead, so we fall back to the
433			// normal dup path.
434			// TODO: only do this on 10.6 if we can detect 10.6
435			// cheaply.
436			e1 = syscall.EINVAL
437		}
438		switch e1 {
439		case 0:
440			return int(r0), nil
441		case syscall.EINVAL:
442			// Old kernel. Fall back to the portable way
443			// from now on.
444			atomic.StoreInt32(&tryDupCloexec, 0)
445		default:
446			return -1, e1
447		}
448	}
449	return dupCloseOnExecOld(fd)
450}
451
// dupCloseOnExecOld is the traditional way to dup an fd and mark the
// copy close-on-exec, using two system calls. syscall.ForkLock is held
// for reading across both calls. On failure it returns -1 and the error
// from syscall.Dup.
func dupCloseOnExecOld(fd int) (newfd int, err error) {
	syscall.ForkLock.RLock()
	defer syscall.ForkLock.RUnlock()
	dupfd, dupErr := syscall.Dup(fd)
	if dupErr != nil {
		return -1, dupErr
	}
	syscall.CloseOnExec(dupfd)
	return dupfd, nil
}
464
465func (fd *netFD) dup() (f *os.File, err error) {
466	ns, err := dupCloseOnExec(fd.sysfd)
467	if err != nil {
468		syscall.ForkLock.RUnlock()
469		return nil, &OpError{"dup", fd.net, fd.laddr, err}
470	}
471
472	// We want blocking mode for the new fd, hence the double negative.
473	// This also puts the old fd into blocking mode, meaning that
474	// I/O will block the thread instead of letting us use the epoll server.
475	// Everything will still work, just with more threads.
476	if err = syscall.SetNonblock(ns, false); err != nil {
477		return nil, &OpError{"setnonblock", fd.net, fd.laddr, err}
478	}
479
480	return os.NewFile(uintptr(ns), fd.name()), nil
481}
482
// closesocket closes the descriptor s and returns the error, if any,
// reported by syscall.Close.
func closesocket(s int) error {
	err := syscall.Close(s)
	return err
}
486
// skipRawSocketTests reports whether raw socket tests should be
// skipped. They are skipped, with an explanatory message, unless the
// process runs with user ID 0. The returned error is always nil.
func skipRawSocketTests() (skip bool, skipmsg string, err error) {
	if os.Getuid() == 0 {
		return false, "", nil
	}
	return true, "skipping test; must be root", nil
}
493