1// Copyright 2017 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// +build darwin dragonfly freebsd linux nacl netbsd openbsd solaris
6
7package poll
8
9import (
10	"io"
11	"syscall"
12)
13
// FD is a file descriptor. The net and os packages use this type as a
// field of a larger type representing a network connection or OS file.
//
// The exported fields (Sysfd, IsStream, ZeroReadIsEOF) are expected to be
// filled in by the owner before Init is called.
type FD struct {
	// Lock sysfd and serialize access to Read and Write methods.
	fdmu fdMutex

	// System file descriptor. Immutable until Close.
	// destroy resets it to -1 once the descriptor has been closed.
	Sysfd int

	// I/O poller.
	pd pollDesc

	// Writev cache.
	iovecs *[]syscall.Iovec

	// Whether this is a streaming descriptor, as opposed to a
	// packet-based descriptor like a UDP socket. Immutable.
	// Stream descriptors have their reads and writes capped at maxRW
	// bytes per system call.
	IsStream bool

	// Whether a zero byte read indicates EOF. This is false for a
	// message based socket connection.
	ZeroReadIsEOF bool

	// Whether this is a file rather than a network socket.
	// Set by Init when its net argument is "file".
	isFile bool
}
40
41// Init initializes the FD. The Sysfd field should already be set.
42// This can be called multiple times on a single FD.
43// The net argument is a network name from the net package (e.g., "tcp"),
44// or "file".
45// Set pollable to true if fd should be managed by runtime netpoll.
46func (fd *FD) Init(net string, pollable bool) error {
47	// We don't actually care about the various network types.
48	if net == "file" {
49		fd.isFile = true
50	}
51	if !pollable {
52		return nil
53	}
54	return fd.pd.init(fd)
55}
56
57// Destroy closes the file descriptor. This is called when there are
58// no remaining references.
59func (fd *FD) destroy() error {
60	// Poller may want to unregister fd in readiness notification mechanism,
61	// so this must be executed before CloseFunc.
62	fd.pd.close()
63	err := CloseFunc(fd.Sysfd)
64	fd.Sysfd = -1
65	return err
66}
67
68// Close closes the FD. The underlying file descriptor is closed by the
69// destroy method when there are no remaining references.
70func (fd *FD) Close() error {
71	if !fd.fdmu.increfAndClose() {
72		return errClosing(fd.isFile)
73	}
74	// Unblock any I/O.  Once it all unblocks and returns,
75	// so that it cannot be referring to fd.sysfd anymore,
76	// the final decref will close fd.sysfd. This should happen
77	// fairly quickly, since all the I/O is non-blocking, and any
78	// attempts to block in the pollDesc will return errClosing(fd.isFile).
79	fd.pd.evict()
80	// The call to decref will call destroy if there are no other
81	// references.
82	return fd.decref()
83}
84
85// Shutdown wraps the shutdown network call.
86func (fd *FD) Shutdown(how int) error {
87	if err := fd.incref(); err != nil {
88		return err
89	}
90	defer fd.decref()
91	return syscall.Shutdown(fd.Sysfd, how)
92}
93
// maxRW is the largest number of bytes passed to a single read or write
// system call on a stream descriptor.
//
// Darwin and FreeBSD can't read or write 2GB+ files at a time,
// even on 64-bit systems.
// The same is true of socket implementations on many systems.
// See golang.org/issue/7812 and golang.org/issue/16266.
// Use 1GB instead of, say, 2GB-1, to keep subsequent reads aligned.
const maxRW = 1 << 30
100
101// Read implements io.Reader.
102func (fd *FD) Read(p []byte) (int, error) {
103	if err := fd.readLock(); err != nil {
104		return 0, err
105	}
106	defer fd.readUnlock()
107	if len(p) == 0 {
108		// If the caller wanted a zero byte read, return immediately
109		// without trying (but after acquiring the readLock).
110		// Otherwise syscall.Read returns 0, nil which looks like
111		// io.EOF.
112		// TODO(bradfitz): make it wait for readability? (Issue 15735)
113		return 0, nil
114	}
115	if err := fd.pd.prepareRead(fd.isFile); err != nil {
116		return 0, err
117	}
118	if fd.IsStream && len(p) > maxRW {
119		p = p[:maxRW]
120	}
121	for {
122		n, err := syscall.Read(fd.Sysfd, p)
123		if err != nil {
124			n = 0
125			if err == syscall.EAGAIN && fd.pd.pollable() {
126				if err = fd.pd.waitRead(fd.isFile); err == nil {
127					continue
128				}
129			}
130		}
131		err = fd.eofError(n, err)
132		return n, err
133	}
134}
135
136// Pread wraps the pread system call.
137func (fd *FD) Pread(p []byte, off int64) (int, error) {
138	// Call incref, not readLock, because since pread specifies the
139	// offset it is independent from other reads.
140	// Similarly, using the poller doesn't make sense for pread.
141	if err := fd.incref(); err != nil {
142		return 0, err
143	}
144	if fd.IsStream && len(p) > maxRW {
145		p = p[:maxRW]
146	}
147	n, err := syscall.Pread(fd.Sysfd, p, off)
148	if err != nil {
149		n = 0
150	}
151	fd.decref()
152	err = fd.eofError(n, err)
153	return n, err
154}
155
156// ReadFrom wraps the recvfrom network call.
157func (fd *FD) ReadFrom(p []byte) (int, syscall.Sockaddr, error) {
158	if err := fd.readLock(); err != nil {
159		return 0, nil, err
160	}
161	defer fd.readUnlock()
162	if err := fd.pd.prepareRead(fd.isFile); err != nil {
163		return 0, nil, err
164	}
165	for {
166		n, sa, err := syscall.Recvfrom(fd.Sysfd, p, 0)
167		if err != nil {
168			n = 0
169			if err == syscall.EAGAIN && fd.pd.pollable() {
170				if err = fd.pd.waitRead(fd.isFile); err == nil {
171					continue
172				}
173			}
174		}
175		err = fd.eofError(n, err)
176		return n, sa, err
177	}
178}
179
180// ReadMsg wraps the recvmsg network call.
181func (fd *FD) ReadMsg(p []byte, oob []byte) (int, int, int, syscall.Sockaddr, error) {
182	if err := fd.readLock(); err != nil {
183		return 0, 0, 0, nil, err
184	}
185	defer fd.readUnlock()
186	if err := fd.pd.prepareRead(fd.isFile); err != nil {
187		return 0, 0, 0, nil, err
188	}
189	for {
190		n, oobn, flags, sa, err := syscall.Recvmsg(fd.Sysfd, p, oob, 0)
191		if err != nil {
192			// TODO(dfc) should n and oobn be set to 0
193			if err == syscall.EAGAIN && fd.pd.pollable() {
194				if err = fd.pd.waitRead(fd.isFile); err == nil {
195					continue
196				}
197			}
198		}
199		err = fd.eofError(n, err)
200		return n, oobn, flags, sa, err
201	}
202}
203
204// Write implements io.Writer.
205func (fd *FD) Write(p []byte) (int, error) {
206	if err := fd.writeLock(); err != nil {
207		return 0, err
208	}
209	defer fd.writeUnlock()
210	if err := fd.pd.prepareWrite(fd.isFile); err != nil {
211		return 0, err
212	}
213	var nn int
214	for {
215		max := len(p)
216		if fd.IsStream && max-nn > maxRW {
217			max = nn + maxRW
218		}
219		n, err := syscall.Write(fd.Sysfd, p[nn:max])
220		if n > 0 {
221			nn += n
222		}
223		if nn == len(p) {
224			return nn, err
225		}
226		if err == syscall.EAGAIN && fd.pd.pollable() {
227			if err = fd.pd.waitWrite(fd.isFile); err == nil {
228				continue
229			}
230		}
231		if err != nil {
232			return nn, err
233		}
234		if n == 0 {
235			return nn, io.ErrUnexpectedEOF
236		}
237	}
238}
239
240// Pwrite wraps the pwrite system call.
241func (fd *FD) Pwrite(p []byte, off int64) (int, error) {
242	// Call incref, not writeLock, because since pwrite specifies the
243	// offset it is independent from other writes.
244	// Similarly, using the poller doesn't make sense for pwrite.
245	if err := fd.incref(); err != nil {
246		return 0, err
247	}
248	defer fd.decref()
249	var nn int
250	for {
251		max := len(p)
252		if fd.IsStream && max-nn > maxRW {
253			max = nn + maxRW
254		}
255		n, err := syscall.Pwrite(fd.Sysfd, p[nn:max], off+int64(nn))
256		if n > 0 {
257			nn += n
258		}
259		if nn == len(p) {
260			return nn, err
261		}
262		if err != nil {
263			return nn, err
264		}
265		if n == 0 {
266			return nn, io.ErrUnexpectedEOF
267		}
268	}
269}
270
271// WriteTo wraps the sendto network call.
272func (fd *FD) WriteTo(p []byte, sa syscall.Sockaddr) (int, error) {
273	if err := fd.writeLock(); err != nil {
274		return 0, err
275	}
276	defer fd.writeUnlock()
277	if err := fd.pd.prepareWrite(fd.isFile); err != nil {
278		return 0, err
279	}
280	for {
281		err := syscall.Sendto(fd.Sysfd, p, 0, sa)
282		if err == syscall.EAGAIN && fd.pd.pollable() {
283			if err = fd.pd.waitWrite(fd.isFile); err == nil {
284				continue
285			}
286		}
287		if err != nil {
288			return 0, err
289		}
290		return len(p), nil
291	}
292}
293
294// WriteMsg wraps the sendmsg network call.
295func (fd *FD) WriteMsg(p []byte, oob []byte, sa syscall.Sockaddr) (int, int, error) {
296	if err := fd.writeLock(); err != nil {
297		return 0, 0, err
298	}
299	defer fd.writeUnlock()
300	if err := fd.pd.prepareWrite(fd.isFile); err != nil {
301		return 0, 0, err
302	}
303	for {
304		n, err := syscall.SendmsgN(fd.Sysfd, p, oob, sa, 0)
305		if err == syscall.EAGAIN && fd.pd.pollable() {
306			if err = fd.pd.waitWrite(fd.isFile); err == nil {
307				continue
308			}
309		}
310		if err != nil {
311			return n, 0, err
312		}
313		return n, len(oob), err
314	}
315}
316
317// Accept wraps the accept network call.
318func (fd *FD) Accept() (int, syscall.Sockaddr, string, error) {
319	if err := fd.readLock(); err != nil {
320		return -1, nil, "", err
321	}
322	defer fd.readUnlock()
323
324	if err := fd.pd.prepareRead(fd.isFile); err != nil {
325		return -1, nil, "", err
326	}
327	for {
328		s, rsa, errcall, err := accept(fd.Sysfd)
329		if err == nil {
330			return s, rsa, "", err
331		}
332		switch err {
333		case syscall.EAGAIN:
334			if fd.pd.pollable() {
335				if err = fd.pd.waitRead(fd.isFile); err == nil {
336					continue
337				}
338			}
339		case syscall.ECONNABORTED:
340			// This means that a socket on the listen
341			// queue was closed before we Accept()ed it;
342			// it's a silly error, so try again.
343			continue
344		}
345		return -1, nil, errcall, err
346	}
347}
348
349// Seek wraps syscall.Seek.
350func (fd *FD) Seek(offset int64, whence int) (int64, error) {
351	if err := fd.incref(); err != nil {
352		return 0, err
353	}
354	defer fd.decref()
355	return syscall.Seek(fd.Sysfd, offset, whence)
356}
357
358// ReadDirent wraps syscall.ReadDirent.
359// We treat this like an ordinary system call rather than a call
360// that tries to fill the buffer.
361func (fd *FD) ReadDirent(buf []byte) (int, error) {
362	if err := fd.incref(); err != nil {
363		return 0, err
364	}
365	defer fd.decref()
366	for {
367		n, err := syscall.ReadDirent(fd.Sysfd, buf)
368		if err != nil {
369			n = 0
370			if err == syscall.EAGAIN && fd.pd.pollable() {
371				if err = fd.pd.waitRead(fd.isFile); err == nil {
372					continue
373				}
374			}
375		}
376		// Do not call eofError; caller does not expect to see io.EOF.
377		return n, err
378	}
379}
380
381// Fchdir wraps syscall.Fchdir.
382func (fd *FD) Fchdir() error {
383	if err := fd.incref(); err != nil {
384		return err
385	}
386	defer fd.decref()
387	return syscall.Fchdir(fd.Sysfd)
388}
389
390// Fstat wraps syscall.Fstat
391func (fd *FD) Fstat(s *syscall.Stat_t) error {
392	if err := fd.incref(); err != nil {
393		return err
394	}
395	defer fd.decref()
396	return syscall.Fstat(fd.Sysfd, s)
397}
398
399// On Unix variants only, expose the IO event for the net code.
400
// WaitWrite waits until data can be written to fd.
// It returns the result of the poll descriptor's waitWrite.
func (fd *FD) WaitWrite() error {
	return fd.pd.waitWrite(fd.isFile)
}
405
406// RawControl invokes the user-defined function f for a non-IO
407// operation.
408func (fd *FD) RawControl(f func(uintptr)) error {
409	if err := fd.incref(); err != nil {
410		return err
411	}
412	defer fd.decref()
413	f(uintptr(fd.Sysfd))
414	return nil
415}
416
417// RawRead invokes the user-defined function f for a read operation.
418func (fd *FD) RawRead(f func(uintptr) bool) error {
419	if err := fd.readLock(); err != nil {
420		return err
421	}
422	defer fd.readUnlock()
423	if err := fd.pd.prepareRead(fd.isFile); err != nil {
424		return err
425	}
426	for {
427		if f(uintptr(fd.Sysfd)) {
428			return nil
429		}
430		if err := fd.pd.waitRead(fd.isFile); err != nil {
431			return err
432		}
433	}
434}
435
436// RawWrite invokes the user-defined function f for a write operation.
437func (fd *FD) RawWrite(f func(uintptr) bool) error {
438	if err := fd.writeLock(); err != nil {
439		return err
440	}
441	defer fd.writeUnlock()
442	if err := fd.pd.prepareWrite(fd.isFile); err != nil {
443		return err
444	}
445	for {
446		if f(uintptr(fd.Sysfd)) {
447			return nil
448		}
449		if err := fd.pd.waitWrite(fd.isFile); err != nil {
450			return err
451		}
452	}
453}
454