1// Copyright 2017 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// +build aix darwin dragonfly freebsd linux nacl netbsd openbsd solaris
6
7package poll
8
9import (
10	"io"
11	"runtime"
12	"syscall"
13)
14
// FD is a file descriptor. The net and os packages use this type as a
// field of a larger type representing a network connection or OS file.
type FD struct {
	// Lock sysfd and serialize access to Read and Write methods.
	fdmu fdMutex

	// System file descriptor. Immutable until Close.
	// destroy resets it to -1 once the descriptor has been closed.
	Sysfd int

	// I/O poller.
	pd pollDesc

	// Writev cache.
	iovecs *[]syscall.Iovec

	// Semaphore signaled when file is closed.
	// Released by destroy; acquired by Close for non-blocking descriptors.
	csema uint32

	// Whether this is a streaming descriptor, as opposed to a
	// packet-based descriptor like a UDP socket. Immutable.
	// When set, individual read and write calls are capped at maxRW bytes.
	IsStream bool

	// Whether a zero byte read indicates EOF. This is false for a
	// message based socket connection.
	ZeroReadIsEOF bool

	// Whether this is a file rather than a network socket.
	// Set by Init when it is called with the network name "file".
	isFile bool

	// Whether this file has been set to blocking mode.
	// Set by Init for non-pollable descriptors and by SetBlocking.
	isBlocking bool
}
47
48// Init initializes the FD. The Sysfd field should already be set.
49// This can be called multiple times on a single FD.
50// The net argument is a network name from the net package (e.g., "tcp"),
51// or "file".
52// Set pollable to true if fd should be managed by runtime netpoll.
53func (fd *FD) Init(net string, pollable bool) error {
54	// We don't actually care about the various network types.
55	if net == "file" {
56		fd.isFile = true
57	}
58	if !pollable {
59		fd.isBlocking = true
60		return nil
61	}
62	return fd.pd.init(fd)
63}
64
65// Destroy closes the file descriptor. This is called when there are
66// no remaining references.
67func (fd *FD) destroy() error {
68	// Poller may want to unregister fd in readiness notification mechanism,
69	// so this must be executed before CloseFunc.
70	fd.pd.close()
71	err := CloseFunc(fd.Sysfd)
72	fd.Sysfd = -1
73	runtime_Semrelease(&fd.csema)
74	return err
75}
76
77// Close closes the FD. The underlying file descriptor is closed by the
78// destroy method when there are no remaining references.
79func (fd *FD) Close() error {
80	if !fd.fdmu.increfAndClose() {
81		return errClosing(fd.isFile)
82	}
83
84	// Unblock any I/O.  Once it all unblocks and returns,
85	// so that it cannot be referring to fd.sysfd anymore,
86	// the final decref will close fd.sysfd. This should happen
87	// fairly quickly, since all the I/O is non-blocking, and any
88	// attempts to block in the pollDesc will return errClosing(fd.isFile).
89	fd.pd.evict()
90
91	// The call to decref will call destroy if there are no other
92	// references.
93	err := fd.decref()
94
95	// Wait until the descriptor is closed. If this was the only
96	// reference, it is already closed. Only wait if the file has
97	// not been set to blocking mode, as otherwise any current I/O
98	// may be blocking, and that would block the Close.
99	if !fd.isBlocking {
100		runtime_Semacquire(&fd.csema)
101	}
102
103	return err
104}
105
106// Shutdown wraps the shutdown network call.
107func (fd *FD) Shutdown(how int) error {
108	if err := fd.incref(); err != nil {
109		return err
110	}
111	defer fd.decref()
112	return syscall.Shutdown(fd.Sysfd, how)
113}
114
115// SetBlocking puts the file into blocking mode.
116func (fd *FD) SetBlocking() error {
117	if err := fd.incref(); err != nil {
118		return err
119	}
120	defer fd.decref()
121	fd.isBlocking = true
122	return syscall.SetNonblock(fd.Sysfd, false)
123}
124
// maxRW is the largest number of bytes passed to a single read or write
// system call on a stream descriptor.
//
// Darwin and FreeBSD can't read or write 2GB+ files at a time,
// even on 64-bit systems.
// The same is true of socket implementations on many systems.
// See golang.org/issue/7812 and golang.org/issue/16266.
// Use 1GB instead of, say, 2GB-1, to keep subsequent reads aligned.
const maxRW = 1 << 30
131
132// Read implements io.Reader.
133func (fd *FD) Read(p []byte) (int, error) {
134	if err := fd.readLock(); err != nil {
135		return 0, err
136	}
137	defer fd.readUnlock()
138	if len(p) == 0 {
139		// If the caller wanted a zero byte read, return immediately
140		// without trying (but after acquiring the readLock).
141		// Otherwise syscall.Read returns 0, nil which looks like
142		// io.EOF.
143		// TODO(bradfitz): make it wait for readability? (Issue 15735)
144		return 0, nil
145	}
146	if err := fd.pd.prepareRead(fd.isFile); err != nil {
147		return 0, err
148	}
149	if fd.IsStream && len(p) > maxRW {
150		p = p[:maxRW]
151	}
152	for {
153		n, err := syscall.Read(fd.Sysfd, p)
154		if err != nil {
155			n = 0
156			if err == syscall.EAGAIN && fd.pd.pollable() {
157				if err = fd.pd.waitRead(fd.isFile); err == nil {
158					continue
159				}
160			}
161
162			// On MacOS we can see EINTR here if the user
163			// pressed ^Z.  See issue #22838.
164			if runtime.GOOS == "darwin" && err == syscall.EINTR {
165				continue
166			}
167		}
168		err = fd.eofError(n, err)
169		return n, err
170	}
171}
172
173// Pread wraps the pread system call.
174func (fd *FD) Pread(p []byte, off int64) (int, error) {
175	// Call incref, not readLock, because since pread specifies the
176	// offset it is independent from other reads.
177	// Similarly, using the poller doesn't make sense for pread.
178	if err := fd.incref(); err != nil {
179		return 0, err
180	}
181	if fd.IsStream && len(p) > maxRW {
182		p = p[:maxRW]
183	}
184	n, err := syscall.Pread(fd.Sysfd, p, off)
185	if err != nil {
186		n = 0
187	}
188	fd.decref()
189	err = fd.eofError(n, err)
190	return n, err
191}
192
193// ReadFrom wraps the recvfrom network call.
194func (fd *FD) ReadFrom(p []byte) (int, syscall.Sockaddr, error) {
195	if err := fd.readLock(); err != nil {
196		return 0, nil, err
197	}
198	defer fd.readUnlock()
199	if err := fd.pd.prepareRead(fd.isFile); err != nil {
200		return 0, nil, err
201	}
202	for {
203		n, sa, err := syscall.Recvfrom(fd.Sysfd, p, 0)
204		if err != nil {
205			n = 0
206			if err == syscall.EAGAIN && fd.pd.pollable() {
207				if err = fd.pd.waitRead(fd.isFile); err == nil {
208					continue
209				}
210			}
211		}
212		err = fd.eofError(n, err)
213		return n, sa, err
214	}
215}
216
217// ReadMsg wraps the recvmsg network call.
218func (fd *FD) ReadMsg(p []byte, oob []byte) (int, int, int, syscall.Sockaddr, error) {
219	if err := fd.readLock(); err != nil {
220		return 0, 0, 0, nil, err
221	}
222	defer fd.readUnlock()
223	if err := fd.pd.prepareRead(fd.isFile); err != nil {
224		return 0, 0, 0, nil, err
225	}
226	for {
227		n, oobn, flags, sa, err := syscall.Recvmsg(fd.Sysfd, p, oob, 0)
228		if err != nil {
229			// TODO(dfc) should n and oobn be set to 0
230			if err == syscall.EAGAIN && fd.pd.pollable() {
231				if err = fd.pd.waitRead(fd.isFile); err == nil {
232					continue
233				}
234			}
235		}
236		err = fd.eofError(n, err)
237		return n, oobn, flags, sa, err
238	}
239}
240
241// Write implements io.Writer.
242func (fd *FD) Write(p []byte) (int, error) {
243	if err := fd.writeLock(); err != nil {
244		return 0, err
245	}
246	defer fd.writeUnlock()
247	if err := fd.pd.prepareWrite(fd.isFile); err != nil {
248		return 0, err
249	}
250	var nn int
251	for {
252		max := len(p)
253		if fd.IsStream && max-nn > maxRW {
254			max = nn + maxRW
255		}
256		n, err := syscall.Write(fd.Sysfd, p[nn:max])
257		if n > 0 {
258			nn += n
259		}
260		if nn == len(p) {
261			return nn, err
262		}
263		if err == syscall.EAGAIN && fd.pd.pollable() {
264			if err = fd.pd.waitWrite(fd.isFile); err == nil {
265				continue
266			}
267		}
268		if err != nil {
269			return nn, err
270		}
271		if n == 0 {
272			return nn, io.ErrUnexpectedEOF
273		}
274	}
275}
276
277// Pwrite wraps the pwrite system call.
278func (fd *FD) Pwrite(p []byte, off int64) (int, error) {
279	// Call incref, not writeLock, because since pwrite specifies the
280	// offset it is independent from other writes.
281	// Similarly, using the poller doesn't make sense for pwrite.
282	if err := fd.incref(); err != nil {
283		return 0, err
284	}
285	defer fd.decref()
286	var nn int
287	for {
288		max := len(p)
289		if fd.IsStream && max-nn > maxRW {
290			max = nn + maxRW
291		}
292		n, err := syscall.Pwrite(fd.Sysfd, p[nn:max], off+int64(nn))
293		if n > 0 {
294			nn += n
295		}
296		if nn == len(p) {
297			return nn, err
298		}
299		if err != nil {
300			return nn, err
301		}
302		if n == 0 {
303			return nn, io.ErrUnexpectedEOF
304		}
305	}
306}
307
308// WriteTo wraps the sendto network call.
309func (fd *FD) WriteTo(p []byte, sa syscall.Sockaddr) (int, error) {
310	if err := fd.writeLock(); err != nil {
311		return 0, err
312	}
313	defer fd.writeUnlock()
314	if err := fd.pd.prepareWrite(fd.isFile); err != nil {
315		return 0, err
316	}
317	for {
318		err := syscall.Sendto(fd.Sysfd, p, 0, sa)
319		if err == syscall.EAGAIN && fd.pd.pollable() {
320			if err = fd.pd.waitWrite(fd.isFile); err == nil {
321				continue
322			}
323		}
324		if err != nil {
325			return 0, err
326		}
327		return len(p), nil
328	}
329}
330
331// WriteMsg wraps the sendmsg network call.
332func (fd *FD) WriteMsg(p []byte, oob []byte, sa syscall.Sockaddr) (int, int, error) {
333	if err := fd.writeLock(); err != nil {
334		return 0, 0, err
335	}
336	defer fd.writeUnlock()
337	if err := fd.pd.prepareWrite(fd.isFile); err != nil {
338		return 0, 0, err
339	}
340	for {
341		n, err := syscall.SendmsgN(fd.Sysfd, p, oob, sa, 0)
342		if err == syscall.EAGAIN && fd.pd.pollable() {
343			if err = fd.pd.waitWrite(fd.isFile); err == nil {
344				continue
345			}
346		}
347		if err != nil {
348			return n, 0, err
349		}
350		return n, len(oob), err
351	}
352}
353
354// Accept wraps the accept network call.
355func (fd *FD) Accept() (int, syscall.Sockaddr, string, error) {
356	if err := fd.readLock(); err != nil {
357		return -1, nil, "", err
358	}
359	defer fd.readUnlock()
360
361	if err := fd.pd.prepareRead(fd.isFile); err != nil {
362		return -1, nil, "", err
363	}
364	for {
365		s, rsa, errcall, err := accept(fd.Sysfd)
366		if err == nil {
367			return s, rsa, "", err
368		}
369		switch err {
370		case syscall.EAGAIN:
371			if fd.pd.pollable() {
372				if err = fd.pd.waitRead(fd.isFile); err == nil {
373					continue
374				}
375			}
376		case syscall.ECONNABORTED:
377			// This means that a socket on the listen
378			// queue was closed before we Accept()ed it;
379			// it's a silly error, so try again.
380			continue
381		}
382		return -1, nil, errcall, err
383	}
384}
385
386// Seek wraps syscall.Seek.
387func (fd *FD) Seek(offset int64, whence int) (int64, error) {
388	if err := fd.incref(); err != nil {
389		return 0, err
390	}
391	defer fd.decref()
392	return syscall.Seek(fd.Sysfd, offset, whence)
393}
394
395// ReadDirent wraps syscall.ReadDirent.
396// We treat this like an ordinary system call rather than a call
397// that tries to fill the buffer.
398func (fd *FD) ReadDirent(buf []byte) (int, error) {
399	if err := fd.incref(); err != nil {
400		return 0, err
401	}
402	defer fd.decref()
403	for {
404		n, err := syscall.ReadDirent(fd.Sysfd, buf)
405		if err != nil {
406			n = 0
407			if err == syscall.EAGAIN && fd.pd.pollable() {
408				if err = fd.pd.waitRead(fd.isFile); err == nil {
409					continue
410				}
411			}
412		}
413		// Do not call eofError; caller does not expect to see io.EOF.
414		return n, err
415	}
416}
417
418// Fchdir wraps syscall.Fchdir.
419func (fd *FD) Fchdir() error {
420	if err := fd.incref(); err != nil {
421		return err
422	}
423	defer fd.decref()
424	return syscall.Fchdir(fd.Sysfd)
425}
426
427// Fstat wraps syscall.Fstat
428func (fd *FD) Fstat(s *syscall.Stat_t) error {
429	if err := fd.incref(); err != nil {
430		return err
431	}
432	defer fd.decref()
433	return syscall.Fstat(fd.Sysfd, s)
434}
435
436// On Unix variants only, expose the IO event for the net code.
437
// WaitWrite waits until data can be written to fd.
// It delegates to the poller's waitWrite and returns its error.
func (fd *FD) WaitWrite() error {
	return fd.pd.waitWrite(fd.isFile)
}
442
443// WriteOnce is for testing only. It makes a single write call.
444func (fd *FD) WriteOnce(p []byte) (int, error) {
445	if err := fd.writeLock(); err != nil {
446		return 0, err
447	}
448	defer fd.writeUnlock()
449	return syscall.Write(fd.Sysfd, p)
450}
451
452// RawControl invokes the user-defined function f for a non-IO
453// operation.
454func (fd *FD) RawControl(f func(uintptr)) error {
455	if err := fd.incref(); err != nil {
456		return err
457	}
458	defer fd.decref()
459	f(uintptr(fd.Sysfd))
460	return nil
461}
462
463// RawRead invokes the user-defined function f for a read operation.
464func (fd *FD) RawRead(f func(uintptr) bool) error {
465	if err := fd.readLock(); err != nil {
466		return err
467	}
468	defer fd.readUnlock()
469	if err := fd.pd.prepareRead(fd.isFile); err != nil {
470		return err
471	}
472	for {
473		if f(uintptr(fd.Sysfd)) {
474			return nil
475		}
476		if err := fd.pd.waitRead(fd.isFile); err != nil {
477			return err
478		}
479	}
480}
481
482// RawWrite invokes the user-defined function f for a write operation.
483func (fd *FD) RawWrite(f func(uintptr) bool) error {
484	if err := fd.writeLock(); err != nil {
485		return err
486	}
487	defer fd.writeUnlock()
488	if err := fd.pd.prepareWrite(fd.isFile); err != nil {
489		return err
490	}
491	for {
492		if f(uintptr(fd.Sysfd)) {
493			return nil
494		}
495		if err := fd.pd.waitWrite(fd.isFile); err != nil {
496			return err
497		}
498	}
499}
500