/*	$OpenBSD: sys_pipe.c,v 1.72 2016/01/15 18:10:48 stefan Exp $	*/

/*
 * Copyright (c) 1996 John S. Dyson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Absolutely no warranty of function or purpose is made by the author
 *    John S. Dyson.
 * 4. Modifications may be freely made to this file if the above conditions
 *    are met.
 */

/*
 * This file contains a high-performance replacement for the socket-based
 * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
 * all features of sockets, but does everything that pipes normally do.
 */
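
/*
 * For orientation, a minimal userland sketch of the interface this
 * file backs (an illustrative fragment, not part of the kernel
 * source; error handling abbreviated):
 *
 *	int fds[2];
 *	char buf[2];
 *
 *	if (pipe(fds) == -1)
 *		err(1, "pipe");
 *	write(fds[1], "hi", 2);		(serviced by pipe_write() below)
 *	read(fds[0], buf, 2);		(serviced by pipe_read() below)
 *	close(fds[0]);
 *	close(fds[1]);			(the last close ends in pipeclose())
 */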

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/pool.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/signalvar.h>
#include <sys/mount.h>
#include <sys/syscallargs.h>
#include <sys/event.h>
#include <sys/lock.h>
#include <sys/poll.h>

#include <uvm/uvm_extern.h>

#include <sys/pipe.h>

/*
 * interfaces to the outside world
 */
int	pipe_read(struct file *, off_t *, struct uio *, struct ucred *);
int	pipe_write(struct file *, off_t *, struct uio *, struct ucred *);
int	pipe_close(struct file *, struct proc *);
int	pipe_poll(struct file *, int events, struct proc *);
int	pipe_kqfilter(struct file *fp, struct knote *kn);
int	pipe_ioctl(struct file *, u_long, caddr_t, struct proc *);
int	pipe_stat(struct file *fp, struct stat *ub, struct proc *p);

static struct fileops pipeops = {
	pipe_read, pipe_write, pipe_ioctl, pipe_poll, pipe_kqfilter,
	pipe_stat, pipe_close
};

void	filt_pipedetach(struct knote *kn);
int	filt_piperead(struct knote *kn, long hint);
int	filt_pipewrite(struct knote *kn, long hint);

struct filterops pipe_rfiltops =
	{ 1, NULL, filt_pipedetach, filt_piperead };
struct filterops pipe_wfiltops =
	{ 1, NULL, filt_pipedetach, filt_pipewrite };

/*
 * Default pipe buffer size(s); this can be fairly large now because pipe
 * space is pageable.  The pipe code will try to maintain locality of
 * reference for performance reasons, so small amounts of outstanding I/O
 * will not wipe the cache.
 */
#define MINPIPESIZE (PIPE_SIZE/3)

/*
 * Limit the number of "big" pipes
 */
#define LIMITBIGPIPES	32
int nbigpipe;
static int amountpipekva;

struct pool pipe_pool;

int	dopipe(struct proc *, int *, int);
void	pipeclose(struct pipe *);
void	pipe_free_kmem(struct pipe *);
int	pipe_create(struct pipe *);
int	pipelock(struct pipe *);
void	pipeunlock(struct pipe *);
void	pipeselwakeup(struct pipe *);
int	pipespace(struct pipe *, u_int);

/*
 * The pipe system call for the DTYPE_PIPE type of pipes
 */

int
sys_pipe(struct proc *p, void *v, register_t *retval)
{
	struct sys_pipe_args /* {
		syscallarg(int *) fdp;
	} */ *uap = v;

	return (dopipe(p, SCARG(uap, fdp), 0));
}

int
sys_pipe2(struct proc *p, void *v, register_t *retval)
{
	struct sys_pipe2_args /* {
		syscallarg(int *) fdp;
		syscallarg(int) flags;
	} */ *uap = v;

	if (SCARG(uap, flags) & ~(O_CLOEXEC | FNONBLOCK))
		return (EINVAL);

	return (dopipe(p, SCARG(uap, fdp), SCARG(uap, flags)));
}
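
/*
 * A hedged usage sketch for the flags check above: pipe2(2) accepts
 * only O_CLOEXEC and O_NONBLOCK, and anything else fails with EINVAL
 * (illustrative userland fragment, not part of this file):
 *
 *	int fds[2];
 *
 *	if (pipe2(fds, O_CLOEXEC | O_NONBLOCK) == -1)
 *		err(1, "pipe2");
 *	if (pipe2(fds, O_APPEND) != -1)
 *		errx(1, "expected EINVAL");
 */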

int
dopipe(struct proc *p, int *ufds, int flags)
{
	struct filedesc *fdp = p->p_fd;
	struct file *rf, *wf;
	struct pipe *rpipe, *wpipe = NULL;
	int fds[2], error;

	rpipe = pool_get(&pipe_pool, PR_WAITOK);
	error = pipe_create(rpipe);
	if (error != 0)
		goto free1;
	wpipe = pool_get(&pipe_pool, PR_WAITOK);
	error = pipe_create(wpipe);
	if (error != 0)
		goto free1;

	fdplock(fdp);

	error = falloc(p, &rf, &fds[0]);
	if (error != 0)
		goto free2;
	rf->f_flag = FREAD | FWRITE | (flags & FNONBLOCK);
	rf->f_type = DTYPE_PIPE;
	rf->f_data = rpipe;
	rf->f_ops = &pipeops;

	error = falloc(p, &wf, &fds[1]);
	if (error != 0)
		goto free3;
	wf->f_flag = FREAD | FWRITE | (flags & FNONBLOCK);
	wf->f_type = DTYPE_PIPE;
	wf->f_data = wpipe;
	wf->f_ops = &pipeops;

	if (flags & O_CLOEXEC) {
		fdp->fd_ofileflags[fds[0]] |= UF_EXCLOSE;
		fdp->fd_ofileflags[fds[1]] |= UF_EXCLOSE;
	}

	rpipe->pipe_peer = wpipe;
	wpipe->pipe_peer = rpipe;

	FILE_SET_MATURE(rf, p);
	FILE_SET_MATURE(wf, p);

	error = copyout(fds, ufds, sizeof(fds));
	if (error != 0) {
		fdrelease(p, fds[0]);
		fdrelease(p, fds[1]);
	}
	fdpunlock(fdp);
	return (error);

free3:
	fdremove(fdp, fds[0]);
	closef(rf, p);
	rpipe = NULL;
free2:
	fdpunlock(fdp);
free1:
	pipeclose(wpipe);
	pipeclose(rpipe);
	return (error);
}

/*
 * Allocate kva for the pipe circular buffer; the space is pageable.
 * This routine will 'realloc' the size of a pipe safely: if allocation
 * fails, it retains the old buffer and returns ENOMEM.
 */
int
pipespace(struct pipe *cpipe, u_int size)
{
	caddr_t buffer;

	buffer = km_alloc(size, &kv_any, &kp_pageable, &kd_waitok);
	if (buffer == NULL) {
		return (ENOMEM);
	}

	/* free old resources if we are resizing */
	pipe_free_kmem(cpipe);
	cpipe->pipe_buffer.buffer = buffer;
	cpipe->pipe_buffer.size = size;
	cpipe->pipe_buffer.in = 0;
	cpipe->pipe_buffer.out = 0;
	cpipe->pipe_buffer.cnt = 0;

	amountpipekva += cpipe->pipe_buffer.size;

	return (0);
}

/*
 * initialize and allocate VM and memory for pipe
 */
int
pipe_create(struct pipe *cpipe)
{
	int error;

	/* so pipe_free_kmem() doesn't follow junk pointer */
	cpipe->pipe_buffer.buffer = NULL;
	/*
	 * protect so pipeclose() doesn't follow a junk pointer
	 * if pipespace() fails.
	 */
	memset(&cpipe->pipe_sel, 0, sizeof(cpipe->pipe_sel));
	cpipe->pipe_state = 0;
	cpipe->pipe_peer = NULL;
	cpipe->pipe_busy = 0;

	error = pipespace(cpipe, PIPE_SIZE);
	if (error != 0)
		return (error);

	getnanotime(&cpipe->pipe_ctime);
	cpipe->pipe_atime = cpipe->pipe_ctime;
	cpipe->pipe_mtime = cpipe->pipe_ctime;
	cpipe->pipe_pgid = NO_PID;

	return (0);
}


/*
 * lock a pipe for I/O, blocking other access
 */
int
pipelock(struct pipe *cpipe)
{
	int error;
	while (cpipe->pipe_state & PIPE_LOCK) {
		cpipe->pipe_state |= PIPE_LWANT;
		if ((error = tsleep(cpipe, PRIBIO|PCATCH, "pipelk", 0)))
			return error;
	}
	cpipe->pipe_state |= PIPE_LOCK;
	return 0;
}

/*
 * unlock a pipe I/O lock
 */
void
pipeunlock(struct pipe *cpipe)
{
	cpipe->pipe_state &= ~PIPE_LOCK;
	if (cpipe->pipe_state & PIPE_LWANT) {
		cpipe->pipe_state &= ~PIPE_LWANT;
		wakeup(cpipe);
	}
}

void
pipeselwakeup(struct pipe *cpipe)
{
	if (cpipe->pipe_state & PIPE_SEL) {
		cpipe->pipe_state &= ~PIPE_SEL;
		selwakeup(&cpipe->pipe_sel);
	} else
		KNOTE(&cpipe->pipe_sel.si_note, 0);
	if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_pgid != NO_PID)
		gsignal(cpipe->pipe_pgid, SIGIO);
}

int
pipe_read(struct file *fp, off_t *poff, struct uio *uio, struct ucred *cred)
{
	struct pipe *rpipe = fp->f_data;
	int error;
	size_t size, nread = 0;

	error = pipelock(rpipe);
	if (error)
		return (error);

	++rpipe->pipe_busy;

	while (uio->uio_resid) {
		/*
		 * normal pipe buffer receive
		 */
		if (rpipe->pipe_buffer.cnt > 0) {
			size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
			if (size > rpipe->pipe_buffer.cnt)
				size = rpipe->pipe_buffer.cnt;
			if (size > uio->uio_resid)
				size = uio->uio_resid;
			error = uiomove(&rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
					size, uio);
			if (error) {
				break;
			}
			rpipe->pipe_buffer.out += size;
			if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
				rpipe->pipe_buffer.out = 0;

			rpipe->pipe_buffer.cnt -= size;
			/*
			 * If there is no more to read in the pipe, reset
			 * its pointers to the beginning.  This improves
			 * cache hit stats.
			 */
			if (rpipe->pipe_buffer.cnt == 0) {
				rpipe->pipe_buffer.in = 0;
				rpipe->pipe_buffer.out = 0;
			}
			nread += size;
		} else {
			/*
			 * detect EOF condition
			 * read returns 0 on EOF, no need to set error
			 */
			if (rpipe->pipe_state & PIPE_EOF)
				break;

			/*
			 * If the "write-side" has been blocked, wake it up now.
			 */
			if (rpipe->pipe_state & PIPE_WANTW) {
				rpipe->pipe_state &= ~PIPE_WANTW;
				wakeup(rpipe);
			}

			/*
			 * Break if some data was read.
			 */
			if (nread > 0)
				break;

			/*
			 * Unlock the pipe buffer for our remaining processing.
			 * We will either break out with an error or we will
			 * sleep and relock to loop.
			 */
			pipeunlock(rpipe);

			/*
			 * Handle non-blocking mode operation or
			 * wait for more data.
			 */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
			} else {
				rpipe->pipe_state |= PIPE_WANTR;
				if ((error = tsleep(rpipe, PRIBIO|PCATCH, "piperd", 0)) == 0)
					error = pipelock(rpipe);
			}
			if (error)
				goto unlocked_error;
		}
	}
	pipeunlock(rpipe);

	if (error == 0)
		getnanotime(&rpipe->pipe_atime);
unlocked_error:
	--rpipe->pipe_busy;

	/*
	 * PIPE_WANT processing only makes sense if pipe_busy is 0.
	 */
	if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) {
		rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW);
		wakeup(rpipe);
	} else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) {
		/*
		 * Handle write blocking hysteresis.
		 */
		if (rpipe->pipe_state & PIPE_WANTW) {
			rpipe->pipe_state &= ~PIPE_WANTW;
			wakeup(rpipe);
		}
	}

	if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF)
		pipeselwakeup(rpipe);

	return (error);
}
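
/*
 * The PIPE_EOF break above gives the classic pipe semantics: once the
 * write side is gone and the buffer has drained, read(2) returns 0.
 * A minimal userland sketch (illustrative, not part of this file):
 *
 *	ssize_t n;
 *	char buf[BUFSIZ];
 *
 *	close(fds[1]);			(drop our write side)
 *	while ((n = read(fds[0], buf, sizeof(buf))) > 0)
 *		;			(drain any buffered data)
 *	if (n == 0)
 *		printf("EOF\n");	(PIPE_EOF and an empty buffer)
 */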

int
pipe_write(struct file *fp, off_t *poff, struct uio *uio, struct ucred *cred)
{
	int error = 0;
	size_t orig_resid;
	struct pipe *wpipe, *rpipe;

	rpipe = fp->f_data;
	wpipe = rpipe->pipe_peer;

	/*
	 * Detect loss of the pipe read side; returning EPIPE lets the
	 * caller raise SIGPIPE.
	 */
	if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
		return (EPIPE);
	}
	++wpipe->pipe_busy;

	/*
	 * If it is advantageous to resize the pipe buffer, do so.
	 */
	if ((uio->uio_resid > PIPE_SIZE) &&
	    (nbigpipe < LIMITBIGPIPES) &&
	    (wpipe->pipe_buffer.size <= PIPE_SIZE) &&
	    (wpipe->pipe_buffer.cnt == 0)) {

		if ((error = pipelock(wpipe)) == 0) {
			if (pipespace(wpipe, BIG_PIPE_SIZE) == 0)
				nbigpipe++;
			pipeunlock(wpipe);
		}
	}

	/*
	 * If an early error occurred, unbusy and return, waking up any
	 * pending readers.
	 */
	if (error) {
		--wpipe->pipe_busy;
		if ((wpipe->pipe_busy == 0) &&
		    (wpipe->pipe_state & PIPE_WANT)) {
			wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
			wakeup(wpipe);
		}
		return (error);
	}

	orig_resid = uio->uio_resid;

	while (uio->uio_resid) {
		size_t space;

retrywrite:
		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			break;
		}

		space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;

		/* Writes of size <= PIPE_BUF must be atomic. */
		if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF))
			space = 0;

		if (space > 0) {
			if ((error = pipelock(wpipe)) == 0) {
				size_t size;	/* Transfer size */
				size_t segsize;	/* first segment to transfer */

				/*
				 * If a process blocked in uiomove, our
				 * value for space might be bad.
				 *
				 * XXX will we be ok if the reader has gone
				 * away here?
				 */
				if (space > wpipe->pipe_buffer.size -
				    wpipe->pipe_buffer.cnt) {
					pipeunlock(wpipe);
					goto retrywrite;
				}

				/*
				 * Transfer size is minimum of uio transfer
				 * and free space in pipe buffer.
				 */
				if (space > uio->uio_resid)
					size = uio->uio_resid;
				else
					size = space;
				/*
				 * First segment to transfer is minimum of
				 * transfer size and contiguous space in
				 * pipe buffer.  If first segment to transfer
				 * is less than the transfer size, we've got
				 * a wraparound in the buffer.
				 */
				segsize = wpipe->pipe_buffer.size -
					wpipe->pipe_buffer.in;
				if (segsize > size)
					segsize = size;

				/* Transfer first segment */

				error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
						segsize, uio);

				if (error == 0 && segsize < size) {
					/*
					 * Transfer remaining part now, to
					 * support atomic writes.  Wraparound
					 * happened.
					 */
#ifdef DIAGNOSTIC
					if (wpipe->pipe_buffer.in + segsize !=
					    wpipe->pipe_buffer.size)
						panic("Expected pipe buffer wraparound disappeared");
#endif

					error = uiomove(&wpipe->pipe_buffer.buffer[0],
							size - segsize, uio);
				}
				if (error == 0) {
					wpipe->pipe_buffer.in += size;
					if (wpipe->pipe_buffer.in >=
					    wpipe->pipe_buffer.size) {
#ifdef DIAGNOSTIC
						if (wpipe->pipe_buffer.in != size - segsize + wpipe->pipe_buffer.size)
							panic("Expected wraparound bad");
#endif
						wpipe->pipe_buffer.in = size - segsize;
					}

					wpipe->pipe_buffer.cnt += size;
#ifdef DIAGNOSTIC
					if (wpipe->pipe_buffer.cnt > wpipe->pipe_buffer.size)
						panic("Pipe buffer overflow");
#endif
				}
				pipeunlock(wpipe);
			}
			if (error)
				break;
		} else {
			/*
			 * If the "read-side" has been blocked, wake it up now.
			 */
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}

			/*
			 * don't block on non-blocking I/O
			 */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
				break;
			}

			/*
			 * We have no more space and have something to offer,
			 * so wake up select/poll.
			 */
			pipeselwakeup(wpipe);

			wpipe->pipe_state |= PIPE_WANTW;
			error = tsleep(wpipe, (PRIBIO + 1)|PCATCH,
			    "pipewr", 0);
			if (error)
				break;
			/*
			 * If the read side has gone away, return EPIPE;
			 * the caller turns this into SIGPIPE.
			 */
			if (wpipe->pipe_state & PIPE_EOF) {
				error = EPIPE;
				break;
			}
		}
	}

	--wpipe->pipe_busy;

	if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) {
		wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
		wakeup(wpipe);
	} else if (wpipe->pipe_buffer.cnt > 0) {
		/*
		 * If we have put any characters in the buffer, we wake up
		 * the reader.
		 */
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
	}

	/*
	 * Don't return EPIPE if I/O was successful
	 */
	if ((wpipe->pipe_buffer.cnt == 0) &&
	    (uio->uio_resid == 0) &&
	    (error == EPIPE)) {
		error = 0;
	}

	if (error == 0)
		getnanotime(&wpipe->pipe_mtime);
	/*
	 * We have something to offer, wake up select/poll.
	 */
	if (wpipe->pipe_buffer.cnt)
		pipeselwakeup(wpipe);

	return (error);
}
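
/*
 * The "space = 0" clamp in the loop above is what makes writes of at
 * most PIPE_BUF bytes atomic: a small write is never stored partially;
 * it either fits in one piece or the writer sleeps until it does.  An
 * illustrative userland consequence (a sketch, not part of this file):
 *
 *	char msg[PIPE_BUF], big[2 * PIPE_BUF];
 *
 *	write(fds[1], msg, sizeof(msg));	(never interleaved with
 *						 other writers' data)
 *	write(fds[1], big, sizeof(big));	(may be split, so it can
 *						 interleave)
 */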

/*
 * we implement a very minimal set of ioctls for compatibility with sockets.
 */
int
pipe_ioctl(struct file *fp, u_long cmd, caddr_t data, struct proc *p)
{
	struct pipe *mpipe = fp->f_data;

	switch (cmd) {

	case FIONBIO:
		return (0);

	case FIOASYNC:
		if (*(int *)data) {
			mpipe->pipe_state |= PIPE_ASYNC;
		} else {
			mpipe->pipe_state &= ~PIPE_ASYNC;
		}
		return (0);

	case FIONREAD:
		*(int *)data = mpipe->pipe_buffer.cnt;
		return (0);

	case SIOCSPGRP:
		mpipe->pipe_pgid = *(int *)data;
		return (0);

	case SIOCGPGRP:
		*(int *)data = mpipe->pipe_pgid;
		return (0);

	}
	return (ENOTTY);
}
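
/*
 * FIONREAD above exposes pipe_buffer.cnt to userland; a minimal
 * sketch (illustrative fragment, not part of this file):
 *
 *	int nbytes;
 *
 *	if (ioctl(fds[0], FIONREAD, &nbytes) == -1)
 *		err(1, "ioctl");
 *	printf("%d bytes buffered\n", nbytes);
 */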

int
pipe_poll(struct file *fp, int events, struct proc *p)
{
	struct pipe *rpipe = fp->f_data;
	struct pipe *wpipe;
	int revents = 0;

	wpipe = rpipe->pipe_peer;
	if (events & (POLLIN | POLLRDNORM)) {
		if ((rpipe->pipe_buffer.cnt > 0) ||
		    (rpipe->pipe_state & PIPE_EOF))
			revents |= events & (POLLIN | POLLRDNORM);
	}

	/* NOTE: POLLHUP and POLLOUT/POLLWRNORM are mutually exclusive */
	if ((rpipe->pipe_state & PIPE_EOF) ||
	    (wpipe == NULL) ||
	    (wpipe->pipe_state & PIPE_EOF))
		revents |= POLLHUP;
	else if (events & (POLLOUT | POLLWRNORM)) {
		if ((wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF)
			revents |= events & (POLLOUT | POLLWRNORM);
	}

	if (revents == 0) {
		if (events & (POLLIN | POLLRDNORM)) {
			selrecord(p, &rpipe->pipe_sel);
			rpipe->pipe_state |= PIPE_SEL;
		}
		if (events & (POLLOUT | POLLWRNORM)) {
			selrecord(p, &wpipe->pipe_sel);
			wpipe->pipe_state |= PIPE_SEL;
		}
	}
	return (revents);
}
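
/*
 * Against the logic above: POLLIN fires once data is buffered or the
 * writer is gone, POLLOUT once at least PIPE_BUF bytes are free, and
 * POLLHUP once either end has seen EOF.  A hedged poll(2) sketch
 * (illustrative, not part of this file):
 *
 *	struct pollfd pfd;
 *
 *	pfd.fd = fds[0];
 *	pfd.events = POLLIN;
 *	if (poll(&pfd, 1, INFTIM) == -1)
 *		err(1, "poll");
 *	if (pfd.revents & POLLHUP)
 *		warnx("write side closed");
 */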

int
pipe_stat(struct file *fp, struct stat *ub, struct proc *p)
{
	struct pipe *pipe = fp->f_data;

	memset(ub, 0, sizeof(*ub));
	ub->st_mode = S_IFIFO;
	ub->st_blksize = pipe->pipe_buffer.size;
	ub->st_size = pipe->pipe_buffer.cnt;
	ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize;
	ub->st_atim.tv_sec  = pipe->pipe_atime.tv_sec;
	ub->st_atim.tv_nsec = pipe->pipe_atime.tv_nsec;
	ub->st_mtim.tv_sec  = pipe->pipe_mtime.tv_sec;
	ub->st_mtim.tv_nsec = pipe->pipe_mtime.tv_nsec;
	ub->st_ctim.tv_sec  = pipe->pipe_ctime.tv_sec;
	ub->st_ctim.tv_nsec = pipe->pipe_ctime.tv_nsec;
	ub->st_uid = fp->f_cred->cr_uid;
	ub->st_gid = fp->f_cred->cr_gid;
	/*
	 * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen.
	 * XXX (st_dev, st_ino) should be unique.
	 */
	return (0);
}

int
pipe_close(struct file *fp, struct proc *p)
{
	struct pipe *cpipe = fp->f_data;

	fp->f_ops = NULL;
	fp->f_data = NULL;
	pipeclose(cpipe);
	return (0);
}

void
pipe_free_kmem(struct pipe *cpipe)
{
	if (cpipe->pipe_buffer.buffer != NULL) {
		if (cpipe->pipe_buffer.size > PIPE_SIZE)
			--nbigpipe;
		amountpipekva -= cpipe->pipe_buffer.size;
		km_free(cpipe->pipe_buffer.buffer, cpipe->pipe_buffer.size,
		    &kv_any, &kp_pageable);
		cpipe->pipe_buffer.buffer = NULL;
	}
}

/*
 * shutdown the pipe
 */
void
pipeclose(struct pipe *cpipe)
{
	struct pipe *ppipe;
	if (cpipe) {

		pipeselwakeup(cpipe);

		/*
		 * If the other side is blocked, wake it up saying that
		 * we want to close it down.
		 */
		cpipe->pipe_state |= PIPE_EOF;
		while (cpipe->pipe_busy) {
			wakeup(cpipe);
			cpipe->pipe_state |= PIPE_WANT;
			tsleep(cpipe, PRIBIO, "pipecl", 0);
		}

		/*
		 * Disconnect from peer
		 */
		if ((ppipe = cpipe->pipe_peer) != NULL) {
			pipeselwakeup(ppipe);

			ppipe->pipe_state |= PIPE_EOF;
			wakeup(ppipe);
			ppipe->pipe_peer = NULL;
		}

		/*
		 * free resources
		 */
		pipe_free_kmem(cpipe);
		pool_put(&pipe_pool, cpipe);
	}
}

int
pipe_kqfilter(struct file *fp, struct knote *kn)
{
	struct pipe *rpipe = kn->kn_fp->f_data;
	struct pipe *wpipe = rpipe->pipe_peer;

	switch (kn->kn_filter) {
	case EVFILT_READ:
		kn->kn_fop = &pipe_rfiltops;
		SLIST_INSERT_HEAD(&rpipe->pipe_sel.si_note, kn, kn_selnext);
		break;
	case EVFILT_WRITE:
		if (wpipe == NULL) {
			/* other end of pipe has been closed */
			return (EPIPE);
		}
		kn->kn_fop = &pipe_wfiltops;
		SLIST_INSERT_HEAD(&wpipe->pipe_sel.si_note, kn, kn_selnext);
		break;
	default:
		return (EINVAL);
	}

	return (0);
}
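
/*
 * The filters registered above drive EVFILT_READ/EVFILT_WRITE; a
 * minimal kqueue(2) sketch from userland (illustrative, not part of
 * this file):
 *
 *	struct kevent ev;
 *	int kq;
 *
 *	if ((kq = kqueue()) == -1)
 *		err(1, "kqueue");
 *	EV_SET(&ev, fds[0], EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	kevent(kq, &ev, 1, NULL, 0, NULL);	(register the knote)
 *	kevent(kq, NULL, 0, &ev, 1, NULL);	(filt_piperead() fills
 *						 ev.data, sets EV_EOF)
 */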

void
filt_pipedetach(struct knote *kn)
{
	struct pipe *rpipe = kn->kn_fp->f_data;
	struct pipe *wpipe = rpipe->pipe_peer;

	switch (kn->kn_filter) {
	case EVFILT_READ:
		SLIST_REMOVE(&rpipe->pipe_sel.si_note, kn, knote, kn_selnext);
		break;
	case EVFILT_WRITE:
		if (wpipe == NULL)
			return;
		SLIST_REMOVE(&wpipe->pipe_sel.si_note, kn, knote, kn_selnext);
		break;
	}
}

int
filt_piperead(struct knote *kn, long hint)
{
	struct pipe *rpipe = kn->kn_fp->f_data;
	struct pipe *wpipe = rpipe->pipe_peer;

	kn->kn_data = rpipe->pipe_buffer.cnt;

	if ((rpipe->pipe_state & PIPE_EOF) ||
	    (wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
		kn->kn_flags |= EV_EOF;
		return (1);
	}
	return (kn->kn_data > 0);
}

int
filt_pipewrite(struct knote *kn, long hint)
{
	struct pipe *rpipe = kn->kn_fp->f_data;
	struct pipe *wpipe = rpipe->pipe_peer;

	if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
		kn->kn_data = 0;
		kn->kn_flags |= EV_EOF;
		return (1);
	}
	kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;

	return (kn->kn_data >= PIPE_BUF);
}

void
pipe_init(void)
{
	pool_init(&pipe_pool, sizeof(struct pipe), 0, 0, PR_WAITOK, "pipepl",
	    NULL);
}