/* $OpenBSD: sys_pipe.c,v 1.148 2024/12/30 02:46:00 guenther Exp $ */

/*
 * Copyright (c) 1996 John S. Dyson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Absolutely no warranty of function or purpose is made by the author
 *    John S. Dyson.
 * 4. Modifications may be freely made to this file if the above conditions
 *    are met.
 */

/*
 * This file contains a high-performance replacement for the socket-based
 * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
 * all features of sockets, but does do everything that pipes normally
 * do.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/pool.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/signalvar.h>
#include <sys/mount.h>
#include <sys/syscallargs.h>
#include <sys/event.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <uvm/uvm_extern.h>

#include <sys/pipe.h>

struct pipe_pair {
	struct pipe pp_wpipe;
	struct pipe pp_rpipe;
	struct rwlock pp_lock;
};

/*
 * interfaces to the outside world
 */
int	pipe_read(struct file *, struct uio *, int);
int	pipe_write(struct file *, struct uio *, int);
int	pipe_close(struct file *, struct proc *);
int	pipe_kqfilter(struct file *fp, struct knote *kn);
int	pipe_ioctl(struct file *, u_long, caddr_t, struct proc *);
int	pipe_stat(struct file *fp, struct stat *ub, struct proc *p);

static const struct fileops pipeops = {
	.fo_read	= pipe_read,
	.fo_write	= pipe_write,
	.fo_ioctl	= pipe_ioctl,
	.fo_kqfilter	= pipe_kqfilter,
	.fo_stat	= pipe_stat,
	.fo_close	= pipe_close
};

void	filt_pipedetach(struct knote *kn);
int	filt_piperead(struct knote *kn, long hint);
int	filt_pipewrite(struct knote *kn, long hint);
int	filt_pipeexcept(struct knote *kn, long hint);
int	filt_pipemodify(struct kevent *kev, struct knote *kn);
int	filt_pipeprocess(struct knote *kn, struct kevent *kev);

const struct filterops pipe_rfiltops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_pipedetach,
	.f_event	= filt_piperead,
	.f_modify	= filt_pipemodify,
	.f_process	= filt_pipeprocess,
};

const struct filterops pipe_wfiltops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_pipedetach,
	.f_event	= filt_pipewrite,
	.f_modify	= filt_pipemodify,
	.f_process	= filt_pipeprocess,
};

const struct filterops pipe_efiltops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_pipedetach,
	.f_event	= filt_pipeexcept,
	.f_modify	= filt_pipemodify,
	.f_process	= filt_pipeprocess,
};

/*
 * Default pipe buffer size(s); this can be kind-of large now because pipe
 * space is pageable.  The pipe code will try to maintain locality of
 * reference for performance reasons, so small amounts of outstanding I/O
 * will not wipe the cache.
 */
#define MINPIPESIZE (PIPE_SIZE/3)

/*
 * Limit the number of "big" pipes
 */
#define LIMITBIGPIPES	32
unsigned int nbigpipe;
static unsigned int amountpipekva;

struct pool pipe_pair_pool;

int	dopipe(struct proc *, int *, int);
void	pipe_wakeup(struct pipe *);

int	pipe_create(struct pipe *);
void	pipe_destroy(struct pipe *);
int	pipe_rundown(struct pipe *);
struct pipe *pipe_peer(struct pipe *);
int	pipe_buffer_realloc(struct pipe *, u_int);
void	pipe_buffer_free(struct pipe *);

int	pipe_iolock(struct pipe *);
void	pipe_iounlock(struct pipe *);
int	pipe_iosleep(struct pipe *, const char *);

struct pipe_pair *pipe_pair_create(void);
void	pipe_pair_destroy(struct pipe_pair *);

/*
 * The pipe system call for the DTYPE_PIPE type of pipes
 */

int
sys_pipe(struct proc *p, void *v, register_t *retval)
{
	struct sys_pipe_args /* {
		syscallarg(int *) fdp;
	} */ *uap = v;

	return (dopipe(p, SCARG(uap, fdp), 0));
}

int
sys_pipe2(struct proc *p, void *v, register_t *retval)
{
	struct sys_pipe2_args /* {
		syscallarg(int *) fdp;
		syscallarg(int) flags;
	} */ *uap = v;

	if (SCARG(uap, flags) & ~(O_CLOEXEC | FNONBLOCK))
		return (EINVAL);

	return (dopipe(p, SCARG(uap, fdp), SCARG(uap, flags)));
}
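
/*
 * Illustrative userland sketch (not part of this file, and assuming the
 * standard <unistd.h>/<fcntl.h> declarations): the flag check above means
 * pipe2(2) accepts only O_CLOEXEC and O_NONBLOCK (FNONBLOCK in kernel
 * terms), so a minimal caller looks like:
 *
 *	#include <err.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int fds[2];
 *	if (pipe2(fds, O_CLOEXEC | O_NONBLOCK) == -1)
 *		err(1, "pipe2");
 *	// fds[0] is the read end, fds[1] the write end
 */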

int
dopipe(struct proc *p, int *ufds, int flags)
{
	struct filedesc *fdp = p->p_fd;
	struct file *rf, *wf;
	struct pipe_pair *pp;
	struct pipe *rpipe, *wpipe = NULL;
	int fds[2], cloexec, error;

	cloexec = (flags & O_CLOEXEC) ? UF_EXCLOSE : 0;

	pp = pipe_pair_create();
	if (pp == NULL)
		return (ENOMEM);
	wpipe = &pp->pp_wpipe;
	rpipe = &pp->pp_rpipe;

	fdplock(fdp);

	error = falloc(p, &rf, &fds[0]);
	if (error != 0)
		goto free2;
	rf->f_flag = FREAD | FWRITE | (flags & FNONBLOCK);
	rf->f_type = DTYPE_PIPE;
	rf->f_data = rpipe;
	rf->f_ops = &pipeops;

	error = falloc(p, &wf, &fds[1]);
	if (error != 0)
		goto free3;
	wf->f_flag = FREAD | FWRITE | (flags & FNONBLOCK);
	wf->f_type = DTYPE_PIPE;
	wf->f_data = wpipe;
	wf->f_ops = &pipeops;

	fdinsert(fdp, fds[0], cloexec, rf);
	fdinsert(fdp, fds[1], cloexec, wf);

	error = copyout(fds, ufds, sizeof(fds));
	if (error == 0) {
		fdpunlock(fdp);
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrfds(p, fds, 2);
#endif
	} else {
		/* fdrelease() unlocks fdp. */
		fdrelease(p, fds[0]);
		fdplock(fdp);
		fdrelease(p, fds[1]);
	}

	FRELE(rf, p);
	FRELE(wf, p);
	return (error);

free3:
	fdremove(fdp, fds[0]);
	closef(rf, p);
	rpipe = NULL;
free2:
	fdpunlock(fdp);
	pipe_destroy(wpipe);
	pipe_destroy(rpipe);
	return (error);
}

/*
 * Allocate kva for the pipe circular buffer; the space is pageable.
 * This routine will 'realloc' the size of a pipe safely; if allocation
 * fails, it retains the old buffer and returns ENOMEM.
 */
int
pipe_buffer_realloc(struct pipe *cpipe, u_int size)
{
	caddr_t buffer;

	/* buffer uninitialized or pipe locked */
	KASSERT((cpipe->pipe_buffer.buffer == NULL) ||
	    (cpipe->pipe_state & PIPE_LOCK));

	/* buffer should be empty */
	KASSERT(cpipe->pipe_buffer.cnt == 0);

	buffer = km_alloc(size, &kv_any, &kp_pageable, &kd_waitok);
	if (buffer == NULL)
		return (ENOMEM);

	/* free old resources if we are resizing */
	pipe_buffer_free(cpipe);

	cpipe->pipe_buffer.buffer = buffer;
	cpipe->pipe_buffer.size = size;
	cpipe->pipe_buffer.in = 0;
	cpipe->pipe_buffer.out = 0;

	atomic_add_int(&amountpipekva, cpipe->pipe_buffer.size);

	return (0);
}

/*
 * initialize and allocate VM and memory for pipe
 */
int
pipe_create(struct pipe *cpipe)
{
	int error;

	error = pipe_buffer_realloc(cpipe, PIPE_SIZE);
	if (error != 0)
		return (error);

	sigio_init(&cpipe->pipe_sigio);

	getnanotime(&cpipe->pipe_ctime);
	cpipe->pipe_atime = cpipe->pipe_ctime;
	cpipe->pipe_mtime = cpipe->pipe_ctime;

	return (0);
}

struct pipe *
pipe_peer(struct pipe *cpipe)
{
	struct pipe *peer;

	rw_assert_anylock(cpipe->pipe_lock);

	peer = cpipe->pipe_peer;
	if (peer == NULL || (peer->pipe_state & PIPE_EOF))
		return (NULL);
	return (peer);
}

/*
 * Lock a pipe for exclusive I/O access.
 */
int
pipe_iolock(struct pipe *cpipe)
{
	int error;

	rw_assert_wrlock(cpipe->pipe_lock);

	while (cpipe->pipe_state & PIPE_LOCK) {
		cpipe->pipe_state |= PIPE_LWANT;
		error = rwsleep_nsec(cpipe, cpipe->pipe_lock, PRIBIO | PCATCH,
		    "pipeiolk", INFSLP);
		if (error)
			return (error);
	}
	cpipe->pipe_state |= PIPE_LOCK;
	return (0);
}

/*
 * Unlock a pipe I/O lock.
 */
void
pipe_iounlock(struct pipe *cpipe)
{
	rw_assert_wrlock(cpipe->pipe_lock);
	KASSERT(cpipe->pipe_state & PIPE_LOCK);

	cpipe->pipe_state &= ~PIPE_LOCK;
	if (cpipe->pipe_state & PIPE_LWANT) {
		cpipe->pipe_state &= ~PIPE_LWANT;
		wakeup(cpipe);
	}
}

/*
 * Unlock the pipe I/O lock and go to sleep.  Returns 0 on success, with the
 * I/O lock relocked.  Otherwise, if a signal was caught, returns non-zero
 * with the I/O lock not held.
 *
 * Any caller must obtain a reference to the pipe by incrementing `pipe_busy'
 * before calling this function in order to ensure that the same pipe is not
 * destroyed while sleeping.
 */
int
pipe_iosleep(struct pipe *cpipe, const char *wmesg)
{
	int error;

	pipe_iounlock(cpipe);
	error = rwsleep_nsec(cpipe, cpipe->pipe_lock, PRIBIO | PCATCH, wmesg,
	    INFSLP);
	if (error)
		return (error);
	return (pipe_iolock(cpipe));
}
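
/*
 * Sketch of the canonical caller pattern for the I/O lock, as used by
 * pipe_read() and pipe_write() below.  The `pipe_busy' reference keeps
 * pipe_destroy() from tearing the pipe down while the caller sleeps:
 *
 *	rw_enter_write(cpipe->pipe_lock);
 *	++cpipe->pipe_busy;
 *	error = pipe_iolock(cpipe);
 *	...
 *	error = pipe_iosleep(cpipe, "wmesg");	// drops, then retakes, I/O lock
 *	...
 *	pipe_iounlock(cpipe);
 *	--cpipe->pipe_busy;
 *	pipe_rundown(cpipe);
 *	rw_exit_write(cpipe->pipe_lock);
 */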

void
pipe_wakeup(struct pipe *cpipe)
{
	rw_assert_wrlock(cpipe->pipe_lock);

	knote_locked(&cpipe->pipe_klist, 0);

	if (cpipe->pipe_state & PIPE_ASYNC)
		pgsigio(&cpipe->pipe_sigio, SIGIO, 0);
}

int
pipe_read(struct file *fp, struct uio *uio, int fflags)
{
	struct pipe *rpipe = fp->f_data;
	size_t nread = 0, size;
	int error;

	rw_enter_write(rpipe->pipe_lock);
	++rpipe->pipe_busy;
	error = pipe_iolock(rpipe);
	if (error) {
		--rpipe->pipe_busy;
		pipe_rundown(rpipe);
		rw_exit_write(rpipe->pipe_lock);
		return (error);
	}

	while (uio->uio_resid) {
		/* Normal pipe buffer receive. */
		if (rpipe->pipe_buffer.cnt > 0) {
			size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
			if (size > rpipe->pipe_buffer.cnt)
				size = rpipe->pipe_buffer.cnt;
			if (size > uio->uio_resid)
				size = uio->uio_resid;
			rw_exit_write(rpipe->pipe_lock);
			error = uiomove(&rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
			    size, uio);
			rw_enter_write(rpipe->pipe_lock);
			if (error) {
				break;
			}
			rpipe->pipe_buffer.out += size;
			if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
				rpipe->pipe_buffer.out = 0;

			rpipe->pipe_buffer.cnt -= size;
			/*
			 * If there is no more to read in the pipe, reset
			 * its pointers to the beginning.  This improves
			 * cache hit stats.
			 */
			if (rpipe->pipe_buffer.cnt == 0) {
				rpipe->pipe_buffer.in = 0;
				rpipe->pipe_buffer.out = 0;
			}
			nread += size;
		} else {
			/*
			 * detect EOF condition
			 * read returns 0 on EOF, no need to set error
			 */
			if (rpipe->pipe_state & PIPE_EOF)
				break;

			/* If the "write-side" has been blocked, wake it up. */
			if (rpipe->pipe_state & PIPE_WANTW) {
				rpipe->pipe_state &= ~PIPE_WANTW;
				wakeup(rpipe);
			}

			/* Break if some data was read. */
			if (nread > 0)
				break;

			/* Handle non-blocking mode operation. */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
				break;
			}

			/* Wait for more data. */
			rpipe->pipe_state |= PIPE_WANTR;
			error = pipe_iosleep(rpipe, "piperd");
			if (error)
				goto unlocked_error;
		}
	}
	pipe_iounlock(rpipe);

	if (error == 0)
		getnanotime(&rpipe->pipe_atime);
unlocked_error:
	--rpipe->pipe_busy;

	if (pipe_rundown(rpipe) == 0 && rpipe->pipe_buffer.cnt < MINPIPESIZE) {
		/* Handle write blocking hysteresis. */
		if (rpipe->pipe_state & PIPE_WANTW) {
			rpipe->pipe_state &= ~PIPE_WANTW;
			wakeup(rpipe);
		}
	}

	if (rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt >= PIPE_BUF)
		pipe_wakeup(rpipe);

	rw_exit_write(rpipe->pipe_lock);
	return (error);
}

int
pipe_write(struct file *fp, struct uio *uio, int fflags)
{
	struct pipe *rpipe = fp->f_data, *wpipe;
	struct rwlock *lock = rpipe->pipe_lock;
	size_t orig_resid;
	int error;

	rw_enter_write(lock);
	wpipe = pipe_peer(rpipe);

	/* Detect loss of pipe read side, issue SIGPIPE if lost. */
	if (wpipe == NULL) {
		rw_exit_write(lock);
		return (EPIPE);
	}

	++wpipe->pipe_busy;
	error = pipe_iolock(wpipe);
	if (error) {
		--wpipe->pipe_busy;
		pipe_rundown(wpipe);
		rw_exit_write(lock);
		return (error);
	}

	/* If it is advantageous to resize the pipe buffer, do so. */
	if (uio->uio_resid > PIPE_SIZE &&
	    wpipe->pipe_buffer.size <= PIPE_SIZE &&
	    wpipe->pipe_buffer.cnt == 0) {
		unsigned int npipe;

		npipe = atomic_inc_int_nv(&nbigpipe);
		if (npipe > LIMITBIGPIPES ||
		    pipe_buffer_realloc(wpipe, BIG_PIPE_SIZE) != 0)
			atomic_dec_int(&nbigpipe);
	}

	orig_resid = uio->uio_resid;

	while (uio->uio_resid) {
		size_t space;

		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			break;
		}

		space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;

		/* Writes of size <= PIPE_BUF must be atomic. */
		if (space < uio->uio_resid && orig_resid <= PIPE_BUF)
			space = 0;

		if (space > 0) {
			size_t size;	/* Transfer size */
			size_t segsize;	/* first segment to transfer */

			/*
			 * Transfer size is minimum of uio transfer
			 * and free space in pipe buffer.
			 */
			if (space > uio->uio_resid)
				size = uio->uio_resid;
			else
				size = space;
			/*
			 * First segment to transfer is minimum of
			 * transfer size and contiguous space in
			 * pipe buffer.  If first segment to transfer
			 * is less than the transfer size, we've got
			 * a wraparound in the buffer.
			 */
			segsize = wpipe->pipe_buffer.size -
			    wpipe->pipe_buffer.in;
			if (segsize > size)
				segsize = size;

			/* Transfer first segment */

			rw_exit_write(lock);
			error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
			    segsize, uio);
			rw_enter_write(lock);

			if (error == 0 && segsize < size) {
				/*
				 * Transfer remaining part now, to
				 * support atomic writes.  Wraparound
				 * happened.
				 */
#ifdef DIAGNOSTIC
				if (wpipe->pipe_buffer.in + segsize !=
				    wpipe->pipe_buffer.size)
					panic("Expected pipe buffer wraparound disappeared");
#endif

				rw_exit_write(lock);
				error = uiomove(&wpipe->pipe_buffer.buffer[0],
				    size - segsize, uio);
				rw_enter_write(lock);
			}
			if (error == 0) {
				wpipe->pipe_buffer.in += size;
				if (wpipe->pipe_buffer.in >=
				    wpipe->pipe_buffer.size) {
#ifdef DIAGNOSTIC
					if (wpipe->pipe_buffer.in != size - segsize + wpipe->pipe_buffer.size)
						panic("Expected wraparound bad");
#endif
					wpipe->pipe_buffer.in = size - segsize;
				}

				wpipe->pipe_buffer.cnt += size;
#ifdef DIAGNOSTIC
				if (wpipe->pipe_buffer.cnt > wpipe->pipe_buffer.size)
					panic("Pipe buffer overflow");
#endif
			}
			if (error)
				break;
		} else {
			/* If the "read-side" has been blocked, wake it up. */
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}

			/* Don't block on non-blocking I/O. */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
				break;
			}

			/*
			 * We have no more space and have something to offer,
			 * wake up select/poll.
			 */
			pipe_wakeup(wpipe);

			wpipe->pipe_state |= PIPE_WANTW;
			error = pipe_iosleep(wpipe, "pipewr");
			if (error)
				goto unlocked_error;

			/*
			 * If read side wants to go away, we just issue a
			 * signal to ourselves.
			 */
			if (wpipe->pipe_state & PIPE_EOF) {
				error = EPIPE;
				break;
			}
		}
	}
	pipe_iounlock(wpipe);

unlocked_error:
	--wpipe->pipe_busy;

	if (pipe_rundown(wpipe) == 0 && wpipe->pipe_buffer.cnt > 0) {
		/*
		 * If we have put any characters in the buffer, we wake up
		 * the reader.
		 */
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
	}

	/* Don't return EPIPE if I/O was successful. */
	if (wpipe->pipe_buffer.cnt == 0 &&
	    uio->uio_resid == 0 &&
	    error == EPIPE) {
		error = 0;
	}

	if (error == 0)
		getnanotime(&wpipe->pipe_mtime);
	/* We have something to offer, wake up select/poll. */
	if (wpipe->pipe_buffer.cnt)
		pipe_wakeup(wpipe);

	rw_exit_write(lock);
	return (error);
}
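
/*
 * Illustrative userland sketch (not part of this file): the `space = 0'
 * case above is what implements the POSIX guarantee that writes of at
 * most PIPE_BUF bytes are atomic, so concurrent writers never see their
 * records interleaved mid-write:
 *
 *	#include <limits.h>
 *	#include <unistd.h>
 *
 *	char record[64];	// 64 <= PIPE_BUF: delivered whole or not at all
 *	write(fds[1], record, sizeof(record));
 */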

/*
 * we implement a very minimal set of ioctls for compatibility with sockets.
 */
int
pipe_ioctl(struct file *fp, u_long cmd, caddr_t data, struct proc *p)
{
	struct pipe *mpipe = fp->f_data;
	int error = 0;

	switch (cmd) {

	case FIOASYNC:
		rw_enter_write(mpipe->pipe_lock);
		if (*(int *)data) {
			mpipe->pipe_state |= PIPE_ASYNC;
		} else {
			mpipe->pipe_state &= ~PIPE_ASYNC;
		}
		rw_exit_write(mpipe->pipe_lock);
		break;

	case FIONREAD:
		rw_enter_read(mpipe->pipe_lock);
		*(int *)data = mpipe->pipe_buffer.cnt;
		rw_exit_read(mpipe->pipe_lock);
		break;

	case FIOSETOWN:
	case SIOCSPGRP:
	case TIOCSPGRP:
		error = sigio_setown(&mpipe->pipe_sigio, cmd, data);
		break;

	case FIOGETOWN:
	case SIOCGPGRP:
	case TIOCGPGRP:
		sigio_getown(&mpipe->pipe_sigio, cmd, data);
		break;

	default:
		error = ENOTTY;
	}

	return (error);
}
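
/*
 * Illustrative userland sketch (not part of this file): FIONREAD, as
 * handled above, reports how many bytes are currently buffered in the
 * pipe:
 *
 *	#include <sys/ioctl.h>
 *	#include <stdio.h>
 *
 *	int nbytes;
 *	if (ioctl(fds[0], FIONREAD, &nbytes) == 0)
 *		printf("%d bytes buffered\n", nbytes);
 */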

int
pipe_stat(struct file *fp, struct stat *ub, struct proc *p)
{
	struct pipe *pipe = fp->f_data;

	memset(ub, 0, sizeof(*ub));

	rw_enter_read(pipe->pipe_lock);
	ub->st_mode = S_IFIFO;
	ub->st_blksize = pipe->pipe_buffer.size;
	ub->st_size = pipe->pipe_buffer.cnt;
	ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize;
	ub->st_atim.tv_sec = pipe->pipe_atime.tv_sec;
	ub->st_atim.tv_nsec = pipe->pipe_atime.tv_nsec;
	ub->st_mtim.tv_sec = pipe->pipe_mtime.tv_sec;
	ub->st_mtim.tv_nsec = pipe->pipe_mtime.tv_nsec;
	ub->st_ctim.tv_sec = pipe->pipe_ctime.tv_sec;
	ub->st_ctim.tv_nsec = pipe->pipe_ctime.tv_nsec;
	ub->st_uid = fp->f_cred->cr_uid;
	ub->st_gid = fp->f_cred->cr_gid;
	rw_exit_read(pipe->pipe_lock);
	/*
	 * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen.
	 * XXX (st_dev, st_ino) should be unique.
	 */
	return (0);
}
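
/*
 * Illustrative userland sketch (not part of this file): the fields
 * filled in above are what fstat(2) reports for a pipe descriptor,
 * e.g. st_size is the byte count currently buffered:
 *
 *	#include <sys/stat.h>
 *	#include <stdio.h>
 *
 *	struct stat sb;
 *	if (fstat(fds[0], &sb) == 0 && S_ISFIFO(sb.st_mode))
 *		printf("%lld bytes buffered\n", (long long)sb.st_size);
 */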

int
pipe_close(struct file *fp, struct proc *p)
{
	struct pipe *cpipe = fp->f_data;

	fp->f_ops = NULL;
	fp->f_data = NULL;
	pipe_destroy(cpipe);
	return (0);
}

/*
 * Free kva for pipe circular buffer.
 * No pipe lock check is needed, as this is only called from
 * pipe_buffer_realloc() and pipe_destroy().
 */
void
pipe_buffer_free(struct pipe *cpipe)
{
	u_int size;

	if (cpipe->pipe_buffer.buffer == NULL)
		return;

	size = cpipe->pipe_buffer.size;

	km_free(cpipe->pipe_buffer.buffer, size, &kv_any, &kp_pageable);

	cpipe->pipe_buffer.buffer = NULL;

	atomic_sub_int(&amountpipekva, size);
	if (size > PIPE_SIZE)
		atomic_dec_int(&nbigpipe);
}

/*
 * shutdown the pipe, and free resources.
 */
void
pipe_destroy(struct pipe *cpipe)
{
	struct pipe *ppipe;

	if (cpipe == NULL)
		return;

	rw_enter_write(cpipe->pipe_lock);

	pipe_wakeup(cpipe);
	sigio_free(&cpipe->pipe_sigio);

	/*
	 * If the other side is blocked, wake it up saying that
	 * we want to close it down.
	 */
	cpipe->pipe_state |= PIPE_EOF;
	while (cpipe->pipe_busy) {
		wakeup(cpipe);
		cpipe->pipe_state |= PIPE_WANTD;
		rwsleep_nsec(cpipe, cpipe->pipe_lock, PRIBIO, "pipecl", INFSLP);
	}

	/* Disconnect from peer. */
	if ((ppipe = cpipe->pipe_peer) != NULL) {
		pipe_wakeup(ppipe);

		ppipe->pipe_state |= PIPE_EOF;
		wakeup(ppipe);
		ppipe->pipe_peer = NULL;
	}

	pipe_buffer_free(cpipe);

	rw_exit_write(cpipe->pipe_lock);

	if (ppipe == NULL)
		pipe_pair_destroy(cpipe->pipe_pair);
}

/*
 * Returns non-zero if a rundown is currently ongoing.
 */
int
pipe_rundown(struct pipe *cpipe)
{
	rw_assert_wrlock(cpipe->pipe_lock);

	if (cpipe->pipe_busy > 0 || (cpipe->pipe_state & PIPE_WANTD) == 0)
		return (0);

	/* Only wakeup pipe_destroy() once the pipe is no longer busy. */
	cpipe->pipe_state &= ~(PIPE_WANTD | PIPE_WANTR | PIPE_WANTW);
	wakeup(cpipe);
	return (1);
}

int
pipe_kqfilter(struct file *fp, struct knote *kn)
{
	struct pipe *rpipe = kn->kn_fp->f_data, *wpipe;
	struct rwlock *lock = rpipe->pipe_lock;
	int error = 0;

	rw_enter_write(lock);
	wpipe = pipe_peer(rpipe);

	switch (kn->kn_filter) {
	case EVFILT_READ:
		kn->kn_fop = &pipe_rfiltops;
		kn->kn_hook = rpipe;
		klist_insert_locked(&rpipe->pipe_klist, kn);
		break;
	case EVFILT_WRITE:
		if (wpipe == NULL) {
			/*
			 * The other end of the pipe has been closed.
			 * Since the filter now always indicates a pending
			 * event, attach the knote to the current side
			 * to proceed with the registration.
			 */
			wpipe = rpipe;
		}
		kn->kn_fop = &pipe_wfiltops;
		kn->kn_hook = wpipe;
		klist_insert_locked(&wpipe->pipe_klist, kn);
		break;
	case EVFILT_EXCEPT:
		if (kn->kn_flags & __EV_SELECT) {
			/* Prevent triggering exceptfds. */
			error = EPERM;
			break;
		}
		if ((kn->kn_flags & __EV_POLL) == 0) {
			/* Disallow usage through kevent(2). */
			error = EINVAL;
			break;
		}
		kn->kn_fop = &pipe_efiltops;
		kn->kn_hook = rpipe;
		klist_insert_locked(&rpipe->pipe_klist, kn);
		break;
	default:
		error = EINVAL;
	}

	rw_exit_write(lock);

	return (error);
}
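
/*
 * Illustrative userland sketch (not part of this file): registering the
 * EVFILT_READ filter attached above and waiting for the pipe to become
 * readable; on return, kev.data holds the byte count computed by
 * filt_piperead() below:
 *
 *	#include <sys/event.h>
 *
 *	struct kevent kev;
 *	int kq = kqueue();
 *	EV_SET(&kev, fds[0], EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	kevent(kq, &kev, 1, NULL, 0, NULL);	// register
 *	kevent(kq, NULL, 0, &kev, 1, NULL);	// block until readable
 */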

void
filt_pipedetach(struct knote *kn)
{
	struct pipe *cpipe = kn->kn_hook;

	klist_remove(&cpipe->pipe_klist, kn);
}

int
filt_piperead(struct knote *kn, long hint)
{
	struct pipe *rpipe = kn->kn_fp->f_data, *wpipe;

	rw_assert_wrlock(rpipe->pipe_lock);

	wpipe = pipe_peer(rpipe);

	kn->kn_data = rpipe->pipe_buffer.cnt;

	if ((rpipe->pipe_state & PIPE_EOF) || wpipe == NULL) {
		kn->kn_flags |= EV_EOF;
		if (kn->kn_flags & __EV_POLL)
			kn->kn_flags |= __EV_HUP;
		return (1);
	}

	return (kn->kn_data > 0);
}

int
filt_pipewrite(struct knote *kn, long hint)
{
	struct pipe *rpipe = kn->kn_fp->f_data, *wpipe;

	rw_assert_wrlock(rpipe->pipe_lock);

	wpipe = pipe_peer(rpipe);

	if (wpipe == NULL) {
		kn->kn_data = 0;
		kn->kn_flags |= EV_EOF;
		if (kn->kn_flags & __EV_POLL)
			kn->kn_flags |= __EV_HUP;
		return (1);
	}
	kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;

	return (kn->kn_data >= PIPE_BUF);
}

int
filt_pipeexcept(struct knote *kn, long hint)
{
	struct pipe *rpipe = kn->kn_fp->f_data, *wpipe;
	int active = 0;

	rw_assert_wrlock(rpipe->pipe_lock);

	wpipe = pipe_peer(rpipe);

	if (kn->kn_flags & __EV_POLL) {
		if ((rpipe->pipe_state & PIPE_EOF) || wpipe == NULL) {
			kn->kn_flags |= __EV_HUP;
			active = 1;
		}
	}

	return (active);
}

int
filt_pipemodify(struct kevent *kev, struct knote *kn)
{
	struct pipe *rpipe = kn->kn_fp->f_data;
	int active;

	rw_enter_write(rpipe->pipe_lock);
	active = knote_modify(kev, kn);
	rw_exit_write(rpipe->pipe_lock);

	return (active);
}

int
filt_pipeprocess(struct knote *kn, struct kevent *kev)
{
	struct pipe *rpipe = kn->kn_fp->f_data;
	int active;

	rw_enter_write(rpipe->pipe_lock);
	active = knote_process(kn, kev);
	rw_exit_write(rpipe->pipe_lock);

	return (active);
}

void
pipe_init(void)
{
	pool_init(&pipe_pair_pool, sizeof(struct pipe_pair), 0, IPL_MPFLOOR,
	    PR_WAITOK, "pipepl", NULL);
}

struct pipe_pair *
pipe_pair_create(void)
{
	struct pipe_pair *pp;

	pp = pool_get(&pipe_pair_pool, PR_WAITOK | PR_ZERO);
	pp->pp_wpipe.pipe_pair = pp;
	pp->pp_rpipe.pipe_pair = pp;
	pp->pp_wpipe.pipe_peer = &pp->pp_rpipe;
	pp->pp_rpipe.pipe_peer = &pp->pp_wpipe;
	/*
	 * One lock is used per pipe pair in order to obtain exclusive
	 * access to the pipe pair.
	 */
	rw_init(&pp->pp_lock, "pipelk");
	pp->pp_wpipe.pipe_lock = &pp->pp_lock;
	pp->pp_rpipe.pipe_lock = &pp->pp_lock;

	klist_init_rwlock(&pp->pp_wpipe.pipe_klist, &pp->pp_lock);
	klist_init_rwlock(&pp->pp_rpipe.pipe_klist, &pp->pp_lock);

	if (pipe_create(&pp->pp_wpipe) || pipe_create(&pp->pp_rpipe))
		goto err;
	return (pp);
err:
	pipe_destroy(&pp->pp_wpipe);
	pipe_destroy(&pp->pp_rpipe);
	return (NULL);
}

void
pipe_pair_destroy(struct pipe_pair *pp)
{
	klist_free(&pp->pp_wpipe.pipe_klist);
	klist_free(&pp->pp_rpipe.pipe_klist);
	pool_put(&pipe_pair_pool, pp);
}