/*	$OpenBSD: sys_pipe.c,v 1.146 2023/05/09 14:22:17 visa Exp $	*/

/*
 * Copyright (c) 1996 John S. Dyson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Absolutely no warranty of function or purpose is made by the author
 *    John S. Dyson.
 * 4. Modifications may be freely made to this file if the above conditions
 *    are met.
 */

/*
 * This file contains a high-performance replacement for the socket-based
 * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
 * all features of sockets, but does do everything that pipes normally
 * do.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/pool.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/signalvar.h>
#include <sys/mount.h>
#include <sys/syscallargs.h>
#include <sys/event.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <uvm/uvm_extern.h>

#include <sys/pipe.h>

struct pipe_pair {
	struct pipe pp_wpipe;
	struct pipe pp_rpipe;
	struct rwlock pp_lock;
};

/*
 * interfaces to the outside world
 */
int	pipe_read(struct file *, struct uio *, int);
int	pipe_write(struct file *, struct uio *, int);
int	pipe_close(struct file *, struct proc *);
int	pipe_kqfilter(struct file *fp, struct knote *kn);
int	pipe_ioctl(struct file *, u_long, caddr_t, struct proc *);
int	pipe_stat(struct file *fp, struct stat *ub, struct proc *p);

static const struct fileops pipeops = {
	.fo_read	= pipe_read,
	.fo_write	= pipe_write,
	.fo_ioctl	= pipe_ioctl,
	.fo_kqfilter	= pipe_kqfilter,
	.fo_stat	= pipe_stat,
	.fo_close	= pipe_close
};

void	filt_pipedetach(struct knote *kn);
int	filt_piperead(struct knote *kn, long hint);
int	filt_pipewrite(struct knote *kn, long hint);
int	filt_pipeexcept(struct knote *kn, long hint);
int	filt_pipemodify(struct kevent *kev, struct knote *kn);
int	filt_pipeprocess(struct knote *kn, struct kevent *kev);

const struct filterops pipe_rfiltops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_pipedetach,
	.f_event	= filt_piperead,
	.f_modify	= filt_pipemodify,
	.f_process	= filt_pipeprocess,
};

const struct filterops pipe_wfiltops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_pipedetach,
	.f_event	= filt_pipewrite,
	.f_modify	= filt_pipemodify,
	.f_process	= filt_pipeprocess,
};

const struct filterops pipe_efiltops = {
	.f_flags	= FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach	= NULL,
	.f_detach	= filt_pipedetach,
	.f_event	= filt_pipeexcept,
	.f_modify	= filt_pipemodify,
	.f_process	= filt_pipeprocess,
};

/*
 * Default pipe buffer size(s), this can be kind-of large now because pipe
 * space is pageable.  The pipe code will try to maintain locality of
 * reference for performance reasons, so small amounts of outstanding I/O
 * will not wipe the cache.
 */
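/*
 * MINPIPESIZE implements write-wakeup hysteresis: at the end of
 * pipe_read(), a writer blocked in PIPE_WANTW is only woken once the
 * amount of buffered data has dropped below this threshold, so the
 * writer is not woken for every few bytes read.
 */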
#define MINPIPESIZE	(PIPE_SIZE/3)

/*
 * Limit the number of "big" pipes
 */
#define LIMITBIGPIPES	32
unsigned int nbigpipe;
static unsigned int amountpipekva;

struct pool pipe_pair_pool;

int	dopipe(struct proc *, int *, int);
void	pipe_wakeup(struct pipe *);

int	pipe_create(struct pipe *);
void	pipe_destroy(struct pipe *);
int	pipe_rundown(struct pipe *);
struct pipe *pipe_peer(struct pipe *);
int	pipe_buffer_realloc(struct pipe *, u_int);
void	pipe_buffer_free(struct pipe *);

int	pipe_iolock(struct pipe *);
void	pipe_iounlock(struct pipe *);
int	pipe_iosleep(struct pipe *, const char *);

struct pipe_pair *pipe_pair_create(void);
void	pipe_pair_destroy(struct pipe_pair *);

/*
 * The pipe system call for the DTYPE_PIPE type of pipes
 */

int
sys_pipe(struct proc *p, void *v, register_t *retval)
{
	struct sys_pipe_args /* {
		syscallarg(int *) fdp;
	} */ *uap = v;

	return (dopipe(p, SCARG(uap, fdp), 0));
}

int
sys_pipe2(struct proc *p, void *v, register_t *retval)
{
	struct sys_pipe2_args /* {
		syscallarg(int *) fdp;
		syscallarg(int) flags;
	} */ *uap = v;

	if (SCARG(uap, flags) & ~(O_CLOEXEC | FNONBLOCK))
		return (EINVAL);

	return (dopipe(p, SCARG(uap, fdp), SCARG(uap, flags)));
}

int
dopipe(struct proc *p, int *ufds, int flags)
{
	struct filedesc *fdp = p->p_fd;
	struct file *rf, *wf;
	struct pipe_pair *pp;
	struct pipe *rpipe, *wpipe = NULL;
	int fds[2], cloexec, error;

	cloexec = (flags & O_CLOEXEC) ? UF_EXCLOSE : 0;

	pp = pipe_pair_create();
	if (pp == NULL)
		return (ENOMEM);
	wpipe = &pp->pp_wpipe;
	rpipe = &pp->pp_rpipe;

	fdplock(fdp);

	error = falloc(p, &rf, &fds[0]);
	if (error != 0)
		goto free2;
	rf->f_flag = FREAD | FWRITE | (flags & FNONBLOCK);
	rf->f_type = DTYPE_PIPE;
	rf->f_data = rpipe;
	rf->f_ops = &pipeops;

	error = falloc(p, &wf, &fds[1]);
	if (error != 0)
		goto free3;
	wf->f_flag = FREAD | FWRITE | (flags & FNONBLOCK);
	wf->f_type = DTYPE_PIPE;
	wf->f_data = wpipe;
	wf->f_ops = &pipeops;

	fdinsert(fdp, fds[0], cloexec, rf);
	fdinsert(fdp, fds[1], cloexec, wf);

	error = copyout(fds, ufds, sizeof(fds));
	if (error == 0) {
		fdpunlock(fdp);
#ifdef KTRACE
		if (KTRPOINT(p, KTR_STRUCT))
			ktrfds(p, fds, 2);
#endif
	} else {
		/* fdrelease() unlocks fdp. */
		fdrelease(p, fds[0]);
		fdplock(fdp);
		fdrelease(p, fds[1]);
	}

	FRELE(rf, p);
	FRELE(wf, p);
	return (error);

free3:
	fdremove(fdp, fds[0]);
	closef(rf, p);
	rpipe = NULL;
free2:
	fdpunlock(fdp);
	pipe_destroy(wpipe);
	pipe_destroy(rpipe);
	return (error);
}

/*
 * Allocate kva for pipe circular buffer, the space is pageable.
 * This routine will 'realloc' the size of a pipe safely; if allocation
 * fails it will retain the old buffer and return ENOMEM.
 */
int
pipe_buffer_realloc(struct pipe *cpipe, u_int size)
{
	caddr_t buffer;

	/* buffer uninitialized or pipe locked */
	KASSERT((cpipe->pipe_buffer.buffer == NULL) ||
	    (cpipe->pipe_state & PIPE_LOCK));

	/* buffer should be empty */
	KASSERT(cpipe->pipe_buffer.cnt == 0);

	KERNEL_LOCK();
	buffer = km_alloc(size, &kv_any, &kp_pageable, &kd_waitok);
	KERNEL_UNLOCK();
	if (buffer == NULL)
		return (ENOMEM);

	/* free old resources if we are resizing */
	pipe_buffer_free(cpipe);

	cpipe->pipe_buffer.buffer = buffer;
	cpipe->pipe_buffer.size = size;
	cpipe->pipe_buffer.in = 0;
	cpipe->pipe_buffer.out = 0;

	atomic_add_int(&amountpipekva, cpipe->pipe_buffer.size);

	return (0);
}

/*
 * initialize and allocate VM and memory for pipe
 */
int
pipe_create(struct pipe *cpipe)
{
	int error;

	error = pipe_buffer_realloc(cpipe, PIPE_SIZE);
	if (error != 0)
		return (error);

	sigio_init(&cpipe->pipe_sigio);

	getnanotime(&cpipe->pipe_ctime);
	cpipe->pipe_atime = cpipe->pipe_ctime;
	cpipe->pipe_mtime = cpipe->pipe_ctime;

	return (0);
}

struct pipe *
pipe_peer(struct pipe *cpipe)
{
	struct pipe *peer;

	rw_assert_anylock(cpipe->pipe_lock);

	peer = cpipe->pipe_peer;
	if (peer == NULL || (peer->pipe_state & PIPE_EOF))
		return (NULL);
	return (peer);
}

/*
 * Lock a pipe for exclusive I/O access.
 */
int
pipe_iolock(struct pipe *cpipe)
{
	int error;

	rw_assert_wrlock(cpipe->pipe_lock);

	while (cpipe->pipe_state & PIPE_LOCK) {
		cpipe->pipe_state |= PIPE_LWANT;
		error = rwsleep_nsec(cpipe, cpipe->pipe_lock, PRIBIO | PCATCH,
		    "pipeiolk", INFSLP);
		if (error)
			return (error);
	}
	cpipe->pipe_state |= PIPE_LOCK;
	return (0);
}

/*
 * Unlock a pipe I/O lock.
 */
void
pipe_iounlock(struct pipe *cpipe)
{
	rw_assert_wrlock(cpipe->pipe_lock);
	KASSERT(cpipe->pipe_state & PIPE_LOCK);

	cpipe->pipe_state &= ~PIPE_LOCK;
	if (cpipe->pipe_state & PIPE_LWANT) {
		cpipe->pipe_state &= ~PIPE_LWANT;
		wakeup(cpipe);
	}
}

/*
 * Unlock the pipe I/O lock and go to sleep.  Returns 0 on success and the I/O
 * lock is relocked.  Otherwise, if a signal was caught, non-zero is returned
 * and the I/O lock is not locked.
 *
 * Any caller must obtain a reference to the pipe by incrementing `pipe_busy'
 * before calling this function in order to ensure that the same pipe is not
 * destroyed while sleeping.
 */
int
pipe_iosleep(struct pipe *cpipe, const char *wmesg)
{
	int error;

	pipe_iounlock(cpipe);
	error = rwsleep_nsec(cpipe, cpipe->pipe_lock, PRIBIO | PCATCH, wmesg,
	    INFSLP);
	if (error)
		return (error);
	return (pipe_iolock(cpipe));
}

void
pipe_wakeup(struct pipe *cpipe)
{
	rw_assert_wrlock(cpipe->pipe_lock);

	knote_locked(&cpipe->pipe_klist, 0);

	if (cpipe->pipe_state & PIPE_ASYNC)
		pgsigio(&cpipe->pipe_sigio, SIGIO, 0);
}

int
pipe_read(struct file *fp, struct uio *uio, int fflags)
{
	struct pipe *rpipe = fp->f_data;
	size_t nread = 0, size;
	int error;

	rw_enter_write(rpipe->pipe_lock);
	++rpipe->pipe_busy;
	error = pipe_iolock(rpipe);
	if (error) {
		--rpipe->pipe_busy;
		pipe_rundown(rpipe);
		rw_exit_write(rpipe->pipe_lock);
		return (error);
	}

	while (uio->uio_resid) {
		/* Normal pipe buffer receive. */
		if (rpipe->pipe_buffer.cnt > 0) {
			size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
			if (size > rpipe->pipe_buffer.cnt)
				size = rpipe->pipe_buffer.cnt;
			if (size > uio->uio_resid)
				size = uio->uio_resid;
			rw_exit_write(rpipe->pipe_lock);
			error = uiomove(&rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
			    size, uio);
			rw_enter_write(rpipe->pipe_lock);
			if (error) {
				break;
			}
			rpipe->pipe_buffer.out += size;
			if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
				rpipe->pipe_buffer.out = 0;

			rpipe->pipe_buffer.cnt -= size;
			/*
			 * If there is no more to read in the pipe, reset
			 * its pointers to the beginning.  This improves
			 * cache hit stats.
			 */
			if (rpipe->pipe_buffer.cnt == 0) {
				rpipe->pipe_buffer.in = 0;
				rpipe->pipe_buffer.out = 0;
			}
			nread += size;
		} else {
			/*
			 * detect EOF condition
			 * read returns 0 on EOF, no need to set error
			 */
			if (rpipe->pipe_state & PIPE_EOF)
				break;

			/* If the "write-side" has been blocked, wake it up. */
			if (rpipe->pipe_state & PIPE_WANTW) {
				rpipe->pipe_state &= ~PIPE_WANTW;
				wakeup(rpipe);
			}

			/* Break if some data was read. */
			if (nread > 0)
				break;

			/* Handle non-blocking mode operation. */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
				break;
			}

			/* Wait for more data. */
			rpipe->pipe_state |= PIPE_WANTR;
			error = pipe_iosleep(rpipe, "piperd");
			if (error)
				goto unlocked_error;
		}
	}
	pipe_iounlock(rpipe);

	if (error == 0)
		getnanotime(&rpipe->pipe_atime);
unlocked_error:
	--rpipe->pipe_busy;

	if (pipe_rundown(rpipe) == 0 && rpipe->pipe_buffer.cnt < MINPIPESIZE) {
		/* Handle write blocking hysteresis. */
		if (rpipe->pipe_state & PIPE_WANTW) {
			rpipe->pipe_state &= ~PIPE_WANTW;
			wakeup(rpipe);
		}
	}

	if (rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt >= PIPE_BUF)
		pipe_wakeup(rpipe);

	rw_exit_write(rpipe->pipe_lock);
	return (error);
}

int
pipe_write(struct file *fp, struct uio *uio, int fflags)
{
	struct pipe *rpipe = fp->f_data, *wpipe;
	struct rwlock *lock = rpipe->pipe_lock;
	size_t orig_resid;
	int error;

	rw_enter_write(lock);
	wpipe = pipe_peer(rpipe);

	/* Detect loss of pipe read side, issue SIGPIPE if lost. */
	if (wpipe == NULL) {
		rw_exit_write(lock);
		return (EPIPE);
	}

	++wpipe->pipe_busy;
	error = pipe_iolock(wpipe);
	if (error) {
		--wpipe->pipe_busy;
		pipe_rundown(wpipe);
		rw_exit_write(lock);
		return (error);
	}

	/* If it is advantageous to resize the pipe buffer, do so. */
	if (uio->uio_resid > PIPE_SIZE &&
	    wpipe->pipe_buffer.size <= PIPE_SIZE &&
	    wpipe->pipe_buffer.cnt == 0) {
		unsigned int npipe;

		npipe = atomic_inc_int_nv(&nbigpipe);
		if (npipe > LIMITBIGPIPES ||
		    pipe_buffer_realloc(wpipe, BIG_PIPE_SIZE) != 0)
			atomic_dec_int(&nbigpipe);
	}

	orig_resid = uio->uio_resid;

	while (uio->uio_resid) {
		size_t space;

		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			break;
		}

		space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;

		/* Writes of size <= PIPE_BUF must be atomic. */
		if (space < uio->uio_resid && orig_resid <= PIPE_BUF)
			space = 0;
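		/*
		 * Forcing space to 0 sends a small write into the blocking
		 * path below instead of splitting it across the remaining
		 * free space, preserving the POSIX guarantee that writes of
		 * at most PIPE_BUF bytes are atomic.
		 */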

		if (space > 0) {
			size_t size;	/* Transfer size */
			size_t segsize;	/* first segment to transfer */

			/*
			 * Transfer size is minimum of uio transfer
			 * and free space in pipe buffer.
			 */
			if (space > uio->uio_resid)
				size = uio->uio_resid;
			else
				size = space;
			/*
			 * First segment to transfer is minimum of
			 * transfer size and contiguous space in
			 * pipe buffer.  If first segment to transfer
			 * is less than the transfer size, we've got
			 * a wraparound in the buffer.
			 */
			segsize = wpipe->pipe_buffer.size -
			    wpipe->pipe_buffer.in;
			if (segsize > size)
				segsize = size;

			/* Transfer first segment */

			rw_exit_write(lock);
			error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
			    segsize, uio);
			rw_enter_write(lock);

			if (error == 0 && segsize < size) {
				/*
				 * Transfer remaining part now, to
				 * support atomic writes.  Wraparound
				 * happened.
				 */
#ifdef DIAGNOSTIC
				if (wpipe->pipe_buffer.in + segsize !=
				    wpipe->pipe_buffer.size)
					panic("Expected pipe buffer wraparound disappeared");
#endif

				rw_exit_write(lock);
				error = uiomove(&wpipe->pipe_buffer.buffer[0],
				    size - segsize, uio);
				rw_enter_write(lock);
			}
			if (error == 0) {
				wpipe->pipe_buffer.in += size;
				if (wpipe->pipe_buffer.in >=
				    wpipe->pipe_buffer.size) {
#ifdef DIAGNOSTIC
					if (wpipe->pipe_buffer.in != size - segsize + wpipe->pipe_buffer.size)
						panic("Expected wraparound bad");
#endif
					wpipe->pipe_buffer.in = size - segsize;
				}

				wpipe->pipe_buffer.cnt += size;
#ifdef DIAGNOSTIC
				if (wpipe->pipe_buffer.cnt > wpipe->pipe_buffer.size)
					panic("Pipe buffer overflow");
#endif
			}
			if (error)
				break;
		} else {
			/* If the "read-side" has been blocked, wake it up. */
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}

			/* Don't block on non-blocking I/O. */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
				break;
			}

			/*
			 * We have no more space and have something to offer,
			 * wake up select/poll.
			 */
			pipe_wakeup(wpipe);

			wpipe->pipe_state |= PIPE_WANTW;
			error = pipe_iosleep(wpipe, "pipewr");
			if (error)
				goto unlocked_error;

			/*
			 * If read side wants to go away, we just issue a
			 * signal to ourselves.
			 */
			if (wpipe->pipe_state & PIPE_EOF) {
				error = EPIPE;
				break;
			}
		}
	}
	pipe_iounlock(wpipe);

unlocked_error:
	--wpipe->pipe_busy;

	if (pipe_rundown(wpipe) == 0 && wpipe->pipe_buffer.cnt > 0) {
		/*
		 * If we have put any characters in the buffer, we wake up
		 * the reader.
		 */
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
	}

	/* Don't return EPIPE if I/O was successful. */
	if (wpipe->pipe_buffer.cnt == 0 &&
	    uio->uio_resid == 0 &&
	    error == EPIPE) {
		error = 0;
	}

	if (error == 0)
		getnanotime(&wpipe->pipe_mtime);
	/* We have something to offer, wake up select/poll. */
	if (wpipe->pipe_buffer.cnt)
		pipe_wakeup(wpipe);

	rw_exit_write(lock);
	return (error);
}

/*
 * we implement a very minimal set of ioctls for compatibility with sockets.
 */
int
pipe_ioctl(struct file *fp, u_long cmd, caddr_t data, struct proc *p)
{
	struct pipe *mpipe = fp->f_data;
	int error = 0;

	switch (cmd) {

	case FIONBIO:
		break;

	case FIOASYNC:
		rw_enter_write(mpipe->pipe_lock);
		if (*(int *)data) {
			mpipe->pipe_state |= PIPE_ASYNC;
		} else {
			mpipe->pipe_state &= ~PIPE_ASYNC;
		}
		rw_exit_write(mpipe->pipe_lock);
		break;

	case FIONREAD:
		rw_enter_read(mpipe->pipe_lock);
		*(int *)data = mpipe->pipe_buffer.cnt;
		rw_exit_read(mpipe->pipe_lock);
		break;

	case FIOSETOWN:
	case SIOCSPGRP:
	case TIOCSPGRP:
		error = sigio_setown(&mpipe->pipe_sigio, cmd, data);
		break;

	case FIOGETOWN:
	case SIOCGPGRP:
	case TIOCGPGRP:
		sigio_getown(&mpipe->pipe_sigio, cmd, data);
		break;

	default:
		error = ENOTTY;
	}

	return (error);
}

int
pipe_stat(struct file *fp, struct stat *ub, struct proc *p)
{
	struct pipe *pipe = fp->f_data;

	memset(ub, 0, sizeof(*ub));

	rw_enter_read(pipe->pipe_lock);
	ub->st_mode = S_IFIFO;
	ub->st_blksize = pipe->pipe_buffer.size;
	ub->st_size = pipe->pipe_buffer.cnt;
	ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize;
	ub->st_atim.tv_sec = pipe->pipe_atime.tv_sec;
	ub->st_atim.tv_nsec = pipe->pipe_atime.tv_nsec;
	ub->st_mtim.tv_sec = pipe->pipe_mtime.tv_sec;
	ub->st_mtim.tv_nsec = pipe->pipe_mtime.tv_nsec;
	ub->st_ctim.tv_sec = pipe->pipe_ctime.tv_sec;
	ub->st_ctim.tv_nsec = pipe->pipe_ctime.tv_nsec;
	ub->st_uid = fp->f_cred->cr_uid;
	ub->st_gid = fp->f_cred->cr_gid;
	rw_exit_read(pipe->pipe_lock);
	/*
	 * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen.
	 * XXX (st_dev, st_ino) should be unique.
	 */
	return (0);
}

int
pipe_close(struct file *fp, struct proc *p)
{
	struct pipe *cpipe = fp->f_data;

	fp->f_ops = NULL;
	fp->f_data = NULL;
	pipe_destroy(cpipe);
	return (0);
}

/*
 * Free kva for pipe circular buffer.
 * No pipe lock check as only called from pipe_buffer_realloc() and
 * pipe_destroy().
 */
void
pipe_buffer_free(struct pipe *cpipe)
{
	u_int size;

	if (cpipe->pipe_buffer.buffer == NULL)
		return;

	size = cpipe->pipe_buffer.size;

	KERNEL_LOCK();
	km_free(cpipe->pipe_buffer.buffer, size, &kv_any, &kp_pageable);
	KERNEL_UNLOCK();

	cpipe->pipe_buffer.buffer = NULL;

	atomic_sub_int(&amountpipekva, size);
	if (size > PIPE_SIZE)
		atomic_dec_int(&nbigpipe);
}

/*
 * shutdown the pipe, and free resources.
 */
void
pipe_destroy(struct pipe *cpipe)
{
	struct pipe *ppipe;

	if (cpipe == NULL)
		return;

	rw_enter_write(cpipe->pipe_lock);

	pipe_wakeup(cpipe);
	sigio_free(&cpipe->pipe_sigio);

	/*
	 * If the other side is blocked, wake it up saying that
	 * we want to close it down.
	 */
	cpipe->pipe_state |= PIPE_EOF;
	while (cpipe->pipe_busy) {
		wakeup(cpipe);
		cpipe->pipe_state |= PIPE_WANTD;
		rwsleep_nsec(cpipe, cpipe->pipe_lock, PRIBIO, "pipecl", INFSLP);
	}

	/* Disconnect from peer. */
	if ((ppipe = cpipe->pipe_peer) != NULL) {
		pipe_wakeup(ppipe);

		ppipe->pipe_state |= PIPE_EOF;
		wakeup(ppipe);
		ppipe->pipe_peer = NULL;
	}

	pipe_buffer_free(cpipe);

	rw_exit_write(cpipe->pipe_lock);

	if (ppipe == NULL)
		pipe_pair_destroy(cpipe->pipe_pair);
}

/*
 * Returns non-zero if a rundown is currently ongoing.
 */
int
pipe_rundown(struct pipe *cpipe)
{
	rw_assert_wrlock(cpipe->pipe_lock);

	if (cpipe->pipe_busy > 0 || (cpipe->pipe_state & PIPE_WANTD) == 0)
		return (0);

	/* Only wakeup pipe_destroy() once the pipe is no longer busy. */
	cpipe->pipe_state &= ~(PIPE_WANTD | PIPE_WANTR | PIPE_WANTW);
	wakeup(cpipe);
	return (1);
}

int
pipe_kqfilter(struct file *fp, struct knote *kn)
{
	struct pipe *rpipe = kn->kn_fp->f_data, *wpipe;
	struct rwlock *lock = rpipe->pipe_lock;
	int error = 0;

	rw_enter_write(lock);
	wpipe = pipe_peer(rpipe);

	switch (kn->kn_filter) {
	case EVFILT_READ:
		kn->kn_fop = &pipe_rfiltops;
		kn->kn_hook = rpipe;
		klist_insert_locked(&rpipe->pipe_klist, kn);
		break;
	case EVFILT_WRITE:
		if (wpipe == NULL) {
			/*
			 * The other end of the pipe has been closed.
			 * Since the filter now always indicates a pending
			 * event, attach the knote to the current side
			 * to proceed with the registration.
			 */
			wpipe = rpipe;
		}
		kn->kn_fop = &pipe_wfiltops;
		kn->kn_hook = wpipe;
		klist_insert_locked(&wpipe->pipe_klist, kn);
		break;
	case EVFILT_EXCEPT:
		if (kn->kn_flags & __EV_SELECT) {
			/* Prevent triggering exceptfds. */
			error = EPERM;
			break;
		}
		if ((kn->kn_flags & __EV_POLL) == 0) {
			/* Disallow usage through kevent(2). */
			error = EINVAL;
			break;
		}
		kn->kn_fop = &pipe_efiltops;
		kn->kn_hook = rpipe;
		klist_insert_locked(&rpipe->pipe_klist, kn);
		break;
	default:
		error = EINVAL;
	}

	rw_exit_write(lock);

	return (error);
}

void
filt_pipedetach(struct knote *kn)
{
	struct pipe *cpipe = kn->kn_hook;

	klist_remove(&cpipe->pipe_klist, kn);
}

int
filt_piperead(struct knote *kn, long hint)
{
	struct pipe *rpipe = kn->kn_fp->f_data, *wpipe;

	rw_assert_wrlock(rpipe->pipe_lock);

	wpipe = pipe_peer(rpipe);

	kn->kn_data = rpipe->pipe_buffer.cnt;

	if ((rpipe->pipe_state & PIPE_EOF) || wpipe == NULL) {
		kn->kn_flags |= EV_EOF;
		if (kn->kn_flags & __EV_POLL)
			kn->kn_flags |= __EV_HUP;
		return (1);
	}

	return (kn->kn_data > 0);
}

int
filt_pipewrite(struct knote *kn, long hint)
{
	struct pipe *rpipe = kn->kn_fp->f_data, *wpipe;

	rw_assert_wrlock(rpipe->pipe_lock);

	wpipe = pipe_peer(rpipe);

	if (wpipe == NULL) {
		kn->kn_data = 0;
		kn->kn_flags |= EV_EOF;
		if (kn->kn_flags & __EV_POLL)
			kn->kn_flags |= __EV_HUP;
		return (1);
	}
	kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
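
	/*
	 * The pipe is reported writable only when at least PIPE_BUF bytes
	 * of buffer space are free, i.e. when an atomic write of up to
	 * PIPE_BUF bytes would not block.
	 */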
	return (kn->kn_data >= PIPE_BUF);
}

int
filt_pipeexcept(struct knote *kn, long hint)
{
	struct pipe *rpipe = kn->kn_fp->f_data, *wpipe;
	int active = 0;

	rw_assert_wrlock(rpipe->pipe_lock);

	wpipe = pipe_peer(rpipe);

	if (kn->kn_flags & __EV_POLL) {
		if ((rpipe->pipe_state & PIPE_EOF) || wpipe == NULL) {
			kn->kn_flags |= __EV_HUP;
			active = 1;
		}
	}

	return (active);
}

int
filt_pipemodify(struct kevent *kev, struct knote *kn)
{
	struct pipe *rpipe = kn->kn_fp->f_data;
	int active;

	rw_enter_write(rpipe->pipe_lock);
	active = knote_modify(kev, kn);
	rw_exit_write(rpipe->pipe_lock);

	return (active);
}

int
filt_pipeprocess(struct knote *kn, struct kevent *kev)
{
	struct pipe *rpipe = kn->kn_fp->f_data;
	int active;

	rw_enter_write(rpipe->pipe_lock);
	active = knote_process(kn, kev);
	rw_exit_write(rpipe->pipe_lock);

	return (active);
}

void
pipe_init(void)
{
	pool_init(&pipe_pair_pool, sizeof(struct pipe_pair), 0, IPL_MPFLOOR,
	    PR_WAITOK, "pipepl", NULL);
}

struct pipe_pair *
pipe_pair_create(void)
{
	struct pipe_pair *pp;

	pp = pool_get(&pipe_pair_pool, PR_WAITOK | PR_ZERO);
	pp->pp_wpipe.pipe_pair = pp;
	pp->pp_rpipe.pipe_pair = pp;
	pp->pp_wpipe.pipe_peer = &pp->pp_rpipe;
	pp->pp_rpipe.pipe_peer = &pp->pp_wpipe;
	/*
	 * One lock is used per pipe pair in order to obtain exclusive
	 * access to the pipe pair.
	 */
	rw_init(&pp->pp_lock, "pipelk");
	pp->pp_wpipe.pipe_lock = &pp->pp_lock;
	pp->pp_rpipe.pipe_lock = &pp->pp_lock;

	klist_init_rwlock(&pp->pp_wpipe.pipe_klist, &pp->pp_lock);
	klist_init_rwlock(&pp->pp_rpipe.pipe_klist, &pp->pp_lock);

	if (pipe_create(&pp->pp_wpipe) || pipe_create(&pp->pp_rpipe))
		goto err;
	return (pp);
err:
	pipe_destroy(&pp->pp_wpipe);
	pipe_destroy(&pp->pp_rpipe);
	return (NULL);
}

void
pipe_pair_destroy(struct pipe_pair *pp)
{
	klist_free(&pp->pp_wpipe.pipe_klist);
	klist_free(&pp->pp_rpipe.pipe_klist);
	pool_put(&pipe_pair_pool, pp);
}