/*
 * Copyright (c) 1996 John S. Dyson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Absolutely no warranty of function or purpose is made by the author
 *    John S. Dyson.
 * 4. Modifications may be freely made to this file if the above conditions
 *    are met.
 *
 * $FreeBSD: src/sys/kern/sys_pipe.c,v 1.60.2.13 2002/08/05 15:05:15 des Exp $
 * $DragonFly: src/sys/kern/sys_pipe.c,v 1.40 2006/08/02 01:25:25 dillon Exp $
 */

/*
 * This file contains a high-performance replacement for the socket-based
 * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
 * all features of sockets, but does do everything that pipes normally
 * do.
 */

/*
 * This code has two modes of operation, a small write mode and a large
 * write mode.  The small write mode acts like conventional pipes with
 * a kernel buffer.  If the buffer is less than PIPE_MINDIRECT, then the
 * "normal" pipe buffering is done.  If the buffer is between PIPE_MINDIRECT
 * and PIPE_SIZE in size, it is fully mapped and wired into the kernel, and
 * the receiving process can copy it directly from the pages in the sending
 * process.
 *
 * If the sending process receives a signal, it is possible that it will
 * go away, and certainly its address space can change, because control
 * is returned back to the user-mode side.  In that case, the pipe code
 * arranges to copy the buffer supplied by the user process to a pageable
 * kernel buffer, and the receiving process will grab the data from the
 * pageable kernel buffer.  Since signals don't happen all that often,
 * the copy operation is normally eliminated.
 *
 * The constant PIPE_MINDIRECT is chosen to make sure that buffering will
 * happen for small transfers so that the system will not spend all of
 * its time context switching.  PIPE_SIZE is constrained by the
 * amount of kernel virtual memory.
 */
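/*
 * Illustrative sketch (userland, not kernel code): both modes described
 * above are reached through the ordinary pipe(2)/write(2) interface; the
 * kernel picks the path per-write by comparing the transfer length
 * against PIPE_MINDIRECT.  The sizes below are only examples:
 *
 *	#include <unistd.h>
 *
 *	int fds[2];
 *	char small[64];		(well under PIPE_MINDIRECT: buffered path)
 *	static char big[65536];	(large: candidate for the direct path)
 *
 *	pipe(fds);
 *	write(fds[1], small, sizeof(small));	(copied into kernel buffer)
 *	write(fds[1], big, sizeof(big));	(pages mapped, reader copies
 *						 directly from the source)
 */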
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/ttycom.h>
#include <sys/stat.h>
#include <sys/poll.h>
#include <sys/select.h>
#include <sys/signalvar.h>
#include <sys/sysproto.h>
#include <sys/pipe.h>
#include <sys/vnode.h>
#include <sys/uio.h>
#include <sys/event.h>
#include <sys/globaldata.h>
#include <sys/module.h>
#include <sys/malloc.h>
#include <sys/sysctl.h>
#include <sys/socket.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <sys/lock.h>
#include <vm/vm_object.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_zone.h>

#include <sys/file2.h>

#include <machine/cpufunc.h>

/*
 * interfaces to the outside world
 */
static int pipe_read (struct file *fp, struct uio *uio,
		struct ucred *cred, int flags);
static int pipe_write (struct file *fp, struct uio *uio,
		struct ucred *cred, int flags);
static int pipe_close (struct file *fp);
static int pipe_shutdown (struct file *fp, int how);
static int pipe_poll (struct file *fp, int events, struct ucred *cred);
static int pipe_kqfilter (struct file *fp, struct knote *kn);
static int pipe_stat (struct file *fp, struct stat *sb, struct ucred *cred);
static int pipe_ioctl (struct file *fp, u_long cmd, caddr_t data, struct ucred *cred);

static struct fileops pipeops = {
	.fo_read = pipe_read,
	.fo_write = pipe_write,
	.fo_ioctl = pipe_ioctl,
	.fo_poll = pipe_poll,
	.fo_kqfilter = pipe_kqfilter,
	.fo_stat = pipe_stat,
	.fo_close = pipe_close,
	.fo_shutdown = pipe_shutdown
};

static void	filt_pipedetach(struct knote *kn);
static int	filt_piperead(struct knote *kn, long hint);
static int	filt_pipewrite(struct knote *kn, long hint);

static struct filterops pipe_rfiltops =
	{ 1, NULL, filt_pipedetach, filt_piperead };
static struct filterops pipe_wfiltops =
	{ 1, NULL, filt_pipedetach, filt_pipewrite };

MALLOC_DEFINE(M_PIPE, "pipe", "pipe structures");

/*
 * Default pipe buffer size(s); this can be kind-of large now because pipe
 * space is pageable.  The pipe code will try to maintain locality of
 * reference for performance reasons, so small amounts of outstanding I/O
 * will not wipe the cache.
 */
#define MINPIPESIZE (PIPE_SIZE/3)
#define MAXPIPESIZE (2*PIPE_SIZE/3)

/*
 * Maximum amount of kva for pipes -- this is kind-of a soft limit, but
 * is there so that on large systems, we don't exhaust it.
 */
#define MAXPIPEKVA (8*1024*1024)

/*
 * Limit for direct transfers; we cannot, of course, limit
 * the amount of kva for pipes in general.
 */
#define LIMITPIPEKVA (16*1024*1024)
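/*
 * Worked example (illustrative): assuming the common PIPE_SIZE of 16384
 * bytes (the real value comes from <sys/pipe.h>), the hysteresis bounds
 * above come out to
 *
 *	MINPIPESIZE = 16384/3   = 5461 bytes
 *	MAXPIPESIZE = 2*16384/3 = 10922 bytes
 *
 * A blocked writer is only woken once a reader has drained the buffer
 * below MINPIPESIZE, so readers and writers exchange reasonably large
 * chunks instead of context switching on every byte.
 */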
/*
 * Limit the number of "big" pipes
 */
#define LIMITBIGPIPES	32
#define PIPEQ_MAX_CACHE 16	/* per-cpu pipe structure cache */

static int pipe_maxbig = LIMITBIGPIPES;
static int pipe_maxcache = PIPEQ_MAX_CACHE;
static int pipe_nbig;
static int pipe_bcache_alloc;
static int pipe_bkmem_alloc;
static int pipe_dwrite_enable = 1;	/* 0:copy, 1:kmem/sfbuf 2:force */
static int pipe_dwrite_sfbuf = 1;	/* 0:kmem_map 1:sfbufs 2:sfbufs_dmap */
					/* 3:sfbuf_dmap w/ forced invlpg */

SYSCTL_NODE(_kern, OID_AUTO, pipe, CTLFLAG_RW, 0, "Pipe operation");
SYSCTL_INT(_kern_pipe, OID_AUTO, nbig,
	CTLFLAG_RD, &pipe_nbig, 0, "number of big pipes allocated");
SYSCTL_INT(_kern_pipe, OID_AUTO, maxcache,
	CTLFLAG_RW, &pipe_maxcache, 0, "max pipes cached per-cpu");
SYSCTL_INT(_kern_pipe, OID_AUTO, maxbig,
	CTLFLAG_RW, &pipe_maxbig, 0, "max number of big pipes");
SYSCTL_INT(_kern_pipe, OID_AUTO, dwrite_enable,
	CTLFLAG_RW, &pipe_dwrite_enable, 0, "1:enable/2:force direct writes");
SYSCTL_INT(_kern_pipe, OID_AUTO, dwrite_sfbuf,
	CTLFLAG_RW, &pipe_dwrite_sfbuf, 0,
	"(if dwrite_enable) 0:kmem 1:sfbuf 2:sfbuf_dmap 3:sfbuf_dmap_forceinvlpg");
#if !defined(NO_PIPE_SYSCTL_STATS)
SYSCTL_INT(_kern_pipe, OID_AUTO, bcache_alloc,
	CTLFLAG_RW, &pipe_bcache_alloc, 0, "pipe buffer from pcpu cache");
SYSCTL_INT(_kern_pipe, OID_AUTO, bkmem_alloc,
	CTLFLAG_RW, &pipe_bkmem_alloc, 0, "pipe buffer from kmem");
#endif

static void pipeclose (struct pipe *cpipe);
static void pipe_free_kmem (struct pipe *cpipe);
static int pipe_create (struct pipe **cpipep);
static __inline int pipelock (struct pipe *cpipe, int catch);
static __inline void pipeunlock (struct pipe *cpipe);
static __inline void pipeselwakeup (struct pipe *cpipe);
#ifndef PIPE_NODIRECT
static int pipe_build_write_buffer (struct pipe *wpipe, struct uio *uio);
static int pipe_direct_write (struct pipe *wpipe, struct uio *uio);
static void pipe_clone_write_buffer (struct pipe *wpipe);
#endif
static int pipespace (struct pipe *cpipe, int size);

/*
 * The pipe system call for the DTYPE_PIPE type of pipes
 *
 * pipe_args(int dummy)
 */

/* ARGSUSED */
int
sys_pipe(struct pipe_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *rf, *wf;
	struct pipe *rpipe, *wpipe;
	int fd1, fd2, error;

	KKASSERT(p);

	rpipe = wpipe = NULL;
	if (pipe_create(&rpipe) || pipe_create(&wpipe)) {
		pipeclose(rpipe);
		pipeclose(wpipe);
		return (ENFILE);
	}

	rpipe->pipe_state |= PIPE_DIRECTOK;
	wpipe->pipe_state |= PIPE_DIRECTOK;

	/*
	 * Select the direct-map features to use for this pipe.  Since the
	 * sysctl's can change on the fly we record the settings when the
	 * pipe is created.
	 *
	 * Generally speaking the system will default to what we consider
	 * to be the best-balanced and most stable option.  Right now this
	 * is SFBUF1.  Modes 2 and 3 are considered experimental at the
	 * moment.
	 */
	wpipe->pipe_feature = PIPE_COPY;
	if (pipe_dwrite_enable) {
		switch(pipe_dwrite_sfbuf) {
		case 0:
			wpipe->pipe_feature = PIPE_KMEM;
			break;
		case 1:
			wpipe->pipe_feature = PIPE_SFBUF1;
			break;
		case 2:
		case 3:
			wpipe->pipe_feature = PIPE_SFBUF2;
			break;
		}
	}
	rpipe->pipe_feature = wpipe->pipe_feature;

	error = falloc(p, &rf, &fd1);
	if (error) {
		pipeclose(rpipe);
		pipeclose(wpipe);
		return (error);
	}
	uap->sysmsg_fds[0] = fd1;

	/*
	 * Warning: once we've gotten past allocation of the fd for the
	 * read-side, we can only drop the read side via fdrop() in order
	 * to avoid races against processes which manage to dup() the read
	 * side while we are blocked trying to allocate the write side.
	 */
	rf->f_type = DTYPE_PIPE;
	rf->f_flag = FREAD | FWRITE;
	rf->f_ops = &pipeops;
	rf->f_data = rpipe;
	error = falloc(p, &wf, &fd2);
	if (error) {
		fsetfd(p, NULL, fd1);
		fdrop(rf);
		/* rpipe has been closed by fdrop(). */
		pipeclose(wpipe);
		return (error);
	}
	wf->f_type = DTYPE_PIPE;
	wf->f_flag = FREAD | FWRITE;
	wf->f_ops = &pipeops;
	wf->f_data = wpipe;
	uap->sysmsg_fds[1] = fd2;

	rpipe->pipe_peer = wpipe;
	wpipe->pipe_peer = rpipe;

	fsetfd(p, rf, fd1);
	fsetfd(p, wf, fd2);
	fdrop(rf);
	fdrop(wf);

	return (0);
}

/*
 * Allocate kva for the pipe circular buffer; the space is pageable.
 * This routine will 'realloc' the size of a pipe safely: if it fails
 * it will retain the old buffer and return ENOMEM.
 */
static int
pipespace(struct pipe *cpipe, int size)
{
	struct vm_object *object;
	caddr_t buffer;
	int npages, error;

	npages = round_page(size) / PAGE_SIZE;
	object = cpipe->pipe_buffer.object;

	/*
	 * [re]create the object if necessary and reserve space for it
	 * in the kernel_map.  The object and memory are pageable.  On
	 * success, free the old resources before assigning the new
	 * ones.
	 */
	if (object == NULL || object->size != npages) {
		object = vm_object_allocate(OBJT_DEFAULT, npages);
		buffer = (caddr_t) vm_map_min(kernel_map);

		error = vm_map_find(kernel_map, object, 0,
				    (vm_offset_t *) &buffer, size, 1,
				    VM_PROT_ALL, VM_PROT_ALL, 0);

		if (error != KERN_SUCCESS) {
			vm_object_deallocate(object);
			return (ENOMEM);
		}
		pipe_free_kmem(cpipe);
		cpipe->pipe_buffer.object = object;
		cpipe->pipe_buffer.buffer = buffer;
		cpipe->pipe_buffer.size = size;
		++pipe_bkmem_alloc;
	} else {
		++pipe_bcache_alloc;
	}
	cpipe->pipe_buffer.in = 0;
	cpipe->pipe_buffer.out = 0;
	cpipe->pipe_buffer.cnt = 0;
	return (0);
}
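/*
 * Illustrative sketch (not kernel code): the in/out/cnt fields reset
 * above implement a classic ring buffer.  The standalone fragment below
 * mirrors the index arithmetic that pipe_read()/pipe_write() perform on
 * pipe_buffer; the names are invented for the example.
 *
 *	struct ring {
 *		char	buf[16384];	(pipe_buffer.buffer / .size)
 *		u_int	in, out, cnt;
 *	};
 *
 *	(producer side, analogous to the buffered path in pipe_write())
 *	static u_int
 *	ring_put(struct ring *r, const char *src, u_int n)
 *	{
 *		u_int space = sizeof(r->buf) - r->cnt;
 *		u_int seg;
 *
 *		if (n > space)
 *			n = space;
 *		seg = sizeof(r->buf) - r->in;		(contiguous room)
 *		if (seg > n)
 *			seg = n;
 *		bcopy(src, r->buf + r->in, seg);
 *		bcopy(src + seg, r->buf, n - seg);	(wraparound part)
 *		r->in = (r->in + n) % sizeof(r->buf);
 *		r->cnt += n;
 *		return (n);
 *	}
 */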
/*
 * Initialize and allocate VM and memory for pipe, pulling the pipe from
 * our per-cpu cache if possible.  For now make sure it is sized for the
 * smaller PIPE_SIZE default.
 */
static int
pipe_create(struct pipe **cpipep)
{
	globaldata_t gd = mycpu;
	struct pipe *cpipe;
	int error;

	if ((cpipe = gd->gd_pipeq) != NULL) {
		gd->gd_pipeq = cpipe->pipe_peer;
		--gd->gd_pipeqcount;
		cpipe->pipe_peer = NULL;
	} else {
		cpipe = malloc(sizeof(struct pipe), M_PIPE, M_WAITOK|M_ZERO);
	}
	*cpipep = cpipe;
	if ((error = pipespace(cpipe, PIPE_SIZE)) != 0)
		return (error);
	vfs_timestamp(&cpipe->pipe_ctime);
	cpipe->pipe_atime = cpipe->pipe_ctime;
	cpipe->pipe_mtime = cpipe->pipe_ctime;
	return (0);
}


/*
 * lock a pipe for I/O, blocking other access
 */
static __inline int
pipelock(struct pipe *cpipe, int catch)
{
	int error;

	while (cpipe->pipe_state & PIPE_LOCK) {
		cpipe->pipe_state |= PIPE_LWANT;
		error = tsleep(cpipe, (catch ? PCATCH : 0), "pipelk", 0);
		if (error != 0)
			return (error);
	}
	cpipe->pipe_state |= PIPE_LOCK;
	return (0);
}

/*
 * unlock a pipe I/O lock
 */
static __inline void
pipeunlock(struct pipe *cpipe)
{
	cpipe->pipe_state &= ~PIPE_LOCK;
	if (cpipe->pipe_state & PIPE_LWANT) {
		cpipe->pipe_state &= ~PIPE_LWANT;
		wakeup(cpipe);
	}
}

static __inline void
pipeselwakeup(struct pipe *cpipe)
{
	if (cpipe->pipe_state & PIPE_SEL) {
		cpipe->pipe_state &= ~PIPE_SEL;
		selwakeup(&cpipe->pipe_sel);
	}
	if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio)
		pgsigio(cpipe->pipe_sigio, SIGIO, 0);
	KNOTE(&cpipe->pipe_sel.si_note, 0);
}
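/*
 * Illustrative sketch (userland, not kernel code): pgsigio() above is
 * what ultimately delivers SIGIO once async mode is armed through the
 * FIOASYNC/FIOSETOWN ioctls implemented in pipe_ioctl() below.  A
 * minimal consumer might look like this:
 *
 *	#include <sys/ioctl.h>
 *	#include <sys/filio.h>
 *	#include <signal.h>
 *	#include <unistd.h>
 *
 *	static void on_sigio(int sig) { (note readiness somewhere) }
 *
 *	int fds[2], on = 1, pid = getpid();
 *
 *	pipe(fds);
 *	signal(SIGIO, on_sigio);
 *	ioctl(fds[0], FIOSETOWN, &pid);	(who receives SIGIO)
 *	ioctl(fds[0], FIOASYNC, &on);	(sets PIPE_ASYNC)
 *	(... SIGIO now fires when the pipe becomes readable ...)
 */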
/*
 * MPALMOSTSAFE (acquires mplock)
 */
static int
pipe_read(struct file *fp, struct uio *uio, struct ucred *cred, int fflags)
{
	struct pipe *rpipe;
	int error;
	int nread = 0;
	int nbio;
	u_int size;

	get_mplock();
	rpipe = (struct pipe *) fp->f_data;
	++rpipe->pipe_busy;
	error = pipelock(rpipe, 1);
	if (error)
		goto unlocked_error;

	if (fflags & O_FBLOCKING)
		nbio = 0;
	else if (fflags & O_FNONBLOCKING)
		nbio = 1;
	else if (fp->f_flag & O_NONBLOCK)
		nbio = 1;
	else
		nbio = 0;

	while (uio->uio_resid) {
		caddr_t va;

		if (rpipe->pipe_buffer.cnt > 0) {
			/*
			 * normal pipe buffer receive
			 */
			size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
			if (size > rpipe->pipe_buffer.cnt)
				size = rpipe->pipe_buffer.cnt;
			if (size > (u_int) uio->uio_resid)
				size = (u_int) uio->uio_resid;

			error = uiomove(&rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
					size, uio);
			if (error)
				break;

			rpipe->pipe_buffer.out += size;
			if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
				rpipe->pipe_buffer.out = 0;

			rpipe->pipe_buffer.cnt -= size;

			/*
			 * If there is no more to read in the pipe, reset
			 * its pointers to the beginning.  This improves
			 * cache hit stats.
			 */
			if (rpipe->pipe_buffer.cnt == 0) {
				rpipe->pipe_buffer.in = 0;
				rpipe->pipe_buffer.out = 0;
			}
			nread += size;
#ifndef PIPE_NODIRECT
		} else if (rpipe->pipe_kva &&
			   rpipe->pipe_feature == PIPE_KMEM &&
			   (rpipe->pipe_state & (PIPE_DIRECTW|PIPE_DIRECTIP))
			       == PIPE_DIRECTW
		) {
			/*
			 * Direct copy using source-side kva mapping
			 */
			size = rpipe->pipe_map.xio_bytes -
				rpipe->pipe_buffer.out;
			if (size > (u_int)uio->uio_resid)
				size = (u_int)uio->uio_resid;
			va = (caddr_t)rpipe->pipe_kva +
				xio_kvaoffset(&rpipe->pipe_map, rpipe->pipe_buffer.out);
			error = uiomove(va, size, uio);
			if (error)
				break;
			nread += size;
			rpipe->pipe_buffer.out += size;
			if (rpipe->pipe_buffer.out == rpipe->pipe_map.xio_bytes) {
				rpipe->pipe_state |= PIPE_DIRECTIP;
				rpipe->pipe_state &= ~PIPE_DIRECTW;
				/* reset out index for copy mode */
				rpipe->pipe_buffer.out = 0;
				wakeup(rpipe);
			}
		} else if (rpipe->pipe_buffer.out != rpipe->pipe_map.xio_bytes &&
			   rpipe->pipe_kva &&
			   rpipe->pipe_feature == PIPE_SFBUF2 &&
			   (rpipe->pipe_state & (PIPE_DIRECTW|PIPE_DIRECTIP))
			       == PIPE_DIRECTW
		) {
			/*
			 * Direct copy, bypassing a kernel buffer.  We cannot
			 * mess with the direct-write buffer until
			 * PIPE_DIRECTIP is cleared.  In order to prevent
			 * the pipe_write code from racing itself in
			 * direct_write, we set DIRECTIP when we clear
			 * DIRECTW after we have exhausted the buffer.
			 */
			if (pipe_dwrite_sfbuf == 3)
				rpipe->pipe_kvamask = 0;
			pmap_qenter2(rpipe->pipe_kva, rpipe->pipe_map.xio_pages,
				    rpipe->pipe_map.xio_npages,
				    &rpipe->pipe_kvamask);
			size = rpipe->pipe_map.xio_bytes -
				rpipe->pipe_buffer.out;
			if (size > (u_int)uio->uio_resid)
				size = (u_int)uio->uio_resid;
			va = (caddr_t)rpipe->pipe_kva +
				xio_kvaoffset(&rpipe->pipe_map, rpipe->pipe_buffer.out);
			error = uiomove(va, size, uio);
			if (error)
				break;
			nread += size;
			rpipe->pipe_buffer.out += size;
			if (rpipe->pipe_buffer.out == rpipe->pipe_map.xio_bytes) {
				rpipe->pipe_state |= PIPE_DIRECTIP;
				rpipe->pipe_state &= ~PIPE_DIRECTW;
				/* reset out index for copy mode */
				rpipe->pipe_buffer.out = 0;
				wakeup(rpipe);
			}
		} else if (rpipe->pipe_buffer.out != rpipe->pipe_map.xio_bytes &&
			   rpipe->pipe_feature == PIPE_SFBUF1 &&
			   (rpipe->pipe_state & (PIPE_DIRECTW|PIPE_DIRECTIP))
			       == PIPE_DIRECTW
		) {
			/*
			 * Direct copy, bypassing a kernel buffer.  We cannot
			 * mess with the direct-write buffer until
			 * PIPE_DIRECTIP is cleared.  In order to prevent
			 * the pipe_write code from racing itself in
			 * direct_write, we set DIRECTIP when we clear
			 * DIRECTW after we have exhausted the buffer.
			 */
			error = xio_uio_copy(&rpipe->pipe_map, rpipe->pipe_buffer.out, uio, &size);
			if (error)
				break;
			nread += size;
			rpipe->pipe_buffer.out += size;
			if (rpipe->pipe_buffer.out == rpipe->pipe_map.xio_bytes) {
				rpipe->pipe_state |= PIPE_DIRECTIP;
				rpipe->pipe_state &= ~PIPE_DIRECTW;
				/* reset out index for copy mode */
				rpipe->pipe_buffer.out = 0;
				wakeup(rpipe);
			}
#endif
		} else {
			/*
			 * detect EOF condition
			 * read returns 0 on EOF, no need to set error
			 */
			if (rpipe->pipe_state & PIPE_EOF)
				break;

			/*
			 * If the "write-side" has been blocked, wake it up now.
			 */
			if (rpipe->pipe_state & PIPE_WANTW) {
				rpipe->pipe_state &= ~PIPE_WANTW;
				wakeup(rpipe);
			}

			/*
			 * Break if some data was read.
			 */
			if (nread > 0)
				break;

			/*
			 * Unlock the pipe buffer for our remaining
			 * processing.  We will either break out with an
			 * error or we will sleep and relock to loop.
			 */
			pipeunlock(rpipe);

			/*
			 * Handle non-blocking mode operation or
			 * wait for more data.
			 */
			if (nbio) {
				error = EAGAIN;
			} else {
				rpipe->pipe_state |= PIPE_WANTR;
				if ((error = tsleep(rpipe, PCATCH|PNORESCHED,
						    "piperd", 0)) == 0) {
					error = pipelock(rpipe, 1);
				}
			}
			if (error)
				goto unlocked_error;
		}
	}
	pipeunlock(rpipe);

	if (error == 0)
		vfs_timestamp(&rpipe->pipe_atime);
unlocked_error:
	--rpipe->pipe_busy;

	/*
	 * PIPE_WANT processing only makes sense if pipe_busy is 0.
	 */
	if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) {
		rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW);
		wakeup(rpipe);
	} else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) {
		/*
		 * Handle write blocking hysteresis.
		 */
		if (rpipe->pipe_state & PIPE_WANTW) {
			rpipe->pipe_state &= ~PIPE_WANTW;
			wakeup(rpipe);
		}
	}

	if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF)
		pipeselwakeup(rpipe);
	rel_mplock();
	return (error);
}
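/*
 * Illustrative sketch (userland, not kernel code): the EAGAIN path above
 * is what a non-blocking reader sees when the pipe is empty but not yet
 * at EOF:
 *
 *	#include <errno.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int fds[2];
 *	char buf[128];
 *	ssize_t n;
 *
 *	pipe(fds);
 *	fcntl(fds[0], F_SETFL, O_NONBLOCK);
 *	n = read(fds[0], buf, sizeof(buf));
 *	(n == -1 and errno == EAGAIN: empty pipe, writer still open)
 *	close(fds[1]);
 *	n = read(fds[0], buf, sizeof(buf));
 *	(n == 0: PIPE_EOF, the reader sees end-of-file)
 */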
#ifndef PIPE_NODIRECT
/*
 * Map the sending process's buffer into kernel space and wire it.
 * This is similar to a physical write operation.
 */
static int
pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio)
{
	int error;
	u_int size;

	size = (u_int) uio->uio_iov->iov_len;
	if (size > wpipe->pipe_buffer.size)
		size = wpipe->pipe_buffer.size;

	if (uio->uio_segflg == UIO_SYSSPACE) {
		error = xio_init_kbuf(&wpipe->pipe_map, uio->uio_iov->iov_base,
					size);
	} else {
		error = xio_init_ubuf(&wpipe->pipe_map, uio->uio_iov->iov_base,
					size, XIOF_READ);
	}
	wpipe->pipe_buffer.out = 0;
	if (error)
		return(error);

	/*
	 * Create a kernel map for KMEM and SFBUF2 copy modes.  SFBUF2 will
	 * map the pages on the target while KMEM maps the pages now.
	 */
	switch(wpipe->pipe_feature) {
	case PIPE_KMEM:
	case PIPE_SFBUF2:
		if (wpipe->pipe_kva == NULL) {
			wpipe->pipe_kva =
			    kmem_alloc_nofault(kernel_map, XIO_INTERNAL_SIZE);
			wpipe->pipe_kvamask = 0;
		}
		if (wpipe->pipe_feature == PIPE_KMEM) {
			pmap_qenter(wpipe->pipe_kva, wpipe->pipe_map.xio_pages,
				    wpipe->pipe_map.xio_npages);
		}
		break;
	default:
		break;
	}

	/*
	 * And update the uio data.  The XIO might have loaded fewer bytes
	 * than requested so reload 'size'.
	 */
	size = wpipe->pipe_map.xio_bytes;
	uio->uio_iov->iov_len -= size;
	uio->uio_iov->iov_base += size;
	if (uio->uio_iov->iov_len == 0)
		uio->uio_iov++;
	uio->uio_resid -= size;
	uio->uio_offset += size;
	return (0);
}
/*
 * In the case of a signal, the writing process might go away.  This
 * code copies the data into the circular buffer so that the source
 * pages can be freed without loss of data.
 *
 * Note that in direct mode pipe_buffer.out is used to track the
 * XIO offset.  We are converting the direct mode into buffered mode
 * which changes the meaning of pipe_buffer.out.
 */
static void
pipe_clone_write_buffer(struct pipe *wpipe)
{
	int size;
	int offset;

	offset = wpipe->pipe_buffer.out;
	size = wpipe->pipe_map.xio_bytes - offset;

	KKASSERT(size <= wpipe->pipe_buffer.size);

	wpipe->pipe_buffer.in = size;
	wpipe->pipe_buffer.out = 0;
	wpipe->pipe_buffer.cnt = size;
	wpipe->pipe_state &= ~(PIPE_DIRECTW | PIPE_DIRECTIP);

	xio_copy_xtok(&wpipe->pipe_map, offset, wpipe->pipe_buffer.buffer, size);
	xio_release(&wpipe->pipe_map);
	if (wpipe->pipe_kva) {
		pmap_qremove(wpipe->pipe_kva, XIO_INTERNAL_PAGES);
		kmem_free(kernel_map, wpipe->pipe_kva, XIO_INTERNAL_SIZE);
		wpipe->pipe_kva = NULL;
	}
}

/*
 * This implements the pipe buffer write mechanism.  Note that only
 * a direct write OR a normal pipe write can be pending at any given time.
 * If there are any characters in the pipe buffer, the direct write will
 * be deferred until the receiving process grabs all of the bytes from
 * the pipe buffer.  Then the direct mapping write is set up.
 */
static int
pipe_direct_write(struct pipe *wpipe, struct uio *uio)
{
	int error;

retry:
	while (wpipe->pipe_state & (PIPE_DIRECTW|PIPE_DIRECTIP)) {
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		wpipe->pipe_state |= PIPE_WANTW;
		error = tsleep(wpipe, PCATCH, "pipdww", 0);
		if (error)
			goto error2;
		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			goto error2;
		}
	}
	KKASSERT(wpipe->pipe_map.xio_bytes == 0);
	if (wpipe->pipe_buffer.cnt > 0) {
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}

		wpipe->pipe_state |= PIPE_WANTW;
		error = tsleep(wpipe, PCATCH, "pipdwc", 0);
		if (error)
			goto error2;
		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			goto error2;
		}
		goto retry;
	}

	/*
	 * Build our direct-write buffer
	 */
	wpipe->pipe_state |= PIPE_DIRECTW | PIPE_DIRECTIP;
	error = pipe_build_write_buffer(wpipe, uio);
	if (error)
		goto error1;
	wpipe->pipe_state &= ~PIPE_DIRECTIP;

	/*
	 * Wait until the receiver has snarfed the data.  Since we are likely
	 * going to sleep we optimize the case and yield synchronously,
	 * possibly avoiding the tsleep().
	 */
	error = 0;
	while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) {
		if (wpipe->pipe_state & PIPE_EOF) {
			pipelock(wpipe, 0);
			xio_release(&wpipe->pipe_map);
			if (wpipe->pipe_kva) {
				pmap_qremove(wpipe->pipe_kva, XIO_INTERNAL_PAGES);
				kmem_free(kernel_map, wpipe->pipe_kva, XIO_INTERNAL_SIZE);
				wpipe->pipe_kva = NULL;
			}
			pipeunlock(wpipe);
			pipeselwakeup(wpipe);
			error = EPIPE;
			goto error1;
		}
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		pipeselwakeup(wpipe);
		error = tsleep(wpipe, PCATCH|PNORESCHED, "pipdwt", 0);
	}
	pipelock(wpipe,0);
	if (wpipe->pipe_state & PIPE_DIRECTW) {
		/*
		 * this bit of trickery substitutes a kernel buffer for
		 * the process that might be going away.
		 */
		pipe_clone_write_buffer(wpipe);
		KKASSERT((wpipe->pipe_state & PIPE_DIRECTIP) == 0);
	} else {
		/*
		 * note: The pipe_kva mapping is not qremove'd here.  For
		 * legacy PIPE_KMEM mode this constitutes an improvement
		 * over the original FreeBSD-4 algorithm.  For PIPE_SFBUF2
		 * mode the kva mapping must not be removed to get the
		 * caching benefit.
		 *
		 * For testing purposes we will give the original algorithm
		 * the benefit of the doubt 'what it could have been', and
		 * keep the optimization.
		 */
		KKASSERT(wpipe->pipe_state & PIPE_DIRECTIP);
		xio_release(&wpipe->pipe_map);
		wpipe->pipe_state &= ~PIPE_DIRECTIP;
	}
	pipeunlock(wpipe);
	return (error);

	/*
	 * Direct-write error, clear the direct write flags.
	 */
error1:
	wpipe->pipe_state &= ~(PIPE_DIRECTW | PIPE_DIRECTIP);
	/* fallthrough */

	/*
	 * General error, wakeup the other side if it happens to be sleeping.
	 */
error2:
	wakeup(wpipe);
	return (error);
}
#endif
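/*
 * Illustrative sketch (userland, not kernel code): both the direct path
 * above and the buffered path below return EPIPE once PIPE_EOF is set,
 * and the caller additionally receives SIGPIPE.  To observe EPIPE
 * directly:
 *
 *	#include <errno.h>
 *	#include <signal.h>
 *	#include <unistd.h>
 *
 *	int fds[2];
 *
 *	pipe(fds);
 *	close(fds[0]);			(reader goes away)
 *	signal(SIGPIPE, SIG_IGN);	(otherwise the signal kills us)
 *	if (write(fds[1], "x", 1) == -1 && errno == EPIPE)
 *		(write on a pipe with no read side)
 */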
/*
 * MPALMOSTSAFE - acquires mplock
 */
static int
pipe_write(struct file *fp, struct uio *uio, struct ucred *cred, int fflags)
{
	int error = 0;
	int orig_resid;
	int nbio;
	struct pipe *wpipe, *rpipe;

	get_mplock();
	rpipe = (struct pipe *) fp->f_data;
	wpipe = rpipe->pipe_peer;

	/*
	 * detect loss of pipe read side, issue SIGPIPE if lost.
	 */
	if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
		rel_mplock();
		return (EPIPE);
	}
	++wpipe->pipe_busy;

	if (fflags & O_FBLOCKING)
		nbio = 0;
	else if (fflags & O_FNONBLOCKING)
		nbio = 1;
	else if (fp->f_flag & O_NONBLOCK)
		nbio = 1;
	else
		nbio = 0;

	/*
	 * If it is advantageous to resize the pipe buffer, do
	 * so.
	 */
	if ((uio->uio_resid > PIPE_SIZE) &&
	    (pipe_nbig < pipe_maxbig) &&
	    (wpipe->pipe_state & (PIPE_DIRECTW|PIPE_DIRECTIP)) == 0 &&
	    (wpipe->pipe_buffer.size <= PIPE_SIZE) &&
	    (wpipe->pipe_buffer.cnt == 0)) {

		if ((error = pipelock(wpipe,1)) == 0) {
			if (pipespace(wpipe, BIG_PIPE_SIZE) == 0)
				pipe_nbig++;
			pipeunlock(wpipe);
		}
	}

	/*
	 * If an early error occurred unbusy and return, waking up any pending
	 * readers.
	 */
	if (error) {
		--wpipe->pipe_busy;
		if ((wpipe->pipe_busy == 0) &&
		    (wpipe->pipe_state & PIPE_WANT)) {
			wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
			wakeup(wpipe);
		}
		rel_mplock();
		return(error);
	}

	KASSERT(wpipe->pipe_buffer.buffer != NULL, ("pipe buffer gone"));

	orig_resid = uio->uio_resid;

	while (uio->uio_resid) {
		int space;

#ifndef PIPE_NODIRECT
		/*
		 * If the transfer is large, we can gain performance if
		 * we do process-to-process copies directly.
		 * If the write is non-blocking, we don't use the
		 * direct write mechanism.
		 *
		 * The direct write mechanism will detect the reader going
		 * away on us.
		 */
		if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT ||
		    pipe_dwrite_enable > 1) &&
		    nbio == 0 &&
		    pipe_dwrite_enable) {
			error = pipe_direct_write(wpipe, uio);
			if (error)
				break;
			continue;
		}
#endif

		/*
		 * Pipe buffered writes cannot be coincident with
		 * direct writes.  We wait until the currently executing
		 * direct write is completed before we start filling the
		 * pipe buffer.  We break out if a signal occurs or the
		 * reader goes away.
		 */
	retrywrite:
		while (wpipe->pipe_state & (PIPE_DIRECTW|PIPE_DIRECTIP)) {
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}
			error = tsleep(wpipe, PCATCH, "pipbww", 0);
			if (wpipe->pipe_state & PIPE_EOF)
				break;
			if (error)
				break;
		}
		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			break;
		}

		space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;

		/* Writes of size <= PIPE_BUF must be atomic. */
		if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF))
			space = 0;

		/*
		 * Write to fill, read size handles write hysteresis.  Also
		 * additional restrictions can cause select-based non-blocking
		 * writes to spin.
		 */
		if (space > 0) {
			if ((error = pipelock(wpipe,1)) == 0) {
				int size;	/* Transfer size */
				int segsize;	/* first segment to transfer */

				/*
				 * It is possible for a direct write to
				 * slip in on us... handle it here...
				 */
				if (wpipe->pipe_state & (PIPE_DIRECTW|PIPE_DIRECTIP)) {
					pipeunlock(wpipe);
					goto retrywrite;
				}
				/*
				 * If a process blocked in uiomove, our
				 * value for space might be bad.
				 *
				 * XXX will we be ok if the reader has gone
				 * away here?
				 */
				if (space > wpipe->pipe_buffer.size -
				    wpipe->pipe_buffer.cnt) {
					pipeunlock(wpipe);
					goto retrywrite;
				}

				/*
				 * Transfer size is minimum of uio transfer
				 * and free space in pipe buffer.
				 */
				if (space > uio->uio_resid)
					size = uio->uio_resid;
				else
					size = space;
				/*
				 * First segment to transfer is minimum of
				 * transfer size and contiguous space in
				 * pipe buffer.  If first segment to transfer
				 * is less than the transfer size, we've got
				 * a wraparound in the buffer.
				 */
				segsize = wpipe->pipe_buffer.size -
					wpipe->pipe_buffer.in;
				if (segsize > size)
					segsize = size;

				/* Transfer first segment */

				error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
						segsize, uio);

				if (error == 0 && segsize < size) {
					/*
					 * Transfer remaining part now, to
					 * support atomic writes.  Wraparound
					 * happened.
					 */
					if (wpipe->pipe_buffer.in + segsize !=
					    wpipe->pipe_buffer.size)
						panic("Expected pipe buffer wraparound disappeared");

					error = uiomove(&wpipe->pipe_buffer.buffer[0],
							size - segsize, uio);
				}
				if (error == 0) {
					wpipe->pipe_buffer.in += size;
					if (wpipe->pipe_buffer.in >=
					    wpipe->pipe_buffer.size) {
						if (wpipe->pipe_buffer.in != size - segsize + wpipe->pipe_buffer.size)
							panic("Expected wraparound bad");
						wpipe->pipe_buffer.in = size - segsize;
					}

					wpipe->pipe_buffer.cnt += size;
					if (wpipe->pipe_buffer.cnt > wpipe->pipe_buffer.size)
						panic("Pipe buffer overflow");

				}
				pipeunlock(wpipe);
			}
			if (error)
				break;

		} else {
			/*
			 * If the "read-side" has been blocked, wake it up now
			 * and yield to let it drain synchronously rather
			 * than block.
			 */
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}

			/*
			 * don't block on non-blocking I/O
			 */
			if (nbio) {
				error = EAGAIN;
				break;
			}

			/*
			 * We have no more space and have something to offer,
			 * wake up select/poll.
			 */
			pipeselwakeup(wpipe);

			wpipe->pipe_state |= PIPE_WANTW;
			error = tsleep(wpipe, PCATCH|PNORESCHED, "pipewr", 0);
			if (error != 0)
				break;
			/*
			 * If read side wants to go away, we just issue a signal
			 * to ourselves.
			 */
			if (wpipe->pipe_state & PIPE_EOF) {
				error = EPIPE;
				break;
			}
		}
	}

	--wpipe->pipe_busy;

	if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) {
		wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
		wakeup(wpipe);
	} else if (wpipe->pipe_buffer.cnt > 0) {
		/*
		 * If we have put any characters in the buffer, we wake up
		 * the reader.
		 */
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
	}

	/*
	 * Don't return EPIPE if I/O was successful
	 */
	if ((wpipe->pipe_buffer.cnt == 0) &&
	    (uio->uio_resid == 0) &&
	    (error == EPIPE)) {
		error = 0;
	}

	if (error == 0)
		vfs_timestamp(&wpipe->pipe_mtime);

	/*
	 * We have something to offer,
	 * wake up select/poll.
	 */
	if (wpipe->pipe_buffer.cnt)
		pipeselwakeup(wpipe);
	rel_mplock();
	return (error);
}
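/*
 * Illustrative sketch (userland, not kernel code): the "space = 0" clamp
 * above is what makes writes of up to PIPE_BUF bytes atomic.  Multiple
 * writers can therefore interleave whole records without locking:
 *
 *	#include <limits.h>	(PIPE_BUF)
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	(called concurrently from several processes sharing fd)
 *	void
 *	log_record(int fd, const char *rec)
 *	{
 *		size_t len = strlen(rec);
 *
 *		if (len <= PIPE_BUF)
 *			write(fd, rec, len);	(never interleaved)
 *	}
 */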
/*
 * MPALMOSTSAFE - acquires mplock
 *
 * We implement a very minimal set of ioctls for compatibility with sockets.
 */
int
pipe_ioctl(struct file *fp, u_long cmd, caddr_t data, struct ucred *cred)
{
	struct pipe *mpipe;
	int error;

	get_mplock();
	mpipe = (struct pipe *)fp->f_data;

	switch (cmd) {
	case FIOASYNC:
		if (*(int *)data) {
			mpipe->pipe_state |= PIPE_ASYNC;
		} else {
			mpipe->pipe_state &= ~PIPE_ASYNC;
		}
		error = 0;
		break;
	case FIONREAD:
		if (mpipe->pipe_state & PIPE_DIRECTW) {
			*(int *)data = mpipe->pipe_map.xio_bytes -
					mpipe->pipe_buffer.out;
		} else {
			*(int *)data = mpipe->pipe_buffer.cnt;
		}
		error = 0;
		break;
	case FIOSETOWN:
		error = fsetown(*(int *)data, &mpipe->pipe_sigio);
		break;
	case FIOGETOWN:
		*(int *)data = fgetown(mpipe->pipe_sigio);
		error = 0;
		break;
	case TIOCSPGRP:
		/* This is deprecated, FIOSETOWN should be used instead. */
		error = fsetown(-(*(int *)data), &mpipe->pipe_sigio);
		break;
	case TIOCGPGRP:
		/* This is deprecated, FIOGETOWN should be used instead. */
		*(int *)data = -fgetown(mpipe->pipe_sigio);
		error = 0;
		break;
	default:
		error = ENOTTY;
		break;
	}
	rel_mplock();
	return (error);
}
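/*
 * Illustrative sketch (userland, not kernel code): FIONREAD above reports
 * how many bytes a read() could return without blocking, whether the data
 * sits in the kernel buffer or in a pending direct write:
 *
 *	#include <sys/ioctl.h>
 *	#include <sys/filio.h>
 *	#include <unistd.h>
 *
 *	int fds[2], nready;
 *
 *	pipe(fds);
 *	write(fds[1], "hello", 5);
 *	ioctl(fds[0], FIONREAD, &nready);	(nready == 5)
 */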
/*
 * MPALMOSTSAFE - acquires mplock
 */
int
pipe_poll(struct file *fp, int events, struct ucred *cred)
{
	struct pipe *rpipe;
	struct pipe *wpipe;
	int revents = 0;

	get_mplock();
	rpipe = (struct pipe *)fp->f_data;
	wpipe = rpipe->pipe_peer;
	if (events & (POLLIN | POLLRDNORM))
		if ((rpipe->pipe_state & PIPE_DIRECTW) ||
		    (rpipe->pipe_buffer.cnt > 0) ||
		    (rpipe->pipe_state & PIPE_EOF))
			revents |= events & (POLLIN | POLLRDNORM);

	if (events & (POLLOUT | POLLWRNORM))
		if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF) ||
		    (((wpipe->pipe_state & PIPE_DIRECTW) == 0) &&
		     (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF))
			revents |= events & (POLLOUT | POLLWRNORM);

	if ((rpipe->pipe_state & PIPE_EOF) ||
	    (wpipe == NULL) ||
	    (wpipe->pipe_state & PIPE_EOF))
		revents |= POLLHUP;

	if (revents == 0) {
		if (events & (POLLIN | POLLRDNORM)) {
			selrecord(curthread, &rpipe->pipe_sel);
			rpipe->pipe_state |= PIPE_SEL;
		}

		if (events & (POLLOUT | POLLWRNORM)) {
			selrecord(curthread, &wpipe->pipe_sel);
			wpipe->pipe_state |= PIPE_SEL;
		}
	}
	rel_mplock();
	return (revents);
}

/*
 * MPALMOSTSAFE - acquires mplock
 */
static int
pipe_stat(struct file *fp, struct stat *ub, struct ucred *cred)
{
	struct pipe *pipe;

	get_mplock();
	pipe = (struct pipe *)fp->f_data;

	bzero((caddr_t)ub, sizeof(*ub));
	ub->st_mode = S_IFIFO;
	ub->st_blksize = pipe->pipe_buffer.size;
	ub->st_size = pipe->pipe_buffer.cnt;
	if (ub->st_size == 0 && (pipe->pipe_state & PIPE_DIRECTW)) {
		ub->st_size = pipe->pipe_map.xio_bytes -
				pipe->pipe_buffer.out;
	}
	ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize;
	ub->st_atimespec = pipe->pipe_atime;
	ub->st_mtimespec = pipe->pipe_mtime;
	ub->st_ctimespec = pipe->pipe_ctime;
	/*
	 * Left as 0: st_dev, st_ino, st_nlink, st_uid, st_gid, st_rdev,
	 * st_flags, st_gen.
	 * XXX (st_dev, st_ino) should be unique.
	 */
	rel_mplock();
	return (0);
}

/*
 * MPALMOSTSAFE - acquires mplock
 */
static int
pipe_close(struct file *fp)
{
	struct pipe *cpipe = (struct pipe *)fp->f_data;

	get_mplock();
	fp->f_ops = &badfileops;
	fp->f_data = NULL;
	funsetown(cpipe->pipe_sigio);
	pipeclose(cpipe);
	rel_mplock();
	return (0);
}
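/*
 * Illustrative sketch (userland, not kernel code): the revents logic in
 * pipe_poll() above, as seen from a poll(2) caller.  POLLIN fires once
 * data (or EOF) is available; POLLHUP fires once the peer is gone:
 *
 *	#include <poll.h>
 *	#include <unistd.h>
 *
 *	int fds[2];
 *	struct pollfd pfd;
 *
 *	pipe(fds);
 *	pfd.fd = fds[0];
 *	pfd.events = POLLIN;
 *	write(fds[1], "x", 1);
 *	poll(&pfd, 1, -1);	(returns 1, revents has POLLIN)
 *	close(fds[1]);
 *	poll(&pfd, 1, -1);	(revents now includes POLLHUP)
 */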
/*
 * Shutdown one or both directions of a full-duplex pipe.
 *
 * MPALMOSTSAFE - acquires mplock
 */
static int
pipe_shutdown(struct file *fp, int how)
{
	struct pipe *rpipe;
	struct pipe *wpipe;
	int error = EPIPE;

	get_mplock();
	rpipe = (struct pipe *)fp->f_data;

	switch(how) {
	case SHUT_RDWR:
	case SHUT_RD:
		if (rpipe) {
			rpipe->pipe_state |= PIPE_EOF;
			pipeselwakeup(rpipe);
			if (rpipe->pipe_busy)
				wakeup(rpipe);
			error = 0;
		}
		if (how == SHUT_RD)
			break;
		/* fall through */
	case SHUT_WR:
		if (rpipe && (wpipe = rpipe->pipe_peer) != NULL) {
			wpipe->pipe_state |= PIPE_EOF;
			pipeselwakeup(wpipe);
			if (wpipe->pipe_busy)
				wakeup(wpipe);
			error = 0;
		}
	}
	rel_mplock();
	return (error);
}

static void
pipe_free_kmem(struct pipe *cpipe)
{
	if (cpipe->pipe_buffer.buffer != NULL) {
		if (cpipe->pipe_buffer.size > PIPE_SIZE)
			--pipe_nbig;
		kmem_free(kernel_map,
			(vm_offset_t)cpipe->pipe_buffer.buffer,
			cpipe->pipe_buffer.size);
		cpipe->pipe_buffer.buffer = NULL;
		cpipe->pipe_buffer.object = NULL;
	}
#ifndef PIPE_NODIRECT
	KKASSERT(cpipe->pipe_map.xio_bytes == 0 &&
		cpipe->pipe_map.xio_offset == 0 &&
		cpipe->pipe_map.xio_npages == 0);
#endif
}

/*
 * shutdown the pipe
 */
static void
pipeclose(struct pipe *cpipe)
{
	globaldata_t gd;
	struct pipe *ppipe;

	if (cpipe == NULL)
		return;

	pipeselwakeup(cpipe);

	/*
	 * If the other side is blocked, wake it up saying that
	 * we want to close it down.
	 */
	while (cpipe->pipe_busy) {
		wakeup(cpipe);
		cpipe->pipe_state |= PIPE_WANT | PIPE_EOF;
		tsleep(cpipe, 0, "pipecl", 0);
	}

	/*
	 * Disconnect from peer
	 */
	if ((ppipe = cpipe->pipe_peer) != NULL) {
		pipeselwakeup(ppipe);

		ppipe->pipe_state |= PIPE_EOF;
		wakeup(ppipe);
		KNOTE(&ppipe->pipe_sel.si_note, 0);
		ppipe->pipe_peer = NULL;
	}

	if (cpipe->pipe_kva) {
		pmap_qremove(cpipe->pipe_kva, XIO_INTERNAL_PAGES);
		kmem_free(kernel_map, cpipe->pipe_kva, XIO_INTERNAL_SIZE);
		cpipe->pipe_kva = NULL;
	}

	/*
	 * free or cache resources
	 */
	gd = mycpu;
	if (gd->gd_pipeqcount >= pipe_maxcache ||
	    cpipe->pipe_buffer.size != PIPE_SIZE
	) {
		pipe_free_kmem(cpipe);
		free(cpipe, M_PIPE);
	} else {
		KKASSERT(cpipe->pipe_map.xio_npages == 0 &&
			cpipe->pipe_map.xio_bytes == 0 &&
			cpipe->pipe_map.xio_offset == 0);
		cpipe->pipe_state = 0;
		cpipe->pipe_busy = 0;
		cpipe->pipe_peer = gd->gd_pipeq;
		gd->gd_pipeq = cpipe;
		++gd->gd_pipeqcount;
	}
}

/*
 * MPALMOSTSAFE - acquires mplock
 */
static int
pipe_kqfilter(struct file *fp, struct knote *kn)
{
	struct pipe *cpipe;

	get_mplock();
	cpipe = (struct pipe *)kn->kn_fp->f_data;

	switch (kn->kn_filter) {
	case EVFILT_READ:
		kn->kn_fop = &pipe_rfiltops;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &pipe_wfiltops;
		cpipe = cpipe->pipe_peer;
		if (cpipe == NULL) {
			/* other end of pipe has been closed */
			rel_mplock();
			return (EPIPE);
		}
		break;
	default:
		rel_mplock();
		return (1);
	}
	kn->kn_hook = (caddr_t)cpipe;

	SLIST_INSERT_HEAD(&cpipe->pipe_sel.si_note, kn, kn_selnext);
	rel_mplock();
	return (0);
}
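/*
 * Illustrative sketch (userland, not kernel code): registering the read
 * filter installed by pipe_kqfilter() above.  kn_data, computed by
 * filt_piperead() below, comes back in the kevent's data field as the
 * number of readable bytes:
 *
 *	#include <sys/types.h>
 *	#include <sys/event.h>
 *	#include <sys/time.h>
 *	#include <unistd.h>
 *
 *	int kq, fds[2];
 *	struct kevent kev;
 *
 *	pipe(fds);
 *	kq = kqueue();
 *	EV_SET(&kev, fds[0], EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	kevent(kq, &kev, 1, NULL, 0, NULL);	(register)
 *	write(fds[1], "hi", 2);
 *	kevent(kq, NULL, 0, &kev, 1, NULL);	(returns: kev.data == 2)
 */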
static void
filt_pipedetach(struct knote *kn)
{
	struct pipe *cpipe = (struct pipe *)kn->kn_hook;

	SLIST_REMOVE(&cpipe->pipe_sel.si_note, kn, knote, kn_selnext);
}

/*ARGSUSED*/
static int
filt_piperead(struct knote *kn, long hint)
{
	struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
	struct pipe *wpipe = rpipe->pipe_peer;

	kn->kn_data = rpipe->pipe_buffer.cnt;
	if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW)) {
		kn->kn_data = rpipe->pipe_map.xio_bytes -
				rpipe->pipe_buffer.out;
	}

	if ((rpipe->pipe_state & PIPE_EOF) ||
	    (wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
		kn->kn_flags |= EV_EOF;
		return (1);
	}
	return (kn->kn_data > 0);
}

/*ARGSUSED*/
static int
filt_pipewrite(struct knote *kn, long hint)
{
	struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
	struct pipe *wpipe = rpipe->pipe_peer;

	if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
		kn->kn_data = 0;
		kn->kn_flags |= EV_EOF;
		return (1);
	}
	kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
	if (wpipe->pipe_state & PIPE_DIRECTW)
		kn->kn_data = 0;

	return (kn->kn_data >= PIPE_BUF);
}