/*
 * Copyright (c) 1996 John S. Dyson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Absolutely no warranty of function or purpose is made by the author
 *    John S. Dyson.
 * 4. Modifications may be freely made to this file if the above conditions
 *    are met.
 *
 * $FreeBSD: src/sys/kern/sys_pipe.c,v 1.60.2.13 2002/08/05 15:05:15 des Exp $
 * $DragonFly: src/sys/kern/sys_pipe.c,v 1.44 2006/12/28 21:24:01 dillon Exp $
 */

/*
 * This file contains a high-performance replacement for the socket-based
 * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
 * all features of sockets, but does do everything that pipes normally
 * do.
 */

/*
 * This code has two modes of operation, a small write mode and a large
 * write mode.  The small write mode acts like conventional pipes with
 * a kernel buffer.  If the buffer is less than PIPE_MINDIRECT, then the
 * "normal" pipe buffering is done.  If the buffer is between PIPE_MINDIRECT
 * and PIPE_SIZE in size, it is fully mapped and wired into the kernel, and
 * the receiving process can copy it directly from the pages in the sending
 * process.
 *
 * If the sending process receives a signal, it is possible that it will
 * go away, and certainly its address space can change, because control
 * is returned back to the user-mode side.  In that case, the pipe code
 * arranges to copy the buffer supplied by the user process, to a pageable
 * kernel buffer, and the receiving process will grab the data from the
 * pageable kernel buffer.  Since signals don't happen all that often,
 * the copy operation is normally eliminated.
 *
 * The constant PIPE_MINDIRECT is chosen to make sure that buffering will
 * happen for small transfers so that the system will not spend all of
 * its time context switching.  PIPE_SIZE is constrained by the
 * amount of kernel virtual memory.
 */
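
/*
 * Illustrative sketch (not part of this file's logic): the userland
 * interface implemented here is the ordinary pipe(2) pair.  Whether a
 * given write is buffered or takes the direct path depends on its size
 * and on the kern.pipe.* sysctls declared below; the outcomes in this
 * example are typical expectations, not guarantees:
 *
 *	int fd[2];
 *	char big[64 * 1024];
 *	pipe(fd);
 *	write(fd[1], "hi", 2);			-- small: buffered mode
 *	write(fd[1], big, sizeof(big));		-- large: may go direct
 *	read(fd[0], big, sizeof(big));
 */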

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/ttycom.h>
#include <sys/stat.h>
#include <sys/poll.h>
#include <sys/select.h>
#include <sys/signalvar.h>
#include <sys/sysproto.h>
#include <sys/pipe.h>
#include <sys/vnode.h>
#include <sys/uio.h>
#include <sys/event.h>
#include <sys/globaldata.h>
#include <sys/module.h>
#include <sys/malloc.h>
#include <sys/sysctl.h>
#include <sys/socket.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <sys/lock.h>
#include <vm/vm_object.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_zone.h>

#include <sys/file2.h>

#include <machine/cpufunc.h>

/*
 * interfaces to the outside world
 */
static int pipe_read (struct file *fp, struct uio *uio,
		struct ucred *cred, int flags);
static int pipe_write (struct file *fp, struct uio *uio,
		struct ucred *cred, int flags);
static int pipe_close (struct file *fp);
static int pipe_shutdown (struct file *fp, int how);
static int pipe_poll (struct file *fp, int events, struct ucred *cred);
static int pipe_kqfilter (struct file *fp, struct knote *kn);
static int pipe_stat (struct file *fp, struct stat *sb, struct ucred *cred);
static int pipe_ioctl (struct file *fp, u_long cmd, caddr_t data, struct ucred *cred);

static struct fileops pipeops = {
	.fo_read = pipe_read,
	.fo_write = pipe_write,
	.fo_ioctl = pipe_ioctl,
	.fo_poll = pipe_poll,
	.fo_kqfilter = pipe_kqfilter,
	.fo_stat = pipe_stat,
	.fo_close = pipe_close,
	.fo_shutdown = pipe_shutdown
};

static void	filt_pipedetach(struct knote *kn);
static int	filt_piperead(struct knote *kn, long hint);
static int	filt_pipewrite(struct knote *kn, long hint);

static struct filterops pipe_rfiltops =
	{ 1, NULL, filt_pipedetach, filt_piperead };
static struct filterops pipe_wfiltops =
	{ 1, NULL, filt_pipedetach, filt_pipewrite };

MALLOC_DEFINE(M_PIPE, "pipe", "pipe structures");

/*
 * Default pipe buffer size(s), this can be kind-of large now because pipe
 * space is pageable.  The pipe code will try to maintain locality of
 * reference for performance reasons, so small amounts of outstanding I/O
 * will not wipe the cache.
 */
#define MINPIPESIZE (PIPE_SIZE/3)
#define MAXPIPESIZE (2*PIPE_SIZE/3)

/*
 * Maximum amount of kva for pipes -- this is kind-of a soft limit, but
 * is there so that on large systems, we don't exhaust it.
 */
#define MAXPIPEKVA (8*1024*1024)

/*
 * Limit for direct transfers; we cannot, of course, limit
 * the amount of kva for pipes in general.
 */
#define LIMITPIPEKVA (16*1024*1024)
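
/*
 * Note (summarizing behavior found in pipe_create() and pipe_write()
 * below): pipes are created at PIPE_SIZE and may be grown once to
 * BIG_PIPE_SIZE when a large write is detected on an empty, quiescent
 * pipe.  The number of such "big" pipes is bounded by the limits that
 * follow.
 */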

/*
 * Limit the number of "big" pipes
 */
#define LIMITBIGPIPES	32
#define PIPEQ_MAX_CACHE 16      /* per-cpu pipe structure cache */

static int pipe_maxbig = LIMITBIGPIPES;
static int pipe_maxcache = PIPEQ_MAX_CACHE;
static int pipe_nbig;
static int pipe_bcache_alloc;
static int pipe_bkmem_alloc;
static int pipe_dwrite_enable = 1;	/* 0:copy, 1:kmem/sfbuf 2:force */
static int pipe_dwrite_sfbuf = 1;	/* 0:kmem_map 1:sfbufs 2:sfbufs_dmap */
					/* 3:sfbuf_dmap w/ forced invlpg */

SYSCTL_NODE(_kern, OID_AUTO, pipe, CTLFLAG_RW, 0, "Pipe operation");
SYSCTL_INT(_kern_pipe, OID_AUTO, nbig,
	CTLFLAG_RD, &pipe_nbig, 0, "number of big pipes allocated");
SYSCTL_INT(_kern_pipe, OID_AUTO, maxcache,
	CTLFLAG_RW, &pipe_maxcache, 0, "max pipes cached per-cpu");
SYSCTL_INT(_kern_pipe, OID_AUTO, maxbig,
	CTLFLAG_RW, &pipe_maxbig, 0, "max number of big pipes");
SYSCTL_INT(_kern_pipe, OID_AUTO, dwrite_enable,
	CTLFLAG_RW, &pipe_dwrite_enable, 0, "1:enable/2:force direct writes");
SYSCTL_INT(_kern_pipe, OID_AUTO, dwrite_sfbuf,
	CTLFLAG_RW, &pipe_dwrite_sfbuf, 0,
	"(if dwrite_enable) 0:kmem 1:sfbuf 2:sfbuf_dmap 3:sfbuf_dmap_forceinvlpg");
#if !defined(NO_PIPE_SYSCTL_STATS)
SYSCTL_INT(_kern_pipe, OID_AUTO, bcache_alloc,
	CTLFLAG_RW, &pipe_bcache_alloc, 0, "pipe buffer from pcpu cache");
SYSCTL_INT(_kern_pipe, OID_AUTO, bkmem_alloc,
	CTLFLAG_RW, &pipe_bkmem_alloc, 0, "pipe buffer from kmem");
#endif

static void pipeclose (struct pipe *cpipe);
static void pipe_free_kmem (struct pipe *cpipe);
static int pipe_create (struct pipe **cpipep);
static __inline int pipelock (struct pipe *cpipe, int catch);
static __inline void pipeunlock (struct pipe *cpipe);
static __inline void pipeselwakeup (struct pipe *cpipe);
#ifndef PIPE_NODIRECT
static int pipe_build_write_buffer (struct pipe *wpipe, struct uio *uio);
static int pipe_direct_write (struct pipe *wpipe, struct uio *uio);
static void pipe_clone_write_buffer (struct pipe *wpipe);
#endif
static int pipespace (struct pipe *cpipe, int size);

/*
 * The pipe system call for the DTYPE_PIPE type of pipes
 *
 * pipe_args(int dummy)
 */

/* ARGSUSED */
int
sys_pipe(struct pipe_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *rf, *wf;
	struct pipe *rpipe, *wpipe;
	int fd1, fd2, error;

	KKASSERT(p);

	rpipe = wpipe = NULL;
	if (pipe_create(&rpipe) || pipe_create(&wpipe)) {
		pipeclose(rpipe);
		pipeclose(wpipe);
		return (ENFILE);
	}

	rpipe->pipe_state |= PIPE_DIRECTOK;
	wpipe->pipe_state |= PIPE_DIRECTOK;

	/*
	 * Select the direct-map features to use for this pipe.  Since the
	 * sysctl's can change on the fly we record the settings when the
	 * pipe is created.
	 *
	 * Generally speaking the system will default to what we consider
	 * to be the best-balanced and most stable option.  Right now this
	 * is SFBUF1.  Modes 2 and 3 are considered experimental at the
	 * moment.
	 */
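	/*
	 * For orientation, the mapping implemented by the switch below:
	 *
	 *	dwrite_enable == 0                  -> PIPE_COPY (direct off)
	 *	dwrite_enable != 0, sfbuf == 0      -> PIPE_KMEM
	 *	dwrite_enable != 0, sfbuf == 1      -> PIPE_SFBUF1
	 *	dwrite_enable != 0, sfbuf == 2 or 3 -> PIPE_SFBUF2
	 */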
	wpipe->pipe_feature = PIPE_COPY;
	if (pipe_dwrite_enable) {
		switch(pipe_dwrite_sfbuf) {
		case 0:
			wpipe->pipe_feature = PIPE_KMEM;
			break;
		case 1:
			wpipe->pipe_feature = PIPE_SFBUF1;
			break;
		case 2:
		case 3:
			wpipe->pipe_feature = PIPE_SFBUF2;
			break;
		}
	}
	rpipe->pipe_feature = wpipe->pipe_feature;

	error = falloc(p, &rf, &fd1);
	if (error) {
		pipeclose(rpipe);
		pipeclose(wpipe);
		return (error);
	}
	uap->sysmsg_fds[0] = fd1;

	/*
	 * Warning: once we've gotten past allocation of the fd for the
	 * read-side, we can only drop the read side via fdrop() in order
	 * to avoid races against processes which manage to dup() the read
	 * side while we are blocked trying to allocate the write side.
	 */
	rf->f_type = DTYPE_PIPE;
	rf->f_flag = FREAD | FWRITE;
	rf->f_ops = &pipeops;
	rf->f_data = rpipe;
	error = falloc(p, &wf, &fd2);
	if (error) {
		fsetfd(p, NULL, fd1);
		fdrop(rf);
		/* rpipe has been closed by fdrop(). */
		pipeclose(wpipe);
		return (error);
	}
	wf->f_type = DTYPE_PIPE;
	wf->f_flag = FREAD | FWRITE;
	wf->f_ops = &pipeops;
	wf->f_data = wpipe;
	uap->sysmsg_fds[1] = fd2;

	rpipe->pipe_peer = wpipe;
	wpipe->pipe_peer = rpipe;

	fsetfd(p, rf, fd1);
	fsetfd(p, wf, fd2);
	fdrop(rf);
	fdrop(wf);

	return (0);
}

/*
 * Allocate kva for the pipe circular buffer; the space is pageable.
 * This routine will 'realloc' the size of a pipe safely: if it fails
 * it will retain the old buffer and return ENOMEM.
 */
static int
pipespace(struct pipe *cpipe, int size)
{
	struct vm_object *object;
	caddr_t buffer;
	int npages, error;

	npages = round_page(size) / PAGE_SIZE;
	object = cpipe->pipe_buffer.object;

	/*
	 * [re]create the object if necessary and reserve space for it
	 * in the kernel_map.  The object and memory are pageable.  On
	 * success, free the old resources before assigning the new
	 * ones.
	 */
	if (object == NULL || object->size != npages) {
		object = vm_object_allocate(OBJT_DEFAULT, npages);
		buffer = (caddr_t)vm_map_min(&kernel_map);

		error = vm_map_find(&kernel_map, object, 0,
				    (vm_offset_t *)&buffer, size,
				    1,
				    VM_MAPTYPE_NORMAL,
				    VM_PROT_ALL, VM_PROT_ALL,
				    0);

		if (error != KERN_SUCCESS) {
			vm_object_deallocate(object);
			return (ENOMEM);
		}
		pipe_free_kmem(cpipe);
		cpipe->pipe_buffer.object = object;
		cpipe->pipe_buffer.buffer = buffer;
		cpipe->pipe_buffer.size = size;
		++pipe_bkmem_alloc;
	} else {
		++pipe_bcache_alloc;
	}
	cpipe->pipe_buffer.in = 0;
	cpipe->pipe_buffer.out = 0;
	cpipe->pipe_buffer.cnt = 0;
	return (0);
}
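
/*
 * A quick sketch of the pipe_buffer indices initialized above and used
 * throughout this file (standard ring-buffer bookkeeping): 'in' is where
 * the next write lands, 'out' is where the next read comes from, and
 * 'cnt' is the number of valid bytes.  For example, with a 16-byte
 * buffer, after writing 10 bytes and reading 4:
 *
 *	in = 10, out = 4, cnt = 6
 *	free space = size - cnt = 10 (6 contiguous at [10..15], 4 at [0..3])
 *
 * Both indices wrap back to 0 when they reach 'size'.
 */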

/*
 * Initialize and allocate VM and memory for pipe, pulling the pipe from
 * our per-cpu cache if possible.  For now make sure it is sized for the
 * smaller PIPE_SIZE default.
 */
static int
pipe_create(struct pipe **cpipep)
{
	globaldata_t gd = mycpu;
	struct pipe *cpipe;
	int error;

	if ((cpipe = gd->gd_pipeq) != NULL) {
		gd->gd_pipeq = cpipe->pipe_peer;
		--gd->gd_pipeqcount;
		cpipe->pipe_peer = NULL;
	} else {
		cpipe = kmalloc(sizeof(struct pipe), M_PIPE, M_WAITOK|M_ZERO);
	}
	*cpipep = cpipe;
	if ((error = pipespace(cpipe, PIPE_SIZE)) != 0)
		return (error);
	vfs_timestamp(&cpipe->pipe_ctime);
	cpipe->pipe_atime = cpipe->pipe_ctime;
	cpipe->pipe_mtime = cpipe->pipe_ctime;
	return (0);
}


/*
 * lock a pipe for I/O, blocking other access
 */
static __inline int
pipelock(struct pipe *cpipe, int catch)
{
	int error;

	while (cpipe->pipe_state & PIPE_LOCK) {
		cpipe->pipe_state |= PIPE_LWANT;
		error = tsleep(cpipe, (catch ? PCATCH : 0), "pipelk", 0);
		if (error != 0)
			return (error);
	}
	cpipe->pipe_state |= PIPE_LOCK;
	return (0);
}

/*
 * unlock a pipe I/O lock
 */
static __inline void
pipeunlock(struct pipe *cpipe)
{

	cpipe->pipe_state &= ~PIPE_LOCK;
	if (cpipe->pipe_state & PIPE_LWANT) {
		cpipe->pipe_state &= ~PIPE_LWANT;
		wakeup(cpipe);
	}
}

static __inline void
pipeselwakeup(struct pipe *cpipe)
{

	if (cpipe->pipe_state & PIPE_SEL) {
		cpipe->pipe_state &= ~PIPE_SEL;
		selwakeup(&cpipe->pipe_sel);
	}
	if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio)
		pgsigio(cpipe->pipe_sigio, SIGIO, 0);
	KNOTE(&cpipe->pipe_sel.si_note, 0);
}

/*
 * MPALMOSTSAFE (acquires mplock)
 */
static int
pipe_read(struct file *fp, struct uio *uio, struct ucred *cred, int fflags)
{
	struct pipe *rpipe;
	int error;
	int nread = 0;
	int nbio;
	u_int size;

	get_mplock();
	rpipe = (struct pipe *) fp->f_data;
	++rpipe->pipe_busy;
	error = pipelock(rpipe, 1);
	if (error)
		goto unlocked_error;

	if (fflags & O_FBLOCKING)
		nbio = 0;
	else if (fflags & O_FNONBLOCKING)
		nbio = 1;
	else if (fp->f_flag & O_NONBLOCK)
		nbio = 1;
	else
		nbio = 0;

	while (uio->uio_resid) {
		caddr_t va;

		if (rpipe->pipe_buffer.cnt > 0) {
			/*
			 * normal pipe buffer receive
			 */
			size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
			if (size > rpipe->pipe_buffer.cnt)
				size = rpipe->pipe_buffer.cnt;
			if (size > (u_int) uio->uio_resid)
				size = (u_int) uio->uio_resid;

			error = uiomove(&rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
					size, uio);
			if (error)
				break;

			rpipe->pipe_buffer.out += size;
			if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
				rpipe->pipe_buffer.out = 0;

			rpipe->pipe_buffer.cnt -= size;

			/*
			 * If there is no more to read in the pipe, reset
			 * its pointers to the beginning.  This improves
			 * cache hit stats.
			 */
			if (rpipe->pipe_buffer.cnt == 0) {
				rpipe->pipe_buffer.in = 0;
				rpipe->pipe_buffer.out = 0;
			}
			nread += size;
#ifndef PIPE_NODIRECT
		} else if (rpipe->pipe_kva &&
			   rpipe->pipe_feature == PIPE_KMEM &&
			   (rpipe->pipe_state & (PIPE_DIRECTW|PIPE_DIRECTIP))
			       == PIPE_DIRECTW
		) {
			/*
			 * Direct copy using source-side kva mapping
			 */
			size = rpipe->pipe_map.xio_bytes -
				rpipe->pipe_buffer.out;
			if (size > (u_int)uio->uio_resid)
				size = (u_int)uio->uio_resid;
			va = (caddr_t)rpipe->pipe_kva +
				xio_kvaoffset(&rpipe->pipe_map, rpipe->pipe_buffer.out);
			error = uiomove(va, size, uio);
			if (error)
				break;
			nread += size;
			rpipe->pipe_buffer.out += size;
			if (rpipe->pipe_buffer.out == rpipe->pipe_map.xio_bytes) {
				rpipe->pipe_state |= PIPE_DIRECTIP;
				rpipe->pipe_state &= ~PIPE_DIRECTW;
				/* reset out index for copy mode */
				rpipe->pipe_buffer.out = 0;
				wakeup(rpipe);
			}
		} else if (rpipe->pipe_buffer.out != rpipe->pipe_map.xio_bytes &&
			   rpipe->pipe_kva &&
			   rpipe->pipe_feature == PIPE_SFBUF2 &&
			   (rpipe->pipe_state & (PIPE_DIRECTW|PIPE_DIRECTIP))
			       == PIPE_DIRECTW
		) {
			/*
			 * Direct copy, bypassing a kernel buffer.  We cannot
			 * mess with the direct-write buffer until
			 * PIPE_DIRECTIP is cleared.  In order to prevent
			 * the pipe_write code from racing itself in
			 * direct_write, we set DIRECTIP when we clear
			 * DIRECTW after we have exhausted the buffer.
			 */
			if (pipe_dwrite_sfbuf == 3)
				rpipe->pipe_kvamask = 0;
			pmap_qenter2(rpipe->pipe_kva, rpipe->pipe_map.xio_pages,
				     rpipe->pipe_map.xio_npages,
				     &rpipe->pipe_kvamask);
			size = rpipe->pipe_map.xio_bytes -
				rpipe->pipe_buffer.out;
			if (size > (u_int)uio->uio_resid)
				size = (u_int)uio->uio_resid;
			va = (caddr_t)rpipe->pipe_kva +
				xio_kvaoffset(&rpipe->pipe_map, rpipe->pipe_buffer.out);
			error = uiomove(va, size, uio);
			if (error)
				break;
			nread += size;
			rpipe->pipe_buffer.out += size;
			if (rpipe->pipe_buffer.out == rpipe->pipe_map.xio_bytes) {
				rpipe->pipe_state |= PIPE_DIRECTIP;
				rpipe->pipe_state &= ~PIPE_DIRECTW;
				/* reset out index for copy mode */
				rpipe->pipe_buffer.out = 0;
				wakeup(rpipe);
			}
		} else if (rpipe->pipe_buffer.out != rpipe->pipe_map.xio_bytes &&
			   rpipe->pipe_feature == PIPE_SFBUF1 &&
			   (rpipe->pipe_state & (PIPE_DIRECTW|PIPE_DIRECTIP))
			       == PIPE_DIRECTW
		) {
			/*
			 * Direct copy, bypassing a kernel buffer.  We cannot
			 * mess with the direct-write buffer until
			 * PIPE_DIRECTIP is cleared.  In order to prevent
			 * the pipe_write code from racing itself in
			 * direct_write, we set DIRECTIP when we clear
			 * DIRECTW after we have exhausted the buffer.
			 */
			error = xio_uio_copy(&rpipe->pipe_map, rpipe->pipe_buffer.out, uio, &size);
			if (error)
				break;
			nread += size;
			rpipe->pipe_buffer.out += size;
			if (rpipe->pipe_buffer.out == rpipe->pipe_map.xio_bytes) {
				rpipe->pipe_state |= PIPE_DIRECTIP;
				rpipe->pipe_state &= ~PIPE_DIRECTW;
				/* reset out index for copy mode */
				rpipe->pipe_buffer.out = 0;
				wakeup(rpipe);
			}
#endif
		} else {
			/*
			 * detect EOF condition
			 * read returns 0 on EOF, no need to set error
			 */
			if (rpipe->pipe_state & PIPE_EOF)
				break;

			/*
			 * If the "write-side" has been blocked, wake it up now.
			 */
			if (rpipe->pipe_state & PIPE_WANTW) {
				rpipe->pipe_state &= ~PIPE_WANTW;
				wakeup(rpipe);
			}

			/*
			 * Break if some data was read.
			 */
			if (nread > 0)
				break;

			/*
			 * Unlock the pipe buffer for our remaining
			 * processing.  We will either break out with an
			 * error or we will sleep and relock to loop.
			 */
			pipeunlock(rpipe);

			/*
			 * Handle non-blocking mode operation or
			 * wait for more data.
			 */
			if (nbio) {
				error = EAGAIN;
			} else {
				rpipe->pipe_state |= PIPE_WANTR;
				if ((error = tsleep(rpipe, PCATCH|PNORESCHED,
				    "piperd", 0)) == 0) {
					error = pipelock(rpipe, 1);
				}
			}
			if (error)
				goto unlocked_error;
		}
	}
	pipeunlock(rpipe);

	if (error == 0)
		vfs_timestamp(&rpipe->pipe_atime);
unlocked_error:
	--rpipe->pipe_busy;

	/*
	 * PIPE_WANT processing only makes sense if pipe_busy is 0.
	 */
	if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) {
		rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW);
		wakeup(rpipe);
	} else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) {
		/*
		 * Handle write blocking hysteresis.
		 */
		if (rpipe->pipe_state & PIPE_WANTW) {
			rpipe->pipe_state &= ~PIPE_WANTW;
			wakeup(rpipe);
		}
	}

	if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF)
		pipeselwakeup(rpipe);
	rel_mplock();
	return (error);
}

#ifndef PIPE_NODIRECT
/*
 * Map the sending processes' buffer into kernel space and wire it.
 * This is similar to a physical write operation.
 */
static int
pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio)
{
	int error;
	u_int size;

	size = (u_int) uio->uio_iov->iov_len;
	if (size > wpipe->pipe_buffer.size)
		size = wpipe->pipe_buffer.size;

	if (uio->uio_segflg == UIO_SYSSPACE) {
		error = xio_init_kbuf(&wpipe->pipe_map, uio->uio_iov->iov_base,
					size);
	} else {
		error = xio_init_ubuf(&wpipe->pipe_map, uio->uio_iov->iov_base,
					size, XIOF_READ);
	}
	wpipe->pipe_buffer.out = 0;
	if (error)
		return(error);

	/*
	 * Create a kernel map for KMEM and SFBUF2 copy modes.  SFBUF2 will
	 * map the pages on the target while KMEM maps the pages now.
	 */
	switch(wpipe->pipe_feature) {
	case PIPE_KMEM:
	case PIPE_SFBUF2:
		if (wpipe->pipe_kva == NULL) {
			wpipe->pipe_kva =
			    kmem_alloc_nofault(&kernel_map, XIO_INTERNAL_SIZE);
			wpipe->pipe_kvamask = 0;
		}
		if (wpipe->pipe_feature == PIPE_KMEM) {
			pmap_qenter(wpipe->pipe_kva, wpipe->pipe_map.xio_pages,
				    wpipe->pipe_map.xio_npages);
		}
		break;
	default:
		break;
	}

	/*
	 * And update the uio data.  The XIO might have loaded fewer bytes
	 * than requested so reload 'size'.
	 */
	size = wpipe->pipe_map.xio_bytes;
	uio->uio_iov->iov_len -= size;
	uio->uio_iov->iov_base += size;
	if (uio->uio_iov->iov_len == 0)
		uio->uio_iov++;
	uio->uio_resid -= size;
	uio->uio_offset += size;
	return (0);
}
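
/*
 * Summary of the three direct-write strategies set up above (a reading
 * of this file's logic, for orientation):
 *
 *	PIPE_KMEM   - pages are entered into pipe_kva immediately via
 *		      pmap_qenter(); the reader copies from that mapping.
 *	PIPE_SFBUF2 - pipe_kva is reserved here but the pages are mapped
 *		      lazily on the reader side with pmap_qenter2().
 *	PIPE_SFBUF1 - no kva mapping at all; the reader copies with
 *		      xio_uio_copy() directly from the XIO page list.
 */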

/*
 * In the case of a signal, the writing process might go away.  This
 * code copies the data into the circular buffer so that the source
 * pages can be freed without loss of data.
 *
 * Note that in direct mode pipe_buffer.out is used to track the
 * XIO offset.  We are converting the direct mode into buffered mode
 * which changes the meaning of pipe_buffer.out.
 */
static void
pipe_clone_write_buffer(struct pipe *wpipe)
{
	int size;
	int offset;

	offset = wpipe->pipe_buffer.out;
	size = wpipe->pipe_map.xio_bytes - offset;

	KKASSERT(size <= wpipe->pipe_buffer.size);

	wpipe->pipe_buffer.in = size;
	wpipe->pipe_buffer.out = 0;
	wpipe->pipe_buffer.cnt = size;
	wpipe->pipe_state &= ~(PIPE_DIRECTW | PIPE_DIRECTIP);

	xio_copy_xtok(&wpipe->pipe_map, offset, wpipe->pipe_buffer.buffer, size);
	xio_release(&wpipe->pipe_map);
	if (wpipe->pipe_kva) {
		pmap_qremove(wpipe->pipe_kva, XIO_INTERNAL_PAGES);
		kmem_free(&kernel_map, wpipe->pipe_kva, XIO_INTERNAL_SIZE);
		wpipe->pipe_kva = NULL;
	}
}

/*
 * This implements the pipe buffer write mechanism.  Note that only
 * a direct write OR a normal pipe write can be pending at any given time.
 * If there are any characters in the pipe buffer, the direct write will
 * be deferred until the receiving process grabs all of the bytes from
 * the pipe buffer.  Then the direct mapping write is set-up.
 */
static int
pipe_direct_write(struct pipe *wpipe, struct uio *uio)
{
	int error;

retry:
	while (wpipe->pipe_state & (PIPE_DIRECTW|PIPE_DIRECTIP)) {
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		wpipe->pipe_state |= PIPE_WANTW;
		error = tsleep(wpipe, PCATCH, "pipdww", 0);
		if (error)
			goto error2;
		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			goto error2;
		}
	}
	KKASSERT(wpipe->pipe_map.xio_bytes == 0);
	if (wpipe->pipe_buffer.cnt > 0) {
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}

		wpipe->pipe_state |= PIPE_WANTW;
		error = tsleep(wpipe, PCATCH, "pipdwc", 0);
		if (error)
			goto error2;
		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			goto error2;
		}
		goto retry;
	}

	/*
	 * Build our direct-write buffer
	 */
	wpipe->pipe_state |= PIPE_DIRECTW | PIPE_DIRECTIP;
	error = pipe_build_write_buffer(wpipe, uio);
	if (error)
		goto error1;
	wpipe->pipe_state &= ~PIPE_DIRECTIP;

	/*
	 * Wait until the receiver has snarfed the data.  Since we are likely
	 * going to sleep we optimize the case and yield synchronously,
	 * possibly avoiding the tsleep().
	 */
	error = 0;
	while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) {
		if (wpipe->pipe_state & PIPE_EOF) {
			pipelock(wpipe, 0);
			xio_release(&wpipe->pipe_map);
			if (wpipe->pipe_kva) {
				pmap_qremove(wpipe->pipe_kva, XIO_INTERNAL_PAGES);
				kmem_free(&kernel_map, wpipe->pipe_kva, XIO_INTERNAL_SIZE);
				wpipe->pipe_kva = NULL;
			}
			pipeunlock(wpipe);
			pipeselwakeup(wpipe);
			error = EPIPE;
			goto error1;
		}
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		pipeselwakeup(wpipe);
		error = tsleep(wpipe, PCATCH|PNORESCHED, "pipdwt", 0);
	}
	pipelock(wpipe, 0);
	if (wpipe->pipe_state & PIPE_DIRECTW) {
		/*
		 * this bit of trickery substitutes a kernel buffer for
		 * the process that might be going away.
		 */
		pipe_clone_write_buffer(wpipe);
		KKASSERT((wpipe->pipe_state & PIPE_DIRECTIP) == 0);
	} else {
		/*
		 * note: The pipe_kva mapping is not qremove'd here.  For
		 * legacy PIPE_KMEM mode this constitutes an improvement
		 * over the original FreeBSD-4 algorithm.
		 * For PIPE_SFBUF2 mode the kva mapping must not be removed
		 * to get the caching benefit.
		 *
		 * For testing purposes we will give the original algorithm
		 * the benefit of the doubt 'what it could have been', and
		 * keep the optimization.
		 */
		KKASSERT(wpipe->pipe_state & PIPE_DIRECTIP);
		xio_release(&wpipe->pipe_map);
		wpipe->pipe_state &= ~PIPE_DIRECTIP;
	}
	pipeunlock(wpipe);
	return (error);

	/*
	 * Direct-write error, clear the direct write flags.
	 */
error1:
	wpipe->pipe_state &= ~(PIPE_DIRECTW | PIPE_DIRECTIP);
	/* fallthrough */

	/*
	 * General error, wakeup the other side if it happens to be sleeping.
	 */
error2:
	wakeup(wpipe);
	return (error);
}
#endif

/*
 * MPALMOSTSAFE - acquires mplock
 */
static int
pipe_write(struct file *fp, struct uio *uio, struct ucred *cred, int fflags)
{
	int error = 0;
	int orig_resid;
	int nbio;
	struct pipe *wpipe, *rpipe;

	get_mplock();
	rpipe = (struct pipe *) fp->f_data;
	wpipe = rpipe->pipe_peer;

	/*
	 * detect loss of pipe read side, issue SIGPIPE if lost.
	 */
	if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
		rel_mplock();
		return (EPIPE);
	}
	++wpipe->pipe_busy;

	if (fflags & O_FBLOCKING)
		nbio = 0;
	else if (fflags & O_FNONBLOCKING)
		nbio = 1;
	else if (fp->f_flag & O_NONBLOCK)
		nbio = 1;
	else
		nbio = 0;

	/*
	 * If it is advantageous to resize the pipe buffer, do
	 * so.
	 */
	if ((uio->uio_resid > PIPE_SIZE) &&
	    (pipe_nbig < pipe_maxbig) &&
	    (wpipe->pipe_state & (PIPE_DIRECTW|PIPE_DIRECTIP)) == 0 &&
	    (wpipe->pipe_buffer.size <= PIPE_SIZE) &&
	    (wpipe->pipe_buffer.cnt == 0)) {

		if ((error = pipelock(wpipe, 1)) == 0) {
			if (pipespace(wpipe, BIG_PIPE_SIZE) == 0)
				pipe_nbig++;
			pipeunlock(wpipe);
		}
	}

	/*
	 * If an early error occurred, unbusy and return, waking up any
	 * pending readers.
	 */
	if (error) {
		--wpipe->pipe_busy;
		if ((wpipe->pipe_busy == 0) &&
		    (wpipe->pipe_state & PIPE_WANT)) {
			wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
			wakeup(wpipe);
		}
		rel_mplock();
		return(error);
	}

	KASSERT(wpipe->pipe_buffer.buffer != NULL, ("pipe buffer gone"));

	orig_resid = uio->uio_resid;

	while (uio->uio_resid) {
		int space;

#ifndef PIPE_NODIRECT
		/*
		 * If the transfer is large, we can gain performance if
		 * we do process-to-process copies directly.
		 * If the write is non-blocking, we don't use the
		 * direct write mechanism.
		 *
		 * The direct write mechanism will detect the reader going
		 * away on us.
		 */
		if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT ||
		    pipe_dwrite_enable > 1) &&
		    nbio == 0 &&
		    pipe_dwrite_enable) {
			error = pipe_direct_write(wpipe, uio);
			if (error)
				break;
			continue;
		}
#endif

		/*
		 * Pipe buffered writes cannot be coincidental with
		 * direct writes.  We wait until the currently executing
		 * direct write is completed before we start filling the
		 * pipe buffer.  We break out if a signal occurs or the
		 * reader goes away.
		 */
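		/*
		 * The retrywrite label below is re-entered whenever the
		 * pipe had to be unlocked mid-operation: a direct write
		 * may have slipped in, or our cached notion of free space
		 * may have gone stale while a process was blocked in
		 * uiomove, so the state must be revalidated.
		 */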
retrywrite:
		while (wpipe->pipe_state & (PIPE_DIRECTW|PIPE_DIRECTIP)) {
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}
			error = tsleep(wpipe, PCATCH, "pipbww", 0);
			if (wpipe->pipe_state & PIPE_EOF)
				break;
			if (error)
				break;
		}
		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			break;
		}

		space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;

		/* Writes of size <= PIPE_BUF must be atomic. */
		if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF))
			space = 0;

		/*
		 * Write to fill, read size handles write hysteresis.  Also
		 * additional restrictions can cause select-based non-blocking
		 * writes to spin.
		 */
		if (space > 0) {
			if ((error = pipelock(wpipe, 1)) == 0) {
				int size;	/* Transfer size */
				int segsize;	/* first segment to transfer */

				/*
				 * It is possible for a direct write to
				 * slip in on us... handle it here...
				 */
				if (wpipe->pipe_state & (PIPE_DIRECTW|PIPE_DIRECTIP)) {
					pipeunlock(wpipe);
					goto retrywrite;
				}
				/*
				 * If a process blocked in uiomove, our
				 * value for space might be bad.
				 *
				 * XXX will we be ok if the reader has gone
				 * away here?
				 */
				if (space > wpipe->pipe_buffer.size -
				    wpipe->pipe_buffer.cnt) {
					pipeunlock(wpipe);
					goto retrywrite;
				}

				/*
				 * Transfer size is minimum of uio transfer
				 * and free space in pipe buffer.
				 */
				if (space > uio->uio_resid)
					size = uio->uio_resid;
				else
					size = space;
				/*
				 * First segment to transfer is minimum of
				 * transfer size and contiguous space in
				 * pipe buffer.  If first segment to transfer
				 * is less than the transfer size, we've got
				 * a wraparound in the buffer.
				 */
				segsize = wpipe->pipe_buffer.size -
					wpipe->pipe_buffer.in;
				if (segsize > size)
					segsize = size;

				/* Transfer first segment */

				error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
						segsize, uio);

				if (error == 0 && segsize < size) {
					/*
					 * Transfer remaining part now, to
					 * support atomic writes.  Wraparound
					 * happened.
					 */
					if (wpipe->pipe_buffer.in + segsize !=
					    wpipe->pipe_buffer.size)
						panic("Expected pipe buffer wraparound disappeared");

					error = uiomove(&wpipe->pipe_buffer.buffer[0],
							size - segsize, uio);
				}
				if (error == 0) {
					wpipe->pipe_buffer.in += size;
					if (wpipe->pipe_buffer.in >=
					    wpipe->pipe_buffer.size) {
						if (wpipe->pipe_buffer.in != size - segsize + wpipe->pipe_buffer.size)
							panic("Expected wraparound bad");
						wpipe->pipe_buffer.in = size - segsize;
					}

					wpipe->pipe_buffer.cnt += size;
					if (wpipe->pipe_buffer.cnt > wpipe->pipe_buffer.size)
						panic("Pipe buffer overflow");

				}
				pipeunlock(wpipe);
			}
			if (error)
				break;

		} else {
			/*
			 * If the "read-side" has been blocked, wake it up now
			 * and yield to let it drain synchronously rather
			 * than block.
			 */
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}

			/*
			 * don't block on non-blocking I/O
			 */
			if (nbio) {
				error = EAGAIN;
				break;
			}

			/*
			 * We have no more space and have something to offer,
			 * wake up select/poll.
			 */
			pipeselwakeup(wpipe);

			wpipe->pipe_state |= PIPE_WANTW;
			error = tsleep(wpipe, PCATCH|PNORESCHED, "pipewr", 0);
			if (error != 0)
				break;
			/*
			 * If read side wants to go away, we just issue a signal
			 * to ourselves.
			 */
			if (wpipe->pipe_state & PIPE_EOF) {
				error = EPIPE;
				break;
			}
		}
	}

	--wpipe->pipe_busy;

	if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) {
		wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
		wakeup(wpipe);
	} else if (wpipe->pipe_buffer.cnt > 0) {
		/*
		 * If we have put any characters in the buffer, we wake up
		 * the reader.
		 */
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
	}

	/*
	 * Don't return EPIPE if I/O was successful
	 */
	if ((wpipe->pipe_buffer.cnt == 0) &&
	    (uio->uio_resid == 0) &&
	    (error == EPIPE)) {
		error = 0;
	}

	if (error == 0)
		vfs_timestamp(&wpipe->pipe_mtime);

	/*
	 * We have something to offer,
	 * wake up select/poll.
	 */
	if (wpipe->pipe_buffer.cnt)
		pipeselwakeup(wpipe);
	rel_mplock();
	return (error);
}

/*
 * MPALMOSTSAFE - acquires mplock
 *
 * we implement a very minimal set of ioctls for compatibility with sockets.
 */
int
pipe_ioctl(struct file *fp, u_long cmd, caddr_t data, struct ucred *cred)
{
	struct pipe *mpipe;
	int error;

	get_mplock();
	mpipe = (struct pipe *)fp->f_data;

	switch (cmd) {
	case FIOASYNC:
		if (*(int *)data) {
			mpipe->pipe_state |= PIPE_ASYNC;
		} else {
			mpipe->pipe_state &= ~PIPE_ASYNC;
		}
		error = 0;
		break;
	case FIONREAD:
		if (mpipe->pipe_state & PIPE_DIRECTW) {
			*(int *)data = mpipe->pipe_map.xio_bytes -
					mpipe->pipe_buffer.out;
		} else {
			*(int *)data = mpipe->pipe_buffer.cnt;
		}
		error = 0;
		break;
	case FIOSETOWN:
		error = fsetown(*(int *)data, &mpipe->pipe_sigio);
		break;
	case FIOGETOWN:
		*(int *)data = fgetown(mpipe->pipe_sigio);
		error = 0;
		break;
	case TIOCSPGRP:
		/* This is deprecated, FIOSETOWN should be used instead. */
		error = fsetown(-(*(int *)data), &mpipe->pipe_sigio);
		break;

	case TIOCGPGRP:
		/* This is deprecated, FIOGETOWN should be used instead. */
		*(int *)data = -fgetown(mpipe->pipe_sigio);
		error = 0;
		break;
	default:
		error = ENOTTY;
		break;
	}
	rel_mplock();
	return (error);
}

/*
 * MPALMOSTSAFE - acquires mplock
 */
int
pipe_poll(struct file *fp, int events, struct ucred *cred)
{
	struct pipe *rpipe;
	struct pipe *wpipe;
	int revents = 0;

	get_mplock();
	rpipe = (struct pipe *)fp->f_data;
	wpipe = rpipe->pipe_peer;
	if (events & (POLLIN | POLLRDNORM))
		if ((rpipe->pipe_state & PIPE_DIRECTW) ||
		    (rpipe->pipe_buffer.cnt > 0) ||
		    (rpipe->pipe_state & PIPE_EOF))
			revents |= events & (POLLIN | POLLRDNORM);

	if (events & (POLLOUT | POLLWRNORM))
		if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF) ||
		    (((wpipe->pipe_state & PIPE_DIRECTW) == 0) &&
		     (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF))
			revents |= events & (POLLOUT | POLLWRNORM);

	if ((rpipe->pipe_state & PIPE_EOF) ||
	    (wpipe == NULL) ||
	    (wpipe->pipe_state & PIPE_EOF))
		revents |= POLLHUP;

	if (revents == 0) {
		if (events & (POLLIN | POLLRDNORM)) {
			selrecord(curthread, &rpipe->pipe_sel);
			rpipe->pipe_state |= PIPE_SEL;
		}

		if (events & (POLLOUT | POLLWRNORM)) {
			selrecord(curthread, &wpipe->pipe_sel);
			wpipe->pipe_state |= PIPE_SEL;
		}
	}
	rel_mplock();
	return (revents);
}

/*
 * MPALMOSTSAFE - acquires mplock
 */
static int
pipe_stat(struct file *fp, struct stat *ub, struct ucred *cred)
{
	struct pipe *pipe;

	get_mplock();
	pipe = (struct pipe *)fp->f_data;

	bzero((caddr_t)ub, sizeof(*ub));
	ub->st_mode = S_IFIFO;
	ub->st_blksize = pipe->pipe_buffer.size;
	ub->st_size = pipe->pipe_buffer.cnt;
	if (ub->st_size == 0 && (pipe->pipe_state & PIPE_DIRECTW)) {
		ub->st_size = pipe->pipe_map.xio_bytes -
				pipe->pipe_buffer.out;
	}
	ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize;
	ub->st_atimespec = pipe->pipe_atime;
	ub->st_mtimespec = pipe->pipe_mtime;
	ub->st_ctimespec = pipe->pipe_ctime;
	/*
	 * Left as 0: st_dev, st_ino, st_nlink, st_uid, st_gid, st_rdev,
	 * st_flags, st_gen.
	 * XXX (st_dev, st_ino) should be unique.
	 */
	rel_mplock();
	return (0);
}

/*
 * MPALMOSTSAFE - acquires mplock
 */
static int
pipe_close(struct file *fp)
{
	struct pipe *cpipe = (struct pipe *)fp->f_data;

	get_mplock();
	fp->f_ops = &badfileops;
	fp->f_data = NULL;
	funsetown(cpipe->pipe_sigio);
	pipeclose(cpipe);
	rel_mplock();
	return (0);
}
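
/*
 * Observation (based on the FREAD|FWRITE flags set on both descriptors
 * in sys_pipe() above): these pipes are bidirectional, which is why a
 * socket-style shutdown() of one or both directions is meaningful here.
 */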

/*
 * Shutdown one or both directions of a full-duplex pipe.
 *
 * MPALMOSTSAFE - acquires mplock
 */
static int
pipe_shutdown(struct file *fp, int how)
{
	struct pipe *rpipe;
	struct pipe *wpipe;
	int error = EPIPE;

	get_mplock();
	rpipe = (struct pipe *)fp->f_data;

	switch(how) {
	case SHUT_RDWR:
	case SHUT_RD:
		if (rpipe) {
			rpipe->pipe_state |= PIPE_EOF;
			pipeselwakeup(rpipe);
			if (rpipe->pipe_busy)
				wakeup(rpipe);
			error = 0;
		}
		if (how == SHUT_RD)
			break;
		/* fall through */
	case SHUT_WR:
		if (rpipe && (wpipe = rpipe->pipe_peer) != NULL) {
			wpipe->pipe_state |= PIPE_EOF;
			pipeselwakeup(wpipe);
			if (wpipe->pipe_busy)
				wakeup(wpipe);
			error = 0;
		}
	}
	rel_mplock();
	return (error);
}

static void
pipe_free_kmem(struct pipe *cpipe)
{
	if (cpipe->pipe_buffer.buffer != NULL) {
		if (cpipe->pipe_buffer.size > PIPE_SIZE)
			--pipe_nbig;
		kmem_free(&kernel_map,
			(vm_offset_t)cpipe->pipe_buffer.buffer,
			cpipe->pipe_buffer.size);
		cpipe->pipe_buffer.buffer = NULL;
		cpipe->pipe_buffer.object = NULL;
	}
#ifndef PIPE_NODIRECT
	KKASSERT(cpipe->pipe_map.xio_bytes == 0 &&
		cpipe->pipe_map.xio_offset == 0 &&
		cpipe->pipe_map.xio_npages == 0);
#endif
}

/*
 * shutdown the pipe
 */
static void
pipeclose(struct pipe *cpipe)
{
	globaldata_t gd;
	struct pipe *ppipe;

	if (cpipe == NULL)
		return;

	pipeselwakeup(cpipe);

	/*
	 * If the other side is blocked, wake it up saying that
	 * we want to close it down.
	 */
	while (cpipe->pipe_busy) {
		wakeup(cpipe);
		cpipe->pipe_state |= PIPE_WANT | PIPE_EOF;
		tsleep(cpipe, 0, "pipecl", 0);
	}

	/*
	 * Disconnect from peer
	 */
	if ((ppipe = cpipe->pipe_peer) != NULL) {
		pipeselwakeup(ppipe);

		ppipe->pipe_state |= PIPE_EOF;
		wakeup(ppipe);
		KNOTE(&ppipe->pipe_sel.si_note, 0);
		ppipe->pipe_peer = NULL;
	}

	if (cpipe->pipe_kva) {
		pmap_qremove(cpipe->pipe_kva, XIO_INTERNAL_PAGES);
		kmem_free(&kernel_map, cpipe->pipe_kva, XIO_INTERNAL_SIZE);
		cpipe->pipe_kva = NULL;
	}

	/*
	 * free or cache resources
	 */
	gd = mycpu;
	if (gd->gd_pipeqcount >= pipe_maxcache ||
	    cpipe->pipe_buffer.size != PIPE_SIZE
	) {
		pipe_free_kmem(cpipe);
		kfree(cpipe, M_PIPE);
	} else {
		KKASSERT(cpipe->pipe_map.xio_npages == 0 &&
			cpipe->pipe_map.xio_bytes == 0 &&
			cpipe->pipe_map.xio_offset == 0);
		cpipe->pipe_state = 0;
		cpipe->pipe_busy = 0;
		cpipe->pipe_peer = gd->gd_pipeq;
		gd->gd_pipeq = cpipe;
		++gd->gd_pipeqcount;
	}
}

/*
 * MPALMOSTSAFE - acquires mplock
 */
static int
pipe_kqfilter(struct file *fp, struct knote *kn)
{
	struct pipe *cpipe;

	get_mplock();
	cpipe = (struct pipe *)kn->kn_fp->f_data;

	switch (kn->kn_filter) {
	case EVFILT_READ:
		kn->kn_fop = &pipe_rfiltops;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &pipe_wfiltops;
		cpipe = cpipe->pipe_peer;
		if (cpipe == NULL) {
			/* other end of pipe has been closed */
			rel_mplock();
			return (EPIPE);
		}
		break;
	default:
		rel_mplock();
		return (1);
	}
	kn->kn_hook = (caddr_t)cpipe;

	SLIST_INSERT_HEAD(&cpipe->pipe_sel.si_note, kn, kn_selnext);
	rel_mplock();
	return (0);
}
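
/*
 * kqueue filter implementations.  Note the asymmetry established in
 * pipe_kqfilter() above: read filters attach to the descriptor's own
 * pipe while write filters attach to the peer, with kn_hook recording
 * the pipe actually hooked so that filt_pipedetach() can find it again.
 */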
static void
filt_pipedetach(struct knote *kn)
{
	struct pipe *cpipe = (struct pipe *)kn->kn_hook;

	SLIST_REMOVE(&cpipe->pipe_sel.si_note, kn, knote, kn_selnext);
}

/*ARGSUSED*/
static int
filt_piperead(struct knote *kn, long hint)
{
	struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
	struct pipe *wpipe = rpipe->pipe_peer;

	kn->kn_data = rpipe->pipe_buffer.cnt;
	if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW)) {
		kn->kn_data = rpipe->pipe_map.xio_bytes -
				rpipe->pipe_buffer.out;
	}

	if ((rpipe->pipe_state & PIPE_EOF) ||
	    (wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
		kn->kn_flags |= EV_EOF;
		return (1);
	}
	return (kn->kn_data > 0);
}

/*ARGSUSED*/
static int
filt_pipewrite(struct knote *kn, long hint)
{
	struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
	struct pipe *wpipe = rpipe->pipe_peer;

	if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
		kn->kn_data = 0;
		kn->kn_flags |= EV_EOF;
		return (1);
	}
	kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
	if (wpipe->pipe_state & PIPE_DIRECTW)
		kn->kn_data = 0;

	return (kn->kn_data >= PIPE_BUF);
}