xref: /netbsd/sys/kern/sys_pipe.c (revision bf9ec67e)
1 /*	$NetBSD: sys_pipe.c,v 1.25 2002/03/17 19:41:07 atatat Exp $	*/
2 
3 /*
4  * Copyright (c) 1996 John S. Dyson
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice immediately at the beginning of the file, without modification,
12  *    this list of conditions, and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Absolutely no warranty of function or purpose is made by the author
17  *    John S. Dyson.
18  * 4. Modifications may be freely made to this file if the above conditions
19  *    are met.
20  *
21  * $FreeBSD: src/sys/kern/sys_pipe.c,v 1.95 2002/03/09 22:06:31 alfred Exp $
22  */
23 
24 /*
25  * This file contains a high-performance replacement for the socket-based
26  * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
27  * all features of sockets, but does do everything that pipes normally
28  * do.
29  *
30  * Adaptation for NetBSD UVM, including uvm_loan() based direct write, was
31  * written by Jaromir Dolecek.
32  */
33 
34 /*
35  * This code has two modes of operation, a small write mode and a large
36  * write mode.  The small write mode acts like conventional pipes with
37  * a kernel buffer.  If the buffer is less than PIPE_MINDIRECT, then the
38  * "normal" pipe buffering is done.  If the buffer is between PIPE_MINDIRECT
39  * and PIPE_SIZE in size, it is fully mapped into the kernel (on FreeBSD,
40  * those pages are also wired), and the receiving process can copy it directly
41  * from the pages in the sending process.
42  *
43  * If the sending process receives a signal, it is possible that it will
44  * go away, and certainly its address space can change, because control
45  * is returned back to the user-mode side.  In that case, the pipe code
46  * arranges to copy the buffer supplied by the user process on FreeBSD, to
47  * a pageable kernel buffer, and the receiving process will grab the data
48  * from the pageable kernel buffer.  Since signals don't happen all that often,
49  * the copy operation is normally eliminated.
50  * For NetBSD, the pages are mapped read-only, COW for kernel by uvm_loan(),
51  * so no explicit handling needs to be done; all is handled by standard VM
52  * facilities.
53  *
54  * The constant PIPE_MINDIRECT is chosen to make sure that buffering will
55  * happen for small transfers so that the system will not spend all of
56  * its time context switching.  PIPE_SIZE is constrained by the
57  * amount of kernel virtual memory.
58  */
59 
60 #include <sys/cdefs.h>
61 __KERNEL_RCSID(0, "$NetBSD: sys_pipe.c,v 1.25 2002/03/17 19:41:07 atatat Exp $");
62 
63 #include <sys/param.h>
64 #include <sys/systm.h>
65 #include <sys/proc.h>
66 #include <sys/fcntl.h>
67 #include <sys/file.h>
68 #include <sys/filedesc.h>
69 #include <sys/filio.h>
70 #include <sys/kernel.h>
71 #include <sys/lock.h>
72 #include <sys/ttycom.h>
73 #include <sys/stat.h>
74 #include <sys/malloc.h>
75 #include <sys/poll.h>
76 #include <sys/signalvar.h>
77 #include <sys/vnode.h>
78 #include <sys/uio.h>
79 #include <sys/lock.h>
80 #ifdef __FreeBSD__
81 #include <sys/mutex.h>
82 #endif
83 #ifdef __NetBSD__
84 #include <sys/select.h>
85 #include <sys/mount.h>
86 #include <sys/syscallargs.h>
87 #include <uvm/uvm.h>
88 #include <sys/sysctl.h>
89 #include <sys/kernel.h>
90 #endif /* NetBSD, FreeBSD */
91 
92 #include <sys/pipe.h>
93 
94 #ifdef __NetBSD__
95 /*
96  * Avoid microtime(9), it's slow. We don't guard the read from time(9)
97  * with splclock(9) since we don't actually need to be THAT sure the access
98  * is atomic.
99  */
100 #define vfs_timestamp(tv)	(*(tv) = time)
101 #endif
102 
103 /*
104  * Use this define if you want to disable *fancy* VM things.  Expect an
105  * approx 30% decrease in transfer rate.  This could be useful for
106  * OpenBSD.
107  */
108 /* #define PIPE_NODIRECT */
109 
110 /*
111  * interfaces to the outside world
112  */
113 #ifdef __FreeBSD__
114 static int pipe_read(struct file *fp, struct uio *uio,
115 		struct ucred *cred, int flags, struct thread *td);
116 static int pipe_write(struct file *fp, struct uio *uio,
117 		struct ucred *cred, int flags, struct thread *td);
118 static int pipe_close(struct file *fp, struct thread *td);
119 static int pipe_poll(struct file *fp, int events, struct ucred *cred,
120 		struct thread *td);
121 static int pipe_kqfilter(struct file *fp, struct knote *kn);
122 static int pipe_stat(struct file *fp, struct stat *sb, struct thread *td);
123 static int pipe_ioctl(struct file *fp, u_long cmd, caddr_t data, struct thread *td);
124 
125 static struct fileops pipeops = {
126 	pipe_read, pipe_write, pipe_ioctl, pipe_poll, pipe_kqfilter,
127 	pipe_stat, pipe_close
128 };
129 
130 static void	filt_pipedetach(struct knote *kn);
131 static int	filt_piperead(struct knote *kn, long hint);
132 static int	filt_pipewrite(struct knote *kn, long hint);
133 
134 static struct filterops pipe_rfiltops =
135 	{ 1, NULL, filt_pipedetach, filt_piperead };
136 static struct filterops pipe_wfiltops =
137 	{ 1, NULL, filt_pipedetach, filt_pipewrite };
138 
139 /*
 * Temporarily trade the pipe mutex for Giant, and back again.  The macro
 * argument is now actually used: the previous expansion hard-coded
 * 'wpipe', which only worked because every caller happened to pass wpipe.
 */
#define PIPE_GET_GIANT(pipe)						\
140 	do {								\
141 		PIPE_UNLOCK(pipe);					\
142 		mtx_lock(&Giant);					\
143 	} while (0)
144 
145 #define PIPE_DROP_GIANT(pipe)						\
146 	do {								\
147 		mtx_unlock(&Giant);					\
148 		PIPE_LOCK(pipe);					\
149 	} while (0)
150 
151 #endif /* FreeBSD */
152 
153 #ifdef __NetBSD__
154 static int pipe_read(struct file *fp, off_t *offset, struct uio *uio,
155 		struct ucred *cred, int flags);
156 static int pipe_write(struct file *fp, off_t *offset, struct uio *uio,
157 		struct ucred *cred, int flags);
158 static int pipe_close(struct file *fp, struct proc *p);
159 static int pipe_poll(struct file *fp, int events, struct proc *p);
160 static int pipe_fcntl(struct file *fp, u_int com, caddr_t data,
161 		struct proc *p);
162 static int pipe_stat(struct file *fp, struct stat *sb, struct proc *p);
163 static int pipe_ioctl(struct file *fp, u_long cmd, caddr_t data, struct proc *p);
164 
165 static struct fileops pipeops =
166     { pipe_read, pipe_write, pipe_ioctl, pipe_fcntl, pipe_poll,
167       pipe_stat, pipe_close };
168 
169 /* XXXSMP perhaps use spinlocks & KERNEL_PROC_(UN)LOCK() ? just clear now */
170 #define PIPE_GET_GIANT(pipe)
171 #define PIPE_DROP_GIANT(pipe)
172 #define GIANT_REQUIRED
173 
174 #endif /* NetBSD */
175 
176 /*
177  * Default pipe buffer size(s), this can be kind-of large now because pipe
178  * space is pageable.  The pipe code will try to maintain locality of
179  * reference for performance reasons, so small amounts of outstanding I/O
180  * will not wipe the cache.
181  */
182 #define MINPIPESIZE (PIPE_SIZE/3)
183 #define MAXPIPESIZE (2*PIPE_SIZE/3)
184 
185 /*
186  * Maximum amount of kva for pipes -- this is kind-of a soft limit, but
187  * is there so that on large systems, we don't exhaust it.
188  */
189 #define MAXPIPEKVA (8*1024*1024)
190 static int maxpipekva = MAXPIPEKVA;
191 
192 /*
193  * Limit for direct transfers, we cannot, of course limit
194  * the amount of kva for pipes in general though.
195  */
196 #define LIMITPIPEKVA (16*1024*1024)
197 static int limitpipekva = LIMITPIPEKVA;
198 
199 /*
200  * Limit the number of "big" pipes
201  */
202 #define LIMITBIGPIPES  32
203 static int maxbigpipes = LIMITBIGPIPES;
204 static int nbigpipe = 0;
205 
206 /*
207  * Amount of KVA consumed by pipe buffers.
208  */
209 static int amountpipekva = 0;
210 
211 static void pipeclose(struct pipe *cpipe);
212 static void pipe_free_kmem(struct pipe *cpipe);
213 static int pipe_create(struct pipe **cpipep, int allockva);
214 static __inline int pipelock(struct pipe *cpipe, int catch);
215 static __inline void pipeunlock(struct pipe *cpipe);
216 static __inline void pipeselwakeup(struct pipe *cpipe, struct pipe *sigp);
217 #ifndef PIPE_NODIRECT
218 static int pipe_direct_write(struct pipe *wpipe, struct uio *uio);
219 #endif
220 static int pipespace(struct pipe *cpipe, int size);
221 
222 #ifdef __NetBSD__
223 #ifndef PIPE_NODIRECT
224 static int pipe_loan_alloc(struct pipe *, int);
225 static void pipe_loan_free(struct pipe *);
226 #endif /* PIPE_NODIRECT */
227 
228 static struct pool pipe_pool;
229 #endif /* NetBSD */
230 
231 #ifdef __FreeBSD__
232 static vm_zone_t pipe_zone;
233 
234 static void pipeinit(void *dummy __unused);
235 #ifndef PIPE_NODIRECT
236 static int pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio);
237 static void pipe_destroy_write_buffer(struct pipe *wpipe);
238 static void pipe_clone_write_buffer(struct pipe *wpipe);
239 #endif
240 
241 SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL);
242 
243 static void
244 pipeinit(void *dummy __unused)
245 {
246 
	/* Pre-create the allocation zone used for struct pipe in pipe(2). */
247 	pipe_zone = zinit("PIPE", sizeof(struct pipe), 0, 0, 4);
248 }
249 #endif /* FreeBSD */
250 
251 /*
252  * The pipe system call for the DTYPE_PIPE type of pipes
253  */
254 
/*
 * pipe(2) system call: create a connected pair of pipe endpoints and
 * return their file descriptors in retval[0] (read side) and retval[1]
 * (write side).  Returns 0 on success, ENFILE if the pipe structures
 * cannot be allocated, or the error from file/descriptor allocation.
 */
255 /* ARGSUSED */
256 #ifdef __FreeBSD__
257 int
258 pipe(td, uap)
259 	struct thread *td;
260 	struct pipe_args /* {
261 		int	dummy;
262 	} */ *uap;
263 #elif defined(__NetBSD__)
264 int
265 sys_pipe(p, v, retval)
266 	struct proc *p;
267 	void *v;
268 	register_t *retval;
269 #endif
270 {
271 	struct file *rf, *wf;
272 	struct pipe *rpipe, *wpipe;
273 	int fd, error;
274 #ifdef __FreeBSD__
275 	struct mtx *pmtx;
276 
277 	KASSERT(pipe_zone != NULL, ("pipe_zone not initialized"));
278 
279 	pmtx = malloc(sizeof(*pmtx), M_TEMP, M_WAITOK | M_ZERO);
280 
281 	rpipe = wpipe = NULL;
282 	if (pipe_create(&rpipe, 1) || pipe_create(&wpipe, 1)) {
283 		pipeclose(rpipe);
284 		pipeclose(wpipe);
285 		free(pmtx, M_TEMP);
286 		return (ENFILE);
287 	}
288 
289 	error = falloc(td, &rf, &fd);
290 	if (error) {
291 		pipeclose(rpipe);
292 		pipeclose(wpipe);
293 		free(pmtx, M_TEMP);
294 		return (error);
295 	}
296 	fhold(rf);
297 	td->td_retval[0] = fd;
298 
299 	/*
300 	 * Warning: once we've gotten past allocation of the fd for the
301 	 * read-side, we can only drop the read side via fdrop() in order
302 	 * to avoid races against processes which manage to dup() the read
303 	 * side while we are blocked trying to allocate the write side.
304 	 */
305 	FILE_LOCK(rf);
306 	rf->f_flag = FREAD | FWRITE;
307 	rf->f_type = DTYPE_PIPE;
308 	rf->f_data = (caddr_t)rpipe;
309 	rf->f_ops = &pipeops;
310 	FILE_UNLOCK(rf);
311 	error = falloc(td, &wf, &fd);
312 	if (error) {
313 		struct filedesc *fdp = td->td_proc->p_fd;
314 		FILEDESC_LOCK(fdp);
315 		if (fdp->fd_ofiles[td->td_retval[0]] == rf) {
316 			fdp->fd_ofiles[td->td_retval[0]] = NULL;
317 			FILEDESC_UNLOCK(fdp);
			/* Drop the descriptor-table reference. */
318 			fdrop(rf, td);
319 		} else
320 			FILEDESC_UNLOCK(fdp);
		/* Drop the fhold() reference taken above. */
321 		fdrop(rf, td);
322 		/* rpipe has been closed by fdrop(). */
323 		pipeclose(wpipe);
324 		free(pmtx, M_TEMP);
325 		return (error);
326 	}
327 	FILE_LOCK(wf);
328 	wf->f_flag = FREAD | FWRITE;
329 	wf->f_type = DTYPE_PIPE;
330 	wf->f_data = (caddr_t)wpipe;
331 	wf->f_ops = &pipeops;
	/* Release the file lock; the rf setup above does the same. */
	FILE_UNLOCK(wf);
	/*
	 * Return the write-side fd via the thread, matching retval[0]
	 * above.  (The old code read 'p->p_retval[1]', but no 'p' is in
	 * scope in the FreeBSD variant of this function.)
	 */
332 	td->td_retval[1] = fd;
333 	rpipe->pipe_peer = wpipe;
334 	wpipe->pipe_peer = rpipe;
335 	mtx_init(pmtx, "pipe mutex", MTX_DEF);
336 	rpipe->pipe_mtxp = wpipe->pipe_mtxp = pmtx;
337 	fdrop(rf, td);
338 #endif /* FreeBSD */
339 
340 #ifdef __NetBSD__
341 	rpipe = wpipe = NULL;
	/* Only the read side gets kva up front; writes allocate on demand. */
342 	if (pipe_create(&rpipe, 1) || pipe_create(&wpipe, 0)) {
343 		pipeclose(rpipe);
344 		pipeclose(wpipe);
345 		return (ENFILE);
346 	}
347 
348 	/*
349 	 * Note: the file structure returned from falloc() is marked
350 	 * as 'larval' initially. Unless we mark it as 'mature' by
351 	 * FILE_SET_MATURE(), any attempt to do anything with it would
352 	 * return EBADF, including e.g. dup(2) or close(2). This avoids
353 	 * file descriptor races if we block in the second falloc().
354 	 */
355 
356 	error = falloc(p, &rf, &fd);
357 	if (error)
358 		goto free2;
359 	retval[0] = fd;
360 	rf->f_flag = FREAD;
361 	rf->f_type = DTYPE_PIPE;
362 	rf->f_data = (caddr_t)rpipe;
363 	rf->f_ops = &pipeops;
364 
365 	error = falloc(p, &wf, &fd);
366 	if (error)
367 		goto free3;
368 	retval[1] = fd;
369 	wf->f_flag = FWRITE;
370 	wf->f_type = DTYPE_PIPE;
371 	wf->f_data = (caddr_t)wpipe;
372 	wf->f_ops = &pipeops;
373 
374 	rpipe->pipe_peer = wpipe;
375 	wpipe->pipe_peer = rpipe;
376 
377 	FILE_SET_MATURE(rf);
378 	FILE_SET_MATURE(wf);
379 	FILE_UNUSE(rf, p);
380 	FILE_UNUSE(wf, p);
381 	return (0);
382 free3:
383 	FILE_UNUSE(rf, p);
384 	ffree(rf);
385 	fdremove(p->p_fd, retval[0]);
386 free2:
387 	pipeclose(wpipe);
388 	pipeclose(rpipe);
389 #endif /* NetBSD */
390 
391 	return (error);
392 }
393 
394 /*
395  * Allocate kva for pipe circular buffer, the space is pageable
396  * This routine will 'realloc' the size of a pipe safely, if it fails
397  * it will retain the old buffer.
398  * If it fails it will return ENOMEM.
399  */
400 static int
401 pipespace(cpipe, size)
402 	struct pipe *cpipe;
403 	int size;
404 {
405 	caddr_t buffer;
406 #ifdef __FreeBSD__
407 	struct vm_object *object;
408 	int npages, error;
409 
410 	GIANT_REQUIRED;
	/* Must not hold the pipe mutex: we may sleep in the VM calls below. */
411 	KASSERT(cpipe->pipe_mtxp == NULL || !mtx_owned(PIPE_MTX(cpipe)),
412 	       ("pipespace: pipe mutex locked"));
413 
414 	npages = round_page(size)/PAGE_SIZE;
415 	/*
416 	 * Create an object, I don't like the idea of paging to/from
417 	 * kernel_object.
418 	 */
419 	object = vm_object_allocate(OBJT_DEFAULT, npages);
420 	buffer = (caddr_t) vm_map_min(kernel_map);
421 
422 	/*
423 	 * Insert the object into the kernel map, and allocate kva for it.
424 	 * The map entry is, by default, pageable.
425 	 */
426 	error = vm_map_find(kernel_map, object, 0,
427 		(vm_offset_t *) &buffer, size, 1,
428 		VM_PROT_ALL, VM_PROT_ALL, 0);
429 
430 	if (error != KERN_SUCCESS) {
431 		vm_object_deallocate(object);
432 		return (ENOMEM);
433 	}
434 #endif /* FreeBSD */
435 
436 #ifdef __NetBSD__
437 	/*
438 	 * Allocate pageable virtual address space. Physical memory is allocated
439 	 * on demand.
440 	 */
441 	buffer = (caddr_t) uvm_km_valloc(kernel_map, round_page(size));
442 	if (buffer == NULL)
443 		return (ENOMEM);
444 #endif /* NetBSD */
445 
	/*
	 * New space is in hand; only now release the old buffer (if any),
	 * so a failed allocation above leaves the old buffer intact.
	 */
446 	/* free old resources if we're resizing */
447 	pipe_free_kmem(cpipe);
448 #ifdef __FreeBSD__
449 	cpipe->pipe_buffer.object = object;
450 #endif
451 	cpipe->pipe_buffer.buffer = buffer;
452 	cpipe->pipe_buffer.size = size;
453 	cpipe->pipe_buffer.in = 0;
454 	cpipe->pipe_buffer.out = 0;
455 	cpipe->pipe_buffer.cnt = 0;
	/* Account the kva consumed by this pipe against the global total. */
456 	amountpipekva += cpipe->pipe_buffer.size;
457 	return (0);
458 }
459 
460 /*
461  * initialize and allocate VM and memory for pipe
462  */
463 static int
464 pipe_create(cpipep, allockva)
465 	struct pipe **cpipep;
466 	int allockva;
467 {
468 	struct pipe *cpipe;
469 	int error;
470 
471 #ifdef __FreeBSD__
472 	*cpipep = zalloc(pipe_zone);
473 #endif
474 #ifdef __NetBSD__
475 	*cpipep = pool_get(&pipe_pool, M_WAITOK);
476 #endif
477 	if (*cpipep == NULL)
478 		return (ENOMEM);
479 
480 	cpipe = *cpipep;
481 
482 	/* Initialize */
483 	memset(cpipe, 0, sizeof(*cpipe));
	/* First read after creation counts as "first after last write". */
484 	cpipe->pipe_state = PIPE_SIGNALR;
485 
486 #ifdef __FreeBSD__
487 	cpipe->pipe_mtxp = NULL;	/* avoid pipespace assertion */
488 #endif
	/*
	 * Optionally allocate the kernel buffer now; callers may defer
	 * (allockva == 0) so the write side allocates lazily.  On failure
	 * *cpipep is left set; callers clean up via pipeclose().
	 */
489 	if (allockva && (error = pipespace(cpipe, PIPE_SIZE)))
490 		return (error);
491 
492 	vfs_timestamp(&cpipe->pipe_ctime);
493 	cpipe->pipe_atime = cpipe->pipe_ctime;
494 	cpipe->pipe_mtime = cpipe->pipe_ctime;
495 #ifdef __NetBSD__
	/* NO_PID: no process/group registered for SIGIO yet. */
496 	cpipe->pipe_pgid = NO_PID;
497 	lockinit(&cpipe->pipe_lock, PRIBIO | PCATCH, "pipelk", 0, 0);
498 #endif
499 
500 	return (0);
501 }
502 
503 
504 /*
505  * lock a pipe for I/O, blocking other access
506  */
507 static __inline int
508 pipelock(cpipe, catch)
509 	struct pipe *cpipe;
510 	int catch;
511 {
512 	int error;
513 
514 #ifdef __FreeBSD__
	/* Long-term I/O lock built from state bits + msleep on the pipe mutex. */
515 	PIPE_LOCK_ASSERT(cpipe, MA_OWNED);
516 	while (cpipe->pipe_state & PIPE_LOCKFL) {
517 		cpipe->pipe_state |= PIPE_LWANT;
518 		error = msleep(cpipe, PIPE_MTX(cpipe),
519 		    catch ? (PRIBIO | PCATCH) : PRIBIO,
520 		    "pipelk", 0);
521 		if (error != 0)
522 			return (error);
523 	}
524 	cpipe->pipe_state |= PIPE_LOCKFL;
525 	return (0);
526 #endif
527 
528 #ifdef __NetBSD__
	/*
	 * pipe_lock was lockinit()ed with PCATCH, so lockmgr() sleeps are
	 * always interruptible; when the caller did not ask for catch,
	 * emulate an uninterruptible acquire by retrying EINTR/ERESTART.
	 */
529 	do {
530 		error = lockmgr(&cpipe->pipe_lock, LK_EXCLUSIVE, NULL);
531 	} while (!catch && (error == EINTR || error == ERESTART));
532 	return (error);
533 #endif
534 }
535 
536 /*
537  * unlock a pipe I/O lock
538  */
539 static __inline void
540 pipeunlock(cpipe)
541 	struct pipe *cpipe;
542 {
543 
544 #ifdef __FreeBSD__
545 	PIPE_LOCK_ASSERT(cpipe, MA_OWNED);
546 	cpipe->pipe_state &= ~PIPE_LOCKFL;
	/* Wake anyone who blocked in pipelock() waiting for the I/O lock. */
547 	if (cpipe->pipe_state & PIPE_LWANT) {
548 		cpipe->pipe_state &= ~PIPE_LWANT;
549 		wakeup(cpipe);
550 	}
551 #endif
552 
553 #ifdef __NetBSD__
554 	lockmgr(&cpipe->pipe_lock, LK_RELEASE, NULL);
555 #endif
556 }
557 
558 /*
559  * Select/poll wakeup. This also sends SIGIO to peer connected to
560  * 'sigpipe' side of pipe.
561  */
562 static __inline void
563 pipeselwakeup(selp, sigp)
564 	struct pipe *selp, *sigp;
565 {
	/* Wake select/poll waiters sleeping on 'selp'. */
566 	if (selp->pipe_state & PIPE_SEL) {
567 		selp->pipe_state &= ~PIPE_SEL;
568 		selwakeup(&selp->pipe_sel);
569 	}
570 #ifdef __FreeBSD__
571 	if (sigp && (sigp->pipe_state & PIPE_ASYNC) && sigp->pipe_sigio)
572 		pgsigio(sigp->pipe_sigio, SIGIO, 0);
573 	KNOTE(&selp->pipe_sel.si_note, 0);
574 #endif
575 
576 #ifdef __NetBSD__
	/*
	 * SIGIO for async mode: a negative pipe_pgid names a process
	 * group (-pgid), a positive one names a single process.
	 */
577 	if (sigp && (sigp->pipe_state & PIPE_ASYNC)
578 	    && sigp->pipe_pgid != NO_PID){
579 		struct proc *p;
580 
581 		if (sigp->pipe_pgid < 0)
582 			gsignal(-sigp->pipe_pgid, SIGIO);
583 		else if (sigp->pipe_pgid > 0 && (p = pfind(sigp->pipe_pgid)) != 0)
584 			psignal(p, SIGIO);
585 	}
586 #endif /* NetBSD */
587 }
588 
/*
 * pipe_read: fileops read entry point for pipes.
 *
 * Copies data from the pipe's kernel buffer (or, with direct writes
 * enabled, straight from the writer's loaned/mapped pages) into 'uio'.
 * Blocks until data arrives unless FNONBLOCK is set (then EAGAIN).
 * Returns 0 on success/EOF, or an errno from uiomove/sleep.
 *
 * Fix: the FreeBSD K&R declaration list declared 'struct proc *p;',
 * which is not in the parameter list and does not compile; removed.
 */
589 /* ARGSUSED */
590 #ifdef __FreeBSD__
591 static int
592 pipe_read(fp, uio, cred, flags, td)
593 	struct file *fp;
594 	struct uio *uio;
595 	struct ucred *cred;
596 	struct thread *td;
597 	int flags;
599 #elif defined(__NetBSD__)
600 static int
601 pipe_read(fp, offset, uio, cred, flags)
602 	struct file *fp;
603 	off_t *offset;
604 	struct uio *uio;
605 	struct ucred *cred;
606 	int flags;
607 #endif
608 {
609 	struct pipe *rpipe = (struct pipe *) fp->f_data;
610 	int error;
611 	size_t nread = 0;
612 	size_t size;
613 	size_t ocnt;
614 
615 	PIPE_LOCK(rpipe);
	/* pipe_busy holds off PIPE_WANTCLOSE processing while we're here. */
616 	++rpipe->pipe_busy;
617 	error = pipelock(rpipe, 1);
618 	if (error)
619 		goto unlocked_error;
620 
	/* Remember the starting count to decide on writer wakeup at exit. */
621 	ocnt = rpipe->pipe_buffer.cnt;
622 
623 	while (uio->uio_resid) {
624 		/*
625 		 * normal pipe buffer receive
626 		 */
627 		if (rpipe->pipe_buffer.cnt > 0) {
			/* Clamp to the contiguous run before wrap-around. */
628 			size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
629 			if (size > rpipe->pipe_buffer.cnt)
630 				size = rpipe->pipe_buffer.cnt;
631 			if (size > uio->uio_resid)
632 				size = uio->uio_resid;
633 
			/* Drop the mutex across uiomove(): it may fault/sleep. */
634 			PIPE_UNLOCK(rpipe);
635 			error = uiomove(&rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
636 					size, uio);
637 			PIPE_LOCK(rpipe);
638 			if (error)
639 				break;
640 
641 			rpipe->pipe_buffer.out += size;
642 			if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
643 				rpipe->pipe_buffer.out = 0;
644 
645 			rpipe->pipe_buffer.cnt -= size;
646 
647 			/*
648 			 * If there is no more to read in the pipe, reset
649 			 * its pointers to the beginning.  This improves
650 			 * cache hit stats.
651 			 */
652 			if (rpipe->pipe_buffer.cnt == 0) {
653 				rpipe->pipe_buffer.in = 0;
654 				rpipe->pipe_buffer.out = 0;
655 			}
656 			nread += size;
657 #ifndef PIPE_NODIRECT
658 		/*
659 		 * Direct copy, bypassing a kernel buffer.
660 		 */
661 		} else if ((size = rpipe->pipe_map.cnt) &&
662 			   (rpipe->pipe_state & PIPE_DIRECTW)) {
663 			caddr_t	va;
664 			if (size > uio->uio_resid)
665 				size = uio->uio_resid;
666 
667 			va = (caddr_t) rpipe->pipe_map.kva +
668 			    rpipe->pipe_map.pos;
669 			PIPE_UNLOCK(rpipe);
670 			error = uiomove(va, size, uio);
671 			PIPE_LOCK(rpipe);
672 			if (error)
673 				break;
674 			nread += size;
675 			rpipe->pipe_map.pos += size;
676 			rpipe->pipe_map.cnt -= size;
			/* Direct transfer drained: release the blocked writer. */
677 			if (rpipe->pipe_map.cnt == 0) {
678 				rpipe->pipe_state &= ~PIPE_DIRECTW;
679 				wakeup(rpipe);
680 			}
681 #endif
682 		} else {
683 			/*
684 			 * detect EOF condition
685 			 * read returns 0 on EOF, no need to set error
686 			 */
687 			if (rpipe->pipe_state & PIPE_EOF)
688 				break;
689 
690 			/*
691 			 * If the "write-side" has been blocked, wake it up now.
692 			 */
693 			if (rpipe->pipe_state & PIPE_WANTW) {
694 				rpipe->pipe_state &= ~PIPE_WANTW;
695 				wakeup(rpipe);
696 			}
697 
698 			/*
699 			 * Break if some data was read.
700 			 */
701 			if (nread > 0)
702 				break;
703 
704 			/*
705 			 * don't block on non-blocking I/O
706 			 */
707 			if (fp->f_flag & FNONBLOCK) {
708 				error = EAGAIN;
709 				break;
710 			}
711 
712 			/*
713 			 * Unlock the pipe buffer for our remaining processing.
714 			 * We will either break out with an error or we will
715 			 * sleep and relock to loop.
716 			 */
717 			pipeunlock(rpipe);
718 
719 			/*
720 			 * We want to read more, wake up select/poll.
721 			 */
722 			pipeselwakeup(rpipe, rpipe->pipe_peer);
723 
724 			rpipe->pipe_state |= PIPE_WANTR;
725 #ifdef __FreeBSD__
726 			error = msleep(rpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH,
727 				    "piperd", 0);
728 #else
729 			error = tsleep(rpipe, PRIBIO | PCATCH, "piperd", 0);
730 #endif
731 			if (error != 0 || (error = pipelock(rpipe, 1)))
732 				goto unlocked_error;
733 		}
734 	}
735 	pipeunlock(rpipe);
736 
737 	/* XXX: should probably do this before getting any locks. */
738 	if (error == 0)
739 		vfs_timestamp(&rpipe->pipe_atime);
740 unlocked_error:
741 	--rpipe->pipe_busy;
742 
743 	/*
744 	 * PIPE_WANTCLOSE processing only makes sense if pipe_busy is 0.
745 	 */
746 	if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANTCLOSE)) {
747 		rpipe->pipe_state &= ~(PIPE_WANTCLOSE|PIPE_WANTW);
748 		wakeup(rpipe);
749 	} else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) {
750 		/*
751 		 * Handle write blocking hysteresis.
752 		 */
753 		if (rpipe->pipe_state & PIPE_WANTW) {
754 			rpipe->pipe_state &= ~PIPE_WANTW;
755 			wakeup(rpipe);
756 		}
757 	}
758 
759 	/*
760 	 * If anything was read off the buffer, signal to the writer it's
761 	 * possible to write more data. Also send signal if we are here for the
762 	 * first time after last write.
763 	 */
764 	if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF
765 	    && (ocnt != rpipe->pipe_buffer.cnt || (rpipe->pipe_state & PIPE_SIGNALR))) {
766 		pipeselwakeup(rpipe, rpipe->pipe_peer);
767 		rpipe->pipe_state &= ~PIPE_SIGNALR;
768 	}
769 
770 	PIPE_UNLOCK(rpipe);
771 	return (error);
772 }
773 
774 #ifdef __FreeBSD__
775 #ifndef PIPE_NODIRECT
776 /*
777  * Map the sending processes' buffer into kernel space and wire it.
778  * This is similar to a physical write operation.
779  */
780 static int
781 pipe_build_write_buffer(wpipe, uio)
782 	struct pipe *wpipe;
783 	struct uio *uio;
784 {
785 	size_t size;
786 	int i;
787 	vm_offset_t addr, endaddr, paddr;
788 
789 	GIANT_REQUIRED;
790 	PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED);
791 
	/* Transfer at most one pipe buffer's worth per direct write. */
792 	size = uio->uio_iov->iov_len;
793 	if (size > wpipe->pipe_buffer.size)
794 		size = wpipe->pipe_buffer.size;
795 
796 	endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size);
797 	addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base);
	/* Fault in and wire each user page covering the buffer. */
798 	for (i = 0; addr < endaddr; addr += PAGE_SIZE, i++) {
799 		vm_page_t m;
800 
801 		if (vm_fault_quick((caddr_t)addr, VM_PROT_READ) < 0 ||
802 		    (paddr = pmap_kextract(addr)) == 0) {
803 			int j;
804 
			/* Unwind: unwire everything wired so far. */
805 			for (j = 0; j < i; j++)
806 				vm_page_unwire(wpipe->pipe_map.ms[j], 1);
807 			return (EFAULT);
808 		}
809 
810 		m = PHYS_TO_VM_PAGE(paddr);
811 		vm_page_wire(m);
812 		wpipe->pipe_map.ms[i] = m;
813 	}
814 
815 /*
816  * set up the control block
817  */
818 	wpipe->pipe_map.npages = i;
819 	wpipe->pipe_map.pos =
820 	    ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK;
821 	wpipe->pipe_map.cnt = size;
822 
823 /*
824  * and map the buffer
825  */
826 	if (wpipe->pipe_map.kva == 0) {
827 		/*
828 		 * We need to allocate space for an extra page because the
829 		 * address range might (will) span pages at times.
830 		 */
831 		wpipe->pipe_map.kva = kmem_alloc_pageable(kernel_map,
832 			wpipe->pipe_buffer.size + PAGE_SIZE);
833 		amountpipekva += wpipe->pipe_buffer.size + PAGE_SIZE;
834 	}
835 	pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms,
836 		wpipe->pipe_map.npages);
837 
838 /*
839  * and update the uio data
840  */
841 
	/* Consume 'size' bytes from the uio as if they were copied. */
842 	uio->uio_iov->iov_len -= size;
843 	uio->uio_iov->iov_base += size;
844 	if (uio->uio_iov->iov_len == 0)
845 		uio->uio_iov++;
846 	uio->uio_resid -= size;
847 	uio->uio_offset += size;
848 	return (0);
849 }
850 
851 /*
852  * unmap and unwire the process buffer
853  */
854 static void
855 pipe_destroy_write_buffer(wpipe)
856 	struct pipe *wpipe;
857 {
858 	int i;
859 
860 	GIANT_REQUIRED;
861 	PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED);
862 
863 	if (wpipe->pipe_map.kva) {
864 		pmap_qremove(wpipe->pipe_map.kva, wpipe->pipe_map.npages);
865 
		/*
		 * The kva is cached across direct writes; only release it
		 * when total pipe kva use exceeds the soft limit.
		 */
866 		if (amountpipekva > maxpipekva) {
867 			vm_offset_t kva = wpipe->pipe_map.kva;
868 			wpipe->pipe_map.kva = 0;
869 			kmem_free(kernel_map, kva,
870 				wpipe->pipe_buffer.size + PAGE_SIZE);
871 			amountpipekva -= wpipe->pipe_buffer.size + PAGE_SIZE;
872 		}
873 	}
	/* Unwire the user pages wired by pipe_build_write_buffer(). */
874 	for (i = 0; i < wpipe->pipe_map.npages; i++)
875 		vm_page_unwire(wpipe->pipe_map.ms[i], 1);
876 	wpipe->pipe_map.npages = 0;
877 }
878 
879 /*
880  * In the case of a signal, the writing process might go away.  This
881  * code copies the data into the circular buffer so that the source
882  * pages can be freed without loss of data.
883  */
884 static void
885 pipe_clone_write_buffer(wpipe)
886 	struct pipe *wpipe;
887 {
888 	int size;
889 	int pos;
890 
891 	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
	/* Copy the not-yet-read remainder into the pipe's own buffer. */
892 	size = wpipe->pipe_map.cnt;
893 	pos = wpipe->pipe_map.pos;
894 	memcpy((caddr_t) wpipe->pipe_buffer.buffer,
895 	    (caddr_t) wpipe->pipe_map.kva + pos, size);
896 
	/* Readers now consume from pipe_buffer instead of the mapping. */
897 	wpipe->pipe_buffer.in = size;
898 	wpipe->pipe_buffer.out = 0;
899 	wpipe->pipe_buffer.cnt = size;
900 	wpipe->pipe_state &= ~PIPE_DIRECTW;
901 
902 	PIPE_GET_GIANT(wpipe);
903 	pipe_destroy_write_buffer(wpipe);
904 	PIPE_DROP_GIANT(wpipe);
905 }
906 
907 /*
908  * This implements the pipe buffer write mechanism.  Note that only
909  * a direct write OR a normal pipe write can be pending at any given time.
910  * If there are any characters in the pipe buffer, the direct write will
911  * be deferred until the receiving process grabs all of the bytes from
912  * the pipe buffer.  Then the direct mapping write is set-up.
913  */
914 static int
915 pipe_direct_write(wpipe, uio)
916 	struct pipe *wpipe;
917 	struct uio *uio;
918 {
919 	int error;
920 
921 retry:
922 	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
	/* Wait for any previous direct write to finish. */
923 	while (wpipe->pipe_state & PIPE_DIRECTW) {
924 		if (wpipe->pipe_state & PIPE_WANTR) {
925 			wpipe->pipe_state &= ~PIPE_WANTR;
926 			wakeup(wpipe);
927 		}
928 		wpipe->pipe_state |= PIPE_WANTW;
929 		error = msleep(wpipe, PIPE_MTX(wpipe),
930 		    PRIBIO | PCATCH, "pipdww", 0);
931 		if (error)
932 			goto error1;
933 		if (wpipe->pipe_state & PIPE_EOF) {
934 			error = EPIPE;
935 			goto error1;
936 		}
937 	}
938 	wpipe->pipe_map.cnt = 0;	/* transfer not ready yet */
	/*
	 * A direct write may only start once the reader has drained the
	 * regular buffer; otherwise wait and re-check from the top.
	 */
939 	if (wpipe->pipe_buffer.cnt > 0) {
940 		if (wpipe->pipe_state & PIPE_WANTR) {
941 			wpipe->pipe_state &= ~PIPE_WANTR;
942 			wakeup(wpipe);
943 		}
944 
945 		wpipe->pipe_state |= PIPE_WANTW;
946 		error = msleep(wpipe, PIPE_MTX(wpipe),
947 		    PRIBIO | PCATCH, "pipdwc", 0);
948 		if (error)
949 			goto error1;
950 		if (wpipe->pipe_state & PIPE_EOF) {
951 			error = EPIPE;
952 			goto error1;
953 		}
954 		goto retry;
955 	}
956 
957 	wpipe->pipe_state |= PIPE_DIRECTW;
958 
	/* Wire + map the writer's pages (needs Giant, not the pipe mutex). */
959 	PIPE_GET_GIANT(wpipe);
960 	error = pipe_build_write_buffer(wpipe, uio);
961 	PIPE_DROP_GIANT(wpipe);
962 	if (error) {
963 		wpipe->pipe_state &= ~PIPE_DIRECTW;
964 		goto error1;
965 	}
966 
967 	error = 0;
	/* Sleep until the reader consumes the mapped data (or EOF/signal). */
968 	while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) {
969 		if (wpipe->pipe_state & PIPE_EOF) {
970 			pipelock(wpipe, 0);
971 			PIPE_GET_GIANT(wpipe);
972 			pipe_destroy_write_buffer(wpipe);
973 			PIPE_DROP_GIANT(wpipe);
974 			pipeunlock(wpipe);
975 			pipeselwakeup(wpipe, wpipe);
976 			error = EPIPE;
977 			goto error1;
978 		}
979 		if (wpipe->pipe_state & PIPE_WANTR) {
980 			wpipe->pipe_state &= ~PIPE_WANTR;
981 			wakeup(wpipe);
982 		}
983 		pipeselwakeup(wpipe, wpipe);
984 		error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH,
985 		    "pipdwt", 0);
986 	}
987 
988 	pipelock(wpipe,0);
	/*
	 * Interrupted before the reader finished: PIPE_DIRECTW is still
	 * set, so preserve the unread data in a kernel buffer before we
	 * return to (and possibly lose) the user address space.
	 */
989 	if (wpipe->pipe_state & PIPE_DIRECTW) {
990 		/*
991 		 * this bit of trickery substitutes a kernel buffer for
992 		 * the process that might be going away.
993 		 */
994 		pipe_clone_write_buffer(wpipe);
995 	} else {
996 		PIPE_GET_GIANT(wpipe);
997 		pipe_destroy_write_buffer(wpipe);
998 		PIPE_DROP_GIANT(wpipe);
999 	}
1000 	pipeunlock(wpipe);
1001 	return (error);
1002 
1003 error1:
	/* Kick any other writer waiting in the loops above. */
1004 	wakeup(wpipe);
1005 	return (error);
1006 }
1007 #endif /* !PIPE_NODIRECT */
1008 #endif /* FreeBSD */
1009 
1010 #ifdef __NetBSD__
1011 #ifndef PIPE_NODIRECT
1012 /*
1013  * Allocate structure for loan transfer.
1014  */
1015 static int
1016 pipe_loan_alloc(wpipe, npages)
1017 	struct pipe *wpipe;
1018 	int npages;
1019 {
1020 	vsize_t len;
1021 
1022 	len = (vsize_t)npages << PAGE_SHIFT;
	/* May sleep waiting for kva; returns 0 only on hard failure. */
1023 	wpipe->pipe_map.kva = uvm_km_valloc_wait(kernel_map, len);
1024 	if (wpipe->pipe_map.kva == 0)
1025 		return (ENOMEM);
1026 
1027 	amountpipekva += len;
1028 	wpipe->pipe_map.npages = npages;
	/* Page-pointer array filled in later by uvm_loan(). */
1029 	wpipe->pipe_map.pgs = malloc(npages * sizeof(struct vm_page *), M_PIPE,
1030 	    M_WAITOK);
1031 	return (0);
1032 }
1033 
1034 /*
1035  * Free resources allocated for loan transfer.
1036  */
1037 static void
1038 pipe_loan_free(wpipe)
1039 	struct pipe *wpipe;
1040 {
1041 	vsize_t len;
1042 
1043 	len = (vsize_t)wpipe->pipe_map.npages << PAGE_SHIFT;
1044 	uvm_km_free(kernel_map, wpipe->pipe_map.kva, len);
	/* kva == 0 marks "no loan mapping allocated". */
1045 	wpipe->pipe_map.kva = 0;
1046 	amountpipekva -= len;
1047 	free(wpipe->pipe_map.pgs, M_PIPE);
1048 	wpipe->pipe_map.pgs = NULL;
1049 }
1050 
1051 /*
1052  * NetBSD direct write, using uvm_loan() mechanism.
1053  * This implements the pipe buffer write mechanism.  Note that only
1054  * a direct write OR a normal pipe write can be pending at any given time.
1055  * If there are any characters in the pipe buffer, the direct write will
1056  * be deferred until the receiving process grabs all of the bytes from
1057  * the pipe buffer.  Then the direct mapping write is set-up.
1058  */
static int
pipe_direct_write(wpipe, uio)
	struct pipe *wpipe;
	struct uio *uio;
{
	int error, npages, j;
	struct vm_page **pgs;
	vaddr_t bbase, kva, base, bend;
	vsize_t blen, bcnt;
	voff_t bpos;

retry:
	/* Only one direct write may be in progress on a pipe at a time. */
	while (wpipe->pipe_state & PIPE_DIRECTW) {
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		wpipe->pipe_state |= PIPE_WANTW;
		error = tsleep(wpipe, PRIBIO | PCATCH, "pipdww", 0);
		if (error)
			goto error;
		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			goto error;
		}
	}
	wpipe->pipe_map.cnt = 0;	/* transfer not ready yet */
	/*
	 * Wait until the reader has drained any buffered data, then
	 * retry from the top (another direct write may have started
	 * while we slept).
	 */
	if (wpipe->pipe_buffer.cnt > 0) {
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}

		wpipe->pipe_state |= PIPE_WANTW;
		error = tsleep(wpipe, PRIBIO | PCATCH, "pipdwc", 0);
		if (error)
			goto error;
		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			goto error;
		}
		goto retry;
	}

	/*
	 * Handle first PIPE_CHUNK_SIZE bytes of buffer. Deal with buffers
	 * not aligned to PAGE_SIZE.
	 */
	bbase = (vaddr_t)uio->uio_iov->iov_base;
	base = trunc_page(bbase);
	bend = round_page(bbase + uio->uio_iov->iov_len);
	blen = bend - base;
	bpos = bbase - base;

	if (blen > PIPE_DIRECT_CHUNK) {
		blen = PIPE_DIRECT_CHUNK;
		bend = base + blen;
		bcnt = PIPE_DIRECT_CHUNK - bpos;
	} else {
		bcnt = uio->uio_iov->iov_len;
	}
	npages = blen >> PAGE_SHIFT;

	/* pos/cnt describe the byte window within the mapped pages. */
	wpipe->pipe_map.pos = bpos;
	wpipe->pipe_map.cnt = bcnt;

	/*
	 * Free the old kva if we need more pages than we have
	 * allocated.
	 */
	if (wpipe->pipe_map.kva && npages > wpipe->pipe_map.npages)
		pipe_loan_free(wpipe);

	/* Allocate new kva. */
	if (wpipe->pipe_map.kva == 0) {
		error = pipe_loan_alloc(wpipe, npages);
		if (error) {
			goto error;
		}
	}

	/* Loan the write buffer memory from writer process */
	pgs = wpipe->pipe_map.pgs;
	error = uvm_loan(&uio->uio_procp->p_vmspace->vm_map, base, blen,
	    pgs, UVM_LOAN_TOPAGE);
	if (error) {
		pgs = NULL;
		goto cleanup;
	}

	/* Enter the loaned pages to kva */
	kva = wpipe->pipe_map.kva;
	for (j = 0; j < npages; j++, kva += PAGE_SIZE) {
		pmap_kenter_pa(kva, VM_PAGE_TO_PHYS(pgs[j]), VM_PROT_READ);
	}
	pmap_update(pmap_kernel());

	/*
	 * Publish the transfer and sleep until the reader has consumed
	 * it (reader clears PIPE_DIRECTW) or an error/EOF occurs.
	 */
	wpipe->pipe_state |= PIPE_DIRECTW;
	while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) {
		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			break;
		}
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		pipeselwakeup(wpipe, wpipe);
		error = tsleep(wpipe, PRIBIO | PCATCH, "pipdwt", 0);
	}

	if (error)
		wpipe->pipe_state &= ~PIPE_DIRECTW;

cleanup:
	/* Unmap and unloan the pages; drop the kva on error or pressure. */
	pipelock(wpipe, 0);
	if (pgs != NULL) {
		pmap_kremove(wpipe->pipe_map.kva, blen);
		uvm_unloan(pgs, npages, UVM_LOAN_TOPAGE);
	}
	if (error || amountpipekva > maxpipekva)
		pipe_loan_free(wpipe);
	pipeunlock(wpipe);

	if (error) {
		pipeselwakeup(wpipe, wpipe);

		/*
		 * If nothing was read from what we offered, return error
		 * straight on. Otherwise update uio resid first. Caller
		 * will deal with the error condition, returning short
		 * write, error, or restarting the write(2) as appropriate.
		 */
		if (wpipe->pipe_map.cnt == bcnt) {
			/*
			 * NB: the "error" label is also the target of the
			 * gotos above, taken before any transfer began.
			 */
error:
			wakeup(wpipe);
			return (error);
		}

		bcnt -= wpipe->pipe_map.cnt;
	}

	/* Account the bytes actually taken by the reader against the uio. */
	uio->uio_resid -= bcnt;
	/* uio_offset not updated, not set/used for write(2) */
	uio->uio_iov->iov_base = (char *)uio->uio_iov->iov_base + bcnt;
	uio->uio_iov->iov_len -= bcnt;
	if (uio->uio_iov->iov_len == 0) {
		uio->uio_iov++;
		uio->uio_iovcnt--;
	}

	return (error);
}
1212 #endif /* !PIPE_NODIRECT */
1213 #endif /* NetBSD */
1214 
1215 #ifdef __FreeBSD__
1216 static int
1217 pipe_write(fp, uio, cred, flags, td)
1218 	struct file *fp;
1219 	off_t *offset;
1220 	struct uio *uio;
1221 	struct ucred *cred;
1222 	int flags;
1223 	struct thread *td;
1224 #elif defined(__NetBSD__)
1225 static int
1226 pipe_write(fp, offset, uio, cred, flags)
1227 	struct file *fp;
1228 	off_t *offset;
1229 	struct uio *uio;
1230 	struct ucred *cred;
1231 	int flags;
1232 #endif
1233 {
1234 	int error = 0;
1235 	struct pipe *wpipe, *rpipe;
1236 
1237 	rpipe = (struct pipe *) fp->f_data;
1238 	wpipe = rpipe->pipe_peer;
1239 
1240 	PIPE_LOCK(rpipe);
1241 	/*
1242 	 * detect loss of pipe read side, issue SIGPIPE if lost.
1243 	 */
1244 	if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
1245 		PIPE_UNLOCK(rpipe);
1246 		return (EPIPE);
1247 	}
1248 
1249 	++wpipe->pipe_busy;
1250 
1251 	/*
1252 	 * If it is advantageous to resize the pipe buffer, do
1253 	 * so.
1254 	 */
1255 	if ((uio->uio_resid > PIPE_SIZE) &&
1256 		(nbigpipe < maxbigpipes) &&
1257 #ifndef PIPE_NODIRECT
1258 		(wpipe->pipe_state & PIPE_DIRECTW) == 0 &&
1259 #endif
1260 		(wpipe->pipe_buffer.size <= PIPE_SIZE) &&
1261 		(wpipe->pipe_buffer.cnt == 0)) {
1262 
1263 		if ((error = pipelock(wpipe,1)) == 0) {
1264 			PIPE_GET_GIANT(rpipe);
1265 			if (pipespace(wpipe, BIG_PIPE_SIZE) == 0)
1266 				nbigpipe++;
1267 			PIPE_DROP_GIANT(rpipe);
1268 			pipeunlock(wpipe);
1269 		} else {
1270 			/*
1271 			 * If an error occurred, unbusy and return, waking up
1272 			 * any waiting readers.
1273 			 */
1274 			--wpipe->pipe_busy;
1275 			if (wpipe->pipe_busy == 0
1276 			    && (wpipe->pipe_state & PIPE_WANTCLOSE)) {
1277 				wpipe->pipe_state &=
1278 				    ~(PIPE_WANTCLOSE | PIPE_WANTR);
1279 				wakeup(wpipe);
1280 			}
1281 
1282 			return (error);
1283 		}
1284 	}
1285 
1286 #ifdef __FreeBSD__
1287 	/*
1288 	 * If an early error occured unbusy and return, waking up any pending
1289 	 * readers.
1290 	 */
1291 	if (error) {
1292 		--wpipe->pipe_busy;
1293 		if ((wpipe->pipe_busy == 0) &&
1294 		    (wpipe->pipe_state & PIPE_WANT)) {
1295 			wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
1296 			wakeup(wpipe);
1297 		}
1298 		PIPE_UNLOCK(rpipe);
1299 		return(error);
1300 	}
1301 
1302 	KASSERT(wpipe->pipe_buffer.buffer != NULL, ("pipe buffer gone"));
1303 #endif
1304 
1305 	while (uio->uio_resid) {
1306 		int space;
1307 
1308 #ifndef PIPE_NODIRECT
1309 		/*
1310 		 * If the transfer is large, we can gain performance if
1311 		 * we do process-to-process copies directly.
1312 		 * If the write is non-blocking, we don't use the
1313 		 * direct write mechanism.
1314 		 *
1315 		 * The direct write mechanism will detect the reader going
1316 		 * away on us.
1317 		 */
1318 		if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) &&
1319 		    (fp->f_flag & FNONBLOCK) == 0 &&
1320 		    (wpipe->pipe_map.kva || (amountpipekva < limitpipekva))) {
1321 			error = pipe_direct_write(wpipe, uio);
1322 
1323 			/*
1324 			 * Break out if error occured, unless it's ENOMEM.
1325 			 * ENOMEM means we failed to allocate some resources
1326 			 * for direct write, so we just fallback to ordinary
1327 			 * write. If the direct write was successful,
1328 			 * process rest of data via ordinary write.
1329 			 */
1330 			if (!error)
1331 				continue;
1332 
1333 			if (error != ENOMEM)
1334 				break;
1335 		}
1336 #endif /* PIPE_NODIRECT */
1337 
1338 		/*
1339 		 * Pipe buffered writes cannot be coincidental with
1340 		 * direct writes.  We wait until the currently executing
1341 		 * direct write is completed before we start filling the
1342 		 * pipe buffer.  We break out if a signal occurs or the
1343 		 * reader goes away.
1344 		 */
1345 	retrywrite:
1346 		while (wpipe->pipe_state & PIPE_DIRECTW) {
1347 			if (wpipe->pipe_state & PIPE_WANTR) {
1348 				wpipe->pipe_state &= ~PIPE_WANTR;
1349 				wakeup(wpipe);
1350 			}
1351 #ifdef __FreeBSD__
1352 			error = msleep(wpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH,
1353 			    "pipbww", 0);
1354 #else
1355 			error = tsleep(wpipe, PRIBIO | PCATCH, "pipbww", 0);
1356 #endif
1357 			if (wpipe->pipe_state & PIPE_EOF)
1358 				break;
1359 			if (error)
1360 				break;
1361 		}
1362 		if (wpipe->pipe_state & PIPE_EOF) {
1363 			error = EPIPE;
1364 			break;
1365 		}
1366 
1367 		space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
1368 
1369 		/* Writes of size <= PIPE_BUF must be atomic. */
1370 		if ((space < uio->uio_resid) && (uio->uio_resid <= PIPE_BUF))
1371 			space = 0;
1372 
1373 		if (space > 0) {
1374 			int size;	/* Transfer size */
1375 			int segsize;	/* first segment to transfer */
1376 
1377 			if ((error = pipelock(wpipe,1)) != 0)
1378 				break;
1379 
1380 			/*
1381 			 * It is possible for a direct write to
1382 			 * slip in on us... handle it here...
1383 			 */
1384 			if (wpipe->pipe_state & PIPE_DIRECTW) {
1385 				pipeunlock(wpipe);
1386 				goto retrywrite;
1387 			}
1388 			/*
1389 			 * If a process blocked in uiomove, our
1390 			 * value for space might be bad.
1391 			 *
1392 			 * XXX will we be ok if the reader has gone
1393 			 * away here?
1394 			 */
1395 			if (space > wpipe->pipe_buffer.size -
1396 				    wpipe->pipe_buffer.cnt) {
1397 				pipeunlock(wpipe);
1398 				goto retrywrite;
1399 			}
1400 
1401 			/*
1402 			 * Transfer size is minimum of uio transfer
1403 			 * and free space in pipe buffer.
1404 			 */
1405 			if (space > uio->uio_resid)
1406 				size = uio->uio_resid;
1407 			else
1408 				size = space;
1409 			/*
1410 			 * First segment to transfer is minimum of
1411 			 * transfer size and contiguous space in
1412 			 * pipe buffer.  If first segment to transfer
1413 			 * is less than the transfer size, we've got
1414 			 * a wraparound in the buffer.
1415 			 */
1416 			segsize = wpipe->pipe_buffer.size -
1417 				wpipe->pipe_buffer.in;
1418 			if (segsize > size)
1419 				segsize = size;
1420 
1421 			/* Transfer first segment */
1422 
1423 			PIPE_UNLOCK(rpipe);
1424 			error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
1425 						segsize, uio);
1426 			PIPE_LOCK(rpipe);
1427 
1428 			if (error == 0 && segsize < size) {
1429 				/*
1430 				 * Transfer remaining part now, to
1431 				 * support atomic writes.  Wraparound
1432 				 * happened.
1433 				 */
1434 #ifdef DEBUG
1435 				if (wpipe->pipe_buffer.in + segsize !=
1436 				    wpipe->pipe_buffer.size)
1437 					panic("Expected pipe buffer wraparound disappeared");
1438 #endif
1439 
1440 				PIPE_UNLOCK(rpipe);
1441 				error = uiomove(&wpipe->pipe_buffer.buffer[0],
1442 						size - segsize, uio);
1443 				PIPE_LOCK(rpipe);
1444 			}
1445 			if (error == 0) {
1446 				wpipe->pipe_buffer.in += size;
1447 				if (wpipe->pipe_buffer.in >=
1448 				    wpipe->pipe_buffer.size) {
1449 #ifdef DEBUG
1450 					if (wpipe->pipe_buffer.in != size - segsize + wpipe->pipe_buffer.size)
1451 						panic("Expected wraparound bad");
1452 #endif
1453 					wpipe->pipe_buffer.in = size - segsize;
1454 				}
1455 
1456 				wpipe->pipe_buffer.cnt += size;
1457 #ifdef DEBUG
1458 				if (wpipe->pipe_buffer.cnt > wpipe->pipe_buffer.size)
1459 					panic("Pipe buffer overflow");
1460 #endif
1461 			}
1462 			pipeunlock(wpipe);
1463 			if (error)
1464 				break;
1465 		} else {
1466 			/*
1467 			 * If the "read-side" has been blocked, wake it up now.
1468 			 */
1469 			if (wpipe->pipe_state & PIPE_WANTR) {
1470 				wpipe->pipe_state &= ~PIPE_WANTR;
1471 				wakeup(wpipe);
1472 			}
1473 
1474 			/*
1475 			 * don't block on non-blocking I/O
1476 			 */
1477 			if (fp->f_flag & FNONBLOCK) {
1478 				error = EAGAIN;
1479 				break;
1480 			}
1481 
1482 			/*
1483 			 * We have no more space and have something to offer,
1484 			 * wake up select/poll.
1485 			 */
1486 			pipeselwakeup(wpipe, wpipe);
1487 
1488 			wpipe->pipe_state |= PIPE_WANTW;
1489 #ifdef __FreeBSD__
1490 			error = msleep(wpipe, PIPE_MTX(rpipe),
1491 			    PRIBIO | PCATCH, "pipewr", 0);
1492 #else
1493 			error = tsleep(wpipe, PRIBIO | PCATCH, "pipewr", 0);
1494 #endif
1495 			if (error != 0)
1496 				break;
1497 			/*
1498 			 * If read side wants to go away, we just issue a signal
1499 			 * to ourselves.
1500 			 */
1501 			if (wpipe->pipe_state & PIPE_EOF) {
1502 				error = EPIPE;
1503 				break;
1504 			}
1505 		}
1506 	}
1507 
1508 	--wpipe->pipe_busy;
1509 	if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANTCLOSE)) {
1510 		wpipe->pipe_state &= ~(PIPE_WANTCLOSE | PIPE_WANTR);
1511 		wakeup(wpipe);
1512 	} else if (wpipe->pipe_buffer.cnt > 0) {
1513 		/*
1514 		 * If we have put any characters in the buffer, we wake up
1515 		 * the reader.
1516 		 */
1517 		if (wpipe->pipe_state & PIPE_WANTR) {
1518 			wpipe->pipe_state &= ~PIPE_WANTR;
1519 			wakeup(wpipe);
1520 		}
1521 	}
1522 
1523 	/*
1524 	 * Don't return EPIPE if I/O was successful
1525 	 */
1526 	if ((error == EPIPE) && (wpipe->pipe_buffer.cnt == 0)
1527 	    && (uio->uio_resid == 0))
1528 		error = 0;
1529 
1530 	if (error == 0)
1531 		vfs_timestamp(&wpipe->pipe_mtime);
1532 
1533 	/*
1534 	 * We have something to offer, wake up select/poll.
1535 	 * wpipe->pipe_map.cnt is always 0 in this point (direct write
1536 	 * is only done synchronously), so check only wpipe->pipe_buffer.cnt
1537 	 */
1538 	if (wpipe->pipe_buffer.cnt)
1539 		pipeselwakeup(wpipe, wpipe);
1540 
1541 	/*
1542 	 * Arrange for next read(2) to do a signal.
1543 	 */
1544 	wpipe->pipe_state |= PIPE_SIGNALR;
1545 
1546 	PIPE_UNLOCK(rpipe);
1547 	return (error);
1548 }
1549 
1550 /*
1551  * we implement a very minimal set of ioctls for compatibility with sockets.
1552  */
1553 int
1554 #ifdef __FreeBSD__
1555 pipe_ioctl(fp, cmd, data, td)
1556 	struct file *fp;
1557 	u_long cmd;
1558 	caddr_t data;
1559 	struct thread *td;
1560 #else
1561 pipe_ioctl(fp, cmd, data, p)
1562 	struct file *fp;
1563 	u_long cmd;
1564 	caddr_t data;
1565 	struct proc *p;
1566 #endif
1567 {
1568 	struct pipe *mpipe = (struct pipe *)fp->f_data;
1569 
1570 	switch (cmd) {
1571 
1572 	case FIONBIO:
1573 		return (0);
1574 
1575 	case FIOASYNC:
1576 		PIPE_LOCK(mpipe);
1577 		if (*(int *)data) {
1578 			mpipe->pipe_state |= PIPE_ASYNC;
1579 		} else {
1580 			mpipe->pipe_state &= ~PIPE_ASYNC;
1581 		}
1582 		PIPE_UNLOCK(mpipe);
1583 		return (0);
1584 
1585 	case FIONREAD:
1586 		PIPE_LOCK(mpipe);
1587 #ifndef PIPE_NODIRECT
1588 		if (mpipe->pipe_state & PIPE_DIRECTW)
1589 			*(int *)data = mpipe->pipe_map.cnt;
1590 		else
1591 #endif
1592 			*(int *)data = mpipe->pipe_buffer.cnt;
1593 		PIPE_UNLOCK(mpipe);
1594 		return (0);
1595 
1596 #ifdef __FreeBSD__
1597 	case FIOSETOWN:
1598 		return (fsetown(*(int *)data, &mpipe->pipe_sigio));
1599 
1600 	case FIOGETOWN:
1601 		*(int *)data = fgetown(mpipe->pipe_sigio);
1602 		return (0);
1603 
1604 	/* This is deprecated, FIOSETOWN should be used instead. */
1605 	case TIOCSPGRP:
1606 		return (fsetown(-(*(int *)data), &mpipe->pipe_sigio));
1607 
1608 	/* This is deprecated, FIOGETOWN should be used instead. */
1609 	case TIOCGPGRP:
1610 		*(int *)data = -fgetown(mpipe->pipe_sigio);
1611 		return (0);
1612 #endif /* FreeBSD */
1613 #ifdef __NetBSD__
1614 	case TIOCSPGRP:
1615 		mpipe->pipe_pgid = *(int *)data;
1616 		return (0);
1617 
1618 	case TIOCGPGRP:
1619 		*(int *)data = mpipe->pipe_pgid;
1620 		return (0);
1621 #endif /* NetBSD */
1622 
1623 	}
1624 	return (EPASSTHROUGH);
1625 }
1626 
int
#ifdef __FreeBSD__
pipe_poll(fp, events, cred, td)
	struct file *fp;
	int events;
	struct ucred *cred;
	struct thread *td;
#elif defined(__NetBSD__)
pipe_poll(fp, events, td)
	struct file *fp;
	int events;
	struct proc *td;
#endif
{
	struct pipe *rpipe = (struct pipe *)fp->f_data;
	struct pipe *wpipe;
	int revents = 0;

	wpipe = rpipe->pipe_peer;
	PIPE_LOCK(rpipe);
	/* Readable: buffered data, a pending direct write, or EOF. */
	if (events & (POLLIN | POLLRDNORM))
		if ((rpipe->pipe_buffer.cnt > 0) ||
#ifndef PIPE_NODIRECT
		    (rpipe->pipe_state & PIPE_DIRECTW) ||
#endif
		    (rpipe->pipe_state & PIPE_EOF))
			revents |= events & (POLLIN | POLLRDNORM);

	/*
	 * Writable: peer gone or at EOF (a write would fail right
	 * away), or no direct write in progress and at least PIPE_BUF
	 * bytes of buffer space free.
	 */
	if (events & (POLLOUT | POLLWRNORM))
		if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF)
		    || (
#ifndef PIPE_NODIRECT
		     ((wpipe->pipe_state & PIPE_DIRECTW) == 0) &&
#endif
		     (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF))
			revents |= events & (POLLOUT | POLLWRNORM);

	if ((rpipe->pipe_state & PIPE_EOF) ||
	    (wpipe == NULL) ||
	    (wpipe->pipe_state & PIPE_EOF))
		revents |= POLLHUP;

	/*
	 * Nothing ready yet: record the caller for a later selwakeup.
	 * wpipe cannot be NULL in the POLLOUT branch here, because a
	 * NULL peer already set POLLHUP (making revents non-zero).
	 */
	if (revents == 0) {
		if (events & (POLLIN | POLLRDNORM)) {
			selrecord(td, &rpipe->pipe_sel);
			rpipe->pipe_state |= PIPE_SEL;
		}

		if (events & (POLLOUT | POLLWRNORM)) {
			selrecord(td, &wpipe->pipe_sel);
			wpipe->pipe_state |= PIPE_SEL;
		}
	}
	PIPE_UNLOCK(rpipe);

	return (revents);
}
1684 
1685 static int
1686 #ifdef __FreeBSD__
1687 pipe_stat(fp, ub, td)
1688 	struct file *fp;
1689 	struct stat *ub;
1690 	struct thread *td;
1691 #else
1692 pipe_stat(fp, ub, td)
1693 	struct file *fp;
1694 	struct stat *ub;
1695 	struct proc *td;
1696 #endif
1697 {
1698 	struct pipe *pipe = (struct pipe *)fp->f_data;
1699 
1700 	memset((caddr_t)ub, 0, sizeof(*ub));
1701 	ub->st_mode = S_IFIFO;
1702 	ub->st_blksize = pipe->pipe_buffer.size;
1703 	ub->st_size = pipe->pipe_buffer.cnt;
1704 	ub->st_blocks = (ub->st_size) ? 1 : 0;
1705 #ifdef __FreeBSD__
1706 	ub->st_atimespec = pipe->pipe_atime;
1707 	ub->st_mtimespec = pipe->pipe_mtime;
1708 	ub->st_ctimespec = pipe->pipe_ctime;
1709 #endif /* FreeBSD */
1710 #ifdef __NetBSD__
1711 	TIMEVAL_TO_TIMESPEC(&pipe->pipe_atime, &ub->st_atimespec)
1712 	TIMEVAL_TO_TIMESPEC(&pipe->pipe_mtime, &ub->st_mtimespec);
1713 	TIMEVAL_TO_TIMESPEC(&pipe->pipe_ctime, &ub->st_ctimespec);
1714 #endif /* NetBSD */
1715 	ub->st_uid = fp->f_cred->cr_uid;
1716 	ub->st_gid = fp->f_cred->cr_gid;
1717 	/*
1718 	 * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen.
1719 	 * XXX (st_dev, st_ino) should be unique.
1720 	 */
1721 	return (0);
1722 }
1723 
1724 /* ARGSUSED */
1725 static int
1726 #ifdef __FreeBSD__
1727 pipe_close(fp, td)
1728 	struct file *fp;
1729 	struct thread *td;
1730 #else
1731 pipe_close(fp, td)
1732 	struct file *fp;
1733 	struct proc *td;
1734 #endif
1735 {
1736 	struct pipe *cpipe = (struct pipe *)fp->f_data;
1737 
1738 #ifdef __FreeBSD__
1739 	fp->f_ops = &badfileops;
1740 	funsetown(cpipe->pipe_sigio);
1741 #endif
1742 	fp->f_data = NULL;
1743 	pipeclose(cpipe);
1744 	return (0);
1745 }
1746 
/*
 * Release the kernel memory held by a pipe: the buffered-mode buffer
 * and any direct-write mapping resources.
 */
static void
pipe_free_kmem(cpipe)
	struct pipe *cpipe;
{

#ifdef __FreeBSD__

	GIANT_REQUIRED;
	KASSERT(cpipe->pipe_mtxp == NULL || !mtx_owned(PIPE_MTX(cpipe)),
	       ("pipespace: pipe mutex locked"));
#endif

	/* Free the ordinary (buffered-mode) pipe buffer, if present. */
	if (cpipe->pipe_buffer.buffer != NULL) {
		if (cpipe->pipe_buffer.size > PIPE_SIZE)
			--nbigpipe;
		amountpipekva -= cpipe->pipe_buffer.size;
#ifdef __FreeBSD__
		kmem_free(kernel_map,
			(vm_offset_t)cpipe->pipe_buffer.buffer,
			cpipe->pipe_buffer.size);
#elif defined(__NetBSD__)
		uvm_km_free(kernel_map,
			(vaddr_t)cpipe->pipe_buffer.buffer,
			cpipe->pipe_buffer.size);
#endif /* NetBSD */
		cpipe->pipe_buffer.buffer = NULL;
	}
#ifndef PIPE_NODIRECT
	/* Free direct-write (loan) state, if it was ever allocated. */
	if (cpipe->pipe_map.kva != 0) {
#ifdef __FreeBSD__
		amountpipekva -= cpipe->pipe_buffer.size + PAGE_SIZE;
		kmem_free(kernel_map,
			cpipe->pipe_map.kva,
			cpipe->pipe_buffer.size + PAGE_SIZE);
#elif defined(__NetBSD__)
		pipe_loan_free(cpipe);
#endif /* NetBSD */
		cpipe->pipe_map.cnt = 0;
		cpipe->pipe_map.kva = 0;
		cpipe->pipe_map.pos = 0;
		cpipe->pipe_map.npages = 0;
	}
#endif /* !PIPE_NODIRECT */
}
1791 
1792 /*
1793  * shutdown the pipe
1794  */
static void
pipeclose(cpipe)
	struct pipe *cpipe;
{
	struct pipe *ppipe;
#ifdef __FreeBSD__
	int hadpeer = 0;
#endif

	if (cpipe == NULL)
		return;

	/* partially created pipes won't have a valid mutex. */
	if (PIPE_MTX(cpipe) != NULL)
		PIPE_LOCK(cpipe);

	pipeselwakeup(cpipe, cpipe);

	/*
	 * If the other side is blocked, wake it up saying that
	 * we want to close it down.
	 */
	while (cpipe->pipe_busy) {
		wakeup(cpipe);
		cpipe->pipe_state |= PIPE_WANTCLOSE | PIPE_EOF;
#ifdef __FreeBSD__
		msleep(cpipe, PIPE_MTX(cpipe), PRIBIO, "pipecl", 0);
#else
		tsleep(cpipe, PRIBIO, "pipecl", 0);
#endif
	}

	/*
	 * Disconnect from peer
	 */
	if ((ppipe = cpipe->pipe_peer) != NULL) {
#ifdef __FreeBSD__
		hadpeer++;
#endif
		pipeselwakeup(ppipe, ppipe);

		/* Signal EOF to any reader/writer blocked on the peer. */
		ppipe->pipe_state |= PIPE_EOF;
		wakeup(ppipe);
#ifdef __FreeBSD__
		KNOTE(&ppipe->pipe_sel.si_note, 0);
#endif
		ppipe->pipe_peer = NULL;
	}
	/*
	 * free resources
	 */
#ifdef __FreeBSD__
	if (PIPE_MTX(cpipe) != NULL) {
		PIPE_UNLOCK(cpipe);
		/*
		 * Destroy the mutex only when no peer remained —
		 * presumably it is shared with the peer while one is
		 * still attached (TODO confirm against pipe creation).
		 */
		if (!hadpeer) {
			mtx_destroy(PIPE_MTX(cpipe));
			free(PIPE_MTX(cpipe), M_TEMP);
		}
	}
	mtx_lock(&Giant);
	pipe_free_kmem(cpipe);
	zfree(pipe_zone, cpipe);
	mtx_unlock(&Giant);
#endif

#ifdef __NetBSD__
	if (PIPE_MTX(cpipe) != NULL)
		PIPE_UNLOCK(cpipe);

	/* Free buffers, drain the long-term lock, return to the pool. */
	pipe_free_kmem(cpipe);
	(void) lockmgr(&cpipe->pipe_lock, LK_DRAIN, NULL);
	pool_put(&pipe_pool, cpipe);
#endif
}
1869 
1870 #ifdef __FreeBSD__
1871 /*ARGSUSED*/
1872 static int
1873 pipe_kqfilter(struct file *fp, struct knote *kn)
1874 {
1875 	struct pipe *cpipe;
1876 
1877 	cpipe = (struct pipe *)kn->kn_fp->f_data;
1878 	switch (kn->kn_filter) {
1879 	case EVFILT_READ:
1880 		kn->kn_fop = &pipe_rfiltops;
1881 		break;
1882 	case EVFILT_WRITE:
1883 		kn->kn_fop = &pipe_wfiltops;
1884 		cpipe = cpipe->pipe_peer;
1885 		break;
1886 	default:
1887 		return (1);
1888 	}
1889 	kn->kn_hook = (caddr_t)cpipe;
1890 
1891 	PIPE_LOCK(cpipe);
1892 	SLIST_INSERT_HEAD(&cpipe->pipe_sel.si_note, kn, kn_selnext);
1893 	PIPE_UNLOCK(cpipe);
1894 	return (0);
1895 }
1896 
1897 static void
1898 filt_pipedetach(struct knote *kn)
1899 {
1900 	struct pipe *cpipe = (struct pipe *)kn->kn_fp->f_data;
1901 
1902 	PIPE_LOCK(cpipe);
1903 	SLIST_REMOVE(&cpipe->pipe_sel.si_note, kn, knote, kn_selnext);
1904 	PIPE_UNLOCK(cpipe);
1905 }
1906 
1907 /*ARGSUSED*/
1908 static int
1909 filt_piperead(struct knote *kn, long hint)
1910 {
1911 	struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
1912 	struct pipe *wpipe = rpipe->pipe_peer;
1913 
1914 	PIPE_LOCK(rpipe);
1915 	kn->kn_data = rpipe->pipe_buffer.cnt;
1916 	if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW))
1917 		kn->kn_data = rpipe->pipe_map.cnt;
1918 
1919 	if ((rpipe->pipe_state & PIPE_EOF) ||
1920 	    (wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
1921 		kn->kn_flags |= EV_EOF;
1922 		PIPE_UNLOCK(rpipe);
1923 		return (1);
1924 	}
1925 	PIPE_UNLOCK(rpipe);
1926 	return (kn->kn_data > 0);
1927 }
1928 
1929 /*ARGSUSED*/
1930 static int
1931 filt_pipewrite(struct knote *kn, long hint)
1932 {
1933 	struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
1934 	struct pipe *wpipe = rpipe->pipe_peer;
1935 
1936 	PIPE_LOCK(rpipe);
1937 	if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
1938 		kn->kn_data = 0;
1939 		kn->kn_flags |= EV_EOF;
1940 		PIPE_UNLOCK(rpipe);
1941 		return (1);
1942 	}
1943 	kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
1944 	if (wpipe->pipe_state & PIPE_DIRECTW)
1945 		kn->kn_data = 0;
1946 
1947 	PIPE_UNLOCK(rpipe);
1948 	return (kn->kn_data >= PIPE_BUF);
1949 }
1950 #endif /* FreeBSD */
1951 
1952 #ifdef __NetBSD__
1953 static int
1954 pipe_fcntl(fp, cmd, data, p)
1955 	struct file *fp;
1956 	u_int cmd;
1957 	caddr_t data;
1958 	struct proc *p;
1959 {
1960 	if (cmd == F_SETFL)
1961 		return (0);
1962 	else
1963 		return (EOPNOTSUPP);
1964 }
1965 
1966 /*
1967  * Handle pipe sysctls.
1968  */
int
sysctl_dopipe(name, namelen, oldp, oldlenp, newp, newlen)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
{
	/* All sysctl names at this level are terminal. */
	if (namelen != 1)
		return (ENOTDIR);		/* overloaded */

	switch (name[0]) {
	/* Tunable limits: readable and writable. */
	case KERN_PIPE_MAXKVASZ:
		return (sysctl_int(oldp, oldlenp, newp, newlen, &maxpipekva));
	case KERN_PIPE_LIMITKVA:
		return (sysctl_int(oldp, oldlenp, newp, newlen, &limitpipekva));
	case KERN_PIPE_MAXBIGPIPES:
		return (sysctl_int(oldp, oldlenp, newp, newlen, &maxbigpipes));
	/* Usage counters: read-only. */
	case KERN_PIPE_NBIGPIPES:
		return (sysctl_rdint(oldp, oldlenp, newp, nbigpipe));
	case KERN_PIPE_KVASIZE:
		return (sysctl_rdint(oldp, oldlenp, newp, amountpipekva));
	default:
		return (EOPNOTSUPP);
	}
	/* NOTREACHED */
}
1998 
1999 /*
2000  * Initialize pipe structs.
2001  */
void
pipe_init(void)
{
	/* Pool backing struct pipe allocations; entries are returned
	 * via pool_put() in pipeclose(). */
	pool_init(&pipe_pool, sizeof(struct pipe), 0, 0, 0, "pipepl", NULL);
}
2007 
2008 #endif /* __NetBSD __ */
2009