xref: /minix/minix/servers/vfs/pipe.c (revision fb9c64b2)
/* This file deals with the suspension and revival of processes.  A process can
 * be suspended because it wants to read or write from a pipe and can't, or
 * because it wants to read or write from a special file and can't.  When a
 * process can't continue it is suspended, and revived later when it is able
 * to continue.
 *
 * The entry points into this file are
 *   do_pipe2:	  perform the PIPE2 system call
 *   map_vnode:	  map a vnode's inode onto another file system
 *   pipe_check:  check to see that a read or write on a pipe is feasible now
 *   suspend:	  suspend a process that cannot do a requested read or write
 *   pipe_suspend: save the pipe I/O parameters and suspend the process
 *   release:	  check to see if a suspended process can be released and do
 *                it
 *   revive:	  mark a suspended process as able to run again
 *   unsuspend_by_endpt: revive all processes blocking on a given process
 *   unpause:	  a signal has been sent to a process; if it is suspended,
 *                abort its blocking call
 */
17 
18 #include "fs.h"
19 #include <fcntl.h>
20 #include <signal.h>
21 #include <string.h>
22 #include <assert.h>
23 #include <minix/callnr.h>
24 #include <minix/endpoint.h>
25 #include <minix/com.h>
26 #include <minix/u64.h>
27 #include <sys/select.h>
28 #include <sys/time.h>
29 #include "file.h"
30 #include <minix/vfsif.h>
31 #include "vnode.h"
32 #include "vmnt.h"
33 
34 static int create_pipe(int fil_des[2], int flags);
35 
36 /*===========================================================================*
37  *				do_pipe2				     *
38  *===========================================================================*/
39 int do_pipe2(void)
40 {
41 /* Perform the pipe2(fil_des[2], flags) system call. */
42   int r, flags;
43   int fil_des[2];		/* reply goes here */
44 
45   flags = job_m_in.m_lc_vfs_pipe2.flags;
46   flags |= job_m_in.m_lc_vfs_pipe2.oflags;	/* backward compatibility */
47 
48   r = create_pipe(fil_des, flags);
49   if (r == OK) {
50 	job_m_out.m_vfs_lc_fdpair.fd0 = fil_des[0];
51 	job_m_out.m_vfs_lc_fdpair.fd1 = fil_des[1];
52   }
53 
54   return r;
55 }
56 
57 /*===========================================================================*
58  *				create_pipe				     *
59  *===========================================================================*/
60 static int create_pipe(int fil_des[2], int flags)
61 {
62   register struct fproc *rfp;
63   int r;
64   struct filp *fil_ptr0, *fil_ptr1;
65   struct vnode *vp;
66   struct vmnt *vmp;
67   struct node_details res;
68 
69   /* Get a lock on PFS */
70   if ((vmp = find_vmnt(PFS_PROC_NR)) == NULL) panic("PFS gone");
71   if ((r = lock_vmnt(vmp, VMNT_READ)) != OK) return(r);
72 
73   /* See if a free vnode is available */
74   if ((vp = get_free_vnode()) == NULL) {
75 	unlock_vmnt(vmp);
76 	return(err_code);
77   }
78   lock_vnode(vp, VNODE_OPCL);
79 
80   /* Acquire two file descriptors. */
81   rfp = fp;
82   if ((r = get_fd(fp, 0, R_BIT, &fil_des[0], &fil_ptr0)) != OK) {
83 	unlock_vnode(vp);
84 	unlock_vmnt(vmp);
85 	return(r);
86   }
87   rfp->fp_filp[fil_des[0]] = fil_ptr0;
88   fil_ptr0->filp_count = 1;		/* mark filp in use */
89   if ((r = get_fd(fp, 0, W_BIT, &fil_des[1], &fil_ptr1)) != OK) {
90 	rfp->fp_filp[fil_des[0]] = NULL;
91 	fil_ptr0->filp_count = 0;	/* mark filp free */
92 	unlock_filp(fil_ptr0);
93 	unlock_vnode(vp);
94 	unlock_vmnt(vmp);
95 	return(r);
96   }
97   rfp->fp_filp[fil_des[1]] = fil_ptr1;
98   fil_ptr1->filp_count = 1;
99 
100   /* Create a named pipe inode on PipeFS */
101   r = req_newnode(PFS_PROC_NR, fp->fp_effuid, fp->fp_effgid, I_NAMED_PIPE,
102 		  NO_DEV, &res);
103 
104   if (r != OK) {
105 	rfp->fp_filp[fil_des[0]] = NULL;
106 	fil_ptr0->filp_count = 0;
107 	rfp->fp_filp[fil_des[1]] = NULL;
108 	fil_ptr1->filp_count = 0;
109 	unlock_filp(fil_ptr1);
110 	unlock_filp(fil_ptr0);
111 	unlock_vnode(vp);
112 	unlock_vmnt(vmp);
113 	return(r);
114   }
115 
116   /* Fill in vnode */
117   vp->v_fs_e = res.fs_e;
118   vp->v_mapfs_e = res.fs_e;
119   vp->v_inode_nr = res.inode_nr;
120   vp->v_mapinode_nr = res.inode_nr;
121   vp->v_mode = res.fmode;
122   vp->v_fs_count = 1;
123   vp->v_mapfs_count = 1;
124   vp->v_ref_count = 1;
125   vp->v_size = 0;
126   vp->v_vmnt = NULL;
127   vp->v_dev = NO_DEV;
128 
129   /* Fill in filp objects */
130   fil_ptr0->filp_vno = vp;
131   dup_vnode(vp);
132   fil_ptr1->filp_vno = vp;
133   fil_ptr0->filp_flags = O_RDONLY | (flags & ~O_ACCMODE);
134   fil_ptr1->filp_flags = O_WRONLY | (flags & ~O_ACCMODE);
135   if (flags & O_CLOEXEC) {
136 	FD_SET(fil_des[0], &rfp->fp_cloexec_set);
137 	FD_SET(fil_des[1], &rfp->fp_cloexec_set);
138   }
139 
140   unlock_filps(fil_ptr0, fil_ptr1);
141   unlock_vmnt(vmp);
142 
143   return(OK);
144 }
145 
146 
147 /*===========================================================================*
148  *				map_vnode				     *
149  *===========================================================================*/
150 int
151 map_vnode(struct vnode *vp, endpoint_t map_to_fs_e)
152 {
153   int r;
154   struct vmnt *vmp;
155   struct node_details res;
156 
157   if(vp->v_mapfs_e != NONE) return(OK);	/* Already mapped; nothing to do. */
158 
159   if ((vmp = find_vmnt(map_to_fs_e)) == NULL)
160 	panic("Can't map to unknown endpoint");
161   if ((r = lock_vmnt(vmp, VMNT_WRITE)) != OK) {
162 	if (r == EBUSY)
163 		vmp = NULL;	/* Already locked, do not unlock */
164 	else
165 		return(r);
166 
167   }
168 
169   /* Create a temporary mapping of this inode to another FS. Read and write
170    * operations on data will be handled by that FS. The rest by the 'original'
171    * FS that holds the inode. */
172   if ((r = req_newnode(map_to_fs_e, fp->fp_effuid, fp->fp_effgid, I_NAMED_PIPE,
173 		       vp->v_dev, &res)) == OK) {
174 	vp->v_mapfs_e = res.fs_e;
175 	vp->v_mapinode_nr = res.inode_nr;
176 	vp->v_mapfs_count = 1;
177   }
178 
179   if (vmp) unlock_vmnt(vmp);
180 
181   return(r);
182 }
183 
184 /*===========================================================================*
185  *				pipe_check				     *
186  *===========================================================================*/
187 int pipe_check(
188 struct filp *filp,	/* the filp of the pipe */
189 int rw_flag,		/* READING or WRITING */
190 int oflags,		/* flags set by open or fcntl */
191 int bytes,		/* bytes to be read or written (all chunks) */
192 int notouch		/* check only */
193 )
194 {
195 /* Pipes are a little different.  If a process reads from an empty pipe for
196  * which a writer still exists, suspend the reader.  If the pipe is empty
197  * and there is no writer, return 0 bytes.  If a process is writing to a
198  * pipe and no one is reading from it, give a broken pipe error.
199  */
200   struct vnode *vp;
201   off_t pos;
202   int r = OK;
203 
204   vp = filp->filp_vno;
205 
206   /* Reads start at the beginning; writes append to pipes */
207   if (notouch) /* In this case we don't actually care whether data transfer
208 		* would succeed. See POSIX 1003.1-2008 */
209 	pos = 0;
210   else if (rw_flag == READING)
211 	pos = 0;
212   else {
213 	pos = vp->v_size;
214   }
215 
216   /* If reading, check for empty pipe. */
217   if (rw_flag == READING) {
218 	if (vp->v_size == 0) {
219 		/* Process is reading from an empty pipe. */
220 		if (find_filp(vp, W_BIT) != NULL) {
221 			/* Writer exists */
222 			if (oflags & O_NONBLOCK)
223 				r = EAGAIN;
224 			else
225 				r = SUSPEND;
226 
227 			/* If need be, activate sleeping writers. */
228 			/* We ignore notouch voluntary here. */
229 			if (susp_count > 0)
230 				release(vp, VFS_WRITE, susp_count);
231 		}
232 		return(r);
233 	}
234 	return(bytes);
235   }
236 
237   /* Process is writing to a pipe. */
238   if (find_filp(vp, R_BIT) == NULL) {
239 	return(EPIPE);
240   }
241 
242   /* Calculate how many bytes can be written. */
243   if (pos + bytes > PIPE_BUF) {
244 	if (oflags & O_NONBLOCK) {
245 		if (bytes <= PIPE_BUF) {
246 			/* Write has to be atomic */
247 			return(EAGAIN);
248 		}
249 
250 		/* Compute available space */
251 		bytes = PIPE_BUF - pos;
252 
253 		if (bytes > 0)  {
254 			/* Do a partial write. Need to wakeup reader */
255 			if (!notouch)
256 				release(vp, VFS_READ, susp_count);
257 			return(bytes);
258 		} else {
259 			/* Pipe is full */
260 			return(EAGAIN);
261 		}
262 	}
263 
264 	if (bytes > PIPE_BUF) {
265 		/* Compute available space */
266 		bytes = PIPE_BUF - pos;
267 
268 		if (bytes > 0) {
269 			/* Do a partial write. Need to wakeup reader
270 			 * since we'll suspend ourself in read_write()
271 			 */
272 			if (!notouch)
273 				release(vp, VFS_READ, susp_count);
274 			return(bytes);
275 		}
276 	}
277 
278 	/* Pipe is full */
279 	return(SUSPEND);
280   }
281 
282   /* Writing to an empty pipe.  Search for suspended reader. */
283   if (pos == 0 && !notouch)
284 	release(vp, VFS_READ, susp_count);
285 
286   /* Requested amount fits */
287   return(bytes);
288 }
289 
290 
291 /*===========================================================================*
292  *				suspend					     *
293  *===========================================================================*/
294 void suspend(int why)
295 {
296 /* Take measures to suspend the processing of the present system call.  The
297  * caller must store the parameters to be used upon resuming in the process
298  * table as appropriate.  The SUSPEND pseudo error should be returned after
299  * calling suspend().
300  */
301 
302   assert(fp->fp_blocked_on == FP_BLOCKED_ON_NONE);
303 
304   if (why == FP_BLOCKED_ON_POPEN || why == FP_BLOCKED_ON_PIPE)
305 	/* #procs susp'ed on pipe*/
306 	susp_count++;
307 
308   fp->fp_blocked_on = why;
309 }
310 
311 
312 /*===========================================================================*
313  *				pipe_suspend				     *
314  *===========================================================================*/
315 void pipe_suspend(int callnr, int fd, vir_bytes buf, size_t size,
316 	size_t cum_io)
317 {
318 /* Take measures to suspend the processing of the present system call.
319  * Store the parameters to be used upon resuming in the process table.
320  */
321 
322   fp->fp_pipe.callnr = callnr;
323   fp->fp_pipe.fd = fd;
324   fp->fp_pipe.buf = buf;
325   fp->fp_pipe.nbytes = size;
326   fp->fp_pipe.cum_io = cum_io;
327   suspend(FP_BLOCKED_ON_PIPE);
328 }
329 
330 
331 /*===========================================================================*
332  *				unsuspend_by_endpt			     *
333  *===========================================================================*/
334 void unsuspend_by_endpt(endpoint_t proc_e)
335 {
336 /* Revive processes waiting for drivers (SUSPENDed) that have disappeared, with
337  * return code EIO.
338  */
339   struct fproc *rp;
340   struct smap *sp;
341 
342   for (rp = &fproc[0]; rp < &fproc[NR_PROCS]; rp++) {
343 	if (rp->fp_pid == PID_FREE) continue;
344 	if (rp->fp_blocked_on == FP_BLOCKED_ON_CDEV &&
345 	    rp->fp_cdev.endpt == proc_e)
346 		revive(rp->fp_endpoint, EIO);
347 	else if (rp->fp_blocked_on == FP_BLOCKED_ON_SDEV &&
348 	    (sp = get_smap_by_dev(rp->fp_sdev.dev, NULL)) != NULL &&
349 	    sp->smap_endpt == proc_e)
350 		sdev_stop(rp);
351   }
352 
353   /* Revive processes waiting in drivers on select()s with EAGAIN too */
354   select_unsuspend_by_endpt(proc_e);
355 
356   return;
357 }
358 
359 
360 /*===========================================================================*
361  *				release					     *
362  *===========================================================================*/
363 void release(struct vnode * vp, int op, int count)
364 {
365 /* Check to see if any process is hanging on pipe vnode 'vp'. If one is, and it
366  * was trying to perform the call indicated by 'op' - one of VFS_OPEN,
367  * VFS_READ, or VFS_WRITE - release it.  The 'count' parameter indicates the
368  * maximum number of processes to release, which allows us to stop searching
369  * early in some cases.
370  */
371 
372   register struct fproc *rp;
373   struct filp *f;
374   int fd, selop;
375 
376   /* Trying to perform the call also includes SELECTing on it with that
377    * operation.
378    */
379   if (op == VFS_READ || op == VFS_WRITE) {
380 	if (op == VFS_READ)
381 		selop = SEL_RD;
382 	else
383 		selop = SEL_WR;
384 
385 	for (f = &filp[0]; f < &filp[NR_FILPS]; f++) {
386 		if (f->filp_count < 1 || !(f->filp_pipe_select_ops & selop) ||
387 		    f->filp_vno != vp)
388 			continue;
389 
390 		select_callback(f, selop);
391 
392 		f->filp_pipe_select_ops &= ~selop;
393 	}
394   }
395 
396   /* Search the proc table. */
397   for (rp = &fproc[0]; rp < &fproc[NR_PROCS] && count > 0; rp++) {
398 	/* Just to make sure:
399 	 * - FP_BLOCKED_ON_POPEN implies the original request was VFS_OPEN;
400 	 * - FP_BLOCKED_ON_PIPE may be the result of VFS_READ and VFS_WRITE,
401 	 *   and one of those two numbers is stored in fp_pipe.callnr.
402 	 */
403 	if (rp->fp_pid != PID_FREE && fp_is_blocked(rp) &&
404 	    !(rp->fp_flags & FP_REVIVED) &&
405 	    ((op == VFS_OPEN && rp->fp_blocked_on == FP_BLOCKED_ON_POPEN) ||
406 	     (op != VFS_OPEN && rp->fp_blocked_on == FP_BLOCKED_ON_PIPE &&
407 	      op == rp->fp_pipe.callnr))) {
408 		/* Find the vnode. Depending on the reason the process was
409 		 * suspended, there are different ways of finding it.
410 		 */
411 		if (rp->fp_blocked_on == FP_BLOCKED_ON_POPEN)
412 			fd = rp->fp_popen.fd;
413 		else
414 			fd = rp->fp_pipe.fd;
415 		f = rp->fp_filp[fd];
416 		if (f == NULL || f->filp_mode == FILP_CLOSED)
417 			continue;
418 		if (f->filp_vno != vp)
419 			continue;
420 
421 		/* We found the vnode. Revive process. */
422 		revive(rp->fp_endpoint, 0);
423 		susp_count--;	/* keep track of who is suspended */
424 		if(susp_count < 0)
425 			panic("susp_count now negative: %d", susp_count);
426 		if (--count == 0) return;
427 	}
428   }
429 }
430 
431 
432 /*===========================================================================*
433  *				revive					     *
434  *===========================================================================*/
435 void revive(endpoint_t proc_e, int returned)
436 {
437 /* Revive a previously blocked process. When a process hangs on tty, this
438  * is the way it is eventually released. For processes blocked on _SELECT,
439  * _CDEV, or _SDEV, this function MUST NOT block its calling thread.
440  */
441   struct fproc *rfp;
442   int blocked_on;
443   int slot;
444 
445   if (proc_e == NONE || isokendpt(proc_e, &slot) != OK) return;
446 
447   rfp = &fproc[slot];
448   if (!fp_is_blocked(rfp) || (rfp->fp_flags & FP_REVIVED)) return;
449 
450   /* The 'reviving' flag applies to pipe I/O and file locks.  Processes waiting
451    * on those suspension types need more processing, and will be unblocked from
452    * the main loop later.  Processes suspended for other reasons get a reply
453    * right away, and as such, have their suspension cleared right here as well.
454    */
455   blocked_on = rfp->fp_blocked_on;
456   if (blocked_on == FP_BLOCKED_ON_PIPE || blocked_on == FP_BLOCKED_ON_FLOCK) {
457 	/* Revive a process suspended on a pipe or lock. */
458 	rfp->fp_flags |= FP_REVIVED;
459 	reviving++;		/* process was waiting on pipe or lock */
460   } else {
461 	rfp->fp_blocked_on = FP_BLOCKED_ON_NONE;
462 	switch (blocked_on) {
463 	case FP_BLOCKED_ON_POPEN:
464 		/* process blocked in open or create */
465 		replycode(proc_e, rfp->fp_popen.fd);
466 		break;
467 	case FP_BLOCKED_ON_SELECT:
468 		replycode(proc_e, returned);
469 		break;
470 	case FP_BLOCKED_ON_CDEV:
471 		/* If a grant has been issued by FS for this I/O, revoke
472 		 * it again now that I/O is done.
473 		 */
474 		if (GRANT_VALID(rfp->fp_cdev.grant)) {
475 			if (cpf_revoke(rfp->fp_cdev.grant) == -1) {
476 				panic("VFS: revoke failed for grant: %d",
477 				    rfp->fp_cdev.grant);
478 			}
479 		}
480 		replycode(proc_e, returned);/* unblock the process */
481 		break;
482 	case FP_BLOCKED_ON_SDEV:
483 		/*
484 		 * Cleaning up socket requests is too complex to put here, and
485 		 * neither sdev_reply() nor sdev_stop() call revive().
486 		 */
487 		panic("revive should not be used for socket calls");
488 	default:
489 		panic("unknown block state %d", blocked_on);
490 	}
491   }
492 }
493 
494 
495 /*===========================================================================*
496  *				unpause					     *
497  *===========================================================================*/
498 void unpause(void)
499 {
500 /* A signal has been sent to a user who is paused on the file system.
501  * Abort the system call with the EINTR error message.
502  */
503   int blocked_on, status = EINTR;
504   int wasreviving = 0;
505 
506   if (!fp_is_blocked(fp)) return;
507   blocked_on = fp->fp_blocked_on;
508 
509   /* Clear the block status now. The procedure below might make blocking calls
510    * and it is imperative that while at least cdev_cancel() or sdev_cancel()
511    * are executing, other parts of VFS do not perceive this process as blocked
512    * on something.
513    */
514   fp->fp_blocked_on = FP_BLOCKED_ON_NONE;
515 
516   if (fp->fp_flags & FP_REVIVED) {
517 	fp->fp_flags &= ~FP_REVIVED;
518 	reviving--;
519 	wasreviving = 1;
520   }
521 
522   switch (blocked_on) {
523 	case FP_BLOCKED_ON_PIPE:/* process trying to read or write a pipe */
524 		/* If the operation succeeded partially, return the bytes
525 		 * processed so far.  Otherwise, return EINTR as usual.
526 		 */
527 		if (fp->fp_pipe.cum_io > 0)
528 			status = fp->fp_pipe.cum_io;
529 		break;
530 
531 	case FP_BLOCKED_ON_FLOCK:/* process trying to set a lock with FCNTL */
532 		break;
533 
534 	case FP_BLOCKED_ON_SELECT:/* process blocking on select() */
535 		select_forget();
536 		break;
537 
538 	case FP_BLOCKED_ON_POPEN:	/* process trying to open a fifo */
539 		break;
540 
541 	case FP_BLOCKED_ON_CDEV: /* process blocked on character device I/O */
542 		status = cdev_cancel(fp->fp_cdev.dev, fp->fp_cdev.endpt,
543 		    fp->fp_cdev.grant);
544 
545 		break;
546 
547 	case FP_BLOCKED_ON_SDEV:	/* process blocked on socket I/O */
548 		sdev_cancel();
549 		return;			/* sdev_cancel() sends its own reply */
550 
551 	default :
552 		panic("VFS: unknown block reason: %d", blocked_on);
553   }
554 
555   if ((blocked_on == FP_BLOCKED_ON_PIPE || blocked_on == FP_BLOCKED_ON_POPEN)&&
556 	!wasreviving) {
557 	susp_count--;
558   }
559 
560   replycode(fp->fp_endpoint, status);	/* signal interrupted call */
561 }
562