xref: /minix/minix/servers/vfs/select.c (revision 0a6a1f1d)
1 /* Implement entry point to select system call.
2  *
3  * The entry points into this file are
4  *   do_select:	       perform the SELECT system call
5  *   select_callback:  notify select system of possible fd operation
6  *   select_unsuspend_by_endpt: cancel a blocking select on exiting driver
7  *
8  * The select code uses minimal locking, so that the replies from character
9  * drivers can be processed without blocking. Filps are locked only for pipes.
10  * We make the assumption that any other structures and fields are safe to
11  * check (and possibly change) as long as we know that a process is blocked on
12  * a select(2) call, meaning that all involved filps are guaranteed to stay
13  * open until either we finish the select call, or the process gets interrupted
14  * by a signal.
15  */
16 
17 #include "fs.h"
18 #include <sys/fcntl.h>
19 #include <sys/time.h>
20 #include <sys/select.h>
21 #include <sys/stat.h>
22 #include <minix/callnr.h>
23 #include <minix/u64.h>
24 #include <string.h>
25 #include <assert.h>
26 
27 #include "file.h"
28 #include "vnode.h"
29 
30 /* max. number of simultaneously pending select() calls */
31 #define MAXSELECTS 25
32 #define FROM_PROC 0
33 #define TO_PROC   1
34 
35 typedef fd_set *ixfer_fd_set_ptr;
36 
static struct selectentry {
  struct fproc *requestor;	/* slot is free iff this is NULL */
  endpoint_t req_endpt;		/* endpoint of the selecting process */
  fd_set readfds, writefds, errorfds;	/* local copies of the caller's sets */
  fd_set ready_readfds, ready_writefds, ready_errorfds; /* results gathered so far */
  ixfer_fd_set_ptr vir_readfds, vir_writefds, vir_errorfds; /* caller's set addresses (may be NULL) */
  struct filp *filps[OPEN_MAX];	/* filp for each fd selected on, NULL otherwise */
  int type[OPEN_MAX];		/* index into fdtypes[] per fd, -1 if unmatched */
  int nfds, nreadyfds;		/* highest fd in use plus one; #set bits in ready_* */
  int error;			/* first error encountered, or OK */
  char block;			/* nonzero iff the call may block */
  char starting;		/* nonzero while initializing; defer results until done */
  clock_t expiry;		/* timeout in ticks; 0 means no timer set */
  minix_timer_t timer;	/* if expiry > 0 */
} selecttab[MAXSELECTS];
52 
53 static int copy_fdsets(struct selectentry *se, int nfds, int direction);
54 static void filp_status(struct filp *fp, int status);
55 static int is_deferred(struct selectentry *se);
56 static void restart_proc(struct selectentry *se);
57 static void ops2tab(int ops, int fd, struct selectentry *e);
58 static int is_regular_file(struct filp *f);
59 static int is_pipe(struct filp *f);
60 static int is_char_device(struct filp *f);
61 static void select_lock_filp(struct filp *f, int ops);
62 static int select_request_file(struct filp *f, int *ops, int block,
63 	struct fproc *rfp);
64 static int select_request_char(struct filp *f, int *ops, int block,
65 	struct fproc *rfp);
66 static int select_request_pipe(struct filp *f, int *ops, int block,
67 	struct fproc *rfp);
68 static void select_cancel_all(struct selectentry *e);
69 static void select_cancel_filp(struct filp *f);
70 static void select_return(struct selectentry *);
71 static void select_restart_filps(void);
72 static int tab2ops(int fd, struct selectentry *e);
73 static void wipe_select(struct selectentry *s);
74 
/* Dispatch table mapping supported file types to their select handlers. Each
 * entry pairs a readiness-request function with a type predicate; do_select
 * scans this table in order and uses the first matching entry's index as the
 * per-fd type tag.
 */
static struct fdtype {
	int (*select_request)(struct filp *, int *ops, int block,
		struct fproc *rfp);	/* initiate/perform readiness check */
	int (*type_match)(struct filp *f);	/* does this filp have this type? */
} fdtypes[] = {
	{ select_request_char, is_char_device },
	{ select_request_file, is_regular_file },
	{ select_request_pipe, is_pipe },
};
#define SEL_FDS		(sizeof(fdtypes) / sizeof(fdtypes[0]))
85 
86 /*===========================================================================*
87  *				do_select				     *
88  *===========================================================================*/
int do_select(void)
{
/* Implement the select(nfds, readfds, writefds, errorfds, timeout) system
 * call. First we copy the arguments and verify their sanity. Then we check
 * whether there are file descriptors that satisfy the select call right off
 * the bat. If so, or if there are no ready file descriptors but the process
 * requested to return immediately, we return the result. Otherwise we set a
 * timeout and wait for either the file descriptors to become ready or the
 * timer to go off. If no timeout value was provided, we wait indefinitely.
 *
 * Returns the number of ready fds, an error code, or SUSPEND when the
 * calling process is blocked awaiting results.
 */
  int r, nfds, do_timeout = 0, fd, s;
  struct filp *f;
  unsigned int type, ops;
  struct timeval timeout;
  struct selectentry *se;
  vir_bytes vtimeout;

  nfds = job_m_in.m_lc_vfs_select.nfds;
  vtimeout = job_m_in.m_lc_vfs_select.timeout;

  /* Sane amount of file descriptors? */
  if (nfds < 0 || nfds > OPEN_MAX) return(EINVAL);

  /* Find a slot to store this select request */
  for (s = 0; s < MAXSELECTS; s++)
	if (selecttab[s].requestor == NULL) /* Unused slot */
		break;
  if (s >= MAXSELECTS) return(ENOSPC);

  se = &selecttab[s];
  wipe_select(se);	/* Clear results of previous usage */
  se->requestor = fp;
  se->req_endpt = who_e;
  se->vir_readfds = job_m_in.m_lc_vfs_select.readfds;
  se->vir_writefds = job_m_in.m_lc_vfs_select.writefds;
  se->vir_errorfds = job_m_in.m_lc_vfs_select.errorfds;

  /* Copy fdsets from the process */
  if ((r = copy_fdsets(se, nfds, FROM_PROC)) != OK) {
	se->requestor = NULL;	/* free the slot again on failure */
	return(r);
  }

  /* Did the process set a timeout value? If so, retrieve it. */
  if (vtimeout != 0) {
	do_timeout = 1;
	r = sys_datacopy_wrapper(who_e, vtimeout, SELF, (vir_bytes) &timeout,
		sizeof(timeout));
	if (r != OK) {
		se->requestor = NULL;
		return(r);
	}
  }

  /* No nonsense in the timeval */
  if (do_timeout && (timeout.tv_sec < 0 || timeout.tv_usec < 0)) {
	se->requestor = NULL;
	return(EINVAL);
  }

  /* If there is no timeout, we block forever. Otherwise, we block up to the
   * specified time interval.
   */
  if (!do_timeout)	/* No timeout value set */
	se->block = 1;
  else if (do_timeout && (timeout.tv_sec > 0 || timeout.tv_usec > 0))
	se->block = 1;
  else			/* timeout set as (0,0) - this effects a poll */
	se->block = 0;
  se->expiry = 0;	/* no timer set (yet) */

  /* We are going to lock filps, and that means that while locking a second
   * filp, we might already get the results for the first one. In that case,
   * the incoming results must not cause the select call to finish prematurely.
   */
  se->starting = TRUE;

  /* Verify that file descriptors are okay to select on */
  for (fd = 0; fd < nfds; fd++) {
	/* Because the select() interface implicitly includes file descriptors
	 * you might not want to select on, we have to figure out whether we're
	 * interested in them. Typically, these file descriptors include fd's
	 * inherited from the parent proc and file descriptors that have been
	 * close()d, but had a lower fd than one in the current set.
	 */
	if (!(ops = tab2ops(fd, se)))
		continue; /* No operations set; nothing to do for this fd */

	/* Get filp belonging to this fd */
	f = se->filps[fd] = get_filp(fd, VNODE_READ);
	if (f == NULL) {
		if (err_code == EBADF)
			r = err_code;
		else /* File descriptor is 'ready' to return EIO */
			r = EINTR;
		/* NOTE(review): the comment above mentions EIO but EINTR is
		 * returned here — confirm which one is intended. */

		se->requestor = NULL;
		return(r);
	}

	/* Check file types. According to POSIX 2008:
	 * "The pselect() and select() functions shall support regular files,
	 * terminal and pseudo-terminal devices, FIFOs, pipes, and sockets. The
	 * behavior of pselect() and select() on file descriptors that refer to
	 * other types of file is unspecified."
	 *
	 * In our case, terminal and pseudo-terminal devices are handled by the
	 * TTY major and sockets by either INET major (socket type AF_INET) or
	 * UDS major (socket type AF_UNIX). Additionally, we give other
	 * character drivers the chance to handle select for any of their
	 * device nodes. Some may not implement support for select and let
	 * libchardriver return EBADF, which we then pass to the calling
	 * process once we receive the reply.
	 */
	se->type[fd] = -1;
	for (type = 0; type < SEL_FDS; type++) {
		if (fdtypes[type].type_match(f)) {
			se->type[fd] = type;
			se->nfds = fd+1;
			/* Count this select as a user of the filp, so the
			 * select state is kept while we are interested. */
			se->filps[fd]->filp_selectors++;
			break;
		}
	}
	unlock_filp(f);
	if (se->type[fd] == -1) { /* Type not found */
		se->requestor = NULL;
		return(EBADF);
	}
  }

  /* Check all file descriptors in the set whether one is 'ready' now */
  for (fd = 0; fd < nfds; fd++) {
	/* Again, check for involuntarily selected fd's */
	if (!(ops = tab2ops(fd, se)))
		continue; /* No operations set; nothing to do for this fd */

	/* File descriptors selected for reading that are not opened for
	 * reading should be marked as readable, as read calls would fail
	 * immediately. The same applies to writing.
	 */
	f = se->filps[fd];
	if ((ops & SEL_RD) && !(f->filp_mode & R_BIT)) {
		ops2tab(SEL_RD, fd, se);
		ops &= ~SEL_RD;
	}
	if ((ops & SEL_WR) && !(f->filp_mode & W_BIT)) {
		ops2tab(SEL_WR, fd, se);
		ops &= ~SEL_WR;
	}
	/* Test filp for select operations if not already done so. e.g.,
	 * processes sharing a filp and both doing a select on that filp. */
	if ((f->filp_select_ops & ops) != ops) {
		int wantops;

		wantops = (f->filp_select_ops |= ops);
		type = se->type[fd];
		select_lock_filp(f, wantops);
		r = fdtypes[type].select_request(f, &wantops, se->block, fp);
		unlock_filp(f);
		if (r != OK && r != SUSPEND) {
			se->error = r;
			break; /* Error or bogus return code; abort */
		}

		/* The select request above might have turned on/off some
		 * operations because they were 'ready' or not meaningful.
		 * Either way, we might have a result and we need to store them
		 * in the select table entry. */
		if (wantops & ops) ops2tab(wantops, fd, se);
	}
  }

  /* At this point there won't be any blocking calls anymore. */
  se->starting = FALSE;

  if ((se->nreadyfds > 0 || se->error != OK || !se->block) &&
		!is_deferred(se)) {
	/* An error occurred, or fd's were found that were ready to go right
	 * away, and/or we were instructed not to block at all. Must return
	 * immediately. Do not copy FD sets if an error occurred.
	 */
	if (se->error != OK)
		r = se->error;
	else
		r = copy_fdsets(se, se->nfds, TO_PROC);
	select_cancel_all(se);
	se->requestor = NULL;

	if (r != OK)
		return(r);
	return(se->nreadyfds);
  }

  /* Convert timeval to ticks and set the timer. If it fails, undo
   * all, return error.
   */
  if (do_timeout) {
	int ticks;
	/* Open Group:
	 * "If the requested timeout interval requires a finer
	 * granularity than the implementation supports, the
	 * actual timeout interval shall be rounded up to the next
	 * supported value."
	 */
#define USECPERSEC 1000000
	while(timeout.tv_usec >= USECPERSEC) {
		/* this is to avoid overflow with *system_hz below */
		timeout.tv_usec -= USECPERSEC;
		timeout.tv_sec++;
	}
	/* Round the microsecond part up to the next clock tick. */
	ticks = timeout.tv_sec * system_hz +
		(timeout.tv_usec * system_hz + USECPERSEC-1) / USECPERSEC;
	se->expiry = ticks;
	/* The timer argument is the selecttab slot index. */
	set_timer(&se->timer, ticks, select_timeout_check, s);
  }

  /* process now blocked */
  suspend(FP_BLOCKED_ON_SELECT);
  return(SUSPEND);
}
309 
310 /*===========================================================================*
311  *				is_deferred				     *
312  *===========================================================================*/
313 static int is_deferred(struct selectentry *se)
314 {
315 /* Find out whether this select has pending initial replies */
316 
317   int fd;
318   struct filp *f;
319 
320   /* The select call must have finished its initialization at all. */
321   if (se->starting) return(TRUE);
322 
323   for (fd = 0; fd < se->nfds; fd++) {
324 	if ((f = se->filps[fd]) == NULL) continue;
325 	if (f->filp_select_flags & (FSF_UPDATE|FSF_BUSY)) return(TRUE);
326   }
327 
328   return(FALSE);
329 }
330 
331 
332 /*===========================================================================*
333  *				is_regular_file				     *
334  *===========================================================================*/
335 static int is_regular_file(struct filp *f)
336 {
337   return(f && f->filp_vno && S_ISREG(f->filp_vno->v_mode));
338 }
339 
340 /*===========================================================================*
341  *				is_pipe					     *
342  *===========================================================================*/
343 static int is_pipe(struct filp *f)
344 {
345 /* Recognize either anonymous pipe or named pipe (FIFO) */
346   return(f && f->filp_vno && S_ISFIFO(f->filp_vno->v_mode));
347 }
348 
349 /*===========================================================================*
350  *				is_char_device				     *
351  *===========================================================================*/
352 static int is_char_device(struct filp *f)
353 {
354 /* See if this filp is a handle on a character device. This function MUST NOT
355  * block its calling thread. The given filp may or may not be locked.
356  */
357 
358   return (f && f->filp_vno && S_ISCHR(f->filp_vno->v_mode));
359 }
360 
361 /*===========================================================================*
362  *				select_request_char			     *
363  *===========================================================================*/
static int select_request_char(struct filp *f, int *ops, int block,
	struct fproc *rfp)
{
/* Check readiness status on a character device. Unless suitable results are
 * available right now, this will only initiate the polling process, causing
 * result processing to be deferred. This function MUST NOT block its calling
 * thread. The given filp may or may not be locked.
 *
 * On return, *ops holds the operations known to be ready right now (possibly
 * none). Returns OK when a definitive answer is available, SUSPEND when a
 * driver reply is pending, or an error code.
 */
  dev_t dev;
  int r, rops;
  struct dmap *dp;

  /* Start by remapping the device node number to a "real" device number. Those
   * two are different only for CTTY_MAJOR aka /dev/tty, but that one single
   * exception requires quite some extra effort here: the select code matches
   * character driver replies to their requests based on the device number, so
   * it needs to be aware that device numbers may be mapped. The idea is to
   * perform the mapping once and store the result in the filp object, so that
   * at least we don't run into problems when a process loses its controlling
   * terminal while doing a select (see also free_proc). It should be noted
   * that it is possible that multiple processes share the same /dev/tty filp,
   * and they may not all have a controlling terminal. The ctty-less processes
   * should never pass the mapping; a more problematic case is checked below.
   *
   * The cdev_map call also checks the major number for rough validity, so that
   * we can use it to index the dmap array safely a bit later.
   */
  if ((dev = cdev_map(f->filp_vno->v_sdev, rfp)) == NO_DEV)
	return(ENXIO);

  if (f->filp_char_select_dev != NO_DEV && f->filp_char_select_dev != dev) {
	/* Currently, this case can occur as follows: a process with a
	 * controlling terminal opens /dev/tty and forks, the new child starts
	 * a new session, opens a new controlling terminal, and both parent and
	 * child call select on the /dev/tty file descriptor. If this case ever
	 * becomes real, a better solution may be to force-close a filp for
	 * /dev/tty when a new controlling terminal is opened.
	 */
	printf("VFS: file pointer has multiple controlling TTYs!\n");
	return(EIO);
  }
  f->filp_char_select_dev = dev; /* set before possibly suspending */

  rops = *ops;

  /* By default, nothing to do */
  *ops = 0;

  if (!block && (f->filp_select_flags & FSF_BLOCKED)) {
	/* This filp is blocked waiting for a reply, but we don't want to
	 * block ourselves. Unless we're awaiting the initial reply, these
	 * operations won't be ready */
	if (!(f->filp_select_flags & FSF_BUSY)) {
		/* Strip operations already known to be blocked at the
		 * driver; they cannot be ready for a nonblocking query. */
		if ((rops & SEL_RD) && (f->filp_select_flags & FSF_RD_BLOCK))
			rops &= ~SEL_RD;
		if ((rops & SEL_WR) && (f->filp_select_flags & FSF_WR_BLOCK))
			rops &= ~SEL_WR;
		if ((rops & SEL_ERR) && (f->filp_select_flags & FSF_ERR_BLOCK))
			rops &= ~SEL_ERR;
		if (!(rops & (SEL_RD|SEL_WR|SEL_ERR)))
			return(OK);	/* nothing left to ask the driver */
	}
  }

  /* Remember that a query must be (re)sent for this filp. */
  f->filp_select_flags |= FSF_UPDATE;
  if (block) {
	/* Ask the driver to notify us when an operation becomes ready. */
	rops |= SEL_NOTIFY;
	if (rops & SEL_RD)	f->filp_select_flags |= FSF_RD_BLOCK;
	if (rops & SEL_WR)	f->filp_select_flags |= FSF_WR_BLOCK;
	if (rops & SEL_ERR)	f->filp_select_flags |= FSF_ERR_BLOCK;
  }

  /* A query for this filp is already outstanding; wait for its reply. */
  if (f->filp_select_flags & FSF_BUSY)
	return(SUSPEND);

  /* Only one outstanding select query per major device at a time. */
  dp = &dmap[major(dev)];
  if (dp->dmap_sel_busy)
	return(SUSPEND);

  f->filp_select_flags &= ~FSF_UPDATE;	/* we are sending the query now */
  r = cdev_select(dev, rops);
  if (r != OK)
	return(r);

  /* Mark the driver (and filp) as having a select query in progress. */
  dp->dmap_sel_busy = TRUE;
  dp->dmap_sel_filp = f;
  f->filp_select_flags |= FSF_BUSY;

  return(SUSPEND);
}
454 
455 /*===========================================================================*
456  *				select_request_file			     *
457  *===========================================================================*/
458 static int select_request_file(struct filp *UNUSED(f), int *UNUSED(ops),
459   int UNUSED(block), struct fproc *UNUSED(rfp))
460 {
461   /* Files are always ready, so output *ops is input *ops */
462   return(OK);
463 }
464 
465 /*===========================================================================*
466  *				select_request_pipe			     *
467  *===========================================================================*/
468 static int select_request_pipe(struct filp *f, int *ops, int block,
469 	struct fproc *UNUSED(rfp))
470 {
471 /* Check readiness status on a pipe. The given filp is locked. This function
472  * may block its calling thread if necessary.
473  */
474   int orig_ops, r = 0, err;
475 
476   orig_ops = *ops;
477 
478   if ((*ops & (SEL_RD|SEL_ERR))) {
479 	/* Check if we can read 1 byte */
480 	err = pipe_check(f, READING, f->filp_flags & ~O_NONBLOCK, 1,
481 			 1 /* Check only */);
482 
483 	if (err != SUSPEND)
484 		r |= SEL_RD;
485 	if (err < 0 && err != SUSPEND)
486 		r |= SEL_ERR;
487   }
488 
489   if ((*ops & (SEL_WR|SEL_ERR))) {
490 	/* Check if we can write 1 byte */
491 	err = pipe_check(f, WRITING, f->filp_flags & ~O_NONBLOCK, 1,
492 			 1 /* Check only */);
493 
494 	if (err != SUSPEND)
495 		r |= SEL_WR;
496 	if (err < 0 && err != SUSPEND)
497 		r |= SEL_ERR;
498   }
499 
500   /* Some options we collected might not be requested. */
501   *ops = r & orig_ops;
502 
503   if (!*ops && block)
504 	f->filp_pipe_select_ops |= orig_ops;
505 
506   return(OK);
507 }
508 
509 /*===========================================================================*
510  *				tab2ops					     *
511  *===========================================================================*/
512 static int tab2ops(int fd, struct selectentry *e)
513 {
514   int ops = 0;
515   if (FD_ISSET(fd, &e->readfds))  ops |= SEL_RD;
516   if (FD_ISSET(fd, &e->writefds)) ops |= SEL_WR;
517   if (FD_ISSET(fd, &e->errorfds)) ops |= SEL_ERR;
518 
519   return(ops);
520 }
521 
522 
523 /*===========================================================================*
524  *				ops2tab					     *
525  *===========================================================================*/
526 static void ops2tab(int ops, int fd, struct selectentry *e)
527 {
528   if ((ops & SEL_RD) && e->vir_readfds && FD_ISSET(fd, &e->readfds) &&
529       !FD_ISSET(fd, &e->ready_readfds)) {
530 	FD_SET(fd, &e->ready_readfds);
531 	e->nreadyfds++;
532   }
533 
534   if ((ops & SEL_WR) && e->vir_writefds && FD_ISSET(fd, &e->writefds) &&
535       !FD_ISSET(fd, &e->ready_writefds)) {
536 	FD_SET(fd, &e->ready_writefds);
537 	e->nreadyfds++;
538   }
539 
540   if ((ops & SEL_ERR) && e->vir_errorfds && FD_ISSET(fd, &e->errorfds) &&
541       !FD_ISSET(fd, &e->ready_errorfds)) {
542 	FD_SET(fd, &e->ready_errorfds);
543 	e->nreadyfds++;
544   }
545 }
546 
547 
548 /*===========================================================================*
549  *				copy_fdsets				     *
550  *===========================================================================*/
551 static int copy_fdsets(struct selectentry *se, int nfds, int direction)
552 {
553 /* Copy FD sets from or to the user process calling select(2). This function
554  * MUST NOT block the calling thread.
555  */
556   int r;
557   size_t fd_setsize;
558   endpoint_t src_e, dst_e;
559   fd_set *src_fds, *dst_fds;
560 
561   if (nfds < 0 || nfds > OPEN_MAX)
562 	panic("select copy_fdsets: nfds wrong: %d", nfds);
563 
564   /* Only copy back as many bits as the user expects. */
565   fd_setsize = (size_t) (howmany(nfds, __NFDBITS) * sizeof(__fd_mask));
566 
567   /* Set source and destination endpoints */
568   src_e = (direction == FROM_PROC) ? se->req_endpt : SELF;
569   dst_e = (direction == FROM_PROC) ? SELF : se->req_endpt;
570 
571   /* read set */
572   src_fds = (direction == FROM_PROC) ? se->vir_readfds : &se->ready_readfds;
573   dst_fds = (direction == FROM_PROC) ? &se->readfds : se->vir_readfds;
574   if (se->vir_readfds) {
575 	r = sys_datacopy_wrapper(src_e, (vir_bytes) src_fds, dst_e,
576 			(vir_bytes) dst_fds, fd_setsize);
577 	if (r != OK) return(r);
578   }
579 
580   /* write set */
581   src_fds = (direction == FROM_PROC) ? se->vir_writefds : &se->ready_writefds;
582   dst_fds = (direction == FROM_PROC) ? &se->writefds : se->vir_writefds;
583   if (se->vir_writefds) {
584 	r = sys_datacopy_wrapper(src_e, (vir_bytes) src_fds, dst_e,
585 			(vir_bytes) dst_fds, fd_setsize);
586 	if (r != OK) return(r);
587   }
588 
589   /* error set */
590   src_fds = (direction == FROM_PROC) ? se->vir_errorfds : &se->ready_errorfds;
591   dst_fds = (direction == FROM_PROC) ? &se->errorfds : se->vir_errorfds;
592   if (se->vir_errorfds) {
593 	r = sys_datacopy_wrapper(src_e, (vir_bytes) src_fds, dst_e,
594 			(vir_bytes) dst_fds, fd_setsize);
595 	if (r != OK) return(r);
596   }
597 
598   return(OK);
599 }
600 
601 
602 /*===========================================================================*
603  *				select_cancel_all			     *
604  *===========================================================================*/
605 static void select_cancel_all(struct selectentry *se)
606 {
607 /* Cancel select, possibly on success. Decrease select usage and cancel timer.
608  * This function MUST NOT block its calling thread.
609  */
610 
611   int fd;
612   struct filp *f;
613 
614   for (fd = 0; fd < se->nfds; fd++) {
615 	if ((f = se->filps[fd]) == NULL) continue;
616 	se->filps[fd] = NULL;
617 	select_cancel_filp(f);
618   }
619 
620   if (se->expiry > 0) {
621 	cancel_timer(&se->timer);
622 	se->expiry = 0;
623   }
624 
625   se->requestor = NULL;
626 }
627 
628 /*===========================================================================*
629  *				select_cancel_filp			     *
630  *===========================================================================*/
static void select_cancel_filp(struct filp *f)
{
/* Reduce the number of select users of this filp, and when the last user is
 * gone, clear all per-filp select state and detach any outstanding character
 * driver query. This function MUST NOT block its calling thread.
 */
  devmajor_t major;

  assert(f);
  assert(f->filp_selectors > 0);
  assert(f->filp_count > 0);

  f->filp_selectors--;
  if (f->filp_selectors == 0) {
	/* No one selecting on this filp anymore, forget about select state */
	f->filp_select_ops = 0;
	f->filp_select_flags = 0;
	f->filp_pipe_select_ops = 0;

	/* If this filp is the subject of an ongoing select query to a
	 * character device, mark the query as stale, so that this filp will
	 * not be checked when the result arrives. The filp select device may
	 * still be NO_DEV if do_select fails on the initial fd check.
	 */
	if (is_char_device(f) && f->filp_char_select_dev != NO_DEV) {
		major = major(f->filp_char_select_dev);
		if (dmap[major].dmap_sel_busy &&
			dmap[major].dmap_sel_filp == f)
			dmap[major].dmap_sel_filp = NULL; /* leave _busy set */
		f->filp_char_select_dev = NO_DEV;
	}
  }
}
663 
664 /*===========================================================================*
665  *				select_return				     *
666  *===========================================================================*/
667 static void select_return(struct selectentry *se)
668 {
669 /* Return the results of a select call to the user process and revive the
670  * process. This function MUST NOT block its calling thread.
671  */
672   int r;
673 
674   assert(!is_deferred(se));	/* Not done yet, first wait for async reply */
675 
676   select_cancel_all(se);
677 
678   if (se->error != OK)
679 	r = se->error;
680   else
681 	r = copy_fdsets(se, se->nfds, TO_PROC);
682   if (r == OK)
683 	r = se->nreadyfds;
684 
685   revive(se->req_endpt, r);
686 }
687 
688 
689 /*===========================================================================*
690  *				select_callback			             *
691  *===========================================================================*/
void select_callback(struct filp *f, int status)
{
/* The status of a filp has changed, with the given ready operations or error.
 * This function is currently called only for pipes, and holds the lock to
 * the filp. Simply forward the result to the generic status handler.
 */
  filp_status(f, status);
}
701 
702 /*===========================================================================*
703  *				init_select  				     *
704  *===========================================================================*/
705 void init_select(void)
706 {
707   int s;
708 
709   for (s = 0; s < MAXSELECTS; s++)
710 	init_timer(&selecttab[s].timer);
711 }
712 
713 
714 /*===========================================================================*
715  *				select_forget			             *
716  *===========================================================================*/
717 void select_forget(void)
718 {
719 /* The calling thread's associated process is expected to be unpaused, due to
720  * a signal that is supposed to interrupt the current system call. Totally
721  * forget about the select(). This function may block its calling thread if
722  * necessary (but it doesn't).
723  */
724   int slot;
725   struct selectentry *se;
726 
727   for (slot = 0; slot < MAXSELECTS; slot++) {
728 	se = &selecttab[slot];
729 	if (se->requestor == fp)
730 		break;
731   }
732 
733   if (slot >= MAXSELECTS) return;	/* Entry not found */
734 
735   assert(se->starting == FALSE);
736 
737   /* Do NOT test on is_deferred here. We can safely cancel ongoing queries. */
738   select_cancel_all(se);
739 }
740 
741 
742 /*===========================================================================*
743  *				select_timeout_check	  	     	     *
744  *===========================================================================*/
745 void select_timeout_check(minix_timer_t *timer)
746 {
747 /* An alarm has gone off for one of the select queries. This function MUST NOT
748  * block its calling thread.
749  */
750   int s;
751   struct selectentry *se;
752 
753   s = tmr_arg(timer)->ta_int;
754   if (s < 0 || s >= MAXSELECTS) return;	/* Entry does not exist */
755 
756   se = &selecttab[s];
757   if (se->requestor == NULL) return;
758   if (se->expiry <= 0) return;	/* Strange, did we even ask for a timeout? */
759   se->expiry = 0;
760   if (!is_deferred(se))
761 	select_return(se);
762   else
763 	se->block = 0;	/* timer triggered "too soon", treat as nonblocking */
764 }
765 
766 
767 /*===========================================================================*
768  *				select_unsuspend_by_endpt  	     	     *
769  *===========================================================================*/
void select_unsuspend_by_endpt(endpoint_t proc_e)
{
/* Revive blocked processes when a driver has disappeared: every select entry
 * waiting on a character device served by the given endpoint gets that fd
 * dropped and EIO recorded. Entries owned by the exiting endpoint itself are
 * simply cancelled.
 */
  devmajor_t major;
  int fd, s;
  struct selectentry *se;
  struct filp *f;

  for (s = 0; s < MAXSELECTS; s++) {
	int wakehim = 0;
	se = &selecttab[s];
	if (se->requestor == NULL) continue;
	if (se->requestor->fp_endpoint == proc_e) {
		/* The exiting endpoint is the selector itself. */
		assert(se->requestor->fp_flags & FP_EXITING);
		select_cancel_all(se);
		continue;
	}

	for (fd = 0; fd < se->nfds; fd++) {
		if ((f = se->filps[fd]) == NULL || !is_char_device(f))
			continue;

		assert(f->filp_char_select_dev != NO_DEV);
		major = major(f->filp_char_select_dev);
		if (dmap_driver_match(proc_e, major)) {
			/* This fd was served by the dead driver: drop it and
			 * report EIO to the selector. */
			se->filps[fd] = NULL;
			se->error = EIO;
			select_cancel_filp(f);
			wakehim = 1;
		}
	}

	/* Wake the selector unless it still awaits other replies. */
	if (wakehim && !is_deferred(se))
		select_return(se);
  }
}
806 
807 /*===========================================================================*
808  *				select_reply1				     *
809  *===========================================================================*/
void select_reply1(endpoint_t driver_e, devminor_t minor, int status)
{
/* Handle the initial reply to CDEV_SELECT request. This function MUST NOT
 * block its calling thread.
 *
 * status is the driver's result: a positive mask of ready operations, zero
 * for no result yet, or a negative error code.
 */
  devmajor_t major;
  dev_t dev;
  struct filp *f;
  struct dmap *dp;

  /* Figure out which device is replying */
  if ((dp = get_dmap(driver_e)) == NULL) return;

  major = dp-dmap;	/* dmap slot index is the major number */
  dev = makedev(major, minor);

  /* Get filp belonging to character special file */
  if (!dp->dmap_sel_busy) {
	printf("VFS (%s:%d): major %d was not expecting a CDEV_SELECT reply\n",
		__FILE__, __LINE__, major);
	return;
  }

  /* The select filp may have been set to NULL if the requestor has been
   * unpaused in the meantime. In that case, we ignore the result, but we do
   * look for other filps to restart later.
   */
  if ((f = dp->dmap_sel_filp) != NULL) {
	/* Find vnode and check we got a reply from the device we expected */
	assert(is_char_device(f));
	assert(f->filp_char_select_dev != NO_DEV);
	if (f->filp_char_select_dev != dev) {
		/* This should never happen. The driver may be misbehaving.
		 * For now we assume that the reply we want will arrive later..
		 */
		printf("VFS (%s:%d): expected reply from dev %llx not %llx\n",
			__FILE__, __LINE__, f->filp_char_select_dev, dev);
		return;
	}
  }

  /* No longer waiting for a reply from this device */
  dp->dmap_sel_busy = FALSE;
  dp->dmap_sel_filp = NULL;

  /* Process the select result only if the filp is valid. */
  if (f != NULL) {
	assert(f->filp_count >= 1);
	assert(f->filp_select_flags & FSF_BUSY);

	f->filp_select_flags &= ~FSF_BUSY;	/* initial reply received */

	/* The select call is done now, except when
	 * - another process started a select on the same filp with possibly a
	 *   different set of operations.
	 * - a process does a select on the same filp but using different file
	 *   descriptors.
	 * - the select has a timeout. Upon receiving this reply the operations
	 *   might not be ready yet, so we want to wait for that to ultimately
	 *   happen.
	 *   Therefore we need to keep remembering what the operations are.
	 */
	if (!(f->filp_select_flags & (FSF_UPDATE|FSF_BLOCKED)))
		f->filp_select_ops = 0;		/* done selecting */
	else if (status > 0 && !(f->filp_select_flags & FSF_UPDATE))
		/* there may be operations pending */
		f->filp_select_ops &= ~status;

	/* Record new filp status */
	if (!(status == 0 && (f->filp_select_flags & FSF_BLOCKED))) {
		if (status > 0) {	/* operations ready */
			/* Clear the blocked flag for each ready operation. */
			if (status & SEL_RD)
				f->filp_select_flags &= ~FSF_RD_BLOCK;
			if (status & SEL_WR)
				f->filp_select_flags &= ~FSF_WR_BLOCK;
			if (status & SEL_ERR)
				f->filp_select_flags &= ~FSF_ERR_BLOCK;
		} else if (status < 0) { /* error */
			/* Always unblock upon error */
			f->filp_select_flags &= ~FSF_BLOCKED;
		}
	}

	filp_status(f, status); /* Tell filp owners about the results */
  }

  /* The driver is free again; kick off any deferred queries. */
  select_restart_filps();
}
898 
899 
900 /*===========================================================================*
901  *				select_reply2				     *
902  *===========================================================================*/
void select_reply2(endpoint_t driver_e, devminor_t minor, int status)
{
/* Handle secondary reply to DEV_SELECT request. A secondary reply occurs when
 * the select request is 'blocking' until an operation becomes ready. This
 * function MUST NOT block its calling thread.
 *
 * Unlike the initial reply, a secondary reply carries no filp context: the
 * driver only reports that operations 'status' became ready (status > 0) or
 * that an error occurred (status < 0) on device (major, minor). We therefore
 * scan every pending select entry for filps selecting on that device.
 */
  int slot, found, fd;
  devmajor_t major;
  dev_t dev;
  struct filp *f;
  struct dmap *dp;
  struct selectentry *se;

  /* Zero is neither "operations ready" (>0) nor an error (<0); a driver
   * should never send it in a secondary reply.
   */
  if (status == 0) {
	printf("VFS (%s:%d): weird status (%d) to report\n",
		__FILE__, __LINE__, status);
	return;
  }

  /* Figure out which device is replying */
  if ((dp = get_dmap(driver_e)) == NULL) {
	printf("VFS (%s:%d): endpoint %d is not a known driver endpoint\n",
		__FILE__, __LINE__, driver_e);
	return;
  }
  major = dp-dmap;	/* pointer arithmetic: index into the dmap[] table is
			 * the device's major number
			 */
  dev = makedev(major, minor);

  /* Find all file descriptors selecting for this device */
  for (slot = 0; slot < MAXSELECTS; slot++) {
	se = &selecttab[slot];
	if (se->requestor == NULL) continue;	/* empty slot */

	found = FALSE;
	for (fd = 0; fd < se->nfds; fd++) {
		if ((f = se->filps[fd]) == NULL) continue;
		if (!is_char_device(f)) continue;
		assert(f->filp_char_select_dev != NO_DEV);
		if (f->filp_char_select_dev != dev) continue;

		if (status > 0) {	/* Operations ready */
			/* Clear the replied bits from the request
			 * mask unless FSF_UPDATE is set.
			 */
			if (!(f->filp_select_flags & FSF_UPDATE))
				f->filp_select_ops &= ~status;
			/* The driver is no longer blocking on the reported
			 * operations, so clear their per-operation block
			 * flags.
			 */
			if (status & SEL_RD)
				f->filp_select_flags &= ~FSF_RD_BLOCK;
			if (status & SEL_WR)
				f->filp_select_flags &= ~FSF_WR_BLOCK;
			if (status & SEL_ERR)
				f->filp_select_flags &= ~FSF_ERR_BLOCK;

			ops2tab(status, fd, se);
		} else {
			/* Error: unblock all operations on this filp and
			 * remember the error for the requestor.
			 */
			f->filp_select_flags &= ~FSF_BLOCKED;
			se->error = status;
		}
		found = TRUE;
	}
	/* Even if 'found' is set now, nothing may have changed for this call,
	 * as it may not have been interested in the operations that were
	 * reported as ready. Let restart_proc check.
	 */
	if (found)
		restart_proc(se);
  }

  select_restart_filps();
}
973 
974 /*===========================================================================*
975  *				select_restart_filps			     *
976  *===========================================================================*/
static void select_restart_filps(void)
{
/* We got a result from a character driver, and now we need to check if we can
 * restart deferred polling operations. This function MUST NOT block its
 * calling thread.
 */
  int fd, slot;
  struct filp *f;
  struct selectentry *se;

  /* Locate filps that can be restarted */
  for (slot = 0; slot < MAXSELECTS; slot++) {
	se = &selecttab[slot];
	if (se->requestor == NULL) continue; /* empty slot */

	/* Only 'deferred' processes are eligible to restart */
	if (!is_deferred(se)) continue;

	/* Find filps that are not waiting for a reply, but have an updated
	 * status (i.e., another select on the same filp with possibly a
	 * different set of operations is to be done), and thus require the
	 * select request to be sent again.
	 */
	for (fd = 0; fd < se->nfds; fd++) {
		int r, wantops, ops;
		if ((f = se->filps[fd]) == NULL) continue;
		if (f->filp_select_flags & FSF_BUSY) /* Still waiting for */
			continue;		     /* initial reply */
		if (!(f->filp_select_flags & FSF_UPDATE)) /* Must be in  */
			continue;			  /* 'update' state */

		/* This function is suitable only for character devices. In
		 * particular, checking pipes the same way would introduce a
		 * serious locking problem.
		 */
		assert(is_char_device(f));

		/* Re-issue the select request for the accumulated set of
		 * operations on this filp.
		 */
		wantops = ops = f->filp_select_ops;
		r = select_request_char(f, &wantops, se->block, se->requestor);
		if (r != OK && r != SUSPEND) {
			se->error = r;
			restart_proc(se);
			break; /* Error or bogus return code; abort */
		}
		/* NOTE(review): 'wantops' appears to be updated in place to
		 * the operations that are ready right away — confirm against
		 * select_request_char. Record any such ready operations.
		 */
		if (wantops & ops) ops2tab(wantops, fd, se);
	}
  }
}
1025 
1026 /*===========================================================================*
1027  *				filp_status				     *
1028  *===========================================================================*/
1029 static void filp_status(f, status)
1030 struct filp *f;
1031 int status;
1032 {
1033 /* Tell processes that need to know about the status of this filp. This
1034  * function MUST NOT block its calling thread.
1035  */
1036   int fd, slot, found;
1037   struct selectentry *se;
1038 
1039   for (slot = 0; slot < MAXSELECTS; slot++) {
1040 	se = &selecttab[slot];
1041 	if (se->requestor == NULL) continue; /* empty slot */
1042 
1043 	found = FALSE;
1044 	for (fd = 0; fd < se->nfds; fd++) {
1045 		if (se->filps[fd] != f) continue;
1046 		if (status < 0)
1047 			se->error = status;
1048 		else
1049 			ops2tab(status, fd, se);
1050 		found = TRUE;
1051 	}
1052 	if (found)
1053 		restart_proc(se);
1054   }
1055 }
1056 
1057 /*===========================================================================*
1058  *				restart_proc				     *
1059  *===========================================================================*/
1060 static void restart_proc(se)
1061 struct selectentry *se;
1062 {
1063 /* Tell process about select results (if any) unless there are still results
1064  * pending. This function MUST NOT block its calling thread.
1065  */
1066 
1067   if ((se->nreadyfds > 0 || se->error != OK || !se->block) && !is_deferred(se))
1068 	select_return(se);
1069 }
1070 
1071 /*===========================================================================*
1072  *				wipe_select				     *
1073  *===========================================================================*/
1074 static void wipe_select(struct selectentry *se)
1075 {
1076   se->nfds = 0;
1077   se->nreadyfds = 0;
1078   se->error = OK;
1079   se->block = 0;
1080   memset(se->filps, 0, sizeof(se->filps));
1081 
1082   FD_ZERO(&se->readfds);
1083   FD_ZERO(&se->writefds);
1084   FD_ZERO(&se->errorfds);
1085   FD_ZERO(&se->ready_readfds);
1086   FD_ZERO(&se->ready_writefds);
1087   FD_ZERO(&se->ready_errorfds);
1088 }
1089 
1090 /*===========================================================================*
1091  *				select_lock_filp			     *
1092  *===========================================================================*/
1093 static void select_lock_filp(struct filp *f, int ops)
1094 {
1095 /* Lock a filp and vnode based on which operations are requested. This function
1096  * may block its calling thread, obviously.
1097  */
1098   tll_access_t locktype;
1099 
1100   locktype = VNODE_READ; /* By default */
1101 
1102   if (ops & (SEL_WR|SEL_ERR))
1103 	/* Selecting for error or writing requires exclusive access */
1104 	locktype = VNODE_WRITE;
1105 
1106   lock_filp(f, locktype);
1107 }
1108