xref: /minix/minix/servers/vfs/select.c (revision 7f5f010b)
1 /* Implement entry point to select system call.
2  *
3  * The entry points into this file are
4  *   do_select:	       perform the SELECT system call
5  *   select_callback:  notify select system of possible fd operation
6  *   select_unsuspend_by_endpt: cancel a blocking select on exiting driver
7  *
8  * The select code uses minimal locking, so that the replies from character
9  * drivers can be processed without blocking. Filps are locked only for pipes.
10  * We make the assumption that any other structures and fields are safe to
11  * check (and possibly change) as long as we know that a process is blocked on
12  * a select(2) call, meaning that all involved filps are guaranteed to stay
13  * open until either we finish the select call, it the process gets interrupted
14  * by a signal.
15  */
16 
17 #include "fs.h"
18 #include <sys/fcntl.h>
19 #include <sys/time.h>
20 #include <sys/select.h>
21 #include <sys/stat.h>
22 #include <minix/callnr.h>
23 #include <minix/u64.h>
24 #include <string.h>
25 #include <assert.h>
26 
27 #include "file.h"
28 #include "vnode.h"
29 
30 /* max. number of simultaneously pending select() calls */
31 #define MAXSELECTS 25
32 #define FROM_PROC 0
33 #define TO_PROC   1
34 
35 static struct selectentry {
36   struct fproc *requestor;	/* slot is free iff this is NULL */
37   endpoint_t req_endpt;
38   fd_set readfds, writefds, errorfds;
39   fd_set ready_readfds, ready_writefds, ready_errorfds;
40   fd_set *vir_readfds, *vir_writefds, *vir_errorfds;
41   struct filp *filps[OPEN_MAX];
42   int type[OPEN_MAX];
43   int nfds, nreadyfds;
44   int error;
45   char block;
46   char starting;
47   clock_t expiry;
48   minix_timer_t timer;	/* if expiry > 0 */
49 } selecttab[MAXSELECTS];
50 
51 static int copy_fdsets(struct selectentry *se, int nfds, int direction);
52 static void filp_status(struct filp *fp, int status);
53 static int is_deferred(struct selectentry *se);
54 static void restart_proc(struct selectentry *se);
55 static void ops2tab(int ops, int fd, struct selectentry *e);
56 static int is_regular_file(struct filp *f);
57 static int is_pipe(struct filp *f);
58 static int is_char_device(struct filp *f);
59 static void select_lock_filp(struct filp *f, int ops);
60 static int select_request_file(struct filp *f, int *ops, int block);
61 static int select_request_char(struct filp *f, int *ops, int block);
62 static int select_request_pipe(struct filp *f, int *ops, int block);
63 static void select_cancel_all(struct selectentry *e);
64 static void select_cancel_filp(struct filp *f);
65 static void select_return(struct selectentry *);
66 static void select_restart_filps(void);
67 static int tab2ops(int fd, struct selectentry *e);
68 static void wipe_select(struct selectentry *s);
69 
70 static struct fdtype {
71 	int (*select_request)(struct filp *, int *ops, int block);
72 	int (*type_match)(struct filp *f);
73 } fdtypes[] = {
74 	{ select_request_char, is_char_device },
75 	{ select_request_file, is_regular_file },
76 	{ select_request_pipe, is_pipe },
77 };
78 #define SEL_FDS		(sizeof(fdtypes) / sizeof(fdtypes[0]))
79 
80 /*===========================================================================*
81  *				do_select				     *
82  *===========================================================================*/
83 int do_select(void)
84 {
85 /* Implement the select(nfds, readfds, writefds, errorfds, timeout) system
86  * call. First we copy the arguments and verify their sanity. Then we check
87  * whether there are file descriptors that satisfy the select call right off
88  * the bat. If so, or if there are no ready file descriptors but the process
89  * requested to return immediately, we return the result. Otherwise we set a
90  * timeout and wait for either the file descriptors to become ready or the
91  * timer to go off. If no timeout value was provided, we wait indefinitely.
92  */
93   int r, nfds, do_timeout = 0, fd, s;
94   struct filp *f;
95   unsigned int type, ops;
96   struct timeval timeout;
97   struct selectentry *se;
98   vir_bytes vtimeout;
99 
100   nfds = job_m_in.m_lc_vfs_select.nfds;
101   vtimeout = job_m_in.m_lc_vfs_select.timeout;
102 
103   /* Sane amount of file descriptors? */
104   if (nfds < 0 || nfds > OPEN_MAX) return(EINVAL);
105 
106   /* Find a slot to store this select request */
107   for (s = 0; s < MAXSELECTS; s++)
108 	if (selecttab[s].requestor == NULL) /* Unused slot */
109 		break;
110   if (s >= MAXSELECTS) return(ENOSPC);
111 
112   se = &selecttab[s];
113   wipe_select(se);	/* Clear results of previous usage */
114   se->requestor = fp;
115   se->req_endpt = who_e;
116   se->vir_readfds = job_m_in.m_lc_vfs_select.readfds;
117   se->vir_writefds = job_m_in.m_lc_vfs_select.writefds;
118   se->vir_errorfds = job_m_in.m_lc_vfs_select.errorfds;
119 
120   /* Copy fdsets from the process */
121   if ((r = copy_fdsets(se, nfds, FROM_PROC)) != OK) {
122 	se->requestor = NULL;
123 	return(r);
124   }
125 
126   /* Did the process set a timeout value? If so, retrieve it. */
127   if (vtimeout != 0) {
128 	do_timeout = 1;
129 	r = sys_datacopy_wrapper(who_e, vtimeout, SELF, (vir_bytes) &timeout,
130 		sizeof(timeout));
131 	if (r != OK) {
132 		se->requestor = NULL;
133 		return(r);
134 	}
135   }
136 
137   /* No nonsense in the timeval */
138   if (do_timeout && (timeout.tv_sec < 0 || timeout.tv_usec < 0)) {
139 	se->requestor = NULL;
140 	return(EINVAL);
141   }
142 
143   /* If there is no timeout, we block forever. Otherwise, we block up to the
144    * specified time interval.
145    */
146   if (!do_timeout)	/* No timeout value set */
147 	se->block = 1;
148   else if (do_timeout && (timeout.tv_sec > 0 || timeout.tv_usec > 0))
149 	se->block = 1;
150   else			/* timeout set as (0,0) - this effects a poll */
151 	se->block = 0;
152   se->expiry = 0;	/* no timer set (yet) */
153 
154   /* We are going to lock filps, and that means that while locking a second
155    * filp, we might already get the results for the first one. In that case,
156    * the incoming results must not cause the select call to finish prematurely.
157    */
158   se->starting = TRUE;
159 
160   /* Verify that file descriptors are okay to select on */
161   for (fd = 0; fd < nfds; fd++) {
162 	/* Because the select() interface implicitly includes file descriptors
163 	 * you might not want to select on, we have to figure out whether we're
164 	 * interested in them. Typically, these file descriptors include fd's
165 	 * inherited from the parent proc and file descriptors that have been
166 	 * close()d, but had a lower fd than one in the current set.
167 	 */
168 	if (!(ops = tab2ops(fd, se)))
169 		continue; /* No operations set; nothing to do for this fd */
170 
171 	/* Get filp belonging to this fd */
172 	f = se->filps[fd] = get_filp(fd, VNODE_READ);
173 	if (f == NULL) {
174 		if (err_code == EBADF)
175 			r = err_code;
176 		else /* File descriptor is 'ready' to return EIO */
177 			r = EINTR;
178 
179 		se->requestor = NULL;
180 		return(r);
181 	}
182 
183 	/* Check file types. According to POSIX 2008:
184 	 * "The pselect() and select() functions shall support regular files,
185 	 * terminal and pseudo-terminal devices, FIFOs, pipes, and sockets. The
186 	 * behavior of pselect() and select() on file descriptors that refer to
187 	 * other types of file is unspecified."
188 	 *
189 	 * In our case, terminal and pseudo-terminal devices are handled by the
190 	 * TTY major and sockets by either INET major (socket type AF_INET) or
191 	 * UDS major (socket type AF_UNIX). Additionally, we give other
192 	 * character drivers the chance to handle select for any of their
193 	 * device nodes. Some may not implement support for select and let
194 	 * libchardriver return EBADF, which we then pass to the calling
195 	 * process once we receive the reply.
196 	 */
197 	se->type[fd] = -1;
198 	for (type = 0; type < SEL_FDS; type++) {
199 		if (fdtypes[type].type_match(f)) {
200 			se->type[fd] = type;
201 			se->nfds = fd+1;
202 			se->filps[fd]->filp_selectors++;
203 			break;
204 		}
205 	}
206 	unlock_filp(f);
207 	if (se->type[fd] == -1) { /* Type not found */
208 		se->requestor = NULL;
209 		return(EBADF);
210 	}
211   }
212 
213   /* Check all file descriptors in the set whether one is 'ready' now */
214   for (fd = 0; fd < nfds; fd++) {
215 	/* Again, check for involuntarily selected fd's */
216 	if (!(ops = tab2ops(fd, se)))
217 		continue; /* No operations set; nothing to do for this fd */
218 
219 	/* File descriptors selected for reading that are not opened for
220 	 * reading should be marked as readable, as read calls would fail
221 	 * immediately. The same applies to writing.
222 	 */
223 	f = se->filps[fd];
224 	if ((ops & SEL_RD) && !(f->filp_mode & R_BIT)) {
225 		ops2tab(SEL_RD, fd, se);
226 		ops &= ~SEL_RD;
227 	}
228 	if ((ops & SEL_WR) && !(f->filp_mode & W_BIT)) {
229 		ops2tab(SEL_WR, fd, se);
230 		ops &= ~SEL_WR;
231 	}
232 	/* Test filp for select operations if not already done so. e.g.,
233 	 * processes sharing a filp and both doing a select on that filp. */
234 	if ((f->filp_select_ops & ops) != ops) {
235 		int wantops;
236 
237 		wantops = (f->filp_select_ops |= ops);
238 		type = se->type[fd];
239 		select_lock_filp(f, wantops);
240 		r = fdtypes[type].select_request(f, &wantops, se->block);
241 		unlock_filp(f);
242 		if (r != OK && r != SUSPEND) {
243 			se->error = r;
244 			break; /* Error or bogus return code; abort */
245 		}
246 
247 		/* The select request above might have turned on/off some
248 		 * operations because they were 'ready' or not meaningful.
249 		 * Either way, we might have a result and we need to store them
250 		 * in the select table entry. */
251 		if (wantops & ops) ops2tab(wantops, fd, se);
252 	}
253   }
254 
255   /* At this point there won't be any blocking calls anymore. */
256   se->starting = FALSE;
257 
258   if ((se->nreadyfds > 0 || se->error != OK || !se->block) &&
259 		!is_deferred(se)) {
260 	/* An error occurred, or fd's were found that were ready to go right
261 	 * away, and/or we were instructed not to block at all. Must return
262 	 * immediately. Do not copy FD sets if an error occurred.
263 	 */
264 	if (se->error != OK)
265 		r = se->error;
266 	else
267 		r = copy_fdsets(se, se->nfds, TO_PROC);
268 	select_cancel_all(se);
269 	se->requestor = NULL;
270 
271 	if (r != OK)
272 		return(r);
273 	return(se->nreadyfds);
274   }
275 
276   /* Convert timeval to ticks and set the timer. If it fails, undo
277    * all, return error.
278    */
279   if (do_timeout) {
280 	int ticks;
281 	/* Open Group:
282 	 * "If the requested timeout interval requires a finer
283 	 * granularity than the implementation supports, the
284 	 * actual timeout interval shall be rounded up to the next
285 	 * supported value."
286 	 */
287 #define USECPERSEC 1000000
288 	while(timeout.tv_usec >= USECPERSEC) {
289 		/* this is to avoid overflow with *system_hz below */
290 		timeout.tv_usec -= USECPERSEC;
291 		timeout.tv_sec++;
292 	}
293 	ticks = timeout.tv_sec * system_hz +
294 		(timeout.tv_usec * system_hz + USECPERSEC-1) / USECPERSEC;
295 	se->expiry = ticks;
296 	set_timer(&se->timer, ticks, select_timeout_check, s);
297   }
298 
299   /* process now blocked */
300   suspend(FP_BLOCKED_ON_SELECT);
301   return(SUSPEND);
302 }
303 
304 /*===========================================================================*
305  *				is_deferred				     *
306  *===========================================================================*/
307 static int is_deferred(struct selectentry *se)
308 {
309 /* Find out whether this select has pending initial replies */
310 
311   int fd;
312   struct filp *f;
313 
314   /* The select call must have finished its initialization at all. */
315   if (se->starting) return(TRUE);
316 
317   for (fd = 0; fd < se->nfds; fd++) {
318 	if ((f = se->filps[fd]) == NULL) continue;
319 	if (f->filp_select_flags & (FSF_UPDATE|FSF_BUSY)) return(TRUE);
320   }
321 
322   return(FALSE);
323 }
324 
325 
326 /*===========================================================================*
327  *				is_regular_file				     *
328  *===========================================================================*/
329 static int is_regular_file(struct filp *f)
330 {
331   return(f && f->filp_vno && S_ISREG(f->filp_vno->v_mode));
332 }
333 
334 /*===========================================================================*
335  *				is_pipe					     *
336  *===========================================================================*/
337 static int is_pipe(struct filp *f)
338 {
339 /* Recognize either anonymous pipe or named pipe (FIFO) */
340   return(f && f->filp_vno && S_ISFIFO(f->filp_vno->v_mode));
341 }
342 
343 /*===========================================================================*
344  *				is_char_device				     *
345  *===========================================================================*/
346 static int is_char_device(struct filp *f)
347 {
348 /* See if this filp is a handle on a character device. This function MUST NOT
349  * block its calling thread. The given filp may or may not be locked.
350  */
351 
352   return (f && f->filp_vno && S_ISCHR(f->filp_vno->v_mode));
353 }
354 
355 /*===========================================================================*
356  *				select_request_char			     *
357  *===========================================================================*/
358 static int select_request_char(struct filp *f, int *ops, int block)
359 {
360 /* Check readiness status on a character device. Unless suitable results are
361  * available right now, this will only initiate the polling process, causing
362  * result processing to be deferred. This function MUST NOT block its calling
363  * thread. The given filp may or may not be locked.
364  */
365   int r, rops, major;
366   struct dmap *dp;
367 
368   major = major(f->filp_vno->v_sdev);
369   if (major < 0 || major >= NR_DEVICES) return(ENXIO);
370 
371   rops = *ops;
372 
373   /* By default, nothing to do */
374   *ops = 0;
375 
376   if (!block && (f->filp_select_flags & FSF_BLOCKED)) {
377 	/* This filp is blocked waiting for a reply, but we don't want to
378 	 * block ourselves. Unless we're awaiting the initial reply, these
379 	 * operations won't be ready */
380 	if (!(f->filp_select_flags & FSF_BUSY)) {
381 		if ((rops & SEL_RD) && (f->filp_select_flags & FSF_RD_BLOCK))
382 			rops &= ~SEL_RD;
383 		if ((rops & SEL_WR) && (f->filp_select_flags & FSF_WR_BLOCK))
384 			rops &= ~SEL_WR;
385 		if ((rops & SEL_ERR) && (f->filp_select_flags & FSF_ERR_BLOCK))
386 			rops &= ~SEL_ERR;
387 		if (!(rops & (SEL_RD|SEL_WR|SEL_ERR)))
388 			return(OK);
389 	}
390   }
391 
392   f->filp_select_flags |= FSF_UPDATE;
393   if (block) {
394 	rops |= SEL_NOTIFY;
395 	if (rops & SEL_RD)	f->filp_select_flags |= FSF_RD_BLOCK;
396 	if (rops & SEL_WR)	f->filp_select_flags |= FSF_WR_BLOCK;
397 	if (rops & SEL_ERR)	f->filp_select_flags |= FSF_ERR_BLOCK;
398   }
399 
400   if (f->filp_select_flags & FSF_BUSY)
401 	return(SUSPEND);
402 
403   dp = &dmap[major];
404   if (dp->dmap_sel_busy)
405 	return(SUSPEND);
406 
407   f->filp_select_flags &= ~FSF_UPDATE;
408   r = cdev_select(f->filp_vno->v_sdev, rops);
409   if (r != OK)
410 	return(r);
411 
412   dp->dmap_sel_busy = TRUE;
413   dp->dmap_sel_filp = f;
414   f->filp_select_flags |= FSF_BUSY;
415 
416   return(SUSPEND);
417 }
418 
419 /*===========================================================================*
420  *				select_request_file			     *
421  *===========================================================================*/
422 static int select_request_file(struct filp *UNUSED(f), int *UNUSED(ops),
423   int UNUSED(block))
424 {
425   /* Files are always ready, so output *ops is input *ops */
426   return(OK);
427 }
428 
429 /*===========================================================================*
430  *				select_request_pipe			     *
431  *===========================================================================*/
432 static int select_request_pipe(struct filp *f, int *ops, int block)
433 {
434 /* Check readiness status on a pipe. The given filp is locked. This function
435  * may block its calling thread if necessary.
436  */
437   int orig_ops, r = 0, err;
438 
439   orig_ops = *ops;
440 
441   if ((*ops & (SEL_RD|SEL_ERR))) {
442 	/* Check if we can read 1 byte */
443 	err = pipe_check(f, READING, f->filp_flags & ~O_NONBLOCK, 1,
444 			 1 /* Check only */);
445 
446 	if (err != SUSPEND)
447 		r |= SEL_RD;
448 	if (err < 0 && err != SUSPEND)
449 		r |= SEL_ERR;
450   }
451 
452   if ((*ops & (SEL_WR|SEL_ERR))) {
453 	/* Check if we can write 1 byte */
454 	err = pipe_check(f, WRITING, f->filp_flags & ~O_NONBLOCK, 1,
455 			 1 /* Check only */);
456 
457 	if (err != SUSPEND)
458 		r |= SEL_WR;
459 	if (err < 0 && err != SUSPEND)
460 		r |= SEL_ERR;
461   }
462 
463   /* Some options we collected might not be requested. */
464   *ops = r & orig_ops;
465 
466   if (!*ops && block)
467 	f->filp_pipe_select_ops |= orig_ops;
468 
469   return(OK);
470 }
471 
472 /*===========================================================================*
473  *				tab2ops					     *
474  *===========================================================================*/
475 static int tab2ops(int fd, struct selectentry *e)
476 {
477   int ops = 0;
478   if (FD_ISSET(fd, &e->readfds))  ops |= SEL_RD;
479   if (FD_ISSET(fd, &e->writefds)) ops |= SEL_WR;
480   if (FD_ISSET(fd, &e->errorfds)) ops |= SEL_ERR;
481 
482   return(ops);
483 }
484 
485 
486 /*===========================================================================*
487  *				ops2tab					     *
488  *===========================================================================*/
489 static void ops2tab(int ops, int fd, struct selectentry *e)
490 {
491   if ((ops & SEL_RD) && e->vir_readfds && FD_ISSET(fd, &e->readfds) &&
492       !FD_ISSET(fd, &e->ready_readfds)) {
493 	FD_SET(fd, &e->ready_readfds);
494 	e->nreadyfds++;
495   }
496 
497   if ((ops & SEL_WR) && e->vir_writefds && FD_ISSET(fd, &e->writefds) &&
498       !FD_ISSET(fd, &e->ready_writefds)) {
499 	FD_SET(fd, &e->ready_writefds);
500 	e->nreadyfds++;
501   }
502 
503   if ((ops & SEL_ERR) && e->vir_errorfds && FD_ISSET(fd, &e->errorfds) &&
504       !FD_ISSET(fd, &e->ready_errorfds)) {
505 	FD_SET(fd, &e->ready_errorfds);
506 	e->nreadyfds++;
507   }
508 }
509 
510 
511 /*===========================================================================*
512  *				copy_fdsets				     *
513  *===========================================================================*/
514 static int copy_fdsets(struct selectentry *se, int nfds, int direction)
515 {
516 /* Copy FD sets from or to the user process calling select(2). This function
517  * MUST NOT block the calling thread.
518  */
519   int r;
520   size_t fd_setsize;
521   endpoint_t src_e, dst_e;
522   fd_set *src_fds, *dst_fds;
523 
524   if (nfds < 0 || nfds > OPEN_MAX)
525 	panic("select copy_fdsets: nfds wrong: %d", nfds);
526 
527   /* Only copy back as many bits as the user expects. */
528   fd_setsize = (size_t) (howmany(nfds, __NFDBITS) * sizeof(__fd_mask));
529 
530   /* Set source and destination endpoints */
531   src_e = (direction == FROM_PROC) ? se->req_endpt : SELF;
532   dst_e = (direction == FROM_PROC) ? SELF : se->req_endpt;
533 
534   /* read set */
535   src_fds = (direction == FROM_PROC) ? se->vir_readfds : &se->ready_readfds;
536   dst_fds = (direction == FROM_PROC) ? &se->readfds : se->vir_readfds;
537   if (se->vir_readfds) {
538 	r = sys_datacopy_wrapper(src_e, (vir_bytes) src_fds, dst_e,
539 			(vir_bytes) dst_fds, fd_setsize);
540 	if (r != OK) return(r);
541   }
542 
543   /* write set */
544   src_fds = (direction == FROM_PROC) ? se->vir_writefds : &se->ready_writefds;
545   dst_fds = (direction == FROM_PROC) ? &se->writefds : se->vir_writefds;
546   if (se->vir_writefds) {
547 	r = sys_datacopy_wrapper(src_e, (vir_bytes) src_fds, dst_e,
548 			(vir_bytes) dst_fds, fd_setsize);
549 	if (r != OK) return(r);
550   }
551 
552   /* error set */
553   src_fds = (direction == FROM_PROC) ? se->vir_errorfds : &se->ready_errorfds;
554   dst_fds = (direction == FROM_PROC) ? &se->errorfds : se->vir_errorfds;
555   if (se->vir_errorfds) {
556 	r = sys_datacopy_wrapper(src_e, (vir_bytes) src_fds, dst_e,
557 			(vir_bytes) dst_fds, fd_setsize);
558 	if (r != OK) return(r);
559   }
560 
561   return(OK);
562 }
563 
564 
565 /*===========================================================================*
566  *				select_cancel_all			     *
567  *===========================================================================*/
568 static void select_cancel_all(struct selectentry *se)
569 {
570 /* Cancel select, possibly on success. Decrease select usage and cancel timer.
571  * This function MUST NOT block its calling thread.
572  */
573 
574   int fd;
575   struct filp *f;
576 
577   for (fd = 0; fd < se->nfds; fd++) {
578 	if ((f = se->filps[fd]) == NULL) continue;
579 	se->filps[fd] = NULL;
580 	select_cancel_filp(f);
581   }
582 
583   if (se->expiry > 0) {
584 	cancel_timer(&se->timer);
585 	se->expiry = 0;
586   }
587 
588   se->requestor = NULL;
589 }
590 
591 /*===========================================================================*
592  *				select_cancel_filp			     *
593  *===========================================================================*/
594 static void select_cancel_filp(struct filp *f)
595 {
596 /* Reduce the number of select users of this filp. This function MUST NOT block
597  * its calling thread.
598  */
599   dev_t major;
600 
601   assert(f);
602   assert(f->filp_selectors > 0);
603   assert(f->filp_count > 0);
604 
605   f->filp_selectors--;
606   if (f->filp_selectors == 0) {
607 	/* No one selecting on this filp anymore, forget about select state */
608 	f->filp_select_ops = 0;
609 	f->filp_select_flags = 0;
610 	f->filp_pipe_select_ops = 0;
611 
612 	/* If this filp is the subject of an ongoing select query to a
613 	 * character device, mark the query as stale, so that this filp will
614 	 * not be checked when the result arrives.
615 	 */
616 	if (is_char_device(f)) {
617 		major = major(f->filp_vno->v_sdev);
618 		if (dmap[major].dmap_sel_busy &&
619 			dmap[major].dmap_sel_filp == f)
620 			dmap[major].dmap_sel_filp = NULL; /* leave _busy set */
621 	}
622   }
623 }
624 
625 /*===========================================================================*
626  *				select_return				     *
627  *===========================================================================*/
628 static void select_return(struct selectentry *se)
629 {
630 /* Return the results of a select call to the user process and revive the
631  * process. This function MUST NOT block its calling thread.
632  */
633   int r;
634 
635   assert(!is_deferred(se));	/* Not done yet, first wait for async reply */
636 
637   select_cancel_all(se);
638 
639   if (se->error != OK)
640 	r = se->error;
641   else
642 	r = copy_fdsets(se, se->nfds, TO_PROC);
643   if (r == OK)
644 	r = se->nreadyfds;
645 
646   revive(se->req_endpt, r);
647 }
648 
649 
650 /*===========================================================================*
651  *				select_callback			             *
652  *===========================================================================*/
653 void select_callback(struct filp *f, int status)
654 {
655 /* The status of a filp has changed, with the given ready operations or error.
656  * This function is currently called only for pipes, and holds the lock to
657  * the filp.
658  */
659 
660   filp_status(f, status);
661 }
662 
663 /*===========================================================================*
664  *				init_select  				     *
665  *===========================================================================*/
666 void init_select(void)
667 {
668   int s;
669 
670   for (s = 0; s < MAXSELECTS; s++)
671 	init_timer(&selecttab[s].timer);
672 }
673 
674 
675 /*===========================================================================*
676  *				select_forget			             *
677  *===========================================================================*/
678 void select_forget(void)
679 {
680 /* The calling thread's associated process is expected to be unpaused, due to
681  * a signal that is supposed to interrupt the current system call. Totally
682  * forget about the select(). This function may block its calling thread if
683  * necessary (but it doesn't).
684  */
685   int slot;
686   struct selectentry *se;
687 
688   for (slot = 0; slot < MAXSELECTS; slot++) {
689 	se = &selecttab[slot];
690 	if (se->requestor == fp)
691 		break;
692   }
693 
694   if (slot >= MAXSELECTS) return;	/* Entry not found */
695 
696   assert(se->starting == FALSE);
697 
698   /* Do NOT test on is_deferred here. We can safely cancel ongoing queries. */
699   select_cancel_all(se);
700 }
701 
702 
703 /*===========================================================================*
704  *				select_timeout_check	  	     	     *
705  *===========================================================================*/
706 void select_timeout_check(minix_timer_t *timer)
707 {
708 /* An alarm has gone off for one of the select queries. This function MUST NOT
709  * block its calling thread.
710  */
711   int s;
712   struct selectentry *se;
713 
714   s = tmr_arg(timer)->ta_int;
715   if (s < 0 || s >= MAXSELECTS) return;	/* Entry does not exist */
716 
717   se = &selecttab[s];
718   if (se->requestor == NULL) return;
719   if (se->expiry <= 0) return;	/* Strange, did we even ask for a timeout? */
720   se->expiry = 0;
721   if (is_deferred(se)) return;	/* Wait for initial replies to CDEV_SELECT */
722   select_return(se);
723 }
724 
725 
726 /*===========================================================================*
727  *				select_unsuspend_by_endpt  	     	     *
728  *===========================================================================*/
729 void select_unsuspend_by_endpt(endpoint_t proc_e)
730 {
731 /* Revive blocked processes when a driver has disappeared */
732 
733   int fd, s, major;
734   struct selectentry *se;
735   struct filp *f;
736 
737   for (s = 0; s < MAXSELECTS; s++) {
738 	int wakehim = 0;
739 	se = &selecttab[s];
740 	if (se->requestor == NULL) continue;
741 	if (se->requestor->fp_endpoint == proc_e) {
742 		assert(se->requestor->fp_flags & FP_EXITING);
743 		select_cancel_all(se);
744 		continue;
745 	}
746 
747 	for (fd = 0; fd < se->nfds; fd++) {
748 		if ((f = se->filps[fd]) == NULL || f->filp_vno == NULL)
749 			continue;
750 
751 		major = major(f->filp_vno->v_sdev);
752 		if (dmap_driver_match(proc_e, major)) {
753 			se->filps[fd] = NULL;
754 			se->error = EIO;
755 			select_cancel_filp(f);
756 			wakehim = 1;
757 		}
758 	}
759 
760 	if (wakehim && !is_deferred(se))
761 		select_return(se);
762   }
763 }
764 
765 /*===========================================================================*
766  *				select_reply1				     *
767  *===========================================================================*/
768 void select_reply1(driver_e, minor, status)
769 endpoint_t driver_e;
770 int minor;
771 int status;
772 {
773 /* Handle the initial reply to CDEV_SELECT request. This function MUST NOT
774  * block its calling thread.
775  */
776   int major;
777   dev_t dev;
778   struct filp *f;
779   struct dmap *dp;
780   struct vnode *vp;
781 
782   /* Figure out which device is replying */
783   if ((dp = get_dmap(driver_e)) == NULL) return;
784 
785   major = dp-dmap;
786   dev = makedev(major, minor);
787 
788   /* Get filp belonging to character special file */
789   if (!dp->dmap_sel_busy) {
790 	printf("VFS (%s:%d): major %d was not expecting a CDEV_SELECT reply\n",
791 		__FILE__, __LINE__, major);
792 	return;
793   }
794 
795   /* The select filp may have been set to NULL if the requestor has been
796    * unpaused in the meantime. In that case, we ignore the result, but we do
797    * look for other filps to restart later.
798    */
799   if ((f = dp->dmap_sel_filp) != NULL) {
800 	/* Find vnode and check we got a reply from the device we expected */
801 	vp = f->filp_vno;
802 	assert(vp != NULL);
803 	assert(S_ISCHR(vp->v_mode));
804 	if (vp->v_sdev != dev) {
805 		/* This should never happen. The driver may be misbehaving.
806 		 * For now we assume that the reply we want will arrive later..
807 		 */
808 		printf("VFS (%s:%d): expected reply from dev %llx not %llx\n",
809 			__FILE__, __LINE__, vp->v_sdev, dev);
810 		return;
811 	}
812   }
813 
814   /* No longer waiting for a reply from this device */
815   dp->dmap_sel_busy = FALSE;
816   dp->dmap_sel_filp = NULL;
817 
818   /* Process the select result only if the filp is valid. */
819   if (f != NULL) {
820 	assert(f->filp_count >= 1);
821 	assert(f->filp_select_flags & FSF_BUSY);
822 
823 	f->filp_select_flags &= ~FSF_BUSY;
824 
825 	/* The select call is done now, except when
826 	 * - another process started a select on the same filp with possibly a
827 	 *   different set of operations.
828 	 * - a process does a select on the same filp but using different file
829 	 *   descriptors.
830 	 * - the select has a timeout. Upon receiving this reply the operations
831 	 *   might not be ready yet, so we want to wait for that to ultimately
832 	 *   happen.
833 	 *   Therefore we need to keep remembering what the operations are.
834 	 */
835 	if (!(f->filp_select_flags & (FSF_UPDATE|FSF_BLOCKED)))
836 		f->filp_select_ops = 0;		/* done selecting */
837 	else if (status > 0 && !(f->filp_select_flags & FSF_UPDATE))
838 		/* there may be operations pending */
839 		f->filp_select_ops &= ~status;
840 
841 	/* Record new filp status */
842 	if (!(status == 0 && (f->filp_select_flags & FSF_BLOCKED))) {
843 		if (status > 0) {	/* operations ready */
844 			if (status & SEL_RD)
845 				f->filp_select_flags &= ~FSF_RD_BLOCK;
846 			if (status & SEL_WR)
847 				f->filp_select_flags &= ~FSF_WR_BLOCK;
848 			if (status & SEL_ERR)
849 				f->filp_select_flags &= ~FSF_ERR_BLOCK;
850 		} else if (status < 0) { /* error */
851 			/* Always unblock upon error */
852 			f->filp_select_flags &= ~FSF_BLOCKED;
853 		}
854 	}
855 
856 	filp_status(f, status); /* Tell filp owners about the results */
857   }
858 
859   select_restart_filps();
860 }
861 
862 
863 /*===========================================================================*
864  *				select_reply2				     *
865  *===========================================================================*/
866 void select_reply2(driver_e, minor, status)
867 endpoint_t driver_e;
868 int minor;
869 int status;
870 {
871 /* Handle secondary reply to DEV_SELECT request. A secondary reply occurs when
872  * the select request is 'blocking' until an operation becomes ready. This
873  * function MUST NOT block its calling thread.
874  */
875   int major, slot, found, fd;
876   dev_t dev;
877   struct filp *f;
878   struct dmap *dp;
879   struct vnode *vp;
880   struct selectentry *se;
881 
882   if (status == 0) {
883 	printf("VFS (%s:%d): weird status (%d) to report\n",
884 		__FILE__, __LINE__, status);
885 	return;
886   }
887 
888   /* Figure out which device is replying */
889   if ((dp = get_dmap(driver_e)) == NULL) {
890 	printf("VFS (%s:%d): endpoint %d is not a known driver endpoint\n",
891 		__FILE__, __LINE__, driver_e);
892 	return;
893   }
894   major = dp-dmap;
895   dev = makedev(major, minor);
896 
897   /* Find all file descriptors selecting for this device */
898   for (slot = 0; slot < MAXSELECTS; slot++) {
899 	se = &selecttab[slot];
900 	if (se->requestor == NULL) continue;	/* empty slot */
901 
902 	found = FALSE;
903 	for (fd = 0; fd < se->nfds; fd++) {
904 		if ((f = se->filps[fd]) == NULL) continue;
905 		if ((vp = f->filp_vno) == NULL) continue;
906 		if (!S_ISCHR(vp->v_mode)) continue;
907 		if (vp->v_sdev != dev) continue;
908 
909 		if (status > 0) {	/* Operations ready */
910 			/* Clear the replied bits from the request
911 			 * mask unless FSF_UPDATE is set.
912 			 */
913 			if (!(f->filp_select_flags & FSF_UPDATE))
914 				f->filp_select_ops &= ~status;
915 			if (status & SEL_RD)
916 				f->filp_select_flags &= ~FSF_RD_BLOCK;
917 			if (status & SEL_WR)
918 				f->filp_select_flags &= ~FSF_WR_BLOCK;
919 			if (status & SEL_ERR)
920 				f->filp_select_flags &= ~FSF_ERR_BLOCK;
921 
922 			ops2tab(status, fd, se);
923 		} else {
924 			f->filp_select_flags &= ~FSF_BLOCKED;
925 			se->error = status;
926 		}
927 		found = TRUE;
928 	}
929 	/* Even if 'found' is set now, nothing may have changed for this call,
930 	 * as it may not have been interested in the operations that were
931 	 * reported as ready. Let restart_proc check.
932 	 */
933 	if (found)
934 		restart_proc(se);
935   }
936 
937   select_restart_filps();
938 }
939 
940 /*===========================================================================*
941  *				select_restart_filps			     *
942  *===========================================================================*/
943 static void select_restart_filps(void)
944 {
945 /* We got a result from a character driver, and now we need to check if we can
946  * restart deferred polling operations. This function MUST NOT block its
947  * calling thread.
948  */
949   int fd, slot;
950   struct filp *f;
951   struct selectentry *se;
952 
953   /* Locate filps that can be restarted */
954   for (slot = 0; slot < MAXSELECTS; slot++) {
955 	se = &selecttab[slot];
956 	if (se->requestor == NULL) continue; /* empty slot */
957 
958 	/* Only 'deferred' processes are eligible to restart */
959 	if (!is_deferred(se)) continue;
960 
961 	/* Find filps that are not waiting for a reply, but have an updated
962 	 * status (i.e., another select on the same filp with possibly a
963 	 * different set of operations is to be done), and thus requires the
964 	 * select request to be sent again).
965 	 */
966 	for (fd = 0; fd < se->nfds; fd++) {
967 		int r, wantops, ops;
968 		if ((f = se->filps[fd]) == NULL) continue;
969 		if (f->filp_select_flags & FSF_BUSY) /* Still waiting for */
970 			continue;		     /* initial reply */
971 		if (!(f->filp_select_flags & FSF_UPDATE)) /* Must be in  */
972 			continue;			  /* 'update' state */
973 
974 		/* This function is suitable only for character devices. In
975 		 * particular, checking pipes the same way would introduce a
976 		 * serious locking problem.
977 		 */
978 		assert(is_char_device(f));
979 
980 		wantops = ops = f->filp_select_ops;
981 		r = select_request_char(f, &wantops, se->block);
982 		if (r != OK && r != SUSPEND) {
983 			se->error = r;
984 			restart_proc(se);
985 			break; /* Error or bogus return code; abort */
986 		}
987 		if (wantops & ops) ops2tab(wantops, fd, se);
988 	}
989   }
990 }
991 
992 /*===========================================================================*
993  *				filp_status				     *
994  *===========================================================================*/
995 static void filp_status(f, status)
996 struct filp *f;
997 int status;
998 {
999 /* Tell processes that need to know about the status of this filp. This
1000  * function MUST NOT block its calling thread.
1001  */
1002   int fd, slot, found;
1003   struct selectentry *se;
1004 
1005   for (slot = 0; slot < MAXSELECTS; slot++) {
1006 	se = &selecttab[slot];
1007 	if (se->requestor == NULL) continue; /* empty slot */
1008 
1009 	found = FALSE;
1010 	for (fd = 0; fd < se->nfds; fd++) {
1011 		if (se->filps[fd] != f) continue;
1012 		if (status < 0)
1013 			se->error = status;
1014 		else
1015 			ops2tab(status, fd, se);
1016 		found = TRUE;
1017 	}
1018 	if (found)
1019 		restart_proc(se);
1020   }
1021 }
1022 
1023 /*===========================================================================*
1024  *				restart_proc				     *
1025  *===========================================================================*/
1026 static void restart_proc(se)
1027 struct selectentry *se;
1028 {
1029 /* Tell process about select results (if any) unless there are still results
1030  * pending. This function MUST NOT block its calling thread.
1031  */
1032 
1033   if ((se->nreadyfds > 0 || se->error != OK || !se->block) && !is_deferred(se))
1034 	select_return(se);
1035 }
1036 
1037 /*===========================================================================*
1038  *				wipe_select				     *
1039  *===========================================================================*/
1040 static void wipe_select(struct selectentry *se)
1041 {
1042   se->nfds = 0;
1043   se->nreadyfds = 0;
1044   se->error = OK;
1045   se->block = 0;
1046   memset(se->filps, 0, sizeof(se->filps));
1047 
1048   FD_ZERO(&se->readfds);
1049   FD_ZERO(&se->writefds);
1050   FD_ZERO(&se->errorfds);
1051   FD_ZERO(&se->ready_readfds);
1052   FD_ZERO(&se->ready_writefds);
1053   FD_ZERO(&se->ready_errorfds);
1054 }
1055 
1056 /*===========================================================================*
1057  *				select_lock_filp			     *
1058  *===========================================================================*/
1059 static void select_lock_filp(struct filp *f, int ops)
1060 {
1061 /* Lock a filp and vnode based on which operations are requested. This function
1062  * may block its calling thread, obviously.
1063  */
1064   tll_access_t locktype;
1065 
1066   locktype = VNODE_READ; /* By default */
1067 
1068   if (ops & (SEL_WR|SEL_ERR))
1069 	/* Selecting for error or writing requires exclusive access */
1070 	locktype = VNODE_WRITE;
1071 
1072   lock_filp(f, locktype);
1073 }
1074