1 /*
2  * selector.c
3  *
4  * Code for abstracting select for files and timers.
5  *
6  * Author: MontaVista Software, Inc.
7  *         Corey Minyard <minyard@mvista.com>
8  *         source@mvista.com
9  *
10  * Copyright 2002,2003 MontaVista Software Inc.
11  *
12  *  This program is free software; you can redistribute it and/or
13  *  modify it under the terms of the GNU Lesser General Public License
14  *  as published by the Free Software Foundation; either version 2 of
15  *  the License, or (at your option) any later version.
16  *
17  *
18  *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
19  *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
20  *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
23  *  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
24  *  OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
25  *  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
26  *  TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
27  *  USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  *
29  *  You should have received a copy of the GNU Lesser General Public
30  *  License along with this program; if not, write to the Free
31  *  Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
32  */
33 
34 /* This file holds code to abstract the "select" call and make it
35    easier to use.  The main thread lives here, the rest of the code
36    uses a callback interface.  Basically, other parts of the program
37    can register file descriptors with this code, when interesting
38    things happen on those file descriptors this code will call
39    routines registered with it. */
40 
41 #include "selector.h"
42 
43 #include <sys/time.h>
44 #include <time.h>
45 #include <sys/types.h>
46 #include <unistd.h>
47 #include <stdlib.h>
48 #include <errno.h>
49 #include <stdio.h>
50 #include <syslog.h>
51 #include <signal.h>
52 #include <string.h>
53 #ifdef HAVE_EPOLL_PWAIT
54 #include <sys/epoll.h>
55 #else
56 #define EPOLL_CTL_ADD 0
57 #define EPOLL_CTL_DEL 0
58 #define EPOLL_CTL_MOD 0
59 #endif
60 
61 typedef struct fd_state_s
62 {
63     int               deleted;
64     unsigned int      use_count;
65     sel_fd_cleared_cb done;
66 } fd_state_t;
67 
68 /* The control structure for each file descriptor. */
69 typedef struct fd_control_s
70 {
71     /* This structure is allocated when an FD is set and it holds
72        whether the FD has been deleted and information to handle the
73        deletion. */
74     fd_state_t       *state;
75     void             *data;		/* Operation-specific data */
76     sel_fd_handler_t handle_read;
77     sel_fd_handler_t handle_write;
78     sel_fd_handler_t handle_except;
79 } fd_control_t;
80 
81 typedef struct heap_val_s
82 {
83     /* Set this to the function to call when the timeout occurs. */
84     sel_timeout_handler_t handler;
85 
86     /* Set this to whatever you like.  You can use this to store your
87        own data. */
88     void *user_data;
89 
90     /* Set this to the time when the timer will go off. */
91     struct timeval timeout;
92 
93     /* Who owns me? */
94     struct selector_s *sel;
95 
96     /* Am I currently running? */
97     int in_heap;
98 
99     /* Am I currently stopped? */
100     int stopped;
101 
102     /* Have I been freed? */
103     int freed;
104 
105     /* Am I currently in a handler? */
106     int in_handler;
107 
108     sel_timeout_handler_t done_handler;
109     void *done_cb_data;
110 } heap_val_t;
111 
112 typedef struct theap_s theap_t;
113 #define heap_s theap_s
114 #define heap_node_s sel_timer_s
115 #define HEAP_EXPORT_NAME(s) theap_ ## s
116 #define HEAP_NAMES_LOCAL static
117 #define HEAP_OUTPUT_PRINTF "(%ld.%7.7ld)"
118 #define HEAP_OUTPUT_DATA pos->timeout.tv_sec, pos->timeout.tv_usec
119 
/* Three-way comparison of two timevals.  Returns -1 if tv1 is earlier
   than tv2, 1 if it is later, and 0 if both are equal.  Seconds are
   compared first, microseconds only break ties. */
static int
cmp_timeval(const struct timeval *tv1, const struct timeval *tv2)
{
    if (tv1->tv_sec != tv2->tv_sec)
        return (tv1->tv_sec < tv2->tv_sec) ? -1 : 1;

    if (tv1->tv_usec != tv2->tv_usec)
        return (tv1->tv_usec < tv2->tv_usec) ? -1 : 1;

    return 0;
}
137 
138 static int
heap_cmp_key(heap_val_t * v1,heap_val_t * v2)139 heap_cmp_key(heap_val_t *v1, heap_val_t *v2)
140 {
141     return cmp_timeval(&v1->timeout, &v2->timeout);
142 }
143 
144 #include "heap.h"
145 
146 /* Used to build a list of threads that may need to be woken if a
147    timer on the top of the heap changes, or an FD is added/removed.
148    See wake_sel_thread() for more info. */
149 typedef struct sel_wait_list_s
150 {
151     /* The thread to wake up. */
152     long            thread_id;
153 
154     /* How to wake it. */
155     sel_send_sig_cb send_sig;
156     void            *send_sig_cb_data;
157 
158     /* This is the memory used to hold the timeout for select
159        operation. */
160     volatile struct timeval *timeout;
161 
162     struct sel_wait_list_s *next, *prev;
163 } sel_wait_list_t;
164 
165 struct sel_runner_s
166 {
167     struct selector_s *sel;
168     sel_runner_func_t func;
169     void *cb_data;
170     int in_use;
171     sel_runner_t *next;
172 };
173 
174 struct selector_s
175 {
176     /* This is an array of all the file descriptors possible.  This is
177        moderately wasteful of space, but easy to do.  Hey, memory is
178        cheap. */
179     volatile fd_control_t fds[FD_SETSIZE];
180 
181   /* These are the offical fd_sets used to track what file descriptors
182        need to be monitored. */
183     volatile fd_set read_set;
184     volatile fd_set write_set;
185     volatile fd_set except_set;
186 
187     volatile int maxfd; /* The largest file descriptor registered with
188 			   this code. */
189 
190     void *fd_lock;
191 
192     /* The timer heap. */
193     theap_t timer_heap;
194 
195     /* This is a list of items waiting to be woken up because they are
196        sitting in a select.  See wake_sel_thread() for more info. */
197     sel_wait_list_t wait_list;
198 
199     void *timer_lock;
200 
201     sel_runner_t *runner_head;
202     sel_runner_t *runner_tail;
203 
204     int wake_sig;
205 
206 #ifdef HAVE_EPOLL_PWAIT
207     int epollfd;
208 #endif
209     sel_lock_t *(*sel_lock_alloc)(void *cb_data);
210     void (*sel_lock_free)(sel_lock_t *);
211     void (*sel_lock)(sel_lock_t *);
212     void (*sel_unlock)(sel_lock_t *);
213 };
214 
215 static void
sel_timer_lock(struct selector_s * sel)216 sel_timer_lock(struct selector_s *sel)
217 {
218     if (sel->sel_lock)
219 	sel->sel_lock(sel->timer_lock);
220 }
221 
222 static void
sel_timer_unlock(struct selector_s * sel)223 sel_timer_unlock(struct selector_s *sel)
224 {
225     if (sel->sel_lock)
226 	sel->sel_unlock(sel->timer_lock);
227 }
228 
229 static void
sel_fd_lock(struct selector_s * sel)230 sel_fd_lock(struct selector_s *sel)
231 {
232     if (sel->sel_lock)
233 	sel->sel_lock(sel->fd_lock);
234 }
235 
236 static void
sel_fd_unlock(struct selector_s * sel)237 sel_fd_unlock(struct selector_s *sel)
238 {
239     if (sel->sel_lock)
240 	sel->sel_unlock(sel->fd_lock);
241 }
242 
/* This function will wake the SEL thread.  It must be called with the
   timer lock held, because it messes with timeout.

   The operation is subtle, but it does work.  The timeout in the
   selector is the data passed in (must be the actual data) as the
   timeout to select.  When we want to wake the select, we set the
   timeout to zero first.  That way, if the select has calculated the
   timeout but has not yet called select, then this will set it to
   zero (causing it to wait zero time).  If select has already been
   called, then the signal send should wake it up.  We only need to do
   this after we have calculated the timeout, but before we have
   called select, thus only things in the wait list matter. */
255 static void
wake_sel_thread(struct selector_s * sel)256 wake_sel_thread(struct selector_s *sel)
257 {
258     sel_wait_list_t *item;
259 
260     item = sel->wait_list.next;
261     while (item != &sel->wait_list) {
262 	item->timeout->tv_sec = 0;
263 	item->timeout->tv_usec = 0;
264 	if (item->send_sig)
265 	    item->send_sig(item->thread_id, item->send_sig_cb_data);
266 	item = item->next;
267     }
268 }
269 
/* Wake the waiting threads after an FD change.  Note that this also
   releases the FD lock, so the caller must hold it on entry and must
   not unlock it again afterwards. */
static void
wake_fd_sel_thread(struct selector_s *sel)
{
    wake_sel_thread(sel);

    sel_fd_unlock(sel);
}
276 
277 static void
wake_timer_sel_thread(struct selector_s * sel,volatile sel_timer_t * old_top)278 wake_timer_sel_thread(struct selector_s *sel, volatile sel_timer_t *old_top)
279 {
280     if (old_top != theap_get_top(&sel->timer_heap))
281 	/* If the top value changed, restart the waiting thread. */
282 	wake_sel_thread(sel);
283 }
284 
285 /* Wait list management.  These *must* be called with the timer list
286    locked, and the values in the item *must not* change while in the
287    list. */
288 static void
add_sel_wait_list(struct selector_s * sel,sel_wait_list_t * item,sel_send_sig_cb send_sig,void * cb_data,long thread_id,volatile struct timeval * timeout)289 add_sel_wait_list(struct selector_s *sel, sel_wait_list_t *item,
290 		  sel_send_sig_cb send_sig,
291 		  void            *cb_data,
292 		  long thread_id, volatile struct timeval *timeout)
293 {
294     item->thread_id = thread_id;
295     item->timeout = timeout;
296     item->send_sig = send_sig;
297     item->send_sig_cb_data = cb_data;
298     item->next = sel->wait_list.next;
299     item->prev = &sel->wait_list;
300     sel->wait_list.next->prev = item;
301     sel->wait_list.next = item;
302 }
303 static void
remove_sel_wait_list(struct selector_s * sel,sel_wait_list_t * item)304 remove_sel_wait_list(struct selector_s *sel, sel_wait_list_t *item)
305 {
306     item->next->prev = item->prev;
307     item->prev->next = item->next;
308 }
309 
310 /* Initialize a single file descriptor. */
311 static void
init_fd(fd_control_t * fd)312 init_fd(fd_control_t *fd)
313 {
314     fd->state = NULL;
315     fd->data = NULL;
316     fd->handle_read = NULL;
317     fd->handle_write = NULL;
318     fd->handle_except = NULL;
319 }
320 
#ifdef HAVE_EPOLL_PWAIT
/*
 * Push the current interest in fd, as recorded in the selector's
 * read/write/except sets, to the epoll instance using operation op
 * (EPOLL_CTL_ADD, EPOLL_CTL_MOD or EPOLL_CTL_DEL).  Events are always
 * registered one-shot.
 *
 * Returns 1 if epoll is not in use for this selector, in which case
 * the caller has to wake the select thread itself; returns 0
 * otherwise.  The result of epoll_ctl() is not reported.
 */
static int
sel_update_epoll(struct selector_s *sel, int fd, int op)
{
    struct epoll_event event;

    if (sel->epollfd < 0)
        return 1;

    memset(&event, 0, sizeof(event));
    event.events = EPOLLONESHOT;
    event.data.fd = fd;
    if (FD_ISSET(fd, &sel->read_set))
        event.events |= EPOLLIN | EPOLLHUP;
    if (FD_ISSET(fd, &sel->write_set))
        event.events |= EPOLLOUT;
    /* Exception monitoring is controlled by except_set, not by
       write_set. */
    if (FD_ISSET(fd, &sel->except_set))
        event.events |= EPOLLERR | EPOLLPRI;

    epoll_ctl(sel->epollfd, op, fd, &event);
    return 0;
}
#else
/* epoll is not available: always tell the caller to wake the select
   thread itself. */
static int
sel_update_epoll(struct selector_s *sel, int fd, int op)
{
    return 1;
}
#endif
350 
351 /* Set the handlers for a file descriptor. */
352 int
sel_set_fd_handlers(struct selector_s * sel,int fd,void * data,sel_fd_handler_t read_handler,sel_fd_handler_t write_handler,sel_fd_handler_t except_handler,sel_fd_cleared_cb done)353 sel_set_fd_handlers(struct selector_s *sel,
354 		    int               fd,
355 		    void              *data,
356 		    sel_fd_handler_t  read_handler,
357 		    sel_fd_handler_t  write_handler,
358 		    sel_fd_handler_t  except_handler,
359 		    sel_fd_cleared_cb done)
360 {
361     fd_control_t *fdc;
362     fd_state_t   *state, *oldstate = NULL;
363     void         *olddata = NULL;
364     int          added = 1;
365 
366     state = malloc(sizeof(*state));
367     if (!state)
368 	return ENOMEM;
369     state->deleted = 0;
370     state->use_count = 0;
371     state->done = done;
372 
373     sel_fd_lock(sel);
374     fdc = (fd_control_t *) &(sel->fds[fd]);
375     if (fdc->state) {
376 	oldstate = fdc->state;
377 	olddata = fdc->data;
378 	added = 0;
379     }
380     fdc->state = state;
381     fdc->data = data;
382     fdc->handle_read = read_handler;
383     fdc->handle_write = write_handler;
384     fdc->handle_except = except_handler;
385 
386     if (added) {
387 	/* Move maxfd up if necessary. */
388 	if (fd > sel->maxfd) {
389 	    sel->maxfd = fd;
390 	}
391 
392 	if (sel_update_epoll(sel, fd, EPOLL_CTL_ADD)) {
393 	    wake_fd_sel_thread(sel);
394 	    goto out;
395 	}
396     }
397     sel_fd_unlock(sel);
398 
399  out:
400     if (oldstate) {
401 	oldstate->deleted = 1;
402 	if (oldstate->use_count == 0) {
403 	    if (oldstate->done)
404 		oldstate->done(fd, olddata);
405 	    free(oldstate);
406 	}
407     }
408     return 0;
409 }
410 
411 /* Clear the handlers for a file descriptor and remove it from
412    select's monitoring. */
413 void
sel_clear_fd_handlers(struct selector_s * sel,int fd)414 sel_clear_fd_handlers(struct selector_s *sel,
415 		      int        fd)
416 {
417     fd_control_t *fdc;
418     fd_state_t   *oldstate = NULL;
419     void         *olddata = NULL;
420 
421     sel_fd_lock(sel);
422     fdc = (fd_control_t *) &(sel->fds[fd]);
423 
424     if (fdc->state) {
425 	oldstate = fdc->state;
426 	olddata = fdc->data;
427 	fdc->state = NULL;
428 
429 	sel_update_epoll(sel, fd, EPOLL_CTL_DEL);
430     }
431 
432     init_fd(fdc);
433     FD_CLR(fd, &sel->read_set);
434     FD_CLR(fd, &sel->write_set);
435     FD_CLR(fd, &sel->except_set);
436 
437     /* Move maxfd down if necessary. */
438     if (fd == sel->maxfd) {
439 	while ((sel->maxfd >= 0) && (! sel->fds[sel->maxfd].state)) {
440 	    sel->maxfd--;
441 	}
442     }
443 
444     sel_fd_unlock(sel);
445 
446     if (oldstate) {
447 	oldstate->deleted = 1;
448 	if (oldstate->use_count == 0) {
449 	    if (oldstate->done)
450 		oldstate->done(fd, olddata);
451 	    free(oldstate);
452 	}
453     }
454 }
455 
456 /* Set whether the file descriptor will be monitored for data ready to
457    read on the file descriptor. */
458 void
sel_set_fd_read_handler(struct selector_s * sel,int fd,int state)459 sel_set_fd_read_handler(struct selector_s *sel, int fd, int state)
460 {
461     fd_control_t *fdc = (fd_control_t *) &(sel->fds[fd]);
462 
463     sel_fd_lock(sel);
464     if (!fdc->state)
465 	goto out;
466 
467     if (state == SEL_FD_HANDLER_ENABLED) {
468 	if (FD_ISSET(fd, &sel->read_set))
469 	    goto out;
470 	FD_SET(fd, &sel->read_set);
471     } else if (state == SEL_FD_HANDLER_DISABLED) {
472 	if (!FD_ISSET(fd, &sel->read_set))
473 	    goto out;
474 	FD_CLR(fd, &sel->read_set);
475     }
476     if (sel_update_epoll(sel, fd, EPOLL_CTL_MOD)) {
477 	wake_fd_sel_thread(sel);
478 	return;
479     }
480 
481  out:
482     sel_fd_unlock(sel);
483 }
484 
485 /* Set whether the file descriptor will be monitored for when the file
486    descriptor can be written to. */
487 void
sel_set_fd_write_handler(struct selector_s * sel,int fd,int state)488 sel_set_fd_write_handler(struct selector_s *sel, int fd, int state)
489 {
490     fd_control_t *fdc = (fd_control_t *) &(sel->fds[fd]);
491 
492     sel_fd_lock(sel);
493     if (!fdc->state)
494 	goto out;
495 
496     if (state == SEL_FD_HANDLER_ENABLED) {
497 	if (FD_ISSET(fd, &sel->write_set))
498 	    goto out;
499 	FD_SET(fd, &sel->write_set);
500     } else if (state == SEL_FD_HANDLER_DISABLED) {
501 	if (!FD_ISSET(fd, &sel->write_set))
502 	    goto out;
503 	FD_CLR(fd, &sel->write_set);
504     }
505     if (sel_update_epoll(sel, fd, EPOLL_CTL_MOD)) {
506 	wake_fd_sel_thread(sel);
507 	return;
508     }
509 
510  out:
511     sel_fd_unlock(sel);
512 }
513 
514 /* Set whether the file descriptor will be monitored for exceptions
515    on the file descriptor. */
516 void
sel_set_fd_except_handler(struct selector_s * sel,int fd,int state)517 sel_set_fd_except_handler(struct selector_s *sel, int fd, int state)
518 {
519     fd_control_t *fdc = (fd_control_t *) &(sel->fds[fd]);
520 
521     sel_fd_lock(sel);
522     if (!fdc->state)
523 	goto out;
524 
525     if (state == SEL_FD_HANDLER_ENABLED) {
526 	if (FD_ISSET(fd, &sel->except_set))
527 	    goto out;
528 	FD_SET(fd, &sel->except_set);
529     } else if (state == SEL_FD_HANDLER_DISABLED) {
530 	if (!FD_ISSET(fd, &sel->except_set))
531 	    goto out;
532 	FD_CLR(fd, &sel->except_set);
533     }
534     if (sel_update_epoll(sel, fd, EPOLL_CTL_MOD)) {
535 	wake_fd_sel_thread(sel);
536 	return;
537     }
538 
539  out:
540     sel_fd_unlock(sel);
541 }
542 
/* Store left - right in dest.  The result is clamped: if left is
   earlier than right, dest is set to zero instead of going
   negative. */
static void
diff_timeval(struct timeval *dest,
             struct timeval *left,
             struct timeval *right)
{
    int left_is_earlier;

    if (left->tv_sec != right->tv_sec)
        left_is_earlier = left->tv_sec < right->tv_sec;
    else
        left_is_earlier = left->tv_usec < right->tv_usec;

    if (left_is_earlier) {
        /* Don't allow negative numbers, just force to zero. */
        dest->tv_sec = 0;
        dest->tv_usec = 0;
        return;
    }

    dest->tv_sec = left->tv_sec - right->tv_sec;
    dest->tv_usec = left->tv_usec - right->tv_usec;

    /* Borrow from the seconds while the microseconds are negative. */
    while (dest->tv_usec < 0) {
        dest->tv_usec += 1000000;
        dest->tv_sec--;
    }
}
566 
567 int
sel_alloc_timer(struct selector_s * sel,sel_timeout_handler_t handler,void * user_data,sel_timer_t ** new_timer)568 sel_alloc_timer(struct selector_s     *sel,
569 		sel_timeout_handler_t handler,
570 		void                  *user_data,
571 		sel_timer_t           **new_timer)
572 {
573     sel_timer_t *timer;
574 
575     timer = malloc(sizeof(*timer));
576     if (!timer)
577 	return ENOMEM;
578     memset(timer, 0, sizeof(*timer));
579 
580     timer->val.handler = handler;
581     timer->val.user_data = user_data;
582     timer->val.sel = sel;
583     timer->val.stopped = 1;
584     *new_timer = timer;
585 
586     return 0;
587 }
588 
589 int
sel_free_timer(sel_timer_t * timer)590 sel_free_timer(sel_timer_t *timer)
591 {
592     struct selector_s *sel = timer->val.sel;
593     int in_handler;
594 
595     sel_timer_lock(sel);
596     if (timer->val.in_heap) {
597 	sel_stop_timer(timer);
598     }
599     timer->val.freed = 1;
600     in_handler = timer->val.in_handler;
601     sel_timer_unlock(sel);
602 
603     if (!in_handler)
604 	free(timer);
605 
606     return 0;
607 }
608 
609 int
sel_start_timer(sel_timer_t * timer,struct timeval * timeout)610 sel_start_timer(sel_timer_t    *timer,
611 		struct timeval *timeout)
612 {
613     struct selector_s *sel = timer->val.sel;
614     volatile sel_timer_t *top;
615 
616     sel_timer_lock(sel);
617     if (timer->val.in_heap) {
618 	sel_timer_unlock(sel);
619 	return EBUSY;
620     }
621 
622     top = theap_get_top(&sel->timer_heap);
623 
624     timer->val.timeout = *timeout;
625 
626     if (!timer->val.in_handler) {
627 	/* Wait until the handler returns to start the timer. */
628 	theap_add(&sel->timer_heap, timer);
629 	timer->val.in_heap = 1;
630     }
631     timer->val.stopped = 0;
632 
633     wake_timer_sel_thread(sel, top);
634 
635     sel_timer_unlock(sel);
636 
637     return 0;
638 }
639 
640 int
sel_stop_timer(sel_timer_t * timer)641 sel_stop_timer(sel_timer_t *timer)
642 {
643     struct selector_s *sel = timer->val.sel;
644 
645     sel_timer_lock(sel);
646     if (timer->val.stopped) {
647 	sel_timer_unlock(sel);
648 	return ETIMEDOUT;
649     }
650 
651     if (timer->val.in_heap) {
652 	volatile sel_timer_t *top = theap_get_top(&sel->timer_heap);
653 
654 	theap_remove(&sel->timer_heap, timer);
655 	timer->val.in_heap = 0;
656 	wake_timer_sel_thread(sel, top);
657     }
658     timer->val.stopped = 1;
659 
660     sel_timer_unlock(sel);
661 
662     return 0;
663 }
664 
665 int
sel_stop_timer_with_done(sel_timer_t * timer,sel_timeout_handler_t done_handler,void * cb_data)666 sel_stop_timer_with_done(sel_timer_t *timer,
667 			 sel_timeout_handler_t done_handler,
668 			 void *cb_data)
669 {
670     struct selector_s *sel = timer->val.sel;
671     volatile sel_timer_t *top;
672 
673     sel_timer_lock(sel);
674     if (timer->val.stopped) {
675 	sel_timer_unlock(sel);
676 	goto out;
677     }
678 
679     if (timer->val.in_handler) {
680 	timer->val.done_handler = done_handler;
681 	timer->val.done_cb_data = cb_data;
682 	sel_timer_unlock(sel);
683 	return 0;
684     }
685 
686     if (timer->val.in_heap) {
687 	top = theap_get_top(&sel->timer_heap);
688 
689 	theap_remove(&sel->timer_heap, timer);
690 	timer->val.in_heap = 0;
691 
692 	wake_timer_sel_thread(sel, top);
693     }
694     timer->val.stopped = 1;
695     sel_timer_unlock(sel);
696 
697  out:
698     done_handler(sel, timer, cb_data);
699     return 0;
700 }
701 
702 void
sel_get_monotonic_time(struct timeval * tv)703 sel_get_monotonic_time(struct timeval *tv)
704 {
705     struct timespec ts;
706 
707     clock_gettime(CLOCK_MONOTONIC, &ts);
708     tv->tv_sec = ts.tv_sec;
709     tv->tv_usec = (ts.tv_nsec + 500) / 1000;
710 }
711 
712 /*
713  * Process timers on selector.  The timeout is always set, to a very
714  * long value if no timers are waiting.  Note that this *must* be
715  * called with sel->timer_lock held.  Note that if this processes
716  * any timers, the timeout will be set to { 0,0 }.
717  */
718 static void
process_timers(struct selector_s * sel,volatile struct timeval * timeout)719 process_timers(struct selector_s       *sel,
720 	       volatile struct timeval *timeout)
721 {
722     struct timeval now;
723     sel_timer_t    *timer;
724     int            called = 0;
725 
726     timer = theap_get_top(&sel->timer_heap);
727     sel_get_monotonic_time(&now);
728     while (timer && cmp_timeval(&now, &timer->val.timeout) >= 0) {
729 	called = 1;
730 	theap_remove(&(sel->timer_heap), timer);
731 	timer->val.in_heap = 0;
732 	timer->val.stopped = 1;
733 	timer->val.in_handler = 1;
734 	sel_timer_unlock(sel);
735 	timer->val.handler(sel, timer, timer->val.user_data);
736 	sel_timer_lock(sel);
737 	timer->val.in_handler = 0;
738 	if (timer->val.done_handler) {
739 	    sel_timeout_handler_t done_handler = timer->val.done_handler;
740 	    void *done_cb_data = timer->val.done_cb_data;
741 	    timer->val.done_handler = NULL;
742 	    sel_timer_unlock(sel);
743 	    done_handler(sel, timer, done_cb_data);
744 	    sel_timer_lock(sel);
745 	}
746 	if (timer->val.freed)
747 	    free(timer);
748 	else if (!timer->val.stopped) {
749 	    /* We were restarted while in the handler. */
750 	    theap_add(&sel->timer_heap, timer);
751 	    timer->val.in_heap = 1;
752 	}
753 
754 	timer = theap_get_top(&sel->timer_heap);
755     }
756 
757     if (called) {
758 	/* If called, set the timeout to zero. */
759 	timeout->tv_sec = 0;
760 	timeout->tv_usec = 0;
761     } else if (timer) {
762 	sel_get_monotonic_time(&now);
763 	diff_timeval((struct timeval *) timeout,
764 		     (struct timeval *) &timer->val.timeout,
765 		     &now);
766     } else {
767 	/* No timers, just set a long time. */
768 	timeout->tv_sec = 100000;
769 	timeout->tv_usec = 0;
770     }
771 }
772 
773 int
sel_alloc_runner(struct selector_s * sel,sel_runner_t ** new_runner)774 sel_alloc_runner(struct selector_s *sel, sel_runner_t **new_runner)
775 {
776     sel_runner_t *runner;
777 
778     runner = malloc(sizeof(*runner));
779     if (!runner)
780 	return ENOMEM;
781     memset(runner, 0, sizeof(*runner));
782     runner->sel = sel;
783     *new_runner = runner;
784     return 0;
785 }
786 
787 int
sel_free_runner(sel_runner_t * runner)788 sel_free_runner(sel_runner_t *runner)
789 {
790     struct selector_s *sel = runner->sel;
791 
792     sel_timer_lock(sel);
793     if (runner->in_use) {
794 	sel_timer_unlock(sel);
795 	return EBUSY;
796     }
797     sel_timer_unlock(sel);
798     free(runner);
799     return 0;
800 }
801 
802 int
sel_run(sel_runner_t * runner,sel_runner_func_t func,void * cb_data)803 sel_run(sel_runner_t *runner, sel_runner_func_t func, void *cb_data)
804 {
805     struct selector_s *sel = runner->sel;
806 
807     sel_timer_lock(sel);
808     if (runner->in_use) {
809 	sel_timer_unlock(sel);
810 	return EBUSY;
811     }
812 
813     runner->func = func;
814     runner->cb_data = cb_data;
815     runner->next = NULL;
816     runner->in_use = 1;
817 
818     if (sel->runner_tail) {
819 	sel->runner_tail->next = runner;
820 	sel->runner_tail = runner;
821     } else {
822 	sel->runner_head = runner;
823 	sel->runner_tail = runner;
824     }
825     sel_timer_unlock(sel);
826     return 0;
827 }
828 
829 static void
process_runners(struct selector_s * sel)830 process_runners(struct selector_s *sel)
831 {
832     while (sel->runner_head) {
833 	sel_runner_t *runner = sel->runner_head;
834 	sel_runner_func_t func;
835 	void *cb_data;
836 
837 	sel->runner_head = sel->runner_head->next;
838 	if (!sel->runner_head)
839 	    sel->runner_tail = NULL;
840 	runner->in_use = 0;
841 	func = runner->func;
842 	cb_data = runner->cb_data;
843 	sel_timer_unlock(sel);
844 	func(runner, cb_data);
845 	sel_timer_lock(sel);
846     }
847 }
848 
849 static void
handle_selector_call(struct selector_s * sel,int i,volatile fd_set * fdset,sel_fd_handler_t handler)850 handle_selector_call(struct selector_s *sel, int i, volatile fd_set *fdset,
851 		     sel_fd_handler_t handler)
852 {
853     void             *data;
854     fd_state_t       *state;
855 
856     if (handler == NULL) {
857 	/* Somehow we don't have a handler for this.
858 	   Just shut it down. */
859 	FD_CLR(i, fdset);
860 	return;
861     }
862 
863     if (!FD_ISSET(i, fdset))
864 	/* The value was cleared, ignore it. */
865 	return;
866 
867     data = sel->fds[i].data;
868     state = sel->fds[i].state;
869     state->use_count++;
870     sel_fd_unlock(sel);
871     handler(i, data);
872     sel_fd_lock(sel);
873     state->use_count--;
874     if (state->deleted && state->use_count == 0) {
875 	if (state->done) {
876 	    sel_fd_unlock(sel);
877 	    state->done(i, data);
878 	    sel_fd_lock(sel);
879 	}
880 	free(state);
881     }
882 }
883 
884 /*
885  * return == 0  when timeout
886  * 	  >  0  when successful
887  * 	  <  0  when error
888  */
889 static int
process_fds(struct selector_s * sel,volatile struct timeval * timeout)890 process_fds(struct selector_s	    *sel,
891 	    volatile struct timeval *timeout)
892 {
893     fd_set      tmp_read_set;
894     fd_set      tmp_write_set;
895     fd_set      tmp_except_set;
896     int i;
897     int err;
898     int num_fds;
899 
900     sel_fd_lock(sel);
901     memcpy(&tmp_read_set, (void *) &sel->read_set, sizeof(tmp_read_set));
902     memcpy(&tmp_write_set, (void *) &sel->write_set, sizeof(tmp_write_set));
903     memcpy(&tmp_except_set, (void *) &sel->except_set, sizeof(tmp_except_set));
904     num_fds = sel->maxfd + 1;
905     sel_fd_unlock(sel);
906 
907     err = select(num_fds,
908 		 &tmp_read_set,
909 		 &tmp_write_set,
910 		 &tmp_except_set,
911 		 (struct timeval *) timeout);
912     if (err <= 0)
913 	goto out;
914 
915     /* We got some I/O. */
916     sel_fd_lock(sel);
917     for (i = 0; i <= sel->maxfd; i++) {
918 	if (FD_ISSET(i, &tmp_read_set))
919 	    handle_selector_call(sel, i, &sel->read_set,
920 				 sel->fds[i].handle_read);
921 	if (FD_ISSET(i, &tmp_write_set))
922 	    handle_selector_call(sel, i, &sel->write_set,
923 				 sel->fds[i].handle_write);
924 	if (FD_ISSET(i, &tmp_except_set))
925 	    handle_selector_call(sel, i, &sel->except_set,
926 				 sel->fds[i].handle_except);
927     }
928     sel_fd_unlock(sel);
929 out:
930     return err;
931 }
932 
#ifdef HAVE_EPOLL_PWAIT
/* Wait for at most one epoll event, for no longer than *tvtimeout,
   and dispatch it.  Returns the epoll_pwait() result when that is
   zero or negative, and 0 after an event has been handled. */
static int
process_fds_epoll(struct selector_s *sel, struct timeval *tvtimeout)
{
    struct epoll_event ev;
    sigset_t           mask;
    int                wait_ms;
    int                nready;
    int                fd;

    if (tvtimeout->tv_sec <= 600)
        /* Convert to milliseconds, rounding the microseconds up. */
        wait_ms = ((tvtimeout->tv_sec * 1000) +
                   (tvtimeout->tv_usec + 999) / 1000);
    else
        /* Don't wait over 10 minutes, to work around an old epoll bug
           and avoid issues with timeout overflowing on 64-bit
           systems, which is much larger than 10 minutes, but who
           cares. */
        wait_ms = 600 * 1000;

    /* Fetch the current signal mask and remove the wake signal from
       it, so that signal is not blocked during the wait. */
#ifdef USE_PTHREADS
    pthread_sigmask(SIG_SETMASK, NULL, &mask);
#else
    sigprocmask(SIG_SETMASK, NULL, &mask);
#endif
    sigdelset(&mask, sel->wake_sig);
    nready = epoll_pwait(sel->epollfd, &ev, 1, wait_ms, &mask);

    if (nready <= 0)
        return nready;

    sel_fd_lock(sel);
    fd = ev.data.fd;
    if (ev.events & (EPOLLIN | EPOLLHUP))
        handle_selector_call(sel, fd, &sel->read_set,
                             sel->fds[fd].handle_read);
    if (ev.events & EPOLLOUT)
        handle_selector_call(sel, fd, &sel->write_set,
                             sel->fds[fd].handle_write);
    if (ev.events & (EPOLLERR | EPOLLPRI))
        handle_selector_call(sel, fd, &sel->except_set,
                             sel->fds[fd].handle_except);

    /* Rearm the event.  Remember it could have been deleted in the
       handler. */
    if (sel->fds[fd].state)
        sel_update_epoll(sel, fd, EPOLL_CTL_MOD);
    sel_fd_unlock(sel);

    return 0;
}
#endif
982 
983 int
sel_select(struct selector_s * sel,sel_send_sig_cb send_sig,long thread_id,void * cb_data,struct timeval * timeout)984 sel_select(struct selector_s *sel,
985 	   sel_send_sig_cb send_sig,
986 	   long            thread_id,
987 	   void            *cb_data,
988 	   struct timeval  *timeout)
989 {
990     int             err;
991     struct timeval  loc_timeout;
992     sel_wait_list_t wait_entry;
993 
994     sel_timer_lock(sel);
995     process_runners(sel);
996     process_timers(sel, (struct timeval *)(&loc_timeout));
997     if (timeout) {
998 	if (cmp_timeval((struct timeval *)(&loc_timeout), timeout) >= 0)
999 	    memcpy(&loc_timeout, timeout, sizeof(loc_timeout));
1000     }
1001     add_sel_wait_list(sel, &wait_entry, send_sig, cb_data, thread_id,
1002 		      &loc_timeout);
1003     sel_timer_unlock(sel);
1004 
1005 #ifdef HAVE_EPOLL_PWAIT
1006     if (sel->epollfd >= 0)
1007 	err = process_fds_epoll(sel, &loc_timeout);
1008     else
1009 #endif
1010 	err = process_fds(sel, &loc_timeout);
1011 
1012     sel_timer_lock(sel);
1013     remove_sel_wait_list(sel, &wait_entry);
1014     sel_timer_unlock(sel);
1015 
1016     return err;
1017 }
1018 
/* The main loop for the program.  This will select on the various
   sets, then scan for any available I/O to process.  It also monitors
   the time and calls the timeout handlers periodically. */
1022 int
sel_select_loop(struct selector_s * sel,sel_send_sig_cb send_sig,long thread_id,void * cb_data)1023 sel_select_loop(struct selector_s *sel,
1024 		sel_send_sig_cb send_sig,
1025 		long            thread_id,
1026 		void            *cb_data)
1027 {
1028     for (;;) {
1029 	int err = sel_select(sel, send_sig, thread_id, cb_data, NULL);
1030 
1031 	if ((err < 0) && (errno != EINTR)) {
1032 	    err = errno;
1033 	    /* An error occurred. */
1034 	    /* An error is bad, we need to abort. */
1035 	    syslog(LOG_ERR, "select_loop() - select: %m");
1036 	    return err;
1037 	}
1038     }
1039 }
1040 
1041 /* Initialize the select code. */
1042 int
sel_alloc_selector_thread(struct selector_s ** new_selector,int wake_sig,sel_lock_t * (* sel_lock_alloc)(void * cb_data),void (* sel_lock_free)(sel_lock_t *),void (* sel_lock)(sel_lock_t *),void (* sel_unlock)(sel_lock_t *),void * cb_data)1043 sel_alloc_selector_thread(struct selector_s **new_selector, int wake_sig,
1044 			  sel_lock_t *(*sel_lock_alloc)(void *cb_data),
1045 			  void (*sel_lock_free)(sel_lock_t *),
1046 			  void (*sel_lock)(sel_lock_t *),
1047 			  void (*sel_unlock)(sel_lock_t *),
1048 			  void *cb_data)
1049 {
1050     struct selector_s *sel;
1051     unsigned int i;
1052 
1053     sel = malloc(sizeof(*sel));
1054     if (!sel)
1055 	return ENOMEM;
1056     memset(sel, 0, sizeof(*sel));
1057 
1058     sel->sel_lock_alloc = sel_lock_alloc;
1059     sel->sel_lock_free = sel_lock_free;
1060     sel->sel_lock = sel_lock;
1061     sel->sel_unlock = sel_unlock;
1062 
1063     /* The list is initially empty. */
1064     sel->wait_list.next = &sel->wait_list;
1065     sel->wait_list.prev = &sel->wait_list;
1066 
1067     sel->wake_sig = wake_sig;
1068 
1069     FD_ZERO((fd_set *) &sel->read_set);
1070     FD_ZERO((fd_set *) &sel->write_set);
1071     FD_ZERO((fd_set *) &sel->except_set);
1072 
1073     for (i = 0; i < FD_SETSIZE; i++) {
1074 	init_fd((fd_control_t *) &(sel->fds[i]));
1075     }
1076 
1077     theap_init(&sel->timer_heap);
1078 
1079     if (sel->sel_lock_alloc) {
1080 	sel->timer_lock = sel->sel_lock_alloc(cb_data);
1081 	if (!sel->timer_lock) {
1082 	    free(sel);
1083 	    return ENOMEM;
1084 	}
1085 	sel->fd_lock = sel->sel_lock_alloc(cb_data);
1086 	if (!sel->fd_lock) {
1087 	    sel->sel_lock_free(sel->fd_lock);
1088 	    free(sel);
1089 	    return ENOMEM;
1090 	}
1091     }
1092 
1093 #ifdef HAVE_EPOLL_PWAIT
1094     sel->epollfd = epoll_create(32768);
1095     if (sel->epollfd == -1) {
1096 	syslog(LOG_ERR, "Unable to set up epoll, falling back to select: %m");
1097     } else {
1098 	int rv;
1099 	sigset_t sigset;
1100 
1101 	sigemptyset(&sigset);
1102 	sigaddset(&sigset, wake_sig);
1103 	rv = sigprocmask(SIG_BLOCK, &sigset, NULL);
1104 	if (rv == -1) {
1105 	    rv = errno;
1106 	    close(sel->epollfd);
1107 	    if (sel->sel_lock_alloc) {
1108 		sel->sel_lock_free(sel->fd_lock);
1109 		sel->sel_lock_free(sel->timer_lock);
1110 	    }
1111 	    free(sel);
1112 	    return rv;
1113 	}
1114     }
1115 #endif
1116 
1117     *new_selector = sel;
1118 
1119     return 0;
1120 }
1121 
/* Allocate a selector without any locking: wake signal 0 and no lock
   callbacks.  Returns whatever sel_alloc_selector_thread() returns. */
int
sel_alloc_selector_nothread(struct selector_s **new_selector)
{
    int rv;

    rv = sel_alloc_selector_thread(new_selector,
                                   0,     /* wake_sig */
                                   NULL,  /* sel_lock_alloc */
                                   NULL,  /* sel_lock_free */
                                   NULL,  /* sel_lock */
                                   NULL,  /* sel_unlock */
                                   NULL); /* cb_data */
    return rv;
}
1128 
1129 int
sel_free_selector(struct selector_s * sel)1130 sel_free_selector(struct selector_s *sel)
1131 {
1132     sel_timer_t *elem;
1133 
1134     elem = theap_get_top(&(sel->timer_heap));
1135     while (elem) {
1136 	theap_remove(&(sel->timer_heap), elem);
1137 	free(elem);
1138 	elem = theap_get_top(&(sel->timer_heap));
1139     }
1140 #ifdef HAVE_EPOLL_PWAIT
1141     if (sel->epollfd >= 0)
1142 	close(sel->epollfd);
1143 #endif
1144     if (sel->fd_lock)
1145 	sel->sel_lock_free(sel->fd_lock);
1146     if (sel->timer_lock)
1147 	sel->sel_lock_free(sel->timer_lock);
1148     free(sel);
1149 
1150     return 0;
1151 }
1152