1 /*
2 * selector.c
3 *
4 * Code for abstracting select for files and timers.
5 *
6 * Author: MontaVista Software, Inc.
7 * Corey Minyard <minyard@mvista.com>
8 * source@mvista.com
9 *
10 * Copyright 2002,2003 MontaVista Software Inc.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2 of
15 * the License, or (at your option) any later version.
16 *
17 *
18 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
19 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
20 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
23 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
24 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
25 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
26 * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
27 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 *
29 * You should have received a copy of the GNU Lesser General Public
30 * License along with this program; if not, write to the Free
31 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
32 */
33
34 /* This file holds code to abstract the "select" call and make it
35 easier to use. The main thread lives here, the rest of the code
36 uses a callback interface. Basically, other parts of the program
37 can register file descriptors with this code, when interesting
38 things happen on those file descriptors this code will call
39 routines registered with it. */
40
41 #include "selector.h"
42
43 #include <sys/time.h>
44 #include <time.h>
45 #include <sys/types.h>
46 #include <unistd.h>
47 #include <stdlib.h>
48 #include <errno.h>
49 #include <stdio.h>
50 #include <syslog.h>
51 #include <signal.h>
52 #include <string.h>
53 #ifdef HAVE_EPOLL_PWAIT
54 #include <sys/epoll.h>
55 #else
56 #define EPOLL_CTL_ADD 0
57 #define EPOLL_CTL_DEL 0
58 #define EPOLL_CTL_MOD 0
59 #endif
60
/* Per-FD bookkeeping that can outlive the fd table entry itself: it
   records whether the FD's handlers were cleared while a handler call
   was still in progress, so the "done" callback and the free can be
   deferred until the last in-flight handler finishes. */
typedef struct fd_state_s
{
    int deleted;		/* Handlers cleared while still in use? */
    unsigned int use_count;	/* Number of handler calls in progress. */
    sel_fd_cleared_cb done;	/* Called once the FD is fully cleared. */
} fd_state_t;
67
/* The control structure for each file descriptor. */
typedef struct fd_control_s
{
    /* This structure is allocated when an FD is set and it holds
       whether the FD has been deleted and information to handle the
       deletion.  NULL means the FD is not in use. */
    fd_state_t *state;
    void *data;		/* Operation-specific data */
    sel_fd_handler_t handle_read;	/* FD is readable. */
    sel_fd_handler_t handle_write;	/* FD is writable. */
    sel_fd_handler_t handle_except;	/* FD has an exception pending. */
} fd_control_t;
80
/* The payload carried by each timer heap node (sel_timer_s). */
typedef struct heap_val_s
{
    /* Set this to the function to call when the timeout occurs. */
    sel_timeout_handler_t handler;

    /* Set this to whatever you like.  You can use this to store your
       own data. */
    void *user_data;

    /* Set this to the time when the timer will go off. */
    struct timeval timeout;

    /* Who owns me? */
    struct selector_s *sel;

    /* Am I currently in the timer heap? */
    int in_heap;

    /* Am I currently stopped? */
    int stopped;

    /* Have I been freed?  If set while the handler runs, the memory
       is released by process_timers() when the handler returns. */
    int freed;

    /* Is my expiry handler currently running?  Start/stop/free
       operations are deferred while this is set. */
    int in_handler;

    /* Set by sel_stop_timer_with_done() when a stop races with a
       running handler; invoked after the handler returns. */
    sel_timeout_handler_t done_handler;
    void *done_cb_data;
} heap_val_t;
111
/* Instantiate the generic heap template (heap.h, included below) as
   a file-local timer heap ordered by heap_cmp_key(), i.e. by
   absolute expiry time. */
typedef struct theap_s theap_t;
#define heap_s theap_s
#define heap_node_s sel_timer_s
#define HEAP_EXPORT_NAME(s) theap_ ## s
#define HEAP_NAMES_LOCAL static
#define HEAP_OUTPUT_PRINTF "(%ld.%7.7ld)"
#define HEAP_OUTPUT_DATA pos->timeout.tv_sec, pos->timeout.tv_usec
119
/* Three-way comparison of two timevals: -1 if tv1 is earlier than
   tv2, 1 if later, 0 if equal. */
static int
cmp_timeval(const struct timeval *tv1, const struct timeval *tv2)
{
    if (tv1->tv_sec != tv2->tv_sec)
        return (tv1->tv_sec < tv2->tv_sec) ? -1 : 1;

    if (tv1->tv_usec != tv2->tv_usec)
        return (tv1->tv_usec < tv2->tv_usec) ? -1 : 1;

    return 0;
}
137
/* Heap ordering callback for heap.h: earlier timeout sorts first. */
static int
heap_cmp_key(heap_val_t *v1, heap_val_t *v2)
{
    return cmp_timeval(&v1->timeout, &v2->timeout);
}
143
144 #include "heap.h"
145
/* Used to build a list of threads that may need to be woken if a
   timer on the top of the heap changes, or an FD is added/removed.
   See wake_sel_thread() for more info. */
typedef struct sel_wait_list_s
{
    /* The thread to wake up. */
    long thread_id;

    /* How to wake it. */
    sel_send_sig_cb send_sig;
    void *send_sig_cb_data;

    /* This is the memory used to hold the timeout for select
       operation.  wake_sel_thread() zeroes it through this pointer,
       so it must point at the very timeval passed to select(). */
    volatile struct timeval *timeout;

    /* Doubly-linked list; the selector's wait_list member serves as
       the dummy head node. */
    struct sel_wait_list_s *next, *prev;
} sel_wait_list_t;
164
/* A deferred-call request queued by sel_run() and executed from the
   select loop by process_runners(). */
struct sel_runner_s
{
    struct selector_s *sel;
    sel_runner_func_t func;	/* What to call. */
    void *cb_data;		/* Passed to func. */
    int in_use;			/* Nonzero while queued. */
    sel_runner_t *next;		/* Next in the FIFO queue. */
};
173
struct selector_s
{
    /* This is an array of all the file descriptors possible. This is
       moderately wasteful of space, but easy to do. Hey, memory is
       cheap. */
    volatile fd_control_t fds[FD_SETSIZE];

    /* These are the official fd_sets used to track what file descriptors
       need to be monitored. */
    volatile fd_set read_set;
    volatile fd_set write_set;
    volatile fd_set except_set;

    volatile int maxfd; /* The largest file descriptor registered with
			   this code. */

    /* Protects fds[], the fd_sets above, and maxfd. */
    void *fd_lock;

    /* The timer heap. */
    theap_t timer_heap;

    /* This is a list of items waiting to be woken up because they are
       sitting in a select. See wake_sel_thread() for more info. */
    sel_wait_list_t wait_list;

    /* Protects the timer heap, the wait list, and the runner queue. */
    void *timer_lock;

    /* Singly-linked FIFO of pending sel_run() requests. */
    sel_runner_t *runner_head;
    sel_runner_t *runner_tail;

    /* Signal used to interrupt threads blocked in select/epoll_pwait. */
    int wake_sig;

#ifdef HAVE_EPOLL_PWAIT
    int epollfd;	/* -1 if epoll setup failed; fall back to select. */
#endif
    /* Optional locking primitives; all NULL when single-threaded. */
    sel_lock_t *(*sel_lock_alloc)(void *cb_data);
    void (*sel_lock_free)(sel_lock_t *);
    void (*sel_lock)(sel_lock_t *);
    void (*sel_unlock)(sel_lock_t *);
};
214
/* Take the timer lock, if locking was configured at allocation time. */
static void
sel_timer_lock(struct selector_s *sel)
{
    if (sel->sel_lock)
	sel->sel_lock(sel->timer_lock);
}
221
/* Release the timer lock, if locking was configured. */
static void
sel_timer_unlock(struct selector_s *sel)
{
    if (sel->sel_lock)
	sel->sel_unlock(sel->timer_lock);
}
228
/* Take the fd lock, if locking was configured at allocation time. */
static void
sel_fd_lock(struct selector_s *sel)
{
    if (sel->sel_lock)
	sel->sel_lock(sel->fd_lock);
}
235
/* Release the fd lock, if locking was configured. */
static void
sel_fd_unlock(struct selector_s *sel)
{
    if (sel->sel_lock)
	sel->sel_unlock(sel->fd_lock);
}
242
243 /* This function will wake the SEL thread. It must be called with the
244 timer lock held, because it messes with timeout.
245
246 The operation is is subtle, but it does work. The timeout in the
247 selector is the data passed in (must be the actual data) as the
248 timeout to select. When we want to wake the select, we set the
249 timeout to zero first. That way, if the select has calculated the
250 timeout but has not yet called select, then this will set it to
251 zero (causing it to wait zero time). If select has already been
252 called, then the signal send should wake it up. We only need to do
253 this after we have calculated the timeout, but before we have
254 called select, thus only things in the wait list matter. */
255 static void
wake_sel_thread(struct selector_s * sel)256 wake_sel_thread(struct selector_s *sel)
257 {
258 sel_wait_list_t *item;
259
260 item = sel->wait_list.next;
261 while (item != &sel->wait_list) {
262 item->timeout->tv_sec = 0;
263 item->timeout->tv_usec = 0;
264 if (item->send_sig)
265 item->send_sig(item->thread_id, item->send_sig_cb_data);
266 item = item->next;
267 }
268 }
269
/* Wake all waiting threads and then drop the fd lock.  Note this
   RELEASES the lock the caller acquired; after calling this the
   caller must return without unlocking again. */
static void
wake_fd_sel_thread(struct selector_s *sel)
{
    wake_sel_thread(sel);
    sel_fd_unlock(sel);
}
276
/* If the top of the timer heap changed since old_top was sampled,
   the timeout that waiting threads computed is stale; wake them so
   they recalculate.  Must be called with the timer lock held. */
static void
wake_timer_sel_thread(struct selector_s *sel, volatile sel_timer_t *old_top)
{
    if (old_top != theap_get_top(&sel->timer_heap))
	/* If the top value changed, restart the waiting thread. */
	wake_sel_thread(sel);
}
284
285 /* Wait list management. These *must* be called with the timer list
286 locked, and the values in the item *must not* change while in the
287 list. */
288 static void
add_sel_wait_list(struct selector_s * sel,sel_wait_list_t * item,sel_send_sig_cb send_sig,void * cb_data,long thread_id,volatile struct timeval * timeout)289 add_sel_wait_list(struct selector_s *sel, sel_wait_list_t *item,
290 sel_send_sig_cb send_sig,
291 void *cb_data,
292 long thread_id, volatile struct timeval *timeout)
293 {
294 item->thread_id = thread_id;
295 item->timeout = timeout;
296 item->send_sig = send_sig;
297 item->send_sig_cb_data = cb_data;
298 item->next = sel->wait_list.next;
299 item->prev = &sel->wait_list;
300 sel->wait_list.next->prev = item;
301 sel->wait_list.next = item;
302 }
static void
remove_sel_wait_list(struct selector_s *sel, sel_wait_list_t *item)
{
    /* Unlink item; it is always on the list, so no emptiness checks. */
    item->next->prev = item->prev;
    item->prev->next = item->next;
}
309
310 /* Initialize a single file descriptor. */
static void
init_fd(fd_control_t *fd)
{
    /* No state allocated and no handlers: the fd is "not in use". */
    fd->state = NULL;
    fd->data = NULL;
    fd->handle_read = NULL;
    fd->handle_write = NULL;
    fd->handle_except = NULL;
}
320
#ifdef HAVE_EPOLL_PWAIT
/* Sync the epoll registration for fd with the current fd_sets.
   Returns 0 if epoll handled it, 1 if epoll is unavailable (the
   caller then falls back to waking the select() threads). */
static int
sel_update_epoll(struct selector_s *sel, int fd, int op)
{
    struct epoll_event event;

    if (sel->epollfd < 0)
	return 1;

    memset(&event, 0, sizeof(event));
    /* One-shot: the event is rearmed in process_fds_epoll() after the
       handler has run. */
    event.events = EPOLLONESHOT;
    event.data.fd = fd;
    if (FD_ISSET(fd, &sel->read_set))
	event.events |= EPOLLIN | EPOLLHUP;
    if (FD_ISSET(fd, &sel->write_set))
	event.events |= EPOLLOUT;
    /* Bug fix: this previously tested write_set, so exception
       monitoring tracked the write handler's enable state instead of
       sel_set_fd_except_handler(). */
    if (FD_ISSET(fd, &sel->except_set))
	event.events |= EPOLLERR | EPOLLPRI;

    epoll_ctl(sel->epollfd, op, fd, &event);
    return 0;
}
#else
/* No epoll support compiled in; always report "not handled". */
static int
sel_update_epoll(struct selector_s *sel, int fd, int op)
{
    return 1;
}
#endif
350
/* Set the handlers for a file descriptor.  Returns 0 on success,
   ENOMEM on allocation failure.  If handlers were already installed
   for this fd they are replaced, and the old state's "done" callback
   runs immediately when no handler call is in flight (otherwise it is
   deferred to the last user, see handle_selector_call()). */
int
sel_set_fd_handlers(struct selector_s *sel,
		    int fd,
		    void *data,
		    sel_fd_handler_t read_handler,
		    sel_fd_handler_t write_handler,
		    sel_fd_handler_t except_handler,
		    sel_fd_cleared_cb done)
{
    fd_control_t *fdc;
    fd_state_t *state, *oldstate = NULL;
    void *olddata = NULL;
    int added = 1;

    state = malloc(sizeof(*state));
    if (!state)
	return ENOMEM;
    state->deleted = 0;
    state->use_count = 0;
    state->done = done;

    sel_fd_lock(sel);
    fdc = (fd_control_t *) &(sel->fds[fd]);
    if (fdc->state) {
	/* Handlers were already set on this fd; remember the old
	   state so we can run its cleanup below, outside the lock. */
	oldstate = fdc->state;
	olddata = fdc->data;
	added = 0;
    }
    fdc->state = state;
    fdc->data = data;
    fdc->handle_read = read_handler;
    fdc->handle_write = write_handler;
    fdc->handle_except = except_handler;

    if (added) {
	/* Move maxfd up if necessary. */
	if (fd > sel->maxfd) {
	    sel->maxfd = fd;
	}

	if (sel_update_epoll(sel, fd, EPOLL_CTL_ADD)) {
	    /* No epoll: wake select()ing threads so they pick up the
	       new fd.  wake_fd_sel_thread() drops the fd lock. */
	    wake_fd_sel_thread(sel);
	    goto out;
	}
    }
    sel_fd_unlock(sel);

 out:
    if (oldstate) {
	oldstate->deleted = 1;
	if (oldstate->use_count == 0) {
	    if (oldstate->done)
		oldstate->done(fd, olddata);
	    free(oldstate);
	}
    }
    return 0;
}
410
411 /* Clear the handlers for a file descriptor and remove it from
412 select's monitoring. */
void
sel_clear_fd_handlers(struct selector_s *sel,
		      int fd)
{
    fd_control_t *fdc;
    fd_state_t *oldstate = NULL;
    void *olddata = NULL;

    sel_fd_lock(sel);
    fdc = (fd_control_t *) &(sel->fds[fd]);

    if (fdc->state) {
	oldstate = fdc->state;
	olddata = fdc->data;
	fdc->state = NULL;

	sel_update_epoll(sel, fd, EPOLL_CTL_DEL);
    }

    init_fd(fdc);
    FD_CLR(fd, &sel->read_set);
    FD_CLR(fd, &sel->write_set);
    FD_CLR(fd, &sel->except_set);

    /* Move maxfd down if necessary. */
    if (fd == sel->maxfd) {
	while ((sel->maxfd >= 0) && (! sel->fds[sel->maxfd].state)) {
	    sel->maxfd--;
	}
    }

    sel_fd_unlock(sel);

    /* Run the cleanup outside the lock.  If a handler call is in
       flight, mark the state deleted and let the last user run the
       "done" callback and free (see handle_selector_call()). */
    if (oldstate) {
	oldstate->deleted = 1;
	if (oldstate->use_count == 0) {
	    if (oldstate->done)
		oldstate->done(fd, olddata);
	    free(oldstate);
	}
    }
}
455
456 /* Set whether the file descriptor will be monitored for data ready to
457 read on the file descriptor. */
458 void
sel_set_fd_read_handler(struct selector_s * sel,int fd,int state)459 sel_set_fd_read_handler(struct selector_s *sel, int fd, int state)
460 {
461 fd_control_t *fdc = (fd_control_t *) &(sel->fds[fd]);
462
463 sel_fd_lock(sel);
464 if (!fdc->state)
465 goto out;
466
467 if (state == SEL_FD_HANDLER_ENABLED) {
468 if (FD_ISSET(fd, &sel->read_set))
469 goto out;
470 FD_SET(fd, &sel->read_set);
471 } else if (state == SEL_FD_HANDLER_DISABLED) {
472 if (!FD_ISSET(fd, &sel->read_set))
473 goto out;
474 FD_CLR(fd, &sel->read_set);
475 }
476 if (sel_update_epoll(sel, fd, EPOLL_CTL_MOD)) {
477 wake_fd_sel_thread(sel);
478 return;
479 }
480
481 out:
482 sel_fd_unlock(sel);
483 }
484
485 /* Set whether the file descriptor will be monitored for when the file
486 descriptor can be written to. */
487 void
sel_set_fd_write_handler(struct selector_s * sel,int fd,int state)488 sel_set_fd_write_handler(struct selector_s *sel, int fd, int state)
489 {
490 fd_control_t *fdc = (fd_control_t *) &(sel->fds[fd]);
491
492 sel_fd_lock(sel);
493 if (!fdc->state)
494 goto out;
495
496 if (state == SEL_FD_HANDLER_ENABLED) {
497 if (FD_ISSET(fd, &sel->write_set))
498 goto out;
499 FD_SET(fd, &sel->write_set);
500 } else if (state == SEL_FD_HANDLER_DISABLED) {
501 if (!FD_ISSET(fd, &sel->write_set))
502 goto out;
503 FD_CLR(fd, &sel->write_set);
504 }
505 if (sel_update_epoll(sel, fd, EPOLL_CTL_MOD)) {
506 wake_fd_sel_thread(sel);
507 return;
508 }
509
510 out:
511 sel_fd_unlock(sel);
512 }
513
514 /* Set whether the file descriptor will be monitored for exceptions
515 on the file descriptor. */
516 void
sel_set_fd_except_handler(struct selector_s * sel,int fd,int state)517 sel_set_fd_except_handler(struct selector_s *sel, int fd, int state)
518 {
519 fd_control_t *fdc = (fd_control_t *) &(sel->fds[fd]);
520
521 sel_fd_lock(sel);
522 if (!fdc->state)
523 goto out;
524
525 if (state == SEL_FD_HANDLER_ENABLED) {
526 if (FD_ISSET(fd, &sel->except_set))
527 goto out;
528 FD_SET(fd, &sel->except_set);
529 } else if (state == SEL_FD_HANDLER_DISABLED) {
530 if (!FD_ISSET(fd, &sel->except_set))
531 goto out;
532 FD_CLR(fd, &sel->except_set);
533 }
534 if (sel_update_epoll(sel, fd, EPOLL_CTL_MOD)) {
535 wake_fd_sel_thread(sel);
536 return;
537 }
538
539 out:
540 sel_fd_unlock(sel);
541 }
542
/* dest = left - right, clamped at zero (never a negative interval). */
static void
diff_timeval(struct timeval *dest,
	     struct timeval *left,
	     struct timeval *right)
{
    int left_earlier = (left->tv_sec < right->tv_sec
			|| (left->tv_sec == right->tv_sec
			    && left->tv_usec < right->tv_usec));

    if (left_earlier) {
	dest->tv_sec = 0;
	dest->tv_usec = 0;
	return;
    }

    dest->tv_sec = left->tv_sec - right->tv_sec;
    dest->tv_usec = left->tv_usec - right->tv_usec;
    /* Borrow from seconds until the microseconds are normalized. */
    while (dest->tv_usec < 0) {
	dest->tv_usec += 1000000;
	dest->tv_sec -= 1;
    }
}
566
567 int
sel_alloc_timer(struct selector_s * sel,sel_timeout_handler_t handler,void * user_data,sel_timer_t ** new_timer)568 sel_alloc_timer(struct selector_s *sel,
569 sel_timeout_handler_t handler,
570 void *user_data,
571 sel_timer_t **new_timer)
572 {
573 sel_timer_t *timer;
574
575 timer = malloc(sizeof(*timer));
576 if (!timer)
577 return ENOMEM;
578 memset(timer, 0, sizeof(*timer));
579
580 timer->val.handler = handler;
581 timer->val.user_data = user_data;
582 timer->val.sel = sel;
583 timer->val.stopped = 1;
584 *new_timer = timer;
585
586 return 0;
587 }
588
589 int
sel_free_timer(sel_timer_t * timer)590 sel_free_timer(sel_timer_t *timer)
591 {
592 struct selector_s *sel = timer->val.sel;
593 int in_handler;
594
595 sel_timer_lock(sel);
596 if (timer->val.in_heap) {
597 sel_stop_timer(timer);
598 }
599 timer->val.freed = 1;
600 in_handler = timer->val.in_handler;
601 sel_timer_unlock(sel);
602
603 if (!in_handler)
604 free(timer);
605
606 return 0;
607 }
608
/* Start the timer to expire at the absolute (monotonic) time in
   timeout.  Returns EBUSY if the timer is already running. */
int
sel_start_timer(sel_timer_t *timer,
		struct timeval *timeout)
{
    struct selector_s *sel = timer->val.sel;
    volatile sel_timer_t *top;

    sel_timer_lock(sel);
    if (timer->val.in_heap) {
	sel_timer_unlock(sel);
	return EBUSY;
    }

    top = theap_get_top(&sel->timer_heap);

    timer->val.timeout = *timeout;

    if (!timer->val.in_handler) {
	theap_add(&sel->timer_heap, timer);
	timer->val.in_heap = 1;
    }
    /* If the expiry handler is currently running we only clear
       "stopped" here; process_timers() re-adds the timer to the heap
       when the handler returns. */
    timer->val.stopped = 0;

    /* If this became the soonest timer, waiting threads must
       recompute their select() timeout. */
    wake_timer_sel_thread(sel, top);

    sel_timer_unlock(sel);

    return 0;
}
639
/* Stop a running timer.  Returns ETIMEDOUT if the timer was already
   stopped (or had expired without being restarted). */
int
sel_stop_timer(sel_timer_t *timer)
{
    struct selector_s *sel = timer->val.sel;

    sel_timer_lock(sel);
    if (timer->val.stopped) {
	sel_timer_unlock(sel);
	return ETIMEDOUT;
    }

    if (timer->val.in_heap) {
	volatile sel_timer_t *top = theap_get_top(&sel->timer_heap);

	theap_remove(&sel->timer_heap, timer);
	timer->val.in_heap = 0;
	/* Recompute select timeouts if the soonest timer changed. */
	wake_timer_sel_thread(sel, top);
    }
    timer->val.stopped = 1;

    sel_timer_unlock(sel);

    return 0;
}
664
/* Stop the timer and arrange for done_handler to run once the timer
   is fully quiesced.  If the timer's expiry handler is currently
   executing, the callback is deferred to process_timers(); otherwise
   it is invoked directly before returning (including when the timer
   was already stopped). */
int
sel_stop_timer_with_done(sel_timer_t *timer,
			 sel_timeout_handler_t done_handler,
			 void *cb_data)
{
    struct selector_s *sel = timer->val.sel;
    volatile sel_timer_t *top;

    sel_timer_lock(sel);
    if (timer->val.stopped) {
	sel_timer_unlock(sel);
	goto out;
    }

    if (timer->val.in_handler) {
	/* The expiry handler is running right now; process_timers()
	   calls done_handler after it returns. */
	timer->val.done_handler = done_handler;
	timer->val.done_cb_data = cb_data;
	sel_timer_unlock(sel);
	return 0;
    }

    if (timer->val.in_heap) {
	top = theap_get_top(&sel->timer_heap);

	theap_remove(&sel->timer_heap, timer);
	timer->val.in_heap = 0;

	wake_timer_sel_thread(sel, top);
    }
    timer->val.stopped = 1;
    sel_timer_unlock(sel);

 out:
    done_handler(sel, timer, cb_data);
    return 0;
}
701
702 void
sel_get_monotonic_time(struct timeval * tv)703 sel_get_monotonic_time(struct timeval *tv)
704 {
705 struct timespec ts;
706
707 clock_gettime(CLOCK_MONOTONIC, &ts);
708 tv->tv_sec = ts.tv_sec;
709 tv->tv_usec = (ts.tv_nsec + 500) / 1000;
710 }
711
712 /*
713 * Process timers on selector. The timeout is always set, to a very
714 * long value if no timers are waiting. Note that this *must* be
715 * called with sel->timer_lock held. Note that if this processes
716 * any timers, the timeout will be set to { 0,0 }.
717 */
static void
process_timers(struct selector_s *sel,
	       volatile struct timeval *timeout)
{
    struct timeval now;
    sel_timer_t *timer;
    int called = 0;

    timer = theap_get_top(&sel->timer_heap);
    sel_get_monotonic_time(&now);
    while (timer && cmp_timeval(&now, &timer->val.timeout) >= 0) {
	called = 1;
	theap_remove(&(sel->timer_heap), timer);
	timer->val.in_heap = 0;
	timer->val.stopped = 1;
	/* Drop the lock while running the user's handler; in_handler
	   tells concurrent start/stop/free calls to defer their work
	   until we take the lock back. */
	timer->val.in_handler = 1;
	sel_timer_unlock(sel);
	timer->val.handler(sel, timer, timer->val.user_data);
	sel_timer_lock(sel);
	timer->val.in_handler = 0;
	if (timer->val.done_handler) {
	    /* sel_stop_timer_with_done() raced with the handler;
	       deliver its deferred "done" callback now. */
	    sel_timeout_handler_t done_handler = timer->val.done_handler;
	    void *done_cb_data = timer->val.done_cb_data;
	    timer->val.done_handler = NULL;
	    sel_timer_unlock(sel);
	    done_handler(sel, timer, done_cb_data);
	    sel_timer_lock(sel);
	}
	if (timer->val.freed)
	    free(timer);
	else if (!timer->val.stopped) {
	    /* We were restarted while in the handler. */
	    theap_add(&sel->timer_heap, timer);
	    timer->val.in_heap = 1;
	}

	timer = theap_get_top(&sel->timer_heap);
    }

    if (called) {
	/* If called, set the timeout to zero. */
	timeout->tv_sec = 0;
	timeout->tv_usec = 0;
    } else if (timer) {
	/* Sleep until the soonest remaining timer expires. */
	sel_get_monotonic_time(&now);
	diff_timeval((struct timeval *) timeout,
		     (struct timeval *) &timer->val.timeout,
		     &now);
    } else {
	/* No timers, just set a long time. */
	timeout->tv_sec = 100000;
	timeout->tv_usec = 0;
    }
}
772
773 int
sel_alloc_runner(struct selector_s * sel,sel_runner_t ** new_runner)774 sel_alloc_runner(struct selector_s *sel, sel_runner_t **new_runner)
775 {
776 sel_runner_t *runner;
777
778 runner = malloc(sizeof(*runner));
779 if (!runner)
780 return ENOMEM;
781 memset(runner, 0, sizeof(*runner));
782 runner->sel = sel;
783 *new_runner = runner;
784 return 0;
785 }
786
787 int
sel_free_runner(sel_runner_t * runner)788 sel_free_runner(sel_runner_t *runner)
789 {
790 struct selector_s *sel = runner->sel;
791
792 sel_timer_lock(sel);
793 if (runner->in_use) {
794 sel_timer_unlock(sel);
795 return EBUSY;
796 }
797 sel_timer_unlock(sel);
798 free(runner);
799 return 0;
800 }
801
802 int
sel_run(sel_runner_t * runner,sel_runner_func_t func,void * cb_data)803 sel_run(sel_runner_t *runner, sel_runner_func_t func, void *cb_data)
804 {
805 struct selector_s *sel = runner->sel;
806
807 sel_timer_lock(sel);
808 if (runner->in_use) {
809 sel_timer_unlock(sel);
810 return EBUSY;
811 }
812
813 runner->func = func;
814 runner->cb_data = cb_data;
815 runner->next = NULL;
816 runner->in_use = 1;
817
818 if (sel->runner_tail) {
819 sel->runner_tail->next = runner;
820 sel->runner_tail = runner;
821 } else {
822 sel->runner_head = runner;
823 sel->runner_tail = runner;
824 }
825 sel_timer_unlock(sel);
826 return 0;
827 }
828
/* Execute every queued sel_run() request.  Must be called with the
   timer lock held; the lock is dropped around each callback. */
static void
process_runners(struct selector_s *sel)
{
    while (sel->runner_head) {
	sel_runner_t *runner = sel->runner_head;
	sel_runner_func_t func;
	void *cb_data;

	sel->runner_head = sel->runner_head->next;
	if (!sel->runner_head)
	    sel->runner_tail = NULL;
	/* Copy out the call and mark the runner free *before*
	   unlocking, so the callback may immediately re-queue or free
	   the runner. */
	runner->in_use = 0;
	func = runner->func;
	cb_data = runner->cb_data;
	sel_timer_unlock(sel);
	func(runner, cb_data);
	sel_timer_lock(sel);
    }
}
848
/* Dispatch one I/O event on fd i.  Must be called with the fd lock
   held; the lock is dropped around the user handler.  use_count keeps
   the fd_state_t alive if the handlers are cleared or replaced from
   inside the handler; the last user runs the deferred cleanup. */
static void
handle_selector_call(struct selector_s *sel, int i, volatile fd_set *fdset,
		     sel_fd_handler_t handler)
{
    void *data;
    fd_state_t *state;

    if (handler == NULL) {
	/* Somehow we don't have a handler for this.
	   Just shut it down. */
	FD_CLR(i, fdset);
	return;
    }

    if (!FD_ISSET(i, fdset))
	/* The value was cleared, ignore it. */
	return;

    data = sel->fds[i].data;
    state = sel->fds[i].state;
    state->use_count++;
    sel_fd_unlock(sel);
    handler(i, data);
    sel_fd_lock(sel);
    state->use_count--;
    if (state->deleted && state->use_count == 0) {
	/* The fd was cleared while its handler ran; we are the last
	   user, so finish the deferred cleanup. */
	if (state->done) {
	    sel_fd_unlock(sel);
	    state->done(i, data);
	    sel_fd_lock(sel);
	}
	free(state);
    }
}
883
884 /*
885 * return == 0 when timeout
886 * > 0 when successful
887 * < 0 when error
888 */
static int
process_fds(struct selector_s *sel,
	    volatile struct timeval *timeout)
{
    fd_set tmp_read_set;
    fd_set tmp_write_set;
    fd_set tmp_except_set;
    int i;
    int err;
    int num_fds;

    /* Snapshot the sets under the lock; select() operates on the
       copies so other threads may change the real sets meanwhile. */
    sel_fd_lock(sel);
    memcpy(&tmp_read_set, (void *) &sel->read_set, sizeof(tmp_read_set));
    memcpy(&tmp_write_set, (void *) &sel->write_set, sizeof(tmp_write_set));
    memcpy(&tmp_except_set, (void *) &sel->except_set, sizeof(tmp_except_set));
    num_fds = sel->maxfd + 1;
    sel_fd_unlock(sel);

    err = select(num_fds,
		 &tmp_read_set,
		 &tmp_write_set,
		 &tmp_except_set,
		 (struct timeval *) timeout);
    if (err <= 0)
	goto out;

    /* We got some I/O. */
    sel_fd_lock(sel);
    for (i = 0; i <= sel->maxfd; i++) {
	/* Pass the live (not snapshot) sets so handle_selector_call()
	   can see if the fd was disabled during an earlier handler. */
	if (FD_ISSET(i, &tmp_read_set))
	    handle_selector_call(sel, i, &sel->read_set,
				 sel->fds[i].handle_read);
	if (FD_ISSET(i, &tmp_write_set))
	    handle_selector_call(sel, i, &sel->write_set,
				 sel->fds[i].handle_write);
	if (FD_ISSET(i, &tmp_except_set))
	    handle_selector_call(sel, i, &sel->except_set,
				 sel->fds[i].handle_except);
    }
    sel_fd_unlock(sel);
 out:
    return err;
}
932
#ifdef HAVE_EPOLL_PWAIT
/* Wait for one event via epoll_pwait() and dispatch it.  Returns the
   epoll_pwait() result on timeout (0) or error (<0).  NOTE(review):
   it also returns 0 after successfully dispatching an event, so the
   caller cannot distinguish "handled I/O" from "timed out" on the
   epoll path -- confirm that is intentional. */
static int
process_fds_epoll(struct selector_s *sel, struct timeval *tvtimeout)
{
    int rv, fd;
    struct epoll_event event;
    int timeout;
    sigset_t sigmask;

    if (tvtimeout->tv_sec > 600)
	/* Don't wait over 10 minutes, to work around an old epoll bug
	   and avoid issues with timeout overflowing on 64-bit systems,
	   which is much larger that 10 minutes, but who cares. */
	timeout = 600 * 1000;
    else
	timeout = ((tvtimeout->tv_sec * 1000) +
		   (tvtimeout->tv_usec + 999) / 1000);

    /* Wait with wake_sig unblocked so another thread can interrupt
       the wait (the signal is blocked everywhere else). */
#ifdef USE_PTHREADS
    pthread_sigmask(SIG_SETMASK, NULL, &sigmask);
#else
    sigprocmask(SIG_SETMASK, NULL, &sigmask);
#endif
    sigdelset(&sigmask, sel->wake_sig);
    rv = epoll_pwait(sel->epollfd, &event, 1, timeout, &sigmask);

    if (rv <= 0)
	return rv;

    sel_fd_lock(sel);
    fd = event.data.fd;
    if (event.events & (EPOLLIN | EPOLLHUP))
	handle_selector_call(sel, fd, &sel->read_set,
			     sel->fds[fd].handle_read);
    if (event.events & EPOLLOUT)
	handle_selector_call(sel, fd, &sel->write_set,
			     sel->fds[fd].handle_write);
    if (event.events & (EPOLLERR | EPOLLPRI))
	handle_selector_call(sel, fd, &sel->except_set,
			     sel->fds[fd].handle_except);

    /* Rearm the event. Remember it could have been deleted in the handler. */
    if (sel->fds[fd].state)
	sel_update_epoll(sel, fd, EPOLL_CTL_MOD);
    sel_fd_unlock(sel);

    return 0;
}
#endif
982
/* One pass of the event loop: run queued runners, process expired
   timers, then wait for I/O for at most the time until the next timer
   (clamped by the caller's timeout, if given).  Returns the value of
   the underlying select()/epoll wait: 0 on timeout, >0 when I/O was
   handled, <0 on error (errno set). */
int
sel_select(struct selector_s *sel,
	   sel_send_sig_cb send_sig,
	   long thread_id,
	   void *cb_data,
	   struct timeval *timeout)
{
    int err;
    struct timeval loc_timeout;
    sel_wait_list_t wait_entry;

    sel_timer_lock(sel);
    process_runners(sel);
    process_timers(sel, (struct timeval *)(&loc_timeout));
    if (timeout) {
	/* Use the caller's timeout when it is sooner than the next
	   timer expiry. */
	if (cmp_timeval((struct timeval *)(&loc_timeout), timeout) >= 0)
	    memcpy(&loc_timeout, timeout, sizeof(loc_timeout));
    }
    /* Publish loc_timeout on the wait list so other threads can zero
       it (and signal us) to abort the wait early; see
       wake_sel_thread(). */
    add_sel_wait_list(sel, &wait_entry, send_sig, cb_data, thread_id,
		      &loc_timeout);
    sel_timer_unlock(sel);

#ifdef HAVE_EPOLL_PWAIT
    if (sel->epollfd >= 0)
	err = process_fds_epoll(sel, &loc_timeout);
    else
#endif
	err = process_fds(sel, &loc_timeout);

    sel_timer_lock(sel);
    remove_sel_wait_list(sel, &wait_entry);
    sel_timer_unlock(sel);

    return err;
}
1018
1019 /* The main loop for the program. This will select on the various
1020 sets, then scan for any available I/O to process. It also monitors
1021 the time and call the timeout handlers periodically. */
1022 int
sel_select_loop(struct selector_s * sel,sel_send_sig_cb send_sig,long thread_id,void * cb_data)1023 sel_select_loop(struct selector_s *sel,
1024 sel_send_sig_cb send_sig,
1025 long thread_id,
1026 void *cb_data)
1027 {
1028 for (;;) {
1029 int err = sel_select(sel, send_sig, thread_id, cb_data, NULL);
1030
1031 if ((err < 0) && (errno != EINTR)) {
1032 err = errno;
1033 /* An error occurred. */
1034 /* An error is bad, we need to abort. */
1035 syslog(LOG_ERR, "select_loop() - select: %m");
1036 return err;
1037 }
1038 }
1039 }
1040
1041 /* Initialize the select code. */
1042 int
sel_alloc_selector_thread(struct selector_s ** new_selector,int wake_sig,sel_lock_t * (* sel_lock_alloc)(void * cb_data),void (* sel_lock_free)(sel_lock_t *),void (* sel_lock)(sel_lock_t *),void (* sel_unlock)(sel_lock_t *),void * cb_data)1043 sel_alloc_selector_thread(struct selector_s **new_selector, int wake_sig,
1044 sel_lock_t *(*sel_lock_alloc)(void *cb_data),
1045 void (*sel_lock_free)(sel_lock_t *),
1046 void (*sel_lock)(sel_lock_t *),
1047 void (*sel_unlock)(sel_lock_t *),
1048 void *cb_data)
1049 {
1050 struct selector_s *sel;
1051 unsigned int i;
1052
1053 sel = malloc(sizeof(*sel));
1054 if (!sel)
1055 return ENOMEM;
1056 memset(sel, 0, sizeof(*sel));
1057
1058 sel->sel_lock_alloc = sel_lock_alloc;
1059 sel->sel_lock_free = sel_lock_free;
1060 sel->sel_lock = sel_lock;
1061 sel->sel_unlock = sel_unlock;
1062
1063 /* The list is initially empty. */
1064 sel->wait_list.next = &sel->wait_list;
1065 sel->wait_list.prev = &sel->wait_list;
1066
1067 sel->wake_sig = wake_sig;
1068
1069 FD_ZERO((fd_set *) &sel->read_set);
1070 FD_ZERO((fd_set *) &sel->write_set);
1071 FD_ZERO((fd_set *) &sel->except_set);
1072
1073 for (i = 0; i < FD_SETSIZE; i++) {
1074 init_fd((fd_control_t *) &(sel->fds[i]));
1075 }
1076
1077 theap_init(&sel->timer_heap);
1078
1079 if (sel->sel_lock_alloc) {
1080 sel->timer_lock = sel->sel_lock_alloc(cb_data);
1081 if (!sel->timer_lock) {
1082 free(sel);
1083 return ENOMEM;
1084 }
1085 sel->fd_lock = sel->sel_lock_alloc(cb_data);
1086 if (!sel->fd_lock) {
1087 sel->sel_lock_free(sel->fd_lock);
1088 free(sel);
1089 return ENOMEM;
1090 }
1091 }
1092
1093 #ifdef HAVE_EPOLL_PWAIT
1094 sel->epollfd = epoll_create(32768);
1095 if (sel->epollfd == -1) {
1096 syslog(LOG_ERR, "Unable to set up epoll, falling back to select: %m");
1097 } else {
1098 int rv;
1099 sigset_t sigset;
1100
1101 sigemptyset(&sigset);
1102 sigaddset(&sigset, wake_sig);
1103 rv = sigprocmask(SIG_BLOCK, &sigset, NULL);
1104 if (rv == -1) {
1105 rv = errno;
1106 close(sel->epollfd);
1107 if (sel->sel_lock_alloc) {
1108 sel->sel_lock_free(sel->fd_lock);
1109 sel->sel_lock_free(sel->timer_lock);
1110 }
1111 free(sel);
1112 return rv;
1113 }
1114 }
1115 #endif
1116
1117 *new_selector = sel;
1118
1119 return 0;
1120 }
1121
/* Convenience wrapper: allocate a selector with no locking and no
   wake signal, for single-threaded use. */
int
sel_alloc_selector_nothread(struct selector_s **new_selector)
{
    return sel_alloc_selector_thread(new_selector, 0, NULL, NULL, NULL, NULL,
				     NULL);
}
1128
/* Tear down a selector: free every timer still in the heap, close the
   epoll fd, release the locks, and free the structure.  The caller
   must ensure no other thread is still using the selector. */
int
sel_free_selector(struct selector_s *sel)
{
    sel_timer_t *elem;

    /* Drain the timer heap, freeing each node. */
    elem = theap_get_top(&(sel->timer_heap));
    while (elem) {
	theap_remove(&(sel->timer_heap), elem);
	free(elem);
	elem = theap_get_top(&(sel->timer_heap));
    }
#ifdef HAVE_EPOLL_PWAIT
    if (sel->epollfd >= 0)
	close(sel->epollfd);
#endif
    if (sel->fd_lock)
	sel->sel_lock_free(sel->fd_lock);
    if (sel->timer_lock)
	sel->sel_lock_free(sel->timer_lock);
    free(sel);

    return 0;
}
1152