xref: /dragonfly/contrib/dhcpcd/src/eloop.c (revision f984587a)
1 /* SPDX-License-Identifier: BSD-2-Clause */
2 /*
3  * eloop - portable event based main loop.
4  * Copyright (c) 2006-2023 Roy Marples <roy@marples.name>
5  * All rights reserved.
6 
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 /* NOTES:
30  * Basically, for a small number of fds (total, not the maximum fd value)
31  * of say a few hundred, ppoll(2) performs just fine, if not faster than others.
32  * It also has the smallest memory and binary size footprint.
33  * ppoll(2) is available on all modern OSes my software runs on and should
34  * become a standard POSIX interface.
35  * If ppoll is not available, then pselect(2) can be used instead, which has
36  * an even smaller memory and binary size footprint.
37  * However, this difference is quite tiny and the ppoll API is superior:
38  * pselect cannot report error conditions such as EOF, for example.
39  *
40  * Both epoll(7) and kqueue(2) require an extra fd per process to manage
41  * their respective lists of interest AND extra syscalls to manage them.
42  * So for a small number of fds, these are more resource intensive,
43  * especially when used with more than one process.
44  *
45  * epoll avoids the resource limit RLIMIT_NOFILE Linux poll stupidly applies.
46  * kqueue avoids the same limit on OpenBSD.
47  * ppoll can still be secured in both by using seccomp or pledge.
48  *
49  * kqueue can avoid the signal trick we use here, so that functions other
50  * than those listed as async-signal-safe in sigaction(2) can be called
51  * from signal handling; it is probably more robust than ours at surviving
52  * a signal storm. signalfd(2) is available for Linux and probably works
53  * in a similar way, but it's yet another fd to use.
54  *
55  * Taking this all into account, ppoll(2) is the default mechanism used here.
56  */
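57 
58 /*
59  * A minimal usage sketch (illustrative only): on_read, on_timeout,
60  * handle_input, fd, arg and exit_code are hypothetical application
61  * code, and queue number 1 is arbitrary.
62  *
63  *	static void on_read(void *arg, unsigned short events)
64  *	{
65  *
66  *		if (events & ELE_READ)
67  *			handle_input(arg);
68  *	}
69  *
70  *	struct eloop *eloop = eloop_new();
71  *
72  *	eloop_event_add(eloop, fd, ELE_READ, on_read, arg);
73  *	eloop_q_timeout_add_sec(eloop, 1, 10, on_timeout, arg);
74  *	exit_code = eloop_start(eloop, NULL);
75  *	eloop_free(eloop);
76  */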
57 
58 #if (defined(__unix__) || defined(unix)) && !defined(USG)
59 #include <sys/param.h>
60 #endif
61 #include <sys/time.h>
62 
63 #include <assert.h>
64 #include <errno.h>
65 #include <fcntl.h>
66 #include <limits.h>
67 #include <stdbool.h>
68 #include <signal.h>
69 #include <stdarg.h>
70 #include <stdint.h>
71 #include <stdlib.h>
72 #include <string.h>
73 #include <unistd.h>
74 
75 /* config.h should define HAVE_PPOLL, etc. */
76 #if defined(HAVE_CONFIG_H) && !defined(NO_CONFIG_H)
77 #include "config.h"
78 #endif
79 
80 /* Prioritise which mechanism we want to use. */
81 #if defined(HAVE_PPOLL)
82 #undef HAVE_EPOLL
83 #undef HAVE_KQUEUE
84 #undef HAVE_PSELECT
85 #elif defined(HAVE_POLLTS)
86 #define HAVE_PPOLL
87 #define ppoll pollts
88 #undef HAVE_EPOLL
89 #undef HAVE_KQUEUE
90 #undef HAVE_PSELECT
91 #elif defined(HAVE_KQUEUE)
92 #undef HAVE_EPOLL
93 #undef HAVE_PSELECT
94 #elif defined(HAVE_EPOLL)
95 #undef HAVE_KQUEUE
96 #undef HAVE_PSELECT
97 #elif !defined(HAVE_PSELECT)
98 #define HAVE_PPOLL
99 #endif
100 
101 #if defined(HAVE_KQUEUE)
102 #include <sys/event.h>
103 #if defined(__DragonFly__) || defined(__FreeBSD__)
104 #define	_kevent(kq, cl, ncl, el, nel, t) \
105 	kevent((kq), (cl), (int)(ncl), (el), (int)(nel), (t))
106 #else
107 #define	_kevent kevent
108 #endif
109 #define NFD 2
110 #elif defined(HAVE_EPOLL)
111 #include <sys/epoll.h>
112 #define	NFD 1
113 #elif defined(HAVE_PPOLL)
114 #include <poll.h>
115 #define NFD 1
116 #elif defined(HAVE_PSELECT)
117 #include <sys/select.h>
118 #endif
119 
120 #include "eloop.h"
121 
122 #ifndef UNUSED
123 #define UNUSED(a) (void)((a))
124 #endif
125 #ifndef __unused
126 #ifdef __GNUC__
127 #define __unused   __attribute__((__unused__))
128 #else
129 #define __unused
130 #endif
131 #endif
132 
133 /* Our structures require TAILQ macros, which really every libc should
134  * ship as they are useful beyond belief.
135  * Sadly some libcs don't have sys/queue.h and some that do don't have
136  * the TAILQ_FOREACH macro. For those that don't, the application using
137  * this implementation will need to ship a working queue.h somewhere.
138  * If config.h did not find sys/queue.h, QUEUE_H may name a replacement
139  * header; otherwise we fall back to queue.h in the current directory. */
140 #ifndef TAILQ_FOREACH
141 #ifdef HAVE_SYS_QUEUE_H
142 #include <sys/queue.h>
143 #elif defined(QUEUE_H)
144 #define __QUEUE_HEADER(x) #x
145 #define _QUEUE_HEADER(x) __QUEUE_HEADER(x)
146 #include _QUEUE_HEADER(QUEUE_H)
147 #else
148 #include "queue.h"
149 #endif
150 #endif
151 
152 #ifdef ELOOP_DEBUG
153 #include <stdio.h>
154 #endif
155 
156 /*
157  * Allow a backlog of signals.
158  * If you use many eloops in the same process, they should all
159  * use the same signal handler or have the signal handler unset.
160  * Otherwise the signal might not behave as expected.
161  */
162 #define ELOOP_NSIGNALS	5
163 
164 /*
165  * time_t is a signed integer of an unspecified size.
166  * To adjust for time_t wrapping, we need to work out the maximum signed
167  * value and use that as the maximum.
168  */
169 #ifndef TIME_MAX
170 #define	TIME_MAX	((1ULL << (sizeof(time_t) * NBBY - 1)) - 1)
171 #endif
172 /* The unsigned maximum is then simple - multiply by two and add one. */
173 #ifndef UTIME_MAX
174 #define	UTIME_MAX	((TIME_MAX * 2) + 1)
175 #endif
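176 
177 /* For example, with a 32-bit time_t (NBBY being 8 bits per byte),
178  * TIME_MAX is ((1ULL << 31) - 1) = 0x7fffffff and UTIME_MAX is
179  * 0xffffffff. */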
176 
177 struct eloop_event {
178 	TAILQ_ENTRY(eloop_event) next;
179 	int fd;
180 	void (*cb)(void *, unsigned short);
181 	void *cb_arg;
182 	unsigned short events;
183 #ifdef HAVE_PPOLL
184 	struct pollfd *pollfd;
185 #endif
186 };
187 
188 struct eloop_timeout {
189 	TAILQ_ENTRY(eloop_timeout) next;
190 	unsigned int seconds;
191 	unsigned int nseconds;
192 	void (*callback)(void *);
193 	void *arg;
194 	int queue;
195 };
196 
197 struct eloop {
198 	TAILQ_HEAD (event_head, eloop_event) events;
199 	size_t nevents;
200 	struct event_head free_events;
201 
202 	struct timespec now;
203 	TAILQ_HEAD (timeout_head, eloop_timeout) timeouts;
204 	struct timeout_head free_timeouts;
205 
206 	const int *signals;
207 	size_t nsignals;
208 	void (*signal_cb)(int, void *);
209 	void *signal_cb_ctx;
210 
211 #if defined(HAVE_KQUEUE) || defined(HAVE_EPOLL)
212 	int fd;
213 #endif
214 #if defined(HAVE_KQUEUE)
215 	struct kevent *fds;
216 #elif defined(HAVE_EPOLL)
217 	struct epoll_event *fds;
218 #elif defined(HAVE_PPOLL)
219 	struct pollfd *fds;
220 #endif
221 #if !defined(HAVE_PSELECT)
222 	size_t nfds;
223 #endif
224 
225 	int exitcode;
226 	bool exitnow;
227 	bool events_need_setup;
228 	bool cleared;
229 };
230 
231 #ifdef HAVE_REALLOCARRAY
232 #define	eloop_realloca	reallocarray
233 #else
234 /* Handy routine to check for potential overflow.
235  * reallocarray(3) and reallocarr(3) are not portable. */
236 #define SQRT_SIZE_MAX (((size_t)1) << (sizeof(size_t) * CHAR_BIT / 2))
237 static void *
238 eloop_realloca(void *ptr, size_t n, size_t size)
239 {
240 
241 	if ((n | size) >= SQRT_SIZE_MAX && n > SIZE_MAX / size) {
242 		errno = EOVERFLOW;
243 		return NULL;
244 	}
245 	return realloc(ptr, n * size);
246 }
247 #endif
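248 
249 /* Example (illustrative) of the fallback check above: on a typical LP64
250  * system SQRT_SIZE_MAX is 1ULL << 32, so a call such as
251  * eloop_realloca(p, SIZE_MAX / 2, 16) fails with EOVERFLOW instead of
252  * letting n * size wrap silently. */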
248 
249 
250 static int
251 eloop_event_setup_fds(struct eloop *eloop)
252 {
253 	struct eloop_event *e, *ne;
254 #if defined(HAVE_KQUEUE)
255 	struct kevent *pfd;
256 	size_t nfds = eloop->nsignals;
257 #elif defined(HAVE_EPOLL)
258 	struct epoll_event *pfd;
259 	size_t nfds = 0;
260 #elif defined(HAVE_PPOLL)
261 	struct pollfd *pfd;
262 	size_t nfds = 0;
263 #endif
264 
265 #ifndef HAVE_PSELECT
266 	nfds += eloop->nevents * NFD;
267 	if (eloop->nfds < nfds) {
268 		pfd = eloop_realloca(eloop->fds, nfds, sizeof(*pfd));
269 		if (pfd == NULL)
270 			return -1;
271 		eloop->fds = pfd;
272 		eloop->nfds = nfds;
273 	}
274 #endif
275 
276 #ifdef HAVE_PPOLL
277 	pfd = eloop->fds;
278 #endif
279 	TAILQ_FOREACH_SAFE(e, &eloop->events, next, ne) {
280 		if (e->fd == -1) {
281 			TAILQ_REMOVE(&eloop->events, e, next);
282 			TAILQ_INSERT_TAIL(&eloop->free_events, e, next);
283 			continue;
284 		}
285 #ifdef HAVE_PPOLL
286 		e->pollfd = pfd;
287 		pfd->fd = e->fd;
288 		pfd->events = 0;
289 		if (e->events & ELE_READ)
290 			pfd->events |= POLLIN;
291 		if (e->events & ELE_WRITE)
292 			pfd->events |= POLLOUT;
293 		pfd->revents = 0;
294 		pfd++;
295 #endif
296 	}
297 
298 	eloop->events_need_setup = false;
299 	return 0;
300 }
301 
302 size_t
303 eloop_event_count(const struct eloop *eloop)
304 {
305 
306 	return eloop->nevents;
307 }
308 
309 int
310 eloop_event_add(struct eloop *eloop, int fd, unsigned short events,
311     void (*cb)(void *, unsigned short), void *cb_arg)
312 {
313 	struct eloop_event *e;
314 	bool added;
315 #if defined(HAVE_KQUEUE)
316 	struct kevent ke[2], *kep = &ke[0];
317 	size_t n;
318 #elif defined(HAVE_EPOLL)
319 	struct epoll_event epe;
320 	int op;
321 #endif
322 
323 	assert(eloop != NULL);
324 	assert(cb != NULL && cb_arg != NULL);
325 	if (fd == -1 || !(events & (ELE_READ | ELE_WRITE | ELE_HANGUP))) {
326 		errno = EINVAL;
327 		return -1;
328 	}
329 
330 	TAILQ_FOREACH(e, &eloop->events, next) {
331 		if (e->fd == fd)
332 			break;
333 	}
334 
335 	if (e == NULL) {
336 		added = true;
337 		e = TAILQ_FIRST(&eloop->free_events);
338 		if (e != NULL)
339 			TAILQ_REMOVE(&eloop->free_events, e, next);
340 		else {
341 			e = malloc(sizeof(*e));
342 			if (e == NULL) {
343 				return -1;
344 			}
345 		}
346 		TAILQ_INSERT_HEAD(&eloop->events, e, next);
347 		eloop->nevents++;
348 		e->fd = fd;
349 		e->events = 0;
350 	} else
351 		added = false;
352 
353 	e->cb = cb;
354 	e->cb_arg = cb_arg;
355 
356 #if defined(HAVE_KQUEUE)
357 	n = 2;
358 	if (events & ELE_READ && !(e->events & ELE_READ))
359 		EV_SET(kep++, (uintptr_t)fd, EVFILT_READ, EV_ADD, 0, 0, e);
360 	else if (!(events & ELE_READ) && e->events & ELE_READ)
361 		EV_SET(kep++, (uintptr_t)fd, EVFILT_READ, EV_DELETE, 0, 0, e);
362 	else
363 		n--;
364 	if (events & ELE_WRITE && !(e->events & ELE_WRITE))
365 		EV_SET(kep++, (uintptr_t)fd, EVFILT_WRITE, EV_ADD, 0, 0, e);
366 	else if (!(events & ELE_WRITE) && e->events & ELE_WRITE)
367 		EV_SET(kep++, (uintptr_t)fd, EVFILT_WRITE, EV_DELETE, 0, 0, e);
368 	else
369 		n--;
370 #ifdef EVFILT_PROCDESC
371 	if (events & ELE_HANGUP)
372 		EV_SET(kep++, (uintptr_t)fd, EVFILT_PROCDESC, EV_ADD,
373 		    NOTE_EXIT, 0, e);
374 	else
375 		n--;
376 #endif
377 	if (n != 0 && _kevent(eloop->fd, ke, n, NULL, 0, NULL) == -1) {
378 		if (added) {
379 			TAILQ_REMOVE(&eloop->events, e, next);
380 			TAILQ_INSERT_TAIL(&eloop->free_events, e, next);
381 		}
382 		return -1;
383 	}
384 #elif defined(HAVE_EPOLL)
385 	memset(&epe, 0, sizeof(epe));
386 	epe.data.ptr = e;
387 	if (events & ELE_READ)
388 		epe.events |= EPOLLIN;
389 	if (events & ELE_WRITE)
390 		epe.events |= EPOLLOUT;
391 	op = added ? EPOLL_CTL_ADD : EPOLL_CTL_MOD;
392 	if (epe.events != 0 && epoll_ctl(eloop->fd, op, fd, &epe) == -1) {
393 		if (added) {
394 			TAILQ_REMOVE(&eloop->events, e, next);
395 			TAILQ_INSERT_TAIL(&eloop->free_events, e, next);
396 		}
397 		return -1;
398 	}
399 #elif defined(HAVE_PPOLL)
400 	e->pollfd = NULL;
401 	UNUSED(added);
402 #else
403 	UNUSED(added);
404 #endif
405 	e->events = events;
406 	eloop->events_need_setup = true;
407 	return 0;
408 }
409 
410 int
411 eloop_event_delete(struct eloop *eloop, int fd)
412 {
413 	struct eloop_event *e;
414 #if defined(HAVE_KQUEUE)
415 	struct kevent ke[2], *kep = &ke[0];
416 	size_t n;
417 #endif
418 
419 	assert(eloop != NULL);
420 	if (fd == -1) {
421 		errno = EINVAL;
422 		return -1;
423 	}
424 
425 	TAILQ_FOREACH(e, &eloop->events, next) {
426 		if (e->fd == fd)
427 			break;
428 	}
429 	if (e == NULL) {
430 		errno = ENOENT;
431 		return -1;
432 	}
433 
434 #if defined(HAVE_KQUEUE)
435 	n = 0;
436 	if (e->events & ELE_READ) {
437 		EV_SET(kep++, (uintptr_t)fd, EVFILT_READ, EV_DELETE, 0, 0, e);
438 		n++;
439 	}
440 	if (e->events & ELE_WRITE) {
441 		EV_SET(kep++, (uintptr_t)fd, EVFILT_WRITE, EV_DELETE, 0, 0, e);
442 		n++;
443 	}
444 	if (n != 0 && _kevent(eloop->fd, ke, n, NULL, 0, NULL) == -1)
445 		return -1;
446 #elif defined(HAVE_EPOLL)
447 	if (epoll_ctl(eloop->fd, EPOLL_CTL_DEL, fd, NULL) == -1)
448 		return -1;
449 #endif
450 	e->fd = -1;
451 	eloop->nevents--;
452 	eloop->events_need_setup = true;
453 	return 1;
454 }
455 
456 unsigned long long
457 eloop_timespec_diff(const struct timespec *tsp, const struct timespec *usp,
458     unsigned int *nsp)
459 {
460 	unsigned long long tsecs, usecs, secs;
461 	long nsecs;
462 
463 	if (tsp->tv_sec < 0) /* time wrapped */
464 		tsecs = UTIME_MAX - (unsigned long long)(-tsp->tv_sec);
465 	else
466 		tsecs = (unsigned long long)tsp->tv_sec;
467 	if (usp->tv_sec < 0) /* time wrapped */
468 		usecs = UTIME_MAX - (unsigned long long)(-usp->tv_sec);
469 	else
470 		usecs = (unsigned long long)usp->tv_sec;
471 
472 	if (usecs > tsecs) /* time wrapped */
473 		secs = (UTIME_MAX - usecs) + tsecs;
474 	else
475 		secs = tsecs - usecs;
476 
477 	nsecs = tsp->tv_nsec - usp->tv_nsec;
478 	if (nsecs < 0) {
479 		if (secs == 0)
480 			nsecs = 0;
481 		else {
482 			secs--;
483 			nsecs += NSEC_PER_SEC;
484 		}
485 	}
486 	if (nsp != NULL)
487 		*nsp = (unsigned int)nsecs;
488 	return secs;
489 }
490 
491 static void
492 eloop_reduce_timers(struct eloop *eloop)
493 {
494 	struct timespec now;
495 	unsigned long long secs;
496 	unsigned int nsecs;
497 	struct eloop_timeout *t;
498 
499 	clock_gettime(CLOCK_MONOTONIC, &now);
500 	secs = eloop_timespec_diff(&now, &eloop->now, &nsecs);
501 
502 	TAILQ_FOREACH(t, &eloop->timeouts, next) {
503 		if (secs > t->seconds) {
504 			t->seconds = 0;
505 			t->nseconds = 0;
506 		} else {
507 			t->seconds -= (unsigned int)secs;
508 			if (nsecs > t->nseconds) {
509 				if (t->seconds == 0)
510 					t->nseconds = 0;
511 				else {
512 					t->seconds--;
513 					t->nseconds = NSEC_PER_SEC
514 					    - (nsecs - t->nseconds);
515 				}
516 			} else
517 				t->nseconds -= nsecs;
518 		}
519 	}
520 
521 	eloop->now = now;
522 }
523 
524 /*
525  * This implementation should cope with UINT_MAX seconds on a system
526  * where time_t is INT32_MAX. It should also cope with the monotonic timer
527  * wrapping, although this is highly unlikely.
528  * unsigned int should match or be greater than any on-wire specified timeout.
529  */
530 static int
531 eloop_q_timeout_add(struct eloop *eloop, int queue,
532     unsigned int seconds, unsigned int nseconds,
533     void (*callback)(void *), void *arg)
534 {
535 	struct eloop_timeout *t, *tt = NULL;
536 
537 	assert(eloop != NULL);
538 	assert(callback != NULL);
539 	assert(nseconds <= NSEC_PER_SEC);
540 
541 	/* Remove existing timeout if present. */
542 	TAILQ_FOREACH(t, &eloop->timeouts, next) {
543 		if (t->callback == callback && t->arg == arg) {
544 			TAILQ_REMOVE(&eloop->timeouts, t, next);
545 			break;
546 		}
547 	}
548 
549 	if (t == NULL) {
550 		/* No existing, so allocate or grab one from the free pool. */
551 		if ((t = TAILQ_FIRST(&eloop->free_timeouts))) {
552 			TAILQ_REMOVE(&eloop->free_timeouts, t, next);
553 		} else {
554 			if ((t = malloc(sizeof(*t))) == NULL)
555 				return -1;
556 		}
557 	}
558 
559 	eloop_reduce_timers(eloop);
560 
561 	t->seconds = seconds;
562 	t->nseconds = nseconds;
563 	t->callback = callback;
564 	t->arg = arg;
565 	t->queue = queue;
566 
567 	/* The timeout list should be in chronological order,
568 	 * soonest first. */
569 	TAILQ_FOREACH(tt, &eloop->timeouts, next) {
570 		if (t->seconds < tt->seconds ||
571 		    (t->seconds == tt->seconds && t->nseconds < tt->nseconds))
572 		{
573 			TAILQ_INSERT_BEFORE(tt, t, next);
574 			return 0;
575 		}
576 	}
577 	TAILQ_INSERT_TAIL(&eloop->timeouts, t, next);
578 	return 0;
579 }
580 
581 int
582 eloop_q_timeout_add_tv(struct eloop *eloop, int queue,
583     const struct timespec *when, void (*callback)(void *), void *arg)
584 {
585 
586 	if (when->tv_sec < 0 || (unsigned long)when->tv_sec > UINT_MAX) {
587 		errno = EINVAL;
588 		return -1;
589 	}
590 	if (when->tv_nsec < 0 || when->tv_nsec > NSEC_PER_SEC) {
591 		errno = EINVAL;
592 		return -1;
593 	}
594 
595 	return eloop_q_timeout_add(eloop, queue,
596 	    (unsigned int)when->tv_sec, (unsigned int)when->tv_nsec,
597 	    callback, arg);
598 }
599 
600 int
601 eloop_q_timeout_add_sec(struct eloop *eloop, int queue, unsigned int seconds,
602     void (*callback)(void *), void *arg)
603 {
604 
605 	return eloop_q_timeout_add(eloop, queue, seconds, 0, callback, arg);
606 }
607 
608 int
609 eloop_q_timeout_add_msec(struct eloop *eloop, int queue, unsigned long when,
610     void (*callback)(void *), void *arg)
611 {
612 	unsigned long seconds, nseconds;
613 
614 	seconds = when / MSEC_PER_SEC;
615 	if (seconds > UINT_MAX) {
616 		errno = EINVAL;
617 		return -1;
618 	}
619 
620 	nseconds = (when % MSEC_PER_SEC) * NSEC_PER_MSEC;
621 	return eloop_q_timeout_add(eloop, queue,
622 		(unsigned int)seconds, (unsigned int)nseconds, callback, arg);
623 }
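624 
625 /* For example, when = 1500 splits into seconds = 1 and
626  * nseconds = 500 * NSEC_PER_MSEC = 500000000. */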
624 
625 int
626 eloop_q_timeout_delete(struct eloop *eloop, int queue,
627     void (*callback)(void *), void *arg)
628 {
629 	struct eloop_timeout *t, *tt;
630 	int n;
631 
632 	assert(eloop != NULL);
633 
634 	n = 0;
635 	TAILQ_FOREACH_SAFE(t, &eloop->timeouts, next, tt) {
636 		if ((queue == 0 || t->queue == queue) &&
637 		    t->arg == arg &&
638 		    (!callback || t->callback == callback))
639 		{
640 			TAILQ_REMOVE(&eloop->timeouts, t, next);
641 			TAILQ_INSERT_TAIL(&eloop->free_timeouts, t, next);
642 			n++;
643 		}
644 	}
645 	return n;
646 }
647 
648 void
649 eloop_exit(struct eloop *eloop, int code)
650 {
651 
652 	assert(eloop != NULL);
653 
654 	eloop->exitcode = code;
655 	eloop->exitnow = true;
656 }
657 
658 void
659 eloop_enter(struct eloop *eloop)
660 {
661 
662 	assert(eloop != NULL);
663 
664 	eloop->exitnow = false;
665 }
666 
667 /* Must be called after fork(2) */
668 int
669 eloop_forked(struct eloop *eloop)
670 {
671 #if defined(HAVE_KQUEUE) || defined(HAVE_EPOLL)
672 	struct eloop_event *e;
673 #if defined(HAVE_KQUEUE)
674 	struct kevent *pfds, *pfd;
675 	size_t i;
676 	int error;
676 #elif defined(HAVE_EPOLL)
677 	struct epoll_event epe = { .events = 0 };
678 #endif
679 
680 	assert(eloop != NULL);
681 #if defined(HAVE_KQUEUE) || defined(HAVE_EPOLL)
682 	if (eloop->fd != -1)
683 		close(eloop->fd);
684 	if (eloop_open(eloop) == -1)
685 		return -1;
686 #endif
687 
688 #ifdef HAVE_KQUEUE
689 	pfds = malloc((eloop->nsignals + (eloop->nevents * NFD)) * sizeof(*pfds));
690 	if (pfds == NULL)
691 		return -1;
692 	pfd = pfds;
691 
692 	if (eloop->signal_cb != NULL) {
693 		for (i = 0; i < eloop->nsignals; i++) {
694 			EV_SET(pfd++, (uintptr_t)eloop->signals[i],
695 			    EVFILT_SIGNAL, EV_ADD, 0, 0, NULL);
696 		}
697 	} else
698 		i = 0;
699 #endif
700 
701 	TAILQ_FOREACH(e, &eloop->events, next) {
702 		if (e->fd == -1)
703 			continue;
704 #if defined(HAVE_KQUEUE)
705 		if (e->events & ELE_READ) {
706 			EV_SET(pfd++, (uintptr_t)e->fd,
707 			    EVFILT_READ, EV_ADD, 0, 0, e);
708 			i++;
709 		}
710 		if (e->events & ELE_WRITE) {
711 			EV_SET(pfd++, (uintptr_t)e->fd,
712 			    EVFILT_WRITE, EV_ADD, 0, 0, e);
713 			i++;
714 		}
715 #elif defined(HAVE_EPOLL)
716 		memset(&epe, 0, sizeof(epe));
717 		epe.data.ptr = e;
718 		if (e->events & ELE_READ)
719 			epe.events |= EPOLLIN;
720 		if (e->events & ELE_WRITE)
721 			epe.events |= EPOLLOUT;
722 		if (epoll_ctl(eloop->fd, EPOLL_CTL_ADD, e->fd, &epe) == -1)
723 			return -1;
724 #endif
725 	}
726 
727 #if defined(HAVE_KQUEUE)
728 	if (i == 0) {
729 		free(pfds);
730 		return 0;
731 	}
732 	error = _kevent(eloop->fd, pfds, i, NULL, 0, NULL);
733 	free(pfds);
734 	return error;
731 #else
732 	return 0;
733 #endif
734 #else
735 	UNUSED(eloop);
736 	return 0;
737 #endif
738 }
739 
740 int
741 eloop_open(struct eloop *eloop)
742 {
743 #if defined(HAVE_KQUEUE) || defined(HAVE_EPOLL)
744 	int fd;
745 
746 	assert(eloop != NULL);
747 #if defined(HAVE_KQUEUE1)
748 	fd = kqueue1(O_CLOEXEC);
749 #elif defined(HAVE_KQUEUE)
750 	int flags;
751 
752 	fd = kqueue();
753 	flags = fcntl(fd, F_GETFD, 0);
754 	if (!(flags != -1 && !(flags & FD_CLOEXEC) &&
755 	    fcntl(fd, F_SETFD, flags | FD_CLOEXEC) == 0))
756 	{
757 		close(fd);
758 		return -1;
759 	}
760 #elif defined(HAVE_EPOLL)
761 	fd = epoll_create1(EPOLL_CLOEXEC);
762 #endif
763 
764 	eloop->fd = fd;
765 	return fd;
766 #else
767 	UNUSED(eloop);
768 	return 0;
769 #endif
770 }
771 
772 int
773 eloop_signal_set_cb(struct eloop *eloop,
774     const int *signals, size_t nsignals,
775     void (*signal_cb)(int, void *), void *signal_cb_ctx)
776 {
777 #ifdef HAVE_KQUEUE
778 	size_t i;
779 	struct kevent *ke, *kes;
780 #endif
781 	int error = 0;
782 
783 	assert(eloop != NULL);
784 
785 #ifdef HAVE_KQUEUE
786 	ke = kes = malloc(MAX(eloop->nsignals, nsignals) * sizeof(*kes));
787 	if (kes == NULL)
788 		return -1;
789 	for (i = 0; i < eloop->nsignals; i++) {
790 		EV_SET(ke++, (uintptr_t)eloop->signals[i],
791 		    EVFILT_SIGNAL, EV_DELETE, 0, 0, NULL);
792 	}
793 	if (i != 0 && _kevent(eloop->fd, kes, i, NULL, 0, NULL) == -1) {
794 		error = -1;
795 		goto out;
796 	}
797 #endif
798 
799 	eloop->signals = signals;
800 	eloop->nsignals = nsignals;
801 	eloop->signal_cb = signal_cb;
802 	eloop->signal_cb_ctx = signal_cb_ctx;
803 
804 #ifdef HAVE_KQUEUE
805 	if (signal_cb == NULL)
806 		goto out;
807 	ke = kes;
808 	for (i = 0; i < eloop->nsignals; i++) {
809 		EV_SET(ke++, (uintptr_t)eloop->signals[i],
810 		    EVFILT_SIGNAL, EV_ADD, 0, 0, NULL);
811 	}
812 	if (i != 0 && _kevent(eloop->fd, kes, i, NULL, 0, NULL) == -1)
813 		error = -1;
814 out:
815 	free(kes);
816 #endif
817 
818 	return error;
819 }
820 
821 #ifndef HAVE_KQUEUE
822 static volatile int _eloop_sig[ELOOP_NSIGNALS];
823 static volatile size_t _eloop_nsig;
824 
825 static void
826 eloop_signal3(int sig, __unused siginfo_t *siginfo, __unused void *arg)
827 {
828 
829 	if (_eloop_nsig == __arraycount(_eloop_sig)) {
830 #ifdef ELOOP_DEBUG
831 		fprintf(stderr, "%s: signal storm, discarding signal %d\n",
832 		    __func__, sig);
833 #endif
834 		return;
835 	}
836 
837 	_eloop_sig[_eloop_nsig++] = sig;
838 }
839 #endif
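840 
841 /* Note: eloop_signal3() only records the signal number, which is an
842  * async-signal-safe operation; eloop_start() later pops the queue and
843  * calls signal_cb outside of signal context. This is the signal trick
844  * referred to in the NOTES at the top of this file. */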
840 
841 int
842 eloop_signal_mask(struct eloop *eloop, sigset_t *oldset)
843 {
844 	sigset_t newset;
845 	size_t i;
846 #ifndef HAVE_KQUEUE
847 	struct sigaction sa = {
848 	    .sa_sigaction = eloop_signal3,
849 	    .sa_flags = SA_SIGINFO,
850 	};
851 #endif
852 
853 	assert(eloop != NULL);
854 
855 	sigemptyset(&newset);
856 	for (i = 0; i < eloop->nsignals; i++)
857 		sigaddset(&newset, eloop->signals[i]);
858 	if (sigprocmask(SIG_SETMASK, &newset, oldset) == -1)
859 		return -1;
860 
861 #ifndef HAVE_KQUEUE
862 	sigemptyset(&sa.sa_mask);
863 
864 	for (i = 0; i < eloop->nsignals; i++) {
865 		if (sigaction(eloop->signals[i], &sa, NULL) == -1)
866 			return -1;
867 	}
868 #endif
869 
870 	return 0;
871 }
872 
873 struct eloop *
874 eloop_new(void)
875 {
876 	struct eloop *eloop;
877 
878 	eloop = calloc(1, sizeof(*eloop));
879 	if (eloop == NULL)
880 		return NULL;
881 
882 	/* Check we have a working monotonic clock. */
883 	if (clock_gettime(CLOCK_MONOTONIC, &eloop->now) == -1) {
884 		free(eloop);
885 		return NULL;
886 	}
887 
888 	TAILQ_INIT(&eloop->events);
889 	TAILQ_INIT(&eloop->free_events);
890 	TAILQ_INIT(&eloop->timeouts);
891 	TAILQ_INIT(&eloop->free_timeouts);
892 	eloop->exitcode = EXIT_FAILURE;
893 
894 #if defined(HAVE_KQUEUE) || defined(HAVE_EPOLL)
895 	if (eloop_open(eloop) == -1) {
896 		eloop_free(eloop);
897 		return NULL;
898 	}
899 #endif
900 
901 	return eloop;
902 }
903 
904 void
905 eloop_clear(struct eloop *eloop, ...)
906 {
907 	va_list va1, va2;
908 	int except_fd;
909 	struct eloop_event *e, *ne;
910 	struct eloop_timeout *t;
911 
912 	if (eloop == NULL)
913 		return;
914 
915 	va_start(va1, eloop);
916 	TAILQ_FOREACH_SAFE(e, &eloop->events, next, ne) {
917 		va_copy(va2, va1);
918 		do
919 			except_fd = va_arg(va2, int);
920 		while (except_fd != -1 && except_fd != e->fd);
921 		va_end(va2);
922 		if (e->fd == except_fd && e->fd != -1)
923 			continue;
924 		TAILQ_REMOVE(&eloop->events, e, next);
925 		if (e->fd != -1) {
926 			close(e->fd);
927 			eloop->nevents--;
928 		}
929 		free(e);
930 	}
931 	va_end(va1);
932 
933 #if !defined(HAVE_PSELECT)
934 	/* Free the pollfd buffer and ensure it's re-created before
935 	 * the next run. This allows it to shrink in case we need far fewer
936 	 * signals and fds to respond to after forking. */
937 	free(eloop->fds);
938 	eloop->fds = NULL;
939 	eloop->nfds = 0;
940 	eloop->events_need_setup = true;
941 #endif
942 
943 	while ((e = TAILQ_FIRST(&eloop->free_events))) {
944 		TAILQ_REMOVE(&eloop->free_events, e, next);
945 		free(e);
946 	}
947 	while ((t = TAILQ_FIRST(&eloop->timeouts))) {
948 		TAILQ_REMOVE(&eloop->timeouts, t, next);
949 		free(t);
950 	}
951 	while ((t = TAILQ_FIRST(&eloop->free_timeouts))) {
952 		TAILQ_REMOVE(&eloop->free_timeouts, t, next);
953 		free(t);
954 	}
955 	eloop->cleared = true;
956 }
957 
958 void
959 eloop_free(struct eloop *eloop)
960 {
961 
962 	eloop_clear(eloop, -1);
963 #if defined(HAVE_KQUEUE) || defined(HAVE_EPOLL)
964 	if (eloop != NULL && eloop->fd != -1)
965 		close(eloop->fd);
966 #endif
967 	free(eloop);
968 }
969 
970 #if defined(HAVE_KQUEUE)
971 static int
972 eloop_run_kqueue(struct eloop *eloop, const struct timespec *ts)
973 {
974 	int n, nn;
975 	struct kevent *ke;
976 	struct eloop_event *e;
977 	unsigned short events;
978 
979 	n = _kevent(eloop->fd, NULL, 0, eloop->fds, eloop->nevents, ts);
980 	if (n == -1)
981 		return -1;
982 
983 	for (nn = n, ke = eloop->fds; nn != 0; nn--, ke++) {
984 		if (eloop->cleared || eloop->exitnow)
985 			break;
986 		e = (struct eloop_event *)ke->udata;
987 		if (ke->filter == EVFILT_SIGNAL) {
988 			eloop->signal_cb((int)ke->ident,
989 			    eloop->signal_cb_ctx);
990 			continue;
991 		}
992 		if (ke->filter == EVFILT_READ)
993 			events = ELE_READ;
994 		else if (ke->filter == EVFILT_WRITE)
995 			events = ELE_WRITE;
996 #ifdef EVFILT_PROCDESC
997 		else if (ke->filter == EVFILT_PROCDESC &&
998 		    ke->fflags & NOTE_EXIT)
999 			/* exit status is in ke->data.
1000 			 * As we default to using ppoll anyway
1001 			 * we don't have to do anything with it right now. */
1002 			events = ELE_HANGUP;
1003 #endif
1004 		else
1005 			continue; /* assert? */
1006 		if (ke->flags & EV_EOF)
1007 			events |= ELE_HANGUP;
1008 		if (ke->flags & EV_ERROR)
1009 			events |= ELE_ERROR;
1010 		e->cb(e->cb_arg, events);
1011 	}
1012 	return n;
1013 }
1014 
1015 #elif defined(HAVE_EPOLL)
1016 
1017 static int
1018 eloop_run_epoll(struct eloop *eloop,
1019     const struct timespec *ts, const sigset_t *signals)
1020 {
1021 	int timeout, n, nn;
1022 	struct epoll_event *epe;
1023 	struct eloop_event *e;
1024 	unsigned short events;
1025 
1026 	if (ts != NULL) {
1027 		if (ts->tv_sec > INT_MAX / 1000 ||
1028 		    (ts->tv_sec == INT_MAX / 1000 &&
1029 	     ((ts->tv_nsec + 999999) / 1000000 > INT_MAX % 1000)))
1030 			timeout = INT_MAX;
1031 		else
1032 			timeout = (int)(ts->tv_sec * 1000 +
1033 			    (ts->tv_nsec + 999999) / 1000000);
1034 	} else
1035 		timeout = -1;
1036 
1037 	if (signals != NULL)
1038 		n = epoll_pwait(eloop->fd, eloop->fds,
1039 		    (int)eloop->nevents, timeout, signals);
1040 	else
1041 		n = epoll_wait(eloop->fd, eloop->fds,
1042 		    (int)eloop->nevents, timeout);
1043 	if (n == -1)
1044 		return -1;
1045 
1046 	for (nn = n, epe = eloop->fds; nn != 0; nn--, epe++) {
1047 		if (eloop->cleared || eloop->exitnow)
1048 			break;
1049 		e = (struct eloop_event *)epe->data.ptr;
1050 		if (e->fd == -1)
1051 			continue;
1052 		events = 0;
1053 		if (epe->events & EPOLLIN)
1054 			events |= ELE_READ;
1055 		if (epe->events & EPOLLOUT)
1056 			events |= ELE_WRITE;
1057 		if (epe->events & EPOLLHUP)
1058 			events |= ELE_HANGUP;
1059 		if (epe->events & EPOLLERR)
1060 			events |= ELE_ERROR;
1061 		e->cb(e->cb_arg, events);
1062 	}
1063 	return n;
1064 }
1065 
1066 #elif defined(HAVE_PPOLL)
1067 
1068 static int
1069 eloop_run_ppoll(struct eloop *eloop,
1070     const struct timespec *ts, const sigset_t *signals)
1071 {
1072 	int n, nn;
1073 	struct eloop_event *e;
1074 	struct pollfd *pfd;
1075 	unsigned short events;
1076 
1077 	n = ppoll(eloop->fds, (nfds_t)eloop->nevents, ts, signals);
1078 	if (n == -1 || n == 0)
1079 		return n;
1080 
1081 	nn = n;
1082 	TAILQ_FOREACH(e, &eloop->events, next) {
1083 		if (eloop->cleared || eloop->exitnow)
1084 			break;
1085 		/* Skip freshly added events */
1086 		if ((pfd = e->pollfd) == NULL)
1087 			continue;
1088 		if (e->pollfd->revents) {
1089 			nn--;
1090 			events = 0;
1091 			if (pfd->revents & POLLIN)
1092 				events |= ELE_READ;
1093 			if (pfd->revents & POLLOUT)
1094 				events |= ELE_WRITE;
1095 			if (pfd->revents & POLLHUP)
1096 				events |= ELE_HANGUP;
1097 			if (pfd->revents & POLLERR)
1098 				events |= ELE_ERROR;
1099 			if (pfd->revents & POLLNVAL)
1100 				events |= ELE_NVAL;
1101 			if (events)
1102 				e->cb(e->cb_arg, events);
1103 		}
1104 		if (nn == 0)
1105 			break;
1106 	}
1107 	return n;
1108 }
1109 
1110 #elif defined(HAVE_PSELECT)
1111 
1112 static int
1113 eloop_run_pselect(struct eloop *eloop,
1114     const struct timespec *ts, const sigset_t *sigmask)
1115 {
1116 	fd_set read_fds, write_fds;
1117 	int maxfd, n;
1118 	struct eloop_event *e;
1119 	unsigned short events;
1120 
1121 	FD_ZERO(&read_fds);
1122 	FD_ZERO(&write_fds);
1123 	maxfd = 0;
1124 	TAILQ_FOREACH(e, &eloop->events, next) {
1125 		if (e->fd == -1)
1126 			continue;
1127 		if (e->events & ELE_READ) {
1128 			FD_SET(e->fd, &read_fds);
1129 			if (e->fd > maxfd)
1130 				maxfd = e->fd;
1131 		}
1132 		if (e->events & ELE_WRITE) {
1133 			FD_SET(e->fd, &write_fds);
1134 			if (e->fd > maxfd)
1135 				maxfd = e->fd;
1136 		}
1137 	}
1138 
1139 	/* except_fds is for STREAMS devices, which we don't use. */
1140 	n = pselect(maxfd + 1, &read_fds, &write_fds, NULL, ts, sigmask);
1141 	if (n == -1 || n == 0)
1142 		return n;
1143 
1144 	TAILQ_FOREACH(e, &eloop->events, next) {
1145 		if (eloop->cleared || eloop->exitnow)
1146 			break;
1147 		if (e->fd == -1)
1148 			continue;
1149 		events = 0;
1150 		if (FD_ISSET(e->fd, &read_fds))
1151 			events |= ELE_READ;
1152 		if (FD_ISSET(e->fd, &write_fds))
1153 			events |= ELE_WRITE;
1154 		if (events)
1155 			e->cb(e->cb_arg, events);
1156 	}
1157 
1158 	return n;
1159 }
1160 #endif
1161 
1162 int
1163 eloop_start(struct eloop *eloop, sigset_t *signals)
1164 {
1165 	int error;
1166 	struct eloop_timeout *t;
1167 	struct timespec ts, *tsp;
1168 
1169 	assert(eloop != NULL);
1170 #ifdef HAVE_KQUEUE
1171 	UNUSED(signals);
1172 #endif
1173 
1174 	for (;;) {
1175 		if (eloop->exitnow)
1176 			break;
1177 
1178 #ifndef HAVE_KQUEUE
1179 		if (_eloop_nsig != 0) {
1180 			int n = _eloop_sig[--_eloop_nsig];
1181 
1182 			if (eloop->signal_cb != NULL)
1183 				eloop->signal_cb(n, eloop->signal_cb_ctx);
1184 			continue;
1185 		}
1186 #endif
1187 
1188 		t = TAILQ_FIRST(&eloop->timeouts);
1189 		if (t == NULL && eloop->nevents == 0)
1190 			break;
1191 
1192 		if (t != NULL)
1193 			eloop_reduce_timers(eloop);
1194 
1195 		if (t != NULL && t->seconds == 0 && t->nseconds == 0) {
1196 			TAILQ_REMOVE(&eloop->timeouts, t, next);
1197 			t->callback(t->arg);
1198 			TAILQ_INSERT_TAIL(&eloop->free_timeouts, t, next);
1199 			continue;
1200 		}
1201 
1202 		if (t != NULL) {
1203 			if (t->seconds > INT_MAX) {
1204 				ts.tv_sec = (time_t)INT_MAX;
1205 				ts.tv_nsec = 0;
1206 			} else {
1207 				ts.tv_sec = (time_t)t->seconds;
1208 				ts.tv_nsec = (long)t->nseconds;
1209 			}
1210 			tsp = &ts;
1211 		} else
1212 			tsp = NULL;
1213 
1214 		eloop->cleared = false;
1215 		if (eloop->events_need_setup)
1216 			eloop_event_setup_fds(eloop);
1217 
1218 #if defined(HAVE_KQUEUE)
1219 		UNUSED(signals);
1220 		error = eloop_run_kqueue(eloop, tsp);
1221 #elif defined(HAVE_EPOLL)
1222 		error = eloop_run_epoll(eloop, tsp, signals);
1223 #elif defined(HAVE_PPOLL)
1224 		error = eloop_run_ppoll(eloop, tsp, signals);
1225 #elif defined(HAVE_PSELECT)
1226 		error = eloop_run_pselect(eloop, tsp, signals);
1227 #else
1228 #error no polling mechanism to run!
1229 #endif
1230 		if (error == -1) {
1231 			if (errno == EINTR)
1232 				continue;
1233 			return -errno;
1234 		}
1235 	}
1236 
1237 	return eloop->exitcode;
1238 }
1239