xref: /dragonfly/contrib/dhcpcd/src/eloop.c (revision 556932ec)
/* SPDX-License-Identifier: BSD-2-Clause */
/*
 * eloop - portable event based main loop.
 * Copyright (c) 2006-2023 Roy Marples <roy@marples.name>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/* NOTES:
 * Basically for a small number of fds (total, not max fd)
 * of say a few hundred, ppoll(2) performs just fine, if not faster than
 * the others.
 * It also has the smallest memory and binary size footprint.
 * ppoll(2) is available on all modern OSes my software runs on and is
 * on track to become a standard POSIX interface.
 * If ppoll is not available, then pselect(2) can be used instead, which
 * has an even smaller memory and binary size footprint.
 * However, this difference is quite tiny and the ppoll API is superior.
 * pselect cannot return error conditions such as EOF, for example.
 *
 * Both epoll(7) and kqueue(2) require an extra fd per process to manage
 * their respective list of interest AND syscalls to manage it.
 * So for a small number of fds, these are more resource intensive,
 * especially when used with more than one process.
 *
 * epoll avoids the resource limit RLIMIT_NOFILE that Linux poll stupidly
 * applies; kqueue avoids the same limit on OpenBSD.
 * ppoll can still be secured in both by using seccomp or pledge.
 *
 * kqueue can also avoid the signal trick we use here, which restricts
 * our signal handlers to the functions listed in sigaction(2); it is
 * probably more robust than our approach at surviving a signal storm.
 * signalfd(2) is available for Linux, which probably works in a similar
 * way, but it's yet another fd to use.
 *
 * Taking this all into account, ppoll(2) is the default mechanism used
 * here.
 */

#if (defined(__unix__) || defined(unix)) && !defined(USG)
#include <sys/param.h>
#endif
#include <sys/time.h>

#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdbool.h>
#include <signal.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/* config.h should define HAVE_PPOLL, etc. */
#if defined(HAVE_CONFIG_H) && !defined(NO_CONFIG_H)
#include "config.h"
#endif

/* Prioritise which mechanism we want to use. */
#if defined(HAVE_PPOLL)
#undef HAVE_EPOLL
#undef HAVE_KQUEUE
#undef HAVE_PSELECT
#elif defined(HAVE_POLLTS)
#define HAVE_PPOLL
#define ppoll pollts
#undef HAVE_EPOLL
#undef HAVE_KQUEUE
#undef HAVE_PSELECT
#elif defined(HAVE_KQUEUE)
#undef HAVE_EPOLL
#undef HAVE_PSELECT
#elif defined(HAVE_EPOLL)
#undef HAVE_KQUEUE
#undef HAVE_PSELECT
#elif !defined(HAVE_PSELECT)
#define HAVE_PPOLL
#endif

#if defined(HAVE_KQUEUE)
#include <sys/event.h>
#if defined(__DragonFly__) || defined(__FreeBSD__)
#define	_kevent(kq, cl, ncl, el, nel, t) \
	kevent((kq), (cl), (int)(ncl), (el), (int)(nel), (t))
#else
#define	_kevent kevent
#endif
#define NFD 2
#elif defined(HAVE_EPOLL)
#include <sys/epoll.h>
#define	NFD 1
#elif defined(HAVE_PPOLL)
#include <poll.h>
#define NFD 1
#elif defined(HAVE_PSELECT)
#include <sys/select.h>
#endif

#include "eloop.h"

#ifndef UNUSED
#define UNUSED(a) (void)((a))
#endif
#ifndef __unused
#ifdef __GNUC__
#define __unused   __attribute__((__unused__))
#else
#define __unused
#endif
#endif

/* Our structures require TAILQ macros, which really every libc should
 * ship as they are useful beyond belief.
 * Sadly some libcs don't have sys/queue.h and some that do don't have
 * the TAILQ_FOREACH macro. For those that don't, the application using
 * this implementation will need to ship a working queue.h somewhere.
 * If config.h does not define HAVE_SYS_QUEUE_H, QUEUE_H can be defined
 * to override loading the queue.h in the current directory. */
#ifndef TAILQ_FOREACH
#ifdef HAVE_SYS_QUEUE_H
#include <sys/queue.h>
#elif defined(QUEUE_H)
#define __QUEUE_HEADER(x) #x
#define _QUEUE_HEADER(x) __QUEUE_HEADER(x)
#include _QUEUE_HEADER(QUEUE_H)
#else
#include "queue.h"
#endif
#endif

#ifdef ELOOP_DEBUG
#include <stdio.h>
#endif

#ifndef __arraycount
#  define __arraycount(__x)       (sizeof(__x) / sizeof(__x[0]))
#endif

/*
 * Allow a backlog of signals.
 * If you use many eloops in the same process, they should all
 * use the same signal handler or have the signal handler unset.
 * Otherwise the signals might not behave as expected.
 */
#define ELOOP_NSIGNALS	5

/*
 * time_t is a signed integer of an unspecified size.
 * To adjust for time_t wrapping, we need to work out the maximum signed
 * value and use that as a maximum; for example, with a 32-bit time_t,
 * TIME_MAX is 0x7fffffff and UTIME_MAX is 0xffffffff.
 */
#ifndef TIME_MAX
#define	TIME_MAX	((1ULL << (sizeof(time_t) * NBBY - 1)) - 1)
#endif
/* The unsigned maximum is then simple - multiply by two and add one. */
#ifndef UTIME_MAX
#define	UTIME_MAX	((TIME_MAX * 2) + 1)
#endif

struct eloop_event {
	TAILQ_ENTRY(eloop_event) next;
	int fd;
	void (*cb)(void *, unsigned short);
	void *cb_arg;
	unsigned short events;
#ifdef HAVE_PPOLL
	struct pollfd *pollfd;
#endif
};

struct eloop_timeout {
	TAILQ_ENTRY(eloop_timeout) next;
	unsigned int seconds;
	unsigned int nseconds;
	void (*callback)(void *);
	void *arg;
	int queue;
};

struct eloop {
	TAILQ_HEAD (event_head, eloop_event) events;
	size_t nevents;
	struct event_head free_events;

	struct timespec now;
	TAILQ_HEAD (timeout_head, eloop_timeout) timeouts;
	struct timeout_head free_timeouts;

	const int *signals;
	size_t nsignals;
	void (*signal_cb)(int, void *);
	void *signal_cb_ctx;

#if defined(HAVE_KQUEUE) || defined(HAVE_EPOLL)
	int fd;
#endif
#if defined(HAVE_KQUEUE)
	struct kevent *fds;
#elif defined(HAVE_EPOLL)
	struct epoll_event *fds;
#elif defined(HAVE_PPOLL)
	struct pollfd *fds;
#endif
#if !defined(HAVE_PSELECT)
	size_t nfds;
#endif

	int exitcode;
	bool exitnow;
	bool events_need_setup;
	bool cleared;
};

#ifdef HAVE_REALLOCARRAY
#define	eloop_realloca	reallocarray
#else
/* Handy routine to check for potential overflow.
 * reallocarray(3) and reallocarr(3) are not portable. */
#define SQRT_SIZE_MAX (((size_t)1) << (sizeof(size_t) * CHAR_BIT / 2))
static void *
eloop_realloca(void *ptr, size_t n, size_t size)
{

	if ((n | size) >= SQRT_SIZE_MAX && n > SIZE_MAX / size) {
		errno = EOVERFLOW;
		return NULL;
	}
	return realloc(ptr, n * size);
}
#endif
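
/*
 * Why the check above is sufficient (an explanatory note, not upstream
 * documentation): SQRT_SIZE_MAX is 2^(half the bits in size_t), e.g.
 * 2^32 on a 64-bit system.  If both n and size are below that bound,
 * their product cannot exceed SIZE_MAX, so the comparatively expensive
 * division is only performed when overflow is actually possible.
 */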

static int
eloop_event_setup_fds(struct eloop *eloop)
{
	struct eloop_event *e, *ne;
#if defined(HAVE_KQUEUE)
	struct kevent *pfd;
	size_t nfds = eloop->nsignals;
#elif defined(HAVE_EPOLL)
	struct epoll_event *pfd;
	size_t nfds = 0;
#elif defined(HAVE_PPOLL)
	struct pollfd *pfd;
	size_t nfds = 0;
#endif

#ifndef HAVE_PSELECT
	nfds += eloop->nevents * NFD;
	if (eloop->nfds < nfds) {
		pfd = eloop_realloca(eloop->fds, nfds, sizeof(*pfd));
		if (pfd == NULL)
			return -1;
		eloop->fds = pfd;
		eloop->nfds = nfds;
	}
#endif

#ifdef HAVE_PPOLL
	pfd = eloop->fds;
#endif
	TAILQ_FOREACH_SAFE(e, &eloop->events, next, ne) {
		if (e->fd == -1) {
			TAILQ_REMOVE(&eloop->events, e, next);
			TAILQ_INSERT_TAIL(&eloop->free_events, e, next);
			continue;
		}
#ifdef HAVE_PPOLL
		e->pollfd = pfd;
		pfd->fd = e->fd;
		pfd->events = 0;
		if (e->events & ELE_READ)
			pfd->events |= POLLIN;
		if (e->events & ELE_WRITE)
			pfd->events |= POLLOUT;
		pfd->revents = 0;
		pfd++;
#endif
	}

	eloop->events_need_setup = false;
	return 0;
}

size_t
eloop_event_count(const struct eloop *eloop)
{

	return eloop->nevents;
}

int
eloop_event_add(struct eloop *eloop, int fd, unsigned short events,
    void (*cb)(void *, unsigned short), void *cb_arg)
{
	struct eloop_event *e;
	bool added;
#if defined(HAVE_KQUEUE)
	struct kevent ke[3], *kep = &ke[0];
	size_t n;
#elif defined(HAVE_EPOLL)
	struct epoll_event epe;
	int op;
#endif

	assert(eloop != NULL);
	assert(cb != NULL && cb_arg != NULL);
	if (fd == -1 || !(events & (ELE_READ | ELE_WRITE | ELE_HANGUP))) {
		errno = EINVAL;
		return -1;
	}

	TAILQ_FOREACH(e, &eloop->events, next) {
		if (e->fd == fd)
			break;
	}

	if (e == NULL) {
		added = true;
		e = TAILQ_FIRST(&eloop->free_events);
		if (e != NULL)
			TAILQ_REMOVE(&eloop->free_events, e, next);
		else {
			e = malloc(sizeof(*e));
			if (e == NULL) {
				return -1;
			}
		}
		TAILQ_INSERT_HEAD(&eloop->events, e, next);
		eloop->nevents++;
		e->fd = fd;
		e->events = 0;
	} else
		added = false;

	e->cb = cb;
	e->cb_arg = cb_arg;

#if defined(HAVE_KQUEUE)
	/* ke[] has one slot for each possible filter change;
	 * n counts the slots actually used. */
#ifdef EVFILT_PROCDESC
	n = 3;
#else
	n = 2;
#endif
	if (events & ELE_READ && !(e->events & ELE_READ))
		EV_SET(kep++, (uintptr_t)fd, EVFILT_READ, EV_ADD, 0, 0, e);
	else if (!(events & ELE_READ) && e->events & ELE_READ)
		EV_SET(kep++, (uintptr_t)fd, EVFILT_READ, EV_DELETE, 0, 0, e);
	else
		n--;
	if (events & ELE_WRITE && !(e->events & ELE_WRITE))
		EV_SET(kep++, (uintptr_t)fd, EVFILT_WRITE, EV_ADD, 0, 0, e);
	else if (!(events & ELE_WRITE) && e->events & ELE_WRITE)
		EV_SET(kep++, (uintptr_t)fd, EVFILT_WRITE, EV_DELETE, 0, 0, e);
	else
		n--;
#ifdef EVFILT_PROCDESC
	if (events & ELE_HANGUP)
		EV_SET(kep++, (uintptr_t)fd, EVFILT_PROCDESC, EV_ADD,
		    NOTE_EXIT, 0, e);
	else
		n--;
#endif
	if (n != 0 && _kevent(eloop->fd, ke, n, NULL, 0, NULL) == -1) {
		if (added) {
			eloop->nevents--;
			TAILQ_REMOVE(&eloop->events, e, next);
			TAILQ_INSERT_TAIL(&eloop->free_events, e, next);
		}
		return -1;
	}
#elif defined(HAVE_EPOLL)
	memset(&epe, 0, sizeof(epe));
	epe.data.ptr = e;
	if (events & ELE_READ)
		epe.events |= EPOLLIN;
	if (events & ELE_WRITE)
		epe.events |= EPOLLOUT;
	op = added ? EPOLL_CTL_ADD : EPOLL_CTL_MOD;
	if (epe.events != 0 && epoll_ctl(eloop->fd, op, fd, &epe) == -1) {
		if (added) {
			eloop->nevents--;
			TAILQ_REMOVE(&eloop->events, e, next);
			TAILQ_INSERT_TAIL(&eloop->free_events, e, next);
		}
		return -1;
	}
#elif defined(HAVE_PPOLL)
	e->pollfd = NULL;
	UNUSED(added);
#else
	UNUSED(added);
#endif
	e->events = events;
	eloop->events_need_setup = true;
	return 0;
}
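
/*
 * Example usage (an illustrative sketch, not part of this file):
 * register a socket for read events.  The callback receives the ELE_*
 * events that fired; calling eloop_event_add() again for the same fd
 * simply updates its callback and event mask.  struct ctx and its
 * members are placeholders.
 *
 *	static void
 *	sock_cb(void *arg, unsigned short events)
 *	{
 *		struct ctx *c = arg;
 *
 *		if (events & (ELE_HANGUP | ELE_ERROR))
 *			eloop_event_delete(c->eloop, c->fd);
 *		else if (events & ELE_READ)
 *			read(c->fd, c->buf, sizeof(c->buf));
 *	}
 *
 *	if (eloop_event_add(eloop, fd, ELE_READ, sock_cb, c) == -1)
 *		err(EXIT_FAILURE, "eloop_event_add");
 */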

int
eloop_event_delete(struct eloop *eloop, int fd)
{
	struct eloop_event *e;
#if defined(HAVE_KQUEUE)
	struct kevent ke[2], *kep = &ke[0];
	size_t n;
#endif

	assert(eloop != NULL);
	if (fd == -1) {
		errno = EINVAL;
		return -1;
	}

	TAILQ_FOREACH(e, &eloop->events, next) {
		if (e->fd == fd)
			break;
	}
	if (e == NULL) {
		errno = ENOENT;
		return -1;
	}

#if defined(HAVE_KQUEUE)
	n = 0;
	if (e->events & ELE_READ) {
		EV_SET(kep++, (uintptr_t)fd, EVFILT_READ, EV_DELETE, 0, 0, e);
		n++;
	}
	if (e->events & ELE_WRITE) {
		EV_SET(kep++, (uintptr_t)fd, EVFILT_WRITE, EV_DELETE, 0, 0, e);
		n++;
	}
	if (n != 0 && _kevent(eloop->fd, ke, n, NULL, 0, NULL) == -1)
		return -1;
#elif defined(HAVE_EPOLL)
	if (epoll_ctl(eloop->fd, EPOLL_CTL_DEL, fd, NULL) == -1)
		return -1;
#endif
	e->fd = -1;
	eloop->nevents--;
	eloop->events_need_setup = true;
	return 1;
}

unsigned long long
eloop_timespec_diff(const struct timespec *tsp, const struct timespec *usp,
    unsigned int *nsp)
{
	unsigned long long tsecs, usecs, secs;
	long nsecs;

	if (tsp->tv_sec < 0) /* time wrapped */
		tsecs = UTIME_MAX - (unsigned long long)(-tsp->tv_sec);
	else
		tsecs = (unsigned long long)tsp->tv_sec;
	if (usp->tv_sec < 0) /* time wrapped */
		usecs = UTIME_MAX - (unsigned long long)(-usp->tv_sec);
	else
		usecs = (unsigned long long)usp->tv_sec;

	if (usecs > tsecs) /* time wrapped */
		secs = (UTIME_MAX - usecs) + tsecs;
	else
		secs = tsecs - usecs;

	nsecs = tsp->tv_nsec - usp->tv_nsec;
	if (nsecs < 0) {
		if (secs == 0)
			nsecs = 0;
		else {
			secs--;
			nsecs += NSEC_PER_SEC;
		}
	}
	if (nsp != NULL)
		*nsp = (unsigned int)nsecs;
	return secs;
}
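
/*
 * Worked example (illustrative): with tsp = { 10, 100000000 } and
 * usp = { 8, 900000000 }, secs = 10 - 8 = 2 and nsecs = -800000000,
 * so one second is borrowed, yielding secs = 1 and *nsp = 200000000:
 * a difference of 1.2 seconds.
 */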

static void
eloop_reduce_timers(struct eloop *eloop)
{
	struct timespec now;
	unsigned long long secs;
	unsigned int nsecs;
	struct eloop_timeout *t;

	clock_gettime(CLOCK_MONOTONIC, &now);
	secs = eloop_timespec_diff(&now, &eloop->now, &nsecs);

	TAILQ_FOREACH(t, &eloop->timeouts, next) {
		if (secs > t->seconds) {
			t->seconds = 0;
			t->nseconds = 0;
		} else {
			t->seconds -= (unsigned int)secs;
			if (nsecs > t->nseconds) {
				if (t->seconds == 0)
					t->nseconds = 0;
				else {
					t->seconds--;
					t->nseconds = NSEC_PER_SEC
					    - (nsecs - t->nseconds);
				}
			} else
				t->nseconds -= nsecs;
		}
	}

	eloop->now = now;
}

/*
 * This implementation should cope with UINT_MAX seconds on a system
 * where time_t is INT32_MAX. It should also cope with the monotonic timer
 * wrapping, although this is highly unlikely.
 * unsigned int should match or be greater than any on-wire specified timeout.
 */
static int
eloop_q_timeout_add(struct eloop *eloop, int queue,
    unsigned int seconds, unsigned int nseconds,
    void (*callback)(void *), void *arg)
{
	struct eloop_timeout *t, *tt = NULL;

	assert(eloop != NULL);
	assert(callback != NULL);
	assert(nseconds <= NSEC_PER_SEC);

	/* Remove existing timeout if present. */
	TAILQ_FOREACH(t, &eloop->timeouts, next) {
		if (t->callback == callback && t->arg == arg) {
			TAILQ_REMOVE(&eloop->timeouts, t, next);
			break;
		}
	}

	if (t == NULL) {
		/* No existing, so allocate or grab one from the free pool. */
		if ((t = TAILQ_FIRST(&eloop->free_timeouts))) {
			TAILQ_REMOVE(&eloop->free_timeouts, t, next);
		} else {
			if ((t = malloc(sizeof(*t))) == NULL)
				return -1;
		}
	}

	eloop_reduce_timers(eloop);

	t->seconds = seconds;
	t->nseconds = nseconds;
	t->callback = callback;
	t->arg = arg;
	t->queue = queue;

	/* The timeout list should be in chronological order,
	 * soonest first. */
	TAILQ_FOREACH(tt, &eloop->timeouts, next) {
		if (t->seconds < tt->seconds ||
		    (t->seconds == tt->seconds && t->nseconds < tt->nseconds))
		{
			TAILQ_INSERT_BEFORE(tt, t, next);
			return 0;
		}
	}
	TAILQ_INSERT_TAIL(&eloop->timeouts, t, next);
	return 0;
}

int
eloop_q_timeout_add_tv(struct eloop *eloop, int queue,
    const struct timespec *when, void (*callback)(void *), void *arg)
{

	if (when->tv_sec < 0 || (unsigned long)when->tv_sec > UINT_MAX) {
		errno = EINVAL;
		return -1;
	}
	if (when->tv_nsec < 0 || when->tv_nsec > NSEC_PER_SEC) {
		errno = EINVAL;
		return -1;
	}

	return eloop_q_timeout_add(eloop, queue,
	    (unsigned int)when->tv_sec, (unsigned int)when->tv_nsec,
	    callback, arg);
}

int
eloop_q_timeout_add_sec(struct eloop *eloop, int queue, unsigned int seconds,
    void (*callback)(void *), void *arg)
{

	return eloop_q_timeout_add(eloop, queue, seconds, 0, callback, arg);
}

int
eloop_q_timeout_add_msec(struct eloop *eloop, int queue, unsigned long when,
    void (*callback)(void *), void *arg)
{
	unsigned long seconds, nseconds;

	seconds = when / MSEC_PER_SEC;
	if (seconds > UINT_MAX) {
		errno = EINVAL;
		return -1;
	}

	nseconds = (when % MSEC_PER_SEC) * NSEC_PER_MSEC;
	return eloop_q_timeout_add(eloop, queue,
		(unsigned int)seconds, (unsigned int)nseconds, callback, arg);
}
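
/*
 * Example usage (an illustrative sketch): arm a one-shot 500ms timer.
 * Queue numbers are application-defined; 1 is used here purely for
 * illustration.  Re-adding a timeout with the same callback/arg pair
 * replaces the pending one, and eloop_q_timeout_delete() disarms it.
 *
 *	static void
 *	expire_cb(void *arg)
 *	{
 *
 *		... act on the expiry ...
 *	}
 *
 *	if (eloop_q_timeout_add_msec(eloop, 1, 500, expire_cb, ctx) == -1)
 *		err(EXIT_FAILURE, "eloop_q_timeout_add_msec");
 */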

int
eloop_q_timeout_delete(struct eloop *eloop, int queue,
    void (*callback)(void *), void *arg)
{
	struct eloop_timeout *t, *tt;
	int n;

	assert(eloop != NULL);

	n = 0;
	TAILQ_FOREACH_SAFE(t, &eloop->timeouts, next, tt) {
		if ((queue == 0 || t->queue == queue) &&
		    t->arg == arg &&
		    (!callback || t->callback == callback))
		{
			TAILQ_REMOVE(&eloop->timeouts, t, next);
			TAILQ_INSERT_TAIL(&eloop->free_timeouts, t, next);
			n++;
		}
	}
	return n;
}

void
eloop_exit(struct eloop *eloop, int code)
{

	assert(eloop != NULL);

	eloop->exitcode = code;
	eloop->exitnow = true;
}

void
eloop_enter(struct eloop *eloop)
{

	assert(eloop != NULL);

	eloop->exitnow = false;
}

/* Must be called after fork(2) */
int
eloop_forked(struct eloop *eloop)
{
#if defined(HAVE_KQUEUE) || defined(HAVE_EPOLL)
	struct eloop_event *e;
#if defined(HAVE_KQUEUE)
	struct kevent *pfds, *pfd;
	size_t i;
#elif defined(HAVE_EPOLL)
	struct epoll_event epe = { .events = 0 };
#endif

	assert(eloop != NULL);
#if defined(HAVE_KQUEUE) || defined(HAVE_EPOLL)
	if (eloop->fd != -1)
		close(eloop->fd);
	if (eloop_open(eloop) == -1)
		return -1;
#endif

#ifdef HAVE_KQUEUE
	pfds = malloc((eloop->nsignals + (eloop->nevents * NFD)) *
	    sizeof(*pfds));
	if (pfds == NULL)
		return -1;
	pfd = pfds;

	if (eloop->signal_cb != NULL) {
		for (i = 0; i < eloop->nsignals; i++) {
			EV_SET(pfd++, (uintptr_t)eloop->signals[i],
			    EVFILT_SIGNAL, EV_ADD, 0, 0, NULL);
		}
	} else
		i = 0;
#endif

	TAILQ_FOREACH(e, &eloop->events, next) {
		if (e->fd == -1)
			continue;
#if defined(HAVE_KQUEUE)
		if (e->events & ELE_READ) {
			EV_SET(pfd++, (uintptr_t)e->fd,
			    EVFILT_READ, EV_ADD, 0, 0, e);
			i++;
		}
		if (e->events & ELE_WRITE) {
			EV_SET(pfd++, (uintptr_t)e->fd,
			    EVFILT_WRITE, EV_ADD, 0, 0, e);
			i++;
		}
#elif defined(HAVE_EPOLL)
		memset(&epe, 0, sizeof(epe));
		epe.data.ptr = e;
		if (e->events & ELE_READ)
			epe.events |= EPOLLIN;
		if (e->events & ELE_WRITE)
			epe.events |= EPOLLOUT;
		if (epoll_ctl(eloop->fd, EPOLL_CTL_ADD, e->fd, &epe) == -1)
			return -1;
#endif
	}

#if defined(HAVE_KQUEUE)
	if (i != 0 && _kevent(eloop->fd, pfds, i, NULL, 0, NULL) == -1) {
		free(pfds);
		return -1;
	}
	free(pfds);
	return 0;
#else
	return 0;
#endif
#else
	UNUSED(eloop);
	return 0;
#endif
}

int
eloop_open(struct eloop *eloop)
{
#if defined(HAVE_KQUEUE) || defined(HAVE_EPOLL)
	int fd;

	assert(eloop != NULL);
#if defined(HAVE_KQUEUE1)
	fd = kqueue1(O_CLOEXEC);
#elif defined(HAVE_KQUEUE)
	int flags;

	fd = kqueue();
	if (fd == -1)
		return -1;
	flags = fcntl(fd, F_GETFD, 0);
	if (flags == -1 ||
	    fcntl(fd, F_SETFD, flags | FD_CLOEXEC) == -1)
	{
		close(fd);
		return -1;
	}
#elif defined(HAVE_EPOLL)
	fd = epoll_create1(EPOLL_CLOEXEC);
#endif

	eloop->fd = fd;
	return fd;
#else
	UNUSED(eloop);
	return 0;
#endif
}

int
eloop_signal_set_cb(struct eloop *eloop,
    const int *signals, size_t nsignals,
    void (*signal_cb)(int, void *), void *signal_cb_ctx)
{
#ifdef HAVE_KQUEUE
	size_t i;
	struct kevent *ke, *kes;
#endif
	int error = 0;

	assert(eloop != NULL);

#ifdef HAVE_KQUEUE
	ke = kes = malloc(MAX(eloop->nsignals, nsignals) * sizeof(*kes));
	if (kes == NULL)
		return -1;
	for (i = 0; i < eloop->nsignals; i++) {
		EV_SET(ke++, (uintptr_t)eloop->signals[i],
		    EVFILT_SIGNAL, EV_DELETE, 0, 0, NULL);
	}
	if (i != 0 && _kevent(eloop->fd, kes, i, NULL, 0, NULL) == -1) {
		error = -1;
		goto out;
	}
#endif

	eloop->signals = signals;
	eloop->nsignals = nsignals;
	eloop->signal_cb = signal_cb;
	eloop->signal_cb_ctx = signal_cb_ctx;

#ifdef HAVE_KQUEUE
	if (signal_cb == NULL)
		goto out;
	ke = kes;
	for (i = 0; i < eloop->nsignals; i++) {
		EV_SET(ke++, (uintptr_t)eloop->signals[i],
		    EVFILT_SIGNAL, EV_ADD, 0, 0, NULL);
	}
	if (i != 0 && _kevent(eloop->fd, kes, i, NULL, 0, NULL) == -1)
		error = -1;
out:
	free(kes);
#endif

	return error;
}

#ifndef HAVE_KQUEUE
static volatile int _eloop_sig[ELOOP_NSIGNALS];
static volatile size_t _eloop_nsig;

static void
eloop_signal3(int sig, __unused siginfo_t *siginfo, __unused void *arg)
{

	if (_eloop_nsig == __arraycount(_eloop_sig)) {
#ifdef ELOOP_DEBUG
		fprintf(stderr, "%s: signal storm, discarding signal %d\n",
		    __func__, sig);
#endif
		return;
	}

	_eloop_sig[_eloop_nsig++] = sig;
}
#endif

int
eloop_signal_mask(struct eloop *eloop, sigset_t *oldset)
{
	sigset_t newset;
	size_t i;
#ifndef HAVE_KQUEUE
	struct sigaction sa = {
	    .sa_sigaction = eloop_signal3,
	    .sa_flags = SA_SIGINFO,
	};
#endif

	assert(eloop != NULL);

	sigemptyset(&newset);
	for (i = 0; i < eloop->nsignals; i++)
		sigaddset(&newset, eloop->signals[i]);
	if (sigprocmask(SIG_SETMASK, &newset, oldset) == -1)
		return -1;

#ifndef HAVE_KQUEUE
	sigemptyset(&sa.sa_mask);

	for (i = 0; i < eloop->nsignals; i++) {
		if (sigaction(eloop->signals[i], &sa, NULL) == -1)
			return -1;
	}
#endif

	return 0;
}
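
/*
 * Example usage (an illustrative sketch): typical signal wiring before
 * starting the loop.  The signals array is not copied, so it must
 * outlive the eloop; signal_cb and ctx are placeholders.
 *
 *	static const int sigs[] = { SIGTERM, SIGINT, SIGHUP };
 *	sigset_t oldset;
 *
 *	if (eloop_signal_set_cb(eloop, sigs, __arraycount(sigs),
 *	    signal_cb, ctx) == -1 ||
 *	    eloop_signal_mask(eloop, &oldset) == -1)
 *		err(EXIT_FAILURE, "signal setup");
 *
 * The old mask is later handed to eloop_start() so the polling call
 * unblocks the signals atomically while waiting.
 */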

struct eloop *
eloop_new(void)
{
	struct eloop *eloop;

	eloop = calloc(1, sizeof(*eloop));
	if (eloop == NULL)
		return NULL;

	/* Check we have a working monotonic clock. */
	if (clock_gettime(CLOCK_MONOTONIC, &eloop->now) == -1) {
		free(eloop);
		return NULL;
	}

	TAILQ_INIT(&eloop->events);
	TAILQ_INIT(&eloop->free_events);
	TAILQ_INIT(&eloop->timeouts);
	TAILQ_INIT(&eloop->free_timeouts);
	eloop->exitcode = EXIT_FAILURE;

#if defined(HAVE_KQUEUE) || defined(HAVE_EPOLL)
	if (eloop_open(eloop) == -1) {
		eloop_free(eloop);
		return NULL;
	}
#endif

	return eloop;
}

void
eloop_clear(struct eloop *eloop, ...)
{
	va_list va1, va2;
	int except_fd;
	struct eloop_event *e, *ne;
	struct eloop_timeout *t;

	if (eloop == NULL)
		return;

	va_start(va1, eloop);
	TAILQ_FOREACH_SAFE(e, &eloop->events, next, ne) {
		va_copy(va2, va1);
		do
			except_fd = va_arg(va2, int);
		while (except_fd != -1 && except_fd != e->fd);
		va_end(va2);
		if (e->fd == except_fd && e->fd != -1)
			continue;
		TAILQ_REMOVE(&eloop->events, e, next);
		if (e->fd != -1) {
			close(e->fd);
			eloop->nevents--;
		}
		free(e);
	}
	va_end(va1);

#if !defined(HAVE_PSELECT)
	/* Free the pollfd buffer and ensure it's re-created before
	 * the next run. This allows us to shrink it in case we have far
	 * fewer signals and fds to respond to after forking. */
	free(eloop->fds);
	eloop->fds = NULL;
	eloop->nfds = 0;
	eloop->events_need_setup = true;
#endif

	while ((e = TAILQ_FIRST(&eloop->free_events))) {
		TAILQ_REMOVE(&eloop->free_events, e, next);
		free(e);
	}
	while ((t = TAILQ_FIRST(&eloop->timeouts))) {
		TAILQ_REMOVE(&eloop->timeouts, t, next);
		free(t);
	}
	while ((t = TAILQ_FIRST(&eloop->free_timeouts))) {
		TAILQ_REMOVE(&eloop->free_timeouts, t, next);
		free(t);
	}
	eloop->cleared = true;
}

void
eloop_free(struct eloop *eloop)
{

	eloop_clear(eloop, -1);
#if defined(HAVE_KQUEUE) || defined(HAVE_EPOLL)
	if (eloop != NULL && eloop->fd != -1)
		close(eloop->fd);
#endif
	free(eloop);
}

#if defined(HAVE_KQUEUE)
static int
eloop_run_kqueue(struct eloop *eloop, const struct timespec *ts)
{
	int n, nn;
	struct kevent *ke;
	struct eloop_event *e;
	unsigned short events;

	n = _kevent(eloop->fd, NULL, 0, eloop->fds, eloop->nevents, ts);
	if (n == -1)
		return -1;

	for (nn = n, ke = eloop->fds; nn != 0; nn--, ke++) {
		if (eloop->cleared || eloop->exitnow)
			break;
		e = (struct eloop_event *)ke->udata;
		if (ke->filter == EVFILT_SIGNAL) {
			eloop->signal_cb((int)ke->ident,
			    eloop->signal_cb_ctx);
			continue;
		}
		if (ke->filter == EVFILT_READ)
			events = ELE_READ;
		else if (ke->filter == EVFILT_WRITE)
			events = ELE_WRITE;
#ifdef EVFILT_PROCDESC
		else if (ke->filter == EVFILT_PROCDESC &&
		    ke->fflags & NOTE_EXIT)
			/* The exit status is in ke->data.
			 * As we default to using ppoll anyway
			 * we don't have to do anything with it right now. */
			events = ELE_HANGUP;
#endif
		else
			continue; /* assert? */
		if (ke->flags & EV_EOF)
			events |= ELE_HANGUP;
		if (ke->flags & EV_ERROR)
			events |= ELE_ERROR;
		e->cb(e->cb_arg, events);
	}
	return n;
}

#elif defined(HAVE_EPOLL)

static int
eloop_run_epoll(struct eloop *eloop,
    const struct timespec *ts, const sigset_t *signals)
{
	int timeout, n, nn;
	struct epoll_event *epe;
	struct eloop_event *e;
	unsigned short events;

	/* Convert to milliseconds, rounding nanoseconds up and
	 * clamping the result to INT_MAX. */
	if (ts != NULL) {
		if (ts->tv_sec > INT_MAX / 1000 ||
		    (ts->tv_sec == INT_MAX / 1000 &&
		     ((ts->tv_nsec + 999999) / 1000000 > INT_MAX % 1000)))
			timeout = INT_MAX;
		else
			timeout = (int)(ts->tv_sec * 1000 +
			    (ts->tv_nsec + 999999) / 1000000);
	} else
		timeout = -1;

	if (signals != NULL)
		n = epoll_pwait(eloop->fd, eloop->fds,
		    (int)eloop->nevents, timeout, signals);
	else
		n = epoll_wait(eloop->fd, eloop->fds,
		    (int)eloop->nevents, timeout);
	if (n == -1)
		return -1;

	for (nn = n, epe = eloop->fds; nn != 0; nn--, epe++) {
		if (eloop->cleared || eloop->exitnow)
			break;
		e = (struct eloop_event *)epe->data.ptr;
		if (e->fd == -1)
			continue;
		events = 0;
		if (epe->events & EPOLLIN)
			events |= ELE_READ;
		if (epe->events & EPOLLOUT)
			events |= ELE_WRITE;
		if (epe->events & EPOLLHUP)
			events |= ELE_HANGUP;
		if (epe->events & EPOLLERR)
			events |= ELE_ERROR;
		e->cb(e->cb_arg, events);
	}
	return n;
}

#elif defined(HAVE_PPOLL)

static int
eloop_run_ppoll(struct eloop *eloop,
    const struct timespec *ts, const sigset_t *signals)
{
	int n, nn;
	struct eloop_event *e;
	struct pollfd *pfd;
	unsigned short events;

	n = ppoll(eloop->fds, (nfds_t)eloop->nevents, ts, signals);
	if (n == -1 || n == 0)
		return n;

	nn = n;
	TAILQ_FOREACH(e, &eloop->events, next) {
		if (eloop->cleared || eloop->exitnow)
			break;
		/* Skip freshly added events. */
		if ((pfd = e->pollfd) == NULL)
			continue;
		if (pfd->revents) {
			nn--;
			events = 0;
			if (pfd->revents & POLLIN)
				events |= ELE_READ;
			if (pfd->revents & POLLOUT)
				events |= ELE_WRITE;
			if (pfd->revents & POLLHUP)
				events |= ELE_HANGUP;
			if (pfd->revents & POLLERR)
				events |= ELE_ERROR;
			if (pfd->revents & POLLNVAL)
				events |= ELE_NVAL;
			if (events)
				e->cb(e->cb_arg, events);
		}
		if (nn == 0)
			break;
	}
	return n;
}

#elif defined(HAVE_PSELECT)

static int
eloop_run_pselect(struct eloop *eloop,
    const struct timespec *ts, const sigset_t *sigmask)
{
	fd_set read_fds, write_fds;
	int maxfd, n;
	struct eloop_event *e;
	unsigned short events;

	FD_ZERO(&read_fds);
	FD_ZERO(&write_fds);
	maxfd = 0;
	TAILQ_FOREACH(e, &eloop->events, next) {
		if (e->fd == -1)
			continue;
		if (e->events & ELE_READ) {
			FD_SET(e->fd, &read_fds);
			if (e->fd > maxfd)
				maxfd = e->fd;
		}
		if (e->events & ELE_WRITE) {
			FD_SET(e->fd, &write_fds);
			if (e->fd > maxfd)
				maxfd = e->fd;
		}
	}

	/* except_fds is for STREAMS devices, which we don't use. */
	n = pselect(maxfd + 1, &read_fds, &write_fds, NULL, ts, sigmask);
	if (n == -1 || n == 0)
		return n;

	TAILQ_FOREACH(e, &eloop->events, next) {
		if (eloop->cleared || eloop->exitnow)
			break;
		if (e->fd == -1)
			continue;
		events = 0;
		if (FD_ISSET(e->fd, &read_fds))
			events |= ELE_READ;
		if (FD_ISSET(e->fd, &write_fds))
			events |= ELE_WRITE;
		if (events)
			e->cb(e->cb_arg, events);
	}

	return n;
}
#endif

int
eloop_start(struct eloop *eloop, sigset_t *signals)
{
	int error;
	struct eloop_timeout *t;
	struct timespec ts, *tsp;

	assert(eloop != NULL);
#ifdef HAVE_KQUEUE
	UNUSED(signals);
#endif

	for (;;) {
		if (eloop->exitnow)
			break;

#ifndef HAVE_KQUEUE
		if (_eloop_nsig != 0) {
			int n = _eloop_sig[--_eloop_nsig];

			if (eloop->signal_cb != NULL)
				eloop->signal_cb(n, eloop->signal_cb_ctx);
			continue;
		}
#endif

		t = TAILQ_FIRST(&eloop->timeouts);
		if (t == NULL && eloop->nevents == 0)
			break;

		if (t != NULL)
			eloop_reduce_timers(eloop);

		if (t != NULL && t->seconds == 0 && t->nseconds == 0) {
			TAILQ_REMOVE(&eloop->timeouts, t, next);
			t->callback(t->arg);
			TAILQ_INSERT_TAIL(&eloop->free_timeouts, t, next);
			continue;
		}

		if (t != NULL) {
			if (t->seconds > INT_MAX) {
				ts.tv_sec = (time_t)INT_MAX;
				ts.tv_nsec = 0;
			} else {
				ts.tv_sec = (time_t)t->seconds;
				ts.tv_nsec = (long)t->nseconds;
			}
			tsp = &ts;
		} else
			tsp = NULL;

		eloop->cleared = false;
		if (eloop->events_need_setup)
			eloop_event_setup_fds(eloop);

#if defined(HAVE_KQUEUE)
		error = eloop_run_kqueue(eloop, tsp);
#elif defined(HAVE_EPOLL)
		error = eloop_run_epoll(eloop, tsp, signals);
#elif defined(HAVE_PPOLL)
		error = eloop_run_ppoll(eloop, tsp, signals);
#elif defined(HAVE_PSELECT)
		error = eloop_run_pselect(eloop, tsp, signals);
#else
#error no polling mechanism to run!
#endif
		if (error == -1) {
			if (errno == EINTR)
				continue;
			return -errno;
		}
	}

	return eloop->exitcode;
}
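
/*
 * Putting it together (an illustrative sketch of a minimal consumer of
 * this API; error handling trimmed, and names such as read_cb,
 * timeout_cb, signal_cb, sigs and ctx are placeholders).  eloop_start()
 * runs until eloop_exit() is called, or until no events or timeouts
 * remain, and returns the exit code.
 *
 *	struct eloop *eloop;
 *	sigset_t oldset;
 *
 *	if ((eloop = eloop_new()) == NULL)
 *		err(EXIT_FAILURE, "eloop_new");
 *	eloop_signal_set_cb(eloop, sigs, __arraycount(sigs), signal_cb, ctx);
 *	eloop_signal_mask(eloop, &oldset);
 *	eloop_event_add(eloop, fd, ELE_READ, read_cb, ctx);
 *	eloop_q_timeout_add_sec(eloop, 1, 10, timeout_cb, ctx);
 *	exit_code = eloop_start(eloop, &oldset);
 *	eloop_free(eloop);
 */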