xref: /openbsd/lib/libevent/kqueue.c (revision a96d88d3)
1 /*	$OpenBSD: kqueue.c,v 1.43 2024/03/23 22:51:49 yasuoka Exp $	*/
2 
3 /*
4  * Copyright 2000-2002 Niels Provos <provos@citi.umich.edu>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. The name of the author may not be used to endorse or promote products
16  *    derived from this software without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 #include <sys/types.h>
31 #include <sys/time.h>
32 #include <sys/queue.h>
33 #include <sys/event.h>
34 
35 #include <assert.h>
36 #include <errno.h>
37 #include <inttypes.h>
38 #include <limits.h>
39 #include <signal.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #include <unistd.h>
44 
45 #include "event.h"
46 #include "event-internal.h"
47 #include "log.h"
48 #include "evsignal.h"
49 
/*
 * Private bit kept in ev->ev_flags: set by kq_add() once an event has been
 * queued for (or registered with) the kqueue, cleared again by kq_del() and,
 * for non-persistent events, by kq_dispatch() when the event fires.
 */
#define EVLIST_X_KQINKERNEL	0x1000

/* Initial number of slots in the change list and in the result buffer. */
#define NEVENT		64
53 
/*
 * State of one kqueue backend instance; created by kq_init() and released
 * by kq_dealloc().
 */
struct kqop {
	/* Changes queued by kq_insert(), submitted by kq_dispatch(). */
	struct kevent *changes;
	/* Number of entries of changes[] currently in use. */
	int nchanges;
	/* Buffer that kevent(2) fills with results in kq_dispatch(). */
	struct kevent *events;
	/* For every signal number, the events registered for that signal. */
	struct event_list evsigevents[NSIG];
	/* Allocated slot count of both changes[] and events[]. */
	int nevents;
	/* Descriptor returned by kqueue(2). */
	int kq;
	/* Process that created kq; kq_dealloc() closes kq only in it. */
	pid_t pid;
};
63 
/* Backend entry points referenced by kqops, plus the change-list helper. */
static void	*kq_init(struct event_base *base);
static int	 kq_add(void *arg, struct event *ev);
static int	 kq_del(void *arg, struct event *ev);
static int	 kq_dispatch(struct event_base *base, void *arg,
		    struct timeval *tv);
static int	 kq_insert(struct kqop *kqop, struct kevent *kev);
static void	 kq_dealloc(struct event_base *base, void *arg);
70 
71 const struct eventop kqops = {
72 	"kqueue",
73 	kq_init,
74 	kq_add,
75 	kq_del,
76 	kq_dispatch,
77 	kq_dealloc,
78 	1 /* need reinit */
79 };
80 
/*
 * Create the state for a kqueue backend instance.
 *
 * Returns a freshly allocated struct kqop on success.  Returns NULL when
 * the backend is disabled through the EVENT_NOKQUEUE environment variable
 * (honoured only when issetugid() reports false), when kqueue(2) fails, or
 * when memory cannot be allocated.  On every failure path all resources
 * acquired so far -- including the kqueue descriptor -- are released and
 * errno is left as set by the failing call.
 *
 * The base argument is not used here.
 */
static void *
kq_init(struct event_base *base)
{
	struct kqop *kqueueop;
	int i, saved_errno;

	/* Disable kqueue when this environment variable is set */
	if (!issetugid() && getenv("EVENT_NOKQUEUE"))
		return (NULL);

	if ((kqueueop = calloc(1, sizeof(*kqueueop))) == NULL)
		return (NULL);
	/* Mark the descriptor as absent so the cleanup path can test it. */
	kqueueop->kq = -1;

	/* Initialize the kernel queue */
	if ((kqueueop->kq = kqueue()) == -1) {
		event_warn("kqueue");
		goto fail;
	}

	/* Remember the creator so kq_dealloc() only closes kq in it. */
	kqueueop->pid = getpid();

	/* Initialize fields */
	kqueueop->changes = calloc(NEVENT, sizeof(struct kevent));
	if (kqueueop->changes == NULL)
		goto fail;
	kqueueop->events = calloc(NEVENT, sizeof(struct kevent));
	if (kqueueop->events == NULL)
		goto fail;
	kqueueop->nevents = NEVENT;

	/* we need to keep track of multiple events per signal */
	for (i = 0; i < NSIG; ++i)
		TAILQ_INIT(&kqueueop->evsigevents[i]);

	return (kqueueop);

fail:
	/* close() and free() may clobber errno; keep the original cause. */
	saved_errno = errno;
	free(kqueueop->events);
	free(kqueueop->changes);
	if (kqueueop->kq != -1)
		close(kqueueop->kq);
	free(kqueueop);
	errno = saved_errno;
	return (NULL);
}
127 
/*
 * Append a copy of *kev to the list of changes that the next
 * kq_dispatch() call hands to kevent(2).
 *
 * When the change list is full, both the change list and the result
 * buffer are doubled in size.  The growth is transactional: either both
 * arrays are replaced and kqop->nevents is updated, or neither is touched.
 * This keeps kqop->nevents equal to the real allocation size, which
 * recallocarray(3) requires of its old-size argument.
 *
 * Returns 0 on success and -1 if the arrays cannot be grown; in the
 * latter case the change is not queued.
 */
static int
kq_insert(struct kqop *kqop, struct kevent *kev)
{
	if (kqop->nchanges == kqop->nevents) {
		struct kevent *newchange;
		struct kevent *newresult;
		int oldsize = kqop->nevents;
		int newsize;

		if (oldsize > INT_MAX / 2) {
			event_warnx("%s: integer overflow", __func__);
			return (-1);
		}
		newsize = oldsize * 2;

		/*
		 * Obtain the new result buffer as a separate allocation
		 * first, so that a failure below can be undone by simply
		 * freeing it.
		 */
		newresult = calloc(newsize, sizeof(struct kevent));
		if (newresult == NULL) {
			event_warn("%s: calloc", __func__);
			return (-1);
		}

		newchange = recallocarray(kqop->changes,
		    oldsize, newsize, sizeof(struct kevent));
		if (newchange == NULL) {
			/* Report before free() can disturb errno. */
			event_warn("%s: recallocarray", __func__);
			free(newresult);
			return (-1);
		}

		/* Both allocations succeeded: commit them together. */
		memcpy(newresult, kqop->events,
		    (size_t)oldsize * sizeof(struct kevent));
		free(kqop->events);
		kqop->events = newresult;
		kqop->changes = newchange;
		kqop->nevents = newsize;
	}

	memcpy(&kqop->changes[kqop->nchanges++], kev, sizeof(struct kevent));

	event_debug(("%s: fd %d %s%s",
		__func__, (int)kev->ident,
		kev->filter == EVFILT_READ ? "EVFILT_READ" : "EVFILT_WRITE",
		kev->flags == EV_DELETE ? " (del)" : ""));

	return (0);
}
176 
/*
 * Signal handler that kq_add() installs for signals watched through
 * EVFILT_SIGNAL.  It intentionally has an empty body and ignores sig.
 */
static void
kq_sighandler(int sig)
{
	(void)sig;
}
182 
/*
 * Submit the queued changes to the kernel, wait for activity and mark
 * the corresponding events active.
 *
 * tv bounds the wait; a NULL tv passes a NULL timeout to kevent(2).
 * The change list is reset after the kevent(2) call whether or not the
 * call succeeded.
 *
 * Returns 0 on success or when kevent(2) was interrupted (EINTR).
 * Returns -1 when kevent(2) fails for another reason, or when a result
 * entry carries an unexpected error; in the latter case errno is set to
 * that error.
 *
 * The base argument is not used here.
 */
static int
kq_dispatch(struct event_base *base, void *arg, struct timeval *tv)
{
	struct kqop *kqop = arg;
	struct kevent *results = kqop->events;
	struct timespec ts;
	struct timespec *timeout = NULL;
	int idx, nready;

	if (tv != NULL) {
		TIMEVAL_TO_TIMESPEC(tv, &ts);
		timeout = &ts;
	}

	nready = kevent(kqop->kq,
	    kqop->nchanges ? kqop->changes : NULL, kqop->nchanges,
	    results, kqop->nevents, timeout);
	kqop->nchanges = 0;
	if (nready == -1) {
		/* An interrupted wait is not an error. */
		if (errno == EINTR)
			return (0);

		event_warn("kevent");
		return (-1);
	}

	event_debug(("%s: kevent reports %d", __func__, nready));

	for (idx = 0; idx < nready; idx++) {
		struct kevent *kev = &results[idx];
		struct event *ev;
		int which = 0;

		if (kev->flags & EV_ERROR) {
			switch (kev->data) {
			/*
			 * ENOENT can occur on delete if we are not
			 * currently watching any events on this fd.  That
			 * can happen when the fd was closed and another
			 * file was opened with that fd.
			 *
			 * EINVAL can occur for reasons not fully
			 * understood on FreeBSD.
			 *
			 * EBADF can occur on a delete if the fd is closed,
			 * and on an add if the fd was one side of a pipe,
			 * and the other side was closed.
			 */
			case ENOENT:
			case EINVAL:
			case EBADF:
				continue;

			/*
			 * These two can occur on an add if the fd was one
			 * side of a pipe, and the other side was closed.
			 */
			case EPERM:
			case EPIPE:
				/*
				 * Deletes are queued with udata cleared, so
				 * a clear udata means the failed operation
				 * was a delete: report nothing.
				 */
				if (!kev->udata)
					continue;
				/*
				 * The operation was an add: report the
				 * error as a read so that the user can
				 * learn about it.
				 */
				which |= EV_READ;
				break;

			/* Other errors shouldn't occur. */
			default:
				errno = kev->data;
				return (-1);
			}
		} else {
			switch (kev->filter) {
			case EVFILT_READ:
				which |= EV_READ;
				break;
			case EVFILT_WRITE:
				which |= EV_WRITE;
				break;
			case EVFILT_SIGNAL:
				which |= EV_SIGNAL;
				break;
			default:
				break;
			}
		}

		if (which == 0)
			continue;

		if (kev->filter == EVFILT_SIGNAL) {
			/* udata is the per-signal list set up by kq_add(). */
			struct event_list *siglist =
			    (struct event_list *)kev->udata;

			TAILQ_FOREACH(ev, siglist, ev_signal_next)
				event_active(ev, which, kev->data);
			continue;
		}

		ev = (struct event *)kev->udata;

		/* Non-persistent events were added with EV_ONESHOT. */
		if ((ev->ev_events & EV_PERSIST) == 0)
			ev->ev_flags &= ~EVLIST_X_KQINKERNEL;

		event_active(ev, which, 1);
	}

	return (0);
}
285 
286 
/*
 * Queue an EV_ADD change for one filter (EVFILT_READ or EVFILT_WRITE) on
 * the descriptor of ev, and mark ev as handed to the kqueue on success.
 * Non-persistent events are registered as EV_ONESHOT.
 * Returns 0 on success, -1 if the change could not be queued.
 */
static int
kq_queue_add(struct kqop *kqop, struct event *ev, short filter,
    unsigned int fflags)
{
	struct kevent kev;

	memset(&kev, 0, sizeof(kev));
	kev.ident = ev->ev_fd;
	kev.filter = filter;
	kev.fflags = fflags;
	kev.flags = EV_ADD;
	if ((ev->ev_events & EV_PERSIST) == 0)
		kev.flags |= EV_ONESHOT;
	kev.udata = ev;

	if (kq_insert(kqop, &kev) == -1)
		return (-1);

	ev->ev_flags |= EVLIST_X_KQINKERNEL;
	return (0);
}

/*
 * Start watching ev.
 *
 * Signal events are appended to the per-signal list; the first event for
 * a signal also registers an EVFILT_SIGNAL filter with the kernel right
 * away and installs kq_sighandler() for that signal.  Read and write
 * events are only queued here and reach the kernel on the next
 * kq_dispatch() call.
 *
 * Returns 0 on success and -1 on failure.
 */
static int
kq_add(void *arg, struct event *ev)
{
	struct kqop *kqop = arg;

	if (ev->ev_events & EV_SIGNAL) {
		int signo = EVENT_SIGNAL(ev);
		struct event_list *siglist;

		assert(signo >= 0 && signo < NSIG);
		siglist = &kqop->evsigevents[signo];

		if (TAILQ_EMPTY(siglist)) {
			struct timespec nowait = { 0, 0 };
			struct kevent kev;

			memset(&kev, 0, sizeof(kev));
			kev.ident = signo;
			kev.filter = EVFILT_SIGNAL;
			kev.flags = EV_ADD;
			kev.udata = siglist;

			/* Be ready for the signal if it is sent any
			 * time between now and the next call to
			 * kq_dispatch. */
			if (kevent(kqop->kq, &kev, 1, NULL, 0, &nowait) == -1)
				return (-1);

			if (_evsignal_set_handler(ev->ev_base, signo,
			    kq_sighandler) == -1)
				return (-1);
		}

		TAILQ_INSERT_TAIL(siglist, ev, ev_signal_next);
		ev->ev_flags |= EVLIST_X_KQINKERNEL;
		return (0);
	}

	/* NOTE_EOF makes the read filter behave like select() and poll() */
	if ((ev->ev_events & EV_READ) &&
	    kq_queue_add(kqop, ev, EVFILT_READ, NOTE_EOF) == -1)
		return (-1);

	if ((ev->ev_events & EV_WRITE) &&
	    kq_queue_add(kqop, ev, EVFILT_WRITE, 0) == -1)
		return (-1);

	return (0);
}
357 
/*
 * Queue an EV_DELETE change for one filter on the descriptor of ev and
 * clear the in-kernel mark on success.  The queued change carries a
 * cleared udata.
 * Returns 0 on success, -1 if the change could not be queued.
 */
static int
kq_queue_del(struct kqop *kqop, struct event *ev, short filter)
{
	struct kevent kev;

	memset(&kev, 0, sizeof(kev));
	kev.ident = ev->ev_fd;
	kev.filter = filter;
	kev.flags = EV_DELETE;

	if (kq_insert(kqop, &kev) == -1)
		return (-1);

	ev->ev_flags &= ~EVLIST_X_KQINKERNEL;
	return (0);
}

/*
 * Stop watching ev.
 *
 * Events that are not marked as handed to the kqueue are ignored.
 * Signal events are unlinked from the per-signal list; when the list
 * becomes empty the EVFILT_SIGNAL filter is removed from the kernel
 * immediately and the previous signal handler is restored.  For read and
 * write events, adds that are still waiting in the change list are simply
 * dropped; otherwise EV_DELETE changes are queued for the next
 * kq_dispatch() call.
 *
 * Returns 0 on success and -1 on failure.
 */
static int
kq_del(void *arg, struct event *ev)
{
	struct kqop *kqop = arg;
	int scan, kept;

	if ((ev->ev_flags & EVLIST_X_KQINKERNEL) == 0)
		return (0);

	if (ev->ev_events & EV_SIGNAL) {
		int signo = EVENT_SIGNAL(ev);
		struct timespec nowait = { 0, 0 };
		struct event_list *siglist;

		assert(signo >= 0 && signo < NSIG);
		siglist = &kqop->evsigevents[signo];

		TAILQ_REMOVE(siglist, ev, ev_signal_next);
		if (TAILQ_EMPTY(siglist)) {
			struct kevent kev;

			memset(&kev, 0, sizeof(kev));
			kev.ident = signo;
			kev.filter = EVFILT_SIGNAL;
			kev.flags = EV_DELETE;

			/* Because we insert signal events
			 * immediately, we need to delete them
			 * immediately, too */
			if (kevent(kqop->kq, &kev, 1, NULL, 0, &nowait) == -1)
				return (-1);

			if (_evsignal_restore_handler(ev->ev_base,
			    signo) == -1)
				return (-1);
		}

		ev->ev_flags &= ~EVLIST_X_KQINKERNEL;
		return (0);
	}

	/*
	 * Compact the change list in place, leaving out every pending add
	 * that belongs to this event.
	 */
	kept = 0;
	for (scan = 0; scan < kqop->nchanges; scan++) {
		struct kevent *chg = &kqop->changes[scan];

		if (chg->udata == ev && (chg->flags & EV_ADD) != 0)
			continue;	/* delete this */
		if (kept != scan)
			memcpy(&kqop->changes[kept], chg,
			    sizeof(struct kevent));
		kept++;
	}

	/* Something was dropped: the add never reached the kernel. */
	if (kept != kqop->nchanges) {
		kqop->nchanges = kept;
		ev->ev_flags &= ~EVLIST_X_KQINKERNEL;
		return (0);
	}

	if ((ev->ev_events & EV_READ) &&
	    kq_queue_del(kqop, ev, EVFILT_READ) == -1)
		return (-1);

	if ((ev->ev_events & EV_WRITE) &&
	    kq_queue_del(kqop, ev, EVFILT_WRITE) == -1)
		return (-1);

	return (0);
}
436 
/*
 * Tear down a backend instance created by kq_init(): release the signal
 * state of base, free both kevent arrays, close the kqueue descriptor and
 * free the kqop itself.
 *
 * The descriptor is closed only when the calling process is the one
 * recorded in kqop->pid by kq_init().
 */
static void
kq_dealloc(struct event_base *base, void *arg)
{
	struct kqop *kqop = arg;

	evsignal_dealloc(base);

	free(kqop->changes);
	free(kqop->events);

	if (kqop->kq >= 0) {
		if (getpid() == kqop->pid)
			close(kqop->kq);
	}

	/* Wipe the structure before handing it back to the allocator. */
	memset(kqop, 0, sizeof(*kqop));
	free(kqop);
}
451 }
452