xref: /openbsd/lib/libevent/kqueue.c (revision 73471bf0)
1 /*	$OpenBSD: kqueue.c,v 1.41 2019/05/08 17:33:22 tobias Exp $	*/
2 
3 /*
4  * Copyright 2000-2002 Niels Provos <provos@citi.umich.edu>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. The name of the author may not be used to endorse or promote products
16  *    derived from this software without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 #include <sys/types.h>
31 #include <sys/time.h>
32 #include <sys/queue.h>
33 #include <sys/event.h>
34 
35 #include <assert.h>
36 #include <errno.h>
37 #include <inttypes.h>
38 #include <limits.h>
39 #include <signal.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #include <unistd.h>
44 
45 #include "event.h"
46 #include "event-internal.h"
47 #include "log.h"
48 #include "evsignal.h"
49 
50 #define EVLIST_X_KQINKERNEL	0x1000
51 
52 #define NEVENT		64
53 
/* Per-event_base private state for the kqueue backend. */
struct kqop {
	struct kevent *changes;	/* pending changelist passed to kevent() */
	int nchanges;		/* entries currently used in changes */
	struct kevent *events;	/* buffer receiving events from kevent() */
	struct event_list evsigevents[NSIG];	/* events registered per signal number */
	int nevents;		/* allocated capacity of both changes and events */
	int kq;			/* kqueue descriptor */
	pid_t pid;		/* pid that created kq; guards close() after fork */
};
63 
64 static void *kq_init	(struct event_base *);
65 static int kq_add	(void *, struct event *);
66 static int kq_del	(void *, struct event *);
67 static int kq_dispatch	(struct event_base *, void *, struct timeval *);
68 static int kq_insert	(struct kqop *, struct kevent *);
69 static void kq_dealloc (struct event_base *, void *);
70 
/*
 * Backend method table picked up by event.c.  Entries, in order:
 * name, init, add, del, dispatch, dealloc, need-reinit-after-fork flag
 * (kqueue descriptors are not inherited usefully across fork).
 */
const struct eventop kqops = {
	"kqueue",
	kq_init,
	kq_add,
	kq_del,
	kq_dispatch,
	kq_dealloc,
	1 /* need reinit */
};
80 
81 static void *
82 kq_init(struct event_base *base)
83 {
84 	int i, kq;
85 	struct kqop *kqueueop;
86 
87 	/* Disable kqueue when this environment variable is set */
88 	if (!issetugid() && getenv("EVENT_NOKQUEUE"))
89 		return (NULL);
90 
91 	if (!(kqueueop = calloc(1, sizeof(struct kqop))))
92 		return (NULL);
93 
94 	/* Initalize the kernel queue */
95 
96 	if ((kq = kqueue()) == -1) {
97 		event_warn("kqueue");
98 		free (kqueueop);
99 		return (NULL);
100 	}
101 
102 	kqueueop->kq = kq;
103 
104 	kqueueop->pid = getpid();
105 
106 	/* Initalize fields */
107 	kqueueop->changes = calloc(NEVENT, sizeof(struct kevent));
108 	if (kqueueop->changes == NULL) {
109 		free (kqueueop);
110 		return (NULL);
111 	}
112 	kqueueop->events = calloc(NEVENT, sizeof(struct kevent));
113 	if (kqueueop->events == NULL) {
114 		free (kqueueop->changes);
115 		free (kqueueop);
116 		return (NULL);
117 	}
118 	kqueueop->nevents = NEVENT;
119 
120 	/* we need to keep track of multiple events per signal */
121 	for (i = 0; i < NSIG; ++i) {
122 		TAILQ_INIT(&kqueueop->evsigevents[i]);
123 	}
124 
125 	return (kqueueop);
126 }
127 
128 static int
129 kq_insert(struct kqop *kqop, struct kevent *kev)
130 {
131 	int nevents = kqop->nevents;
132 
133 	if (kqop->nchanges == nevents) {
134 		struct kevent *newchange;
135 		struct kevent *newresult;
136 
137 		if (nevents > INT_MAX / 2) {
138 			event_warnx("%s: integer overflow", __func__);
139 			return (-1);
140 		}
141 		nevents *= 2;
142 
143 		newchange = recallocarray(kqop->changes,
144 		    kqop->nevents, nevents, sizeof(struct kevent));
145 		if (newchange == NULL) {
146 			event_warn("%s: recallocarray", __func__);
147 			return (-1);
148 		}
149 		kqop->changes = newchange;
150 
151 		newresult = recallocarray(kqop->events,
152 		    kqop->nevents, nevents, sizeof(struct kevent));
153 
154 		/*
155 		 * If we fail, we don't have to worry about freeing,
156 		 * the next realloc will pick it up.
157 		 */
158 		if (newresult == NULL) {
159 			event_warn("%s: recallocarray", __func__);
160 			return (-1);
161 		}
162 		kqop->events = newresult;
163 
164 		kqop->nevents = nevents;
165 	}
166 
167 	memcpy(&kqop->changes[kqop->nchanges++], kev, sizeof(struct kevent));
168 
169 	event_debug(("%s: fd %d %s%s",
170 		__func__, (int)kev->ident,
171 		kev->filter == EVFILT_READ ? "EVFILT_READ" : "EVFILT_WRITE",
172 		kev->flags == EV_DELETE ? " (del)" : ""));
173 
174 	return (0);
175 }
176 
/*
 * No-op signal handler.  It is installed via _evsignal_set_handler()
 * in kq_add(); actual signal delivery is observed through kqueue's
 * EVFILT_SIGNAL filter in kq_dispatch(), so the handler body is
 * intentionally empty.
 */
static void
kq_sighandler(int sig)
{
	/* Do nothing here */
}
182 
/*
 * Flush the pending changelist and wait (up to tv, or forever when tv
 * is NULL) for kernel events, then activate the matching struct events.
 * Returns 0 on success or benign interruption (EINTR), -1 on error.
 */
static int
kq_dispatch(struct event_base *base, void *arg, struct timeval *tv)
{
	struct kqop *kqop = arg;
	struct kevent *changes = kqop->changes;
	struct kevent *events = kqop->events;
	struct event *ev;
	struct timespec ts, *ts_p = NULL;
	int i, res;

	if (tv != NULL) {
		TIMEVAL_TO_TIMESPEC(tv, &ts);
		ts_p = &ts;
	}

	/* Submit all queued changes and collect ready events in one call;
	 * the changelist is consumed by the kernel, so reset nchanges
	 * regardless of the outcome. */
	res = kevent(kqop->kq, kqop->nchanges ? changes : NULL, kqop->nchanges,
	    events, kqop->nevents, ts_p);
	kqop->nchanges = 0;
	if (res == -1) {
		if (errno != EINTR) {
			event_warn("kevent");
			return (-1);
		}

		/* Interrupted by a signal: not an error. */
		return (0);
	}

	event_debug(("%s: kevent reports %d", __func__, res));

	for (i = 0; i < res; i++) {
		int which = 0;

		/* Per-change errors come back as events with EV_ERROR set
		 * and the errno value in data. */
		if (events[i].flags & EV_ERROR) {
			switch (events[i].data) {

			/* Can occur on delete if we are not currently
			 * watching any events on this fd.  That can
			 * happen when the fd was closed and another
			 * file was opened with that fd. */
			case ENOENT:
			/* Can occur for reasons not fully understood
			 * on FreeBSD. */
			case EINVAL:
				continue;
			/* Can occur on a delete if the fd is closed.  Can
			 * occur on an add if the fd was one side of a pipe,
			 * and the other side was closed. */
			case EBADF:
				continue;
			/* These two can occur on an add if the fd was one side
			 * of a pipe, and the other side was closed. */
			case EPERM:
			case EPIPE:
				/* Report read events, if we're listening for
				 * them, so that the user can learn about any
				 * add errors.  (If the operation was a
				 * delete, then udata should be cleared.) */
				if (events[i].udata) {
					/* The operation was an add:
					 * report the error as a read. */
					which |= EV_READ;
					break;
				} else {
					/* The operation was a del:
					 * report nothing. */
					continue;
				}

			/* Other errors shouldn't occur. */
			default:
				errno = events[i].data;
				return (-1);
			}
		} else if (events[i].filter == EVFILT_READ) {
			which |= EV_READ;
		} else if (events[i].filter == EVFILT_WRITE) {
			which |= EV_WRITE;
		} else if (events[i].filter == EVFILT_SIGNAL) {
			which |= EV_SIGNAL;
		}

		if (!which)
			continue;

		if (events[i].filter == EVFILT_SIGNAL) {
			/* For signals, udata points at the per-signal list
			 * set up by kq_add(); activate every listener.
			 * data carries the number of times the signal
			 * occurred since the last call. */
			struct event_list *head =
			    (struct event_list *)events[i].udata;
			TAILQ_FOREACH(ev, head, ev_signal_next) {
				event_active(ev, which, events[i].data);
			}
		} else {
			ev = (struct event *)events[i].udata;

			/* Non-persistent fd events were added EV_ONESHOT,
			 * so the kernel has already dropped them. */
			if (!(ev->ev_events & EV_PERSIST))
				ev->ev_flags &= ~EVLIST_X_KQINKERNEL;

			event_active(ev, which, 1);
		}
	}

	return (0);
}
285 
286 
287 static int
288 kq_add(void *arg, struct event *ev)
289 {
290 	struct kqop *kqop = arg;
291 	struct kevent kev;
292 
293 	if (ev->ev_events & EV_SIGNAL) {
294 		int nsignal = EVENT_SIGNAL(ev);
295 
296 		assert(nsignal >= 0 && nsignal < NSIG);
297 		if (TAILQ_EMPTY(&kqop->evsigevents[nsignal])) {
298 			struct timespec timeout = { 0, 0 };
299 
300 			memset(&kev, 0, sizeof(kev));
301 			kev.ident = nsignal;
302 			kev.filter = EVFILT_SIGNAL;
303 			kev.flags = EV_ADD;
304 			kev.udata = &kqop->evsigevents[nsignal];
305 
306 			/* Be ready for the signal if it is sent any
307 			 * time between now and the next call to
308 			 * kq_dispatch. */
309 			if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1)
310 				return (-1);
311 
312 			if (_evsignal_set_handler(ev->ev_base, nsignal,
313 				kq_sighandler) == -1)
314 				return (-1);
315 		}
316 
317 		TAILQ_INSERT_TAIL(&kqop->evsigevents[nsignal], ev,
318 		    ev_signal_next);
319 		ev->ev_flags |= EVLIST_X_KQINKERNEL;
320 		return (0);
321 	}
322 
323 	if (ev->ev_events & EV_READ) {
324 		memset(&kev, 0, sizeof(kev));
325 		kev.ident = ev->ev_fd;
326 		kev.filter = EVFILT_READ;
327 		/* Make it behave like select() and poll() */
328 		kev.fflags = NOTE_EOF;
329 		kev.flags = EV_ADD;
330 		if (!(ev->ev_events & EV_PERSIST))
331 			kev.flags |= EV_ONESHOT;
332 		kev.udata = ev;
333 
334 		if (kq_insert(kqop, &kev) == -1)
335 			return (-1);
336 
337 		ev->ev_flags |= EVLIST_X_KQINKERNEL;
338 	}
339 
340 	if (ev->ev_events & EV_WRITE) {
341 		memset(&kev, 0, sizeof(kev));
342 		kev.ident = ev->ev_fd;
343 		kev.filter = EVFILT_WRITE;
344 		kev.flags = EV_ADD;
345 		if (!(ev->ev_events & EV_PERSIST))
346 			kev.flags |= EV_ONESHOT;
347 		kev.udata = ev;
348 
349 		if (kq_insert(kqop, &kev) == -1)
350 			return (-1);
351 
352 		ev->ev_flags |= EVLIST_X_KQINKERNEL;
353 	}
354 
355 	return (0);
356 }
357 
358 static int
359 kq_del(void *arg, struct event *ev)
360 {
361 	struct kqop *kqop = arg;
362 	struct kevent kev;
363 
364 	if (!(ev->ev_flags & EVLIST_X_KQINKERNEL))
365 		return (0);
366 
367 	if (ev->ev_events & EV_SIGNAL) {
368 		int nsignal = EVENT_SIGNAL(ev);
369 		struct timespec timeout = { 0, 0 };
370 
371 		assert(nsignal >= 0 && nsignal < NSIG);
372 		TAILQ_REMOVE(&kqop->evsigevents[nsignal], ev, ev_signal_next);
373 		if (TAILQ_EMPTY(&kqop->evsigevents[nsignal])) {
374 			memset(&kev, 0, sizeof(kev));
375 			kev.ident = nsignal;
376 			kev.filter = EVFILT_SIGNAL;
377 			kev.flags = EV_DELETE;
378 
379 			/* Because we insert signal events
380 			 * immediately, we need to delete them
381 			 * immediately, too */
382 			if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1)
383 				return (-1);
384 
385 			if (_evsignal_restore_handler(ev->ev_base,
386 				nsignal) == -1)
387 				return (-1);
388 		}
389 
390 		ev->ev_flags &= ~EVLIST_X_KQINKERNEL;
391 		return (0);
392 	}
393 
394 	if (ev->ev_events & EV_READ) {
395 		memset(&kev, 0, sizeof(kev));
396 		kev.ident = ev->ev_fd;
397 		kev.filter = EVFILT_READ;
398 		kev.flags = EV_DELETE;
399 
400 		if (kq_insert(kqop, &kev) == -1)
401 			return (-1);
402 
403 		ev->ev_flags &= ~EVLIST_X_KQINKERNEL;
404 	}
405 
406 	if (ev->ev_events & EV_WRITE) {
407 		memset(&kev, 0, sizeof(kev));
408 		kev.ident = ev->ev_fd;
409 		kev.filter = EVFILT_WRITE;
410 		kev.flags = EV_DELETE;
411 
412 		if (kq_insert(kqop, &kev) == -1)
413 			return (-1);
414 
415 		ev->ev_flags &= ~EVLIST_X_KQINKERNEL;
416 	}
417 
418 	return (0);
419 }
420 
421 static void
422 kq_dealloc(struct event_base *base, void *arg)
423 {
424 	struct kqop *kqop = arg;
425 
426 	evsignal_dealloc(base);
427 
428 	free(kqop->changes);
429 	free(kqop->events);
430 	if (kqop->kq >= 0 && kqop->pid == getpid())
431 		close(kqop->kq);
432 
433 	memset(kqop, 0, sizeof(struct kqop));
434 	free(kqop);
435 }
436