xref: /openbsd/lib/libevent/kqueue.c (revision 74ae6390)
/*	$OpenBSD: kqueue.c,v 1.39 2016/09/03 11:31:17 nayden Exp $	*/

/*
 * Copyright 2000-2002 Niels Provos <provos@citi.umich.edu>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/types.h>
#include <sys/time.h>
#include <sys/queue.h>
#include <sys/event.h>

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <assert.h>
#include <inttypes.h>

#include "event.h"
#include "event-internal.h"
#include "log.h"
#include "evsignal.h"

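/*
 * Private ev_flags bit: set while an event has been handed to the
 * kernel queue, cleared again in kq_del() or when a non-persistent
 * event fires.
 */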
#define EVLIST_X_KQINKERNEL	0x1000

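/*
 * Initial size of the change and result arrays; kq_insert() doubles
 * both whenever the change list fills up.
 */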
#define NEVENT		64

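/*
 * Per-base kqueue state: the list of pending changes, the buffer that
 * receives results from kevent(2), one event list per signal number,
 * and the kqueue descriptor together with the pid that created it
 * (kq_dealloc() only closes the descriptor in the owning process).
 */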
struct kqop {
	struct kevent *changes;
	int nchanges;
	struct kevent *events;
	struct event_list evsigevents[NSIG];
	int nevents;
	int kq;
	pid_t pid;
};

static void *kq_init	(struct event_base *);
static int kq_add	(void *, struct event *);
static int kq_del	(void *, struct event *);
static int kq_dispatch	(struct event_base *, void *, struct timeval *);
static int kq_insert	(struct kqop *, struct kevent *);
static void kq_dealloc	(struct event_base *, void *);

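/*
 * Backend dispatch table picked up by event.c.  The final field flags
 * that the backend must be reinitialized after fork(), since kqueue
 * descriptors are not inherited by child processes.
 */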
const struct eventop kqops = {
	"kqueue",
	kq_init,
	kq_add,
	kq_del,
	kq_dispatch,
	kq_dealloc,
	1 /* need reinit */
};

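/*
 * Set up the kqueue backend for a base: honor EVENT_NOKQUEUE, create
 * the kernel queue, allocate the change/result arrays and initialize
 * the per-signal event lists.  Returns NULL on any failure so that
 * event_base can fall back to another backend.
 */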
static void *
kq_init(struct event_base *base)
{
	int i, kq;
	struct kqop *kqueueop;

	/* Disable kqueue when this environment variable is set */
	if (!issetugid() && getenv("EVENT_NOKQUEUE"))
		return (NULL);

	if (!(kqueueop = calloc(1, sizeof(struct kqop))))
		return (NULL);

	/* Initialize the kernel queue */

	if ((kq = kqueue()) == -1) {
		event_warn("kqueue");
		free(kqueueop);
		return (NULL);
	}

	kqueueop->kq = kq;

	kqueueop->pid = getpid();

	/* Initialize fields */
	kqueueop->changes = calloc(NEVENT, sizeof(struct kevent));
	if (kqueueop->changes == NULL) {
		free(kqueueop);
		return (NULL);
	}
	kqueueop->events = calloc(NEVENT, sizeof(struct kevent));
	if (kqueueop->events == NULL) {
		free(kqueueop->changes);
		free(kqueueop);
		return (NULL);
	}
	kqueueop->nevents = NEVENT;

	/* We need to keep track of multiple events per signal */
	for (i = 0; i < NSIG; ++i) {
		TAILQ_INIT(&kqueueop->evsigevents[i]);
	}

	return (kqueueop);
}

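/*
 * Queue a change for the next kevent(2) call, growing the change and
 * result arrays (by doubling) if the change list is full.
 */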
static int
kq_insert(struct kqop *kqop, struct kevent *kev)
{
	int nevents = kqop->nevents;

	if (kqop->nchanges == nevents) {
		struct kevent *newchange;
		struct kevent *newresult;

		nevents *= 2;

		newchange = reallocarray(kqop->changes,
		    nevents, sizeof(struct kevent));
		if (newchange == NULL) {
			event_warn("%s: reallocarray", __func__);
			return (-1);
		}
		kqop->changes = newchange;

		newresult = reallocarray(kqop->events,
		    nevents, sizeof(struct kevent));

		/*
		 * If this fails we do not need to free anything here;
		 * the next reallocarray() attempt will pick up the old
		 * buffer.
		 */
		if (newresult == NULL) {
			event_warn("%s: reallocarray", __func__);
			return (-1);
		}
		kqop->events = newresult;

		kqop->nevents = nevents;
	}

	memcpy(&kqop->changes[kqop->nchanges++], kev, sizeof(struct kevent));

	event_debug(("%s: fd %d %s%s",
		__func__, (int)kev->ident,
		kev->filter == EVFILT_READ ? "EVFILT_READ" : "EVFILT_WRITE",
		kev->flags == EV_DELETE ? " (del)" : ""));

	return (0);
}

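/*
 * No-op handler installed for signals that are watched through
 * EVFILT_SIGNAL; it only exists to override the default disposition
 * while the kqueue filter reports the signal.
 */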
static void
kq_sighandler(int sig)
{
	/* Do nothing here */
}

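/*
 * Submit all pending changes and wait (up to tv) for events.  EV_ERROR
 * results are either ignored or reported as reads depending on the
 * errno; signal events activate every event registered for that
 * signal, other events activate the single event stored in udata.
 */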
static int
kq_dispatch(struct event_base *base, void *arg, struct timeval *tv)
{
	struct kqop *kqop = arg;
	struct kevent *changes = kqop->changes;
	struct kevent *events = kqop->events;
	struct event *ev;
	struct timespec ts, *ts_p = NULL;
	int i, res;

	if (tv != NULL) {
		TIMEVAL_TO_TIMESPEC(tv, &ts);
		ts_p = &ts;
	}

	res = kevent(kqop->kq, changes, kqop->nchanges,
	    events, kqop->nevents, ts_p);
	kqop->nchanges = 0;
	if (res == -1) {
		if (errno != EINTR) {
			event_warn("kevent");
			return (-1);
		}

		return (0);
	}

	event_debug(("%s: kevent reports %d", __func__, res));

	for (i = 0; i < res; i++) {
		int which = 0;

		if (events[i].flags & EV_ERROR) {
			switch (events[i].data) {

			/* Can occur on delete if we are not currently
			 * watching any events on this fd.  That can
			 * happen when the fd was closed and another
			 * file was opened with that fd. */
			case ENOENT:
			/* Can occur for reasons not fully understood
			 * on FreeBSD. */
			case EINVAL:
				continue;
			/* Can occur on a delete if the fd is closed.  Can
			 * occur on an add if the fd was one side of a pipe,
			 * and the other side was closed. */
			case EBADF:
				continue;
			/* These two can occur on an add if the fd was one side
			 * of a pipe, and the other side was closed. */
			case EPERM:
			case EPIPE:
				/* Report read events, if we're listening for
				 * them, so that the user can learn about any
				 * add errors.  (If the operation was a
				 * delete, then udata should be cleared.) */
				if (events[i].udata) {
					/* The operation was an add:
					 * report the error as a read. */
					which |= EV_READ;
					break;
				} else {
					/* The operation was a del:
					 * report nothing. */
					continue;
				}

			/* Other errors shouldn't occur. */
			default:
				errno = events[i].data;
				return (-1);
			}
		} else if (events[i].filter == EVFILT_READ) {
			which |= EV_READ;
		} else if (events[i].filter == EVFILT_WRITE) {
			which |= EV_WRITE;
		} else if (events[i].filter == EVFILT_SIGNAL) {
			which |= EV_SIGNAL;
		}

		if (!which)
			continue;

		if (events[i].filter == EVFILT_SIGNAL) {
			struct event_list *head =
			    (struct event_list *)events[i].udata;
			TAILQ_FOREACH(ev, head, ev_signal_next) {
				event_active(ev, which, events[i].data);
			}
		} else {
			ev = (struct event *)events[i].udata;

			if (!(ev->ev_events & EV_PERSIST))
				ev->ev_flags &= ~EVLIST_X_KQINKERNEL;

			event_active(ev, which, 1);
		}
	}

	return (0);
}

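/*
 * Register an event with the kernel.  Signal events are added with an
 * immediate kevent(2) call and tracked in evsigevents[]; read/write
 * events are queued through kq_insert() and submitted on the next
 * dispatch, using EV_ONESHOT unless the event is persistent.
 */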
static int
kq_add(void *arg, struct event *ev)
{
	struct kqop *kqop = arg;
	struct kevent kev;

	if (ev->ev_events & EV_SIGNAL) {
		int nsignal = EVENT_SIGNAL(ev);

		assert(nsignal >= 0 && nsignal < NSIG);
		if (TAILQ_EMPTY(&kqop->evsigevents[nsignal])) {
			struct timespec timeout = { 0, 0 };

			memset(&kev, 0, sizeof(kev));
			kev.ident = nsignal;
			kev.filter = EVFILT_SIGNAL;
			kev.flags = EV_ADD;
			kev.udata = &kqop->evsigevents[nsignal];

			/* Be ready for the signal if it is sent any
			 * time between now and the next call to
			 * kq_dispatch. */
			if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1)
				return (-1);

			if (_evsignal_set_handler(ev->ev_base, nsignal,
				kq_sighandler) == -1)
				return (-1);
		}

		TAILQ_INSERT_TAIL(&kqop->evsigevents[nsignal], ev,
		    ev_signal_next);
		ev->ev_flags |= EVLIST_X_KQINKERNEL;
		return (0);
	}

	if (ev->ev_events & EV_READ) {
		memset(&kev, 0, sizeof(kev));
		kev.ident = ev->ev_fd;
		kev.filter = EVFILT_READ;
		/* Make it behave like select() and poll() */
		kev.fflags = NOTE_EOF;
		kev.flags = EV_ADD;
		if (!(ev->ev_events & EV_PERSIST))
			kev.flags |= EV_ONESHOT;
		kev.udata = ev;

		if (kq_insert(kqop, &kev) == -1)
			return (-1);

		ev->ev_flags |= EVLIST_X_KQINKERNEL;
	}

	if (ev->ev_events & EV_WRITE) {
		memset(&kev, 0, sizeof(kev));
		kev.ident = ev->ev_fd;
		kev.filter = EVFILT_WRITE;
		kev.flags = EV_ADD;
		if (!(ev->ev_events & EV_PERSIST))
			kev.flags |= EV_ONESHOT;
		kev.udata = ev;

		if (kq_insert(kqop, &kev) == -1)
			return (-1);

		ev->ev_flags |= EVLIST_X_KQINKERNEL;
	}

	return (0);
}

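/*
 * Remove an event from the kernel.  Signal filters (and their
 * handlers) are removed immediately once the last event for that
 * signal is gone; read/write deletions are queued through kq_insert().
 */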
static int
kq_del(void *arg, struct event *ev)
{
	struct kqop *kqop = arg;
	struct kevent kev;

	if (!(ev->ev_flags & EVLIST_X_KQINKERNEL))
		return (0);

	if (ev->ev_events & EV_SIGNAL) {
		int nsignal = EVENT_SIGNAL(ev);
		struct timespec timeout = { 0, 0 };

		assert(nsignal >= 0 && nsignal < NSIG);
		TAILQ_REMOVE(&kqop->evsigevents[nsignal], ev, ev_signal_next);
		if (TAILQ_EMPTY(&kqop->evsigevents[nsignal])) {
			memset(&kev, 0, sizeof(kev));
			kev.ident = nsignal;
			kev.filter = EVFILT_SIGNAL;
			kev.flags = EV_DELETE;

			/* Because we insert signal events
			 * immediately, we need to delete them
			 * immediately, too */
			if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1)
				return (-1);

			if (_evsignal_restore_handler(ev->ev_base,
				nsignal) == -1)
				return (-1);
		}

		ev->ev_flags &= ~EVLIST_X_KQINKERNEL;
		return (0);
	}

	if (ev->ev_events & EV_READ) {
		memset(&kev, 0, sizeof(kev));
		kev.ident = ev->ev_fd;
		kev.filter = EVFILT_READ;
		kev.flags = EV_DELETE;

		if (kq_insert(kqop, &kev) == -1)
			return (-1);

		ev->ev_flags &= ~EVLIST_X_KQINKERNEL;
	}

	if (ev->ev_events & EV_WRITE) {
		memset(&kev, 0, sizeof(kev));
		kev.ident = ev->ev_fd;
		kev.filter = EVFILT_WRITE;
		kev.flags = EV_DELETE;

		if (kq_insert(kqop, &kev) == -1)
			return (-1);

		ev->ev_flags &= ~EVLIST_X_KQINKERNEL;
	}

	return (0);
}

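/*
 * Tear down the backend: release the signal bookkeeping, free the
 * change/result arrays and close the kqueue descriptor, but only in
 * the process that created it.
 */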
static void
kq_dealloc(struct event_base *base, void *arg)
{
	struct kqop *kqop = arg;

	evsignal_dealloc(base);

	free(kqop->changes);
	free(kqop->events);
	if (kqop->kq >= 0 && kqop->pid == getpid())
		close(kqop->kq);

	memset(kqop, 0, sizeof(struct kqop));
	free(kqop);
}
431