xref: /openbsd/lib/libevent/kqueue.c (revision bdce580d)
1*bdce580dSbrad /*	$OpenBSD: kqueue.c,v 1.24 2008/05/02 06:09:11 brad Exp $	*/
234fc9cdeSmickey 
3fd332320Sprovos /*
4fd332320Sprovos  * Copyright 2000-2002 Niels Provos <provos@citi.umich.edu>
5fd332320Sprovos  * All rights reserved.
6fd332320Sprovos  *
7fd332320Sprovos  * Redistribution and use in source and binary forms, with or without
8fd332320Sprovos  * modification, are permitted provided that the following conditions
9fd332320Sprovos  * are met:
10fd332320Sprovos  * 1. Redistributions of source code must retain the above copyright
11fd332320Sprovos  *    notice, this list of conditions and the following disclaimer.
12fd332320Sprovos  * 2. Redistributions in binary form must reproduce the above copyright
13fd332320Sprovos  *    notice, this list of conditions and the following disclaimer in the
14fd332320Sprovos  *    documentation and/or other materials provided with the distribution.
15ff9272daSbrad  * 3. The name of the author may not be used to endorse or promote products
16fd332320Sprovos  *    derived from this software without specific prior written permission.
17fd332320Sprovos  *
18fd332320Sprovos  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19fd332320Sprovos  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20fd332320Sprovos  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21fd332320Sprovos  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22fd332320Sprovos  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23fd332320Sprovos  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24fd332320Sprovos  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25fd332320Sprovos  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26fd332320Sprovos  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27fd332320Sprovos  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28fd332320Sprovos  */
291770acb2Smarkus #ifdef HAVE_CONFIG_H
30fd332320Sprovos #include "config.h"
311770acb2Smarkus #endif
32fd332320Sprovos 
33fd332320Sprovos #include <sys/types.h>
341770acb2Smarkus #ifdef HAVE_SYS_TIME_H
35fd332320Sprovos #include <sys/time.h>
361770acb2Smarkus #else
371770acb2Smarkus #include <sys/_time.h>
381770acb2Smarkus #endif
39fd332320Sprovos #include <sys/queue.h>
40fd332320Sprovos #include <sys/event.h>
41fd332320Sprovos #include <signal.h>
42fd332320Sprovos #include <stdio.h>
43fd332320Sprovos #include <stdlib.h>
44ff33a3f0Sderaadt #include <string.h>
45fd332320Sprovos #include <unistd.h>
46fd332320Sprovos #include <errno.h>
471770acb2Smarkus #ifdef HAVE_INTTYPES_H
481770acb2Smarkus #include <inttypes.h>
491770acb2Smarkus #endif
50fd332320Sprovos 
/*
 * The udata member of struct kevent is declared as intptr_t on some
 * platforms and as void * on others.  There is no easy way to tell the two
 * apart via autoconf, so the decision is keyed on OS macros: the pointer is
 * passed through unchanged on OpenBSD, FreeBSD and Darwin/Apple systems (and
 * whenever HAVE_INTTYPES_H is not defined), and cast to intptr_t otherwise.
 */
#if !defined(HAVE_INTTYPES_H) || defined(__OpenBSD__) || defined(__FreeBSD__) || defined(__darwin__) || defined(__APPLE__)
#define PTR_TO_UDATA(x) (x)
#else
#define PTR_TO_UDATA(x) ((intptr_t)(x))
#endif
59fd332320Sprovos 
60fd332320Sprovos #include "event.h"
614643be29Sbrad #include "log.h"
62fd332320Sprovos 
/*
 * Backend-private bit in ev_flags: set by kq_add() once a change for the
 * event has been queued, cleared again by kq_del().
 */
#define EVLIST_X_KQINKERNEL 0x1000

/* Initial number of entries in the changes and events arrays. */
#define NEVENT 64
66fd332320Sprovos 
/*
 * State of one kqueue backend instance: the batch of changes waiting to be
 * submitted, the buffer that receives results, and the kqueue descriptor.
 */
struct kqop {
	struct kevent *changes;	/* changes queued by kq_insert() */
	int nchanges;		/* number of entries of changes in use */
	struct kevent *events;	/* result buffer passed to kevent() */
	int nevents;		/* allocated entries in changes and events */
	int kq;			/* descriptor returned by kqueue() */
};
74fd332320Sprovos 
/* Entry points of the kqueue backend; all but kq_insert() are listed in kqops. */
void	*kq_init(struct event_base *base);
int	 kq_add(void *arg, struct event *ev);
int	 kq_del(void *arg, struct event *ev);
int	 kq_recalc(struct event_base *base, void *arg, int max);
int	 kq_dispatch(struct event_base *base, void *arg, struct timeval *tv);
int	 kq_insert(struct kqop *kqop, struct kevent *kev);
void	 kq_dealloc(struct event_base *base, void *arg);
82fd332320Sprovos 
83759b8817Smickey const struct eventop kqops = {
84fd332320Sprovos 	"kqueue",
85fd332320Sprovos 	kq_init,
86fd332320Sprovos 	kq_add,
87fd332320Sprovos 	kq_del,
88fd332320Sprovos 	kq_recalc,
893ac1ba99Sbrad 	kq_dispatch,
903ac1ba99Sbrad 	kq_dealloc
91fd332320Sprovos };
92fd332320Sprovos 
93fd332320Sprovos void *
94*bdce580dSbrad kq_init(struct event_base *base)
95fd332320Sprovos {
96fd332320Sprovos 	int kq;
974643be29Sbrad 	struct kqop *kqueueop;
98fd332320Sprovos 
99fd332320Sprovos 	/* Disable kqueue when this environment variable is set */
1001770acb2Smarkus 	if (!issetugid() && getenv("EVENT_NOKQUEUE"))
101fd332320Sprovos 		return (NULL);
102fd332320Sprovos 
1034643be29Sbrad 	if (!(kqueueop = calloc(1, sizeof(struct kqop))))
1044643be29Sbrad 		return (NULL);
105fd332320Sprovos 
106ff9272daSbrad 	/* Initalize the kernel queue */
107fd332320Sprovos 
108fd332320Sprovos 	if ((kq = kqueue()) == -1) {
1094643be29Sbrad 		event_warn("kqueue");
1104643be29Sbrad 		free (kqueueop);
111fd332320Sprovos 		return (NULL);
112fd332320Sprovos 	}
113fd332320Sprovos 
1144643be29Sbrad 	kqueueop->kq = kq;
115fd332320Sprovos 
116ff9272daSbrad 	/* Initalize fields */
1171ed98fdfSderaadt 	kqueueop->changes = calloc(NEVENT, sizeof(struct kevent));
1184643be29Sbrad 	if (kqueueop->changes == NULL) {
1194643be29Sbrad 		free (kqueueop);
120fd332320Sprovos 		return (NULL);
121fd332320Sprovos 	}
1221ed98fdfSderaadt 	kqueueop->events = calloc(NEVENT, sizeof(struct kevent));
1234643be29Sbrad 	if (kqueueop->events == NULL) {
1244643be29Sbrad 		free (kqueueop->changes);
1254643be29Sbrad 		free (kqueueop);
1264643be29Sbrad 		return (NULL);
1274643be29Sbrad 	}
1284643be29Sbrad 	kqueueop->nevents = NEVENT;
129fd332320Sprovos 
130348ce57bSbrad 	/* Check for Mac OS X kqueue bug. */
131348ce57bSbrad 	kqueueop->changes[0].ident = -1;
132348ce57bSbrad 	kqueueop->changes[0].filter = EVFILT_READ;
133348ce57bSbrad 	kqueueop->changes[0].flags = EV_ADD;
134348ce57bSbrad 	/*
135348ce57bSbrad 	 * If kqueue works, then kevent will succeed, and it will
136348ce57bSbrad 	 * stick an error in events[0].  If kqueue is broken, then
137348ce57bSbrad 	 * kevent will fail.
138348ce57bSbrad 	 */
139348ce57bSbrad 	if (kevent(kq,
140348ce57bSbrad 		kqueueop->changes, 1, kqueueop->events, NEVENT, NULL) != 1 ||
141348ce57bSbrad 	    kqueueop->events[0].ident != -1 ||
142348ce57bSbrad 	    kqueueop->events[0].flags != EV_ERROR) {
143348ce57bSbrad 		event_warn("%s: detected broken kqueue; not using.", __func__);
144348ce57bSbrad 		free(kqueueop->changes);
145348ce57bSbrad 		free(kqueueop->events);
146348ce57bSbrad 		free(kqueueop);
147348ce57bSbrad 		close(kq);
148348ce57bSbrad 		return (NULL);
149348ce57bSbrad 	}
150348ce57bSbrad 
1514643be29Sbrad 	return (kqueueop);
152fd332320Sprovos }
153fd332320Sprovos 
/*
 * Recalculation entry of the backend method table.  The kqueue backend has
 * nothing to recompute: all arguments are ignored and 0 is always returned.
 */
int
kq_recalc(struct event_base *base, void *arg, int max)
{
	(void)base;
	(void)arg;
	(void)max;

	return (0);
}
159fd332320Sprovos 
160fd332320Sprovos int
161fd332320Sprovos kq_insert(struct kqop *kqop, struct kevent *kev)
162fd332320Sprovos {
163fd332320Sprovos 	int nevents = kqop->nevents;
164fd332320Sprovos 
165fd332320Sprovos 	if (kqop->nchanges == nevents) {
166fd332320Sprovos 		struct kevent *newchange;
167fd332320Sprovos 		struct kevent *newresult;
168fd332320Sprovos 
169fd332320Sprovos 		nevents *= 2;
170fd332320Sprovos 
171fd332320Sprovos 		newchange = realloc(kqop->changes,
172fd332320Sprovos 				    nevents * sizeof(struct kevent));
173fd332320Sprovos 		if (newchange == NULL) {
1744643be29Sbrad 			event_warn("%s: malloc", __func__);
175fd332320Sprovos 			return (-1);
176fd332320Sprovos 		}
177fd332320Sprovos 		kqop->changes = newchange;
178fd332320Sprovos 
179ff9272daSbrad 		newresult = realloc(kqop->events,
180fd332320Sprovos 				    nevents * sizeof(struct kevent));
181fd332320Sprovos 
182fd332320Sprovos 		/*
183fd332320Sprovos 		 * If we fail, we don't have to worry about freeing,
184fd332320Sprovos 		 * the next realloc will pick it up.
185fd332320Sprovos 		 */
186fd332320Sprovos 		if (newresult == NULL) {
1874643be29Sbrad 			event_warn("%s: malloc", __func__);
188fd332320Sprovos 			return (-1);
189fd332320Sprovos 		}
190ff9272daSbrad 		kqop->events = newresult;
191fd332320Sprovos 
192fd332320Sprovos 		kqop->nevents = nevents;
193fd332320Sprovos 	}
194fd332320Sprovos 
195fd332320Sprovos 	memcpy(&kqop->changes[kqop->nchanges++], kev, sizeof(struct kevent));
196fd332320Sprovos 
1974643be29Sbrad 	event_debug(("%s: fd %d %s%s",
1981770acb2Smarkus 		 __func__, kev->ident,
199fd332320Sprovos 		 kev->filter == EVFILT_READ ? "EVFILT_READ" : "EVFILT_WRITE",
200fd332320Sprovos 		 kev->flags == EV_DELETE ? " (del)" : ""));
201fd332320Sprovos 
202fd332320Sprovos 	return (0);
203fd332320Sprovos }
204fd332320Sprovos 
/*
 * Intentionally empty signal handler.  kq_add() installs it for signals
 * that are watched through EVFILT_SIGNAL.
 */
static void
kq_sighandler(int sig)
{
	(void)sig;
}
210fd332320Sprovos 
211fd332320Sprovos int
2124643be29Sbrad kq_dispatch(struct event_base *base, void *arg, struct timeval *tv)
213fd332320Sprovos {
214fd332320Sprovos 	struct kqop *kqop = arg;
215fd332320Sprovos 	struct kevent *changes = kqop->changes;
216fd332320Sprovos 	struct kevent *events = kqop->events;
217fd332320Sprovos 	struct event *ev;
218*bdce580dSbrad 	struct timespec ts, *ts_p = NULL;
219fd332320Sprovos 	int i, res;
220fd332320Sprovos 
221*bdce580dSbrad 	if (tv != NULL) {
222fd332320Sprovos 		TIMEVAL_TO_TIMESPEC(tv, &ts);
223*bdce580dSbrad 		ts_p = &ts;
224*bdce580dSbrad 	}
225fd332320Sprovos 
226fd332320Sprovos 	res = kevent(kqop->kq, changes, kqop->nchanges,
227*bdce580dSbrad 	    events, kqop->nevents, ts_p);
228fd332320Sprovos 	kqop->nchanges = 0;
229fd332320Sprovos 	if (res == -1) {
230fd332320Sprovos 		if (errno != EINTR) {
2314643be29Sbrad                         event_warn("kevent");
232fd332320Sprovos 			return (-1);
233fd332320Sprovos 		}
234fd332320Sprovos 
235fd332320Sprovos 		return (0);
236fd332320Sprovos 	}
237fd332320Sprovos 
2384643be29Sbrad 	event_debug(("%s: kevent reports %d", __func__, res));
239fd332320Sprovos 
240fd332320Sprovos 	for (i = 0; i < res; i++) {
241fd332320Sprovos 		int which = 0;
242fd332320Sprovos 
243fd332320Sprovos 		if (events[i].flags & EV_ERROR) {
244fd332320Sprovos 			/*
245fd332320Sprovos 			 * Error messages that can happen, when a delete fails.
246fd332320Sprovos 			 *   EBADF happens when the file discriptor has been
247fd332320Sprovos 			 *   closed,
248fd332320Sprovos 			 *   ENOENT when the file discriptor was closed and
249fd332320Sprovos 			 *   then reopened.
250348ce57bSbrad 			 *   EINVAL for some reasons not understood; EINVAL
251348ce57bSbrad 			 *   should not be returned ever; but FreeBSD does :-\
252fd332320Sprovos 			 * An error is also indicated when a callback deletes
253fd332320Sprovos 			 * an event we are still processing.  In that case
254fd332320Sprovos 			 * the data field is set to ENOENT.
255fd332320Sprovos 			 */
256fd332320Sprovos 			if (events[i].data == EBADF ||
257348ce57bSbrad 			    events[i].data == EINVAL ||
258fd332320Sprovos 			    events[i].data == ENOENT)
259fd332320Sprovos 				continue;
2601085edd8Sbrad 			errno = events[i].data;
261fd332320Sprovos 			return (-1);
262fd332320Sprovos 		}
263fd332320Sprovos 
2641770acb2Smarkus 		ev = (struct event *)events[i].udata;
265fd332320Sprovos 
266fd332320Sprovos 		if (events[i].filter == EVFILT_READ) {
267fd332320Sprovos 			which |= EV_READ;
268fd332320Sprovos 		} else if (events[i].filter == EVFILT_WRITE) {
269fd332320Sprovos 			which |= EV_WRITE;
270fd332320Sprovos 		} else if (events[i].filter == EVFILT_SIGNAL) {
271fd332320Sprovos 			which |= EV_SIGNAL;
2721770acb2Smarkus 		}
273fd332320Sprovos 
274fd332320Sprovos 		if (!which)
275fd332320Sprovos 			continue;
276fd332320Sprovos 
27794f6f200Sbrad 		if (!(ev->ev_events & EV_PERSIST))
278fd332320Sprovos 			event_del(ev);
279fd332320Sprovos 
2801770acb2Smarkus 		event_active(ev, which,
2811770acb2Smarkus 		    ev->ev_events & EV_SIGNAL ? events[i].data : 1);
282fd332320Sprovos 	}
283fd332320Sprovos 
284fd332320Sprovos 	return (0);
285fd332320Sprovos }
286fd332320Sprovos 
287fd332320Sprovos 
288fd332320Sprovos int
289fd332320Sprovos kq_add(void *arg, struct event *ev)
290fd332320Sprovos {
291fd332320Sprovos 	struct kqop *kqop = arg;
292fd332320Sprovos 	struct kevent kev;
293fd332320Sprovos 
294fd332320Sprovos 	if (ev->ev_events & EV_SIGNAL) {
295fd332320Sprovos 		int nsignal = EVENT_SIGNAL(ev);
296fd332320Sprovos 
297fd332320Sprovos  		memset(&kev, 0, sizeof(kev));
298fd332320Sprovos 		kev.ident = nsignal;
299fd332320Sprovos 		kev.filter = EVFILT_SIGNAL;
300fd332320Sprovos 		kev.flags = EV_ADD;
301fd332320Sprovos 		if (!(ev->ev_events & EV_PERSIST))
302ddb00dd9Sitojun 			kev.flags |= EV_ONESHOT;
303*bdce580dSbrad 		kev.udata = PTR_TO_UDATA(ev);
304fd332320Sprovos 
305fd332320Sprovos 		if (kq_insert(kqop, &kev) == -1)
306fd332320Sprovos 			return (-1);
307fd332320Sprovos 
308fd332320Sprovos 		if (signal(nsignal, kq_sighandler) == SIG_ERR)
309fd332320Sprovos 			return (-1);
310fd332320Sprovos 
311fd332320Sprovos 		ev->ev_flags |= EVLIST_X_KQINKERNEL;
312fd332320Sprovos 		return (0);
313fd332320Sprovos 	}
314fd332320Sprovos 
315fd332320Sprovos 	if (ev->ev_events & EV_READ) {
316fd332320Sprovos  		memset(&kev, 0, sizeof(kev));
317fd332320Sprovos 		kev.ident = ev->ev_fd;
318fd332320Sprovos 		kev.filter = EVFILT_READ;
319ff9272daSbrad #ifdef NOTE_EOF
320c74e3f8eSmarkus 		/* Make it behave like select() and poll() */
321c74e3f8eSmarkus 		kev.fflags = NOTE_EOF;
322ff9272daSbrad #endif
323ff9272daSbrad 		kev.flags = EV_ADD;
324e5c7daabSart 		if (!(ev->ev_events & EV_PERSIST))
325ddb00dd9Sitojun 			kev.flags |= EV_ONESHOT;
326*bdce580dSbrad 		kev.udata = PTR_TO_UDATA(ev);
327fd332320Sprovos 
328fd332320Sprovos 		if (kq_insert(kqop, &kev) == -1)
329fd332320Sprovos 			return (-1);
330fd332320Sprovos 
331fd332320Sprovos 		ev->ev_flags |= EVLIST_X_KQINKERNEL;
332fd332320Sprovos 	}
333fd332320Sprovos 
334fd332320Sprovos 	if (ev->ev_events & EV_WRITE) {
335fd332320Sprovos  		memset(&kev, 0, sizeof(kev));
336fd332320Sprovos 		kev.ident = ev->ev_fd;
337fd332320Sprovos 		kev.filter = EVFILT_WRITE;
338e5c7daabSart 		kev.flags = EV_ADD;
339e5c7daabSart 		if (!(ev->ev_events & EV_PERSIST))
340ddb00dd9Sitojun 			kev.flags |= EV_ONESHOT;
341*bdce580dSbrad 		kev.udata = PTR_TO_UDATA(ev);
342fd332320Sprovos 
343fd332320Sprovos 		if (kq_insert(kqop, &kev) == -1)
344fd332320Sprovos 			return (-1);
345fd332320Sprovos 
346fd332320Sprovos 		ev->ev_flags |= EVLIST_X_KQINKERNEL;
347fd332320Sprovos 	}
348fd332320Sprovos 
349fd332320Sprovos 	return (0);
350fd332320Sprovos }
351fd332320Sprovos 
352fd332320Sprovos int
353fd332320Sprovos kq_del(void *arg, struct event *ev)
354fd332320Sprovos {
355fd332320Sprovos 	struct kqop *kqop = arg;
356fd332320Sprovos 	struct kevent kev;
357fd332320Sprovos 
358fd332320Sprovos 	if (!(ev->ev_flags & EVLIST_X_KQINKERNEL))
359fd332320Sprovos 		return (0);
360fd332320Sprovos 
361fd332320Sprovos 	if (ev->ev_events & EV_SIGNAL) {
362fd332320Sprovos 		int nsignal = EVENT_SIGNAL(ev);
363fd332320Sprovos 
364fd332320Sprovos  		memset(&kev, 0, sizeof(kev));
365c46b02d0Sbrad 		kev.ident = nsignal;
366fd332320Sprovos 		kev.filter = EVFILT_SIGNAL;
367fd332320Sprovos 		kev.flags = EV_DELETE;
368fd332320Sprovos 
369fd332320Sprovos 		if (kq_insert(kqop, &kev) == -1)
370fd332320Sprovos 			return (-1);
371fd332320Sprovos 
372fd332320Sprovos 		if (signal(nsignal, SIG_DFL) == SIG_ERR)
373fd332320Sprovos 			return (-1);
374fd332320Sprovos 
375fd332320Sprovos 		ev->ev_flags &= ~EVLIST_X_KQINKERNEL;
376fd332320Sprovos 		return (0);
377fd332320Sprovos 	}
378fd332320Sprovos 
379fd332320Sprovos 	if (ev->ev_events & EV_READ) {
380fd332320Sprovos  		memset(&kev, 0, sizeof(kev));
381fd332320Sprovos 		kev.ident = ev->ev_fd;
382fd332320Sprovos 		kev.filter = EVFILT_READ;
383fd332320Sprovos 		kev.flags = EV_DELETE;
384fd332320Sprovos 
385fd332320Sprovos 		if (kq_insert(kqop, &kev) == -1)
386fd332320Sprovos 			return (-1);
387fd332320Sprovos 
388fd332320Sprovos 		ev->ev_flags &= ~EVLIST_X_KQINKERNEL;
389fd332320Sprovos 	}
390fd332320Sprovos 
391fd332320Sprovos 	if (ev->ev_events & EV_WRITE) {
392fd332320Sprovos  		memset(&kev, 0, sizeof(kev));
393fd332320Sprovos 		kev.ident = ev->ev_fd;
394fd332320Sprovos 		kev.filter = EVFILT_WRITE;
395fd332320Sprovos 		kev.flags = EV_DELETE;
396fd332320Sprovos 
397fd332320Sprovos 		if (kq_insert(kqop, &kev) == -1)
398fd332320Sprovos 			return (-1);
399fd332320Sprovos 
400fd332320Sprovos 		ev->ev_flags &= ~EVLIST_X_KQINKERNEL;
401fd332320Sprovos 	}
402fd332320Sprovos 
403fd332320Sprovos 	return (0);
404fd332320Sprovos }
4053ac1ba99Sbrad 
4063ac1ba99Sbrad void
407*bdce580dSbrad kq_dealloc(struct event_base *base, void *arg)
4083ac1ba99Sbrad {
4093ac1ba99Sbrad 	struct kqop *kqop = arg;
4103ac1ba99Sbrad 
4113ac1ba99Sbrad 	if (kqop->changes)
4123ac1ba99Sbrad 		free(kqop->changes);
4133ac1ba99Sbrad 	if (kqop->events)
4143ac1ba99Sbrad 		free(kqop->events);
4153ac1ba99Sbrad 	if (kqop->kq)
4163ac1ba99Sbrad 		close(kqop->kq);
4173ac1ba99Sbrad 	memset(kqop, 0, sizeof(struct kqop));
4183ac1ba99Sbrad 	free(kqop);
4193ac1ba99Sbrad }
420