/*	$OpenBSD: kqueue.c,v 1.43 2024/03/23 22:51:49 yasuoka Exp $	*/

/*
 * Copyright 2000-2002 Niels Provos <provos@citi.umich.edu>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
29fd332320Sprovos
30fd332320Sprovos #include <sys/types.h>
31fd332320Sprovos #include <sys/time.h>
32fd332320Sprovos #include <sys/queue.h>
33fd332320Sprovos #include <sys/event.h>
34defc4074Sbluhm
35a3e9870fStobias #include <assert.h>
36a3e9870fStobias #include <errno.h>
37a3e9870fStobias #include <inttypes.h>
38a3e9870fStobias #include <limits.h>
39fd332320Sprovos #include <signal.h>
40fd332320Sprovos #include <stdio.h>
41fd332320Sprovos #include <stdlib.h>
42ff33a3f0Sderaadt #include <string.h>
43fd332320Sprovos #include <unistd.h>
44fd332320Sprovos
45fd332320Sprovos #include "event.h"
468ead113eSnicm #include "event-internal.h"
474643be29Sbrad #include "log.h"
48e175bf56Snicm #include "evsignal.h"
49fd332320Sprovos
/*
 * Backend-private bit kept in ev_flags.  kq_add() sets it once an event has
 * been registered (or queued for registration) with the kernel queue, and
 * kq_del() does nothing for events that do not carry it.
 */
#define EVLIST_X_KQINKERNEL	0x1000

/* Initial number of entries in the change and event arrays. */
#define NEVENT		64
53fd332320Sprovos
/*
 * State of one kqueue backend instance, created by kq_init() and released
 * by kq_dealloc().
 */
struct kqop {
	/* Change records queued by kq_insert() for the next kevent() call. */
	struct kevent *changes;
	/* Number of records currently queued in changes. */
	int nchanges;
	/* Buffer that kevent() fills with triggered events. */
	struct kevent *events;
	/* One list of registered events per signal number. */
	struct event_list evsigevents[NSIG];
	/* Capacity, in entries, of both changes and events. */
	int nevents;
	/* Descriptor returned by kqueue(). */
	int kq;
	/* Process that created kq; compared with getpid() before closing. */
	pid_t pid;
};
63fd332320Sprovos
/*
 * Forward declarations: the backend entry points stored in kqops, plus
 * kq_insert(), which appends a record to the pending change list.
 */
static void *kq_init (struct event_base *);
static int kq_add (void *, struct event *);
static int kq_del (void *, struct event *);
static int kq_dispatch (struct event_base *, void *, struct timeval *);
static int kq_insert (struct kqop *, struct kevent *);
static void kq_dealloc (struct event_base *, void *);
70fd332320Sprovos
/*
 * Operations table of the kqueue backend.  The initializer is positional:
 * the backend name, the five entry points defined in this file, and the
 * "need reinit" flag.
 */
const struct eventop kqops = {
	"kqueue",
	kq_init,
	kq_add,
	kq_del,
	kq_dispatch,
	kq_dealloc,
	1 /* need reinit */
};
80fd332320Sprovos
/*
 * Create the state of a kqueue backend instance.
 *
 * base is not used.  Returns a newly allocated struct kqop on success.
 * Returns NULL when the backend is disabled through the EVENT_NOKQUEUE
 * environment variable (honoured only when issetugid() is false), when
 * kqueue() fails, or when memory cannot be allocated.  On every failure
 * path all resources acquired so far, including the kqueue descriptor,
 * are released again.
 */
static void *
kq_init(struct event_base *base)
{
	struct kqop *kqueueop;
	int i, saved_errno;

	/* Disable kqueue when this environment variable is set */
	if (!issetugid() && getenv("EVENT_NOKQUEUE"))
		return (NULL);

	/* calloc leaves nchanges at 0 and both array pointers NULL. */
	if ((kqueueop = calloc(1, sizeof(*kqueueop))) == NULL)
		return (NULL);

	/* Initialize the kernel queue */
	if ((kqueueop->kq = kqueue()) == -1) {
		event_warn("kqueue");
		free(kqueueop);
		return (NULL);
	}

	/* Remember the creator so kq_dealloc() only closes its own queue. */
	kqueueop->pid = getpid();

	/* Initialize fields */
	kqueueop->changes = calloc(NEVENT, sizeof(struct kevent));
	if (kqueueop->changes == NULL)
		goto fail;
	kqueueop->events = calloc(NEVENT, sizeof(struct kevent));
	if (kqueueop->events == NULL)
		goto fail;
	kqueueop->nevents = NEVENT;

	/* we need to keep track of multiple events per signal */
	for (i = 0; i < NSIG; ++i)
		TAILQ_INIT(&kqueueop->evsigevents[i]);

	return (kqueueop);

fail:
	/*
	 * The descriptor is already open here, so it must be closed as well;
	 * keep the allocation error in errno across the cleanup calls.
	 */
	saved_errno = errno;
	free(kqueueop->changes);
	close(kqueueop->kq);
	free(kqueueop);
	errno = saved_errno;
	return (NULL);
}
127fd332320Sprovos
/*
 * Append a copy of *kev to the list of changes handed to the kernel by the
 * next kevent() call in kq_dispatch().
 *
 * When the list is full, the change array and the event array are both
 * doubled in size.  Returns 0 on success and -1 if doubling would overflow
 * an int or if memory cannot be allocated; in that case nothing is queued.
 */
static int
kq_insert(struct kqop *kqop, struct kevent *kev)
{
	struct kevent *slot;

	if (kqop->nchanges == kqop->nevents) {
		struct kevent *grown;
		int doubled;

		if (kqop->nevents > INT_MAX / 2) {
			event_warnx("%s: integer overflow", __func__);
			return (-1);
		}
		doubled = kqop->nevents * 2;

		grown = recallocarray(kqop->changes, kqop->nevents, doubled,
		    sizeof(struct kevent));
		if (grown == NULL) {
			event_warn("%s: recallocarray", __func__);
			return (-1);
		}
		kqop->changes = grown;

		/*
		 * Should this second allocation fail, the enlarged change
		 * array is simply kept; nevents still holds the old size, so
		 * a later call attempts the growth again.
		 */
		grown = recallocarray(kqop->events, kqop->nevents, doubled,
		    sizeof(struct kevent));
		if (grown == NULL) {
			event_warn("%s: recallocarray", __func__);
			return (-1);
		}
		kqop->events = grown;

		kqop->nevents = doubled;
	}

	slot = &kqop->changes[kqop->nchanges++];
	memcpy(slot, kev, sizeof(struct kevent));

	event_debug(("%s: fd %d %s%s",
	    __func__, (int)kev->ident,
	    kev->filter == EVFILT_READ ? "EVFILT_READ" : "EVFILT_WRITE",
	    kev->flags == EV_DELETE ? " (del)" : ""));

	return (0);
}
1765512004aStedu
/*
 * Signal handler installed by kq_add() for signals watched through the
 * kernel queue.  It intentionally does nothing.
 */
static void
kq_sighandler(int sig)
{
	(void)sig;
}
182fd332320Sprovos
/*
 * Hand the queued changes to the kernel, wait for events and activate the
 * events that triggered.
 *
 * base is not used.  arg is the struct kqop of this backend instance.  tv
 * is the longest time to wait; when it is NULL no timeout is passed to
 * kevent().
 *
 * Returns 0 on success and when kevent() was interrupted (EINTR).  Returns
 * -1 when kevent() fails for another reason, or when a returned event
 * carries an error that is not handled below; errno is then set to that
 * error.
 */
static int
kq_dispatch(struct event_base *base, void *arg, struct timeval *tv)
{
	struct kqop *kqop = arg;
	struct kevent *events = kqop->events;
	struct timespec ts;
	struct timespec *timeout = NULL;
	struct event *ev;
	int nready, n;

	if (tv != NULL) {
		TIMEVAL_TO_TIMESPEC(tv, &ts);
		timeout = &ts;
	}

	nready = kevent(kqop->kq, kqop->nchanges ? kqop->changes : NULL,
	    kqop->nchanges, events, kqop->nevents, timeout);
	/* The change list is emptied whether or not the call succeeded. */
	kqop->nchanges = 0;
	if (nready == -1) {
		if (errno == EINTR)
			return (0);
		event_warn("kevent");
		return (-1);
	}

	event_debug(("%s: kevent reports %d", __func__, nready));

	for (n = 0; n < nready; n++) {
		struct kevent *kev = &events[n];
		int which = 0;

		if (kev->flags & EV_ERROR) {
			switch (kev->data) {
			/*
			 * ENOENT can occur on delete if we are not currently
			 * watching any events on this fd.  That can happen
			 * when the fd was closed and another file was opened
			 * with that fd.
			 * EINVAL can occur for reasons not fully understood
			 * on FreeBSD.
			 * EBADF can occur on a delete if the fd is closed,
			 * and on an add if the fd was one side of a pipe and
			 * the other side was closed.
			 */
			case ENOENT:
			case EINVAL:
			case EBADF:
				continue;

			/*
			 * These two can occur on an add if the fd was one
			 * side of a pipe, and the other side was closed.
			 */
			case EPERM:
			case EPIPE:
				/*
				 * A delete carries no udata: report nothing.
				 * An add does: report the error as a read so
				 * that the user can learn about it.
				 */
				if (!kev->udata)
					continue;
				which |= EV_READ;
				break;

			/* Other errors shouldn't occur. */
			default:
				errno = kev->data;
				return (-1);
			}
		} else {
			switch (kev->filter) {
			case EVFILT_READ:
				which |= EV_READ;
				break;
			case EVFILT_WRITE:
				which |= EV_WRITE;
				break;
			case EVFILT_SIGNAL:
				which |= EV_SIGNAL;
				break;
			default:
				break;
			}
		}

		if (which == 0)
			continue;

		if (kev->filter == EVFILT_SIGNAL) {
			/* udata is the list of events for this signal. */
			struct event_list *siglist =
			    (struct event_list *)kev->udata;

			TAILQ_FOREACH(ev, siglist, ev_signal_next) {
				event_active(ev, which, kev->data);
			}
			continue;
		}

		ev = (struct event *)kev->udata;

		/* Non-persistent events were added with EV_ONESHOT. */
		if ((ev->ev_events & EV_PERSIST) == 0)
			ev->ev_flags &= ~EVLIST_X_KQINKERNEL;

		event_active(ev, which, 1);
	}

	return (0);
}
285fd332320Sprovos
286fd332320Sprovos
/*
 * Queue an EV_ADD change for one filter of a read/write event and mark the
 * event as known to the kernel queue.  Events without EV_PERSIST are added
 * as one-shot.  Returns 0 on success, -1 if the change cannot be queued.
 */
static int
kq_queue_add(struct kqop *kqop, struct event *ev, short filter,
    unsigned int fflags)
{
	struct kevent kev;

	memset(&kev, 0, sizeof(kev));
	kev.ident = ev->ev_fd;
	kev.filter = filter;
	kev.fflags = fflags;
	kev.flags = (ev->ev_events & EV_PERSIST) ?
	    EV_ADD : (EV_ADD | EV_ONESHOT);
	kev.udata = ev;

	if (kq_insert(kqop, &kev) == -1)
		return (-1);

	ev->ev_flags |= EVLIST_X_KQINKERNEL;
	return (0);
}

/*
 * Register an event with the kqueue backend.
 *
 * arg is the struct kqop of this backend instance.  Signal events are
 * registered with the kernel immediately; read and write events are only
 * queued and reach the kernel on the next kq_dispatch().  Returns 0 on
 * success and -1 on failure.
 */
static int
kq_add(void *arg, struct event *ev)
{
	struct kqop *kqop = arg;

	if (ev->ev_events & EV_SIGNAL) {
		int nsignal = EVENT_SIGNAL(ev);
		struct event_list *siglist;

		assert(nsignal >= 0 && nsignal < NSIG);
		siglist = &kqop->evsigevents[nsignal];

		/* Only the first event for a signal touches the kernel. */
		if (TAILQ_EMPTY(siglist)) {
			struct timespec timeout = { 0, 0 };
			struct kevent kev;

			memset(&kev, 0, sizeof(kev));
			kev.ident = nsignal;
			kev.filter = EVFILT_SIGNAL;
			kev.flags = EV_ADD;
			kev.udata = siglist;

			/*
			 * Be ready for the signal if it is sent any
			 * time between now and the next call to
			 * kq_dispatch.
			 */
			if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1)
				return (-1);

			if (_evsignal_set_handler(ev->ev_base, nsignal,
			    kq_sighandler) == -1)
				return (-1);
		}

		TAILQ_INSERT_TAIL(siglist, ev, ev_signal_next);
		ev->ev_flags |= EVLIST_X_KQINKERNEL;
		return (0);
	}

	/* NOTE_EOF makes the read filter behave like select() and poll() */
	if ((ev->ev_events & EV_READ) &&
	    kq_queue_add(kqop, ev, EVFILT_READ, NOTE_EOF) == -1)
		return (-1);

	if ((ev->ev_events & EV_WRITE) &&
	    kq_queue_add(kqop, ev, EVFILT_WRITE, 0) == -1)
		return (-1);

	return (0);
}
357fd332320Sprovos
/*
 * Queue an EV_DELETE change for one filter of a read/write event and clear
 * the event's in-kernel mark.  Returns 0 on success, -1 if the change
 * cannot be queued.
 */
static int
kq_queue_del(struct kqop *kqop, struct event *ev, short filter)
{
	struct kevent kev;

	memset(&kev, 0, sizeof(kev));
	kev.ident = ev->ev_fd;
	kev.filter = filter;
	kev.flags = EV_DELETE;

	if (kq_insert(kqop, &kev) == -1)
		return (-1);

	ev->ev_flags &= ~EVLIST_X_KQINKERNEL;
	return (0);
}

/*
 * Remove an event from the kqueue backend.
 *
 * arg is the struct kqop of this backend instance.  Events that do not
 * carry EVLIST_X_KQINKERNEL are ignored.  Returns 0 on success and -1 on
 * failure.
 */
static int
kq_del(void *arg, struct event *ev)
{
	struct kqop *kqop = arg;
	int src, kept;

	if ((ev->ev_flags & EVLIST_X_KQINKERNEL) == 0)
		return (0);

	if (ev->ev_events & EV_SIGNAL) {
		int nsignal = EVENT_SIGNAL(ev);
		struct event_list *siglist;

		assert(nsignal >= 0 && nsignal < NSIG);
		siglist = &kqop->evsigevents[nsignal];

		TAILQ_REMOVE(siglist, ev, ev_signal_next);

		/* Only the last event for a signal touches the kernel. */
		if (TAILQ_EMPTY(siglist)) {
			struct timespec timeout = { 0, 0 };
			struct kevent kev;

			memset(&kev, 0, sizeof(kev));
			kev.ident = nsignal;
			kev.filter = EVFILT_SIGNAL;
			kev.flags = EV_DELETE;

			/*
			 * Because we insert signal events
			 * immediately, we need to delete them
			 * immediately, too
			 */
			if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1)
				return (-1);

			if (_evsignal_restore_handler(ev->ev_base,
			    nsignal) == -1)
				return (-1);
		}

		ev->ev_flags &= ~EVLIST_X_KQINKERNEL;
		return (0);
	}

	/*
	 * Drop any add for this event that is still waiting in the change
	 * list, closing the gaps by moving the remaining records down.
	 */
	kept = 0;
	for (src = 0; src < kqop->nchanges; src++) {
		struct kevent *chg = &kqop->changes[src];

		if (chg->udata != ev || (chg->flags & EV_ADD) == 0) {
			if (src != kept)
				memcpy(&kqop->changes[kept], chg,
				    sizeof(struct kevent));
			kept++;
		}
	}
	if (kept != kqop->nchanges) {
		/* Pending adds were removed, so no delete is queued. */
		kqop->nchanges = kept;
		ev->ev_flags &= ~EVLIST_X_KQINKERNEL;
		return (0);
	}

	if ((ev->ev_events & EV_READ) &&
	    kq_queue_del(kqop, ev, EVFILT_READ) == -1)
		return (-1);

	if ((ev->ev_events & EV_WRITE) &&
	    kq_queue_del(kqop, ev, EVFILT_WRITE) == -1)
		return (-1);

	return (0);
}
4363ac1ba99Sbrad
/*
 * Release everything owned by a kqueue backend instance.
 *
 * base is passed on to evsignal_dealloc(); arg is the struct kqop to
 * destroy.  The kqueue descriptor is closed only when it is valid and the
 * calling process is the one recorded in the pid field.
 */
static void
kq_dealloc(struct event_base *base, void *arg)
{
	struct kqop *kqop = arg;

	evsignal_dealloc(base);

	free(kqop->changes);
	free(kqop->events);

	if (kqop->kq >= 0) {
		if (getpid() == kqop->pid)
			close(kqop->kq);
	}

	/* Wipe the structure before handing the memory back. */
	memset(kqop, 0, sizeof(*kqop));
	free(kqop);
}
452