/*	$OpenBSD: kqueue.c,v 1.43 2024/03/23 22:51:49 yasuoka Exp $	*/

/*
 * Copyright 2000-2002 Niels Provos <provos@citi.umich.edu>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/types.h>
#include <sys/time.h>
#include <sys/queue.h>
#include <sys/event.h>

#include <assert.h>
#include <errno.h>
#include <inttypes.h>
#include <limits.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "event.h"
#include "event-internal.h"
#include "log.h"
#include "evsignal.h"

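/* Flag set on an event while it is registered with the kernel queue. */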
#define EVLIST_X_KQINKERNEL	0x1000

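/* Initial number of slots in the kevent change and result arrays. */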
#define NEVENT	64

struct kqop {
	struct kevent *changes;
	int nchanges;
	struct kevent *events;
	struct event_list evsigevents[NSIG];
	int nevents;
	int kq;
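	/* Process that created kq; kq_dealloc() closes the descriptor
	 * only when it runs in that same process. */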
	pid_t pid;
};

static void *kq_init	(struct event_base *);
static int kq_add	(void *, struct event *);
static int kq_del	(void *, struct event *);
static int kq_dispatch	(struct event_base *, void *, struct timeval *);
static int kq_insert	(struct kqop *, struct kevent *);
static void kq_dealloc (struct event_base *, void *);

const struct eventop kqops = {
	"kqueue",
	kq_init,
	kq_add,
	kq_del,
	kq_dispatch,
	kq_dealloc,
	1 /* need reinit */
};

static void *
kq_init(struct event_base *base)
{
	int i, kq;
	struct kqop *kqueueop;

	/* Disable kqueue when this environment variable is set */
	if (!issetugid() && getenv("EVENT_NOKQUEUE"))
		return (NULL);

	if (!(kqueueop = calloc(1, sizeof(struct kqop))))
		return (NULL);

	/* Initialize the kernel queue */

	if ((kq = kqueue()) == -1) {
		event_warn("kqueue");
		free (kqueueop);
		return (NULL);
	}

	kqueueop->kq = kq;

	kqueueop->pid = getpid();

	/* Initialize fields */
	kqueueop->changes = calloc(NEVENT, sizeof(struct kevent));
	if (kqueueop->changes == NULL) {
		free (kqueueop);
		return (NULL);
	}
	kqueueop->events = calloc(NEVENT, sizeof(struct kevent));
	if (kqueueop->events == NULL) {
		free (kqueueop->changes);
		free (kqueueop);
		return (NULL);
	}
	kqueueop->nevents = NEVENT;

	/* we need to keep track of multiple events per signal */
	for (i = 0; i < NSIG; ++i) {
		TAILQ_INIT(&kqueueop->evsigevents[i]);
	}

	return (kqueueop);
}

static int
kq_insert(struct kqop *kqop, struct kevent *kev)
{
	int nevents = kqop->nevents;

	if (kqop->nchanges == nevents) {
		struct kevent *newchange;
		struct kevent *newresult;

		if (nevents > INT_MAX / 2) {
			event_warnx("%s: integer overflow", __func__);
			return (-1);
		}
		nevents *= 2;

		newchange = recallocarray(kqop->changes,
		    kqop->nevents, nevents, sizeof(struct kevent));
		if (newchange == NULL) {
			event_warn("%s: recallocarray", __func__);
			return (-1);
		}
		kqop->changes = newchange;

		newresult = recallocarray(kqop->events,
		    kqop->nevents, nevents, sizeof(struct kevent));

		/*
		 * If we fail, we don't have to worry about freeing,
		 * the next realloc will pick it up.
		 */
		if (newresult == NULL) {
			event_warn("%s: recallocarray", __func__);
			return (-1);
		}
		kqop->events = newresult;

		kqop->nevents = nevents;
	}

	memcpy(&kqop->changes[kqop->nchanges++], kev, sizeof(struct kevent));

	event_debug(("%s: fd %d %s%s",
	    __func__, (int)kev->ident,
	    kev->filter == EVFILT_READ ? "EVFILT_READ" : "EVFILT_WRITE",
	    kev->flags == EV_DELETE ? " (del)" : ""));

	return (0);
}

static void
kq_sighandler(int sig)
{
	/* Do nothing here */
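	/* A handler must still be installed so the signal's default
	 * disposition (e.g. terminating the process) does not fire;
	 * the EVFILT_SIGNAL filter does the actual reporting. */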
}

static int
kq_dispatch(struct event_base *base, void *arg, struct timeval *tv)
{
	struct kqop *kqop = arg;
	struct kevent *changes = kqop->changes;
	struct kevent *events = kqop->events;
	struct event *ev;
	struct timespec ts, *ts_p = NULL;
	int i, res;

	if (tv != NULL) {
		TIMEVAL_TO_TIMESPEC(tv, &ts);
		ts_p = &ts;
	}

	res = kevent(kqop->kq, kqop->nchanges ? changes : NULL, kqop->nchanges,
	    events, kqop->nevents, ts_p);
	kqop->nchanges = 0;
	if (res == -1) {
		if (errno != EINTR) {
			event_warn("kevent");
			return (-1);
		}

		return (0);
	}

	event_debug(("%s: kevent reports %d", __func__, res));

	for (i = 0; i < res; i++) {
		int which = 0;

		if (events[i].flags & EV_ERROR) {
			switch (events[i].data) {

			/* Can occur on delete if we are not currently
			 * watching any events on this fd. That can
			 * happen when the fd was closed and another
			 * file was opened with that fd. */
			case ENOENT:
			/* Can occur for reasons not fully understood
			 * on FreeBSD. */
			case EINVAL:
				continue;
			/* Can occur on a delete if the fd is closed. Can
			 * occur on an add if the fd was one side of a pipe,
			 * and the other side was closed. */
			case EBADF:
				continue;
			/* These two can occur on an add if the fd was one side
			 * of a pipe, and the other side was closed. */
			case EPERM:
			case EPIPE:
				/* Report read events, if we're listening for
				 * them, so that the user can learn about any
				 * add errors. (If the operation was a
				 * delete, then udata should be cleared.) */
				if (events[i].udata) {
					/* The operation was an add:
					 * report the error as a read. */
					which |= EV_READ;
					break;
				} else {
					/* The operation was a del:
					 * report nothing. */
					continue;
				}

			/* Other errors shouldn't occur. */
			default:
				errno = events[i].data;
				return (-1);
			}
		} else if (events[i].filter == EVFILT_READ) {
			which |= EV_READ;
		} else if (events[i].filter == EVFILT_WRITE) {
			which |= EV_WRITE;
		} else if (events[i].filter == EVFILT_SIGNAL) {
			which |= EV_SIGNAL;
		}

		if (!which)
			continue;

		if (events[i].filter == EVFILT_SIGNAL) {
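			/* For a signal event, udata points at the
			 * per-signal event list set up in kq_add() and
			 * data carries the number of deliveries. */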
			struct event_list *head =
			    (struct event_list *)events[i].udata;
			TAILQ_FOREACH(ev, head, ev_signal_next) {
				event_active(ev, which, events[i].data);
			}
		} else {
			ev = (struct event *)events[i].udata;

			if (!(ev->ev_events & EV_PERSIST))
				ev->ev_flags &= ~EVLIST_X_KQINKERNEL;

			event_active(ev, which, 1);
		}
	}

	return (0);
}


static int
kq_add(void *arg, struct event *ev)
{
	struct kqop *kqop = arg;
	struct kevent kev;

	if (ev->ev_events & EV_SIGNAL) {
		int nsignal = EVENT_SIGNAL(ev);

		assert(nsignal >= 0 && nsignal < NSIG);
		if (TAILQ_EMPTY(&kqop->evsigevents[nsignal])) {
			struct timespec timeout = { 0, 0 };

			memset(&kev, 0, sizeof(kev));
			kev.ident = nsignal;
			kev.filter = EVFILT_SIGNAL;
			kev.flags = EV_ADD;
			kev.udata = &kqop->evsigevents[nsignal];

			/* Be ready for the signal if it is sent any
			 * time between now and the next call to
			 * kq_dispatch. */
			if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1)
				return (-1);

			if (_evsignal_set_handler(ev->ev_base, nsignal,
			    kq_sighandler) == -1)
				return (-1);
		}

		TAILQ_INSERT_TAIL(&kqop->evsigevents[nsignal], ev,
		    ev_signal_next);
		ev->ev_flags |= EVLIST_X_KQINKERNEL;
		return (0);
	}

	if (ev->ev_events & EV_READ) {
		memset(&kev, 0, sizeof(kev));
		kev.ident = ev->ev_fd;
		kev.filter = EVFILT_READ;
		/* Make it behave like select() and poll() */
		kev.fflags = NOTE_EOF;
		kev.flags = EV_ADD;
		if (!(ev->ev_events & EV_PERSIST))
			kev.flags |= EV_ONESHOT;
		kev.udata = ev;

		if (kq_insert(kqop, &kev) == -1)
			return (-1);

		ev->ev_flags |= EVLIST_X_KQINKERNEL;
	}

	if (ev->ev_events & EV_WRITE) {
		memset(&kev, 0, sizeof(kev));
		kev.ident = ev->ev_fd;
		kev.filter = EVFILT_WRITE;
		kev.flags = EV_ADD;
		if (!(ev->ev_events & EV_PERSIST))
			kev.flags |= EV_ONESHOT;
		kev.udata = ev;

		if (kq_insert(kqop, &kev) == -1)
			return (-1);

		ev->ev_flags |= EVLIST_X_KQINKERNEL;
	}

	return (0);
}

static int
kq_del(void *arg, struct event *ev)
{
	int i, j;
	struct kqop *kqop = arg;
	struct kevent kev;

	if (!(ev->ev_flags & EVLIST_X_KQINKERNEL))
		return (0);

	if (ev->ev_events & EV_SIGNAL) {
		int nsignal = EVENT_SIGNAL(ev);
		struct timespec timeout = { 0, 0 };

		assert(nsignal >= 0 && nsignal < NSIG);
		TAILQ_REMOVE(&kqop->evsigevents[nsignal], ev, ev_signal_next);
		if (TAILQ_EMPTY(&kqop->evsigevents[nsignal])) {
			memset(&kev, 0, sizeof(kev));
			kev.ident = nsignal;
			kev.filter = EVFILT_SIGNAL;
			kev.flags = EV_DELETE;

			/* Because we insert signal events
			 * immediately, we need to delete them
			 * immediately, too */
			if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1)
				return (-1);

			if (_evsignal_restore_handler(ev->ev_base,
			    nsignal) == -1)
				return (-1);
		}

		ev->ev_flags &= ~EVLIST_X_KQINKERNEL;
		return (0);
	}

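	/* Drop any not-yet-submitted EV_ADD changes that point at this
	 * event, so a pending add is cancelled instead of asking the
	 * kernel to delete a filter it never saw. */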
	for (i = j = 0; i < kqop->nchanges; i++) {
		if (kqop->changes[i].udata == ev &&
		    (kqop->changes[i].flags & EV_ADD) != 0)
			continue;	/* delete this */
		if (i != j)
			memcpy(&kqop->changes[j], &kqop->changes[i],
			    sizeof(struct kevent));
		j++;
	}
	if (kqop->nchanges != j) {
		kqop->nchanges = j;
		ev->ev_flags &= ~EVLIST_X_KQINKERNEL;
		return (0);
	}

	if (ev->ev_events & EV_READ) {
		memset(&kev, 0, sizeof(kev));
		kev.ident = ev->ev_fd;
		kev.filter = EVFILT_READ;
		kev.flags = EV_DELETE;

		if (kq_insert(kqop, &kev) == -1)
			return (-1);

		ev->ev_flags &= ~EVLIST_X_KQINKERNEL;
	}

	if (ev->ev_events & EV_WRITE) {
		memset(&kev, 0, sizeof(kev));
		kev.ident = ev->ev_fd;
		kev.filter = EVFILT_WRITE;
		kev.flags = EV_DELETE;

		if (kq_insert(kqop, &kev) == -1)
			return (-1);

		ev->ev_flags &= ~EVLIST_X_KQINKERNEL;
	}

	return (0);
}

static void
kq_dealloc(struct event_base *base, void *arg)
{
	struct kqop *kqop = arg;

	evsignal_dealloc(base);

	free(kqop->changes);
	free(kqop->events);
	if (kqop->kq >= 0 && kqop->pid == getpid())
		close(kqop->kq);

	memset(kqop, 0, sizeof(struct kqop));
	free(kqop);
}