1 /*
2 * ircd-ratbox: A slightly useful ircd.
3 * epoll.c: Linux epoll compatible network routines.
4 *
5 * Copyright (C) 1990 Jarkko Oikarinen and University of Oulu, Co Center
6 * Copyright (C) 1996-2002 Hybrid Development Team
7 * Copyright (C) 2001 Adrian Chadd <adrian@creative.net.au>
8 * Copyright (C) 2002-2005 ircd-ratbox development team
9 * Copyright (C) 2002 Aaron Sethman <androsyn@ratbox.org>
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
24 * USA
25 *
26 * $Id: epoll.c 26525 2009-05-13 15:31:01Z androsyn $
27 */
28 #define _GNU_SOURCE 1
29
30 #include <libratbox_config.h>
31 #include <ratbox_lib.h>
32 #include <commio-int.h>
33 #include <event-int.h>
34 #if defined(HAVE_EPOLL_CTL) && (HAVE_SYS_EPOLL_H)
35 #define USING_EPOLL
36 #include <fcntl.h>
37 #include <sys/epoll.h>
38
39 #if defined(HAVE_SIGNALFD) && (HAVE_SYS_SIGNALFD_H) && (USE_TIMER_CREATE) && (HAVE_SYS_UIO_H)
40 #include <signal.h>
41 #include <sys/signalfd.h>
42 #include <sys/uio.h>
43 #define EPOLL_SCHED_EVENT 1
44 #endif
45
46 #if defined(USE_TIMERFD_CREATE)
47 #include <sys/timerfd.h>
48 #endif
49
50 #define RTSIGNAL SIGRTMIN
/*
 * Bookkeeping for the epoll backend: the epoll descriptor itself plus the
 * buffer that epoll_wait() fills with ready events.
 */
struct epoll_info
{
	int ep;				/* descriptor returned by epoll_create() */
	struct epoll_event *pfd;	/* event buffer handed to epoll_wait() */
	int pfd_size;			/* number of events pfd can hold */
};
57
/* Backend state; allocated by rb_init_netio_epoll(). */
static struct epoll_info *ep_info;

/*
 * Cached result of the timer-event probe:
 * 0 = not probed yet, 1 = usable, -1 = not usable.
 */
static int can_do_event;

/* Set to 1 once timerfd_create() has been seen to succeed. */
static int can_do_timerfd;
61
62 /*
63 * rb_init_netio
64 *
65 * This is a needed exported function which will be called to initialise
66 * the network loop code.
67 */
68 int
rb_init_netio_epoll(void)69 rb_init_netio_epoll(void)
70 {
71 can_do_event = 0; /* shut up gcc */
72 can_do_timerfd = 0;
73 ep_info = rb_malloc(sizeof(struct epoll_info));
74 ep_info->pfd_size = getdtablesize();
75 ep_info->ep = epoll_create(ep_info->pfd_size);
76 if(ep_info->ep < 0)
77 {
78 return -1;
79 }
80
81 if(rb_open(ep_info->ep, RB_FD_UNKNOWN, "epoll file descriptor") == NULL)
82 {
83 rb_lib_log("Unable to rb_open epoll fd");
84 return -1;
85 }
86 ep_info->pfd = rb_malloc(sizeof(struct epoll_event) * ep_info->pfd_size);
87
88 return 0;
89 }
90
91 int
rb_setup_fd_epoll(rb_fde_t * F)92 rb_setup_fd_epoll(rb_fde_t *F)
93 {
94 return 0;
95 }
96
97
98 /*
99 * rb_setselect
100 *
101 * This is a needed exported function which will be called to register
102 * and deregister interest in a pending IO state for a given FD.
103 */
104 void
rb_setselect_epoll(rb_fde_t * F,unsigned int type,PF * handler,void * client_data)105 rb_setselect_epoll(rb_fde_t *F, unsigned int type, PF * handler, void *client_data)
106 {
107 struct epoll_event ep_event;
108 int old_flags = F->pflags;
109 int op = -1;
110
111 lrb_assert(IsFDOpen(F));
112
113 /* Update the list, even though we're not using it .. */
114 if(type & RB_SELECT_READ)
115 {
116 if(handler != NULL)
117 F->pflags |= EPOLLIN;
118 else
119 F->pflags &= ~EPOLLIN;
120 F->read_handler = handler;
121 F->read_data = client_data;
122 }
123
124 if(type & RB_SELECT_WRITE)
125 {
126 if(handler != NULL)
127 F->pflags |= EPOLLOUT;
128 else
129 F->pflags &= ~EPOLLOUT;
130 F->write_handler = handler;
131 F->write_data = client_data;
132 }
133
134 if(old_flags == 0 && F->pflags == 0)
135 return;
136 else if(F->pflags <= 0)
137 op = EPOLL_CTL_DEL;
138 else if(old_flags == 0 && F->pflags > 0)
139 op = EPOLL_CTL_ADD;
140 else if(F->pflags != old_flags)
141 op = EPOLL_CTL_MOD;
142
143 if(op == -1)
144 return;
145
146 ep_event.events = F->pflags;
147 ep_event.data.ptr = F;
148
149 if(op == EPOLL_CTL_ADD || op == EPOLL_CTL_MOD)
150 ep_event.events |= EPOLLET;
151
152 if(epoll_ctl(ep_info->ep, op, F->fd, &ep_event) != 0)
153 {
154 rb_lib_log("rb_setselect_epoll(): epoll_ctl failed: %s", strerror(errno));
155 abort();
156 }
157
158
159 }
160
161 /*
162 * rb_select
163 *
164 * Called to do the new-style IO, courtesy of squid (like most of this
165 * new IO code). This routine handles the stuff we've hidden in
166 * rb_setselect and fd_table[] and calls callbacks for IO ready
167 * events.
168 */
169
170 int
rb_select_epoll(long delay)171 rb_select_epoll(long delay)
172 {
173 int num, i, flags, old_flags, op;
174 struct epoll_event ep_event;
175 int o_errno;
176 void *data;
177
178 num = epoll_wait(ep_info->ep, ep_info->pfd, ep_info->pfd_size, delay);
179
180 /* save errno as rb_set_time() will likely clobber it */
181 o_errno = errno;
182 rb_set_time();
183 errno = o_errno;
184
185 if(num < 0 && !rb_ignore_errno(o_errno))
186 return RB_ERROR;
187
188 if(num <= 0)
189 return RB_OK;
190
191 for(i = 0; i < num; i++)
192 {
193 PF *hdl;
194 rb_fde_t *F = ep_info->pfd[i].data.ptr;
195 old_flags = F->pflags;
196 if(ep_info->pfd[i].events & (EPOLLIN | EPOLLHUP | EPOLLERR))
197 {
198 hdl = F->read_handler;
199 data = F->read_data;
200 F->read_handler = NULL;
201 F->read_data = NULL;
202 if(hdl)
203 {
204 hdl(F, data);
205 }
206 }
207
208 if(!IsFDOpen(F))
209 continue;
210 if(ep_info->pfd[i].events & (EPOLLOUT | EPOLLHUP | EPOLLERR))
211 {
212 hdl = F->write_handler;
213 data = F->write_data;
214 F->write_handler = NULL;
215 F->write_data = NULL;
216
217 if(hdl)
218 {
219 hdl(F, data);
220 }
221 }
222
223 if(!IsFDOpen(F))
224 continue;
225
226 flags = 0;
227
228 if(F->read_handler != NULL)
229 flags |= EPOLLIN;
230 if(F->write_handler != NULL)
231 flags |= EPOLLOUT;
232
233 if(old_flags != flags)
234 {
235 if(flags == 0)
236 op = EPOLL_CTL_DEL;
237 else
238 op = EPOLL_CTL_MOD;
239 F->pflags = ep_event.events = flags;
240 ep_event.data.ptr = F;
241 if(op == EPOLL_CTL_MOD || op == EPOLL_CTL_ADD)
242 ep_event.events |= EPOLLET;
243
244 if(epoll_ctl(ep_info->ep, op, F->fd, &ep_event) != 0)
245 {
246 rb_lib_log("rb_select_epoll(): epoll_ctl failed: %s",
247 strerror(errno));
248 }
249 }
250
251 }
252 return RB_OK;
253 }
254
255 #ifdef EPOLL_SCHED_EVENT
256 int
rb_epoll_supports_event(void)257 rb_epoll_supports_event(void)
258 {
259 /* try to detect at runtime if everything we need actually works */
260 timer_t timer;
261 struct sigevent ev;
262 int fd;
263 sigset_t set;
264
265 if(can_do_event == 1)
266 return 1;
267 if(can_do_event == -1)
268 return 0;
269
270 #ifdef USE_TIMERFD_CREATE
271 if((fd = timerfd_create(CLOCK_REALTIME, 0)) >= 0)
272 {
273 close(fd);
274 can_do_event = 1;
275 can_do_timerfd = 1;
276 return 1;
277 }
278 #endif
279
280 ev.sigev_signo = SIGVTALRM;
281 ev.sigev_notify = SIGEV_SIGNAL;
282 if(timer_create(CLOCK_REALTIME, &ev, &timer) != 0)
283 {
284 can_do_event = -1;
285 return 0;
286 }
287 timer_delete(timer);
288 sigemptyset(&set);
289 fd = signalfd(-1, &set, 0);
290 if(fd < 0)
291 {
292 can_do_event = -1;
293 return 0;
294 }
295 close(fd);
296 can_do_event = 1;
297 return 1;
298 }
299
300
/*
 * Private copy of the record read from a signalfd, declared here to work
 * around a glibc header bug on 32bit systems.  Only fixed-width members are
 * used so the layout does not depend on the word size.
 */
struct our_signalfd_siginfo
{
	uint32_t signo;
	int32_t err;
	int32_t code;
	uint32_t pid;
	uint32_t uid;
	int32_t fd;
	uint32_t tid;
	uint32_t band;
	uint32_t overrun;
	uint32_t trapno;
	int32_t status;
	int32_t svint;
	uint64_t svptr;		/* signalfd_handler() reads the struct ev_entry pointer from here */
	uint64_t utime;
	uint64_t stime;
	uint64_t addr;
	uint8_t pad[48];	/* filler after the named members */
};
322
323
324 #define SIGFDIOV_COUNT 16
325 static void
signalfd_handler(rb_fde_t * F,void * data)326 signalfd_handler(rb_fde_t *F, void *data)
327 {
328 static struct our_signalfd_siginfo fdsig[SIGFDIOV_COUNT];
329 static struct iovec iov[SIGFDIOV_COUNT];
330 struct ev_entry *ev;
331 int ret, x;
332
333 for(x = 0; x < SIGFDIOV_COUNT; x++)
334 {
335 iov[x].iov_base = &fdsig[x];
336 iov[x].iov_len = sizeof(struct our_signalfd_siginfo);
337 }
338
339 while(1)
340 {
341 ret = readv(rb_get_fd(F), iov, SIGFDIOV_COUNT);
342
343 if(ret == 0 || (ret < 0 && !rb_ignore_errno(errno)))
344 {
345 rb_close(F);
346 rb_epoll_init_event();
347 return;
348 }
349
350 if(ret < 0)
351 {
352 rb_setselect(F, RB_SELECT_READ, signalfd_handler, NULL);
353 return;
354 }
355 for(x = 0; x < ret / (int)sizeof(struct our_signalfd_siginfo); x++)
356 {
357 #if __WORDSIZE == 32 && defined(__sparc__)
358 uint32_t *q = (uint32_t *)&fdsig[x].svptr;
359 ev = (struct ev_entry *)q[0];
360 #else
361 ev = (struct ev_entry *)(uintptr_t)(fdsig[x].svptr);
362
363 #endif
364 if(ev == NULL)
365 continue;
366 rb_run_event(ev);
367 }
368 }
369 }
370
371 void
rb_epoll_init_event(void)372 rb_epoll_init_event(void)
373 {
374
375 sigset_t ss;
376 rb_fde_t *F;
377 int sfd;
378 rb_epoll_supports_event();
379 if(!can_do_timerfd)
380 {
381 sigemptyset(&ss);
382 sigaddset(&ss, RTSIGNAL);
383 sigprocmask(SIG_BLOCK, &ss, 0);
384 sigemptyset(&ss);
385 sigaddset(&ss, RTSIGNAL);
386 sfd = signalfd(-1, &ss, 0);
387 if(sfd == -1)
388 {
389 can_do_event = -1;
390 return;
391 }
392 F = rb_open(sfd, RB_FD_UNKNOWN, "signalfd");
393 rb_set_nb(F);
394 signalfd_handler(F, NULL);
395 }
396 }
397
398 static int
rb_epoll_sched_event_signalfd(struct ev_entry * event,int when)399 rb_epoll_sched_event_signalfd(struct ev_entry *event, int when)
400 {
401 timer_t *id;
402 struct sigevent ev;
403 struct itimerspec ts;
404
405 memset(&ev, 0, sizeof(&ev));
406 event->comm_ptr = rb_malloc(sizeof(timer_t));
407 id = event->comm_ptr;
408 ev.sigev_notify = SIGEV_SIGNAL;
409 ev.sigev_signo = RTSIGNAL;
410 ev.sigev_value.sival_ptr = event;
411
412 if(timer_create(CLOCK_REALTIME, &ev, id) < 0)
413 {
414 rb_lib_log("timer_create: %s\n", strerror(errno));
415 return 0;
416 }
417 memset(&ts, 0, sizeof(ts));
418 ts.it_value.tv_sec = when;
419 ts.it_value.tv_nsec = 0;
420 if(event->frequency != 0)
421 ts.it_interval = ts.it_value;
422
423 if(timer_settime(*id, 0, &ts, NULL) < 0)
424 {
425 rb_lib_log("timer_settime: %s\n", strerror(errno));
426 return 0;
427 }
428 return 1;
429 }
430
431 #ifdef USE_TIMERFD_CREATE
432 static void
rb_read_timerfd(rb_fde_t * F,void * data)433 rb_read_timerfd(rb_fde_t *F, void *data)
434 {
435 struct ev_entry *event = (struct ev_entry *)data;
436 int retlen;
437 uint64_t count;
438
439 if(event == NULL)
440 {
441 rb_close(F);
442 return;
443 }
444
445 retlen = rb_read(F, &count, sizeof(count));
446
447 if(retlen == 0 || (retlen < 0 && !rb_ignore_errno(errno)))
448 {
449 rb_close(F);
450 rb_lib_log("rb_read_timerfd: timerfd[%s] closed on error: %s", event->name,
451 strerror(errno));
452 return;
453 }
454 rb_setselect(F, RB_SELECT_READ, rb_read_timerfd, event);
455 rb_run_event(event);
456 }
457
458
459 static int
rb_epoll_sched_event_timerfd(struct ev_entry * event,int when)460 rb_epoll_sched_event_timerfd(struct ev_entry *event, int when)
461 {
462 struct itimerspec ts;
463 static char buf[FD_DESC_SZ + 8];
464 int fd;
465 rb_fde_t *F;
466
467 if((fd = timerfd_create(CLOCK_REALTIME, 0)) < 0)
468 {
469 rb_lib_log("timerfd_create: %s\n", strerror(errno));
470 return 0;
471 }
472
473 memset(&ts, 0, sizeof(ts));
474 ts.it_value.tv_sec = when;
475 ts.it_value.tv_nsec = 0;
476 if(event->frequency != 0)
477 ts.it_interval = ts.it_value;
478
479 if(timerfd_settime(fd, 0, &ts, NULL) < 0)
480 {
481 rb_lib_log("timerfd_settime: %s\n", strerror(errno));
482 close(fd);
483 return 0;
484 }
485 rb_snprintf(buf, sizeof(buf), "timerfd: %s", event->name);
486 F = rb_open(fd, RB_FD_UNKNOWN, buf);
487 rb_set_nb(F);
488 event->comm_ptr = F;
489 rb_setselect(F, RB_SELECT_READ, rb_read_timerfd, event);
490 return 1;
491 }
492 #endif
493
494
495
/*
 * rb_epoll_sched_event
 *
 * Schedules `event` to fire after `when` seconds.  The timerfd
 * implementation is used when it is compiled in and can_do_timerfd is set;
 * otherwise the POSIX timer + signalfd implementation is used.
 *
 * Returns whatever the selected implementation returns.
 */
int
rb_epoll_sched_event(struct ev_entry *event, int when)
{
#ifdef USE_TIMERFD_CREATE
	if(can_do_timerfd)
		return rb_epoll_sched_event_timerfd(event, when);
#endif

	return rb_epoll_sched_event_signalfd(event, when);
}
507
508 void
rb_epoll_unsched_event(struct ev_entry * event)509 rb_epoll_unsched_event(struct ev_entry *event)
510 {
511 #ifdef USE_TIMERFD_CREATE
512 if(can_do_timerfd)
513 {
514 rb_close((rb_fde_t *)event->comm_ptr);
515 event->comm_ptr = NULL;
516 return;
517 }
518 #endif
519 timer_delete(*((timer_t *) event->comm_ptr));
520 rb_free(event->comm_ptr);
521 event->comm_ptr = NULL;
522 }
523 #endif /* EPOLL_SCHED_EVENT */
524
525 #else /* epoll not supported here */
/*
 * Stub used when epoll is not available in this build: initialisation
 * always fails and ENOSYS is returned as the result.
 */
int
rb_init_netio_epoll(void)
{
	return ENOSYS;
}
531
532 void
rb_setselect_epoll(rb_fde_t * F,unsigned int type,PF * handler,void * client_data)533 rb_setselect_epoll(rb_fde_t *F, unsigned int type, PF * handler, void *client_data)
534 {
535 errno = ENOSYS;
536 return;
537 }
538
/*
 * Stub used when epoll is not available in this build: sets errno to
 * ENOSYS and returns -1 without waiting.
 */
int
rb_select_epoll(long delay)
{
	(void)delay;

	errno = ENOSYS;
	return -1;
}
545
546 int
rb_setup_fd_epoll(rb_fde_t * F)547 rb_setup_fd_epoll(rb_fde_t *F)
548 {
549 errno = ENOSYS;
550 return -1;
551 }
552
553
554 #endif
555
556 #if !defined(USING_EPOLL) || !defined(EPOLL_SCHED_EVENT)
/*
 * Stub used when epoll timer events are not compiled in: does nothing.
 */
void
rb_epoll_init_event(void)
{
}
562
/*
 * Stub used when epoll timer events are not compiled in: nothing is
 * scheduled; errno is set to ENOSYS and -1 is returned.
 */
int
rb_epoll_sched_event(struct ev_entry *event, int when)
{
	(void)event;
	(void)when;

	errno = ENOSYS;
	return -1;
}
569
/*
 * Stub used when epoll timer events are not compiled in: does nothing with
 * the event.
 */
void
rb_epoll_unsched_event(struct ev_entry *event)
{
	(void)event;
}
575
/*
 * Stub used when epoll timer events are not compiled in: reports "not
 * supported" by returning 0, with errno set to ENOSYS.
 */
int
rb_epoll_supports_event(void)
{
	errno = ENOSYS;

	return 0;
}
582 #endif /* !USING_EPOLL || !EPOLL_SCHED_EVENT */
583