1 /*
2  * Submitted by David Pacheco (dp.spambait@gmail.com)
3  *
4  * Copyright 2006-2007 Niels Provos
5  * Copyright 2007-2012 Niels Provos and Nick Mathewson
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. The name of the author may not be used to endorse or promote products
16  *    derived from this software without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY SUN MICROSYSTEMS, INC. ``AS IS'' AND ANY
19  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21  * DISCLAIMED. IN NO EVENT SHALL SUN MICROSYSTEMS, INC. BE LIABLE FOR ANY
22  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
25  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 /*
31  * Copyright (c) 2007 Sun Microsystems. All rights reserved.
32  * Use is subject to license terms.
33  */
34 
35 /*
36  * evport.c: event backend using Solaris 10 event ports. See port_create(3C).
37  * This implementation is loosely modeled after the one used for select(2) (in
38  * select.c).
39  *
 * The backend's state is tracked in a data structure called evport_data: the
 * event port itself, the list of file descriptors that still have to be
 * reassociated with the port, and the buffer that port_getn fills in. The
 * per-file-descriptor state (which of EV_READ and EV_WRITE are wanted, and
 * where the fd sits in the pending list) is kept in a struct fd_info that is
 * passed to evport_add and evport_del.
 *
 * evport_add and evport_del update these structures. evport_dispatch uses
 * them to determine which file descriptors to activate when an event occurs
 * (which it gets from port_getn).
 *
 * Helper functions are used: grow() enlarges the pending and incoming-event
 * arrays as necessary when more room is needed. reassociate() takes care of
 * maintaining the proper file-descriptor/event-port associations.
52  *
53  * As in the select(2) implementation, signals are handled by evsignal.
54  */
55 
56 #include "event2/event-config.h"
57 #include "evconfig-private.h"
58 
59 #ifdef EVENT__HAVE_EVENT_PORTS
60 
61 #include <sys/time.h>
62 #include <sys/queue.h>
63 #include <errno.h>
64 #include <poll.h>
65 #include <port.h>
66 #include <signal.h>
67 #include <stdio.h>
68 #include <stdlib.h>
69 #include <string.h>
70 #include <time.h>
71 #include <unistd.h>
72 
73 #include "event2/thread.h"
74 
75 #include "evthread-internal.h"
76 #include "event-internal.h"
77 #include "log-internal.h"
78 #include "evsignal-internal.h"
79 #include "evmap-internal.h"
80 
81 #define INITIAL_EVENTS_PER_GETN 8
82 #define MAX_EVENTS_PER_GETN 4096
83 
/*
 * Per-file-descriptor information about what events we're subscribed to.
 * fdi_what is zero if no event is subscribed to on the file descriptor.
 */
88 
struct fd_info {
	/* Combination of EV_READ and EV_WRITE currently wanted on this fd. */
	short fdi_what;
	/* Index of this fd within ed_pending, plus 1.  Zero if this fd is
	 * not in ed_pending.  (The +1 is a hack so that memset(0) will set
	 * it to a nil index.) */
	int pending_idx_plus_1;
};

/* Nonzero when the fd_info asks for read (resp. write) notification. */
#define FDI_HAS_READ(fdi)  ((fdi)->fdi_what & EV_READ)
#define FDI_HAS_WRITE(fdi) ((fdi)->fdi_what & EV_WRITE)
/* Nonzero when the fd_info asks for at least one of read or write. */
#define FDI_HAS_EVENTS(fdi) (FDI_HAS_READ(fdi) || FDI_HAS_WRITE(fdi))
/*
 * Translate the wanted EV_READ/EV_WRITE bits into the matching POLLIN/POLLOUT
 * mask.  The whole expansion is parenthesized so the macro can be combined
 * with other operators (e.g. `FDI_TO_SYSEVENTS(f) & POLLOUT`) without the
 * surrounding operator binding to only half of it.
 */
#define FDI_TO_SYSEVENTS(fdi) \
    ((FDI_HAS_READ(fdi) ? POLLIN : 0) | (FDI_HAS_WRITE(fdi) ? POLLOUT : 0))
103 
104 struct evport_data {
105 	int		ed_port;	/* event port for system events  */
106 	/* How many elements of ed_pending should we look at? */
107 	int ed_npending;
108 	/* How many elements are allocated in ed_pending and pevtlist? */
109 	int ed_maxevents;
110 	/* fdi's that we need to reassoc */
111 	int *ed_pending;
112 	/* storage space for incoming events. */
113 	port_event_t *ed_pevtlist;
114 
115 };
116 
/* Forward declarations: the backend entry points and the array-growing helper. */
static void *evport_init(struct event_base *base);
static int evport_add(struct event_base *base, int fd, short old, short events, void *p);
static int evport_del(struct event_base *base, int fd, short old, short events, void *p);
static int evport_dispatch(struct event_base *base, struct timeval *tv);
static void evport_dealloc(struct event_base *base);
static int grow(struct evport_data *data, int min_events);
123 
124 const struct eventop evportops = {
125 	"evport",
126 	evport_init,
127 	evport_add,
128 	evport_del,
129 	evport_dispatch,
130 	evport_dealloc,
131 	1, /* need reinit */
132 	0, /* features */
133 	sizeof(struct fd_info), /* fdinfo length */
134 };
135 
136 /*
137  * Initialize the event port implementation.
138  */
139 
140 static void*
evport_init(struct event_base * base)141 evport_init(struct event_base *base)
142 {
143 	struct evport_data *evpd;
144 
145 	if (!(evpd = mm_calloc(1, sizeof(struct evport_data))))
146 		return (NULL);
147 
148 	if ((evpd->ed_port = port_create()) == -1) {
149 		mm_free(evpd);
150 		return (NULL);
151 	}
152 
153 	if (grow(evpd, INITIAL_EVENTS_PER_GETN) < 0) {
154 		close(evpd->ed_port);
155 		mm_free(evpd);
156 		return NULL;
157 	}
158 
159 	evpd->ed_npending = 0;
160 
161 	evsig_init_(base);
162 
163 	return (evpd);
164 }
165 
166 static int
grow(struct evport_data * data,int min_events)167 grow(struct evport_data *data, int min_events)
168 {
169 	int newsize;
170 	int *new_pending;
171 	port_event_t *new_pevtlist;
172 	if (data->ed_maxevents) {
173 		newsize = data->ed_maxevents;
174 		do {
175 			newsize *= 2;
176 		} while (newsize < min_events);
177 	} else {
178 		newsize = min_events;
179 	}
180 
181 	new_pending = mm_realloc(data->ed_pending, sizeof(int)*newsize);
182 	if (new_pending == NULL)
183 		return -1;
184 	data->ed_pending = new_pending;
185 	new_pevtlist = mm_realloc(data->ed_pevtlist, sizeof(port_event_t)*newsize);
186 	if (new_pevtlist == NULL)
187 		return -1;
188 	data->ed_pevtlist = new_pevtlist;
189 
190 	data->ed_maxevents = newsize;
191 	return 0;
192 }
193 
#ifdef CHECK_INVARIANTS
/*
 * Checks some basic properties about the evport_data structure: that the
 * pointer is non-NULL and that it holds a valid event port descriptor.
 */

static void
check_evportop(struct evport_data *evpd)
{
	EVUTIL_ASSERT(evpd);
	/*
	 * evport_init() only treats -1 from port_create() as failure, so
	 * every non-negative descriptor, including 0, is a valid port.
	 */
	EVUTIL_ASSERT(evpd->ed_port >= 0);
}

/*
 * Verifies very basic integrity of a given port_event.
 */
static void
check_event(port_event_t *pevt)
{
	/*
	 * We've only registered for PORT_SOURCE_FD events. The only
	 * other thing we can legitimately receive is PORT_SOURCE_ALERT,
	 * but since we're not using port_alert either, we can assume
	 * PORT_SOURCE_FD.
	 */
	EVUTIL_ASSERT(pevt->portev_source == PORT_SOURCE_FD);
}

#else
/* Without CHECK_INVARIANTS the checks compile away to nothing. */
#define check_evportop(epop)
#define check_event(pevt)
#endif /* CHECK_INVARIANTS */
227 
228 /*
229  * (Re)associates the given file descriptor with the event port. The OS events
230  * are specified (implicitly) from the fd_info struct.
231  */
232 static int
reassociate(struct evport_data * epdp,struct fd_info * fdip,int fd)233 reassociate(struct evport_data *epdp, struct fd_info *fdip, int fd)
234 {
235 	int sysevents = FDI_TO_SYSEVENTS(fdip);
236 
237 	if (sysevents != 0) {
238 		if (port_associate(epdp->ed_port, PORT_SOURCE_FD,
239 				   fd, sysevents, fdip) == -1) {
240 			event_warn("port_associate");
241 			return (-1);
242 		}
243 	}
244 
245 	check_evportop(epdp);
246 
247 	return (0);
248 }
249 
250 /*
251  * Main event loop - polls port_getn for some number of events, and processes
252  * them.
253  */
254 
255 static int
evport_dispatch(struct event_base * base,struct timeval * tv)256 evport_dispatch(struct event_base *base, struct timeval *tv)
257 {
258 	int i, res;
259 	struct evport_data *epdp = base->evbase;
260 	port_event_t *pevtlist = epdp->ed_pevtlist;
261 
262 	/*
263 	 * port_getn will block until it has at least nevents events. It will
264 	 * also return how many it's given us (which may be more than we asked
265 	 * for, as long as it's less than our maximum (ed_maxevents)) in
266 	 * nevents.
267 	 */
268 	int nevents = 1;
269 
270 	/*
271 	 * We have to convert a struct timeval to a struct timespec
272 	 * (only difference is nanoseconds vs. microseconds). If no time-based
273 	 * events are active, we should wait for I/O (and tv == NULL).
274 	 */
275 	struct timespec ts;
276 	struct timespec *ts_p = NULL;
277 	if (tv != NULL) {
278 		ts.tv_sec = tv->tv_sec;
279 		ts.tv_nsec = tv->tv_usec * 1000;
280 		ts_p = &ts;
281 	}
282 
283 	/*
284 	 * Before doing anything else, we need to reassociate the events we hit
285 	 * last time which need reassociation. See comment at the end of the
286 	 * loop below.
287 	 */
288 	for (i = 0; i < epdp->ed_npending; ++i) {
289 		struct fd_info *fdi = NULL;
290 		const int fd = epdp->ed_pending[i];
291 		if (fd != -1) {
292 			/* We might have cleared out this event; we need
293 			 * to be sure that it's still set. */
294 			fdi = evmap_io_get_fdinfo_(&base->io, fd);
295 		}
296 
297 		if (fdi != NULL && FDI_HAS_EVENTS(fdi)) {
298 			reassociate(epdp, fdi, fd);
299 			/* epdp->ed_pending[i] = -1; */
300 			fdi->pending_idx_plus_1 = 0;
301 		}
302 	}
303 
304 	EVBASE_RELEASE_LOCK(base, th_base_lock);
305 
306 	res = port_getn(epdp->ed_port, pevtlist, epdp->ed_maxevents,
307 	    (unsigned int *) &nevents, ts_p);
308 
309 	EVBASE_ACQUIRE_LOCK(base, th_base_lock);
310 
311 	if (res == -1) {
312 		if (errno == EINTR || errno == EAGAIN) {
313 			return (0);
314 		} else if (errno == ETIME) {
315 			if (nevents == 0)
316 				return (0);
317 		} else {
318 			event_warn("port_getn");
319 			return (-1);
320 		}
321 	}
322 
323 	event_debug(("%s: port_getn reports %d events", __func__, nevents));
324 
325 	for (i = 0; i < nevents; ++i) {
326 		port_event_t *pevt = &pevtlist[i];
327 		int fd = (int) pevt->portev_object;
328 		struct fd_info *fdi = pevt->portev_user;
329 		/*EVUTIL_ASSERT(evmap_io_get_fdinfo_(&base->io, fd) == fdi);*/
330 
331 		check_evportop(epdp);
332 		check_event(pevt);
333 		epdp->ed_pending[i] = fd;
334 		fdi->pending_idx_plus_1 = i + 1;
335 
336 		/*
337 		 * Figure out what kind of event it was
338 		 * (because we have to pass this to the callback)
339 		 */
340 		res = 0;
341 		if (pevt->portev_events & (POLLERR|POLLHUP)) {
342 			res = EV_READ | EV_WRITE;
343 		} else {
344 			if (pevt->portev_events & POLLIN)
345 				res |= EV_READ;
346 			if (pevt->portev_events & POLLOUT)
347 				res |= EV_WRITE;
348 		}
349 
350 		/*
351 		 * Check for the error situations or a hangup situation
352 		 */
353 		if (pevt->portev_events & (POLLERR|POLLHUP|POLLNVAL))
354 			res |= EV_READ|EV_WRITE;
355 
356 		evmap_io_active_(base, fd, res);
357 	} /* end of all events gotten */
358 	epdp->ed_npending = nevents;
359 
360 	if (nevents == epdp->ed_maxevents &&
361 	    epdp->ed_maxevents < MAX_EVENTS_PER_GETN) {
362 		/* we used all the space this time.  We should be ready
363 		 * for more events next time around. */
364 		grow(epdp, epdp->ed_maxevents * 2);
365 	}
366 
367 	check_evportop(epdp);
368 
369 	return (0);
370 }
371 
372 
373 /*
374  * Adds the given event (so that you will be notified when it happens via
375  * the callback function).
376  */
377 
378 static int
evport_add(struct event_base * base,int fd,short old,short events,void * p)379 evport_add(struct event_base *base, int fd, short old, short events, void *p)
380 {
381 	struct evport_data *evpd = base->evbase;
382 	struct fd_info *fdi = p;
383 
384 	check_evportop(evpd);
385 
386 	fdi->fdi_what |= events;
387 
388 	return reassociate(evpd, fdi, fd);
389 }
390 
391 /*
392  * Removes the given event from the list of events to wait for.
393  */
394 
395 static int
evport_del(struct event_base * base,int fd,short old,short events,void * p)396 evport_del(struct event_base *base, int fd, short old, short events, void *p)
397 {
398 	struct evport_data *evpd = base->evbase;
399 	struct fd_info *fdi = p;
400 	int associated = ! fdi->pending_idx_plus_1;
401 
402 	check_evportop(evpd);
403 
404 	fdi->fdi_what &= ~(events &(EV_READ|EV_WRITE));
405 
406 	if (associated) {
407 		if (!FDI_HAS_EVENTS(fdi) &&
408 		    port_dissociate(evpd->ed_port, PORT_SOURCE_FD, fd) == -1) {
409 			/*
410 			 * Ignore EBADFD error the fd could have been closed
411 			 * before event_del() was called.
412 			 */
413 			if (errno != EBADFD) {
414 				event_warn("port_dissociate");
415 				return (-1);
416 			}
417 		} else {
418 			if (FDI_HAS_EVENTS(fdi)) {
419 				return (reassociate(evpd, fdi, fd));
420 			}
421 		}
422 	} else {
423 		if ((fdi->fdi_what & (EV_READ|EV_WRITE)) == 0) {
424 			const int i = fdi->pending_idx_plus_1 - 1;
425 			EVUTIL_ASSERT(evpd->ed_pending[i] == fd);
426 			evpd->ed_pending[i] = -1;
427 			fdi->pending_idx_plus_1 = 0;
428 		}
429 	}
430 	return 0;
431 }
432 
433 
434 static void
evport_dealloc(struct event_base * base)435 evport_dealloc(struct event_base *base)
436 {
437 	struct evport_data *evpd = base->evbase;
438 
439 	evsig_dealloc_(base);
440 
441 	close(evpd->ed_port);
442 
443 	if (evpd->ed_pending)
444 		mm_free(evpd->ed_pending);
445 	if (evpd->ed_pevtlist)
446 		mm_free(evpd->ed_pevtlist);
447 
448 	mm_free(evpd);
449 }
450 
451 #endif /* EVENT__HAVE_EVENT_PORTS */
452