1 /*
2  * scamper_fds: manage events and file descriptors
3  *
4  * $Id: scamper_fds.c,v 1.99 2020/04/27 07:32:21 mjl Exp $
5  *
6  * Copyright (C) 2004-2006 Matthew Luckie
7  * Copyright (C) 2006-2011 The University of Waikato
8  * Copyright (C) 2012-2014 Matthew Luckie
9  * Copyright (C) 2012-2015 The Regents of the University of California
10  * Copyright (C) 2016-2020 Matthew Luckie
11  *
12  * This program is free software; you can redistribute it and/or modify
13  * it under the terms of the GNU General Public License as published by
14  * the Free Software Foundation, version 2.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  * GNU General Public License for more details.
20  *
21  * You should have received a copy of the GNU General Public License
22  * along with this program; if not, write to the Free Software
23  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
24  *
25  */
26 
27 #ifdef HAVE_CONFIG_H
28 #include "config.h"
29 #endif
30 #include "internal.h"
31 
32 #include "scamper.h"
33 #include "scamper_fds.h"
34 #include "scamper_debug.h"
35 #include "scamper_icmp4.h"
36 #include "scamper_icmp6.h"
37 #include "scamper_udp4.h"
38 #include "scamper_udp6.h"
39 #include "scamper_tcp4.h"
40 #include "scamper_tcp6.h"
41 #include "scamper_ip4.h"
42 #include "scamper_dl.h"
43 #ifndef _WIN32
44 #include "scamper_rtsock.h"
45 #endif
46 #include "utils.h"
47 #include "mjl_list.h"
48 #include "mjl_splaytree.h"
49 
/*
 * scamper_fd_poll
 *
 * node to hold callback details for the fd.  each scamper_fd embeds two of
 * these: one for read events and one for write events.
 */
typedef struct scamper_fd_poll
{
  scamper_fd_t    *fdn;    /* back pointer to the fd struct */
  scamper_fd_cb_t  cb;     /* callback to use when event arises */
  void            *param;  /* user-defined parameter to pass to callback */
  dlist_t         *list;   /* which list the node is in; set to NULL when
                              the node is ejected from a list */
  dlist_node_t    *node;   /* node in the poll list */
  uint8_t          flags;  /* flags associated with structure; see
                              SCAMPER_FD_POLL_FLAG_INACTIVE */
} scamper_fd_poll_t;
64 
/*
 * scamper_fd
 *
 * a file descriptor, details of its type and other identifying information,
 * and what to do when read/write events are found.
 *
 * which member of fd_t_un is meaningful depends on the type field: the
 * tcp, udp, and icmp members hold an address that is freed together with
 * the structure, and the dl member holds datalink state that is released
 * with scamper_dl_state_free.
 */
struct scamper_fd
{
  int               fd;           /* the file descriptor being polled */
  scamper_fd_t     *raw;          /* if udp4, the raw udp socket */
  int               type;         /* the type of the file descriptor */
  int               refcnt;       /* number of references to this structure */
  scamper_fd_poll_t read;         /* if monitored for read events */
  scamper_fd_poll_t write;        /* if monitored for write events */

  splaytree_node_t *fd_tree_node; /* node for this fd in the fd_tree */
  dlist_node_t     *fd_list_node; /* node for this fd in the fd_list */

  struct timeval    tv;           /* when this node should be expired; set
                                     when the refcnt reaches zero */
  dlist_node_t     *rc0;          /* node in refcnt_0 list; NULL when the
                                     fd is not on that list */

  union
  {
    /* tcp sockets are identified by source port and address */
    struct fd_t_tcp
    {
      void         *addr;
      uint16_t      sport;
    } fd_t_tcp;

    /* udp sockets are identified by address and, for most types, port */
    struct fd_t_udp
    {
      void         *addr;
      uint16_t      sport;
    } fd_t_udp;

    /* icmp sockets are identified by address only */
    struct fd_t_icmp
    {
      void         *addr;
    } fd_t_icmp;

    /* datalink fds are identified by interface index */
    struct fd_t_dl
    {
      int           ifindex;
      scamper_dl_t *dl;
    } fd_t_dl;

  } fd_t_un;
};
113 
/* values stored in the type field of a scamper_fd */
#define SCAMPER_FD_TYPE_PRIVATE  0x00
#define SCAMPER_FD_TYPE_ICMP4    0x01
#define SCAMPER_FD_TYPE_ICMP4ERR 0x02
#define SCAMPER_FD_TYPE_ICMP6    0x03
#define SCAMPER_FD_TYPE_UDP4     0x04
#define SCAMPER_FD_TYPE_UDP4DG   0x05
#define SCAMPER_FD_TYPE_UDP6     0x06
#define SCAMPER_FD_TYPE_UDP6ERR  0x07
#define SCAMPER_FD_TYPE_TCP4     0x08
#define SCAMPER_FD_TYPE_TCP6     0x09
#define SCAMPER_FD_TYPE_DL       0x0a
#define SCAMPER_FD_TYPE_IP4      0x0b
#define SCAMPER_FD_TYPE_FILE     0x0c

/*
 * routing and interface socket types are not defined on windows; code
 * that handles them tests for the macro with #ifdef.
 */
#ifndef _WIN32
#define SCAMPER_FD_TYPE_RTSOCK   0x0d
#define SCAMPER_FD_TYPE_IFSOCK   0x0e
#endif

/*
 * predicates that classify a scamper_fd_t by its type field.  each takes
 * a pointer to the structure and evaluates the argument more than once.
 */
#define SCAMPER_FD_TYPE_IS_UDP(fd) (      \
  (fd)->type == SCAMPER_FD_TYPE_UDP4   || \
  (fd)->type == SCAMPER_FD_TYPE_UDP4DG || \
  (fd)->type == SCAMPER_FD_TYPE_UDP6   || \
  (fd)->type == SCAMPER_FD_TYPE_UDP6ERR)

#define SCAMPER_FD_TYPE_IS_ICMP(fd) (       \
  (fd)->type == SCAMPER_FD_TYPE_ICMP4    || \
  (fd)->type == SCAMPER_FD_TYPE_ICMP4ERR || \
  (fd)->type == SCAMPER_FD_TYPE_ICMP6)

#define SCAMPER_FD_TYPE_IS_TCP(fd) (      \
  (fd)->type == SCAMPER_FD_TYPE_TCP4 ||   \
  (fd)->type == SCAMPER_FD_TYPE_TCP6)

#define SCAMPER_FD_TYPE_IS_IPV4(fd) (       \
  (fd)->type == SCAMPER_FD_TYPE_ICMP4    || \
  (fd)->type == SCAMPER_FD_TYPE_ICMP4ERR || \
  (fd)->type == SCAMPER_FD_TYPE_UDP4     || \
  (fd)->type == SCAMPER_FD_TYPE_UDP4DG   || \
  (fd)->type == SCAMPER_FD_TYPE_TCP4)

#define SCAMPER_FD_TYPE_IS_IPV6(fd) (      \
  (fd)->type == SCAMPER_FD_TYPE_ICMP6   || \
  (fd)->type == SCAMPER_FD_TYPE_UDP6    || \
  (fd)->type == SCAMPER_FD_TYPE_UDP6ERR || \
  (fd)->type == SCAMPER_FD_TYPE_TCP6)

#define SCAMPER_FD_TYPE_IS_DL(fd) (       \
  (fd)->type == SCAMPER_FD_TYPE_DL)

/* flag for the flags field of a scamper_fd_poll_t */
#define SCAMPER_FD_POLL_FLAG_INACTIVE 0x01 /* the fd should not be polled */

/* shorthand for reaching the type-specific members of fd_t_un */
#define fd_tcp_sport  fd_t_un.fd_t_tcp.sport
#define fd_tcp_addr   fd_t_un.fd_t_tcp.addr
#define fd_udp_sport  fd_t_un.fd_t_udp.sport
#define fd_udp_addr   fd_t_un.fd_t_udp.addr
#define fd_icmp_addr  fd_t_un.fd_t_icmp.addr
#define fd_dl_ifindex fd_t_un.fd_t_dl.ifindex
#define fd_dl_dl      fd_t_un.fd_t_dl.dl
173 
/* table of scamper_fd_t pointers indexed by fd number, and its length */
static scamper_fd_t **fd_array    = NULL;
static int            fd_array_s  = 0;

/* fds that can be looked up by their properties; ordering is by fd_cmp */
static splaytree_t   *fd_tree     = NULL;

/* list of fds; walked by fds_poll to build the array passed to poll */
static dlist_t       *fd_list     = NULL;

/* poll entries that fds_select monitors for read and write events */
static dlist_t       *read_fds    = NULL;
static dlist_t       *write_fds   = NULL;

/*
 * poll entries waiting to be monitored; fds_select moves these onto
 * read_fds and write_fds at the start of each call.
 */
static dlist_t       *read_queue  = NULL;
static dlist_t       *write_queue = NULL;

/* fds whose refcnt reached zero and which are waiting to be closed */
static dlist_t       *refcnt_0    = NULL;

/*
 * the polling implementation in use.
 * NOTE(review): presumably set to one of the fds_* functions during
 * initialisation, which is not part of this section -- confirm.
 */
static int          (*pollfunc)(struct timeval *timeout) = NULL;
184 
185 #ifdef HAVE_SCAMPER_DEBUG
186 
/*
 * fd_addr_tostr
 *
 * format addr, an address of family af, into buf with a single leading
 * space so that the result can be appended directly after a type name.
 *
 * returns buf on success.  if addr is NULL or cannot be converted, buf is
 * left untouched and an empty string literal is returned instead.
 */
static char *fd_addr_tostr(char *buf, size_t len, int af, void *addr)
{
  char str[128];

  if(addr != NULL && addr_tostr(af, addr, str, sizeof(str)) != NULL)
    {
      snprintf(buf, len, " %s", str);
      return buf;
    }

  return "";
}
197 
198 static char *fd_tostr(scamper_fd_t *fdn)
199 {
200   static char buf[144];
201   char addr[128];
202 
203   switch(fdn->type)
204     {
205     case SCAMPER_FD_TYPE_PRIVATE:
206       return "private";
207 
208     case SCAMPER_FD_TYPE_IP4:
209       return "ip4";
210 
211     case SCAMPER_FD_TYPE_FILE:
212       return "file";
213 
214     case SCAMPER_FD_TYPE_ICMP4:
215       snprintf(buf, sizeof(buf), "icmp4%s",
216 	       fd_addr_tostr(addr, sizeof(addr), AF_INET, fdn->fd_icmp_addr));
217       return buf;
218 
219     case SCAMPER_FD_TYPE_ICMP4ERR:
220       snprintf(buf, sizeof(buf), "icmp4err%s",
221 	       fd_addr_tostr(addr, sizeof(addr), AF_INET, fdn->fd_icmp_addr));
222       return buf;
223 
224     case SCAMPER_FD_TYPE_ICMP6:
225       snprintf(buf, sizeof(buf), "icmp6%s",
226 	       fd_addr_tostr(addr, sizeof(addr), AF_INET6, fdn->fd_icmp_addr));
227       return buf;
228 
229     case SCAMPER_FD_TYPE_UDP4:
230       snprintf(buf, sizeof(buf), "udp4%s",
231 	       fd_addr_tostr(addr, sizeof(addr), AF_INET, fdn->fd_udp_addr));
232       return buf;
233 
234     case SCAMPER_FD_TYPE_UDP4DG:
235       snprintf(buf, sizeof(buf), "udp4dg%s %d",
236 	       fd_addr_tostr(addr, sizeof(addr), AF_INET, fdn->fd_udp_addr),
237 	       fdn->fd_udp_sport);
238       return buf;
239 
240     case SCAMPER_FD_TYPE_UDP6:
241       snprintf(buf, sizeof(buf), "udp6%s %d",
242 	       fd_addr_tostr(addr, sizeof(addr), AF_INET6, fdn->fd_udp_addr),
243 	       fdn->fd_udp_sport);
244       return buf;
245 
246     case SCAMPER_FD_TYPE_UDP6ERR:
247       snprintf(buf, sizeof(buf), "udp6err%s %d",
248 	       fd_addr_tostr(addr, sizeof(addr), AF_INET6, fdn->fd_udp_addr),
249 	       fdn->fd_udp_sport);
250       return buf;
251 
252     case SCAMPER_FD_TYPE_TCP4:
253       snprintf(buf, sizeof(buf), "tcp4%s %d",
254 	       fd_addr_tostr(addr, sizeof(addr), AF_INET, fdn->fd_tcp_addr),
255 	       fdn->fd_tcp_sport);
256       return buf;
257 
258     case SCAMPER_FD_TYPE_TCP6:
259       snprintf(buf, sizeof(buf), "tcp6%s %d",
260 	       fd_addr_tostr(addr, sizeof(addr), AF_INET6, fdn->fd_tcp_addr),
261 	       fdn->fd_tcp_sport);
262       return buf;
263 
264     case SCAMPER_FD_TYPE_DL:
265       snprintf(buf, sizeof(buf), "dl %d", fdn->fd_dl_ifindex);
266       return buf;
267 
268 #ifdef SCAMPER_FD_TYPE_RTSOCK
269     case SCAMPER_FD_TYPE_RTSOCK:
270       return "rtsock";
271 #endif
272 
273 #ifdef SCAMPER_FD_TYPE_IFSOCK
274     case SCAMPER_FD_TYPE_IFSOCK:
275       return "ifsock";
276 #endif
277     }
278 
279   return "?";
280 }
281 #endif
282 
283 static void fd_close(scamper_fd_t *fdn)
284 {
285   switch(fdn->type)
286     {
287     case SCAMPER_FD_TYPE_PRIVATE:
288     case SCAMPER_FD_TYPE_FILE:
289       break;
290 
291     case SCAMPER_FD_TYPE_ICMP4:
292     case SCAMPER_FD_TYPE_ICMP4ERR:
293       scamper_icmp4_close(fdn->fd);
294       break;
295 
296     case SCAMPER_FD_TYPE_ICMP6:
297       scamper_icmp6_close(fdn->fd);
298       break;
299 
300     case SCAMPER_FD_TYPE_UDP4:
301     case SCAMPER_FD_TYPE_UDP4DG:
302       scamper_udp4_close(fdn->fd);
303       break;
304 
305     case SCAMPER_FD_TYPE_UDP6:
306     case SCAMPER_FD_TYPE_UDP6ERR:
307       scamper_udp6_close(fdn->fd);
308       break;
309 
310     case SCAMPER_FD_TYPE_TCP4:
311       scamper_tcp4_close(fdn->fd);
312       break;
313 
314     case SCAMPER_FD_TYPE_TCP6:
315       scamper_tcp6_close(fdn->fd);
316       break;
317 
318     case SCAMPER_FD_TYPE_DL:
319       scamper_dl_close(fdn->fd);
320       break;
321 
322 #ifdef SCAMPER_FD_TYPE_RTSOCK
323     case SCAMPER_FD_TYPE_RTSOCK:
324       scamper_rtsock_close(fdn->fd);
325       break;
326 #endif
327 
328 #ifdef SCAMPER_FD_TYPE_IFSOCK
329     case SCAMPER_FD_TYPE_IFSOCK:
330       close(fdn->fd);
331       break;
332 #endif
333     }
334 
335   return;
336 }
337 
338 /*
339  * fd_free
340  *
341  * free up memory allocated to scamper's monitoring of the file descriptor.
342  */
343 static void fd_free(scamper_fd_t *fdn)
344 {
345   scamper_debug(__func__, "fd %d type %s", fdn->fd, fd_tostr(fdn));
346 
347   if(fdn->fd >= 0 && fdn->fd < fd_array_s && fd_array != NULL)
348     fd_array[fdn->fd] = NULL;
349 
350   if(fdn->read.node != NULL)
351     dlist_node_pop(fdn->read.list, fdn->read.node);
352 
353   if(fdn->write.node != NULL)
354     dlist_node_pop(fdn->write.list, fdn->write.node);
355 
356   if(fdn->rc0 != NULL)
357     dlist_node_pop(refcnt_0, fdn->rc0);
358 
359   if(fdn->fd_tree_node != NULL)
360     splaytree_remove_node(fd_tree, fdn->fd_tree_node);
361 
362   if(fdn->fd_list_node != NULL)
363     dlist_node_pop(fd_list, fdn->fd_list_node);
364 
365   if(SCAMPER_FD_TYPE_IS_ICMP(fdn))
366     {
367       if(fdn->fd_icmp_addr != NULL)
368 	free(fdn->fd_icmp_addr);
369     }
370   else if(SCAMPER_FD_TYPE_IS_UDP(fdn))
371     {
372       if(fdn->fd_udp_addr != NULL)
373 	free(fdn->fd_udp_addr);
374     }
375   else if(SCAMPER_FD_TYPE_IS_TCP(fdn))
376     {
377       if(fdn->fd_tcp_addr != NULL)
378 	free(fdn->fd_tcp_addr);
379     }
380   else if(SCAMPER_FD_TYPE_IS_DL(fdn))
381     {
382       if(fdn->fd_dl_dl != NULL)
383 	scamper_dl_state_free(fdn->fd_dl_dl);
384     }
385 
386   free(fdn);
387 
388   return;
389 }
390 
391 /*
392  * fd_refcnt_0
393  *
394  * this function is called whenever a fdn with a refcnt field of zero is
395  * found.
396  */
397 static void fd_refcnt_0(scamper_fd_t *fdn)
398 {
399   /*
400    * if the fd is in a list that is currently locked, then it can't be
401    * removed just yet
402    */
403   if(dlist_islocked(fd_list) != 0 ||
404      (fdn->read.list  != NULL && dlist_islocked(fdn->read.list)  != 0) ||
405      (fdn->write.list != NULL && dlist_islocked(fdn->write.list) != 0))
406     {
407       return;
408     }
409 
410   /*
411    * if this is a private fd and the reference count has reached zero,
412    * then the scamper_fd structure can be freed up completely now
413    */
414   if(fdn->type == SCAMPER_FD_TYPE_PRIVATE ||
415      fdn->type == SCAMPER_FD_TYPE_FILE)
416     {
417       fd_free(fdn);
418       return;
419     }
420 
421   /* if it is not possible to put the node on a list, just free it */
422   if((fdn->rc0 = dlist_tail_push(refcnt_0, fdn)) == NULL)
423     {
424       fd_close(fdn);
425       fd_free(fdn);
426       return;
427     }
428 
429   /*
430    * set this fd to be closed in ten seconds unless something else comes
431    * along and wants to use it.
432    */
433   gettimeofday_wrap(&fdn->tv);
434   fdn->tv.tv_sec += 10;
435 
436   return;
437 }
438 
439 static int fd_poll_setlist(void *item, void *param)
440 {
441   ((scamper_fd_poll_t *)item)->list = (dlist_t *)param;
442   return 0;
443 }
444 
445 /*
446  * fds_select_assemble
447  *
448  * given a list of scamper_fd_poll_t structures held in a list, compose an
449  * fd_set for them to pass to select.
450  */
451 static int fds_select_assemble(dlist_t *fds, slist_t **file_list,
452 			       fd_set *fdset, fd_set **fdsp, int *nfds)
453 {
454   scamper_fd_poll_t *fdp;
455   dlist_node_t      *node;
456   int                count = 0;
457 
458   FD_ZERO(fdset);
459 
460   node = dlist_head_node(fds);
461   while(node != NULL)
462     {
463       /* file descriptor associated with the node */
464       fdp = (scamper_fd_poll_t *)dlist_node_item(node);
465 
466       /* get the next node incase this node is subsequently removed */
467       node = dlist_node_next(node);
468 
469       /* if there is nothing using this fdn any longer, then stop polling it */
470       if(fdp->fdn->refcnt == 0 && fdp->fdn->rc0 == NULL)
471 	{
472 	  fd_refcnt_0(fdp->fdn);
473 	  continue;
474 	}
475 
476       /* if the inactive flag is set, then skip over this file descriptor */
477       if((fdp->flags & SCAMPER_FD_POLL_FLAG_INACTIVE) != 0)
478 	{
479 	  dlist_node_eject(fds, fdp->node);
480 	  fdp->list = NULL;
481 	  continue;
482 	}
483 
484       if(fdp->fdn->type == SCAMPER_FD_TYPE_FILE)
485 	{
486 	  if((*file_list == NULL && (*file_list = slist_alloc()) == NULL) ||
487 	     slist_tail_push(*file_list, fdp) == NULL)
488 	    return -1;
489 	  continue;
490 	}
491 
492       /* monitor this file descriptor */
493       FD_SET(fdp->fdn->fd, fdset);
494       count++;
495 
496       /* update the maxfd seen if appropriate */
497       if(*nfds < fdp->fdn->fd)
498 	*nfds = fdp->fdn->fd;
499     }
500 
501   /*
502    * if there are no fds in the set to monitor, then return a null pointer
503    * to pass to select
504    */
505   if(count == 0)
506     *fdsp = NULL;
507   else
508     *fdsp = fdset;
509 
510   return 0;
511 }
512 
513 /*
514  * fds_select_check
515  *
516  * given an fd_set that has been passed to select, as well as a list of
517  * fds that are being monitored, figure out which ones have an event and
518  * use the callback provided to deal with the event.
519  */
520 static void fds_select_check(fd_set *fdset, dlist_t *fds, int *count)
521 {
522   scamper_fd_poll_t *fdp;
523   dlist_node_t *node;
524 
525   /* stop now if there is nothing to check */
526   if(fdset == NULL || *count == 0)
527     {
528       return;
529     }
530 
531   /* nodes in this list should not be removed while this function is called */
532   dlist_lock(fds);
533 
534   /* loop through */
535   node = dlist_head_node(fds);
536   while(node != NULL && *count > 0)
537     {
538       fdp = (scamper_fd_poll_t *)dlist_node_item(node);
539       node = dlist_node_next(node);
540 
541       if(FD_ISSET(fdp->fdn->fd, fdset))
542 	{
543 	  fdp->cb(fdp->fdn->fd, fdp->param);
544 	  (*count)--;
545 	}
546     }
547 
548   /* can modify the list now */
549   dlist_unlock(fds);
550 
551   return;
552 }
553 
554 static void fds_files_check(dlist_t *fds, slist_t *list)
555 {
556   scamper_fd_poll_t *fdp;
557 
558   if(list == NULL)
559     return;
560 
561   dlist_lock(fds);
562   while((fdp = slist_head_pop(list)) != NULL)
563     fdp->cb(fdp->fdn->fd, fdp->param);
564   dlist_unlock(fds);
565 
566   return;
567 }
568 
569 static int fds_select(struct timeval *timeout)
570 {
571   struct timeval tv;
572   fd_set rfds, *rfdsp;
573   fd_set wfds, *wfdsp;
574   slist_t *rfiles = NULL, *wfiles = NULL;
575   int count, nfds = -1;
576 
577   /* concat any new fds to monitor now */
578   dlist_foreach(read_queue, fd_poll_setlist, read_fds);
579   dlist_concat(read_fds, read_queue);
580   dlist_foreach(write_queue, fd_poll_setlist, write_fds);
581   dlist_concat(write_fds, write_queue);
582 
583   /* compose the sets of file descriptors to monitor */
584   if(fds_select_assemble(read_fds, &rfiles, &rfds, &rfdsp, &nfds) != 0)
585     goto err;
586   if(fds_select_assemble(write_fds, &wfiles, &wfds, &wfdsp, &nfds) != 0)
587     goto err;
588 
589   if(rfiles != NULL || wfiles != NULL)
590     {
591       tv.tv_sec = 0; tv.tv_usec = 0;
592       timeout = &tv;
593     }
594 
595   /* find out which file descriptors have an event */
596 #ifdef _WIN32
597   if(nfds == -1 && rfiles == NULL && wfiles == NULL)
598     {
599       if(timeout != NULL && timeout->tv_sec >= 0 && timeout->tv_usec >= 0)
600 	Sleep((timeout->tv_sec * 1000) + (timeout->tv_usec / 1000));
601       count = 0;
602     }
603   else
604 #endif
605   if((count = select(nfds+1, rfdsp, wfdsp, NULL, timeout)) < 0)
606     {
607       printerror(__func__, "select failed");
608       goto err;
609     }
610 
611   /* read and write to files outside of select */
612   if(rfiles != NULL)
613     {
614       fds_files_check(read_fds, rfiles);
615       slist_free(rfiles); rfiles = NULL;
616     }
617 
618   if(wfiles != NULL)
619     {
620       fds_files_check(write_fds, wfiles);
621       slist_free(wfiles); wfiles = NULL;
622     }
623 
624   /* if there are fds to check, then check them */
625   if(count > 0)
626     {
627       fds_select_check(rfdsp, read_fds, &count);
628       fds_select_check(wfdsp, write_fds, &count);
629     }
630 
631   return 0;
632 
633  err:
634   if(rfiles != NULL) slist_free(rfiles);
635   if(wfiles != NULL) slist_free(wfiles);
636   return -1;
637 }
638 
639 #ifdef HAVE_POLL
640 static struct pollfd *poll_fds = NULL;
641 static int poll_fdc = 0;
642 
643 static void fds_poll_check(short event, int rc, int count)
644 {
645   scamper_fd_t *fd;
646   int i;
647 
648   for(i=0; i<count; i++)
649     {
650       /* skip over if we were not interested in this type of event */
651       if((poll_fds[i].events & event) == 0)
652 	continue;
653 
654       /* skip over if there is no event of this type */
655       if((poll_fds[i].revents & (event|POLLHUP|POLLERR)) == 0)
656 	continue;
657 
658       /*
659        * ensure that the fd is still valid as far as scamper's monitoring
660        * of it goes.
661        */
662       if(poll_fds[i].fd >= 0 && poll_fds[i].fd < fd_array_s &&
663 	 (fd = fd_array[poll_fds[i].fd]) != NULL)
664 	{
665 	  if(event == POLLIN)
666 	    fd->read.cb(fd->fd, fd->read.param);
667 	  else
668 	    fd->write.cb(fd->fd, fd->write.param);
669 	}
670 
671       if(--rc == 0)
672 	break;
673     }
674 
675   return;
676 }
677 
678 static int fds_poll(struct timeval *tv)
679 {
680   scamper_fd_t *fd;
681   dlist_node_t *n;
682   int timeout;
683   int rc, count = 0, in = 0, out = 0;
684   size_t size;
685 
686   n = dlist_head_node(fd_list);
687   while(n != NULL)
688     {
689       fd = dlist_node_item(n);
690       n  = dlist_node_next(n);
691 
692       /* if there is nothing using this fdn any longer, then stop polling it */
693       if(fd->refcnt == 0 && fd->rc0 == NULL)
694 	{
695 	  fd_refcnt_0(fd);
696 	  continue;
697 	}
698 
699       /* don't poll an inactive fd */
700       if((fd->read.flags & SCAMPER_FD_POLL_FLAG_INACTIVE) != 0 &&
701 	 (fd->write.flags & SCAMPER_FD_POLL_FLAG_INACTIVE) != 0)
702 	continue;
703 
704       if(count + 1 > poll_fdc)
705 	{
706 	  size = (count+1) * sizeof(struct pollfd);
707 	  if(realloc_wrap((void **)&poll_fds, size) != 0)
708 	    {
709 	      printerror(__func__, "could not realloc poll_fds");
710 	      return -1;
711 	    }
712 	  poll_fdc = count + 1;
713 	}
714 
715       poll_fds[count].fd = fd->fd;
716       poll_fds[count].events = 0;
717       poll_fds[count].revents = 0;
718 
719       if((fd->read.flags & SCAMPER_FD_POLL_FLAG_INACTIVE) == 0)
720 	{
721 	  poll_fds[count].events |= POLLIN;
722 	  in++;
723 	}
724       if((fd->write.flags & SCAMPER_FD_POLL_FLAG_INACTIVE) == 0)
725 	{
726 	  poll_fds[count].events |= POLLOUT;
727 	  out++;
728 	}
729 
730       count++;
731     }
732 
733   if(tv != NULL)
734     {
735       timeout = (tv->tv_sec * 1000) + (tv->tv_usec / 1000);
736       if(timeout == 0 && tv->tv_usec != 0)
737 	timeout++;
738     }
739   else
740     {
741       timeout = -1;
742     }
743 
744   if((rc = poll(poll_fds, count, timeout)) < 0)
745     {
746       printerror(__func__, "could not poll");
747       return -1;
748     }
749 
750   if(rc > 0)
751     {
752       if(in != 0)
753 	fds_poll_check(POLLIN, rc < in ? rc : in, count);
754       if(out != 0)
755 	fds_poll_check(POLLOUT, rc < out ? rc : out, count);
756     }
757 
758   return 0;
759 }
760 #endif
761 
762 #ifdef HAVE_KQUEUE
/* buffer for events returned by kevent, its capacity, and the kqueue fd */
static struct kevent *kevlist = NULL;
static int kevlistlen = 0;
static int kq = -1;

/*
 * fds_kqueue_init
 *
 * create the kqueue used to monitor file descriptors.
 * returns zero on success, -1 if the kqueue could not be created.
 */
static int fds_kqueue_init(void)
{
  kq = kqueue();
  if(kq == -1)
    {
      printerror(__func__, "could not create kqueue");
      return -1;
    }

  scamper_debug(__func__, "fd %d", kq);
  return 0;
}
777 
778 static void fds_kqueue_set(scamper_fd_t *fd, short filter, u_short flags)
779 {
780   struct kevent kev;
781   EV_SET(&kev, fd->fd, filter, flags, 0, 0, fd);
782   if(kevent(kq, &kev, 1, NULL, 0, NULL) != 0)
783     {
784       printerror(__func__, "fd %d %s %s", fd->fd,
785 		 filter == EVFILT_READ ? "EVFILT_READ" : "EVFILT_WRITE",
786 		 flags == EV_ADD ? "EV_ADD" : "EV_DELETE");
787     }
788   return;
789 }
790 
791 static int fds_kqueue(struct timeval *tv)
792 {
793   scamper_fd_t *fdp;
794   struct timespec ts, *tsp = NULL;
795   struct kevent *kev;
796   int fd, i, c;
797 
798   if((c = dlist_count(read_fds) + dlist_count(write_fds)) >= kevlistlen)
799     {
800       c += 8;
801       if(realloc_wrap((void **)&kevlist, sizeof(struct kevent) * c) != 0)
802 	{
803 	  if(kevlistlen == 0)
804 	    {
805 	      printerror(__func__, "could not alloc kevlist");
806 	      return -1;
807 	    }
808 	}
809       else
810 	{
811 	  kevlistlen = c;
812 	}
813     }
814 
815   if(tv != NULL)
816     {
817       ts.tv_sec  = tv->tv_sec;
818       ts.tv_nsec = tv->tv_usec * 1000;
819       tsp = &ts;
820     }
821 
822   if((c = kevent(kq, NULL, 0, kevlist, kevlistlen, tsp)) == -1)
823     {
824       printerror(__func__, "kevent failed");
825       return -1;
826     }
827 
828   for(i=0; i<c; i++)
829     {
830       kev = &kevlist[i];
831       fd = kev->ident;
832 
833       if(fd < 0 || fd >= fd_array_s)
834 	continue;
835       if((fdp = fd_array[fd]) == NULL)
836 	continue;
837       if(kev->filter == EVFILT_READ)
838 	fdp->read.cb(fd, fdp->read.param);
839       else if(kev->filter == EVFILT_WRITE)
840 	fdp->write.cb(fd, fdp->write.param);
841     }
842 
843   return 0;
844 }
845 #endif
846 
847 #ifdef HAVE_EPOLL
/*
 * buffer for events returned by epoll_wait and its capacity, the epoll
 * fd, and the number of fds currently registered with it
 */
static struct epoll_event *ep_events = NULL;
static int ep_event_c = 0;
static int ep = -1;
static int ep_fdc = 0;

/*
 * fds_epoll_init
 *
 * create the epoll instance used to monitor file descriptors.
 * returns zero on success, -1 if it could not be created.
 */
static int fds_epoll_init(void)
{
  ep = epoll_create(10);
  if(ep == -1)
    {
      printerror(__func__, "could not epoll_create");
      return -1;
    }

  scamper_debug(__func__, "fd %d", ep);
  return 0;
}
863 
864 static void fds_epoll_ctl(scamper_fd_t *fd, uint32_t ev, int op)
865 {
866   struct epoll_event epev;
867 
868   if(op == EPOLL_CTL_ADD &&
869      ((ev == EPOLLIN &&
870        (fd->write.flags & SCAMPER_FD_POLL_FLAG_INACTIVE) == 0) ||
871       (ev == EPOLLOUT &&
872        (fd->read.flags & SCAMPER_FD_POLL_FLAG_INACTIVE) == 0)))
873     {
874       op = EPOLL_CTL_MOD;
875       ev = EPOLLIN | EPOLLOUT;
876     }
877   else if(op == EPOLL_CTL_DEL && ev == EPOLLIN &&
878 	  (fd->write.flags & SCAMPER_FD_POLL_FLAG_INACTIVE) == 0)
879     {
880       op = EPOLL_CTL_MOD;
881       ev = EPOLLOUT;
882     }
883   else if(op == EPOLL_CTL_DEL && ev == EPOLLOUT &&
884 	  (fd->read.flags & SCAMPER_FD_POLL_FLAG_INACTIVE) == 0)
885     {
886       op = EPOLL_CTL_MOD;
887       ev = EPOLLIN;
888     }
889 
890   if(op == EPOLL_CTL_ADD)
891     ep_fdc++;
892   else if(op == EPOLL_CTL_DEL)
893     ep_fdc--;
894 
895   epev.data.fd = fd->fd;
896   epev.events = ev;
897 
898   if(epoll_ctl(ep, op, fd->fd, &epev) != 0)
899     printerror(__func__, "fd %d op %d ev %u", fd->fd, op, ev);
900 
901   return;
902 }
903 
904 static int fds_epoll(struct timeval *tv)
905 {
906   int i, fd, rc, timeout;
907   scamper_fd_t *fdp;
908   size_t size;
909 
910   if(ep_fdc >= ep_event_c)
911     {
912       rc = ep_fdc + 8;
913       size = sizeof(struct epoll_event) * rc;
914       if(realloc_wrap((void **)&ep_events, size) != 0)
915 	{
916 	  if(ep_event_c == 0)
917 	    {
918 	      printerror(__func__, "could not alloc events");
919 	      return -1;
920 	    }
921 	}
922       else
923 	{
924 	  ep_event_c = rc;
925 	}
926     }
927 
928   if(tv != NULL)
929     {
930       timeout = (tv->tv_sec * 1000) + (tv->tv_usec / 1000);
931       if(timeout == 0 && tv->tv_usec != 0)
932 	timeout++;
933     }
934   else
935     {
936       timeout = -1;
937     }
938 
939   if((rc = epoll_wait(ep, ep_events, ep_event_c, timeout)) == -1)
940     {
941       printerror(__func__, "could not epoll_wait");
942       return -1;
943     }
944 
945   for(i=0; i<rc; i++)
946     {
947       fd = ep_events[i].data.fd;
948       if(fd < 0 || fd >= fd_array_s)
949 	continue;
950 
951       if(ep_events[i].events & EPOLLIN)
952 	{
953 	  if((fdp = fd_array[fd]) == NULL)
954 	    continue;
955 	  fdp->read.cb(fd, fdp->read.param);
956 	}
957 
958       if(ep_events[i].events & EPOLLOUT)
959 	{
960 	  if((fdp = fd_array[fd]) == NULL)
961 	    continue;
962 	  fdp->write.cb(fd, fdp->write.param);
963 	}
964     }
965 
966   return 0;
967 }
968 #endif
969 
970 static int fd_addr_cmp(int type, void *a, void *b)
971 {
972   assert(type == SCAMPER_FD_TYPE_TCP4   || type == SCAMPER_FD_TYPE_TCP6 ||
973 	 type == SCAMPER_FD_TYPE_UDP4   || type == SCAMPER_FD_TYPE_UDP6 ||
974 	 type == SCAMPER_FD_TYPE_UDP4DG || type == SCAMPER_FD_TYPE_UDP6ERR ||
975 	 type == SCAMPER_FD_TYPE_ICMP4  || type == SCAMPER_FD_TYPE_ICMP6 ||
976 	 type == SCAMPER_FD_TYPE_ICMP4ERR);
977 
978   if(a == NULL && b != NULL) return -1;
979   if(a != NULL && b == NULL) return  1;
980   if(a == NULL && b == NULL) return  0;
981 
982   if(type == SCAMPER_FD_TYPE_TCP4   ||
983      type == SCAMPER_FD_TYPE_UDP4   ||
984      type == SCAMPER_FD_TYPE_UDP4DG ||
985      type == SCAMPER_FD_TYPE_ICMP4  ||
986      type == SCAMPER_FD_TYPE_ICMP4ERR)
987     return addr4_cmp(a, b);
988   else
989     return addr6_cmp(a, b);
990 }
991 
992 /*
993  * fd_cmp
994  *
995  * given two scamper_fd_t structures, determine if their properties are
996  * the same.  used to maintain the splaytree of existing file descriptors
997  * held by scamper.
998  */
999 static int fd_cmp(const scamper_fd_t *a, const scamper_fd_t *b)
1000 {
1001   if(a->type < b->type) return -1;
1002   if(a->type > b->type) return  1;
1003 
1004   switch(a->type)
1005     {
1006     case SCAMPER_FD_TYPE_TCP4:
1007     case SCAMPER_FD_TYPE_TCP6:
1008       if(a->fd_tcp_sport < b->fd_tcp_sport) return -1;
1009       if(a->fd_tcp_sport > b->fd_tcp_sport) return  1;
1010       return fd_addr_cmp(a->type, a->fd_tcp_addr, b->fd_tcp_addr);
1011 
1012     case SCAMPER_FD_TYPE_UDP4:
1013       return fd_addr_cmp(a->type, a->fd_udp_addr, b->fd_udp_addr);
1014 
1015     case SCAMPER_FD_TYPE_UDP4DG:
1016     case SCAMPER_FD_TYPE_UDP6:
1017     case SCAMPER_FD_TYPE_UDP6ERR:
1018       if(a->fd_udp_sport < b->fd_udp_sport) return -1;
1019       if(a->fd_udp_sport > b->fd_udp_sport) return  1;
1020       return fd_addr_cmp(a->type, a->fd_udp_addr, b->fd_udp_addr);
1021 
1022     case SCAMPER_FD_TYPE_DL:
1023       if(a->fd_dl_ifindex < b->fd_dl_ifindex) return -1;
1024       if(a->fd_dl_ifindex > b->fd_dl_ifindex) return  1;
1025       return 0;
1026 
1027     case SCAMPER_FD_TYPE_ICMP4:
1028     case SCAMPER_FD_TYPE_ICMP4ERR:
1029     case SCAMPER_FD_TYPE_ICMP6:
1030       return fd_addr_cmp(a->type, a->fd_icmp_addr, b->fd_icmp_addr);
1031     }
1032 
1033   return 0;
1034 }
1035 
1036 /*
1037  * fd_alloc
1038  *
1039  * allocate a scamper_fd_t structure and do generic setup tasks.
1040  */
1041 #ifndef DMALLOC
1042 static scamper_fd_t *fd_alloc(int type, int fd)
1043 #else
1044 #define fd_alloc(type, fd) fd_alloc_dm((type), (fd), __FILE__, __LINE__)
1045 static scamper_fd_t *fd_alloc_dm(int type, int fd, const char *file,
1046 				 const int line)
1047 #endif
1048 {
1049   scamper_fd_t *fdn = NULL;
1050   size_t size;
1051   int i;
1052 
1053 #ifndef DMALLOC
1054   if((fdn = malloc_zero(sizeof(scamper_fd_t))) == NULL)
1055 #else
1056   if((fdn = malloc_zero_dm(sizeof(scamper_fd_t), file, line)) == NULL)
1057 #endif
1058     {
1059       goto err;
1060     }
1061   fdn->type   = type;
1062   fdn->fd     = fd;
1063   fdn->refcnt = 1;
1064 
1065   /* set up to poll read ability */
1066   if((fdn->read.node = dlist_node_alloc(&fdn->read)) == NULL)
1067     {
1068       goto err;
1069     }
1070   fdn->read.fdn   = fdn;
1071   fdn->read.flags = SCAMPER_FD_POLL_FLAG_INACTIVE;
1072 
1073   /* set up to poll write ability */
1074   if((fdn->write.node = dlist_node_alloc(&fdn->write)) == NULL)
1075     {
1076       goto err;
1077     }
1078   fdn->write.fdn   = fdn;
1079   fdn->write.flags = SCAMPER_FD_POLL_FLAG_INACTIVE;
1080 
1081   /* store the fd in an array indexed by the fd number */
1082   if(fd+1 > fd_array_s)
1083     {
1084       size = sizeof(scamper_fd_t *) * (fd+1);
1085       if(realloc_wrap((void **)&fd_array, size) != 0)
1086 	goto err;
1087       for(i=fd_array_s; i<fd+1; i++)
1088 	fd_array[i] = NULL;
1089       fd_array_s = fd+1;
1090     }
1091 
1092   /* ensure the same fd is not already registered */
1093   assert(fd_array[fd] == NULL);
1094   fd_array[fd] = fdn;
1095 
1096   return fdn;
1097 
1098  err:
1099   if(fdn != NULL) fd_free(fdn);
1100   return NULL;
1101 }
1102 
1103 /*
1104  * fd_find
1105  *
1106  * search the tree of file descriptors known to scamper for a matching
1107  * entry.  if one is found, increment its reference count and return it.
1108  */
1109 static scamper_fd_t *fd_find(scamper_fd_t *findme)
1110 {
1111   scamper_fd_t *fdn;
1112 
1113   if((fdn = splaytree_find(fd_tree, findme)) != NULL)
1114     {
1115       if(fdn->refcnt == 0 && fdn->rc0 != NULL)
1116 	{
1117 	  dlist_node_pop(refcnt_0, fdn->rc0);
1118 	  fdn->rc0 = NULL;
1119 	}
1120 
1121       fdn->refcnt++;
1122     }
1123 
1124   return fdn;
1125 }
1126 
1127 /*
1128  * fd_null
1129  *
1130  * allocate a file descriptor of a specified type.
1131  */
1132 static scamper_fd_t *fd_null(int type)
1133 {
1134   scamper_fd_t *fdn = NULL, findme;
1135   int fd = -1;
1136 
1137   /* first check if a sharable fd exists for this type */
1138   findme.type = type;
1139   if((fdn = fd_find(&findme)) != NULL)
1140     {
1141       return fdn;
1142     }
1143 
1144   switch(type)
1145     {
1146 #if defined(SCAMPER_FD_TYPE_RTSOCK)
1147     case SCAMPER_FD_TYPE_RTSOCK:
1148       fd = scamper_rtsock_open();
1149       break;
1150 #endif
1151 
1152 #if defined(SCAMPER_FD_TYPE_IFSOCK)
1153     case SCAMPER_FD_TYPE_IFSOCK:
1154       fd = socket(AF_INET, SOCK_DGRAM, 0);
1155       break;
1156 #endif
1157 
1158     case SCAMPER_FD_TYPE_IP4:
1159       fd = scamper_ip4_openraw();
1160       break;
1161     }
1162 
1163   if(fd == -1 || (fdn = fd_alloc(type, fd)) == NULL ||
1164      (fdn->fd_tree_node = splaytree_insert(fd_tree, fdn)) == NULL ||
1165      (fdn->fd_list_node = dlist_tail_push(fd_list, fdn)) == NULL)
1166     {
1167       goto err;
1168     }
1169 
1170   scamper_debug(__func__, "fd %d type %s", fdn->fd, fd_tostr(fdn));
1171   return fdn;
1172 
1173  err:
1174   if(fd != -1)
1175     {
1176       switch(type)
1177 	{
1178 #if defined(SCAMPER_FD_TYPE_RTSOCK)
1179 	case SCAMPER_FD_TYPE_RTSOCK:
1180 	  scamper_rtsock_close(fd);
1181 	  break;
1182 #endif
1183 
1184 #if defined(SCAMPER_FD_TYPE_IFSOCK)
1185 	case SCAMPER_FD_TYPE_IFSOCK:
1186 	  close(fd);
1187 	  break;
1188 #endif
1189 
1190 	case SCAMPER_FD_TYPE_IP4:
1191 	  scamper_ip4_close(fd);
1192 	  break;
1193 	}
1194     }
1195   if(fdn != NULL) fd_free(fdn);
1196   return NULL;
1197 }
1198 
1199 static scamper_fd_t *fd_icmp(int type, void *addr)
1200 {
1201   scamper_fd_t *fdn = NULL, findme;
1202   size_t len = 0;
1203   int fd = -1;
1204 
1205   findme.type = type;
1206   findme.fd_icmp_addr = addr;
1207 
1208   if((fdn = fd_find(&findme)) != NULL)
1209     {
1210       return fdn;
1211     }
1212 
1213   if(type == SCAMPER_FD_TYPE_ICMP4)
1214     {
1215       fd  = scamper_icmp4_open(addr);
1216       len = sizeof(struct in_addr);
1217     }
1218   else if(type == SCAMPER_FD_TYPE_ICMP6)
1219     {
1220       fd  = scamper_icmp6_open(addr);
1221       len = sizeof(struct in6_addr);
1222     }
1223   else if(type == SCAMPER_FD_TYPE_ICMP4ERR)
1224     {
1225       fd  = scamper_icmp4_open_err(addr);
1226       len = sizeof(struct in_addr);
1227     }
1228 
1229   if(fd == -1 || (fdn = fd_alloc(type, fd)) == NULL ||
1230      (addr != NULL && (fdn->fd_icmp_addr = memdup(addr, len)) == NULL) ||
1231      (fdn->fd_tree_node = splaytree_insert(fd_tree, fdn)) == NULL ||
1232      (fdn->fd_list_node = dlist_tail_push(fd_list, fdn)) == NULL)
1233     {
1234       goto err;
1235     }
1236 
1237   scamper_debug(__func__, "fd %d type %s", fdn->fd, fd_tostr(fdn));
1238   return fdn;
1239 
1240  err:
1241   if(fd != -1)
1242     {
1243       if(type == SCAMPER_FD_TYPE_ICMP4 || type == SCAMPER_FD_TYPE_ICMP4ERR)
1244 	scamper_icmp4_close(fd);
1245       else if(type == SCAMPER_FD_TYPE_ICMP6)
1246 	scamper_icmp6_close(fd);
1247     }
1248   if(fdn != NULL) fd_free(fdn);
1249   return NULL;
1250 }
1251 
1252 static scamper_fd_t *fd_tcp(int type, void *addr, uint16_t sport)
1253 {
1254   scamper_fd_t *fdn, findme;
1255   size_t len = 0;
1256   int fd = -1;
1257 
1258   assert(type == SCAMPER_FD_TYPE_TCP4 ||
1259 	 type == SCAMPER_FD_TYPE_TCP6);
1260 
1261   findme.type = type;
1262   findme.fd_tcp_addr = addr;
1263   findme.fd_tcp_sport = sport;
1264 
1265   if((fdn = fd_find(&findme)) != NULL)
1266     return fdn;
1267 
1268   if(type == SCAMPER_FD_TYPE_TCP4)
1269     {
1270       fd  = scamper_tcp4_open(addr, sport);
1271       len = sizeof(struct in_addr);
1272     }
1273   else if(type == SCAMPER_FD_TYPE_TCP6)
1274     {
1275       fd = scamper_tcp6_open(addr, sport);
1276       len = sizeof(struct in6_addr);
1277     }
1278 
1279   if(fd == -1 || (fdn = fd_alloc(type, fd)) == NULL ||
1280      (addr != NULL && (fdn->fd_tcp_addr = memdup(addr, len)) == NULL))
1281     {
1282       goto err;
1283     }
1284   fdn->fd_tcp_sport = sport;
1285 
1286   if((fdn->fd_tree_node = splaytree_insert(fd_tree, fdn)) == NULL ||
1287      (fdn->fd_list_node = dlist_tail_push(fd_list, fdn)) == NULL)
1288     {
1289       goto err;
1290     }
1291 
1292   scamper_debug(__func__, "fd %d type %s", fdn->fd, fd_tostr(fdn));
1293   return fdn;
1294 
1295  err:
1296   if(fd != -1)
1297     {
1298       if(type == SCAMPER_FD_TYPE_TCP4)
1299 	scamper_tcp4_close(fd);
1300       else if(type == SCAMPER_FD_TYPE_TCP6)
1301 	scamper_tcp6_close(fd);
1302     }
1303   if(fdn != NULL) fd_free(fdn);
1304   return NULL;
1305 }
1306 
1307 static scamper_fd_t *fd_udp(int type, void *addr, uint16_t sport)
1308 {
1309   scamper_fd_t *fdn, findme;
1310   size_t len = 0;
1311   int fd = -1;
1312 
1313   findme.type = type;
1314   findme.fd_udp_addr = addr;
1315   findme.fd_udp_sport = sport;
1316 
1317   if((fdn = fd_find(&findme)) != NULL)
1318     return fdn;
1319 
1320   if(type == SCAMPER_FD_TYPE_UDP4)
1321     {
1322       fd  = scamper_udp4_openraw(addr);
1323       len = sizeof(struct in_addr);
1324     }
1325   else if(type == SCAMPER_FD_TYPE_UDP6)
1326     {
1327       fd  = scamper_udp6_open(addr, sport);
1328       len = sizeof(struct in6_addr);
1329     }
1330   else if(type == SCAMPER_FD_TYPE_UDP4DG)
1331     {
1332       fd  = scamper_udp4_opendgram(addr, sport);
1333       len = sizeof(struct in_addr);
1334     }
1335   else if(type == SCAMPER_FD_TYPE_UDP6ERR)
1336     {
1337       fd  = scamper_udp6_open_err(addr, sport);
1338       len = sizeof(struct in6_addr);
1339     }
1340 
1341   if(fd == -1 || (fdn = fd_alloc(type, fd)) == NULL ||
1342      (addr != NULL && (fdn->fd_udp_addr = memdup(addr, len)) == NULL))
1343     {
1344       printerror(__func__, "could not open socket");
1345       goto err;
1346     }
1347   fdn->fd_udp_sport = sport;
1348 
1349   if((fdn->fd_tree_node = splaytree_insert(fd_tree, fdn)) == NULL)
1350     {
1351       printerror(__func__, "could not add socket to tree");
1352       goto err;
1353     }
1354   if((fdn->fd_list_node = dlist_tail_push(fd_list, fdn)) == NULL)
1355     {
1356       printerror(__func__, "could not add socket to list");
1357       goto err;
1358     }
1359 
1360   scamper_debug(__func__, "fd %d type %s", fdn->fd, fd_tostr(fdn));
1361   return fdn;
1362 
1363  err:
1364   if(fd != -1)
1365     {
1366       if(type == SCAMPER_FD_TYPE_UDP4 || type == SCAMPER_FD_TYPE_UDP4DG)
1367 	scamper_udp4_close(fd);
1368       else if(type == SCAMPER_FD_TYPE_UDP6 || type == SCAMPER_FD_TYPE_UDP6ERR)
1369 	scamper_udp6_close(fd);
1370     }
1371   if(fdn != NULL) fd_free(fdn);
1372   return NULL;
1373 }
1374 
1375 /*
1376  * scamper_fds_poll
1377  *
1378  * the money function: this function polls the file descriptors held by
1379  * scamper.  for each fd with an event, it calls the callback registered
1380  * with the fd.
1381  */
1382 int scamper_fds_poll(struct timeval *timeout)
1383 {
1384   scamper_fd_t *fdn;
1385   struct timeval tv;
1386 
1387   /*
1388    * if there are fds that can be reaped, then do so.
1389    * if there are fds left over after, use that to guide the select timeout.
1390    */
1391   if(dlist_count(refcnt_0) > 0)
1392     {
1393       gettimeofday_wrap(&tv);
1394 
1395       while((fdn = (scamper_fd_t *)dlist_head_item(refcnt_0)) != NULL)
1396 	{
1397 	  assert(fdn->refcnt == 0);
1398 
1399 	  if(timeval_cmp(&fdn->tv, &tv) > 0)
1400 	    break;
1401 
1402 	  fd_close(fdn);
1403 	  fd_free(fdn);
1404 	}
1405 
1406       if(fdn != NULL)
1407 	{
1408 	  timeval_diff_tv(&tv, &tv, &fdn->tv);
1409 	  if(timeout == NULL || timeval_cmp(&tv, timeout) < 0)
1410 	    timeout = &tv;
1411 	}
1412     }
1413 
1414   return pollfunc(timeout);
1415 }
1416 
1417 /*
1418  * scamper_fd_fd_get
1419  *
1420  * return the actual file descriptor associated with the scamper_fd_t
1421  */
1422 int scamper_fd_fd_get(const scamper_fd_t *fdn)
1423 {
1424   if(fdn->raw == NULL)
1425     return fdn->fd;
1426   return fdn->raw->fd;
1427 }
1428 
1429 /*
1430  * scamper_fd_fd_set
1431  *
1432  * set the file descriptor being monitored with the scamper_fd_t
1433  */
1434 int scamper_fd_fd_set(scamper_fd_t *fdn, int fd)
1435 {
1436   fdn->fd = fd;
1437   return 0;
1438 }
1439 
1440 /*
1441  * scamper_fd_read_pause
1442  *
1443  * ignore any read events on the fd.
1444  */
1445 void scamper_fd_read_pause(scamper_fd_t *fdn)
1446 {
1447 #ifdef HAVE_KQUEUE
1448   if(kq != -1 && (fdn->read.flags & SCAMPER_FD_POLL_FLAG_INACTIVE) == 0)
1449     fds_kqueue_set(fdn, EVFILT_READ, EV_DELETE);
1450 #endif
1451 
1452 #ifdef HAVE_EPOLL
1453   if(ep != -1 && (fdn->read.flags & SCAMPER_FD_POLL_FLAG_INACTIVE) == 0)
1454     fds_epoll_ctl(fdn, EPOLLIN, EPOLL_CTL_DEL);
1455 #endif
1456 
1457   fdn->read.flags |= SCAMPER_FD_POLL_FLAG_INACTIVE;
1458   return;
1459 }
1460 
1461 /*
1462  * scamper_fd_read_unpause
1463  *
1464  * monitor read events on the fd.  unset the inactive flag, and push the
1465  * node back onto the read list
1466  */
1467 void scamper_fd_read_unpause(scamper_fd_t *fdn)
1468 {
1469   assert(fdn->read.cb != NULL);
1470 
1471   if((fdn->read.flags & SCAMPER_FD_POLL_FLAG_INACTIVE) != 0)
1472     {
1473       fdn->read.flags &= ~(SCAMPER_FD_POLL_FLAG_INACTIVE);
1474 
1475 #ifdef HAVE_KQUEUE
1476       if(kq != -1)
1477 	fds_kqueue_set(fdn, EVFILT_READ, EV_ADD);
1478 #endif
1479 
1480 #ifdef HAVE_EPOLL
1481       if(ep != -1)
1482 	fds_epoll_ctl(fdn, EPOLLIN, EPOLL_CTL_ADD);
1483 #endif
1484 
1485       /*
1486        * the fd may still be on the read fds list, just with the inactive bit
1487        * set.  if it isn't, then we have to put it on the queue.
1488        */
1489       if(fdn->read.list != read_fds)
1490 	{
1491 	  dlist_node_head_push(read_queue, fdn->read.node);
1492 	  fdn->read.list = read_queue;
1493 	}
1494     }
1495 
1496   return;
1497 }
1498 
1499 /*
1500  * scamper_fd_write_pause
1501  *
1502  * ignore any write events on the fd
1503  */
1504 void scamper_fd_write_pause(scamper_fd_t *fdn)
1505 {
1506 #ifdef HAVE_KQUEUE
1507   if(kq != -1 && (fdn->write.flags & SCAMPER_FD_POLL_FLAG_INACTIVE) == 0)
1508     fds_kqueue_set(fdn, EVFILT_WRITE, EV_DELETE);
1509 #endif
1510 
1511 #ifdef HAVE_EPOLL
1512   if(ep != -1 && (fdn->write.flags & SCAMPER_FD_POLL_FLAG_INACTIVE) == 0)
1513     fds_epoll_ctl(fdn, EPOLLOUT, EPOLL_CTL_DEL);
1514 #endif
1515 
1516   fdn->write.flags |= SCAMPER_FD_POLL_FLAG_INACTIVE;
1517   return;
1518 }
1519 
1520 /*
1521  * scamper_fd_write_unpause
1522  *
1523  * monitor write events on the fd.  unset the inactive flag, and push the
1524  * node back onto the write list
1525  */
1526 void scamper_fd_write_unpause(scamper_fd_t *fdn)
1527 {
1528   assert(fdn->write.cb != NULL);
1529 
1530   if((fdn->write.flags & SCAMPER_FD_POLL_FLAG_INACTIVE) != 0)
1531     {
1532       fdn->write.flags &= ~(SCAMPER_FD_POLL_FLAG_INACTIVE);
1533 
1534 #ifdef HAVE_KQUEUE
1535       if(kq != -1)
1536 	fds_kqueue_set(fdn, EVFILT_WRITE, EV_ADD);
1537 #endif
1538 
1539 #ifdef HAVE_EPOLL
1540       if(ep != -1)
1541 	fds_epoll_ctl(fdn, EPOLLOUT, EPOLL_CTL_ADD);
1542 #endif
1543 
1544       /*
1545        * the fd may still be on the write fds list, just with the inactive bit
1546        * set.  if it isn't, then we have to put it on the queue.
1547        */
1548       if(fdn->write.list != write_fds)
1549 	{
1550 	  dlist_node_head_push(write_queue, fdn->write.node);
1551 	  fdn->write.list = write_queue;
1552 	}
1553     }
1554 
1555   return;
1556 }
1557 
1558 void scamper_fd_read_set(scamper_fd_t *fdn, scamper_fd_cb_t cb, void *param)
1559 {
1560   assert(fdn->type == SCAMPER_FD_TYPE_PRIVATE);
1561   fdn->read.cb = cb;
1562   fdn->read.param = param;
1563   return;
1564 }
1565 
1566 void scamper_fd_write_set(scamper_fd_t *fdn, scamper_fd_cb_t cb, void *param)
1567 {
1568   assert(fdn->type == SCAMPER_FD_TYPE_PRIVATE);
1569   fdn->write.cb = cb;
1570   fdn->write.param = param;
1571   return;
1572 }
1573 
1574 scamper_fd_t *scamper_fd_icmp4(void *addr)
1575 {
1576   scamper_fd_t *fdn;
1577   if((fdn = fd_icmp(SCAMPER_FD_TYPE_ICMP4, addr)) != NULL)
1578     {
1579       fdn->read.cb = scamper_icmp4_read_cb;
1580       scamper_fd_read_unpause(fdn);
1581     }
1582   return fdn;
1583 }
1584 
1585 scamper_fd_t *scamper_fd_icmp4_err(void *addr)
1586 {
1587   scamper_fd_t *fdn;
1588   if((fdn = fd_icmp(SCAMPER_FD_TYPE_ICMP4ERR, addr)) != NULL)
1589     {
1590       fdn->read.cb = scamper_icmp4_read_err_cb;
1591       scamper_fd_read_unpause(fdn);
1592     }
1593   return fdn;
1594 }
1595 
1596 scamper_fd_t *scamper_fd_icmp6(void *addr)
1597 {
1598   scamper_fd_t *fdn;
1599   if((fdn = fd_icmp(SCAMPER_FD_TYPE_ICMP6, addr)) != NULL)
1600     {
1601       fdn->read.cb = scamper_icmp6_read_cb;
1602       scamper_fd_read_unpause(fdn);
1603     }
1604   return fdn;
1605 }
1606 
1607 #ifndef _WIN32
1608 scamper_fd_t *scamper_fd_rtsock(void)
1609 {
1610   scamper_fd_t *fdn;
1611 
1612   if((fdn = fd_null(SCAMPER_FD_TYPE_RTSOCK)) != NULL)
1613     {
1614       fdn->read.cb = scamper_rtsock_read_cb;
1615       scamper_fd_read_unpause(fdn);
1616     }
1617 
1618   return fdn;
1619 }
1620 #endif
1621 
1622 scamper_fd_t *scamper_fd_tcp4(void *addr, uint16_t sport)
1623 {
1624   return fd_tcp(SCAMPER_FD_TYPE_TCP4, addr, sport);
1625 }
1626 
1627 scamper_fd_t *scamper_fd_tcp6(void *addr, uint16_t sport)
1628 {
1629   return fd_tcp(SCAMPER_FD_TYPE_TCP6, addr, sport);
1630 }
1631 
1632 scamper_fd_t *scamper_fd_udp4(void *addr, uint16_t sport)
1633 {
1634   scamper_fd_t *fd;
1635 
1636   if((fd = fd_udp(SCAMPER_FD_TYPE_UDP4DG, addr, sport)) == NULL)
1637     return NULL;
1638 
1639   if(fd->raw == NULL)
1640     {
1641       if((fd->raw = fd_udp(SCAMPER_FD_TYPE_UDP4, addr, sport)) == NULL)
1642 	{
1643 	  scamper_fd_free(fd);
1644 	  return NULL;
1645 	}
1646     }
1647   return fd;
1648 }
1649 
1650 scamper_fd_t *scamper_fd_udp6(void *addr, uint16_t sport)
1651 {
1652   return fd_udp(SCAMPER_FD_TYPE_UDP6, addr, sport);
1653 }
1654 
1655 scamper_fd_t *scamper_fd_udp6_err(void *addr, uint16_t sport)
1656 {
1657   scamper_fd_t *fdn;
1658   if((fdn = fd_udp(SCAMPER_FD_TYPE_UDP6ERR, addr, sport)) != NULL)
1659     {
1660       fdn->read.param = fdn;
1661       fdn->read.cb = scamper_udp6_read_err_cb;
1662       scamper_fd_read_unpause(fdn);
1663     }
1664   return fdn;
1665 }
1666 
1667 scamper_fd_t *scamper_fd_ip4(void)
1668 {
1669   return fd_null(SCAMPER_FD_TYPE_IP4);
1670 }
1671 
1672 #ifndef _WIN32
1673 scamper_fd_t *scamper_fd_ifsock(void)
1674 {
1675   return fd_null(SCAMPER_FD_TYPE_IFSOCK);
1676 }
1677 #endif
1678 
1679 scamper_fd_t *scamper_fd_dl(int ifindex)
1680 {
1681   scamper_fd_t *fdn = NULL, findme;
1682   int fd = -1;
1683 
1684   findme.type = SCAMPER_FD_TYPE_DL;
1685   findme.fd_dl_ifindex = ifindex;
1686 
1687   if((fdn = fd_find(&findme)) != NULL)
1688     {
1689       scamper_fd_read_unpause(fdn);
1690       return fdn;
1691     }
1692 
1693   /*
1694    * open the file descriptor for the ifindex, and then allocate a scamper_fd
1695    * for the file descriptor
1696    */
1697   if((fd  = scamper_dl_open(ifindex)) == -1 ||
1698      (fdn = fd_alloc(SCAMPER_FD_TYPE_DL, fd)) == NULL)
1699     {
1700       goto err;
1701     }
1702 
1703   /*
1704    * record the ifindex for the file descriptor, and then allocate the state
1705    * that is maintained with it
1706    */
1707   fdn->fd_dl_ifindex = ifindex;
1708 
1709   /*
1710    * 1. add the file descriptor to the splay tree
1711    * 2. allocate state for the datalink file descriptor
1712    */
1713   if((fdn->fd_tree_node = splaytree_insert(fd_tree, fdn)) == NULL ||
1714      (fdn->fd_list_node = dlist_tail_push(fd_list, fdn)) == NULL ||
1715      (fdn->fd_dl_dl = scamper_dl_state_alloc(fdn)) == NULL)
1716     {
1717       goto err;
1718     }
1719 
1720   /* set the file descriptor up for reading */
1721   fdn->read.cb     = scamper_dl_read_cb;
1722   fdn->read.param  = fdn->fd_dl_dl;
1723   fdn->write.cb    = NULL;
1724   fdn->write.param = NULL;
1725   scamper_fd_read_unpause(fdn);
1726 
1727   scamper_debug(__func__, "fd %d type %s", fdn->fd, fd_tostr(fdn));
1728   return fdn;
1729 
1730  err:
1731   if(fdn != NULL) free(fdn);
1732   if(fd != -1) scamper_dl_close(fd);
1733   return NULL;
1734 }
1735 
1736 /*
1737  * scamper_fd_private
1738  *
1739  * allocate a private fd for scamper to manage.  this fd is not shared amongst
1740  * scamper.
1741  */
1742 scamper_fd_t *scamper_fd_private(int fd, void *param, scamper_fd_cb_t read_cb,
1743 				 scamper_fd_cb_t write_cb)
1744 {
1745   scamper_fd_t *fdn = NULL;
1746 
1747   if((fdn = fd_alloc(SCAMPER_FD_TYPE_PRIVATE, fd)) == NULL)
1748     {
1749       goto err;
1750     }
1751 
1752   if((fdn->fd_list_node = dlist_tail_push(fd_list, fdn)) == NULL)
1753     goto err;
1754 
1755   if(read_cb != NULL)
1756     {
1757       scamper_fd_read_set(fdn, read_cb, param);
1758       scamper_fd_read_unpause(fdn);
1759     }
1760 
1761   if(write_cb != NULL)
1762     {
1763       scamper_fd_write_set(fdn, write_cb, param);
1764       scamper_fd_write_unpause(fdn);
1765     }
1766 
1767   return fdn;
1768 
1769  err:
1770   if(fdn != NULL) fd_free(fdn);
1771   return NULL;
1772 }
1773 
1774 scamper_fd_t *scamper_fd_file(int fd, scamper_fd_cb_t read_cb, void *param)
1775 {
1776   scamper_fd_t *fdn = NULL;
1777 
1778   if((fdn = fd_alloc(SCAMPER_FD_TYPE_FILE, fd)) == NULL ||
1779      (fdn->fd_list_node = dlist_tail_push(fd_list, fdn)) == NULL)
1780     goto err;
1781   fdn->read.cb = read_cb;
1782   fdn->read.param = param;
1783   scamper_fd_read_unpause(fdn);
1784 
1785   return fdn;
1786 
1787  err:
1788   if(fdn != NULL) fd_free(fdn);
1789   return NULL;
1790 }
1791 
1792 /*
1793  * scamper_fd_ifindex
1794  *
1795  * if the file descriptor is associated with a known ifindex, return details
1796  * of it.
1797  */
1798 int scamper_fd_ifindex(const scamper_fd_t *fdn, int *ifindex)
1799 {
1800   if(fdn->type == SCAMPER_FD_TYPE_DL)
1801     {
1802       *ifindex = fdn->fd_dl_ifindex;
1803       return 0;
1804     }
1805 
1806   return -1;
1807 }
1808 
1809 scamper_dl_t *scamper_fd_dl_get(const scamper_fd_t *fdn)
1810 {
1811   assert(fdn != NULL);
1812   assert(fdn->type == SCAMPER_FD_TYPE_DL);
1813   return fdn->fd_dl_dl;
1814 }
1815 
1816 /*
1817  * scamper_fd_addr
1818  *
1819  * if the file descriptor is bound to an address, return details of it.
1820  * -1 if invalid parameters.  0 if not bound.  1 if bound.
1821  */
1822 int scamper_fd_addr(const scamper_fd_t *fdn, void *addr, size_t len)
1823 {
1824   void  *a;
1825   size_t l;
1826 
1827   switch(fdn->type)
1828     {
1829     case SCAMPER_FD_TYPE_UDP4:     a = fdn->fd_udp_addr;  l = 4;  break;
1830     case SCAMPER_FD_TYPE_UDP4DG:   a = fdn->fd_udp_addr;  l = 4;  break;
1831     case SCAMPER_FD_TYPE_UDP6:     a = fdn->fd_udp_addr;  l = 16; break;
1832     case SCAMPER_FD_TYPE_UDP6ERR:  a = fdn->fd_udp_addr;  l = 16; break;
1833     case SCAMPER_FD_TYPE_TCP4:     a = fdn->fd_tcp_addr;  l = 4;  break;
1834     case SCAMPER_FD_TYPE_TCP6:     a = fdn->fd_tcp_addr;  l = 16; break;
1835     case SCAMPER_FD_TYPE_ICMP4:    a = fdn->fd_icmp_addr; l = 4;  break;
1836     case SCAMPER_FD_TYPE_ICMP4ERR: a = fdn->fd_icmp_addr; l = 4;  break;
1837     case SCAMPER_FD_TYPE_ICMP6:    a = fdn->fd_icmp_addr; l = 16; break;
1838     default: return -1;
1839     }
1840 
1841   if(len < l)
1842     return -1;
1843   if(a == NULL)
1844     return 0;
1845   memcpy(addr, a, l);
1846   return 1;
1847 }
1848 
1849 /*
1850  * scamper_fd_sport
1851  *
1852  * if the file descriptor has a known source port, return details of it.
1853  */
1854 int scamper_fd_sport(const scamper_fd_t *fdn, uint16_t *sport)
1855 {
1856   if(SCAMPER_FD_TYPE_IS_UDP(fdn))
1857     {
1858       *sport = fdn->fd_udp_sport;
1859       return 0;
1860     }
1861   else if(SCAMPER_FD_TYPE_IS_TCP(fdn))
1862     {
1863       *sport = fdn->fd_tcp_sport;
1864       return 0;
1865     }
1866   return -1;
1867 }
1868 
1869 /*
1870  * scamper_fd_free
1871  *
1872  * this function reduces the reference count for a given file descriptor.
1873  *
1874  * if zero is reached, the fd will be dealt with when scamper_fd_poll is next
1875  * called.  the fd cannot be summarily removed here without the potential
1876  * to screw up any current call to scamper_fd_poll as that function assumes
1877  * the list remains intact for the duration of any events found with select.
1878  *
1879  */
1880 void scamper_fd_free(scamper_fd_t *fdn)
1881 {
1882   assert(fdn != NULL);
1883   assert(fdn->refcnt > 0);
1884 
1885   if(--fdn->refcnt == 0)
1886     {
1887       if(fdn->raw != NULL)
1888 	{
1889 	  scamper_fd_free(fdn->raw);
1890 	  fdn->raw = NULL;
1891 	}
1892       fd_refcnt_0(fdn);
1893     }
1894 
1895   return;
1896 }
1897 
1898 /*
1899  * alloc_list
1900  *
1901  * helper function to allocate a list for scamper_fds_init
1902  */
1903 static dlist_t *alloc_list(char *name)
1904 {
1905   dlist_t *list;
1906   if((list = dlist_alloc()) == NULL)
1907     printerror(__func__, "alloc %s failed", name);
1908   return list;
1909 }
1910 
1911 /*
1912  * scamper_fds_init
1913  *
1914  * setup the global data structures necessary for scamper to manage a set of
1915  * file descriptors
1916  */
1917 int scamper_fds_init()
1918 {
1919 #ifdef HAVE_GETDTABLESIZE
1920   scamper_debug(__func__, "fd table size: %d", getdtablesize());
1921 #endif
1922 
1923 #ifdef HAVE_POLL
1924   pollfunc = fds_poll;
1925 #endif
1926 
1927 #ifdef HAVE_KQUEUE
1928   if(scamper_option_kqueue())
1929     {
1930       pollfunc = fds_kqueue;
1931       if(fds_kqueue_init() != 0)
1932 	return -1;
1933     }
1934 #endif
1935 
1936 #ifdef HAVE_EPOLL
1937   if(scamper_option_epoll())
1938     {
1939       pollfunc = fds_epoll;
1940       if(fds_epoll_init() != 0)
1941 	return -1;
1942     }
1943 #endif
1944 
1945   if(scamper_option_select() || pollfunc == NULL)
1946     pollfunc = fds_select;
1947 
1948   if((fd_list     = alloc_list("fd_list")) == NULL ||
1949      (read_fds    = alloc_list("read_fds"))   == NULL ||
1950      (read_queue  = alloc_list("read_queue"))  == NULL ||
1951      (write_fds   = alloc_list("write_fds"))  == NULL ||
1952      (write_queue = alloc_list("write_queue")) == NULL ||
1953      (refcnt_0    = alloc_list("refcnt_0"))  == NULL)
1954     {
1955       return -1;
1956     }
1957 
1958   if((fd_tree = splaytree_alloc((splaytree_cmp_t)fd_cmp)) == NULL)
1959     {
1960       printerror(__func__, "alloc fd tree failed");
1961       return -1;
1962     }
1963 
1964   return 0;
1965 }
1966 
1967 /*
1968  * cleanup_list
1969  *
1970  * helper function to remove scamper_fd_poll structures from any lists.
1971  */
1972 static void cleanup_list(dlist_t *list)
1973 {
1974   scamper_fd_poll_t *poll;
1975 
1976   if(list == NULL) return;
1977 
1978   while((poll = dlist_head_pop(list)) != NULL)
1979     {
1980       poll->list = NULL;
1981       poll->node = NULL;
1982     }
1983 
1984   dlist_free(list);
1985 
1986   return;
1987 }
1988 
1989 /*
1990  * scamper_fds_cleanup
1991  *
1992  * tidy up the state allocated to maintain fd records.
1993  */
1994 void scamper_fds_cleanup()
1995 {
1996   scamper_fd_t *fdn;
1997 
1998   /* clean up the lists */
1999   cleanup_list(read_fds);    read_fds = NULL;
2000   cleanup_list(write_fds);   write_fds = NULL;
2001   cleanup_list(read_queue);  read_queue = NULL;
2002   cleanup_list(write_queue); write_queue = NULL;
2003 
2004   /* reap anything on the reap list */
2005   if(refcnt_0 != NULL)
2006     {
2007       while((fdn = (scamper_fd_t *)dlist_head_item(refcnt_0)) != NULL)
2008 	{
2009 	  fd_close(fdn);
2010 	  fd_free(fdn);
2011 	}
2012       dlist_free(refcnt_0);
2013       refcnt_0 = NULL;
2014     }
2015 
2016   /* clean up the tree */
2017   if(fd_tree != NULL)
2018     {
2019       splaytree_free(fd_tree, NULL);
2020       fd_tree = NULL;
2021     }
2022 
2023   /* clean up the list */
2024   if(fd_list != NULL)
2025     {
2026       dlist_free(fd_list);
2027       fd_list = NULL;
2028     }
2029 
2030   /* clean up the array */
2031   if(fd_array != NULL)
2032     {
2033       free(fd_array);
2034       fd_array = NULL;
2035     }
2036 
2037 #ifdef HAVE_POLL
2038   if(poll_fds != NULL)
2039     {
2040       free(poll_fds);
2041       poll_fds = NULL;
2042     }
2043 #endif
2044 
2045 #ifdef HAVE_KQUEUE
2046   if(kq != -1)
2047     {
2048       close(kq);
2049       kq = -1;
2050     }
2051   if(kevlist != NULL)
2052     {
2053       free(kevlist);
2054       kevlist = NULL;
2055     }
2056 #endif
2057 
2058 #ifdef HAVE_EPOLL
2059   if(ep != -1)
2060     {
2061       close(ep);
2062       ep = -1;
2063     }
2064   if(ep_events != NULL)
2065     {
2066       free(ep_events);
2067       ep_events = NULL;
2068     }
2069 #endif
2070 
2071   return;
2072 }
2073