xref: /netbsd/external/bsd/ntp/dist/ntpd/ntp_io.c (revision e03b00c8)
1 /*	$NetBSD: ntp_io.c,v 1.32 2022/10/09 21:41:03 christos Exp $	*/
2 
3 /*
4  * ntp_io.c - input/output routines for ntpd.	The socket-opening code
5  *		   was shamelessly stolen from ntpd.
6  */
7 
8 #ifdef HAVE_CONFIG_H
9 # include <config.h>
10 #endif
11 
12 #include <stdio.h>
13 #include <signal.h>
14 #ifdef HAVE_FNMATCH_H
15 # include <fnmatch.h>
16 # if !defined(FNM_CASEFOLD) && defined(FNM_IGNORECASE)
17 #  define FNM_CASEFOLD FNM_IGNORECASE
18 # endif
19 #endif
20 #ifdef HAVE_SYS_PARAM_H
21 # include <sys/param.h>
22 #endif
23 #ifdef HAVE_SYS_IOCTL_H
24 # include <sys/ioctl.h>
25 #endif
26 #ifdef HAVE_SYS_SOCKIO_H	/* UXPV: SIOC* #defines (Frank Vance <fvance@waii.com>) */
27 # include <sys/sockio.h>
28 #endif
29 #ifdef HAVE_SYS_UIO_H
30 # include <sys/uio.h>
31 #endif
32 
33 #include "ntp_machine.h"
34 #include "ntpd.h"
35 #include "ntp_io.h"
36 #include "iosignal.h"
37 #include "ntp_lists.h"
38 #include "ntp_refclock.h"
39 #include "ntp_stdlib.h"
40 #include "ntp_worker.h"
41 #include "ntp_request.h"
42 #include "ntp_assert.h"
43 #include "timevalops.h"
44 #include "timespecops.h"
45 #include "ntpd-opts.h"
46 #include "safecast.h"
47 
48 /* Don't include ISC's version of IPv6 variables and structures */
49 #define ISC_IPV6_H 1
50 #include <isc/mem.h>
51 #include <isc/interfaceiter.h>
52 #include <isc/netaddr.h>
53 #include <isc/result.h>
54 #include <isc/sockaddr.h>
55 
56 #ifdef SIM
57 #include "ntpsim.h"
58 #endif
59 
60 #ifdef HAS_ROUTING_SOCKET
61 # include <net/route.h>
62 # ifdef HAVE_RTNETLINK
63 #  include <linux/rtnetlink.h>
64 # endif
65 #endif
66 
67 /*
68  * setsockopt does not always have the same arg declaration
69  * across all platforms. If it's not defined we make it empty
70  */
71 
72 #ifndef SETSOCKOPT_ARG_CAST
73 #define SETSOCKOPT_ARG_CAST
74 #endif
75 
76 extern int listen_to_virtual_ips;
77 
78 #ifndef IPTOS_DSCP_EF
79 #define IPTOS_DSCP_EF 0xb8
80 #endif
81 int qos = IPTOS_DSCP_EF;	/* QoS RFC3246 */
82 
83 #ifdef LEAP_SMEAR
84 /* TODO burnicki: This should be moved to ntp_timer.c, but if we do so
85  * we get a linker error. Since we're running out of time before the leap
86  * second occurs, we leave it here where it just works.
87  */
88 int leap_smear_intv;
89 #endif
90 
91 /*
92  * NIC rule entry
93  */
94 typedef struct nic_rule_tag nic_rule;
95 
96 struct nic_rule_tag {
97 	nic_rule *	next;
98 	nic_rule_action	action;
99 	nic_rule_match	match_type;
100 	char *		if_name;
101 	sockaddr_u	addr;
102 	int		prefixlen;
103 };
104 
105 /*
106  * NIC rule listhead.  Entries are added at the head so that the first
107  * match in the list is the last matching rule specified.
108  */
109 nic_rule *nic_rule_list;
110 
111 
112 #if defined(SO_BINTIME) && defined(SCM_BINTIME) && defined(CMSG_FIRSTHDR)
113 #  define HAVE_PACKET_TIMESTAMP
114 #  define HAVE_BINTIME
115 #  ifdef BINTIME_CTLMSGBUF_SIZE
116 #   define CMSG_BUFSIZE BINTIME_CTLMSGBUF_SIZE
117 #  else
118 #   define CMSG_BUFSIZE  1536 /* moderate default */
119 #  endif
120 #elif defined(SO_TIMESTAMPNS) && defined(SCM_TIMESTAMPNS) && defined(CMSG_FIRSTHDR)
121 #  define HAVE_PACKET_TIMESTAMP
122 #  define HAVE_TIMESTAMPNS
123 #  ifdef TIMESTAMPNS_CTLMSGBUF_SIZE
124 #   define CMSG_BUFSIZE TIMESTAMPNS_CTLMSGBUF_SIZE
125 #  else
126 #   define CMSG_BUFSIZE  1536 /* moderate default */
127 #  endif
128 #elif defined(SO_TIMESTAMP) && defined(SCM_TIMESTAMP) && defined(CMSG_FIRSTHDR)
129 #  define HAVE_PACKET_TIMESTAMP
130 #  define HAVE_TIMESTAMP
131 #  ifdef TIMESTAMP_CTLMSGBUF_SIZE
132 #   define CMSG_BUFSIZE TIMESTAMP_CTLMSGBUF_SIZE
133 #  else
134 #   define CMSG_BUFSIZE  1536 /* moderate default */
135 #  endif
136 #else
137 /* fill in for old/other timestamp interfaces */
138 #endif
139 
140 #if defined(SYS_WINNT)
141 #include "win32_io.h"
142 #include <isc/win32os.h>
143 #endif
144 
145 /*
146  * We do asynchronous input using the SIGIO facility.  A number of
147  * recvbuf buffers are preallocated for input.	In the signal
148  * handler we poll to see which sockets are ready and read the
149  * packets from them into the recvbuf's along with a time stamp and
150  * an indication of the source host and the interface it was received
151  * through.  This allows us to get as accurate receive time stamps
152  * as possible independent of other processing going on.
153  *
154  * We watch the number of recvbufs available to the signal handler
155  * and allocate more when this number drops below the low water
156  * mark.  If the signal handler should run out of buffers in the
157  * interim it will drop incoming frames, the idea being that it is
158  * better to drop a packet than to be inaccurate.
159  */
160 
161 
162 /*
163  * Other statistics of possible interest
164  */
165 volatile u_long packets_dropped;	/* total number of packets dropped on reception */
166 volatile u_long packets_ignored;	/* packets received on wild card interface */
167 volatile u_long packets_received;	/* total number of packets received */
168 	 u_long packets_sent;		/* total number of packets sent */
169 	 u_long packets_notsent;	/* total number of packets which couldn't be sent */
170 
171 volatile u_long handler_calls;	/* number of calls to interrupt handler */
172 volatile u_long handler_pkts;	/* number of pkts received by handler */
173 u_long io_timereset;		/* time counters were reset */
174 
175 /*
176  * Interface stuff
177  */
178 endpt *	any_interface;		/* wildcard ipv4 interface */
179 endpt *	any6_interface;		/* wildcard ipv6 interface */
180 endpt *	loopback_interface;	/* loopback ipv4 interface */
181 
182 static isc_boolean_t broadcast_client_enabled;	/* is broadcast client enabled */
183 u_int sys_ifnum;			/* next .ifnum to assign */
184 int ninterfaces;			/* Total number of interfaces */
185 
186 int disable_dynamic_updates;		/* scan interfaces once only */
187 
188 #ifdef REFCLOCK
189 /*
190  * Refclock stuff.	We keep a chain of structures with data concerning
191  * the guys we are doing I/O for.
192  */
193 static	struct refclockio *refio;
194 #endif /* REFCLOCK */
195 
196 /*
197  * File descriptor masks etc. for call to select
198  * Not needed for I/O Completion Ports or anything outside this file
199  */
200 static fd_set activefds;
201 static int maxactivefd;
202 
203 /*
204  * bit alternating value to detect verified interfaces during an update cycle
205  */
206 static  u_short		sys_interphase = 0;
207 
208 static endpt *	new_interface(endpt *);
209 static void	add_interface(endpt *);
210 static int	update_interfaces(u_short, interface_receiver_t,
211 				  void *);
212 static void	remove_interface(endpt *);
213 static endpt *	create_interface(u_short, endpt *);
214 
215 static int	is_wildcard_addr	(const sockaddr_u *);
216 
217 /*
218  * Multicast functions
219  */
220 static	isc_boolean_t	addr_ismulticast	(sockaddr_u *);
221 static	isc_boolean_t	is_anycast		(sockaddr_u *,
222 						 const char *);
223 
224 /*
225  * Not all platforms support multicast
226  */
227 #ifdef MCAST
228 static	isc_boolean_t	socket_multicast_enable	(endpt *, sockaddr_u *);
229 static	isc_boolean_t	socket_multicast_disable(endpt *, sockaddr_u *);
230 #endif
231 
232 #ifdef DEBUG
233 static void interface_dump	(const endpt *);
234 static void sockaddr_dump	(const sockaddr_u *);
235 static void print_interface	(const endpt *, const char *, const char *);
236 #define DPRINT_INTERFACE(level, args) do { if (debug >= (level)) { print_interface args; } } while (0)
237 #else
238 #define DPRINT_INTERFACE(level, args) do {} while (0)
239 #endif
240 
241 typedef struct vsock vsock_t;
242 enum desc_type { FD_TYPE_SOCKET, FD_TYPE_FILE };
243 
244 struct vsock {
245 	vsock_t	*	link;
246 	SOCKET		fd;
247 	enum desc_type	type;
248 };
249 
250 vsock_t	*fd_list;
251 
252 #if !defined(HAVE_IO_COMPLETION_PORT) && defined(HAS_ROUTING_SOCKET)
253 /*
254  * async notification processing (e. g. routing sockets)
255  */
256 /*
257  * support for receiving data on fd that is not a refclock or a socket
258  * like e. g. routing sockets
259  */
260 struct asyncio_reader {
261 	struct asyncio_reader *link;		    /* the list this is being kept in */
262 	SOCKET fd;				    /* fd to be read */
263 	void  *data;				    /* possibly local data */
264 	void (*receiver)(struct asyncio_reader *);  /* input handler */
265 };
266 
267 struct asyncio_reader *asyncio_reader_list;
268 
269 static void delete_asyncio_reader (struct asyncio_reader *);
270 static struct asyncio_reader *new_asyncio_reader (void);
271 static void add_asyncio_reader (struct asyncio_reader *, enum desc_type);
272 static void remove_asyncio_reader (struct asyncio_reader *);
273 
274 #endif /* !defined(HAVE_IO_COMPLETION_PORT) && defined(HAS_ROUTING_SOCKET) */
275 
276 static void init_async_notifications (void);
277 
278 static	int	addr_eqprefix	(const sockaddr_u *, const sockaddr_u *,
279 				 int);
280 static int	addr_samesubnet	(const sockaddr_u *, const sockaddr_u *,
281 				 const sockaddr_u *, const sockaddr_u *);
282 static	int	create_sockets	(u_short);
283 static	SOCKET	open_socket	(sockaddr_u *, int, int, endpt *);
284 static	void	set_reuseaddr	(int);
285 static	isc_boolean_t	socket_broadcast_enable	 (struct interface *, SOCKET, sockaddr_u *);
286 
287 #if !defined(HAVE_IO_COMPLETION_PORT) && !defined(HAVE_SIGNALED_IO)
288 static	char *	fdbits		(int, const fd_set *);
289 #endif
290 #ifdef  OS_MISSES_SPECIFIC_ROUTE_UPDATES
291 static	isc_boolean_t	socket_broadcast_disable (struct interface *, sockaddr_u *);
292 #endif
293 
294 typedef struct remaddr remaddr_t;
295 
296 struct remaddr {
297 	remaddr_t *		link;
298 	sockaddr_u		addr;
299 	endpt *			ep;
300 };
301 
302 remaddr_t *	remoteaddr_list;
303 endpt *		ep_list;	/* complete endpt list */
304 endpt *		mc4_list;	/* IPv4 mcast-capable unicast endpts */
305 endpt *		mc6_list;	/* IPv6 mcast-capable unicast endpts */
306 
307 static endpt *	wildipv4;
308 static endpt *	wildipv6;
309 
310 #ifdef SYS_WINNT
311 int accept_wildcard_if_for_winnt;
312 #else
313 const int accept_wildcard_if_for_winnt = FALSE;
314 #endif
315 
316 static void	add_fd_to_list		(SOCKET, enum desc_type);
317 static endpt *	find_addr_in_list	(sockaddr_u *);
318 static endpt *	find_flagged_addr_in_list(sockaddr_u *, u_int32);
319 static void	delete_addr_from_list	(sockaddr_u *);
320 static void	delete_interface_from_list(endpt *);
321 static void	close_and_delete_fd_from_list(SOCKET);
322 static void	add_addr_to_list	(sockaddr_u *, endpt *);
323 static void	create_wildcards	(u_short);
324 static endpt *	findlocalinterface	(sockaddr_u *, int, int);
325 static endpt *	findclosestinterface	(sockaddr_u *, int);
326 #ifdef DEBUG
327 static const char *	action_text	(nic_rule_action);
328 #endif
329 static nic_rule_action	interface_action(char *, sockaddr_u *, u_int32);
330 static void		convert_isc_if	(isc_interface_t *,
331 					 endpt *, u_short);
332 static void		calc_addr_distance(sockaddr_u *,
333 					   const sockaddr_u *,
334 					   const sockaddr_u *);
335 static int		cmp_addr_distance(const sockaddr_u *,
336 					  const sockaddr_u *);
337 
338 /*
339  * Routines to read the ntp packets
340  */
341 #if !defined(HAVE_IO_COMPLETION_PORT)
342 static inline int	read_network_packet	(SOCKET, struct interface *, l_fp);
343 static void		ntpd_addremove_io_fd	(int, int, int);
344 static void 		input_handler_scan	(const l_fp*, const fd_set*);
345 static int/*BOOL*/	sanitize_fdset		(int errc);
346 #ifdef REFCLOCK
347 static inline int	read_refclock_packet	(SOCKET, struct refclockio *, l_fp);
348 #endif
349 #ifdef HAVE_SIGNALED_IO
350 static void 		input_handler		(l_fp*);
351 #endif
352 #endif
353 
354 
355 #ifndef HAVE_IO_COMPLETION_PORT
356 void
maintain_activefds(int fd,int closing)357 maintain_activefds(
358 	int fd,
359 	int closing
360 	)
361 {
362 	int i;
363 
364 	if (fd < 0 || fd >= FD_SETSIZE) {
365 		msyslog(LOG_ERR,
366 			"Too many sockets in use, FD_SETSIZE %d exceeded by fd %d",
367 			FD_SETSIZE, fd);
368 		exit(1);
369 	}
370 
371 	if (!closing) {
372 		FD_SET(fd, &activefds);
373 		maxactivefd = max(fd, maxactivefd);
374 	} else {
375 		FD_CLR(fd, &activefds);
376 		if (maxactivefd && fd == maxactivefd) {
377 			for (i = maxactivefd - 1; i >= 0; i--)
378 				if (FD_ISSET(i, &activefds)) {
379 					maxactivefd = i;
380 					break;
381 				}
382 			INSIST(fd != maxactivefd);
383 		}
384 	}
385 }
386 #endif	/* !HAVE_IO_COMPLETION_PORT */
387 
388 
389 #ifdef DEBUG_TIMING
390 /*
391  * collect timing information for various processing
392  * paths. currently we only pass them on to the file
393  * for later processing. this could also do histogram
394  * based analysis in other to reduce the load (and skew)
395  * dur to the file output
396  */
397 void
collect_timing(struct recvbuf * rb,const char * tag,int count,l_fp * dts)398 collect_timing(struct recvbuf *rb, const char *tag, int count, l_fp *dts)
399 {
400 	char buf[256];
401 
402 	snprintf(buf, sizeof(buf), "%s %d %s %s",
403 		 (rb != NULL)
404 		     ? ((rb->dstadr != NULL)
405 			    ? stoa(&rb->recv_srcadr)
406 			    : "-REFCLOCK-")
407 		     : "-",
408 		 count, lfptoa(dts, 9), tag);
409 	record_timing_stats(buf);
410 }
411 #endif
412 
413 /*
414  * About dynamic interfaces, sockets, reception and more...
415  *
416  * the code solves following tasks:
417  *
418  *   - keep a current list of active interfaces in order
419  *     to bind to the interface address on NTP_PORT so that
420  *     all wild and specific bindings for NTP_PORT are taken by ntpd
421  *     to avoid other daemons messing with the time or sockets.
422  *   - all interfaces keep a list of peers that are referencing
423  *     the interface in order to quickly re-assign the peers to
424  *     new interface in case an interface is deleted (=> gone from system or
425  *     down)
426  *   - have a preconfigured socket ready with the right local address
427  *     for transmission and reception
428  *   - have an address list for all destination addresses used within ntpd
429  *     to find the "right" preconfigured socket.
430  *   - facilitate updating the internal interface list with respect to
431  *     the current kernel state
432  *
433  * special issues:
434  *
435  *   - mapping of multicast addresses to the interface affected is not always
436  *     one to one - especially on hosts with multiple interfaces
437  *     the code here currently allocates a separate interface entry for those
438  *     multicast addresses
439  *     iff it is able to bind to a *new* socket with the multicast address (flags |= MCASTIF)
440  *     in case of failure the multicast address is bound to an existing interface.
441  *   - on some systems it is perfectly legal to assign the same address to
442  *     multiple interfaces. Therefore this code does not keep a list of interfaces
443  *     but a list of interfaces that represent a unique address as determined by the kernel
444  *     by the procedure in findlocalinterface. Thus it is perfectly legal to see only
445  *     one representative of a group of real interfaces if they share the same address.
446  *
447  * Frank Kardel 20050910
448  */
449 
450 /*
451  * init_io - initialize I/O module.
452  */
453 void
init_io(void)454 init_io(void)
455 {
456 	/* Init buffer free list and stat counters */
457 	init_recvbuff(RECV_INIT);
458 #ifdef SO_RERROR
459 	/* route(4) overflow can be observed */
460 	interface_interval = 0;
461 #else
462 	/* update interface every 5 minutes as default */
463 	interface_interval = 300;
464 #endif
465 
466 #ifdef WORK_PIPE
467 	addremove_io_fd = &ntpd_addremove_io_fd;
468 #endif
469 
470 #if defined(SYS_WINNT)
471 	init_io_completion_port();
472 #elif defined(HAVE_SIGNALED_IO)
473 	(void) set_signal(input_handler);
474 #endif
475 }
476 
477 
/*
 * ntpd_addremove_io_fd - start or stop watching a descriptor.
 *
 *	fd		descriptor concerned
 *	is_pipe		unused here
 *	remove_it	nonzero to stop watching fd, zero to start
 *
 * With signaled I/O a newly added descriptor is first set up via
 * init_socket_sig(); the active fd set is then updated either way.
 */
static void
ntpd_addremove_io_fd(int fd, int is_pipe, int remove_it)
{
	UNUSED_ARG(is_pipe);

#ifdef HAVE_SIGNALED_IO
	if (0 == remove_it)
		init_socket_sig(fd);
#endif	/* HAVE_SIGNALED_IO */

	maintain_activefds(fd, remove_it);
}
494 
495 
496 /*
497  * io_open_sockets - call socket creation routine
498  */
499 void
io_open_sockets(void)500 io_open_sockets(void)
501 {
502 	static int already_opened;
503 
504 	if (already_opened || HAVE_OPT( SAVECONFIGQUIT ))
505 		return;
506 
507 	already_opened = 1;
508 
509 	/*
510 	 * Create the sockets
511 	 */
512 	BLOCKIO();
513 	create_sockets(NTP_PORT);
514 	UNBLOCKIO();
515 
516 	init_async_notifications();
517 
518 	DPRINTF(3, ("io_open_sockets: maxactivefd %d\n", maxactivefd));
519 }
520 
521 
522 #ifdef DEBUG
523 /*
524  * function to dump the contents of the interface structure
525  * for debugging use only.
526  * We face a dilemma here -- sockets are FDs under POSIX and
527  * actually HANDLES under Windows. So we use '%lld' as format
528  * and cast the value to 'long long'; this should not hurt
529  * with UNIX-like systems and does not truncate values on Win64.
530  */
531 void
interface_dump(const endpt * itf)532 interface_dump(const endpt *itf)
533 {
534 	printf("Dumping interface: %p\n", itf);
535 	printf("fd = %lld\n", (long long)itf->fd);
536 	printf("bfd = %lld\n", (long long)itf->bfd);
537 	printf("sin = %s,\n", stoa(&itf->sin));
538 	sockaddr_dump(&itf->sin);
539 	printf("bcast = %s,\n", stoa(&itf->bcast));
540 	sockaddr_dump(&itf->bcast);
541 	printf("mask = %s,\n", stoa(&itf->mask));
542 	sockaddr_dump(&itf->mask);
543 	printf("name = %s\n", itf->name);
544 	printf("flags = 0x%08x\n", itf->flags);
545 	printf("last_ttl = %d\n", itf->last_ttl);
546 	printf("addr_refid = %08x\n", itf->addr_refid);
547 	printf("num_mcast = %d\n", itf->num_mcast);
548 	printf("received = %ld\n", itf->received);
549 	printf("sent = %ld\n", itf->sent);
550 	printf("notsent = %ld\n", itf->notsent);
551 	printf("ifindex = %u\n", itf->ifindex);
552 	printf("peercnt = %u\n", itf->peercnt);
553 	printf("phase = %u\n", itf->phase);
554 }
555 
556 /*
557  * sockaddr_dump - hex dump the start of a sockaddr_u
558  */
559 static void
sockaddr_dump(const sockaddr_u * psau)560 sockaddr_dump(const sockaddr_u *psau)
561 {
562 	/* Limit the size of the sockaddr_in6 hex dump */
563 	const int maxsize = min(32, sizeof(psau->sa6));
564 	const u_char *	cp;
565 	int		i;
566 
567 	/* XXX: Should we limit maxsize based on psau->saX.sin_family? */
568 	cp = (const void *)&psau->sa6;
569 
570 	for(i = 0; i < maxsize; i++) {
571 		printf("%02x", *cp++);
572 		if (!((i + 1) % 4))
573 			printf(" ");
574 	}
575 	printf("\n");
576 }
577 
578 /*
579  * print_interface - helper to output debug information
580  */
581 static void
print_interface(const endpt * iface,const char * pfx,const char * sfx)582 print_interface(const endpt *iface, const char *pfx, const char *sfx)
583 {
584 	printf("%sinterface #%d: fd=%lld, bfd=%lld, name=%s, flags=0x%x, ifindex=%u, sin=%s",
585 	       pfx,
586 	       iface->ifnum,
587 	       (long long)iface->fd,
588 	       (long long)iface->bfd,
589 	       iface->name,
590 	       iface->flags,
591 	       iface->ifindex,
592 	       stoa(&iface->sin));
593 	if (AF_INET == iface->family) {
594 		if (iface->flags & INT_BROADCAST)
595 			printf(", bcast=%s", stoa(&iface->bcast));
596 		printf(", mask=%s", stoa(&iface->mask));
597 	}
598 	printf(", %s:%s",
599 	       (iface->ignore_packets)
600 		   ? "Disabled"
601 		   : "Enabled",
602 	       sfx);
603 	if (debug > 4)	/* in-depth debugging only */
604 		interface_dump(iface);
605 }
606 #endif
607 
608 #if !defined(HAVE_IO_COMPLETION_PORT) && defined(HAS_ROUTING_SOCKET)
609 /*
610  * create an asyncio_reader structure
611  */
612 static struct asyncio_reader *
new_asyncio_reader(void)613 new_asyncio_reader(void)
614 {
615 	struct asyncio_reader *reader;
616 
617 	reader = emalloc_zero(sizeof(*reader));
618 	reader->fd = INVALID_SOCKET;
619 
620 	return reader;
621 }
622 
/*
 * delete_asyncio_reader - release the storage of a reader.  The
 * reader is neither unlinked nor is its descriptor closed here.
 */
static void
delete_asyncio_reader(struct asyncio_reader *reader)
{
	free(reader);
}
633 
634 /*
635  * add asynchio_reader
636  */
637 static void
add_asyncio_reader(struct asyncio_reader * reader,enum desc_type type)638 add_asyncio_reader(
639 	struct asyncio_reader *	reader,
640 	enum desc_type		type)
641 {
642 	LINK_SLIST(asyncio_reader_list, reader, link);
643 	add_fd_to_list(reader->fd, type);
644 }
645 
646 /*
647  * remove asynchio_reader
648  */
649 static void
remove_asyncio_reader(struct asyncio_reader * reader)650 remove_asyncio_reader(
651 	struct asyncio_reader *reader
652 	)
653 {
654 	struct asyncio_reader *unlinked;
655 
656 	UNLINK_SLIST(unlinked, asyncio_reader_list, reader, link,
657 	    struct asyncio_reader);
658 
659 	if (reader->fd != INVALID_SOCKET)
660 		close_and_delete_fd_from_list(reader->fd);
661 
662 	reader->fd = INVALID_SOCKET;
663 }
664 #endif /* !defined(HAVE_IO_COMPLETION_PORT) && defined(HAS_ROUTING_SOCKET) */
665 
666 
667 /* compare two sockaddr prefixes */
668 static int
addr_eqprefix(const sockaddr_u * a,const sockaddr_u * b,int prefixlen)669 addr_eqprefix(
670 	const sockaddr_u *	a,
671 	const sockaddr_u *	b,
672 	int			prefixlen
673 	)
674 {
675 	isc_netaddr_t		isc_a;
676 	isc_netaddr_t		isc_b;
677 	isc_sockaddr_t		isc_sa;
678 
679 	ZERO(isc_sa);
680 	memcpy(&isc_sa.type, a, min(sizeof(isc_sa.type), sizeof(*a)));
681 	isc_netaddr_fromsockaddr(&isc_a, &isc_sa);
682 
683 	ZERO(isc_sa);
684 	memcpy(&isc_sa.type, b, min(sizeof(isc_sa.type), sizeof(*b)));
685 	isc_netaddr_fromsockaddr(&isc_b, &isc_sa);
686 
687 	return (int)isc_netaddr_eqprefix(&isc_a, &isc_b,
688 					 (u_int)prefixlen);
689 }
690 
691 
692 static int
addr_samesubnet(const sockaddr_u * a,const sockaddr_u * a_mask,const sockaddr_u * b,const sockaddr_u * b_mask)693 addr_samesubnet(
694 	const sockaddr_u *	a,
695 	const sockaddr_u *	a_mask,
696 	const sockaddr_u *	b,
697 	const sockaddr_u *	b_mask
698 	)
699 {
700 	const u_int32 *	pa;
701 	const u_int32 *	pa_limit;
702 	const u_int32 *	pb;
703 	const u_int32 *	pm;
704 	size_t		loops;
705 
706 	REQUIRE(AF(a) == AF(a_mask));
707 	REQUIRE(AF(b) == AF(b_mask));
708 	/*
709 	 * With address and mask families verified to match, comparing
710 	 * the masks also validates the address's families match.
711 	 */
712 	if (!SOCK_EQ(a_mask, b_mask))
713 		return FALSE;
714 
715 	if (IS_IPV6(a)) {
716 		loops = sizeof(NSRCADR6(a)) / sizeof(*pa);
717 		pa = (const void *)&NSRCADR6(a);
718 		pb = (const void *)&NSRCADR6(b);
719 		pm = (const void *)&NSRCADR6(a_mask);
720 	} else {
721 		loops = sizeof(NSRCADR(a)) / sizeof(*pa);
722 		pa = (const void *)&NSRCADR(a);
723 		pb = (const void *)&NSRCADR(b);
724 		pm = (const void *)&NSRCADR(a_mask);
725 	}
726 	for (pa_limit = pa + loops; pa < pa_limit; pa++, pb++, pm++)
727 		if ((*pa & *pm) != (*pb & *pm))
728 			return FALSE;
729 
730 	return TRUE;
731 }
732 
733 
734 /*
735  * interface list enumerator - visitor pattern
736  */
737 void
interface_enumerate(interface_receiver_t receiver,void * data)738 interface_enumerate(
739 	interface_receiver_t	receiver,
740 	void *			data
741 	)
742 {
743 	interface_info_t ifi;
744 
745 	ifi.action = IFS_EXISTS;
746 	for (ifi.ep = ep_list; ifi.ep != NULL; ifi.ep = ifi.ep->elink)
747 		(*receiver)(data, &ifi);
748 }
749 
750 /*
751  * do standard initialization of interface structure
752  */
753 static void
init_interface(endpt * ep)754 init_interface(
755 	endpt *ep
756 	)
757 {
758 	ZERO(*ep);
759 	ep->fd = INVALID_SOCKET;
760 	ep->bfd = INVALID_SOCKET;
761 	ep->phase = sys_interphase;
762 }
763 
764 
765 /*
766  * create new interface structure initialize from
767  * template structure or via standard initialization
768  * function
769  */
770 static struct interface *
new_interface(struct interface * interface)771 new_interface(
772 	struct interface *interface
773 	)
774 {
775 	struct interface *	iface;
776 
777 	iface = emalloc(sizeof(*iface));
778 
779 	if (NULL == interface)
780 		init_interface(iface);
781 	else				/* use the template */
782 		memcpy(iface, interface, sizeof(*iface));
783 
784 	/* count every new instance of an interface in the system */
785 	iface->ifnum = sys_ifnum++;
786 	iface->starttime = current_time;
787 
788 #   ifdef HAVE_IO_COMPLETION_PORT
789 	if (!io_completion_port_add_interface(iface)) {
790 		msyslog(LOG_EMERG, "cannot register interface with IO engine -- will exit now");
791 		exit(1);
792 	}
793 #   endif
794 	return iface;
795 }
796 
797 
798 /*
799  * return interface storage into free memory pool
800  */
801 static void
delete_interface(endpt * ep)802 delete_interface(
803 	endpt *ep
804 	)
805 {
806 #    ifdef HAVE_IO_COMPLETION_PORT
807 	io_completion_port_remove_interface(ep);
808 #    endif
809 	free(ep);
810 }
811 
812 
813 /*
814  * link interface into list of known interfaces
815  */
816 static void
add_interface(endpt * ep)817 add_interface(
818 	endpt *	ep
819 	)
820 {
821 	endpt **	pmclisthead;
822 	endpt *		scan;
823 	endpt *		scan_next;
824 	endpt *		unlinked;
825 	sockaddr_u *	addr;
826 	int		ep_local;
827 	int		scan_local;
828 	int		same_subnet;
829 	int		ep_univ_iid;	/* iface ID from MAC address */
830 	int		scan_univ_iid;	/* see RFC 4291 */
831 	int		ep_privacy;	/* random local iface ID */
832 	int		scan_privacy;	/* see RFC 4941 */
833 	int		rc;
834 
835 	/* Calculate the refid */
836 	ep->addr_refid = addr2refid(&ep->sin);
837 	/* link at tail so ntpdc -c ifstats index increases each row */
838 	LINK_TAIL_SLIST(ep_list, ep, elink, endpt);
839 	ninterfaces++;
840 #ifdef MCAST
841 	/* the rest is for enabled multicast-capable addresses only */
842 	if (ep->ignore_packets || !(INT_MULTICAST & ep->flags) ||
843 	    INT_LOOPBACK & ep->flags)
844 		return;
845 # ifndef INCLUDE_IPV6_MULTICAST_SUPPORT
846 	if (AF_INET6 == ep->family)
847 		return;
848 # endif
849 	pmclisthead = (AF_INET == ep->family)
850 			 ? &mc4_list
851 			 : &mc6_list;
852 
853 	if (AF_INET6 == ep->family) {
854 		ep_local =
855 		    IN6_IS_ADDR_LINKLOCAL(PSOCK_ADDR6(&ep->sin)) ||
856 		    IN6_IS_ADDR_SITELOCAL(PSOCK_ADDR6(&ep->sin));
857 		ep_univ_iid = IS_IID_UNIV(&ep->sin);
858 		ep_privacy = !!(INT_PRIVACY & ep->flags);
859 	} else {
860 		ep_local = FALSE;
861 		ep_univ_iid = FALSE;
862 		ep_privacy = FALSE;
863 	}
864 	DPRINTF(4, ("add_interface mcast-capable %s%s%s%s\n",
865 		    stoa(&ep->sin),
866 		    (ep_local) ? " link/scope-local" : "",
867 		    (ep_univ_iid) ? " univ-IID" : "",
868 		    (ep_privacy) ? " privacy" : ""));
869 	/*
870 	 * If we have multiple local addresses on the same network
871 	 * interface, and some are link- or site-local, do not multicast
872 	 * out from the link-/site-local addresses by default, to avoid
873 	 * duplicate manycastclient associations between v6 peers using
874 	 * link-local and global addresses.  link-local can still be
875 	 * chosen using "nic ignore myv6globalprefix::/64".
876 	 * Similarly, if we have multiple global addresses from the same
877 	 * prefix on the same network interface, multicast from one,
878 	 * preferring EUI-64, then static, then least RFC 4941 privacy
879 	 * addresses.
880 	 */
881 	for (scan = *pmclisthead; scan != NULL; scan = scan_next) {
882 		scan_next = scan->mclink;
883 		if (ep->family != scan->family)
884 			continue;
885 		if (strcmp(ep->name, scan->name))
886 			continue;
887 		same_subnet = addr_samesubnet(&ep->sin, &ep->mask,
888 					      &scan->sin, &scan->mask);
889 		if (AF_INET6 == ep->family) {
890 			addr = &scan->sin;
891 			scan_local =
892 			    IN6_IS_ADDR_LINKLOCAL(PSOCK_ADDR6(addr)) ||
893 			    IN6_IS_ADDR_SITELOCAL(PSOCK_ADDR6(addr));
894 			scan_univ_iid = IS_IID_UNIV(addr);
895 			scan_privacy = !!(INT_PRIVACY & scan->flags);
896 		} else {
897 			scan_local = FALSE;
898 			scan_univ_iid = FALSE;
899 			scan_privacy = FALSE;
900 		}
901 		DPRINTF(4, ("add_interface mcast-capable scan %s%s%s%s\n",
902 			    stoa(&scan->sin),
903 			    (scan_local) ? " link/scope-local" : "",
904 			    (scan_univ_iid) ? " univ-IID" : "",
905 			    (scan_privacy) ? " privacy" : ""));
906 		if ((ep_local && !scan_local) || (same_subnet &&
907 		    ((ep_privacy && !scan_privacy) ||
908 		     (!ep_univ_iid && scan_univ_iid)))) {
909 			DPRINTF(4, ("did not add %s to %s of IPv6 multicast-capable list which already has %s\n",
910 				stoa(&ep->sin),
911 				(ep_local)
912 				    ? "tail"
913 				    : "head",
914 				stoa(&scan->sin)));
915 			return;
916 		}
917 		if ((scan_local && !ep_local) || (same_subnet &&
918 		    ((scan_privacy && !ep_privacy) ||
919 		     (!scan_univ_iid && ep_univ_iid)))) {
920 			UNLINK_SLIST(unlinked, *pmclisthead,
921 				     scan, mclink, endpt);
922 			DPRINTF(4, ("%s %s from IPv6 multicast-capable list to add %s\n",
923 				(unlinked != scan)
924 				    ? "Failed to remove"
925 				    : "removed",
926 				stoa(&scan->sin), stoa(&ep->sin)));
927 		}
928 	}
929 	/*
930 	 * Add link/site local at the tail of the multicast-
931 	 * capable unicast interfaces list, so that ntpd will
932 	 * send from global addresses before link-/site-local
933 	 * ones.
934 	 */
935 	if (ep_local)
936 		LINK_TAIL_SLIST(*pmclisthead, ep, mclink, endpt);
937 	else
938 		LINK_SLIST(*pmclisthead, ep, mclink);
939 	DPRINTF(4, ("added %s to %s of IPv%s multicast-capable unicast local address list\n",
940 		stoa(&ep->sin),
941 		(ep_local)
942 		    ? "tail"
943 		    : "head",
944 		(AF_INET == ep->family)
945 		    ? "4"
946 		    : "6"));
947 
948 	if (INVALID_SOCKET == ep->fd)
949 		return;
950 
951 	/*
952 	 * select the local address from which to send to multicast.
953 	 */
954 	switch (AF(&ep->sin)) {
955 
956 	case AF_INET :
957 		rc = setsockopt(ep->fd, IPPROTO_IP,
958 				IP_MULTICAST_IF,
959 				(void *)&NSRCADR(&ep->sin),
960 				sizeof(NSRCADR(&ep->sin)));
961 		if (rc)
962 			msyslog(LOG_ERR,
963 				"setsockopt IP_MULTICAST_IF %s fails: %m",
964 				stoa(&ep->sin));
965 		break;
966 
967 # ifdef INCLUDE_IPV6_MULTICAST_SUPPORT
968 	case AF_INET6 :
969 		rc = setsockopt(ep->fd, IPPROTO_IPV6,
970 				 IPV6_MULTICAST_IF,
971 				 (void *)&ep->ifindex,
972 				 sizeof(ep->ifindex));
973 		/* do not complain if bound addr scope is ifindex */
974 		if (rc && ep->ifindex != SCOPE(&ep->sin))
975 			msyslog(LOG_ERR,
976 				"setsockopt IPV6_MULTICAST_IF %u for %s fails: %m",
977 				ep->ifindex, stoa(&ep->sin));
978 		break;
979 # endif
980 	}
981 #endif	/* MCAST */
982 }
983 
984 
985 /*
986  * remove interface from known interface list and clean up
987  * associated resources
988  */
989 static void
remove_interface(endpt * ep)990 remove_interface(
991 	endpt *	ep
992 	)
993 {
994 	endpt *		unlinked;
995 	endpt **	pmclisthead;
996 	sockaddr_u	resmask;
997 
998 	UNLINK_SLIST(unlinked, ep_list, ep, elink, endpt);
999 	if (!ep->ignore_packets && INT_MULTICAST & ep->flags) {
1000 		pmclisthead = (AF_INET == ep->family)
1001 				 ? &mc4_list
1002 				 : &mc6_list;
1003 		UNLINK_SLIST(unlinked, *pmclisthead, ep, mclink, endpt);
1004 		DPRINTF(4, ("%s %s IPv%s multicast-capable unicast local address list\n",
1005 			stoa(&ep->sin),
1006 			(unlinked != NULL)
1007 			    ? "removed from"
1008 			    : "not found on",
1009 			(AF_INET == ep->family)
1010 			    ? "4"
1011 			    : "6"));
1012 	}
1013 	delete_interface_from_list(ep);
1014 
1015 	if (ep->fd != INVALID_SOCKET) {
1016 		msyslog(LOG_INFO,
1017 			"Deleting interface #%d %s, %s#%d, interface stats: received=%ld, sent=%ld, dropped=%ld, active_time=%ld secs",
1018 			ep->ifnum,
1019 			ep->name,
1020 			stoa(&ep->sin),
1021 			SRCPORT(&ep->sin),
1022 			ep->received,
1023 			ep->sent,
1024 			ep->notsent,
1025 			current_time - ep->starttime);
1026 #	    ifdef HAVE_IO_COMPLETION_PORT
1027 		io_completion_port_remove_socket(ep->fd, ep);
1028 #	    endif
1029 		close_and_delete_fd_from_list(ep->fd);
1030 		ep->fd = INVALID_SOCKET;
1031 	}
1032 
1033 	if (ep->bfd != INVALID_SOCKET) {
1034 		msyslog(LOG_INFO,
1035 			"stop listening for broadcasts to %s on interface #%d %s",
1036 			stoa(&ep->bcast), ep->ifnum, ep->name);
1037 #	    ifdef HAVE_IO_COMPLETION_PORT
1038 		io_completion_port_remove_socket(ep->bfd, ep);
1039 #	    endif
1040 		close_and_delete_fd_from_list(ep->bfd);
1041 		ep->bfd = INVALID_SOCKET;
1042 	}
1043 #   ifdef HAVE_IO_COMPLETION_PORT
1044 	io_completion_port_remove_interface(ep);
1045 #   endif
1046 
1047 	ninterfaces--;
1048 	mon_clearinterface(ep);
1049 
1050 	/* remove restrict interface entry */
1051 	SET_HOSTMASK(&resmask, AF(&ep->sin));
1052 	hack_restrict(RESTRICT_REMOVEIF, &ep->sin, &resmask,
1053 		      -3, RESM_NTPONLY | RESM_INTERFACE, RES_IGNORE, 0);
1054 }
1055 
1056 
1057 static void
log_listen_address(endpt * ep)1058 log_listen_address(
1059 	endpt *	ep
1060 	)
1061 {
1062 	msyslog(LOG_INFO, "%s on %d %s %s",
1063 		(ep->ignore_packets)
1064 		    ? "Listen and drop"
1065 		    : "Listen normally",
1066 		ep->ifnum,
1067 		ep->name,
1068 		sptoa(&ep->sin));
1069 }
1070 
1071 
1072 static void
create_wildcards(u_short port)1073 create_wildcards(
1074 	u_short	port
1075 	)
1076 {
1077 	int			v4wild;
1078 #ifdef INCLUDE_IPV6_SUPPORT
1079 	int			v6wild;
1080 #endif
1081 	sockaddr_u		wildaddr;
1082 	nic_rule_action		action;
1083 	struct interface *	wildif;
1084 
1085 	/*
1086 	 * silence "potentially uninitialized" warnings from VC9
1087 	 * failing to follow the logic.  Ideally action could remain
1088 	 * uninitialized, and the memset be the first statement under
1089 	 * the first if (v4wild).
1090 	 */
1091 	action = ACTION_LISTEN;
1092 	ZERO(wildaddr);
1093 
1094 #ifdef INCLUDE_IPV6_SUPPORT
1095 	/*
1096 	 * create pseudo-interface with wildcard IPv6 address
1097 	 */
1098 	v6wild = ipv6_works;
1099 	if (v6wild) {
1100 		/* set wildaddr to the v6 wildcard address :: */
1101 		ZERO(wildaddr);
1102 		AF(&wildaddr) = AF_INET6;
1103 		SET_ADDR6N(&wildaddr, in6addr_any);
1104 		SET_PORT(&wildaddr, port);
1105 		SET_SCOPE(&wildaddr, 0);
1106 
1107 		/* check for interface/nic rules affecting the wildcard */
1108 		action = interface_action(NULL, &wildaddr, 0);
1109 		v6wild = (ACTION_IGNORE != action);
1110 	}
1111 	if (v6wild) {
1112 		wildif = new_interface(NULL);
1113 
1114 		strlcpy(wildif->name, "v6wildcard", sizeof(wildif->name));
1115 		memcpy(&wildif->sin, &wildaddr, sizeof(wildif->sin));
1116 		wildif->family = AF_INET6;
1117 		AF(&wildif->mask) = AF_INET6;
1118 		SET_ONESMASK(&wildif->mask);
1119 
1120 		wildif->flags = INT_UP | INT_WILDCARD;
1121 		wildif->ignore_packets = (ACTION_DROP == action);
1122 
1123 		wildif->fd = open_socket(&wildif->sin, 0, 1, wildif);
1124 
1125 		if (wildif->fd != INVALID_SOCKET) {
1126 			wildipv6 = wildif;
1127 			any6_interface = wildif;
1128 			add_addr_to_list(&wildif->sin, wildif);
1129 			add_interface(wildif);
1130 			log_listen_address(wildif);
1131 		} else {
1132 			msyslog(LOG_ERR,
1133 				"unable to bind to wildcard address %s - another process may be running - EXITING",
1134 				stoa(&wildif->sin));
1135 			exit(1);
1136 		}
1137 		DPRINT_INTERFACE(2, (wildif, "created ", "\n"));
1138 	}
1139 #endif
1140 
1141 	/*
1142 	 * create pseudo-interface with wildcard IPv4 address
1143 	 */
1144 	v4wild = ipv4_works;
1145 	if (v4wild) {
1146 		/* set wildaddr to the v4 wildcard address 0.0.0.0 */
1147 		AF(&wildaddr) = AF_INET;
1148 		SET_ADDR4N(&wildaddr, INADDR_ANY);
1149 		SET_PORT(&wildaddr, port);
1150 
1151 		/* check for interface/nic rules affecting the wildcard */
1152 		action = interface_action(NULL, &wildaddr, 0);
1153 		v4wild = (ACTION_IGNORE != action);
1154 	}
1155 	if (v4wild) {
1156 		wildif = new_interface(NULL);
1157 
1158 		strlcpy(wildif->name, "v4wildcard", sizeof(wildif->name));
1159 		memcpy(&wildif->sin, &wildaddr, sizeof(wildif->sin));
1160 		wildif->family = AF_INET;
1161 		AF(&wildif->mask) = AF_INET;
1162 		SET_ONESMASK(&wildif->mask);
1163 
1164 		wildif->flags = INT_BROADCAST | INT_UP | INT_WILDCARD;
1165 		wildif->ignore_packets = (ACTION_DROP == action);
1166 #if defined(MCAST)
1167 		/*
1168 		 * enable multicast reception on the broadcast socket
1169 		 */
1170 		AF(&wildif->bcast) = AF_INET;
1171 		SET_ADDR4N(&wildif->bcast, INADDR_ANY);
1172 		SET_PORT(&wildif->bcast, port);
1173 #endif /* MCAST */
1174 		wildif->fd = open_socket(&wildif->sin, 0, 1, wildif);
1175 
1176 		if (wildif->fd != INVALID_SOCKET) {
1177 			wildipv4 = wildif;
1178 			any_interface = wildif;
1179 
1180 			add_addr_to_list(&wildif->sin, wildif);
1181 			add_interface(wildif);
1182 			log_listen_address(wildif);
1183 		} else {
1184 			msyslog(LOG_ERR,
1185 				"unable to bind to wildcard address %s - another process may be running - EXITING",
1186 				stoa(&wildif->sin));
1187 			exit(1);
1188 		}
1189 		DPRINT_INTERFACE(2, (wildif, "created ", "\n"));
1190 	}
1191 }
1192 
1193 
1194 /*
1195  * add_nic_rule() -- insert a rule entry at the head of nic_rule_list.
1196  */
1197 void
add_nic_rule(nic_rule_match match_type,const char * if_name,int prefixlen,nic_rule_action action)1198 add_nic_rule(
1199 	nic_rule_match	match_type,
1200 	const char *	if_name,	/* interface name or numeric address */
1201 	int		prefixlen,
1202 	nic_rule_action	action
1203 	)
1204 {
1205 	nic_rule *	rule;
1206 	isc_boolean_t	is_ip;
1207 
1208 	rule = emalloc_zero(sizeof(*rule));
1209 	rule->match_type = match_type;
1210 	rule->prefixlen = prefixlen;
1211 	rule->action = action;
1212 
1213 	if (MATCH_IFNAME == match_type) {
1214 		REQUIRE(NULL != if_name);
1215 		rule->if_name = estrdup(if_name);
1216 	} else if (MATCH_IFADDR == match_type) {
1217 		REQUIRE(NULL != if_name);
1218 		/* set rule->addr */
1219 		is_ip = is_ip_address(if_name, AF_UNSPEC, &rule->addr);
1220 		REQUIRE(is_ip);
1221 	} else
1222 		REQUIRE(NULL == if_name);
1223 
1224 	LINK_SLIST(nic_rule_list, rule, next);
1225 }
1226 
1227 
#ifdef DEBUG
/*
 * action_text - map a nic_rule_action to its keyword for debug output.
 * An unknown action is reported and then trips ENSURE(0).
 */
static const char *
action_text(
	nic_rule_action	action
	)
{
	switch (action) {

	case ACTION_LISTEN:
		return "listen";

	case ACTION_IGNORE:
		return "ignore";

	case ACTION_DROP:
		return "drop";

	default:
		break;
	}

	DPRINTF(1, ("fatal: unknown nic_rule_action %d\n",
		    action));
	ENSURE(0);

	return "ERROR";		/* only reached if ENSURE returns */
}
#endif	/* DEBUG */
1261 
1262 
1263 static nic_rule_action
interface_action(char * if_name,sockaddr_u * if_addr,u_int32 if_flags)1264 interface_action(
1265 	char *		if_name,
1266 	sockaddr_u *	if_addr,
1267 	u_int32		if_flags
1268 	)
1269 {
1270 	nic_rule *	rule;
1271 	int		isloopback;
1272 	int		iswildcard;
1273 
1274 	DPRINTF(4, ("interface_action: interface %s ",
1275 		    (if_name != NULL) ? if_name : "wildcard"));
1276 
1277 	iswildcard = is_wildcard_addr(if_addr);
1278 	isloopback = !!(INT_LOOPBACK & if_flags);
1279 
1280 	/*
1281 	 * Find any matching NIC rule from --interface / -I or ntp.conf
1282 	 * interface/nic rules.
1283 	 */
1284 	for (rule = nic_rule_list; rule != NULL; rule = rule->next) {
1285 
1286 		switch (rule->match_type) {
1287 
1288 		case MATCH_ALL:
1289 			/* loopback and wildcard excluded from "all" */
1290 			if (isloopback || iswildcard)
1291 				break;
1292 			DPRINTF(4, ("nic all %s\n",
1293 			    action_text(rule->action)));
1294 			return rule->action;
1295 
1296 		case MATCH_IPV4:
1297 			if (IS_IPV4(if_addr)) {
1298 				DPRINTF(4, ("nic ipv4 %s\n",
1299 				    action_text(rule->action)));
1300 				return rule->action;
1301 			}
1302 			break;
1303 
1304 		case MATCH_IPV6:
1305 			if (IS_IPV6(if_addr)) {
1306 				DPRINTF(4, ("nic ipv6 %s\n",
1307 				    action_text(rule->action)));
1308 				return rule->action;
1309 			}
1310 			break;
1311 
1312 		case MATCH_WILDCARD:
1313 			if (iswildcard) {
1314 				DPRINTF(4, ("nic wildcard %s\n",
1315 				    action_text(rule->action)));
1316 				return rule->action;
1317 			}
1318 			break;
1319 
1320 		case MATCH_IFADDR:
1321 			if (rule->prefixlen != -1) {
1322 				if (addr_eqprefix(if_addr, &rule->addr,
1323 						  rule->prefixlen)) {
1324 
1325 					DPRINTF(4, ("subnet address match - %s\n",
1326 					    action_text(rule->action)));
1327 					return rule->action;
1328 				}
1329 			} else
1330 				if (SOCK_EQ(if_addr, &rule->addr)) {
1331 
1332 					DPRINTF(4, ("address match - %s\n",
1333 					    action_text(rule->action)));
1334 					return rule->action;
1335 				}
1336 			break;
1337 
1338 		case MATCH_IFNAME:
1339 			if (if_name != NULL
1340 #if defined(HAVE_FNMATCH) && defined(FNM_CASEFOLD)
1341 			    && !fnmatch(rule->if_name, if_name, FNM_CASEFOLD)
1342 #else
1343 			    && !strcasecmp(if_name, rule->if_name)
1344 #endif
1345 			    ) {
1346 
1347 				DPRINTF(4, ("interface name match - %s\n",
1348 				    action_text(rule->action)));
1349 				return rule->action;
1350 			}
1351 			break;
1352 		}
1353 	}
1354 
1355 	/*
1356 	 * Unless explicitly disabled such as with "nic ignore ::1"
1357 	 * listen on loopback addresses.  Since ntpq and ntpdc query
1358 	 * "localhost" by default, which typically resolves to ::1 and
1359 	 * 127.0.0.1, it's useful to default to listening on both.
1360 	 */
1361 	if (isloopback) {
1362 		DPRINTF(4, ("default loopback listen\n"));
1363 		return ACTION_LISTEN;
1364 	}
1365 
1366 	/*
1367 	 * Treat wildcard addresses specially.  If there is no explicit
1368 	 * "nic ... wildcard" or "nic ... 0.0.0.0" or "nic ... ::" rule
1369 	 * default to drop.
1370 	 */
1371 	if (iswildcard) {
1372 		DPRINTF(4, ("default wildcard drop\n"));
1373 		return ACTION_DROP;
1374 	}
1375 
1376 	/*
1377 	 * Check for "virtual IP" (colon in the interface name) after
1378 	 * the rules so that "ntpd --interface eth0:1 -novirtualips"
1379 	 * does indeed listen on eth0:1's addresses.
1380 	 */
1381 	if (!listen_to_virtual_ips && if_name != NULL
1382 	    && (strchr(if_name, ':') != NULL)) {
1383 
1384 		DPRINTF(4, ("virtual ip - ignore\n"));
1385 		return ACTION_IGNORE;
1386 	}
1387 
1388 	/*
1389 	 * If there are no --interface/-I command-line options and no
1390 	 * interface/nic rules in ntp.conf, the default action is to
1391 	 * listen.  In the presence of rules from either, the default
1392 	 * is to ignore.  This implements ntpd's traditional listen-
1393 	 * every default with no interface listen configuration, and
1394 	 * ensures a single -I eth0 or "nic listen eth0" means do not
1395 	 * listen on any other addresses.
1396 	 */
1397 	if (NULL == nic_rule_list) {
1398 		DPRINTF(4, ("default listen\n"));
1399 		return ACTION_LISTEN;
1400 	}
1401 
1402 	DPRINTF(4, ("implicit ignore\n"));
1403 	return ACTION_IGNORE;
1404 }
1405 
1406 
1407 static void
convert_isc_if(isc_interface_t * isc_if,endpt * itf,u_short port)1408 convert_isc_if(
1409 	isc_interface_t *isc_if,
1410 	endpt *itf,
1411 	u_short port
1412 	)
1413 {
1414 	const u_char v6loop[16] = {0, 0, 0, 0, 0, 0, 0, 0,
1415 				   0, 0, 0, 0, 0, 0, 0, 1};
1416 
1417 	strlcpy(itf->name, isc_if->name, sizeof(itf->name));
1418 	itf->ifindex = isc_if->ifindex;
1419 	itf->family = (u_short)isc_if->af;
1420 	AF(&itf->sin) = itf->family;
1421 	AF(&itf->mask) = itf->family;
1422 	AF(&itf->bcast) = itf->family;
1423 	SET_PORT(&itf->sin, port);
1424 	SET_PORT(&itf->mask, port);
1425 	SET_PORT(&itf->bcast, port);
1426 
1427 	if (IS_IPV4(&itf->sin)) {
1428 		NSRCADR(&itf->sin) = isc_if->address.type.in.s_addr;
1429 		NSRCADR(&itf->mask) = isc_if->netmask.type.in.s_addr;
1430 
1431 		if (isc_if->flags & INTERFACE_F_BROADCAST) {
1432 			itf->flags |= INT_BROADCAST;
1433 			NSRCADR(&itf->bcast) =
1434 			    isc_if->broadcast.type.in.s_addr;
1435 		}
1436 	}
1437 #ifdef INCLUDE_IPV6_SUPPORT
1438 	else if (IS_IPV6(&itf->sin)) {
1439 		SET_ADDR6N(&itf->sin, isc_if->address.type.in6);
1440 		SET_ADDR6N(&itf->mask, isc_if->netmask.type.in6);
1441 
1442 		SET_SCOPE(&itf->sin, isc_if->address.zone);
1443 	}
1444 #endif /* INCLUDE_IPV6_SUPPORT */
1445 
1446 
1447 	/* Process the rest of the flags */
1448 
1449 	itf->flags |=
1450 		  ((INTERFACE_F_UP & isc_if->flags)
1451 			? INT_UP : 0)
1452 		| ((INTERFACE_F_LOOPBACK & isc_if->flags)
1453 			? INT_LOOPBACK : 0)
1454 		| ((INTERFACE_F_POINTTOPOINT & isc_if->flags)
1455 			? INT_PPP : 0)
1456 		| ((INTERFACE_F_MULTICAST & isc_if->flags)
1457 			? INT_MULTICAST : 0)
1458 		| ((INTERFACE_F_PRIVACY & isc_if->flags)
1459 			? INT_PRIVACY : 0)
1460 		;
1461 
1462 	/*
1463 	 * Clear the loopback flag if the address is not localhost.
1464 	 * http://bugs.ntp.org/1683
1465 	 */
1466 	if (INT_LOOPBACK & itf->flags) {
1467 		if (AF_INET == itf->family) {
1468 			if (127 != (SRCADR(&itf->sin) >> 24))
1469 				itf->flags &= ~INT_LOOPBACK;
1470 		} else {
1471 			if (memcmp(v6loop, NSRCADR6(&itf->sin),
1472 				   sizeof(NSRCADR6(&itf->sin))))
1473 				itf->flags &= ~INT_LOOPBACK;
1474 		}
1475 	}
1476 }
1477 
1478 
1479 /*
1480  * refresh_interface
1481  *
1482  * some OSes have been observed to keep
1483  * cached routes even when more specific routes
1484  * become available.
1485  * this can be mitigated by re-binding
1486  * the socket.
1487  */
1488 static int
refresh_interface(struct interface * interface)1489 refresh_interface(
1490 	struct interface * interface
1491 	)
1492 {
1493 #ifdef  OS_MISSES_SPECIFIC_ROUTE_UPDATES
1494 	if (interface->fd != INVALID_SOCKET) {
1495 		int bcast = (interface->flags & INT_BCASTXMIT) != 0;
1496 		/* as we forcibly close() the socket remove the
1497 		   broadcast permission indication */
1498 		if (bcast)
1499 			socket_broadcast_disable(interface, &interface->sin);
1500 
1501 		close_and_delete_fd_from_list(interface->fd);
1502 
1503 		/* create new socket picking up a new first hop binding
1504 		   at connect() time */
1505 		interface->fd = open_socket(&interface->sin,
1506 					    bcast, 0, interface);
1507 		 /*
1508 		  * reset TTL indication so TTL is is set again
1509 		  * next time around
1510 		  */
1511 		interface->last_ttl = 0;
1512 		return (interface->fd != INVALID_SOCKET);
1513 	} else
1514 		return 0;	/* invalid sockets are not refreshable */
1515 #else /* !OS_MISSES_SPECIFIC_ROUTE_UPDATES */
1516 	return (interface->fd != INVALID_SOCKET);
1517 #endif /* !OS_MISSES_SPECIFIC_ROUTE_UPDATES */
1518 }
1519 
1520 /*
1521  * interface_update - externally callable update function
1522  */
1523 void
interface_update(interface_receiver_t receiver,void * data)1524 interface_update(
1525 	interface_receiver_t	receiver,
1526 	void *			data)
1527 {
1528 	int new_interface_found;
1529 
1530 	if (disable_dynamic_updates)
1531 		return;
1532 
1533 	BLOCKIO();
1534 	new_interface_found = update_interfaces(NTP_PORT, receiver, data);
1535 	UNBLOCKIO();
1536 
1537 	if (!new_interface_found)
1538 		return;
1539 
1540 #ifdef DEBUG
1541 	msyslog(LOG_DEBUG, "new interface(s) found: waking up resolver");
1542 #endif
1543 	interrupt_worker_sleep();
1544 }
1545 
1546 
1547 /*
1548  * sau_from_netaddr() - convert network address on-wire formats.
1549  * Convert from libisc's isc_netaddr_t to NTP's sockaddr_u
1550  */
1551 void
sau_from_netaddr(sockaddr_u * psau,const isc_netaddr_t * pna)1552 sau_from_netaddr(
1553 	sockaddr_u *psau,
1554 	const isc_netaddr_t *pna
1555 	)
1556 {
1557 	ZERO_SOCK(psau);
1558 	AF(psau) = (u_short)pna->family;
1559 	switch (pna->family) {
1560 
1561 	case AF_INET:
1562 		memcpy(&psau->sa4.sin_addr, &pna->type.in,
1563 		       sizeof(psau->sa4.sin_addr));
1564 		break;
1565 
1566 	case AF_INET6:
1567 		memcpy(&psau->sa6.sin6_addr, &pna->type.in6,
1568 		       sizeof(psau->sa6.sin6_addr));
1569 		break;
1570 	}
1571 }
1572 
1573 
1574 static int
is_wildcard_addr(const sockaddr_u * psau)1575 is_wildcard_addr(
1576 	const sockaddr_u *psau
1577 	)
1578 {
1579 	if (IS_IPV4(psau) && !NSRCADR(psau))
1580 		return 1;
1581 
1582 #ifdef INCLUDE_IPV6_SUPPORT
1583 	if (IS_IPV6(psau) && S_ADDR6_EQ(psau, &in6addr_any))
1584 		return 1;
1585 #endif
1586 
1587 	return 0;
1588 }
1589 
1590 
#ifdef OS_NEEDS_REUSEADDR_FOR_IFADDRBIND
/*
 * set_wildcard_reuse - switch SO_REUSEADDR on or off on the wildcard
 * socket of the given address family.  Does nothing if that family
 * has no wildcard endpoint or its socket is not open.
 */
static void
set_wildcard_reuse(
	u_short	family,
	int	on
	)
{
	struct interface *wild;

	wild = ANY_INTERFACE_BYFAM(family);
	if (NULL == wild || INVALID_SOCKET == wild->fd)
		return;

	if (setsockopt(wild->fd, SOL_SOCKET, SO_REUSEADDR,
		       (void *)&on, sizeof(on)))
		msyslog(LOG_ERR,
			"set_wildcard_reuse: setsockopt(SO_REUSEADDR, %s) failed: %m",
			on ? "on" : "off");

	DPRINTF(4, ("set SO_REUSEADDR to %s on %s\n",
		    on ? "on" : "off",
		    stoa(&wild->sin)));
}
#endif /* OS_NEEDS_REUSEADDR_FOR_IFADDRBIND */
1621 
1622 static isc_boolean_t
check_flags(sockaddr_u * psau,const char * name,u_int32 flags)1623 check_flags(
1624 	sockaddr_u *psau,
1625 	const char *name,
1626 	u_int32 flags
1627 	)
1628 {
1629 #if defined(SIOCGIFAFLAG_IN)
1630 	struct ifreq ifr;
1631 	int fd;
1632 
1633 	if (psau->sa.sa_family != AF_INET)
1634 		return ISC_FALSE;
1635 	if ((fd = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
1636 		return ISC_FALSE;
1637 	ZERO(ifr);
1638 	memcpy(&ifr.ifr_addr, &psau->sa, sizeof(ifr.ifr_addr));
1639 	strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name));
1640 	if (ioctl(fd, SIOCGIFAFLAG_IN, &ifr) < 0) {
1641 		close(fd);
1642 		return ISC_FALSE;
1643 	}
1644 	close(fd);
1645 	if ((ifr.ifr_addrflags & flags) != 0)
1646 		return ISC_TRUE;
1647 #endif	/* SIOCGIFAFLAG_IN */
1648 	return ISC_FALSE;
1649 }
1650 
1651 static isc_boolean_t
check_flags6(sockaddr_u * psau,const char * name,u_int32 flags6)1652 check_flags6(
1653 	sockaddr_u *psau,
1654 	const char *name,
1655 	u_int32 flags6
1656 	)
1657 {
1658 #if defined(INCLUDE_IPV6_SUPPORT) && defined(SIOCGIFAFLAG_IN6)
1659 	struct in6_ifreq ifr6;
1660 	int fd;
1661 
1662 	if (psau->sa.sa_family != AF_INET6)
1663 		return ISC_FALSE;
1664 	if ((fd = socket(AF_INET6, SOCK_DGRAM, 0)) < 0)
1665 		return ISC_FALSE;
1666 	ZERO(ifr6);
1667 	memcpy(&ifr6.ifr_addr, &psau->sa6, sizeof(ifr6.ifr_addr));
1668 	strlcpy(ifr6.ifr_name, name, sizeof(ifr6.ifr_name));
1669 	if (ioctl(fd, SIOCGIFAFLAG_IN6, &ifr6) < 0) {
1670 		close(fd);
1671 		return ISC_FALSE;
1672 	}
1673 	close(fd);
1674 	if ((ifr6.ifr_ifru.ifru_flags6 & flags6) != 0)
1675 		return ISC_TRUE;
1676 #endif	/* INCLUDE_IPV6_SUPPORT && SIOCGIFAFLAG_IN6 */
1677 	return ISC_FALSE;
1678 }
1679 
1680 static isc_boolean_t
is_anycast(sockaddr_u * psau,const char * name)1681 is_anycast(
1682 	sockaddr_u *psau,
1683 	const char *name
1684 	)
1685 {
1686 #ifdef IN6_IFF_ANYCAST
1687 	return check_flags6(psau, name, IN6_IFF_ANYCAST);
1688 #else
1689 	return ISC_FALSE;
1690 #endif
1691 }
1692 
1693 static isc_boolean_t
is_valid(sockaddr_u * psau,const char * name)1694 is_valid(
1695 	sockaddr_u *psau,
1696 	const char *name
1697 	)
1698 {
1699 	u_int32 flags;
1700 
1701 	flags = 0;
1702 	switch (psau->sa.sa_family) {
1703 	case AF_INET:
1704 #ifdef IN_IFF_DETACHED
1705 		flags |= IN_IFF_DETACHED;
1706 #endif
1707 #ifdef IN_IFF_TENTATIVE
1708 		flags |= IN_IFF_TENTATIVE;
1709 #endif
1710 		return check_flags(psau, name, flags) ? ISC_FALSE : ISC_TRUE;
1711 	case AF_INET6:
1712 #ifdef IN6_IFF_DEPARTED
1713 		flags |= IN6_IFF_DEPARTED;
1714 #endif
1715 #ifdef IN6_IFF_DETACHED
1716 		flags |= IN6_IFF_DETACHED;
1717 #endif
1718 #ifdef IN6_IFF_TENTATIVE
1719 		flags |= IN6_IFF_TENTATIVE;
1720 #endif
1721 		return check_flags6(psau, name, flags) ? ISC_FALSE : ISC_TRUE;
1722 	default:
1723 		return ISC_FALSE;
1724 	}
1725 }
1726 
1727 /*
1728  * update_interface strategy
1729  *
1730  * toggle configuration phase
1731  *
1732  * Phase 1:
1733  * forall currently existing interfaces
1734  *   if address is known:
1735  *	drop socket - rebind again
1736  *
1737  *   if address is NOT known:
1738  *	attempt to create a new interface entry
1739  *
1740  * Phase 2:
1741  * forall currently known non MCAST and WILDCARD interfaces
1742  *   if interface does not match configuration phase (not seen in phase 1):
1743  *	remove interface from known interface list
1744  *	forall peers associated with this interface
1745  *         disconnect peer from this interface
1746  *
1747  * Phase 3:
1748  *   attempt to re-assign interfaces to peers
1749  *
1750  */
1751 
1752 static int
update_interfaces(u_short port,interface_receiver_t receiver,void * data)1753 update_interfaces(
1754 	u_short			port,
1755 	interface_receiver_t	receiver,
1756 	void *			data
1757 	)
1758 {
1759 	isc_mem_t *		mctx = (void *)-1;
1760 	interface_info_t	ifi;
1761 	isc_interfaceiter_t *	iter;
1762 	isc_result_t		result;
1763 	isc_interface_t		isc_if;
1764 	int			new_interface_found;
1765 	unsigned int		family;
1766 	endpt			enumep;
1767 	endpt *			ep;
1768 	endpt *			next_ep;
1769 
1770 	DPRINTF(3, ("update_interfaces(%d)\n", port));
1771 
1772 	/*
1773 	 * phase one - scan interfaces
1774 	 * - create those that are not found
1775 	 * - update those that are found
1776 	 */
1777 
1778 	new_interface_found = FALSE;
1779 	iter = NULL;
1780 	result = isc_interfaceiter_create(mctx, &iter);
1781 
1782 	if (result != ISC_R_SUCCESS)
1783 		return 0;
1784 
1785 	/*
1786 	 * Toggle system interface scan phase to find untouched
1787 	 * interfaces to be deleted.
1788 	 */
1789 	sys_interphase ^= 0x1;
1790 
1791 	for (result = isc_interfaceiter_first(iter);
1792 	     ISC_R_SUCCESS == result;
1793 	     result = isc_interfaceiter_next(iter)) {
1794 
1795 		result = isc_interfaceiter_current(iter, &isc_if);
1796 
1797 		if (result != ISC_R_SUCCESS)
1798 			break;
1799 
1800 		/* See if we have a valid family to use */
1801 		family = isc_if.address.family;
1802 		if (AF_INET != family && AF_INET6 != family)
1803 			continue;
1804 		if (AF_INET == family && !ipv4_works)
1805 			continue;
1806 		if (AF_INET6 == family && !ipv6_works)
1807 			continue;
1808 
1809 		/* create prototype */
1810 		init_interface(&enumep);
1811 
1812 		convert_isc_if(&isc_if, &enumep, port);
1813 
1814 		DPRINT_INTERFACE(4, (&enumep, "examining ", "\n"));
1815 
1816 		/*
1817 		 * Check if and how we are going to use the interface.
1818 		 */
1819 		switch (interface_action(enumep.name, &enumep.sin,
1820 					 enumep.flags)) {
1821 
1822 		case ACTION_IGNORE:
1823 			DPRINTF(4, ("ignoring interface %s (%s) - by nic rules\n",
1824 				    enumep.name, stoa(&enumep.sin)));
1825 			continue;
1826 
1827 		case ACTION_LISTEN:
1828 			DPRINTF(4, ("listen interface %s (%s) - by nic rules\n",
1829 				    enumep.name, stoa(&enumep.sin)));
1830 			enumep.ignore_packets = ISC_FALSE;
1831 			break;
1832 
1833 		case ACTION_DROP:
1834 			DPRINTF(4, ("drop on interface %s (%s) - by nic rules\n",
1835 				    enumep.name, stoa(&enumep.sin)));
1836 			enumep.ignore_packets = ISC_TRUE;
1837 			break;
1838 		}
1839 
1840 		 /* interfaces must be UP to be usable */
1841 		if (!(enumep.flags & INT_UP)) {
1842 			DPRINTF(4, ("skipping interface %s (%s) - DOWN\n",
1843 				    enumep.name, stoa(&enumep.sin)));
1844 			continue;
1845 		}
1846 
1847 		/*
1848 		 * skip any interfaces UP and bound to a wildcard
1849 		 * address - some dhcp clients produce that in the
1850 		 * wild
1851 		 */
1852 		if (is_wildcard_addr(&enumep.sin))
1853 			continue;
1854 
1855 		if (is_anycast(&enumep.sin, isc_if.name))
1856 			continue;
1857 
1858 		/*
1859 		 * skip any address that is an invalid state to be used
1860 		 */
1861 		if (!is_valid(&enumep.sin, isc_if.name))
1862 			continue;
1863 
1864 		/*
1865 		 * map to local *address* in order to map all duplicate
1866 		 * interfaces to an endpt structure with the appropriate
1867 		 * socket.  Our name space is (ip-address), NOT
1868 		 * (interface name, ip-address).
1869 		 */
1870 		ep = getinterface(&enumep.sin, INT_WILDCARD);
1871 
1872 		if (ep != NULL && refresh_interface(ep)) {
1873 			/*
1874 			 * found existing and up to date interface -
1875 			 * mark present.
1876 			 */
1877 			if (ep->phase != sys_interphase) {
1878 				/*
1879 				 * On a new round we reset the name so
1880 				 * the interface name shows up again if
1881 				 * this address is no longer shared.
1882 				 * We reset ignore_packets from the
1883 				 * new prototype to respect any runtime
1884 				 * changes to the nic rules.
1885 				 */
1886 				strlcpy(ep->name, enumep.name,
1887 					sizeof(ep->name));
1888 				ep->ignore_packets =
1889 					    enumep.ignore_packets;
1890 			} else {
1891 				/* name collision - rename interface */
1892 				strlcpy(ep->name, "*multiple*",
1893 					sizeof(ep->name));
1894 			}
1895 
1896 			DPRINT_INTERFACE(4, (ep, "updating ",
1897 					     " present\n"));
1898 
1899 			if (ep->ignore_packets !=
1900 			    enumep.ignore_packets) {
1901 				/*
1902 				 * We have conflicting configurations
1903 				 * for the interface address. This is
1904 				 * caused by using -I <interfacename>
1905 				 * for an interface that shares its
1906 				 * address with other interfaces. We
1907 				 * can not disambiguate incoming
1908 				 * packets delivered to this socket
1909 				 * without extra syscalls/features.
1910 				 * These are not (commonly) available.
1911 				 * Note this is a more unusual
1912 				 * configuration where several
1913 				 * interfaces share an address but
1914 				 * filtering via interface name is
1915 				 * attempted.  We resolve the
1916 				 * configuration conflict by disabling
1917 				 * the processing of received packets.
1918 				 * This leads to no service on the
1919 				 * interface address where the conflict
1920 				 * occurs.
1921 				 */
1922 				msyslog(LOG_ERR,
1923 					"WARNING: conflicting enable configuration for interfaces %s and %s for address %s - unsupported configuration - address DISABLED",
1924 					enumep.name, ep->name,
1925 					stoa(&enumep.sin));
1926 
1927 				ep->ignore_packets = ISC_TRUE;
1928 			}
1929 
1930 			ep->phase = sys_interphase;
1931 
1932 			ifi.action = IFS_EXISTS;
1933 			ifi.ep = ep;
1934 			if (receiver != NULL)
1935 				(*receiver)(data, &ifi);
1936 		} else {
1937 			/*
1938 			 * This is new or refreshing failed - add to
1939 			 * our interface list.  If refreshing failed we
1940 			 * will delete the interface structure in phase
1941 			 * 2 as the interface was not marked current.
1942 			 * We can bind to the address as the refresh
1943 			 * code already closed the offending socket
1944 			 */
1945 			ep = create_interface(port, &enumep);
1946 
1947 			if (ep != NULL) {
1948 				ifi.action = IFS_CREATED;
1949 				ifi.ep = ep;
1950 				if (receiver != NULL)
1951 					(*receiver)(data, &ifi);
1952 
1953 				new_interface_found = TRUE;
1954 				DPRINT_INTERFACE(3,
1955 					(ep, "updating ",
1956 					 " new - created\n"));
1957 			} else {
1958 				DPRINT_INTERFACE(3,
1959 					(&enumep, "updating ",
1960 					 " new - creation FAILED"));
1961 
1962 				msyslog(LOG_INFO,
1963 					"failed to init interface for address %s",
1964 					stoa(&enumep.sin));
1965 				continue;
1966 			}
1967 		}
1968 	}
1969 
1970 	isc_interfaceiter_destroy(&iter);
1971 
1972 	/*
1973 	 * phase 2 - delete gone interfaces - reassigning peers to
1974 	 * other interfaces
1975 	 */
1976 	for (ep = ep_list; ep != NULL; ep = next_ep) {
1977 		next_ep = ep->elink;
1978 
1979 		/*
1980 		 * if phase does not match sys_phase this interface was
1981 		 * not enumerated during the last interface scan - so it
1982 		 * is gone and will be deleted here unless it did not
1983 		 * originate from interface enumeration (INT_WILDCARD,
1984 		 * INT_MCASTIF).
1985 		 */
1986 		if (((INT_WILDCARD | INT_MCASTIF) & ep->flags) ||
1987 		    ep->phase == sys_interphase)
1988 			continue;
1989 
1990 		DPRINT_INTERFACE(3, (ep, "updating ",
1991 				     "GONE - deleting\n"));
1992 		remove_interface(ep);
1993 
1994 		ifi.action = IFS_DELETED;
1995 		ifi.ep = ep;
1996 		if (receiver != NULL)
1997 			(*receiver)(data, &ifi);
1998 
1999 		/* disconnect peers from deleted endpt. */
2000 		while (ep->peers != NULL)
2001 			set_peerdstadr(ep->peers, NULL);
2002 
2003 		/*
2004 		 * update globals in case we lose
2005 		 * a loopback interface
2006 		 */
2007 		if (ep == loopback_interface)
2008 			loopback_interface = NULL;
2009 
2010 		delete_interface(ep);
2011 	}
2012 
2013 	/*
2014 	 * phase 3 - re-configure as the world has possibly changed
2015 	 *
2016 	 * never ever make this conditional again - it is needed to track
2017 	 * routing updates. see bug #2506
2018 	 */
2019 	refresh_all_peerinterfaces();
2020 
2021 	if (broadcast_client_enabled || sys_bclient)
2022 		io_setbclient();
2023 
2024 #ifdef MCAST
2025 	/*
2026 	 * Check multicast interfaces and try to join multicast groups if
2027          * not joined yet.
2028          */
2029 	for (ep = ep_list; ep != NULL; ep = ep->elink) {
2030 		remaddr_t *entry;
2031 
2032 		if (!(INT_MCASTIF & ep->flags) || (INT_MCASTOPEN & ep->flags))
2033 			continue;
2034 
2035 		/* Find remote address that was linked to this interface */
2036 		for (entry = remoteaddr_list;
2037 		     entry != NULL;
2038 		     entry = entry->link) {
2039 			if (entry->ep == ep) {
2040 				if (socket_multicast_enable(ep, &entry->addr)) {
2041 					msyslog(LOG_INFO,
2042 						"Joined %s socket to multicast group %s",
2043 						stoa(&ep->sin),
2044 						stoa(&entry->addr));
2045 				}
2046 				break;
2047 			}
2048 		}
2049 	}
2050 #endif /* MCAST */
2051 
2052 	return new_interface_found;
2053 }
2054 
2055 
2056 /*
2057  * create_sockets - create a socket for each interface plus a default
2058  *			socket for when we don't know where to send
2059  */
2060 static int
create_sockets(u_short port)2061 create_sockets(
2062 	u_short port
2063 	)
2064 {
2065 #ifndef HAVE_IO_COMPLETION_PORT
2066 	/*
2067 	 * I/O Completion Ports don't care about the select and FD_SET
2068 	 */
2069 	maxactivefd = 0;
2070 	FD_ZERO(&activefds);
2071 #endif
2072 
2073 	DPRINTF(2, ("create_sockets(%d)\n", port));
2074 
2075 	create_wildcards(port);
2076 
2077 	update_interfaces(port, NULL, NULL);
2078 
2079 	/*
2080 	 * Now that we have opened all the sockets, turn off the reuse
2081 	 * flag for security.
2082 	 */
2083 	set_reuseaddr(0);
2084 
2085 	DPRINTF(2, ("create_sockets: Total interfaces = %d\n", ninterfaces));
2086 
2087 	return ninterfaces;
2088 }
2089 
2090 /*
2091  * create_interface - create a new interface for a given prototype
2092  *		      binding the socket.
2093  */
2094 static struct interface *
create_interface(u_short port,struct interface * protot)2095 create_interface(
2096 	u_short			port,
2097 	struct interface *	protot
2098 	)
2099 {
2100 	sockaddr_u	resmask;
2101 	endpt *		iface;
2102 #if defined(MCAST) && defined(MULTICAST_NONEWSOCKET)
2103 	remaddr_t *	entry;
2104 	remaddr_t *	next_entry;
2105 #endif
2106 	DPRINTF(2, ("create_interface(%s#%d)\n", stoa(&protot->sin),
2107 		    port));
2108 
2109 	/* build an interface */
2110 	iface = new_interface(protot);
2111 
2112 	/*
2113 	 * create socket
2114 	 */
2115 	iface->fd = open_socket(&iface->sin, 0, 0, iface);
2116 
2117 	if (iface->fd != INVALID_SOCKET)
2118 		log_listen_address(iface);
2119 
2120 	if ((INT_BROADCAST & iface->flags)
2121 	    && iface->bfd != INVALID_SOCKET)
2122 		msyslog(LOG_INFO, "Listening on broadcast address %s#%d",
2123 			stoa((&iface->bcast)), port);
2124 
2125 	if (INVALID_SOCKET == iface->fd
2126 	    && INVALID_SOCKET == iface->bfd) {
2127 		msyslog(LOG_ERR, "unable to create socket on %s (%d) for %s#%d",
2128 			iface->name,
2129 			iface->ifnum,
2130 			stoa((&iface->sin)),
2131 			port);
2132 		delete_interface(iface);
2133 		return NULL;
2134 	}
2135 
2136 	/*
2137 	 * Blacklist our own addresses, no use talking to ourself
2138 	 */
2139 	SET_HOSTMASK(&resmask, AF(&iface->sin));
2140 	hack_restrict(RESTRICT_FLAGS, &iface->sin, &resmask,
2141 		      -4, RESM_NTPONLY | RESM_INTERFACE, RES_IGNORE, 0);
2142 
2143 	/*
2144 	 * set globals with the first found
2145 	 * loopback interface of the appropriate class
2146 	 */
2147 	if (NULL == loopback_interface && AF_INET == iface->family
2148 	    && (INT_LOOPBACK & iface->flags))
2149 		loopback_interface = iface;
2150 
2151 	/*
2152 	 * put into our interface list
2153 	 */
2154 	add_addr_to_list(&iface->sin, iface);
2155 	add_interface(iface);
2156 
2157 #if defined(MCAST) && defined(MULTICAST_NONEWSOCKET)
2158 	/*
2159 	 * Join any previously-configured compatible multicast groups.
2160 	 */
2161 	if (INT_MULTICAST & iface->flags &&
2162 	    !((INT_LOOPBACK | INT_WILDCARD) & iface->flags) &&
2163 	    !iface->ignore_packets) {
2164 		for (entry = remoteaddr_list;
2165 		     entry != NULL;
2166 		     entry = next_entry) {
2167 			next_entry = entry->link;
2168 			if (AF(&iface->sin) != AF(&entry->addr) ||
2169 			    !IS_MCAST(&entry->addr))
2170 				continue;
2171 			if (socket_multicast_enable(iface,
2172 						    &entry->addr))
2173 				msyslog(LOG_INFO,
2174 					"Joined %s socket to multicast group %s",
2175 					stoa(&iface->sin),
2176 					stoa(&entry->addr));
2177 			else
2178 				msyslog(LOG_ERR,
2179 					"Failed to join %s socket to multicast group %s",
2180 					stoa(&iface->sin),
2181 					stoa(&entry->addr));
2182 		}
2183 	}
2184 #endif	/* MCAST && MCAST_NONEWSOCKET */
2185 
2186 	DPRINT_INTERFACE(2, (iface, "created ", "\n"));
2187 	return iface;
2188 }
2189 
2190 
2191 #ifdef SO_EXCLUSIVEADDRUSE
2192 static void
set_excladdruse(SOCKET fd)2193 set_excladdruse(
2194 	SOCKET fd
2195 	)
2196 {
2197 	int one = 1;
2198 	int failed;
2199 #ifdef SYS_WINNT
2200 	DWORD err;
2201 #endif
2202 
2203 	failed = setsockopt(fd, SOL_SOCKET, SO_EXCLUSIVEADDRUSE,
2204 			    (void *)&one, sizeof(one));
2205 
2206 	if (!failed)
2207 		return;
2208 
2209 #ifdef SYS_WINNT
2210 	/*
2211 	 * Prior to Windows XP setting SO_EXCLUSIVEADDRUSE can fail with
2212 	 * error WSAINVAL depending on service pack level and whether
2213 	 * the user account is in the Administrators group.  Do not
2214 	 * complain if it fails that way on versions prior to XP (5.1).
2215 	 */
2216 	err = GetLastError();
2217 
2218 	if (isc_win32os_versioncheck(5, 1, 0, 0) < 0	/* < 5.1/XP */
2219 	    && WSAEINVAL == err)
2220 		return;
2221 
2222 	SetLastError(err);
2223 #endif
2224 	msyslog(LOG_ERR,
2225 		"setsockopt(%d, SO_EXCLUSIVEADDRUSE, on): %m",
2226 		(int)fd);
2227 }
2228 #endif  /* SO_EXCLUSIVEADDRUSE */
2229 
2230 
2231 /*
2232  * set_reuseaddr() - set/clear REUSEADDR on all sockets
2233  *			NB possible hole - should we be doing this on broadcast
2234  *			fd's also?
2235  */
2236 static void
set_reuseaddr(int flag)2237 set_reuseaddr(
2238 	int flag
2239 	)
2240 {
2241 #ifndef SO_EXCLUSIVEADDRUSE
2242 	endpt *ep;
2243 
2244 	for (ep = ep_list; ep != NULL; ep = ep->elink) {
2245 		if (ep->flags & INT_WILDCARD)
2246 			continue;
2247 
2248 		/*
2249 		 * if ep->fd  is INVALID_SOCKET, we might have a adapter
2250 		 * configured but not present
2251 		 */
2252 		DPRINTF(4, ("setting SO_REUSEADDR on %.16s@%s to %s\n",
2253 			    ep->name, stoa(&ep->sin),
2254 			    flag ? "on" : "off"));
2255 
2256 		if (ep->fd != INVALID_SOCKET) {
2257 			if (setsockopt(ep->fd, SOL_SOCKET, SO_REUSEADDR,
2258 				       (void *)&flag, sizeof(flag))) {
2259 				msyslog(LOG_ERR, "set_reuseaddr: setsockopt(%s, SO_REUSEADDR, %s) failed: %m",
2260 					stoa(&ep->sin), flag ? "on" : "off");
2261 			}
2262 		}
2263 	}
2264 #endif /* ! SO_EXCLUSIVEADDRUSE */
2265 }
2266 
2267 /*
2268  * This is just a wrapper around an internal function so we can
2269  * make other changes as necessary later on
2270  */
2271 void
enable_broadcast(struct interface * iface,sockaddr_u * baddr)2272 enable_broadcast(
2273 	struct interface *	iface,
2274 	sockaddr_u *		baddr
2275 	)
2276 {
2277 #ifdef OPEN_BCAST_SOCKET
2278 	socket_broadcast_enable(iface, iface->fd, baddr);
2279 #endif
2280 }
2281 
2282 #ifdef OPEN_BCAST_SOCKET
2283 /*
2284  * Enable a broadcast address to a given socket
2285  * The socket is in the ep_list all we need to do is enable
2286  * broadcasting. It is not this function's job to select the socket
2287  */
2288 static isc_boolean_t
socket_broadcast_enable(struct interface * iface,SOCKET fd,sockaddr_u * baddr)2289 socket_broadcast_enable(
2290 	struct interface *	iface,
2291 	SOCKET			fd,
2292 	sockaddr_u *		baddr
2293 	)
2294 {
2295 #ifdef SO_BROADCAST
2296 	int on = 1;
2297 
2298 	if (IS_IPV4(baddr)) {
2299 		/* if this interface can support broadcast, set SO_BROADCAST */
2300 		if (setsockopt(fd, SOL_SOCKET, SO_BROADCAST,
2301 			       (void *)&on, sizeof(on)))
2302 			msyslog(LOG_ERR,
2303 				"setsockopt(SO_BROADCAST) enable failure on address %s: %m",
2304 				stoa(baddr));
2305 		else
2306 			DPRINTF(2, ("Broadcast enabled on socket %d for address %s\n",
2307 				    fd, stoa(baddr)));
2308 	}
2309 	iface->flags |= INT_BCASTXMIT;
2310 	return ISC_TRUE;
2311 #else
2312 	return ISC_FALSE;
2313 #endif /* SO_BROADCAST */
2314 }
2315 
2316 #ifdef  OS_MISSES_SPECIFIC_ROUTE_UPDATES
2317 /*
2318  * Remove a broadcast address from a given socket
2319  * The socket is in the ep_list all we need to do is disable
2320  * broadcasting. It is not this function's job to select the socket
2321  */
2322 static isc_boolean_t
socket_broadcast_disable(struct interface * iface,sockaddr_u * baddr)2323 socket_broadcast_disable(
2324 	struct interface *	iface,
2325 	sockaddr_u *		baddr
2326 	)
2327 {
2328 #ifdef SO_BROADCAST
2329 	int off = 0;	/* This seems to be OK as an int */
2330 
2331 	if (IS_IPV4(baddr) && setsockopt(iface->fd, SOL_SOCKET,
2332 	    SO_BROADCAST, (void *)&off, sizeof(off)))
2333 		msyslog(LOG_ERR,
2334 			"setsockopt(SO_BROADCAST) disable failure on address %s: %m",
2335 			stoa(baddr));
2336 
2337 	iface->flags &= ~INT_BCASTXMIT;
2338 	return ISC_TRUE;
2339 #else
2340 	return ISC_FALSE;
2341 #endif /* SO_BROADCAST */
2342 }
2343 #endif /* OS_MISSES_SPECIFIC_ROUTE_UPDATES */
2344 
2345 #endif /* OPEN_BCAST_SOCKET */
2346 
2347 /*
2348  * return the broadcast client flag value
2349  */
2350 /*isc_boolean_t
2351 get_broadcastclient_flag(void)
2352 {
2353 	return (broadcast_client_enabled);
2354 }
2355 */
2356 
2357 /*
2358  * Check to see if the address is a multicast address
2359  */
2360 static isc_boolean_t
addr_ismulticast(sockaddr_u * maddr)2361 addr_ismulticast(
2362 	sockaddr_u *maddr
2363 	)
2364 {
2365 	isc_boolean_t result;
2366 
2367 #ifndef INCLUDE_IPV6_MULTICAST_SUPPORT
2368 	/*
2369 	 * If we don't have IPV6 support any IPV6 addr is not multicast
2370 	 */
2371 	if (IS_IPV6(maddr))
2372 		result = ISC_FALSE;
2373 	else
2374 #endif
2375 		result = IS_MCAST(maddr);
2376 
2377 	if (!result)
2378 		DPRINTF(4, ("address %s is not multicast\n",
2379 			    stoa(maddr)));
2380 
2381 	return result;
2382 }
2383 
2384 /*
2385  * Multicast servers need to set the appropriate Multicast interface
2386  * socket option in order for it to know which interface to use for
2387  * send the multicast packet.
2388  */
2389 void
enable_multicast_if(struct interface * iface,sockaddr_u * maddr)2390 enable_multicast_if(
2391 	struct interface *	iface,
2392 	sockaddr_u *		maddr
2393 	)
2394 {
2395 #ifdef MCAST
2396 #ifdef IP_MULTICAST_LOOP
2397 	TYPEOF_IP_MULTICAST_LOOP off = 0;
2398 #endif
2399 #if defined(INCLUDE_IPV6_MULTICAST_SUPPORT) && defined(IPV6_MULTICAST_LOOP)
2400 	u_int off6 = 0;
2401 #endif
2402 
2403 	REQUIRE(AF(maddr) == AF(&iface->sin));
2404 
2405 	switch (AF(&iface->sin)) {
2406 
2407 	case AF_INET:
2408 #ifdef IP_MULTICAST_LOOP
2409 		/*
2410 		 * Don't send back to itself, but allow failure to set
2411 		 */
2412 		if (setsockopt(iface->fd, IPPROTO_IP,
2413 			       IP_MULTICAST_LOOP,
2414 			       (void *)&off,
2415 			       sizeof(off))) {
2416 
2417 			msyslog(LOG_ERR,
2418 				"setsockopt IP_MULTICAST_LOOP failed: %m on socket %d, addr %s for multicast address %s",
2419 				iface->fd, stoa(&iface->sin),
2420 				stoa(maddr));
2421 		}
2422 #endif
2423 		break;
2424 
2425 	case AF_INET6:
2426 #ifdef INCLUDE_IPV6_MULTICAST_SUPPORT
2427 #ifdef IPV6_MULTICAST_LOOP
2428 		/*
2429 		 * Don't send back to itself, but allow failure to set
2430 		 */
2431 		if (setsockopt(iface->fd, IPPROTO_IPV6,
2432 			       IPV6_MULTICAST_LOOP,
2433 			       (void *) &off6, sizeof(off6))) {
2434 
2435 			msyslog(LOG_ERR,
2436 				"setsockopt IPV6_MULTICAST_LOOP failed: %m on socket %d, addr %s for multicast address %s",
2437 				iface->fd, stoa(&iface->sin),
2438 				stoa(maddr));
2439 		}
2440 #endif
2441 		break;
2442 #else
2443 		return;
2444 #endif	/* INCLUDE_IPV6_MULTICAST_SUPPORT */
2445 	}
2446 	return;
2447 #endif
2448 }
2449 
2450 /*
2451  * Add a multicast address to a given socket
2452  * The socket is in the ep_list all we need to do is enable
2453  * multicasting. It is not this function's job to select the socket
2454  */
2455 #if defined(MCAST)
2456 static isc_boolean_t
socket_multicast_enable(endpt * iface,sockaddr_u * maddr)2457 socket_multicast_enable(
2458 	endpt *		iface,
2459 	sockaddr_u *	maddr
2460 	)
2461 {
2462 	struct ip_mreq		mreq;
2463 # ifdef INCLUDE_IPV6_MULTICAST_SUPPORT
2464 	struct ipv6_mreq	mreq6;
2465 # endif
2466 	switch (AF(maddr)) {
2467 
2468 	case AF_INET:
2469 		ZERO(mreq);
2470 		mreq.imr_multiaddr = SOCK_ADDR4(maddr);
2471 		mreq.imr_interface.s_addr = htonl(INADDR_ANY);
2472 		if (setsockopt(iface->fd,
2473 			       IPPROTO_IP,
2474 			       IP_ADD_MEMBERSHIP,
2475 			       (void *)&mreq,
2476 			       sizeof(mreq))) {
2477 			DPRINTF(2, (
2478 				"setsockopt IP_ADD_MEMBERSHIP failed: %m on socket %d, addr %s for %x / %x (%s)",
2479 				iface->fd, stoa(&iface->sin),
2480 				mreq.imr_multiaddr.s_addr,
2481 				mreq.imr_interface.s_addr,
2482 				stoa(maddr)));
2483 			return ISC_FALSE;
2484 		}
2485 		DPRINTF(4, ("Added IPv4 multicast membership on socket %d, addr %s for %x / %x (%s)\n",
2486 			    iface->fd, stoa(&iface->sin),
2487 			    mreq.imr_multiaddr.s_addr,
2488 			    mreq.imr_interface.s_addr, stoa(maddr)));
2489 		break;
2490 
2491 	case AF_INET6:
2492 # ifdef INCLUDE_IPV6_MULTICAST_SUPPORT
2493 		/*
2494 		 * Enable reception of multicast packets.
2495 		 * If the address is link-local we can get the
2496 		 * interface index from the scope id. Don't do this
2497 		 * for other types of multicast addresses. For now let
2498 		 * the kernel figure it out.
2499 		 */
2500 		ZERO(mreq6);
2501 		mreq6.ipv6mr_multiaddr = SOCK_ADDR6(maddr);
2502 		mreq6.ipv6mr_interface = iface->ifindex;
2503 
2504 		if (setsockopt(iface->fd, IPPROTO_IPV6,
2505 			       IPV6_JOIN_GROUP, (void *)&mreq6,
2506 			       sizeof(mreq6))) {
2507 			DPRINTF(2, (
2508 				"setsockopt IPV6_JOIN_GROUP failed: %m on socket %d, addr %s for interface %u (%s)",
2509 				iface->fd, stoa(&iface->sin),
2510 				mreq6.ipv6mr_interface, stoa(maddr)));
2511 			return ISC_FALSE;
2512 		}
2513 		DPRINTF(4, ("Added IPv6 multicast group on socket %d, addr %s for interface %u (%s)\n",
2514 			    iface->fd, stoa(&iface->sin),
2515 			    mreq6.ipv6mr_interface, stoa(maddr)));
2516 # else
2517 		return ISC_FALSE;
2518 # endif	/* INCLUDE_IPV6_MULTICAST_SUPPORT */
2519 	}
2520 	iface->flags |= INT_MCASTOPEN;
2521 	iface->num_mcast++;
2522 
2523 	return ISC_TRUE;
2524 }
2525 #endif	/* MCAST */
2526 
2527 
2528 /*
2529  * Remove a multicast address from a given socket
2530  * The socket is in the ep_list all we need to do is disable
2531  * multicasting. It is not this function's job to select the socket
2532  */
2533 #ifdef MCAST
2534 static isc_boolean_t
socket_multicast_disable(struct interface * iface,sockaddr_u * maddr)2535 socket_multicast_disable(
2536 	struct interface *	iface,
2537 	sockaddr_u *		maddr
2538 	)
2539 {
2540 # ifdef INCLUDE_IPV6_MULTICAST_SUPPORT
2541 	struct ipv6_mreq mreq6;
2542 # endif
2543 	struct ip_mreq mreq;
2544 
2545 	ZERO(mreq);
2546 
2547 	if (find_addr_in_list(maddr) == NULL) {
2548 		DPRINTF(4, ("socket_multicast_disable(%s): not found\n",
2549 			    stoa(maddr)));
2550 		return ISC_TRUE;
2551 	}
2552 
2553 	switch (AF(maddr)) {
2554 
2555 	case AF_INET:
2556 		mreq.imr_multiaddr = SOCK_ADDR4(maddr);
2557 		mreq.imr_interface = SOCK_ADDR4(&iface->sin);
2558 		if (setsockopt(iface->fd, IPPROTO_IP,
2559 			       IP_DROP_MEMBERSHIP, (void *)&mreq,
2560 			       sizeof(mreq))) {
2561 
2562 			msyslog(LOG_ERR,
2563 				"setsockopt IP_DROP_MEMBERSHIP failed: %m on socket %d, addr %s for %x / %x (%s)",
2564 				iface->fd, stoa(&iface->sin),
2565 				SRCADR(maddr), SRCADR(&iface->sin),
2566 				stoa(maddr));
2567 			return ISC_FALSE;
2568 		}
2569 		break;
2570 	case AF_INET6:
2571 # ifdef INCLUDE_IPV6_MULTICAST_SUPPORT
2572 		/*
2573 		 * Disable reception of multicast packets
2574 		 * If the address is link-local we can get the
2575 		 * interface index from the scope id.  Don't do this
2576 		 * for other types of multicast addresses. For now let
2577 		 * the kernel figure it out.
2578 		 */
2579 		mreq6.ipv6mr_multiaddr = SOCK_ADDR6(maddr);
2580 		mreq6.ipv6mr_interface = iface->ifindex;
2581 
2582 		if (setsockopt(iface->fd, IPPROTO_IPV6,
2583 			       IPV6_LEAVE_GROUP, (void *)&mreq6,
2584 			       sizeof(mreq6))) {
2585 
2586 			msyslog(LOG_ERR,
2587 				"setsockopt IPV6_LEAVE_GROUP failure: %m on socket %d, addr %s for %d (%s)",
2588 				iface->fd, stoa(&iface->sin),
2589 				iface->ifindex, stoa(maddr));
2590 			return ISC_FALSE;
2591 		}
2592 		break;
2593 # else
2594 		return ISC_FALSE;
2595 # endif	/* INCLUDE_IPV6_MULTICAST_SUPPORT */
2596 	}
2597 
2598 	iface->num_mcast--;
2599 	if (!iface->num_mcast)
2600 		iface->flags &= ~INT_MCASTOPEN;
2601 
2602 	return ISC_TRUE;
2603 }
2604 #endif	/* MCAST */
2605 
2606 /*
2607  * io_setbclient - open the broadcast client sockets
2608  */
2609 void
io_setbclient(void)2610 io_setbclient(void)
2611 {
2612 #ifdef OPEN_BCAST_SOCKET
2613 	endpt *		ep;
2614 	unsigned int	nif, ni4, ni6;
2615 
2616 	nif = ni4 = ni6 = 0;
2617 	set_reuseaddr(1);
2618 
2619 	for (ep = ep_list; ep != NULL; ep = ep->elink) {
2620 		/* count IPv6 vs IPv4 interfaces. Needed later to decide
2621 		 * if we should log an error or not.
2622 		 */
2623 		switch (ep->family) {
2624 		case AF_INET : ++ni4; break;
2625 		case AF_INET6: ++ni6; break;
2626 		default      :        break;
2627 		}
2628 
2629 		if (ep->flags & (INT_WILDCARD | INT_LOOPBACK))
2630 			continue;
2631 
2632 		/* use only allowed addresses */
2633 		if (ep->ignore_packets)
2634 			continue;
2635 
2636 		/* Need a broadcast-capable interface */
2637 		if (!(ep->flags & INT_BROADCAST))
2638 			continue;
2639 
2640 		/* Only IPv4 addresses are valid for broadcast */
2641 		REQUIRE(IS_IPV4(&ep->bcast));
2642 
2643 		/* Do we already have the broadcast address open? */
2644 		if (ep->flags & INT_BCASTOPEN) {
2645 			/*
2646 			 * account for already open interfaces to avoid
2647 			 * misleading warning below
2648 			 */
2649 			nif++;
2650 			continue;
2651 		}
2652 
2653 		/*
2654 		 * Try to open the broadcast address
2655 		 */
2656 		ep->family = AF_INET;
2657 		ep->bfd = open_socket(&ep->bcast, 1, 0, ep);
2658 
2659 		/*
2660 		 * If we succeeded then we use it otherwise enable
2661 		 * broadcast on the interface address
2662 		 */
2663 		if (ep->bfd != INVALID_SOCKET) {
2664 			nif++;
2665 			ep->flags |= INT_BCASTOPEN;
2666 			msyslog(LOG_INFO,
2667 				"Listen for broadcasts to %s on interface #%d %s",
2668 				stoa(&ep->bcast), ep->ifnum, ep->name);
2669 		} else switch (errno) {
2670 			/* Silently ignore EADDRINUSE as we probably
2671 			 * opened the socket already for an address in
2672 			 * the same network */
2673 		case EADDRINUSE:
2674 			/* Some systems cannot bind a socket to a broadcast
2675 			 * address, as that is not a valid host address. */
2676 		case EADDRNOTAVAIL:
2677 #		    ifdef SYS_WINNT	/*TODO: use for other systems, too? */
2678 			/* avoid recurrence here -- if we already have a
2679 			 * regular socket, it's quite useless to try this
2680 			 * again.
2681 			 */
2682 			if (ep->fd != INVALID_SOCKET) {
2683 				ep->flags |= INT_BCASTOPEN;
2684 				nif++;
2685 			}
2686 #		    endif
2687 			break;
2688 
2689 		default:
2690 			msyslog(LOG_INFO,
2691 				"failed to listen for broadcasts to %s on interface #%d %s",
2692 				stoa(&ep->bcast), ep->ifnum, ep->name);
2693 			break;
2694 		}
2695 	}
2696 	set_reuseaddr(0);
2697 	if (nif != 0) {
2698 		broadcast_client_enabled = ISC_TRUE;
2699 		DPRINTF(1, ("io_setbclient: listening to %d broadcast addresses\n", nif));
2700 	} else {
2701 		broadcast_client_enabled = ISC_FALSE;
2702 		/* This is expected when having only IPv6 interfaces
2703 		 * and no IPv4 interfaces at all. We suppress the error
2704 		 * log in that case... everything else should work!
2705 		 */
2706 		if (ni4 && !ni6) {
2707 			msyslog(LOG_ERR,
2708 				"Unable to listen for broadcasts, no broadcast interfaces available");
2709 		}
2710 	}
2711 #else
2712 	msyslog(LOG_ERR,
2713 		"io_setbclient: Broadcast Client disabled by build");
2714 #endif	/* OPEN_BCAST_SOCKET */
2715 }
2716 
2717 /*
2718  * io_unsetbclient - close the broadcast client sockets
2719  */
2720 void
io_unsetbclient(void)2721 io_unsetbclient(void)
2722 {
2723 	endpt *ep;
2724 
2725 	for (ep = ep_list; ep != NULL; ep = ep->elink) {
2726 		if (INT_WILDCARD & ep->flags)
2727 			continue;
2728 		if (!(INT_BCASTOPEN & ep->flags))
2729 			continue;
2730 
2731 		if (ep->bfd != INVALID_SOCKET) {
2732 			/* destroy broadcast listening socket */
2733 			msyslog(LOG_INFO,
2734 				"stop listening for broadcasts to %s on interface #%d %s",
2735 				stoa(&ep->bcast), ep->ifnum, ep->name);
2736 #		    ifdef HAVE_IO_COMPLETION_PORT
2737 			io_completion_port_remove_socket(ep->bfd, ep);
2738 #		    endif
2739 			close_and_delete_fd_from_list(ep->bfd);
2740 			ep->bfd = INVALID_SOCKET;
2741 		}
2742 		ep->flags &= ~INT_BCASTOPEN;
2743 	}
2744 	broadcast_client_enabled = ISC_FALSE;
2745 }
2746 
/*
 * io_multicast_add() - add multicast group address
 *
 * Does nothing for a non-multicast address or for a group that some
 * endpoint flagged INT_MCASTOPEN already serves.
 *
 * Without MULTICAST_NONEWSOCKET a new endpoint with its own socket
 * bound to the group address is created; if that socket cannot be
 * opened, the wildcard endpoint of the matching address family is
 * used instead.  With MULTICAST_NONEWSOCKET every suitable existing
 * endpoint socket is joined to the group.
 *
 * Finally the group address is recorded in the address list.  Without
 * MCAST only an error is logged.
 */
void
io_multicast_add(
	sockaddr_u *addr
	)
{
#ifdef MCAST
	endpt *	ep;
	endpt *	one_ep;		/* endpoint recorded for addr in the address list */

	/*
	 * Check to see if this is a multicast address
	 */
	if (!addr_ismulticast(addr))
		return;

	/* If we already have it we can just return */
	if (NULL != find_flagged_addr_in_list(addr, INT_MCASTOPEN)) {
		msyslog(LOG_INFO,
			"Duplicate request found for multicast address %s",
			stoa(addr));
		return;
	}

# ifndef MULTICAST_NONEWSOCKET
	ep = new_interface(NULL);

	/*
	 * Open a new socket for the multicast address
	 */
	ep->sin = *addr;
	SET_PORT(&ep->sin, NTP_PORT);
	ep->family = AF(&ep->sin);
	AF(&ep->mask) = ep->family;
	SET_ONESMASK(&ep->mask);

	/*
	 * NOTE(review): unlike io_setbclient(), this function never
	 * calls set_reuseaddr(0) afterwards, so SO_REUSEADDR appears to
	 * stay enabled on the endpoint sockets - confirm this is
	 * intended.
	 */
	set_reuseaddr(1);
	ep->bfd = INVALID_SOCKET;
	ep->fd = open_socket(&ep->sin, 0, 0, ep);
	if (ep->fd != INVALID_SOCKET) {
		ep->ignore_packets = ISC_FALSE;
		ep->flags |= INT_MCASTIF;
		ep->ifindex = SCOPE(addr);

		strlcpy(ep->name, "multicast", sizeof(ep->name));
		DPRINT_INTERFACE(2, (ep, "multicast add ", "\n"));
		add_interface(ep);
		log_listen_address(ep);
	} else {
		/* bind failed, re-use wildcard interface */
		delete_interface(ep);

		if (IS_IPV4(addr))
			ep = wildipv4;
		else if (IS_IPV6(addr))
			ep = wildipv6;
		else
			ep = NULL;

		if (ep != NULL) {
			/* HACK ! -- stuff in an address */
			/* because we don't bind addr? DH */
			ep->bcast = *addr;
			msyslog(LOG_ERR,
				"multicast address %s using wildcard interface #%d %s",
				stoa(addr), ep->ifnum, ep->name);
		} else {
			msyslog(LOG_ERR,
				"No multicast socket available to use for address %s",
				stoa(addr));
			return;
		}
	}
	/*
	 * The block opened here is closed by the single '}' after the
	 * #endif below; in the other preprocessor arm that same '}'
	 * closes the for loop instead.
	 */
	{	/* in place of the { following for in #else clause */
		one_ep = ep;
# else	/* MULTICAST_NONEWSOCKET follows */
	/*
	 * For the case where we can't use a separate socket (Windows)
	 * join each applicable endpoint socket to the group address.
	 */
	if (IS_IPV4(addr))
		one_ep = wildipv4;
	else
		one_ep = wildipv6;
	for (ep = ep_list; ep != NULL; ep = ep->elink) {
		if (ep->ignore_packets || AF(&ep->sin) != AF(addr) ||
		    !(INT_MULTICAST & ep->flags) ||
		    (INT_LOOPBACK | INT_WILDCARD) & ep->flags)
			continue;
		one_ep = ep;
# endif	/* MULTICAST_NONEWSOCKET */
		/*
		 * Shared by both arms.  A failed join is not logged
		 * here; socket_multicast_enable() only traces it at
		 * debug level.
		 */
		if (socket_multicast_enable(ep, addr))
			msyslog(LOG_INFO,
				"Joined %s socket to multicast group %s",
				stoa(&ep->sin),
				stoa(addr));
	}

	add_addr_to_list(addr, one_ep);
#else	/* !MCAST  follows*/
	msyslog(LOG_ERR,
		"Can not add multicast address %s: no multicast support",
		stoa(addr));
#endif
	return;
}
2855 
2856 
2857 /*
2858  * io_multicast_del() - delete multicast group address
2859  */
2860 void
2861 io_multicast_del(
2862 	sockaddr_u *	addr
2863 	)
2864 {
2865 #ifdef MCAST
2866 	endpt *iface;
2867 
2868 	/*
2869 	 * Check to see if this is a multicast address
2870 	 */
2871 	if (!addr_ismulticast(addr)) {
2872 		msyslog(LOG_ERR, "invalid multicast address %s",
2873 			stoa(addr));
2874 		return;
2875 	}
2876 
2877 	/*
2878 	 * Disable reception of multicast packets
2879 	 */
2880 	while ((iface = find_flagged_addr_in_list(addr, INT_MCASTOPEN))
2881 	       != NULL)
2882 		socket_multicast_disable(iface, addr);
2883 
2884 	delete_addr_from_list(addr);
2885 
2886 #else /* not MCAST */
2887 	msyslog(LOG_ERR,
2888 		"Can not delete multicast address %s: no multicast support",
2889 		stoa(addr));
2890 #endif /* not MCAST */
2891 }
2892 
2893 
/*
 * open_socket - open a socket, returning the file descriptor
 *
 * addr		  local address (and port) the UDP socket is bound to
 * bcast	  non-zero: for IPv4, also enable broadcasting on the
 *		  socket via socket_broadcast_enable()
 * turn_off_reuse non-zero: clear SO_REUSEADDR instead of setting it,
 *		  and log a bind() failure only in debug builds with
 *		  debug > 1
 * interf	  endpoint the socket belongs to; its flags are read
 *		  and it is handed on to the helpers called below
 *
 * Returns the bound, non-blocking descriptor, registered through
 * add_fd_to_list(), or INVALID_SOCKET when IPv6 is not working, the
 * address family is unsupported, SO_REUSEADDR cannot be set or bind()
 * fails.  Any other socket() failure terminates the process.
 */

static SOCKET
open_socket(
	sockaddr_u *	addr,
	int		bcast,
	int		turn_off_reuse,
	endpt *		interf
	)
{
	SOCKET	fd;
	int	errval;
	/*
	 * int is OK for REUSEADR per
	 * http://www.kohala.com/start/mcast.api.txt
	 */
	int	on = 1;
	int	off = 0;

	/* do not even try IPv6 when it is known not to work */
	if (IS_IPV6(addr) && !ipv6_works)
		return INVALID_SOCKET;

	/* create a datagram (UDP) socket */
	fd = socket(AF(addr), SOCK_DGRAM, 0);
	if (INVALID_SOCKET == fd) {
		/* save the error code before msyslog() can disturb it */
		errval = socket_errno();
		msyslog(LOG_ERR,
			"socket(AF_INET%s, SOCK_DGRAM, 0) failed on address %s: %m",
			IS_IPV6(addr) ? "6" : "", stoa(addr));

		/* an unsupported protocol or family is not fatal */
		if (errval == EPROTONOSUPPORT ||
		    errval == EAFNOSUPPORT ||
		    errval == EPFNOSUPPORT)
			return (INVALID_SOCKET);

		/* restore the saved code for the %m expansion below */
		errno = errval;
		msyslog(LOG_ERR,
			"unexpected socket() error %m code %d (not EPROTONOSUPPORT nor EAFNOSUPPORT nor EPFNOSUPPORT) - exiting",
			errno);
		exit(1);
	}

#ifdef SYS_WINNT
	connection_reset_fix(fd, addr);
#endif
	/*
	 * Fixup the file descriptor for some systems
	 * See bug #530 for details of the issue.
	 */
	fd = move_fd(fd);

	/*
	 * set SO_REUSEADDR since we will be binding the same port
	 * number on each interface according to turn_off_reuse.
	 * This is undesirable on Windows versions starting with
	 * Windows XP (numeric version 5.1).
	 */
#ifdef SYS_WINNT
	if (isc_win32os_versioncheck(5, 1, 0, 0) < 0)  /* before 5.1 */
#endif
		if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
			       (void *)((turn_off_reuse)
					    ? &off
					    : &on),
			       sizeof(on))) {

			msyslog(LOG_ERR,
				"setsockopt SO_REUSEADDR %s fails for address %s: %m",
				(turn_off_reuse)
				    ? "off"
				    : "on",
				stoa(addr));
			closesocket(fd);
			return INVALID_SOCKET;
		}
#ifdef SO_EXCLUSIVEADDRUSE
	/*
	 * setting SO_EXCLUSIVEADDRUSE on the wildcard we open
	 * first will cause more specific binds to fail.
	 */
	if (!(interf->flags & INT_WILDCARD))
		set_excladdruse(fd);
#endif

	/*
	 * IPv4 specific options go here
	 */
	if (IS_IPV4(addr)) {
#if defined(IPPROTO_IP) && defined(IP_TOS)
		/* apply the global qos value; a failure is only logged */
		if (setsockopt(fd, IPPROTO_IP, IP_TOS, (void *)&qos,
			       sizeof(qos)))
			msyslog(LOG_ERR,
				"setsockopt IP_TOS (%02x) fails on address %s: %m",
				qos, stoa(addr));
#endif /* IPPROTO_IP && IP_TOS */
		if (bcast)
			socket_broadcast_enable(interf, fd, addr);
	}

	/*
	 * IPv6 specific options go here
	 */
	if (IS_IPV6(addr)) {
#if defined(IPPROTO_IPV6) && defined(IPV6_TCLASS)
		/* apply the global qos value; a failure is only logged */
		if (setsockopt(fd, IPPROTO_IPV6, IPV6_TCLASS, (void *)&qos,
			       sizeof(qos)))
			msyslog(LOG_ERR,
				"setsockopt IPV6_TCLASS (%02x) fails on address %s: %m",
				qos, stoa(addr));
#endif /* IPPROTO_IPV6 && IPV6_TCLASS */
#ifdef IPV6_V6ONLY
		/* set IPV6_V6ONLY only where the probe reports success */
		if (isc_net_probe_ipv6only() == ISC_R_SUCCESS
		    && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY,
		    (void *)&on, sizeof(on)))
			msyslog(LOG_ERR,
				"setsockopt IPV6_V6ONLY on fails on address %s: %m",
				stoa(addr));
#endif
#ifdef IPV6_BINDV6ONLY
		if (setsockopt(fd, IPPROTO_IPV6, IPV6_BINDV6ONLY,
		    (void *)&on, sizeof(on)))
			msyslog(LOG_ERR,
				"setsockopt IPV6_BINDV6ONLY on fails on address %s: %m",
				stoa(addr));
#endif
	}

#ifdef OS_NEEDS_REUSEADDR_FOR_IFADDRBIND
	/*
	 * some OSes don't allow binding to more specific
	 * addresses if a wildcard address already bound
	 * to the port and SO_REUSEADDR is not set
	 */
	if (!is_wildcard_addr(addr))
		set_wildcard_reuse(AF(addr), 1);
#endif

	/*
	 * bind the local address.
	 */
	errval = bind(fd, &addr->sa, SOCKLEN(addr));

#ifdef OS_NEEDS_REUSEADDR_FOR_IFADDRBIND
	/*
	 * NOTE(review): this call runs between bind() and the %m
	 * expansion in the failure branch below; if it touches errno
	 * the logged reason may no longer be the one from bind() -
	 * confirm.
	 */
	if (!is_wildcard_addr(addr))
		set_wildcard_reuse(AF(addr), 0);
#endif

	if (errval < 0) {
		/*
		 * Don't log this under all conditions
		 */
		if (turn_off_reuse == 0
#ifdef DEBUG
		    || debug > 1
#endif
		    ) {
			msyslog(LOG_ERR,
				"bind(%d) AF_INET%s %s#%d%s flags 0x%x failed: %m",
				fd, IS_IPV6(addr) ? "6" : "",
				stoa(addr), SRCPORT(addr),
				IS_MCAST(addr) ? " (multicast)" : "",
				interf->flags);
		}

		closesocket(fd);

		return INVALID_SOCKET;
	}

	/*
	 * Ask for receive timestamps in whichever flavours the build
	 * knows about.  A failure is logged at LOG_DEBUG only and the
	 * socket is still used.
	 */
#ifdef HAVE_TIMESTAMP
	{
		if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMP,
			       (void *)&on, sizeof(on)))
			msyslog(LOG_DEBUG,
				"setsockopt SO_TIMESTAMP on fails on address %s: %m",
				stoa(addr));
		else
			DPRINTF(4, ("setsockopt SO_TIMESTAMP enabled on fd %d address %s\n",
				    fd, stoa(addr)));
	}
#endif
#ifdef HAVE_TIMESTAMPNS
	{
		if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPNS,
			       (void *)&on, sizeof(on)))
			msyslog(LOG_DEBUG,
				"setsockopt SO_TIMESTAMPNS on fails on address %s: %m",
				stoa(addr));
		else
			DPRINTF(4, ("setsockopt SO_TIMESTAMPNS enabled on fd %d address %s\n",
				    fd, stoa(addr)));
	}
#endif
#ifdef HAVE_BINTIME
	{
		if (setsockopt(fd, SOL_SOCKET, SO_BINTIME,
			       (void *)&on, sizeof(on)))
			msyslog(LOG_DEBUG,
				"setsockopt SO_BINTIME on fails on address %s: %m",
				stoa(addr));
		else
			DPRINTF(4, ("setsockopt SO_BINTIME enabled on fd %d address %s\n",
				    fd, stoa(addr)));
	}
#endif

	DPRINTF(4, ("bind(%d) AF_INET%s, addr %s%%%d#%d, flags 0x%x\n",
		   fd, IS_IPV6(addr) ? "6" : "", stoa(addr),
		   SCOPE(addr), SRCPORT(addr), interf->flags));

	make_socket_nonblocking(fd);

#ifdef HAVE_SIGNALED_IO
	init_socket_sig(fd);
#endif /* HAVE_SIGNALED_IO */

	/* register the descriptor in the fd list */
	add_fd_to_list(fd, FD_TYPE_SOCKET);

#if !defined(SYS_WINNT) && !defined(VMS)
	DPRINTF(4, ("flags for fd %d: 0x%x\n", fd,
		    fcntl(fd, F_GETFL, 0)));
#endif /* !SYS_WINNT && !VMS */

#if defined(HAVE_IO_COMPLETION_PORT)
/*
 * Add the socket to the completion port
 */
	if (!io_completion_port_add_socket(fd, interf, bcast)) {
		msyslog(LOG_ERR, "unable to set up io completion port - EXITING");
		exit(1);
	}
#endif
	return fd;
}
3130 
3131 
3132 
3133 /* XXX ELIMINATE sendpkt similar in ntpq.c, ntpdc.c, ntp_io.c, ntptrace.c */
3134 /*
3135  * sendpkt - send a packet to the specified destination. Maintain a
3136  * send error cache so that only the first consecutive error for a
3137  * destination is logged.
3138  */
3139 void
3140 sendpkt(
3141 	sockaddr_u *		dest,
3142 	struct interface *	ep,
3143 	int			ttl,
3144 	struct pkt *		pkt,
3145 	int			len
3146 	)
3147 {
3148 	endpt *	src;
3149 	int	ismcast;
3150 	int	cc;
3151 	int	rc;
3152 	u_char	cttl;
3153 	l_fp	fp_zero = { { 0 }, 0 };
3154 	l_fp	org, rec, xmt;
3155 
3156 	ismcast = IS_MCAST(dest);
3157 	if (!ismcast)
3158 		src = ep;
3159 	else
3160 		src = (IS_IPV4(dest))
3161 			  ? mc4_list
3162 			  : mc6_list;
3163 
3164 	if (NULL == src) {
3165 		/*
3166 		 * unbound peer - drop request and wait for better
3167 		 * network conditions
3168 		 */
3169 		DPRINTF(2, ("%ssendpkt(dst=%s, ttl=%d, len=%d): no interface - IGNORED\n",
3170 			    ismcast ? "\tMCAST\t***** " : "",
3171 			    stoa(dest), ttl, len));
3172 		return;
3173 	}
3174 
3175 	do {
3176 		DPRINTF(2, ("%ssendpkt(%d, dst=%s, src=%s, ttl=%d, len=%d)\n",
3177 			    ismcast ? "\tMCAST\t***** " : "", src->fd,
3178 			    stoa(dest), stoa(&src->sin), ttl, len));
3179 #ifdef MCAST
3180 		/*
3181 		 * for the moment we use the bcast option to set multicast ttl
3182 		 */
3183 		if (ismcast && ttl > 0 && ttl != src->last_ttl) {
3184 			/*
3185 			 * set the multicast ttl for outgoing packets
3186 			 */
3187 			switch (AF(&src->sin)) {
3188 
3189 			case AF_INET :
3190 				cttl = (u_char)ttl;
3191 				rc = setsockopt(src->fd, IPPROTO_IP,
3192 						IP_MULTICAST_TTL,
3193 						(void *)&cttl,
3194 						sizeof(cttl));
3195 				break;
3196 
3197 # ifdef INCLUDE_IPV6_SUPPORT
3198 			case AF_INET6 :
3199 				rc = setsockopt(src->fd, IPPROTO_IPV6,
3200 						 IPV6_MULTICAST_HOPS,
3201 						 (void *)&ttl,
3202 						 sizeof(ttl));
3203 				break;
3204 # endif	/* INCLUDE_IPV6_SUPPORT */
3205 
3206 			default:
3207 				rc = 0;
3208 			}
3209 
3210 			if (!rc)
3211 				src->last_ttl = ttl;
3212 			else
3213 				msyslog(LOG_ERR,
3214 					"setsockopt IP_MULTICAST_TTL/IPV6_MULTICAST_HOPS fails on address %s: %m",
3215 					stoa(&src->sin));
3216 		}
3217 #endif	/* MCAST */
3218 
3219 #ifdef SIM
3220 		cc = simulate_server(dest, src, pkt);
3221 #elif defined(HAVE_IO_COMPLETION_PORT)
3222 		cc = io_completion_port_sendto(src, src->fd, pkt,
3223 			(size_t)len, (sockaddr_u *)&dest->sa);
3224 #else
3225 		cc = sendto(src->fd, (char *)pkt, (u_int)len, 0,
3226 			    &dest->sa, SOCKLEN(dest));
3227 #endif
3228 		if (cc == -1) {
3229 			src->notsent++;
3230 			packets_notsent++;
3231 		} else	{
3232 			src->sent++;
3233 			packets_sent++;
3234 		}
3235 		if (ismcast)
3236 			src = src->mclink;
3237 	} while (ismcast && src != NULL);
3238 
3239 	/* HMS: pkt->rootdisp is usually random here */
3240 	NTOHL_FP(&pkt->org, &org);
3241 	NTOHL_FP(&pkt->rec, &rec);
3242 	NTOHL_FP(&pkt->xmt, &xmt);
3243 	record_raw_stats(src ? &src->sin : NULL, dest,
3244 			&org, &rec, &xmt, &fp_zero,
3245 			PKT_LEAP(pkt->li_vn_mode),
3246 			PKT_VERSION(pkt->li_vn_mode),
3247 			PKT_MODE(pkt->li_vn_mode),
3248 			pkt->stratum,
3249 			pkt->ppoll, pkt->precision,
3250 			pkt->rootdelay, pkt->rootdisp, pkt->refid,
3251 			len - MIN_V4_PKT_LEN, (u_char *)&pkt->exten);
3252 
3253 	return;
3254 }
3255 
3256 
3257 #if !defined(HAVE_IO_COMPLETION_PORT)
3258 #if !defined(HAVE_SIGNALED_IO)
/*
 * fdbits - generate ascii representation of fd_set (FAU debug support)
 * HFDF format - highest fd first.
 *
 * count is the highest descriptor number to report, set the fd_set to
 * render.  The result holds one character per descriptor from the
 * (clamped) count down to 0: '#' if the descriptor is a member of the
 * set, '-' otherwise.  A negative count yields the empty string.
 *
 * The string lives in a static buffer which is overwritten by the
 * next call.
 */
static char *
fdbits(
	int		count,
	const fd_set*	set
	)
{
	static char	buffer[256];
	char *		buf = buffer;
	int		limit;

	/*
	 * Descriptors count..0 produce count + 1 characters plus the
	 * terminating NUL, so the highest descriptor that still fits
	 * is sizeof(buffer) - 2.  Never probe beyond the fd_set either.
	 */
	limit = (int)sizeof(buffer) - 2;
	if (limit > (int)FD_SETSIZE - 1)
		limit = (int)FD_SETSIZE - 1;
	if (count > limit)
		count = limit;

	while (count >= 0) {
		*buf++ = FD_ISSET(count, set) ? '#' : '-';
		count--;
	}
	*buf = '\0';

	return buffer;
}
3282 #endif
3283 
3284 #ifdef REFCLOCK
3285 /*
3286  * Routine to read the refclock packets for a specific interface
3287  * Return the number of bytes read. That way we know if we should
3288  * read it again or go on to the next one if no bytes returned
3289  */
3290 static inline int
3291 read_refclock_packet(
3292 	SOCKET			fd,
3293 	struct refclockio *	rp,
3294 	l_fp			ts
3295 	)
3296 {
3297 	u_int			read_count;
3298 	int			buflen;
3299 	int			saved_errno;
3300 	int			consumed;
3301 	struct recvbuf *	rb;
3302 
3303 	rb = get_free_recv_buffer(TRUE);
3304 
3305 	if (NULL == rb) {
3306 		/*
3307 		 * No buffer space available - just drop the 'packet'.
3308 		 * Since this is a non-blocking character stream we read
3309 		 * all data that we can.
3310 		 *
3311 		 * ...hmmmm... what about "tcflush(fd,TCIFLUSH)" here?!?
3312 		 */
3313 		char buf[128];
3314 		do
3315 			buflen = read(fd, buf, sizeof(buf));
3316 		while (buflen > 0);
3317 		packets_dropped++;
3318 		return (buflen);
3319 	}
3320 
3321 	/* TALOS-CAN-0064: avoid signed/unsigned clashes that can lead
3322 	 * to buffer overrun and memory corruption
3323 	 */
3324 	if (rp->datalen <= 0 || (size_t)rp->datalen > sizeof(rb->recv_space))
3325 		read_count = sizeof(rb->recv_space);
3326 	else
3327 		read_count = (u_int)rp->datalen;
3328 	do {
3329 		buflen = read(fd, (char *)&rb->recv_space, read_count);
3330 	} while (buflen < 0 && EINTR == errno);
3331 
3332 	if (buflen <= 0) {
3333 		saved_errno = errno;
3334 		freerecvbuf(rb);
3335 		errno = saved_errno;
3336 		return buflen;
3337 	}
3338 
3339 	/*
3340 	 * Got one. Mark how and when it got here,
3341 	 * put it on the full list and do bookkeeping.
3342 	 */
3343 	rb->recv_length = buflen;
3344 	rb->recv_peer = rp->srcclock;
3345 	rb->dstadr = 0;
3346 	rb->fd = fd;
3347 	rb->recv_time = ts;
3348 	rb->receiver = rp->clock_recv;
3349 
3350 	consumed = indicate_refclock_packet(rp, rb);
3351 	if (!consumed) {
3352 		rp->recvcount++;
3353 		packets_received++;
3354 	}
3355 
3356 	return buflen;
3357 }
3358 #endif	/* REFCLOCK */
3359 
3360 
3361 #ifdef HAVE_PACKET_TIMESTAMP
3362 /*
3363  * extract timestamps from control message buffer
3364  */
3365 static l_fp
3366 fetch_timestamp(
3367 	struct recvbuf *	rb,
3368 	struct msghdr *		msghdr,
3369 	l_fp			ts
3370 	)
3371 {
3372 	struct cmsghdr *	cmsghdr;
3373 	unsigned long		ticks;
3374 	double			fuzz;
3375 	l_fp			lfpfuzz;
3376 	l_fp			nts;
3377 #ifdef DEBUG_TIMING
3378 	l_fp			dts;
3379 #endif
3380 
3381 	cmsghdr = CMSG_FIRSTHDR(msghdr);
3382 	while (cmsghdr != NULL) {
3383 		switch (cmsghdr->cmsg_type)
3384 		{
3385 #ifdef HAVE_BINTIME
3386 		case SCM_BINTIME:
3387 #endif  /* HAVE_BINTIME */
3388 #ifdef HAVE_TIMESTAMPNS
3389 		case SCM_TIMESTAMPNS:
3390 #endif	/* HAVE_TIMESTAMPNS */
3391 #ifdef HAVE_TIMESTAMP
3392 		case SCM_TIMESTAMP:
3393 #endif	/* HAVE_TIMESTAMP */
3394 #if defined(HAVE_BINTIME) || defined (HAVE_TIMESTAMPNS) || defined(HAVE_TIMESTAMP)
3395 			switch (cmsghdr->cmsg_type)
3396 			{
3397 #ifdef HAVE_BINTIME
3398 			case SCM_BINTIME:
3399 				{
3400 					struct bintime	pbt;
3401 					memcpy(&pbt, CMSG_DATA(cmsghdr), sizeof(pbt));
3402 					/*
3403 					 * bintime documentation is at http://phk.freebsd.dk/pubs/timecounter.pdf
3404 					 */
3405 					nts.l_i = pbt.sec + JAN_1970;
3406 					nts.l_uf = (u_int32)(pbt.frac >> 32);
3407 					if (sys_tick > measured_tick &&
3408 					    sys_tick > 1e-9) {
3409 						ticks = (unsigned long)(nts.l_uf / (unsigned long)(sys_tick * FRAC));
3410 						nts.l_uf = (unsigned long)(ticks * (unsigned long)(sys_tick * FRAC));
3411 					}
3412 					DPRINTF(4, ("fetch_timestamp: system bintime network time stamp: %ld.%09lu\n",
3413 						    pbt.sec, (unsigned long)((nts.l_uf / FRAC) * 1e9)));
3414 				}
3415 				break;
3416 #endif  /* HAVE_BINTIME */
3417 #ifdef HAVE_TIMESTAMPNS
3418 			case SCM_TIMESTAMPNS:
3419 				{
3420 					struct timespec	pts;
3421 					memcpy(&pts, CMSG_DATA(cmsghdr), sizeof(pts));
3422 					if (sys_tick > measured_tick &&
3423 					    sys_tick > 1e-9) {
3424 						ticks = (unsigned long)((pts.tv_nsec * 1e-9) /
3425 									sys_tick);
3426 						pts.tv_nsec = (long)(ticks * 1e9 *
3427 								     sys_tick);
3428 					}
3429 					DPRINTF(4, ("fetch_timestamp: system nsec network time stamp: %ld.%09ld\n",
3430 						    pts.tv_sec, pts.tv_nsec));
3431 					nts = tspec_stamp_to_lfp(pts);
3432 				}
3433 				break;
3434 #endif	/* HAVE_TIMESTAMPNS */
3435 #ifdef HAVE_TIMESTAMP
3436 			case SCM_TIMESTAMP:
3437 				{
3438 					struct timeval	ptv;
3439 					memcpy(&ptv, CMSG_DATA(cmsghdr), sizeof(ptv));
3440 					if (sys_tick > measured_tick &&
3441 					    sys_tick > 1e-6) {
3442 						ticks = (unsigned long)((ptv.tv_usec * 1e-6) /
3443 									sys_tick);
3444 						ptv.tv_usec = (long)(ticks * 1e6 *
3445 								    sys_tick);
3446 					}
3447 					DPRINTF(4, ("fetch_timestamp: system usec network time stamp: %jd.%06ld\n",
3448 						    (intmax_t)ptv.tv_sec, (long)ptv.tv_usec));
3449 					nts = tval_stamp_to_lfp(ptv);
3450 				}
3451 				break;
3452 #endif  /* HAVE_TIMESTAMP */
3453 			}
3454 			fuzz = ntp_random() * 2. / FRAC * sys_fuzz;
3455 			DTOLFP(fuzz, &lfpfuzz);
3456 			L_ADD(&nts, &lfpfuzz);
3457 #ifdef DEBUG_TIMING
3458 			dts = ts;
3459 			L_SUB(&dts, &nts);
3460 			collect_timing(rb, "input processing delay", 1,
3461 				       &dts);
3462 			DPRINTF(4, ("fetch_timestamp: timestamp delta: %s (incl. fuzz)\n",
3463 				    lfptoa(&dts, 9)));
3464 #endif	/* DEBUG_TIMING */
3465 			ts = nts;  /* network time stamp */
3466 			break;
3467 #endif	/* HAVE_BINTIME || HAVE_TIMESTAMPNS || HAVE_TIMESTAMP */
3468 
3469 		default:
3470 			DPRINTF(4, ("fetch_timestamp: skipping control message 0x%x\n",
3471 				    cmsghdr->cmsg_type));
3472 		}
3473 		cmsghdr = CMSG_NXTHDR(msghdr, cmsghdr);
3474 	}
3475 	return ts;
3476 }
3477 #endif	/* HAVE_PACKET_TIMESTAMP */
3478 
3479 
3480 /*
3481  * Routine to read the network NTP packets for a specific interface
3482  * Return the number of bytes read. That way we know if we should
3483  * read it again or go on to the next one if no bytes returned
3484  */
3485 static inline int
3486 read_network_packet(
3487 	SOCKET			fd,
3488 	struct interface *	itf,
3489 	l_fp			ts
3490 	)
3491 {
3492 	GETSOCKNAME_SOCKLEN_TYPE fromlen;
3493 	int buflen;
3494 	register struct recvbuf *rb;
3495 #ifdef HAVE_PACKET_TIMESTAMP
3496 	struct msghdr msghdr;
3497 	struct iovec iovec;
3498 	char control[CMSG_BUFSIZE];
3499 #endif
3500 
3501 	/*
3502 	 * Get a buffer and read the frame.  If we haven't got a buffer,
3503 	 * or this is received on a disallowed socket, just dump the
3504 	 * packet.
3505 	 */
3506 
3507 	rb = itf->ignore_packets ? NULL : get_free_recv_buffer(FALSE);
3508 	if (NULL == rb) {
3509 		/* A partial read on a UDP socket truncates the data and
3510 		 * removes the message from the queue. So there's no
3511 		 * need to have a full buffer here on the stack.
3512 		 */
3513 		char buf[16];
3514 		sockaddr_u from;
3515 
3516 		if (rb != NULL)
3517 			freerecvbuf(rb);
3518 
3519 		fromlen = sizeof(from);
3520 		buflen = recvfrom(fd, buf, sizeof(buf), 0,
3521 				  &from.sa, &fromlen);
3522 		DPRINTF(4, ("%s on (%lu) fd=%d from %s\n",
3523 			(itf->ignore_packets)
3524 			    ? "ignore"
3525 			    : "drop",
3526 			free_recvbuffs(), fd, stoa(&from)));
3527 		if (itf->ignore_packets)
3528 			packets_ignored++;
3529 		else
3530 			packets_dropped++;
3531 		return (buflen);
3532 	}
3533 
3534 	fromlen = sizeof(rb->recv_srcadr);
3535 
3536 #ifndef HAVE_PACKET_TIMESTAMP
3537 	rb->recv_length = recvfrom(fd, (char *)&rb->recv_space,
3538 				   sizeof(rb->recv_space), 0,
3539 				   &rb->recv_srcadr.sa, &fromlen);
3540 #else
3541 	iovec.iov_base        = &rb->recv_space;
3542 	iovec.iov_len         = sizeof(rb->recv_space);
3543 	msghdr.msg_name       = &rb->recv_srcadr;
3544 	msghdr.msg_namelen    = fromlen;
3545 	msghdr.msg_iov        = &iovec;
3546 	msghdr.msg_iovlen     = 1;
3547 	msghdr.msg_control    = (void *)&control;
3548 	msghdr.msg_controllen = sizeof(control);
3549 	msghdr.msg_flags      = 0;
3550 	rb->recv_length       = recvmsg(fd, &msghdr, 0);
3551 #endif
3552 
3553 	buflen = rb->recv_length;
3554 
3555 	if (buflen == 0 || (buflen == -1 &&
3556 	    (EWOULDBLOCK == errno
3557 #ifdef EAGAIN
3558 	     || EAGAIN == errno
3559 #endif
3560 	     ))) {
3561 		freerecvbuf(rb);
3562 		return (buflen);
3563 	} else if (buflen < 0) {
3564 		msyslog(LOG_ERR, "recvfrom(%s) fd=%d: %m",
3565 			stoa(&rb->recv_srcadr), fd);
3566 		DPRINTF(5, ("read_network_packet: fd=%d dropped (bad recvfrom)\n",
3567 			    fd));
3568 		freerecvbuf(rb);
3569 		return (buflen);
3570 	}
3571 
3572 	DPRINTF(3, ("read_network_packet: fd=%d length %d from %s\n",
3573 		    fd, buflen, stoa(&rb->recv_srcadr)));
3574 
3575 #ifdef ENABLE_BUG3020_FIX
3576 	if (ISREFCLOCKADR(&rb->recv_srcadr)) {
3577 		msyslog(LOG_ERR, "recvfrom(%s) fd=%d: refclock srcadr on a network interface!",
3578 			stoa(&rb->recv_srcadr), fd);
3579 		DPRINTF(1, ("read_network_packet: fd=%d dropped (refclock srcadr))\n",
3580 			    fd));
3581 		packets_dropped++;
3582 		freerecvbuf(rb);
3583 		return (buflen);
3584 	}
3585 #endif
3586 
3587 	/*
3588 	** Bug 2672: Some OSes (MacOSX and Linux) don't block spoofed ::1
3589 	*/
3590 
3591 	if (AF_INET6 == itf->family) {
3592 		DPRINTF(2, ("Got an IPv6 packet, from <%s> (%d) to <%s> (%d)\n",
3593 			stoa(&rb->recv_srcadr),
3594 			IN6_IS_ADDR_LOOPBACK(PSOCK_ADDR6(&rb->recv_srcadr)),
3595 			stoa(&itf->sin),
3596 			!IN6_IS_ADDR_LOOPBACK(PSOCK_ADDR6(&itf->sin))
3597 			));
3598 
3599 		if (   IN6_IS_ADDR_LOOPBACK(PSOCK_ADDR6(&rb->recv_srcadr))
3600 		    && !IN6_IS_ADDR_LOOPBACK(PSOCK_ADDR6(&itf->sin))
3601 		   ) {
3602 			packets_dropped++;
3603 			DPRINTF(2, ("DROPPING that packet\n"));
3604 			freerecvbuf(rb);
3605 			return buflen;
3606 		}
3607 		DPRINTF(2, ("processing that packet\n"));
3608 	}
3609 
3610 	/*
3611 	 * Got one.  Mark how and when it got here,
3612 	 * put it on the full list and do bookkeeping.
3613 	 */
3614 	rb->dstadr = itf;
3615 	rb->fd = fd;
3616 #ifdef HAVE_PACKET_TIMESTAMP
3617 	/* pick up a network time stamp if possible */
3618 	ts = fetch_timestamp(rb, &msghdr, ts);
3619 #endif
3620 	rb->recv_time = ts;
3621 	rb->receiver = receive;
3622 
3623 	add_full_recv_buffer(rb);
3624 
3625 	itf->received++;
3626 	packets_received++;
3627 	return (buflen);
3628 }
3629 
3630 /*
3631  * attempt to handle io (select()/signaled IO)
3632  */
3633 void
3634 io_handler(void)
3635 {
3636 #  ifndef HAVE_SIGNALED_IO
3637 	fd_set rdfdes;
3638 	int nfound;
3639 
3640 	/*
3641 	 * Use select() on all on all input fd's for unlimited
3642 	 * time.  select() will terminate on SIGALARM or on the
3643 	 * reception of input.	Using select() means we can't do
3644 	 * robust signal handling and we get a potential race
3645 	 * between checking for alarms and doing the select().
3646 	 * Mostly harmless, I think.
3647 	 */
3648 	/*
3649 	 * On VMS, I suspect that select() can't be interrupted
3650 	 * by a "signal" either, so I take the easy way out and
3651 	 * have select() time out after one second.
3652 	 * System clock updates really aren't time-critical,
3653 	 * and - lacking a hardware reference clock - I have
3654 	 * yet to learn about anything else that is.
3655 	 */
3656 	++handler_calls;
3657 	rdfdes = activefds;
3658 #   if !defined(VMS) && !defined(SYS_VXWORKS)
3659 	nfound = select(maxactivefd + 1, &rdfdes, NULL,
3660 			NULL, NULL);
3661 #   else	/* VMS, VxWorks */
3662 	/* make select() wake up after one second */
3663 	{
3664 		struct timeval t1;
3665 		t1.tv_sec  = 1;
3666 		t1.tv_usec = 0;
3667 		nfound = select(maxactivefd + 1,
3668 				&rdfdes, NULL, NULL,
3669 				&t1);
3670 	}
3671 #   endif	/* VMS, VxWorks */
3672 	if (nfound < 0 && sanitize_fdset(errno)) {
3673 		struct timeval t1;
3674 		t1.tv_sec  = 0;
3675 		t1.tv_usec = 0;
3676 		rdfdes = activefds;
3677 		nfound = select(maxactivefd + 1,
3678 				&rdfdes, NULL, NULL,
3679 				&t1);
3680 	}
3681 
3682 	if (nfound > 0) {
3683 		l_fp ts;
3684 
3685 		get_systime(&ts);
3686 
3687 		input_handler_scan(&ts, &rdfdes);
3688 	} else if (nfound == -1 && errno != EINTR) {
3689 		msyslog(LOG_ERR, "select() error: %m");
3690 	}
3691 #   ifdef DEBUG
3692 	else if (debug > 4) {
3693 		msyslog(LOG_DEBUG, "select(): nfound=%d, error: %m", nfound);
3694 	} else {
3695 		DPRINTF(3, ("select() returned %d: %m\n", nfound));
3696 	}
3697 #   endif /* DEBUG */
3698 #  else /* HAVE_SIGNALED_IO */
3699 	wait_for_signal();
3700 #  endif /* HAVE_SIGNALED_IO */
3701 }
3702 
3703 #ifdef HAVE_SIGNALED_IO
3704 /*
3705  * input_handler - receive packets asynchronously
3706  *
3707  * ALWAYS IN SIGNAL HANDLER CONTEXT -- only async-safe functions allowed!
3708  */
3709 static RETSIGTYPE
3710 input_handler(
3711 	l_fp *	cts
3712 	)
3713 {
3714 	int		n;
3715 	struct timeval	tvzero;
3716 	fd_set		fds;
3717 
3718 	++handler_calls;
3719 
3720 	/*
3721 	 * Do a poll to see who has data
3722 	 */
3723 
3724 	fds = activefds;
3725 	tvzero.tv_sec = tvzero.tv_usec = 0;
3726 
3727 	n = select(maxactivefd + 1, &fds, NULL, NULL, &tvzero);
3728 	if (n < 0 && sanitize_fdset(errno)) {
3729 		fds = activefds;
3730 		tvzero.tv_sec = tvzero.tv_usec = 0;
3731 		n = select(maxactivefd + 1, &fds, NULL, NULL, &tvzero);
3732 	}
3733 	if (n > 0)
3734 		input_handler_scan(cts, &fds);
3735 }
3736 #endif /* HAVE_SIGNALED_IO */
3737 
3738 
3739 /*
3740  * Try to sanitize the global FD set
3741  *
3742  * SIGNAL HANDLER CONTEXT if HAVE_SIGNALED_IO, ordinary userspace otherwise
3743  */
3744 static int/*BOOL*/
3745 sanitize_fdset(
3746 	int	errc
3747 	)
3748 {
3749 	int j, b, maxscan;
3750 
3751 #  ifndef HAVE_SIGNALED_IO
3752 	/*
3753 	 * extended FAU debugging output
3754 	 */
3755 	if (errc != EINTR) {
3756 		msyslog(LOG_ERR,
3757 			"select(%d, %s, 0L, 0L, &0.0) error: %m",
3758 			maxactivefd + 1,
3759 			fdbits(maxactivefd, &activefds));
3760 	}
3761 #   endif
3762 
3763 	if (errc != EBADF)
3764 		return FALSE;
3765 
3766 	/* if we have oviously bad FDs, try to sanitize the FD set. */
3767 	for (j = 0, maxscan = 0; j <= maxactivefd; j++) {
3768 		if (FD_ISSET(j, &activefds)) {
3769 			if (-1 != read(j, &b, 0)) {
3770 				maxscan = j;
3771 				continue;
3772 			}
3773 #		    ifndef HAVE_SIGNALED_IO
3774 			msyslog(LOG_ERR,
3775 				"Removing bad file descriptor %d from select set",
3776 				j);
3777 #		    endif
3778 			FD_CLR(j, &activefds);
3779 		}
3780 	}
3781 	if (maxactivefd != maxscan)
3782 		maxactivefd = maxscan;
3783 	return TRUE;
3784 }
3785 
3786 /*
3787  * scan the known FDs (clocks, servers, ...) for presence in a 'fd_set'.
3788  *
3789  * SIGNAL HANDLER CONTEXT if HAVE_SIGNALED_IO, ordinary userspace otherwise
3790  */
3791 static void
3792 input_handler_scan(
3793 	const l_fp *	cts,
3794 	const fd_set *	pfds
3795 	)
3796 {
3797 	int		buflen;
3798 	u_int		idx;
3799 	int		doing;
3800 	SOCKET		fd;
3801 	blocking_child *c;
3802 	l_fp		ts;	/* Timestamp at BOselect() gob */
3803 
3804 #if defined(DEBUG_TIMING)
3805 	l_fp		ts_e;	/* Timestamp at EOselect() gob */
3806 #endif
3807 	endpt *		ep;
3808 #ifdef REFCLOCK
3809 	struct refclockio *rp;
3810 	int		saved_errno;
3811 	const char *	clk;
3812 #endif
3813 #ifdef HAS_ROUTING_SOCKET
3814 	struct asyncio_reader *	asyncio_reader;
3815 	struct asyncio_reader *	next_asyncio_reader;
3816 #endif
3817 
3818 	++handler_pkts;
3819 	ts = *cts;
3820 
3821 #ifdef REFCLOCK
3822 	/*
3823 	 * Check out the reference clocks first, if any
3824 	 */
3825 
3826 	for (rp = refio; rp != NULL; rp = rp->next) {
3827 		fd = rp->fd;
3828 
3829 		if (!FD_ISSET(fd, pfds))
3830 			continue;
3831 		buflen = read_refclock_packet(fd, rp, ts);
3832 		/*
3833 		 * The first read must succeed after select() indicates
3834 		 * readability, or we've reached a permanent EOF.
3835 		 * http://bugs.ntp.org/1732 reported ntpd munching CPU
3836 		 * after a USB GPS was unplugged because select was
3837 		 * indicating EOF but ntpd didn't remove the descriptor
3838 		 * from the activefds set.
3839 		 */
3840 		if (buflen < 0 && EAGAIN != errno) {
3841 			saved_errno = errno;
3842 			clk = refnumtoa(&rp->srcclock->srcadr);
3843 			errno = saved_errno;
3844 			msyslog(LOG_ERR, "%s read: %m", clk);
3845 			maintain_activefds(fd, TRUE);
3846 		} else if (0 == buflen) {
3847 			clk = refnumtoa(&rp->srcclock->srcadr);
3848 			msyslog(LOG_ERR, "%s read EOF", clk);
3849 			maintain_activefds(fd, TRUE);
3850 		} else {
3851 			/* drain any remaining refclock input */
3852 			do {
3853 				buflen = read_refclock_packet(fd, rp, ts);
3854 			} while (buflen > 0);
3855 		}
3856 	}
3857 #endif /* REFCLOCK */
3858 
3859 	/*
3860 	 * Loop through the interfaces looking for data to read.
3861 	 */
3862 	for (ep = ep_list; ep != NULL; ep = ep->elink) {
3863 		for (doing = 0; doing < 2; doing++) {
3864 			if (!doing) {
3865 				fd = ep->fd;
3866 			} else {
3867 				if (!(ep->flags & INT_BCASTOPEN))
3868 					break;
3869 				fd = ep->bfd;
3870 			}
3871 			if (fd < 0)
3872 				continue;
3873 			if (FD_ISSET(fd, pfds))
3874 				do {
3875 					buflen = read_network_packet(
3876 							fd, ep, ts);
3877 				} while (buflen > 0);
3878 			/* Check more interfaces */
3879 		}
3880 	}
3881 
3882 #ifdef HAS_ROUTING_SOCKET
3883 	/*
3884 	 * scan list of asyncio readers - currently only used for routing sockets
3885 	 */
3886 	asyncio_reader = asyncio_reader_list;
3887 
3888 	while (asyncio_reader != NULL) {
3889 		/* callback may unlink and free asyncio_reader */
3890 		next_asyncio_reader = asyncio_reader->link;
3891 		if (FD_ISSET(asyncio_reader->fd, pfds))
3892 			(*asyncio_reader->receiver)(asyncio_reader);
3893 		asyncio_reader = next_asyncio_reader;
3894 	}
3895 #endif /* HAS_ROUTING_SOCKET */
3896 
3897 	/*
3898 	 * Check for a response from a blocking child
3899 	 */
3900 	for (idx = 0; idx < blocking_children_alloc; idx++) {
3901 		c = blocking_children[idx];
3902 		if (NULL == c || -1 == c->resp_read_pipe)
3903 			continue;
3904 		if (FD_ISSET(c->resp_read_pipe, pfds)) {
3905 			++c->resp_ready_seen;
3906 			++blocking_child_ready_seen;
3907 		}
3908 	}
3909 
3910 	/* We've done our work */
3911 #if defined(DEBUG_TIMING)
3912 	get_systime(&ts_e);
3913 	/*
3914 	 * (ts_e - ts) is the amount of time we spent
3915 	 * processing this gob of file descriptors.  Log
3916 	 * it.
3917 	 */
3918 	L_SUB(&ts_e, &ts);
3919 	collect_timing(NULL, "input handler", 1, &ts_e);
3920 	if (debug > 3)
3921 		msyslog(LOG_DEBUG,
3922 			"input_handler: Processed a gob of fd's in %s msec",
3923 			lfptoms(&ts_e, 6));
3924 #endif /* DEBUG_TIMING */
3925 }
3926 #endif /* !HAVE_IO_COMPLETION_PORT */
3927 
3928 /*
3929  * find an interface suitable for the src address
3930  */
3931 endpt *
3932 select_peerinterface(
3933 	struct peer *	peer,
3934 	sockaddr_u *	srcadr,
3935 	endpt *		dstadr
3936 	)
3937 {
3938 	endpt *ep;
3939 #ifndef SIM
3940 	endpt *wild;
3941 
3942 	wild = ANY_INTERFACE_CHOOSE(srcadr);
3943 
3944 	/*
3945 	 * Initialize the peer structure and dance the interface jig.
3946 	 * Reference clocks step the loopback waltz, the others
3947 	 * squaredance around the interface list looking for a buddy. If
3948 	 * the dance peters out, there is always the wildcard interface.
3949 	 * This might happen in some systems and would preclude proper
3950 	 * operation with public key cryptography.
3951 	 */
3952 	if (ISREFCLOCKADR(srcadr)) {
3953 		ep = loopback_interface;
3954 	} else if (peer->cast_flags &
3955 		   (MDF_BCLNT | MDF_ACAST | MDF_MCAST | MDF_BCAST)) {
3956 		ep = findbcastinter(srcadr);
3957 		if (ep != NULL)
3958 			DPRINTF(4, ("Found *-cast interface %s for address %s\n",
3959 				stoa(&ep->sin), stoa(srcadr)));
3960 		else
3961 			DPRINTF(4, ("No *-cast local address found for address %s\n",
3962 				stoa(srcadr)));
3963 	} else {
3964 		ep = dstadr;
3965 		if (NULL == ep)
3966 			ep = wild;
3967 	}
3968 	/*
3969 	 * If it is a multicast address, findbcastinter() may not find
3970 	 * it.  For unicast, we get to find the interface when dstadr is
3971 	 * given to us as the wildcard (ANY_INTERFACE_CHOOSE).  Either
3972 	 * way, try a little harder.
3973 	 */
3974 	if (wild == ep)
3975 		ep = findinterface(srcadr);
3976 	/*
3977 	 * we do not bind to the wildcard interfaces for output
3978 	 * as our (network) source address would be undefined and
3979 	 * crypto will not work without knowing the own transmit address
3980 	 */
3981 	if (ep != NULL && INT_WILDCARD & ep->flags)
3982 		if (!accept_wildcard_if_for_winnt)
3983 			ep = NULL;
3984 #else	/* SIM follows */
3985 	ep = loopback_interface;
3986 #endif
3987 
3988 	return ep;
3989 }
3990 
3991 
3992 /*
3993  * findinterface - find local interface corresponding to address
3994  */
3995 endpt *
3996 findinterface(
3997 	sockaddr_u *addr
3998 	)
3999 {
4000 	endpt *iface;
4001 
4002 	iface = findlocalinterface(addr, INT_WILDCARD, 0);
4003 
4004 	if (NULL == iface) {
4005 		DPRINTF(4, ("Found no interface for address %s - returning wildcard\n",
4006 			    stoa(addr)));
4007 
4008 		iface = ANY_INTERFACE_CHOOSE(addr);
4009 	} else
4010 		DPRINTF(4, ("Found interface #%d %s for address %s\n",
4011 			    iface->ifnum, iface->name, stoa(addr)));
4012 
4013 	return iface;
4014 }
4015 
4016 /*
4017  * findlocalinterface - find local interface corresponding to addr,
4018  * which does not have any of flags set.  If bast is nonzero, addr is
4019  * a broadcast address.
4020  *
4021  * This code attempts to find the local sending address for an outgoing
4022  * address by connecting a new socket to destinationaddress:NTP_PORT
4023  * and reading the sockname of the resulting connect.
4024  * the complicated sequence simulates the routing table lookup
4025  * for to first hop without duplicating any of the routing logic into
4026  * ntpd. preferably we would have used an API call - but its not there -
4027  * so this is the best we can do here short of duplicating to entire routing
4028  * logic in ntpd which would be a silly and really unportable thing to do.
4029  *
4030  */
4031 static endpt *
4032 findlocalinterface(
4033 	sockaddr_u *	addr,
4034 	int		flags,
4035 	int		bcast
4036 	)
4037 {
4038 	GETSOCKNAME_SOCKLEN_TYPE	sockaddrlen;
4039 	endpt *				iface;
4040 	sockaddr_u			saddr;
4041 	SOCKET				s;
4042 	int				rtn;
4043 	int				on;
4044 
4045 	DPRINTF(4, ("Finding interface for addr %s in list of addresses\n",
4046 		    stoa(addr)));
4047 
4048 	/* [Bug 3437] The dummy POOL peer comes in with an AF of
4049 	 * zero. This is bound to fail, but on the way to nowhere it
4050 	 * triggers a security incident on SELinux.
4051 	 *
4052 	 * Checking the condition and failing early is probably a good
4053 	 * advice, and even saves us some syscalls in that case.
4054 	 * Thanks to Miroslav Lichvar for finding this.
4055 	 */
4056 	if (AF_UNSPEC == AF(addr))
4057 		return NULL;
4058 
4059 	s = socket(AF(addr), SOCK_DGRAM, 0);
4060 	if (INVALID_SOCKET == s)
4061 		return NULL;
4062 
4063 	/*
4064 	 * If we are looking for broadcast interface we need to set this
4065 	 * socket to allow broadcast
4066 	 */
4067 	if (bcast) {
4068 		on = 1;
4069 		if (SOCKET_ERROR == setsockopt(s, SOL_SOCKET,
4070 						SO_BROADCAST,
4071 						(void *)&on,
4072 						sizeof(on))) {
4073 			closesocket(s);
4074 			return NULL;
4075 		}
4076 	}
4077 
4078 	rtn = connect(s, &addr->sa, SOCKLEN(addr));
4079 	if (SOCKET_ERROR == rtn) {
4080 		closesocket(s);
4081 		return NULL;
4082 	}
4083 
4084 	sockaddrlen = sizeof(saddr);
4085 	rtn = getsockname(s, &saddr.sa, &sockaddrlen);
4086 	closesocket(s);
4087 	if (SOCKET_ERROR == rtn)
4088 		return NULL;
4089 
4090 	DPRINTF(4, ("findlocalinterface: kernel maps %s to %s\n",
4091 		    stoa(addr), stoa(&saddr)));
4092 
4093 	iface = getinterface(&saddr, flags);
4094 
4095 	/*
4096 	 * if we didn't find an exact match on saddr, find the closest
4097 	 * available local address.  This handles the case of the
4098 	 * address suggested by the kernel being excluded by nic rules
4099 	 * or the user's -I and -L options to ntpd.
4100 	 * See http://bugs.ntp.org/1184 and http://bugs.ntp.org/1683
4101 	 * for more background.
4102 	 */
4103 	if (NULL == iface || iface->ignore_packets)
4104 		iface = findclosestinterface(&saddr,
4105 					     flags | INT_LOOPBACK);
4106 
4107 	/* Don't use an interface which will ignore replies */
4108 	if (iface != NULL && iface->ignore_packets)
4109 		iface = NULL;
4110 
4111 	return iface;
4112 }
4113 
4114 
4115 /*
4116  * findclosestinterface
4117  *
4118  * If there are -I/--interface or -L/novirtualips command-line options,
4119  * or "nic" or "interface" rules in ntp.conf, findlocalinterface() may
4120  * find the kernel's preferred local address for a given peer address is
4121  * administratively unavailable to ntpd, and punt to this routine's more
4122  * expensive search.
4123  *
4124  * Find the numerically closest local address to the one connect()
4125  * suggested.  This matches an address on the same subnet first, as
4126  * needed by Bug 1184, and provides a consistent choice if there are
4127  * multiple feasible local addresses, regardless of the order ntpd
4128  * enumerated them.
4129  */
4130 endpt *
4131 findclosestinterface(
4132 	sockaddr_u *	addr,
4133 	int		flags
4134 	)
4135 {
4136 	endpt *		ep;
4137 	endpt *		winner;
4138 	sockaddr_u	addr_dist;
4139 	sockaddr_u	min_dist;
4140 
4141 	ZERO_SOCK(&min_dist);
4142 	winner = NULL;
4143 
4144 	for (ep = ep_list; ep != NULL; ep = ep->elink) {
4145 		if (ep->ignore_packets ||
4146 		    AF(addr) != ep->family ||
4147 		    flags & ep->flags)
4148 			continue;
4149 
4150 		calc_addr_distance(&addr_dist, addr, &ep->sin);
4151 		if (NULL == winner ||
4152 		    -1 == cmp_addr_distance(&addr_dist, &min_dist)) {
4153 			min_dist = addr_dist;
4154 			winner = ep;
4155 		}
4156 	}
4157 	if (NULL == winner)
4158 		DPRINTF(4, ("findclosestinterface(%s) failed\n",
4159 			    stoa(addr)));
4160 	else
4161 		DPRINTF(4, ("findclosestinterface(%s) -> %s\n",
4162 			    stoa(addr), stoa(&winner->sin)));
4163 
4164 	return winner;
4165 }
4166 
4167 
4168 /*
4169  * calc_addr_distance - calculate the distance between two addresses,
4170  *			the absolute value of the difference between
4171  *			the addresses numerically, stored as an address.
4172  */
4173 static void
4174 calc_addr_distance(
4175 	sockaddr_u *		dist,
4176 	const sockaddr_u *	a1,
4177 	const sockaddr_u *	a2
4178 	)
4179 {
4180 	u_int32	a1val;
4181 	u_int32	a2val;
4182 	u_int32	v4dist;
4183 	int	found_greater;
4184 	int	a1_greater;
4185 	int	i;
4186 
4187 	REQUIRE(AF(a1) == AF(a2));
4188 
4189 	ZERO_SOCK(dist);
4190 	AF(dist) = AF(a1);
4191 
4192 	/* v4 can be done a bit simpler */
4193 	if (IS_IPV4(a1)) {
4194 		a1val = SRCADR(a1);
4195 		a2val = SRCADR(a2);
4196 		v4dist = (a1val > a2val)
4197 			     ? a1val - a2val
4198 			     : a2val - a1val;
4199 		SET_ADDR4(dist, v4dist);
4200 
4201 		return;
4202 	}
4203 
4204 	found_greater = FALSE;
4205 	a1_greater = FALSE;	/* suppress pot. uninit. warning */
4206 	for (i = 0; i < (int)sizeof(NSRCADR6(a1)); i++) {
4207 		if (!found_greater &&
4208 		    NSRCADR6(a1)[i] != NSRCADR6(a2)[i]) {
4209 			found_greater = TRUE;
4210 			a1_greater = (NSRCADR6(a1)[i] > NSRCADR6(a2)[i]);
4211 		}
4212 		if (!found_greater) {
4213 			NSRCADR6(dist)[i] = 0;
4214 		} else {
4215 			if (a1_greater)
4216 				NSRCADR6(dist)[i] = NSRCADR6(a1)[i] -
4217 						    NSRCADR6(a2)[i];
4218 			else
4219 				NSRCADR6(dist)[i] = NSRCADR6(a2)[i] -
4220 						    NSRCADR6(a1)[i];
4221 		}
4222 	}
4223 }
4224 
4225 
4226 /*
4227  * cmp_addr_distance - compare two address distances, returning -1, 0,
4228  *		       1 to indicate their relationship.
4229  */
4230 static int
4231 cmp_addr_distance(
4232 	const sockaddr_u *	d1,
4233 	const sockaddr_u *	d2
4234 	)
4235 {
4236 	int	i;
4237 
4238 	REQUIRE(AF(d1) == AF(d2));
4239 
4240 	if (IS_IPV4(d1)) {
4241 		if (SRCADR(d1) < SRCADR(d2))
4242 			return -1;
4243 		else if (SRCADR(d1) == SRCADR(d2))
4244 			return 0;
4245 		else
4246 			return 1;
4247 	}
4248 
4249 	for (i = 0; i < (int)sizeof(NSRCADR6(d1)); i++) {
4250 		if (NSRCADR6(d1)[i] < NSRCADR6(d2)[i])
4251 			return -1;
4252 		else if (NSRCADR6(d1)[i] > NSRCADR6(d2)[i])
4253 			return 1;
4254 	}
4255 
4256 	return 0;
4257 }
4258 
4259 
4260 
4261 /*
4262  * fetch an interface structure the matches the
4263  * address and has the given flags NOT set
4264  */
4265 endpt *
4266 getinterface(
4267 	sockaddr_u *	addr,
4268 	u_int32		flags
4269 	)
4270 {
4271 	endpt *iface;
4272 
4273 	iface = find_addr_in_list(addr);
4274 
4275 	if (iface != NULL && (iface->flags & flags))
4276 		iface = NULL;
4277 
4278 	return iface;
4279 }
4280 
4281 
4282 /*
4283  * findbcastinter - find broadcast interface corresponding to address
4284  */
4285 endpt *
4286 findbcastinter(
4287 	sockaddr_u *addr
4288 	)
4289 {
4290 	endpt *	iface;
4291 
4292 	iface = NULL;
4293 #if !defined(MPE) && (defined(SIOCGIFCONF) || defined(SYS_WINNT))
4294 	DPRINTF(4, ("Finding broadcast/multicast interface for addr %s in list of addresses\n",
4295 		    stoa(addr)));
4296 
4297 	iface = findlocalinterface(addr, INT_LOOPBACK | INT_WILDCARD,
4298 				   1);
4299 	if (iface != NULL) {
4300 		DPRINTF(4, ("Easily found bcast-/mcast- interface index #%d %s\n",
4301 			    iface->ifnum, iface->name));
4302 		return iface;
4303 	}
4304 
4305 	/*
4306 	 * plan B - try to find something reasonable in our lists in
4307 	 * case kernel lookup doesn't help
4308 	 */
4309 	for (iface = ep_list; iface != NULL; iface = iface->elink) {
4310 		if (iface->flags & INT_WILDCARD)
4311 			continue;
4312 
4313 		/* Don't bother with ignored interfaces */
4314 		if (iface->ignore_packets)
4315 			continue;
4316 
4317 		/*
4318 		 * First look if this is the correct family
4319 		 */
4320 		if(AF(&iface->sin) != AF(addr))
4321 			continue;
4322 
4323 		/* Skip the loopback addresses */
4324 		if (iface->flags & INT_LOOPBACK)
4325 			continue;
4326 
4327 		/*
4328 		 * If we are looking to match a multicast address and
4329 		 * this interface is one...
4330 		 */
4331 		if (addr_ismulticast(addr)
4332 		    && (iface->flags & INT_MULTICAST)) {
4333 #ifdef INCLUDE_IPV6_SUPPORT
4334 			/*
4335 			 * ...it is the winner unless we're looking for
4336 			 * an interface to use for link-local multicast
4337 			 * and its address is not link-local.
4338 			 */
4339 			if (IS_IPV6(addr)
4340 			    && IN6_IS_ADDR_MC_LINKLOCAL(PSOCK_ADDR6(addr))
4341 			    && !IN6_IS_ADDR_LINKLOCAL(PSOCK_ADDR6(&iface->sin)))
4342 				continue;
4343 #endif
4344 			break;
4345 		}
4346 
4347 		/*
4348 		 * We match only those interfaces marked as
4349 		 * broadcastable and either the explicit broadcast
4350 		 * address or the network portion of the IP address.
4351 		 * Sloppy.
4352 		 */
4353 		if (IS_IPV4(addr)) {
4354 			if (SOCK_EQ(&iface->bcast, addr))
4355 				break;
4356 
4357 			if ((NSRCADR(&iface->sin) & NSRCADR(&iface->mask))
4358 			    == (NSRCADR(addr)	  & NSRCADR(&iface->mask)))
4359 				break;
4360 		}
4361 #ifdef INCLUDE_IPV6_SUPPORT
4362 		else if (IS_IPV6(addr)) {
4363 			if (SOCK_EQ(&iface->bcast, addr))
4364 				break;
4365 
4366 			if (SOCK_EQ(netof(&iface->sin), netof(addr)))
4367 				break;
4368 		}
4369 #endif
4370 	}
4371 #endif /* SIOCGIFCONF */
4372 	if (NULL == iface) {
4373 		DPRINTF(4, ("No bcast interface found for %s\n",
4374 			    stoa(addr)));
4375 		iface = ANY_INTERFACE_CHOOSE(addr);
4376 	} else {
4377 		DPRINTF(4, ("Found bcast-/mcast- interface index #%d %s\n",
4378 			    iface->ifnum, iface->name));
4379 	}
4380 
4381 	return iface;
4382 }
4383 
4384 
4385 /*
4386  * io_clr_stats - clear I/O module statistics
4387  */
4388 void
4389 io_clr_stats(void)
4390 {
4391 	packets_dropped = 0;
4392 	packets_ignored = 0;
4393 	packets_received = 0;
4394 	packets_sent = 0;
4395 	packets_notsent = 0;
4396 
4397 	handler_calls = 0;
4398 	handler_pkts = 0;
4399 	io_timereset = current_time;
4400 }
4401 
4402 
4403 #ifdef REFCLOCK
4404 /*
4405  * io_addclock - add a reference clock to the list and arrange that we
4406  *				 get SIGIO interrupts from it.
4407  */
4408 int
4409 io_addclock(
4410 	struct refclockio *rio
4411 	)
4412 {
4413 	BLOCKIO();
4414 
4415 	/*
4416 	 * Stuff the I/O structure in the list and mark the descriptor
4417 	 * in use.  There is a harmless (I hope) race condition here.
4418 	 */
4419 	rio->active = TRUE;
4420 
4421 # ifdef HAVE_SIGNALED_IO
4422 	if (init_clock_sig(rio)) {
4423 		UNBLOCKIO();
4424 		return 0;
4425 	}
4426 # elif defined(HAVE_IO_COMPLETION_PORT)
4427 	if (!io_completion_port_add_clock_io(rio)) {
4428 		UNBLOCKIO();
4429 		return 0;
4430 	}
4431 # endif
4432 
4433 	/*
4434 	 * enqueue
4435 	 */
4436 	LINK_SLIST(refio, rio, next);
4437 
4438 	/*
4439 	 * register fd
4440 	 */
4441 	add_fd_to_list(rio->fd, FD_TYPE_FILE);
4442 
4443 	UNBLOCKIO();
4444 	return 1;
4445 }
4446 
4447 
4448 /*
4449  * io_closeclock - close the clock in the I/O structure given
4450  */
4451 void
4452 io_closeclock(
4453 	struct refclockio *rio
4454 	)
4455 {
4456 	struct refclockio *unlinked;
4457 
4458 	BLOCKIO();
4459 
4460 	/*
4461 	 * Remove structure from the list
4462 	 */
4463 	rio->active = FALSE;
4464 	UNLINK_SLIST(unlinked, refio, rio, next, struct refclockio);
4465 	if (NULL != unlinked) {
4466 		/* Close the descriptor. The order of operations is
4467 		 * important here in case of async / overlapped IO:
4468 		 * only after we have removed the clock from the
4469 		 * IO completion port we can be sure no further
4470 		 * input is queued. So...
4471 		 *  - we first disable feeding to the queu by removing
4472 		 *    the clock from the IO engine
4473 		 *  - close the file (which brings down any IO on it)
4474 		 *  - clear the buffer from results for this fd
4475 		 */
4476 #	    ifdef HAVE_IO_COMPLETION_PORT
4477 		io_completion_port_remove_clock_io(rio);
4478 #	    endif
4479 		close_and_delete_fd_from_list(rio->fd);
4480 		purge_recv_buffers_for_fd(rio->fd);
4481 		rio->fd = -1;
4482 	}
4483 
4484 	UNBLOCKIO();
4485 }
4486 #endif	/* REFCLOCK */
4487 
4488 
4489 /*
4490  * On NT a SOCKET is an unsigned int so we cannot possibly keep it in
4491  * an array. So we use one of the ISC_LIST functions to hold the
4492  * socket value and use that when we want to enumerate it.
4493  *
4494  * This routine is called by the forked intres child process to close
4495  * all open sockets.  On Windows there's no need as intres runs in
4496  * the same process as a thread.
4497  */
4498 #ifndef SYS_WINNT
4499 void
4500 kill_asyncio(
4501 	int	startfd
4502 	)
4503 {
4504 	BLOCKIO();
4505 
4506 	/*
4507 	 * In the child process we do not maintain activefds and
4508 	 * maxactivefd.  Zeroing maxactivefd disables code which
4509 	 * maintains it in close_and_delete_fd_from_list().
4510 	 */
4511 	maxactivefd = 0;
4512 
4513 	while (fd_list != NULL)
4514 		close_and_delete_fd_from_list(fd_list->fd);
4515 
4516 	UNBLOCKIO();
4517 }
4518 #endif	/* !SYS_WINNT */
4519 
4520 
4521 /*
4522  * Add and delete functions for the list of open sockets
4523  */
4524 static void
4525 add_fd_to_list(
4526 	SOCKET fd,
4527 	enum desc_type type
4528 	)
4529 {
4530 	vsock_t *lsock = emalloc(sizeof(*lsock));
4531 
4532 	lsock->fd = fd;
4533 	lsock->type = type;
4534 
4535 	LINK_SLIST(fd_list, lsock, link);
4536 	maintain_activefds(fd, 0);
4537 }
4538 
4539 
4540 static void
4541 close_and_delete_fd_from_list(
4542 	SOCKET fd
4543 	)
4544 {
4545 	vsock_t *lsock;
4546 
4547 	UNLINK_EXPR_SLIST(lsock, fd_list, fd ==
4548 	    UNLINK_EXPR_SLIST_CURRENT()->fd, link, vsock_t);
4549 
4550 	if (NULL == lsock)
4551 		return;
4552 
4553 	switch (lsock->type) {
4554 
4555 	case FD_TYPE_SOCKET:
4556 		closesocket(lsock->fd);
4557 		break;
4558 
4559 	case FD_TYPE_FILE:
4560 		closeserial((int)lsock->fd);
4561 		break;
4562 
4563 	default:
4564 		msyslog(LOG_ERR,
4565 			"internal error - illegal descriptor type %d - EXITING",
4566 			(int)lsock->type);
4567 		exit(1);
4568 	}
4569 
4570 	free(lsock);
4571 	/*
4572 	 * remove from activefds
4573 	 */
4574 	maintain_activefds(fd, 1);
4575 }
4576 
4577 
4578 static void
4579 add_addr_to_list(
4580 	sockaddr_u *	addr,
4581 	endpt *		ep
4582 	)
4583 {
4584 	remaddr_t *laddr;
4585 
4586 #ifdef DEBUG
4587 	if (find_addr_in_list(addr) == NULL) {
4588 #endif
4589 		/* not there yet - add to list */
4590 		laddr = emalloc(sizeof(*laddr));
4591 		laddr->addr = *addr;
4592 		laddr->ep = ep;
4593 
4594 		LINK_SLIST(remoteaddr_list, laddr, link);
4595 
4596 		DPRINTF(4, ("Added addr %s to list of addresses\n",
4597 			    stoa(addr)));
4598 #ifdef DEBUG
4599 	} else
4600 		DPRINTF(4, ("WARNING: Attempt to add duplicate addr %s to address list\n",
4601 			    stoa(addr)));
4602 #endif
4603 }
4604 
4605 
4606 static void
4607 delete_addr_from_list(
4608 	sockaddr_u *addr
4609 	)
4610 {
4611 	remaddr_t *unlinked;
4612 
4613 	UNLINK_EXPR_SLIST(unlinked, remoteaddr_list, SOCK_EQ(addr,
4614 		&(UNLINK_EXPR_SLIST_CURRENT()->addr)), link, remaddr_t);
4615 
4616 	if (unlinked != NULL) {
4617 		DPRINTF(4, ("Deleted addr %s from list of addresses\n",
4618 			stoa(addr)));
4619 		free(unlinked);
4620 	}
4621 }
4622 
4623 
4624 static void
4625 delete_interface_from_list(
4626 	endpt *iface
4627 	)
4628 {
4629 	remaddr_t *unlinked;
4630 
4631 	for (;;) {
4632 		UNLINK_EXPR_SLIST(unlinked, remoteaddr_list, iface ==
4633 		    UNLINK_EXPR_SLIST_CURRENT()->ep, link,
4634 		    remaddr_t);
4635 
4636 		if (unlinked == NULL)
4637 			break;
4638 		DPRINTF(4, ("Deleted addr %s for interface #%d %s from list of addresses\n",
4639 			    stoa(&unlinked->addr), iface->ifnum,
4640 			    iface->name));
4641 		free(unlinked);
4642 	}
4643 }
4644 
4645 
4646 static struct interface *
4647 find_addr_in_list(
4648 	sockaddr_u *addr
4649 	)
4650 {
4651 	remaddr_t *entry;
4652 
4653 	DPRINTF(4, ("Searching for addr %s in list of addresses - ",
4654 		    stoa(addr)));
4655 
4656 	for (entry = remoteaddr_list;
4657 	     entry != NULL;
4658 	     entry = entry->link)
4659 		if (SOCK_EQ(&entry->addr, addr)) {
4660 			DPRINTF(4, ("FOUND\n"));
4661 			return entry->ep;
4662 		}
4663 
4664 	DPRINTF(4, ("NOT FOUND\n"));
4665 	return NULL;
4666 }
4667 
4668 
4669 /*
4670  * Find the given address with the all given flags set in the list
4671  */
4672 static endpt *
4673 find_flagged_addr_in_list(
4674 	sockaddr_u *	addr,
4675 	u_int32		flags
4676 	)
4677 {
4678 	remaddr_t *entry;
4679 
4680 	DPRINTF(4, ("Finding addr %s with flags %d in list: ",
4681 		    stoa(addr), flags));
4682 
4683 	for (entry = remoteaddr_list;
4684 	     entry != NULL;
4685 	     entry = entry->link)
4686 
4687 		if (SOCK_EQ(&entry->addr, addr)
4688 		    && (entry->ep->flags & flags) == flags) {
4689 
4690 			DPRINTF(4, ("FOUND\n"));
4691 			return entry->ep;
4692 		}
4693 
4694 	DPRINTF(4, ("NOT FOUND\n"));
4695 	return NULL;
4696 }
4697 
4698 
4699 const char *
4700 localaddrtoa(
4701 	endpt *la
4702 	)
4703 {
4704 	return (NULL == la)
4705 		   ? "<null>"
4706 		   : stoa(&la->sin);
4707 }
4708 
4709 
4710 #ifdef HAS_ROUTING_SOCKET
4711 # ifndef UPDATE_GRACE
4712 #  define UPDATE_GRACE	2	/* wait UPDATE_GRACE seconds before scanning */
4713 # endif
4714 
4715 static void
4716 process_routing_msgs(struct asyncio_reader *reader)
4717 {
4718 	char buffer[5120];
4719 	int cnt, msg_type;
4720 #ifdef HAVE_RTNETLINK
4721 	struct nlmsghdr *nh;
4722 #else
4723 	struct rt_msghdr rtm;
4724 	char *p;
4725 #endif
4726 
4727 	if (disable_dynamic_updates) {
4728 		/*
4729 		 * discard ourselves if we are not needed any more
4730 		 * usually happens when running unprivileged
4731 		 */
4732 		remove_asyncio_reader(reader);
4733 		delete_asyncio_reader(reader);
4734 		return;
4735 	}
4736 
4737 	cnt = read(reader->fd, buffer, sizeof(buffer));
4738 
4739 	if (cnt < 0) {
4740 		if (errno == ENOBUFS) {
4741 			msyslog(LOG_DEBUG,
4742 				"routing socket overflowed"
4743 				" - will update interfaces");
4744 			/*
4745 			 * drain the routing socket as we need to update
4746 			 * the interfaces anyway
4747 			 */
4748 			do {
4749 				cnt = read(reader->fd, buffer, sizeof(buffer));
4750 			} while (cnt != -1 || errno == ENOBUFS);
4751 			timer_interfacetimeout(current_time + UPDATE_GRACE);
4752 		} else if (errno != EINTR) {
4753 			msyslog(LOG_ERR,
4754 				"routing socket reports: %m - disabling");
4755 			remove_asyncio_reader(reader);
4756 			delete_asyncio_reader(reader);
4757 		}
4758 		return;
4759 	}
4760 
4761 	/*
4762 	 * process routing message
4763 	 */
4764 #ifdef HAVE_RTNETLINK
4765 	for (nh = UA_PTR(struct nlmsghdr, buffer);
4766 	     NLMSG_OK(nh, cnt);
4767 	     nh = NLMSG_NEXT(nh, cnt))
4768 	{
4769 		msg_type = nh->nlmsg_type;
4770 #else
4771 	for (p = buffer;
4772 	     (p + sizeof(struct rt_msghdr)) <= (buffer + cnt);
4773 	     p += rtm.rtm_msglen)
4774 	{
4775 		memcpy(&rtm, p, sizeof(rtm));
4776 		if (rtm.rtm_version != RTM_VERSION) {
4777 			msyslog(LOG_ERR,
4778 				"version mismatch (got %d - expected %d) on routing socket - disabling",
4779 				rtm.rtm_version, RTM_VERSION);
4780 
4781 			remove_asyncio_reader(reader);
4782 			delete_asyncio_reader(reader);
4783 			return;
4784 		}
4785 		msg_type = rtm.rtm_type;
4786 #endif
4787 		switch (msg_type) {
4788 #ifdef RTM_NEWADDR
4789 		case RTM_NEWADDR:
4790 #endif
4791 #ifdef RTM_DELADDR
4792 		case RTM_DELADDR:
4793 #endif
4794 #ifdef RTM_ADD
4795 		case RTM_ADD:
4796 #endif
4797 #ifdef RTM_DELETE
4798 		case RTM_DELETE:
4799 #endif
4800 #ifdef RTM_REDIRECT
4801 		case RTM_REDIRECT:
4802 #endif
4803 #ifdef RTM_CHANGE
4804 		case RTM_CHANGE:
4805 #endif
4806 #ifdef RTM_IFINFO
4807 		case RTM_IFINFO:
4808 #endif
4809 #ifdef RTM_NEWLINK
4810 		case RTM_NEWLINK:
4811 #endif
4812 #ifdef RTM_DELLINK
4813 		case RTM_DELLINK:
4814 #endif
4815 #ifdef RTM_NEWROUTE
4816 		case RTM_NEWROUTE:
4817 #endif
4818 #ifdef RTM_DELROUTE
4819 		case RTM_DELROUTE:
4820 #endif
4821 			/*
4822 			 * we are keen on new and deleted addresses and
4823 			 * if an interface goes up and down or routing
4824 			 * changes
4825 			 */
4826 			DPRINTF(3, ("routing message op = %d: scheduling interface update\n",
4827 				    msg_type));
4828 			timer_interfacetimeout(current_time + UPDATE_GRACE);
4829 			break;
4830 #ifdef HAVE_RTNETLINK
4831 		case NLMSG_DONE:
4832 			/* end of multipart message */
4833 			return;
4834 #endif
4835 		default:
4836 			/*
4837 			 * the rest doesn't bother us.
4838 			 */
4839 			DPRINTF(4, ("routing message op = %d: ignored\n",
4840 				    msg_type));
4841 			break;
4842 		}
4843 	}
4844 }
4845 
4846 /*
4847  * set up routing notifications
4848  */
4849 static void
4850 init_async_notifications()
4851 {
4852 	struct asyncio_reader *reader;
4853 #ifdef HAVE_RTNETLINK
4854 	int fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
4855 	struct sockaddr_nl sa;
4856 #else
4857 	int fd = socket(PF_ROUTE, SOCK_RAW, 0);
4858 #ifdef SO_RERROR
4859 	int on = 1;
4860 #endif
4861 #endif
4862 #ifdef RO_MSGFILTER
4863 	unsigned char msgfilter[] = {
4864 #ifdef RTM_NEWADDR
4865 		RTM_NEWADDR,
4866 #endif
4867 #ifdef RTM_DELADDR
4868 		RTM_DELADDR,
4869 #endif
4870 #ifdef RTM_ADD
4871 		RTM_ADD,
4872 #endif
4873 #ifdef RTM_DELETE
4874 		RTM_DELETE,
4875 #endif
4876 #ifdef RTM_REDIRECT
4877 		RTM_REDIRECT,
4878 #endif
4879 #ifdef RTM_CHANGE
4880 		RTM_CHANGE,
4881 #endif
4882 #ifdef RTM_IFINFO
4883 		RTM_IFINFO,
4884 #endif
4885 #ifdef RTM_NEWLINK
4886 		RTM_NEWLINK,
4887 #endif
4888 #ifdef RTM_DELLINK
4889 		RTM_DELLINK,
4890 #endif
4891 #ifdef RTM_NEWROUTE
4892 		RTM_NEWROUTE,
4893 #endif
4894 #ifdef RTM_DELROUTE
4895 		RTM_DELROUTE,
4896 #endif
4897 	};
4898 #endif /* !RO_MSGFILTER */
4899 
4900 	if (fd < 0) {
4901 		msyslog(LOG_ERR,
4902 			"unable to open routing socket (%m) - using polled interface update");
4903 		return;
4904 	}
4905 
4906 	fd = move_fd(fd);
4907 #ifdef HAVE_RTNETLINK
4908 	ZERO(sa);
4909 	sa.nl_family = PF_NETLINK;
4910 	sa.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR
4911 		       | RTMGRP_IPV6_IFADDR | RTMGRP_IPV4_ROUTE
4912 		       | RTMGRP_IPV4_MROUTE | RTMGRP_IPV6_ROUTE
4913 		       | RTMGRP_IPV6_MROUTE;
4914 	if (bind(fd, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
4915 		msyslog(LOG_ERR,
4916 			"bind failed on routing socket (%m) - using polled interface update");
4917 		return;
4918 	}
4919 #endif
4920 #ifdef RO_MSGFILTER
4921 	if (setsockopt(fd, PF_ROUTE, RO_MSGFILTER,
4922 	    &msgfilter, sizeof(msgfilter)) == -1)
4923 		msyslog(LOG_ERR, "RO_MSGFILTER: %m");
4924 #endif
4925 #ifdef SO_RERROR
4926 	if (setsockopt(fd, SOL_SOCKET, SO_RERROR, &on, sizeof(on)) == -1)
4927 		msyslog(LOG_ERR, "SO_RERROR: %m");
4928 #endif
4929 	make_socket_nonblocking(fd);
4930 #if defined(HAVE_SIGNALED_IO)
4931 	init_socket_sig(fd);
4932 #endif /* HAVE_SIGNALED_IO */
4933 
4934 	reader = new_asyncio_reader();
4935 
4936 	reader->fd = fd;
4937 	reader->receiver = process_routing_msgs;
4938 
4939 	add_asyncio_reader(reader, FD_TYPE_SOCKET);
4940 	msyslog(LOG_INFO,
4941 		"Listening on routing socket on fd #%d for interface updates",
4942 		fd);
4943 }
4944 #else
/*
 * HAS_ROUTING_SOCKET not defined: there is no routing socket to
 * listen on, so setting up routing notifications is a no-op.
 */
static void
init_async_notifications(void)
{
	/* intentionally empty */
}
4950 #endif
4951 
4952