xref: /dragonfly/sys/netinet/ip_carp.c (revision 6f74e152)
1 /*
2  * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
3  * Copyright (c) 2003 Ryan McBride. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
18  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
23  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
24  * THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 /*
27  * $FreeBSD: src/sys/netinet/ip_carp.c,v 1.48 2007/02/02 09:39:09 glebius Exp $
28  */
29 
30 #include "opt_carp.h"
31 #include "opt_inet.h"
32 #include "opt_inet6.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/kernel.h>
37 #include <sys/in_cksum.h>
38 #include <sys/limits.h>
39 #include <sys/malloc.h>
40 #include <sys/mbuf.h>
41 #include <sys/msgport2.h>
42 #include <sys/time.h>
43 #include <sys/proc.h>
44 #include <sys/priv.h>
45 #include <sys/sockio.h>
46 #include <sys/socket.h>
47 #include <sys/sysctl.h>
48 #include <sys/syslog.h>
49 #include <sys/thread.h>
50 
51 #include <machine/stdarg.h>
52 #include <crypto/sha1.h>
53 
54 #include <net/bpf.h>
55 #include <net/ethernet.h>
56 #include <net/if.h>
57 #include <net/if_dl.h>
58 #include <net/if_types.h>
59 #include <net/route.h>
60 #include <net/if_clone.h>
61 #include <net/if_var.h>
62 #include <net/ifq_var.h>
63 #include <net/netmsg2.h>
64 #include <net/netisr2.h>
65 
66 #ifdef INET
67 #include <netinet/in.h>
68 #include <netinet/in_var.h>
69 #include <netinet/in_systm.h>
70 #include <netinet/ip.h>
71 #include <netinet/ip_var.h>
72 #include <netinet/if_ether.h>
73 #endif
74 
75 #ifdef INET6
76 #include <netinet/icmp6.h>
77 #include <netinet/ip6.h>
78 #include <netinet6/ip6_var.h>
79 #include <netinet6/scope6_var.h>
80 #include <netinet6/nd6.h>
81 #endif
82 
83 #include <netinet/ip_carp.h>
84 
85 /*
86  * Note about carp's MP safe approach:
87  *
88  * Brief: carp_softc (softc), carp_softc_container (scc)
89  *
90  * - All configuration operations, e.g. ioctl and adding/deleting inet
91  *   addresses, are serialized by netisr0, not by carp's serializer
92  *
93  * - Backing interface's if_carp and carp_softc's relationship:
94  *
95  *                +---------+
96  *     if_carp -->| carp_if |
97  *                +---------+
98  *                     |
99  *                     |
100  *                     V      +---------+
101  *                  +-----+   |         |
102  *                  | scc |-->|  softc  |
103  *                  +-----+   |         |
104  *                     |      +---------+
105  *                     |
106  *                     V      +---------+
107  *                  +-----+   |         |
108  *                  | scc |-->|  softc  |
109  *                  +-----+   |         |
110  *                            +---------+
111  *
112  * - if_carp creation, modification and deletion all happen in netisr0,
113  *   as stated previously.  Since if_carp is accessed by multiple netisrs,
114  *   the modification to if_carp is conducted in the following way:
115  *
116  *   Adding carp_softc:
117  *
118  *   1) Duplicate the old carp_if to new carp_if (ncif), and insert the
119  *      to-be-added carp_softc to the new carp_if (ncif):
120  *
121  *        if_carp                     ncif
122  *           |                         |
123  *           V                         V
124  *      +---------+               +---------+
125  *      | carp_if |               | carp_if |
126  *      +---------+               +---------+
127  *           |                         |
128  *           |                         |
129  *           V        +-------+        V
130  *        +-----+     |       |     +-----+
131  *        | scc |---->| softc |<----| scc |
132  *        +-----+     |       |     +-----+
133  *           |        +-------+        |
134  *           |                         |
135  *           V        +-------+        V
136  *        +-----+     |       |     +-----+
137  *        | scc |---->| softc |<----| scc |
138  *        +-----+     |       |     +-----+
139  *                    +-------+        |
140  *                                     |
141  *                    +-------+        V
142  *                    |       |     +-----+
143  *                    | softc |<----| scc |
144  *                    |       |     +-----+
145  *                    +-------+
146  *
147  *   2) Save if_carp into ocif and switch if_carp to ncif:
148  *
149  *          ocif                    if_carp
150  *           |                         |
151  *           V                         V
152  *      +---------+               +---------+
153  *      | carp_if |               | carp_if |
154  *      +---------+               +---------+
155  *           |                         |
156  *           |                         |
157  *           V        +-------+        V
158  *        +-----+     |       |     +-----+
159  *        | scc |---->| softc |<----| scc |
160  *        +-----+     |       |     +-----+
161  *           |        +-------+        |
162  *           |                         |
163  *           V        +-------+        V
164  *        +-----+     |       |     +-----+
165  *        | scc |---->| softc |<----| scc |
166  *        +-----+     |       |     +-----+
167  *                    +-------+        |
168  *                                     |
169  *                    +-------+        V
170  *                    |       |     +-----+
171  *                    | softc |<----| scc |
172  *                    |       |     +-----+
173  *                    +-------+
174  *
175  *   3) Run netmsg_service_sync(), which will make sure that
176  *      ocif is no longer accessed (all network operations
177  *      happen only in network threads).
178  *   4) Free ocif -- only carp_if and scc are freed.
179  *
180  *
181  *   Removing carp_softc:
182  *
183  *   1) Duplicate the old carp_if to new carp_if (ncif); the to-be-deleted
184  *      carp_softc will not be duplicated.
185  *
186  *        if_carp                     ncif
187  *           |                         |
188  *           V                         V
189  *      +---------+               +---------+
190  *      | carp_if |               | carp_if |
191  *      +---------+               +---------+
192  *           |                         |
193  *           |                         |
194  *           V        +-------+        V
195  *        +-----+     |       |     +-----+
196  *        | scc |---->| softc |<----| scc |
197  *        +-----+     |       |     +-----+
198  *           |        +-------+        |
199  *           |                         |
200  *           V        +-------+        |
201  *        +-----+     |       |        |
202  *        | scc |---->| softc |        |
203  *        +-----+     |       |        |
204  *           |        +-------+        |
205  *           |                         |
206  *           V        +-------+        V
207  *        +-----+     |       |     +-----+
208  *        | scc |---->| softc |<----| scc |
209  *        +-----+     |       |     +-----+
210  *                    +-------+
211  *
212  *   2) Save if_carp into ocif and switch if_carp to ncif:
213  *
214  *          ocif                    if_carp
215  *           |                         |
216  *           V                         V
217  *      +---------+               +---------+
218  *      | carp_if |               | carp_if |
219  *      +---------+               +---------+
220  *           |                         |
221  *           |                         |
222  *           V        +-------+        V
223  *        +-----+     |       |     +-----+
224  *        | scc |---->| softc |<----| scc |
225  *        +-----+     |       |     +-----+
226  *           |        +-------+        |
227  *           |                         |
228  *           V        +-------+        |
229  *        +-----+     |       |        |
230  *        | scc |---->| softc |        |
231  *        +-----+     |       |        |
232  *           |        +-------+        |
233  *           |                         |
234  *           V        +-------+        V
235  *        +-----+     |       |     +-----+
236  *        | scc |---->| softc |<----| scc |
237  *        +-----+     |       |     +-----+
238  *                    +-------+
239  *
240  *   3) Run netmsg_service_sync(), which will make sure that
241  *      ocif is no longer accessed (all network operations
242  *      happen only in network threads).
243  *   4) Free ocif -- only carp_if and scc are freed.
244  *
245  * - Accessing if_carp:
246  *   The accessing code should cache if_carp in a local temporary
247  *   variable and use that temporary variable along the code path
248  *   instead of re-reading if_carp later on.
249  */
250 
/* Name prefix handed to if_initname() and to the interface cloner. */
#define	CARP_IFNAME		"carp"

/* Non-zero only when ifp has both IFF_UP and IFF_RUNNING set. */
#define CARP_IS_RUNNING(ifp)	\
	((~(ifp)->if_flags & (IFF_UP | IFF_RUNNING)) == 0)
254 
255 struct carp_softc;
256 
257 struct carp_vhaddr {
258 	uint32_t		vha_flags;	/* CARP_VHAF_ */
259 	struct in_ifaddr	*vha_ia;	/* carp address */
260 	struct in_ifaddr	*vha_iaback;	/* backing address */
261 	TAILQ_ENTRY(carp_vhaddr) vha_link;
262 };
263 TAILQ_HEAD(carp_vhaddr_list, carp_vhaddr);
264 
265 struct netmsg_carp {
266 	struct netmsg_base	base;
267 	struct ifnet		*nc_carpdev;
268 	struct carp_softc	*nc_softc;
269 	void			*nc_data;
270 	size_t			nc_datalen;
271 };
272 
273 struct carp_softc {
274 	struct arpcom		 arpcom;
275 	struct ifnet		*sc_carpdev;	/* parent interface */
276 	struct carp_vhaddr_list	 sc_vha_list;	/* virtual addr list */
277 
278 	const struct in_ifaddr	*sc_ia;		/* primary iface address v4 */
279 	struct ip_moptions 	 sc_imo;
280 
281 #ifdef INET6
282 	struct in6_ifaddr 	*sc_ia6;	/* primary iface address v6 */
283 	struct ip6_moptions 	 sc_im6o;
284 #endif /* INET6 */
285 
286 	enum { INIT = 0, BACKUP, MASTER }
287 				 sc_state;
288 	boolean_t		 sc_dead;
289 
290 	int			 sc_suppress;
291 
292 	int			 sc_sendad_errors;
293 #define	CARP_SENDAD_MAX_ERRORS	3
294 	int			 sc_sendad_success;
295 #define	CARP_SENDAD_MIN_SUCCESS 3
296 
297 	int			 sc_vhid;
298 	int			 sc_advskew;
299 	int			 sc_naddrs;	/* actually used IPv4 vha */
300 	int			 sc_naddrs6;
301 	int			 sc_advbase;	/* seconds */
302 	int			 sc_init_counter;
303 	uint64_t		 sc_counter;
304 
305 	/* authentication */
306 #define CARP_HMAC_PAD	64
307 	unsigned char		 sc_key[CARP_KEY_LEN];
308 	unsigned char		 sc_pad[CARP_HMAC_PAD];
309 	SHA1_CTX		 sc_sha1;
310 
311 	struct callout		 sc_ad_tmo;	/* advertisement timeout */
312 	struct netmsg_carp	 sc_ad_msg;	/* adv timeout netmsg */
313 	struct callout		 sc_md_tmo;	/* ip4 master down timeout */
314 	struct callout 		 sc_md6_tmo;	/* ip6 master down timeout */
315 	struct netmsg_carp	 sc_md_msg;	/* master down timeout netmsg */
316 
317 	LIST_ENTRY(carp_softc)	 sc_next;	/* Interface clue */
318 };
319 
320 #define sc_if	arpcom.ac_if
321 
322 struct carp_softc_container {
323 	TAILQ_ENTRY(carp_softc_container) scc_link;
324 	struct carp_softc	*scc_softc;
325 };
326 TAILQ_HEAD(carp_if, carp_softc_container);
327 
328 SYSCTL_DECL(_net_inet_carp);
329 
330 static int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0 }; /* XXX for now */
331 SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW,
332     &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets");
333 SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW,
334     &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode");
335 SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW,
336     &carp_opts[CARPCTL_LOG], 0, "log bad carp packets");
337 SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW,
338     &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses");
339 
340 static int carp_suppress_preempt = 0;
341 SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD,
342     &carp_suppress_preempt, 0, "Preemption is suppressed");
343 
344 static int carp_prio_ad = 1;
345 SYSCTL_INT(_net_inet_carp, OID_AUTO, prio_ad, CTLFLAG_RD,
346     &carp_prio_ad, 0, "Prioritize advertisement packet");
347 
348 static struct carpstats carpstats;
349 SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW,
350     &carpstats, carpstats,
351     "CARP statistics (struct carpstats, netinet/ip_carp.h)");
352 
/*
 * Logging helpers gated by the CARPCTL_LOG tunable:
 * a value of at least 1 enables CARP_LOG (LOG_INFO),
 * a value of at least 2 additionally enables CARP_DEBUG (LOG_DEBUG).
 */
#define	CARP_LOG(...)	do {				\
	if (carp_opts[CARPCTL_LOG] >= 1)		\
		log(LOG_INFO, __VA_ARGS__);		\
} while (0)

#define	CARP_DEBUG(...)	do {				\
	if (carp_opts[CARPCTL_LOG] >= 2)		\
		log(LOG_DEBUG, __VA_ARGS__);		\
} while (0)
362 
363 static struct lwkt_token carp_listtok = LWKT_TOKEN_INITIALIZER(carp_list_token);
364 
365 static void	carp_hmac_prepare(struct carp_softc *);
366 static void	carp_hmac_generate(struct carp_softc *, uint32_t *,
367 		    unsigned char *);
368 static int	carp_hmac_verify(struct carp_softc *, uint32_t *,
369 		    unsigned char *);
370 static void	carp_setroute(struct carp_softc *, int);
371 static void	carp_proto_input_c(struct carp_softc *, struct mbuf *,
372 		    struct carp_header *, sa_family_t);
373 static int 	carp_clone_create(struct if_clone *, int, caddr_t);
374 static int 	carp_clone_destroy(struct ifnet *);
375 static void	carp_detach(struct carp_softc *, boolean_t, boolean_t);
376 static void	carp_prepare_ad(struct carp_softc *, struct carp_header *);
377 static void	carp_send_ad_all(void);
378 static void	carp_send_ad_timeout(void *);
379 static void	carp_send_ad(struct carp_softc *);
380 static void	carp_send_arp(struct carp_softc *);
381 static void	carp_master_down_timeout(void *);
382 static void	carp_master_down(struct carp_softc *);
383 static void	carp_setrun(struct carp_softc *, sa_family_t);
384 static void	carp_set_state(struct carp_softc *, int);
385 static struct ifnet *carp_forus(struct carp_if *, const uint8_t *);
386 
387 static void	carp_init(void *);
388 static int	carp_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
389 static int	carp_output(struct ifnet *, struct mbuf *, struct sockaddr *,
390 		    struct rtentry *);
391 static void	carp_start(struct ifnet *, struct ifaltq_subque *);
392 
393 static void	carp_multicast_cleanup(struct carp_softc *);
394 static void	carp_add_addr(struct carp_softc *, struct ifaddr *);
395 static void	carp_del_addr(struct carp_softc *, struct ifaddr *);
396 static void	carp_config_addr(struct carp_softc *, struct ifaddr *);
397 static void	carp_link_addrs(struct carp_softc *, struct ifnet *,
398 		    struct ifaddr *);
399 static void	carp_unlink_addrs(struct carp_softc *, struct ifnet *,
400 		    struct ifaddr *);
401 static void	carp_update_addrs(struct carp_softc *, struct ifaddr *);
402 
403 static int	carp_config_vhaddr(struct carp_softc *, struct carp_vhaddr *,
404 		    struct in_ifaddr *);
405 static int	carp_activate_vhaddr(struct carp_softc *, struct carp_vhaddr *,
406 		    struct ifnet *, struct in_ifaddr *, int);
407 static void	carp_deactivate_vhaddr(struct carp_softc *,
408 		    struct carp_vhaddr *, boolean_t);
409 static int	carp_addroute_vhaddr(struct carp_softc *, struct carp_vhaddr *);
410 static void	carp_delroute_vhaddr(struct carp_softc *, struct carp_vhaddr *,
411 		    boolean_t);
412 
413 #ifdef foo
414 static void	carp_sc_state(struct carp_softc *);
415 #endif
416 #ifdef INET6
417 static void	carp_send_na(struct carp_softc *);
418 #ifdef notyet
419 static int	carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
420 static int	carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *);
421 #endif
422 static void	carp_multicast6_cleanup(struct carp_softc *);
423 #endif
424 static void	carp_stop(struct carp_softc *, boolean_t);
425 static void	carp_suspend(struct carp_softc *, boolean_t);
426 static void	carp_ioctl_stop(struct carp_softc *);
427 static int	carp_ioctl_setvh(struct carp_softc *, void *, struct ucred *);
428 static void	carp_ioctl_ifcap(struct carp_softc *, int);
429 static int	carp_ioctl_getvh(struct carp_softc *, void *, struct ucred *);
430 static int	carp_ioctl_getdevname(struct carp_softc *, struct ifdrv *);
431 static int	carp_ioctl_getvhaddr(struct carp_softc *, struct ifdrv *);
432 
433 static struct carp_if *carp_if_remove(struct carp_if *, struct carp_softc *);
434 static struct carp_if *carp_if_insert(struct carp_if *, struct carp_softc *);
435 static void	carp_if_free(struct carp_if *);
436 
437 static void	carp_ifaddr(void *, struct ifnet *, enum ifaddr_event,
438 			    struct ifaddr *);
439 static void	carp_ifdetach(void *, struct ifnet *);
440 
441 static void	carp_ifdetach_dispatch(netmsg_t);
442 static void	carp_clone_destroy_dispatch(netmsg_t);
443 static void	carp_init_dispatch(netmsg_t);
444 static void	carp_ioctl_stop_dispatch(netmsg_t);
445 static void	carp_ioctl_setvh_dispatch(netmsg_t);
446 static void	carp_ioctl_ifcap_dispatch(netmsg_t);
447 static void	carp_ioctl_getvh_dispatch(netmsg_t);
448 static void	carp_ioctl_getdevname_dispatch(netmsg_t);
449 static void	carp_ioctl_getvhaddr_dispatch(netmsg_t);
450 static void	carp_send_ad_timeout_dispatch(netmsg_t);
451 static void	carp_master_down_timeout_dispatch(netmsg_t);
452 
453 static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces");
454 
455 static LIST_HEAD(, carp_softc) carpif_list;
456 
457 static struct if_clone carp_cloner =
458 IF_CLONE_INITIALIZER(CARP_IFNAME, carp_clone_create, carp_clone_destroy,
459 		     0, IF_MAXUNIT);
460 
461 static const uint8_t	carp_etheraddr[ETHER_ADDR_LEN] =
462 	{ 0, 0, 0x5e, 0, 1, 0 };
463 
464 static eventhandler_tag carp_ifdetach_event;
465 static eventhandler_tag carp_ifaddr_event;
466 
467 static __inline void
468 carp_insert_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha_new)
469 {
470 	struct carp_vhaddr *vha;
471 	u_long new_addr, addr;
472 
473 	KKASSERT((vha_new->vha_flags & CARP_VHAF_ONLIST) == 0);
474 
475 	/*
476 	 * Virtual address list is sorted; smaller one first
477 	 */
478 	new_addr = ntohl(vha_new->vha_ia->ia_addr.sin_addr.s_addr);
479 
480 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
481 		addr = ntohl(vha->vha_ia->ia_addr.sin_addr.s_addr);
482 
483 		if (addr > new_addr)
484 			break;
485 	}
486 	if (vha == NULL)
487 		TAILQ_INSERT_TAIL(&sc->sc_vha_list, vha_new, vha_link);
488 	else
489 		TAILQ_INSERT_BEFORE(vha, vha_new, vha_link);
490 	vha_new->vha_flags |= CARP_VHAF_ONLIST;
491 }
492 
493 static __inline void
494 carp_remove_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha)
495 {
496 	KKASSERT(vha->vha_flags & CARP_VHAF_ONLIST);
497 	vha->vha_flags &= ~CARP_VHAF_ONLIST;
498 	TAILQ_REMOVE(&sc->sc_vha_list, vha, vha_link);
499 }
500 
501 static void
502 carp_hmac_prepare(struct carp_softc *sc)
503 {
504 	uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
505 	uint8_t vhid = sc->sc_vhid & 0xff;
506 	int i;
507 #ifdef INET6
508 	struct ifaddr_container *ifac;
509 	struct in6_addr in6;
510 #endif
511 #ifdef INET
512 	struct carp_vhaddr *vha;
513 #endif
514 
515 	/* XXX: possible race here */
516 
517 	/* compute ipad from key */
518 	bzero(sc->sc_pad, sizeof(sc->sc_pad));
519 	bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key));
520 	for (i = 0; i < sizeof(sc->sc_pad); i++)
521 		sc->sc_pad[i] ^= 0x36;
522 
523 	/* precompute first part of inner hash */
524 	SHA1Init(&sc->sc_sha1);
525 	SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
526 	SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version));
527 	SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type));
528 	SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid));
529 #ifdef INET
530 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
531 		SHA1Update(&sc->sc_sha1,
532 		    (const uint8_t *)&vha->vha_ia->ia_addr.sin_addr,
533 		    sizeof(struct in_addr));
534 	}
535 #endif /* INET */
536 #ifdef INET6
537 	TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) {
538 		struct ifaddr *ifa = ifac->ifa;
539 
540 		if (ifa->ifa_addr->sa_family == AF_INET6) {
541 			in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
542 			in6_clearscope(&in6);
543 			SHA1Update(&sc->sc_sha1, (void *)&in6, sizeof(in6));
544 		}
545 	}
546 #endif /* INET6 */
547 
548 	/* convert ipad to opad */
549 	for (i = 0; i < sizeof(sc->sc_pad); i++)
550 		sc->sc_pad[i] ^= 0x36 ^ 0x5c;
551 }
552 
553 static void
554 carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2],
555     unsigned char md[20])
556 {
557 	SHA1_CTX sha1ctx;
558 
559 	/* fetch first half of inner hash */
560 	bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx));
561 
562 	SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter));
563 	SHA1Final(md, &sha1ctx);
564 
565 	/* outer hash */
566 	SHA1Init(&sha1ctx);
567 	SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad));
568 	SHA1Update(&sha1ctx, md, 20);
569 	SHA1Final(md, &sha1ctx);
570 }
571 
572 static int
573 carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2],
574     unsigned char md[20])
575 {
576 	unsigned char md2[20];
577 
578 	carp_hmac_generate(sc, counter, md2);
579 	return (bcmp(md, md2, sizeof(md2)));
580 }
581 
582 static void
583 carp_setroute(struct carp_softc *sc, int cmd)
584 {
585 #ifdef INET6
586 	struct ifaddr_container *ifac;
587 #endif
588 	struct carp_vhaddr *vha;
589 
590 	KKASSERT(cmd == RTM_DELETE || cmd == RTM_ADD);
591 
592 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
593 		if (vha->vha_iaback == NULL)
594 			continue;
595 		if (cmd == RTM_DELETE)
596 			carp_delroute_vhaddr(sc, vha, FALSE);
597 		else
598 			carp_addroute_vhaddr(sc, vha);
599 	}
600 
601 #ifdef INET6
602 	TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) {
603 		struct ifaddr *ifa = ifac->ifa;
604 
605 		if (ifa->ifa_addr->sa_family == AF_INET6) {
606 			if (cmd == RTM_ADD)
607 				in6_ifaddloop(ifa);
608 			else
609 				in6_ifremloop(ifa);
610 		}
611 	}
612 #endif /* INET6 */
613 }
614 
615 static int
616 carp_clone_create(struct if_clone *ifc, int unit, caddr_t param __unused)
617 {
618 	struct carp_softc *sc;
619 	struct ifnet *ifp;
620 
621 	sc = kmalloc(sizeof(*sc), M_CARP, M_WAITOK | M_ZERO);
622 	ifp = &sc->sc_if;
623 
624 	sc->sc_suppress = 0;
625 	sc->sc_advbase = CARP_DFLTINTV;
626 	sc->sc_vhid = -1;	/* required setting */
627 	sc->sc_advskew = 0;
628 	sc->sc_init_counter = 1;
629 	sc->sc_naddrs = 0;
630 	sc->sc_naddrs6 = 0;
631 
632 	TAILQ_INIT(&sc->sc_vha_list);
633 
634 #ifdef INET6
635 	sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
636 #endif
637 
638 	callout_init_mp(&sc->sc_ad_tmo);
639 	netmsg_init(&sc->sc_ad_msg.base, NULL, &netisr_adone_rport,
640 	    MSGF_DROPABLE | MSGF_PRIORITY, carp_send_ad_timeout_dispatch);
641 	sc->sc_ad_msg.nc_softc = sc;
642 
643 	callout_init_mp(&sc->sc_md_tmo);
644 	callout_init_mp(&sc->sc_md6_tmo);
645 	netmsg_init(&sc->sc_md_msg.base, NULL, &netisr_adone_rport,
646 	    MSGF_DROPABLE | MSGF_PRIORITY, carp_master_down_timeout_dispatch);
647 	sc->sc_md_msg.nc_softc = sc;
648 
649 	if_initname(ifp, CARP_IFNAME, unit);
650 	ifp->if_softc = sc;
651 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
652 	ifp->if_init = carp_init;
653 	ifp->if_ioctl = carp_ioctl;
654 	ifp->if_start = carp_start;
655 
656 	ifp->if_capabilities = IFCAP_TXCSUM | IFCAP_TSO;
657 	ifp->if_capenable = ifp->if_capabilities;
658 	/*
659 	 * Leave if_hwassist as it is; if_hwassist will be
660 	 * setup when this carp interface has parent.
661 	 */
662 
663 	ifq_set_maxlen(&ifp->if_snd, ifqmaxlen);
664 	ifq_set_ready(&ifp->if_snd);
665 
666 	ether_ifattach(ifp, carp_etheraddr, NULL);
667 
668 	ifp->if_type = IFT_CARP;
669 	ifp->if_output = carp_output;
670 
671 	lwkt_gettoken(&carp_listtok);
672 	LIST_INSERT_HEAD(&carpif_list, sc, sc_next);
673 	lwkt_reltoken(&carp_listtok);
674 
675 	return (0);
676 }
677 
678 static void
679 carp_clone_destroy_dispatch(netmsg_t msg)
680 {
681 	struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
682 	struct carp_softc *sc = cmsg->nc_softc;
683 
684 	sc->sc_dead = TRUE;
685 	carp_detach(sc, TRUE, FALSE);
686 
687 	callout_stop_sync(&sc->sc_ad_tmo);
688 	callout_stop_sync(&sc->sc_md_tmo);
689 	callout_stop_sync(&sc->sc_md6_tmo);
690 
691 	crit_enter();
692 	lwkt_dropmsg(&sc->sc_ad_msg.base.lmsg);
693 	lwkt_dropmsg(&sc->sc_md_msg.base.lmsg);
694 	crit_exit();
695 
696 	lwkt_replymsg(&cmsg->base.lmsg, 0);
697 }
698 
699 static int
700 carp_clone_destroy(struct ifnet *ifp)
701 {
702 	struct carp_softc *sc = ifp->if_softc;
703 	struct netmsg_carp cmsg;
704 
705 	bzero(&cmsg, sizeof(cmsg));
706 	netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
707 	    carp_clone_destroy_dispatch);
708 	cmsg.nc_softc = sc;
709 
710 	lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
711 
712 	lwkt_gettoken(&carp_listtok);
713 	LIST_REMOVE(sc, sc_next);
714 	lwkt_reltoken(&carp_listtok);
715 
716 	bpfdetach(ifp);
717 	if_detach(ifp);
718 
719 	KASSERT(sc->sc_naddrs == 0, ("certain inet address is still active"));
720 	kfree(sc, M_CARP);
721 
722 	return 0;
723 }
724 
725 static struct carp_if *
726 carp_if_remove(struct carp_if *ocif, struct carp_softc *sc)
727 {
728 	struct carp_softc_container *oscc, *scc;
729 	struct carp_if *cif;
730 	int count = 0;
731 #ifdef INVARIANTS
732 	int found = 0;
733 #endif
734 
735 	TAILQ_FOREACH(oscc, ocif, scc_link) {
736 		++count;
737 #ifdef INVARIANTS
738 		if (oscc->scc_softc == sc)
739 			found = 1;
740 #endif
741 	}
742 	KASSERT(found, ("%s carp_softc is not on carp_if", __func__));
743 
744 	if (count == 1) {
745 		/* Last one is going to be unlinked */
746 		return NULL;
747 	}
748 
749 	cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO);
750 	TAILQ_INIT(cif);
751 
752 	TAILQ_FOREACH(oscc, ocif, scc_link) {
753 		if (oscc->scc_softc == sc)
754 			continue;
755 
756 		scc = kmalloc(sizeof(*scc), M_CARP, M_WAITOK | M_ZERO);
757 		scc->scc_softc = oscc->scc_softc;
758 		TAILQ_INSERT_TAIL(cif, scc, scc_link);
759 	}
760 
761 	return cif;
762 }
763 
764 static struct carp_if *
765 carp_if_insert(struct carp_if *ocif, struct carp_softc *sc)
766 {
767 	struct carp_softc_container *oscc;
768 	int onlist;
769 
770 	onlist = 0;
771 	if (ocif != NULL) {
772 		TAILQ_FOREACH(oscc, ocif, scc_link) {
773 			if (oscc->scc_softc == sc)
774 				onlist = 1;
775 		}
776 	}
777 
778 #ifdef INVARIANTS
779 	if (sc->sc_carpdev != NULL) {
780 		KASSERT(onlist, ("%s is not on %s carp list",
781 		    sc->sc_if.if_xname, sc->sc_carpdev->if_xname));
782 	} else {
783 		KASSERT(!onlist, ("%s is already on carp list",
784 		    sc->sc_if.if_xname));
785 	}
786 #endif
787 
788 	if (!onlist) {
789 		struct carp_if *cif;
790 		struct carp_softc_container *new_scc, *scc;
791 		int inserted = 0;
792 
793 		cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO);
794 		TAILQ_INIT(cif);
795 
796 		new_scc = kmalloc(sizeof(*new_scc), M_CARP, M_WAITOK | M_ZERO);
797 		new_scc->scc_softc = sc;
798 
799 		if (ocif != NULL) {
800 			TAILQ_FOREACH(oscc, ocif, scc_link) {
801 				if (!inserted &&
802 				    oscc->scc_softc->sc_vhid > sc->sc_vhid) {
803 					TAILQ_INSERT_TAIL(cif, new_scc,
804 					    scc_link);
805 					inserted = 1;
806 				}
807 
808 				scc = kmalloc(sizeof(*scc), M_CARP,
809 				    M_WAITOK | M_ZERO);
810 				scc->scc_softc = oscc->scc_softc;
811 				TAILQ_INSERT_TAIL(cif, scc, scc_link);
812 			}
813 		}
814 		if (!inserted)
815 			TAILQ_INSERT_TAIL(cif, new_scc, scc_link);
816 
817 		return cif;
818 	} else {
819 		return ocif;
820 	}
821 }
822 
823 static void
824 carp_if_free(struct carp_if *cif)
825 {
826 	struct carp_softc_container *scc;
827 
828 	while ((scc = TAILQ_FIRST(cif)) != NULL) {
829 		TAILQ_REMOVE(cif, scc, scc_link);
830 		kfree(scc, M_CARP);
831 	}
832 	kfree(cif, M_CARP);
833 }
834 
835 static void
836 carp_detach(struct carp_softc *sc, boolean_t detach, boolean_t del_iaback)
837 {
838 	carp_suspend(sc, detach);
839 
840 	carp_multicast_cleanup(sc);
841 #ifdef INET6
842 	carp_multicast6_cleanup(sc);
843 #endif
844 
845 	if (!sc->sc_dead && detach) {
846 		struct carp_vhaddr *vha;
847 
848 		TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
849 			carp_deactivate_vhaddr(sc, vha, del_iaback);
850 		KKASSERT(sc->sc_naddrs == 0);
851 	}
852 
853 	if (sc->sc_carpdev != NULL) {
854 		struct ifnet *ifp = sc->sc_carpdev;
855 		struct carp_if *ocif = ifp->if_carp;
856 
857 		ifp->if_carp = carp_if_remove(ocif, sc);
858 		KASSERT(ifp->if_carp != ocif,
859 		    ("%s carp_if_remove failed", __func__));
860 
861 		sc->sc_carpdev = NULL;
862 		sc->sc_ia = NULL;
863 		sc->arpcom.ac_if.if_hwassist = 0;
864 
865 		/*
866 		 * Make sure that all protocol threads see the
867 		 * sc_carpdev and if_carp changes
868 		 */
869 		netmsg_service_sync();
870 
871 		if (ifp->if_carp == NULL) {
872 			/*
873 			 * No more carp interfaces using
874 			 * ifp as the backing interface,
875 			 * move it out of promiscous mode.
876 			 */
877 			ifpromisc(ifp, 0);
878 		}
879 
880 		/*
881 		 * The old carp list could be safely free now,
882 		 * since no one can access it.
883 		 */
884 		carp_if_free(ocif);
885 	}
886 }
887 
888 static void
889 carp_ifdetach_dispatch(netmsg_t msg)
890 {
891 	struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
892 	struct ifnet *ifp = cmsg->nc_carpdev;
893 
894 	while (ifp->if_carp) {
895 		struct carp_softc_container *scc;
896 
897 		scc = TAILQ_FIRST((struct carp_if *)(ifp->if_carp));
898 		carp_detach(scc->scc_softc, TRUE, TRUE);
899 	}
900 	lwkt_replymsg(&cmsg->base.lmsg, 0);
901 }
902 
903 /* Detach an interface from the carp. */
904 static void
905 carp_ifdetach(void *arg __unused, struct ifnet *ifp)
906 {
907 	struct netmsg_carp cmsg;
908 
909 	ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp);
910 
911 	bzero(&cmsg, sizeof(cmsg));
912 	netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
913 	    carp_ifdetach_dispatch);
914 	cmsg.nc_carpdev = ifp;
915 
916 	lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
917 }
918 
919 /*
920  * process input packet.
921  * we have rearranged checks order compared to the rfc,
922  * but it seems more efficient this way or not possible otherwise.
923  */
924 int
925 carp_proto_input(struct mbuf **mp, int *offp, int proto)
926 {
927 	struct mbuf *m = *mp;
928 	struct ip *ip = mtod(m, struct ip *);
929 	struct ifnet *ifp = m->m_pkthdr.rcvif;
930 	struct carp_header *ch;
931 	struct carp_softc *sc;
932 	int len, iphlen;
933 
934 	iphlen = *offp;
935 	*mp = NULL;
936 
937 	carpstats.carps_ipackets++;
938 
939 	if (!carp_opts[CARPCTL_ALLOW]) {
940 		m_freem(m);
941 		goto back;
942 	}
943 
944 	/* Check if received on a valid carp interface */
945 	if (ifp->if_type != IFT_CARP) {
946 		carpstats.carps_badif++;
947 		CARP_LOG("carp_proto_input: packet received on non-carp "
948 		    "interface: %s\n", ifp->if_xname);
949 		m_freem(m);
950 		goto back;
951 	}
952 
953 	if (!CARP_IS_RUNNING(ifp)) {
954 		carpstats.carps_badif++;
955 		CARP_LOG("carp_proto_input: packet received on stopped carp "
956 		    "interface: %s\n", ifp->if_xname);
957 		m_freem(m);
958 		goto back;
959 	}
960 
961 	sc = ifp->if_softc;
962 	if (sc->sc_carpdev == NULL) {
963 		carpstats.carps_badif++;
964 		CARP_LOG("carp_proto_input: packet received on defunc carp "
965 		    "interface: %s\n", ifp->if_xname);
966 		m_freem(m);
967 		goto back;
968 	}
969 
970 	if (!IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
971 		carpstats.carps_badif++;
972 		CARP_LOG("carp_proto_input: non-mcast packet on "
973 		    "interface: %s\n", ifp->if_xname);
974 		m_freem(m);
975 		goto back;
976 	}
977 
978 	/* Verify that the IP TTL is CARP_DFLTTL. */
979 	if (ip->ip_ttl != CARP_DFLTTL) {
980 		carpstats.carps_badttl++;
981 		CARP_LOG("carp_proto_input: received ttl %d != %d on %s\n",
982 		    ip->ip_ttl, CARP_DFLTTL, ifp->if_xname);
983 		m_freem(m);
984 		goto back;
985 	}
986 
987 	/* Minimal CARP packet size */
988 	len = iphlen + sizeof(*ch);
989 
990 	/*
991 	 * Verify that the received packet length is
992 	 * not less than the CARP header
993 	 */
994 	if (m->m_pkthdr.len < len) {
995 		carpstats.carps_badlen++;
996 		CARP_LOG("packet too short %d on %s\n", m->m_pkthdr.len,
997 		    ifp->if_xname);
998 		m_freem(m);
999 		goto back;
1000 	}
1001 
1002 	/* Make sure that CARP header is contiguous */
1003 	if (len > m->m_len) {
1004 		m = m_pullup(m, len);
1005 		if (m == NULL) {
1006 			carpstats.carps_hdrops++;
1007 			CARP_LOG("carp_proto_input: m_pullup failed\n");
1008 			goto back;
1009 		}
1010 		ip = mtod(m, struct ip *);
1011 	}
1012 	ch = (struct carp_header *)((uint8_t *)ip + iphlen);
1013 
1014 	/* Verify the CARP checksum */
1015 	if (in_cksum_skip(m, len, iphlen)) {
1016 		carpstats.carps_badsum++;
1017 		CARP_LOG("carp_proto_input: checksum failed on %s\n",
1018 		    ifp->if_xname);
1019 		m_freem(m);
1020 		goto back;
1021 	}
1022 	carp_proto_input_c(sc, m, ch, AF_INET);
1023 back:
1024 	return(IPPROTO_DONE);
1025 }
1026 
1027 #ifdef INET6
1028 int
1029 carp6_proto_input(struct mbuf **mp, int *offp, int proto)
1030 {
1031 	struct mbuf *m = *mp;
1032 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
1033 	struct ifnet *ifp = m->m_pkthdr.rcvif;
1034 	struct carp_header *ch;
1035 	struct carp_softc *sc;
1036 	u_int len;
1037 
1038 	carpstats.carps_ipackets6++;
1039 
1040 	if (!carp_opts[CARPCTL_ALLOW]) {
1041 		m_freem(m);
1042 		goto back;
1043 	}
1044 
1045 	/* check if received on a valid carp interface */
1046 	if (ifp->if_type != IFT_CARP) {
1047 		carpstats.carps_badif++;
1048 		CARP_LOG("carp6_proto_input: packet received on non-carp "
1049 		    "interface: %s\n", ifp->if_xname);
1050 		m_freem(m);
1051 		goto back;
1052 	}
1053 
1054 	if (!CARP_IS_RUNNING(ifp)) {
1055 		carpstats.carps_badif++;
1056 		CARP_LOG("carp_proto_input: packet received on stopped carp "
1057 		    "interface: %s\n", ifp->if_xname);
1058 		m_freem(m);
1059 		goto back;
1060 	}
1061 
1062 	sc = ifp->if_softc;
1063 	if (sc->sc_carpdev == NULL) {
1064 		carpstats.carps_badif++;
1065 		CARP_LOG("carp6_proto_input: packet received on defunc-carp "
1066 		    "interface: %s\n", ifp->if_xname);
1067 		m_freem(m);
1068 		goto back;
1069 	}
1070 
1071 	/* verify that the IP TTL is 255 */
1072 	if (ip6->ip6_hlim != CARP_DFLTTL) {
1073 		carpstats.carps_badttl++;
1074 		CARP_LOG("carp6_proto_input: received ttl %d != 255 on %s\n",
1075 		    ip6->ip6_hlim, ifp->if_xname);
1076 		m_freem(m);
1077 		goto back;
1078 	}
1079 
1080 	/* verify that we have a complete carp packet */
1081 	len = m->m_len;
1082 	IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch));
1083 	if (ch == NULL) {
1084 		carpstats.carps_badlen++;
1085 		CARP_LOG("carp6_proto_input: packet size %u too small\n", len);
1086 		goto back;
1087 	}
1088 
1089 	/* verify the CARP checksum */
1090 	if (in_cksum_range(m, 0, *offp, sizeof(*ch))) {
1091 		carpstats.carps_badsum++;
1092 		CARP_LOG("carp6_proto_input: checksum failed, on %s\n",
1093 		    ifp->if_xname);
1094 		m_freem(m);
1095 		goto back;
1096 	}
1097 
1098 	carp_proto_input_c(sc, m, ch, AF_INET6);
1099 back:
1100 	return (IPPROTO_DONE);
1101 }
1102 #endif /* INET6 */
1103 
1104 static void
1105 carp_proto_input_c(struct carp_softc *sc, struct mbuf *m,
1106     struct carp_header *ch, sa_family_t af)
1107 {
1108 	struct ifnet *cifp;
1109 	uint64_t tmp_counter;
1110 	struct timeval sc_tv, ch_tv;
1111 
1112 	if (sc->sc_vhid != ch->carp_vhid) {
1113 		/*
1114 		 * CARP uses multicast, however, multicast packets
1115 		 * are tapped to all CARP interfaces on the physical
1116 		 * interface receiving the CARP packets, so we don't
1117 		 * update any stats here.
1118 		 */
1119 		m_freem(m);
1120 		return;
1121 	}
1122 	cifp = &sc->sc_if;
1123 
1124 	/* verify the CARP version. */
1125 	if (ch->carp_version != CARP_VERSION) {
1126 		carpstats.carps_badver++;
1127 		CARP_LOG("%s; invalid version %d\n", cifp->if_xname,
1128 			 ch->carp_version);
1129 		m_freem(m);
1130 		return;
1131 	}
1132 
1133 	/* verify the hash */
1134 	if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
1135 		carpstats.carps_badauth++;
1136 		CARP_LOG("%s: incorrect hash\n", cifp->if_xname);
1137 		m_freem(m);
1138 		return;
1139 	}
1140 
1141 	tmp_counter = ntohl(ch->carp_counter[0]);
1142 	tmp_counter = tmp_counter<<32;
1143 	tmp_counter += ntohl(ch->carp_counter[1]);
1144 
1145 	/* XXX Replay protection goes here */
1146 
1147 	sc->sc_init_counter = 0;
1148 	sc->sc_counter = tmp_counter;
1149 
1150 	sc_tv.tv_sec = sc->sc_advbase;
1151 	if (carp_suppress_preempt && sc->sc_advskew <  240)
1152 		sc_tv.tv_usec = 240 * 1000000 / 256;
1153 	else
1154 		sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1155 	ch_tv.tv_sec = ch->carp_advbase;
1156 	ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
1157 
1158 	switch (sc->sc_state) {
1159 	case INIT:
1160 		break;
1161 
1162 	case MASTER:
1163 		/*
1164 		 * If we receive an advertisement from a master who's going to
1165 		 * be more frequent than us, go into BACKUP state.
1166 		 */
1167 		if (timevalcmp(&sc_tv, &ch_tv, >) ||
1168 		    timevalcmp(&sc_tv, &ch_tv, ==)) {
1169 			callout_stop(&sc->sc_ad_tmo);
1170 			CARP_DEBUG("%s: MASTER -> BACKUP "
1171 			   "(more frequent advertisement received)\n",
1172 			   cifp->if_xname);
1173 			carp_set_state(sc, BACKUP);
1174 			carp_setrun(sc, 0);
1175 			carp_setroute(sc, RTM_DELETE);
1176 		}
1177 		break;
1178 
1179 	case BACKUP:
1180 		/*
1181 		 * If we're pre-empting masters who advertise slower than us,
1182 		 * and this one claims to be slower, treat him as down.
1183 		 */
1184 		if (carp_opts[CARPCTL_PREEMPT] &&
1185 		    timevalcmp(&sc_tv, &ch_tv, <)) {
1186 			CARP_DEBUG("%s: BACKUP -> MASTER "
1187 			    "(preempting a slower master)\n", cifp->if_xname);
1188 			carp_master_down(sc);
1189 			break;
1190 		}
1191 
1192 		/*
1193 		 *  If the master is going to advertise at such a low frequency
1194 		 *  that he's guaranteed to time out, we'd might as well just
1195 		 *  treat him as timed out now.
1196 		 */
1197 		sc_tv.tv_sec = sc->sc_advbase * 3;
1198 		if (timevalcmp(&sc_tv, &ch_tv, <)) {
1199 			CARP_DEBUG("%s: BACKUP -> MASTER (master timed out)\n",
1200 				   cifp->if_xname);
1201 			carp_master_down(sc);
1202 			break;
1203 		}
1204 
1205 		/*
1206 		 * Otherwise, we reset the counter and wait for the next
1207 		 * advertisement.
1208 		 */
1209 		carp_setrun(sc, af);
1210 		break;
1211 	}
1212 	m_freem(m);
1213 }
1214 
1215 struct mbuf *
1216 carp_input(void *v, struct mbuf *m)
1217 {
1218 	struct carp_if *cif = v;
1219 	struct ether_header *eh;
1220 	struct carp_softc_container *scc;
1221 	struct ifnet *ifp;
1222 
1223 	eh = mtod(m, struct ether_header *);
1224 
1225 	ifp = carp_forus(cif, eh->ether_dhost);
1226 	if (ifp != NULL) {
1227 		ether_reinput_oncpu(ifp, m, REINPUT_RUNBPF);
1228 		return NULL;
1229 	}
1230 
1231 	if ((m->m_flags & (M_BCAST | M_MCAST)) == 0)
1232 		return m;
1233 
1234 	/*
1235 	 * XXX Should really check the list of multicast addresses
1236 	 * for each CARP interface _before_ copying.
1237 	 */
1238 	TAILQ_FOREACH(scc, cif, scc_link) {
1239 		struct carp_softc *sc = scc->scc_softc;
1240 		struct mbuf *m0;
1241 
1242 		if ((sc->sc_if.if_flags & IFF_UP) == 0)
1243 			continue;
1244 
1245 		m0 = m_dup(m, MB_DONTWAIT);
1246 		if (m0 == NULL)
1247 			continue;
1248 
1249 		ether_reinput_oncpu(&sc->sc_if, m0, REINPUT_RUNBPF);
1250 	}
1251 	return m;
1252 }
1253 
1254 static void
1255 carp_prepare_ad(struct carp_softc *sc, struct carp_header *ch)
1256 {
1257 	if (sc->sc_init_counter) {
1258 		/* this could also be seconds since unix epoch */
1259 		sc->sc_counter = karc4random();
1260 		sc->sc_counter = sc->sc_counter << 32;
1261 		sc->sc_counter += karc4random();
1262 	} else {
1263 		sc->sc_counter++;
1264 	}
1265 
1266 	ch->carp_counter[0] = htonl((sc->sc_counter >> 32) & 0xffffffff);
1267 	ch->carp_counter[1] = htonl(sc->sc_counter & 0xffffffff);
1268 
1269 	carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
1270 }
1271 
1272 static void
1273 carp_send_ad_all(void)
1274 {
1275 	struct carp_softc *sc;
1276 
1277 	LIST_FOREACH(sc, &carpif_list, sc_next) {
1278 		if (sc->sc_carpdev == NULL)
1279 			continue;
1280 
1281 		if (CARP_IS_RUNNING(&sc->sc_if) && sc->sc_state == MASTER)
1282 			carp_send_ad(sc);
1283 	}
1284 }
1285 
1286 static void
1287 carp_send_ad_timeout(void *xsc)
1288 {
1289 	struct carp_softc *sc = xsc;
1290 	struct netmsg_carp *cmsg = &sc->sc_ad_msg;
1291 
1292 	KASSERT(mycpuid == 0, ("%s not on cpu0 but on cpu%d",
1293 	    __func__, mycpuid));
1294 
1295 	crit_enter();
1296 	if (cmsg->base.lmsg.ms_flags & MSGF_DONE)
1297 		lwkt_sendmsg_oncpu(netisr_cpuport(0), &cmsg->base.lmsg);
1298 	crit_exit();
1299 }
1300 
1301 static void
1302 carp_send_ad_timeout_dispatch(netmsg_t msg)
1303 {
1304 	struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
1305 	struct carp_softc *sc = cmsg->nc_softc;
1306 
1307 	/* Reply ASAP */
1308 	crit_enter();
1309 	lwkt_replymsg(&cmsg->base.lmsg, 0);
1310 	crit_exit();
1311 
1312 	carp_send_ad(sc);
1313 }
1314 
/*
 * Build and transmit one CARP advertisement for sc.
 *
 * An IPv4 advertisement is sent when sc->sc_ia is set (INET) and an
 * IPv6 one when sc->sc_ia6 is set (INET6).  If the carp interface is
 * not running, advbase and advskew are both advertised as 255 ("bow
 * out") and the advertisement timer is not re-armed; otherwise
 * sc_ad_tmo is re-armed on the way out, including on mbuf allocation
 * failure.
 *
 * Output errors are counted in sc_sendad_errors.  When the count
 * reaches CARP_SENDAD_MAX_ERRORS, carp_suppress_preempt is raised;
 * after CARP_SENDAD_MIN_SUCCESS successful sends it is lowered again.
 */
static void
carp_send_ad(struct carp_softc *sc)
{
	struct ifnet *cifp = &sc->sc_if;
	struct carp_header ch;
	struct timeval tv;
	struct carp_header *ch_ptr;
	struct mbuf *m;
	int len, advbase, advskew;

	/*
	 * NOTE: tv is only initialized on the running path.  On the
	 * bow-out path advbase and advskew are both 255, and every use
	 * of tv below is guarded by "advbase != 255 || advskew != 255".
	 */
	if (!CARP_IS_RUNNING(cifp)) {
		/* Bow out */
		advbase = 255;
		advskew = 255;
	} else {
		advbase = sc->sc_advbase;
		/* While preemption is suppressed, advertise a skew >= 240 */
		if (!carp_suppress_preempt || sc->sc_advskew > 240)
			advskew = sc->sc_advskew;
		else
			advskew = 240;
		tv.tv_sec = advbase;
		tv.tv_usec = advskew * 1000000 / 256;
	}

	/* Header template; counter and HMAC are filled in per packet */
	ch.carp_version = CARP_VERSION;
	ch.carp_type = CARP_ADVERTISEMENT;
	ch.carp_vhid = sc->sc_vhid;
	ch.carp_advbase = advbase;
	ch.carp_advskew = advskew;
	ch.carp_authlen = 7;	/* XXX DEFINE */
	ch.carp_pad1 = 0;	/* must be zero */
	ch.carp_cksum = 0;

#ifdef INET
	if (sc->sc_ia != NULL) {
		struct ip *ip;

		MGETHDR(m, MB_DONTWAIT, MT_HEADER);
		if (m == NULL) {
			IFNET_STAT_INC(cifp, oerrors, 1);
			carpstats.carps_onomem++;
			/* XXX maybe less ? */
			if (advbase != 255 || advskew != 255)
				callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
				    carp_send_ad_timeout, sc);
			return;
		}
		/* The packet is exactly an IP header plus a CARP header */
		len = sizeof(*ip) + sizeof(ch);
		m->m_pkthdr.len = len;
		m->m_pkthdr.rcvif = NULL;
		m->m_len = len;
		MH_ALIGN(m, m->m_len);
		m->m_flags |= M_MCAST;
		if (carp_prio_ad)
			m->m_flags |= M_PRIO;
		ip = mtod(m, struct ip *);
		ip->ip_v = IPVERSION;
		ip->ip_hl = sizeof(*ip) >> 2;
		ip->ip_tos = IPTOS_LOWDELAY;
		ip->ip_len = len;
		ip->ip_id = ip_newid();
		ip->ip_off = IP_DF;
		ip->ip_ttl = CARP_DFLTTL;
		ip->ip_p = IPPROTO_CARP;
		ip->ip_sum = 0;
		ip->ip_src = sc->sc_ia->ia_addr.sin_addr;
		ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP);

		/* The CARP header directly follows the IP header */
		ch_ptr = (struct carp_header *)(&ip[1]);
		bcopy(&ch, ch_ptr, sizeof(ch));
		carp_prepare_ad(sc, ch_ptr);
		/* carp_cksum was zeroed in the template above */
		ch_ptr->carp_cksum = in_cksum_skip(m, len, sizeof(*ip));

		getmicrotime(&cifp->if_lastchange);
		IFNET_STAT_INC(cifp, opackets, 1);
		IFNET_STAT_INC(cifp, obytes, len);
		carpstats.carps_opackets++;

		if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) {
			IFNET_STAT_INC(cifp, oerrors, 1);
			if (sc->sc_sendad_errors < INT_MAX)
				sc->sc_sendad_errors++;
			/* Only the send that hits the limit raises the flag */
			if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
				carp_suppress_preempt++;
				if (carp_suppress_preempt == 1) {
					carp_send_ad_all();
				}
			}
			sc->sc_sendad_success = 0;
		} else {
			if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
				/* Recover only after enough successes */
				if (++sc->sc_sendad_success >=
				    CARP_SENDAD_MIN_SUCCESS) {
					carp_suppress_preempt--;
					sc->sc_sendad_errors = 0;
				}
			} else {
				sc->sc_sendad_errors = 0;
			}
		}
	}
#endif /* INET */
#ifdef INET6
	if (sc->sc_ia6) {
		struct ip6_hdr *ip6;

		MGETHDR(m, MB_DONTWAIT, MT_HEADER);
		if (m == NULL) {
			IFNET_STAT_INC(cifp, oerrors, 1);
			carpstats.carps_onomem++;
			/* XXX maybe less ? */
			if (advbase != 255 || advskew != 255)
				callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
				    carp_send_ad_timeout, sc);
			return;
		}
		/* The packet is exactly an IPv6 header plus a CARP header */
		len = sizeof(*ip6) + sizeof(ch);
		m->m_pkthdr.len = len;
		m->m_pkthdr.rcvif = NULL;
		m->m_len = len;
		MH_ALIGN(m, m->m_len);
		m->m_flags |= M_MCAST;
		ip6 = mtod(m, struct ip6_hdr *);
		bzero(ip6, sizeof(*ip6));
		ip6->ip6_vfc |= IPV6_VERSION;
		ip6->ip6_hlim = CARP_DFLTTL;
		ip6->ip6_nxt = IPPROTO_CARP;
		bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src,
		    sizeof(struct in6_addr));
		/* set the multicast destination */

		ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
		ip6->ip6_dst.s6_addr8[15] = 0x12;
		/* NOTE: this failure path returns without re-arming sc_ad_tmo */
		if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) {
			IFNET_STAT_INC(cifp, oerrors, 1);
			m_freem(m);
			CARP_LOG("%s: in6_setscope failed\n", __func__);
			return;
		}

		/* The CARP header directly follows the IPv6 header */
		ch_ptr = (struct carp_header *)(&ip6[1]);
		bcopy(&ch, ch_ptr, sizeof(ch));
		carp_prepare_ad(sc, ch_ptr);
		ch_ptr->carp_cksum = in_cksum_skip(m, len, sizeof(*ip6));

		getmicrotime(&cifp->if_lastchange);
		IFNET_STAT_INC(cifp, opackets, 1);
		IFNET_STAT_INC(cifp, obytes, len);
		carpstats.carps_opackets6++;

		/* Same error accounting as in the IPv4 branch */
		if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) {
			IFNET_STAT_INC(cifp, oerrors, 1);
			if (sc->sc_sendad_errors < INT_MAX)
				sc->sc_sendad_errors++;
			if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
				carp_suppress_preempt++;
				if (carp_suppress_preempt == 1) {
					carp_send_ad_all();
				}
			}
			sc->sc_sendad_success = 0;
		} else {
			if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
				if (++sc->sc_sendad_success >=
				    CARP_SENDAD_MIN_SUCCESS) {
					carp_suppress_preempt--;
					sc->sc_sendad_errors = 0;
				}
			} else {
				sc->sc_sendad_errors = 0;
			}
		}
	}
#endif /* INET6 */

	/* Schedule the next advertisement unless we are bowing out */
	if (advbase != 255 || advskew != 255)
		callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
		    carp_send_ad_timeout, sc);
}
1494 
1495 /*
1496  * Broadcast a gratuitous ARP request containing
1497  * the virtual router MAC address for each IP address
1498  * associated with the virtual router.
1499  */
1500 static void
1501 carp_send_arp(struct carp_softc *sc)
1502 {
1503 	const struct carp_vhaddr *vha;
1504 
1505 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
1506 		if (vha->vha_iaback == NULL)
1507 			continue;
1508 		arp_gratuitous(&sc->sc_if, &vha->vha_ia->ia_ifa);
1509 	}
1510 }
1511 
1512 #ifdef INET6
1513 static void
1514 carp_send_na(struct carp_softc *sc)
1515 {
1516 	struct ifaddr_container *ifac;
1517 	struct in6_addr *in6;
1518 	static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
1519 
1520 	TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) {
1521 		struct ifaddr *ifa = ifac->ifa;
1522 
1523 		if (ifa->ifa_addr->sa_family != AF_INET6)
1524 			continue;
1525 
1526 		in6 = &ifatoia6(ifa)->ia_addr.sin6_addr;
1527 		nd6_na_output(sc->sc_carpdev, &mcast, in6,
1528 		    ND_NA_FLAG_OVERRIDE, 1, NULL);
1529 		DELAY(1000);	/* XXX */
1530 	}
1531 }
1532 #endif /* INET6 */
1533 
1534 static __inline const struct carp_vhaddr *
1535 carp_find_addr(const struct carp_softc *sc, const struct in_addr *addr)
1536 {
1537 	struct carp_vhaddr *vha;
1538 
1539 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
1540 		if (vha->vha_iaback == NULL)
1541 			continue;
1542 
1543 		if (vha->vha_ia->ia_addr.sin_addr.s_addr == addr->s_addr)
1544 			return vha;
1545 	}
1546 	return NULL;
1547 }
1548 
1549 #ifdef notyet
1550 static int
1551 carp_iamatch_balance(const struct carp_if *cif, const struct in_addr *itaddr,
1552 		     const struct in_addr *isaddr, uint8_t **enaddr)
1553 {
1554 	const struct carp_softc *vh;
1555 	int index, count = 0;
1556 
1557 	/*
1558 	 * XXX proof of concept implementation.
1559 	 * We use the source ip to decide which virtual host should
1560 	 * handle the request. If we're master of that virtual host,
1561 	 * then we respond, otherwise, just drop the arp packet on
1562 	 * the floor.
1563 	 */
1564 
1565 	TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1566 		if (!CARP_IS_RUNNING(&vh->sc_if))
1567 			continue;
1568 
1569 		if (carp_find_addr(vh, itaddr) != NULL)
1570 			count++;
1571 	}
1572 	if (count == 0)
1573 		return 0;
1574 
1575 	/* this should be a hash, like pf_hash() */
1576 	index = ntohl(isaddr->s_addr) % count;
1577 	count = 0;
1578 
1579 	TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1580 		if (!CARP_IS_RUNNING(&vh->sc_if))
1581 			continue;
1582 
1583 		if (carp_find_addr(vh, itaddr) == NULL)
1584 			continue;
1585 
1586 		if (count == index) {
1587 			if (vh->sc_state == MASTER) {
1588 				*enaddr = IF_LLADDR(&vh->sc_if);
1589 				return 1;
1590 			} else {
1591 				return 0;
1592 			}
1593 		}
1594 		count++;
1595 	}
1596 	return 0;
1597 }
1598 #endif
1599 
1600 int
1601 carp_iamatch(const struct in_ifaddr *ia)
1602 {
1603 	const struct carp_softc *sc = ia->ia_ifp->if_softc;
1604 
1605 	KASSERT(&curthread->td_msgport == netisr_cpuport(0),
1606 	    ("not in netisr0"));
1607 
1608 #ifdef notyet
1609 	if (carp_opts[CARPCTL_ARPBALANCE])
1610 		return carp_iamatch_balance(cif, itaddr, isaddr, enaddr);
1611 #endif
1612 
1613 	if (!CARP_IS_RUNNING(&sc->sc_if) || sc->sc_state != MASTER)
1614 		return 0;
1615 
1616 	return 1;
1617 }
1618 
1619 #ifdef INET6
1620 struct ifaddr *
1621 carp_iamatch6(void *v, struct in6_addr *taddr)
1622 {
1623 #ifdef foo
1624 	struct carp_if *cif = v;
1625 	struct carp_softc *vh;
1626 
1627 	TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1628 		struct ifaddr_container *ifac;
1629 
1630 		TAILQ_FOREACH(ifac, &vh->sc_if.if_addrheads[mycpuid],
1631 			      ifa_link) {
1632 			struct ifaddr *ifa = ifac->ifa;
1633 
1634 			if (IN6_ARE_ADDR_EQUAL(taddr,
1635 			    &ifatoia6(ifa)->ia_addr.sin6_addr) &&
1636 			    CARP_IS_RUNNING(&vh->sc_if) &&
1637 			    vh->sc_state == MASTER) {
1638 				return (ifa);
1639 			}
1640 		}
1641 	}
1642 #endif
1643 	return (NULL);
1644 }
1645 
1646 void *
1647 carp_macmatch6(void *v, struct mbuf *m, const struct in6_addr *taddr)
1648 {
1649 #ifdef foo
1650 	struct m_tag *mtag;
1651 	struct carp_if *cif = v;
1652 	struct carp_softc *sc;
1653 
1654 	TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
1655 		struct ifaddr_container *ifac;
1656 
1657 		TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid],
1658 			      ifa_link) {
1659 			struct ifaddr *ifa = ifac->ifa;
1660 
1661 			if (IN6_ARE_ADDR_EQUAL(taddr,
1662 			    &ifatoia6(ifa)->ia_addr.sin6_addr) &&
1663 			    CARP_IS_RUNNING(&sc->sc_if)) {
1664 				struct ifnet *ifp = &sc->sc_if;
1665 
1666 				mtag = m_tag_get(PACKET_TAG_CARP,
1667 				    sizeof(struct ifnet *), MB_DONTWAIT);
1668 				if (mtag == NULL) {
1669 					/* better a bit than nothing */
1670 					return (IF_LLADDR(ifp));
1671 				}
1672 				bcopy(&ifp, (caddr_t)(mtag + 1),
1673 				    sizeof(struct ifnet *));
1674 				m_tag_prepend(m, mtag);
1675 
1676 				return (IF_LLADDR(ifp));
1677 			}
1678 		}
1679 	}
1680 #endif
1681 	return (NULL);
1682 }
1683 #endif
1684 
1685 static struct ifnet *
1686 carp_forus(struct carp_if *cif, const uint8_t *dhost)
1687 {
1688 	struct carp_softc_container *scc;
1689 
1690 	if (memcmp(dhost, carp_etheraddr, ETHER_ADDR_LEN - 1) != 0)
1691 		return NULL;
1692 
1693 	TAILQ_FOREACH(scc, cif, scc_link) {
1694 		struct carp_softc *sc = scc->scc_softc;
1695 		struct ifnet *ifp = &sc->sc_if;
1696 
1697 		if (CARP_IS_RUNNING(ifp) && sc->sc_state == MASTER &&
1698 		    !bcmp(dhost, IF_LLADDR(ifp), ETHER_ADDR_LEN))
1699 			return ifp;
1700 	}
1701 	return NULL;
1702 }
1703 
1704 static void
1705 carp_master_down_timeout(void *xsc)
1706 {
1707 	struct carp_softc *sc = xsc;
1708 	struct netmsg_carp *cmsg = &sc->sc_md_msg;
1709 
1710 	KASSERT(mycpuid == 0, ("%s not on cpu0 but on cpu%d",
1711 	    __func__, mycpuid));
1712 
1713 	crit_enter();
1714 	if (cmsg->base.lmsg.ms_flags & MSGF_DONE)
1715 		lwkt_sendmsg_oncpu(netisr_cpuport(0), &cmsg->base.lmsg);
1716 	crit_exit();
1717 }
1718 
1719 static void
1720 carp_master_down_timeout_dispatch(netmsg_t msg)
1721 {
1722 	struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
1723 	struct carp_softc *sc = cmsg->nc_softc;
1724 
1725 	/* Reply ASAP */
1726 	crit_enter();
1727 	lwkt_replymsg(&cmsg->base.lmsg, 0);
1728 	crit_exit();
1729 
1730 	CARP_DEBUG("%s: BACKUP -> MASTER (master timed out)\n",
1731 		   sc->sc_if.if_xname);
1732 	carp_master_down(sc);
1733 }
1734 
1735 static void
1736 carp_master_down(struct carp_softc *sc)
1737 {
1738 	switch (sc->sc_state) {
1739 	case INIT:
1740 		kprintf("%s: master_down event in INIT state\n",
1741 			sc->sc_if.if_xname);
1742 		break;
1743 
1744 	case MASTER:
1745 		break;
1746 
1747 	case BACKUP:
1748 		carp_set_state(sc, MASTER);
1749 		carp_send_ad(sc);
1750 		carp_send_arp(sc);
1751 #ifdef INET6
1752 		carp_send_na(sc);
1753 #endif /* INET6 */
1754 		carp_setrun(sc, 0);
1755 		carp_setroute(sc, RTM_ADD);
1756 		break;
1757 	}
1758 }
1759 
1760 /*
1761  * When in backup state, af indicates whether to reset the master down timer
1762  * for v4 or v6. If it's set to zero, reset the ones which are already pending.
1763  */
1764 static void
1765 carp_setrun(struct carp_softc *sc, sa_family_t af)
1766 {
1767 	struct ifnet *cifp = &sc->sc_if;
1768 	struct timeval tv;
1769 
1770 	if (sc->sc_carpdev == NULL) {
1771 		carp_set_state(sc, INIT);
1772 		return;
1773 	}
1774 
1775 	if ((cifp->if_flags & IFF_RUNNING) && sc->sc_vhid > 0 &&
1776 	    (sc->sc_naddrs || sc->sc_naddrs6)) {
1777 		/* Nothing */
1778 	} else {
1779 		carp_setroute(sc, RTM_DELETE);
1780 		return;
1781 	}
1782 
1783 	switch (sc->sc_state) {
1784 	case INIT:
1785 		if (carp_opts[CARPCTL_PREEMPT] && !carp_suppress_preempt) {
1786 			carp_send_ad(sc);
1787 			carp_send_arp(sc);
1788 #ifdef INET6
1789 			carp_send_na(sc);
1790 #endif /* INET6 */
1791 			CARP_DEBUG("%s: INIT -> MASTER (preempting)\n",
1792 				   cifp->if_xname);
1793 			carp_set_state(sc, MASTER);
1794 			carp_setroute(sc, RTM_ADD);
1795 		} else {
1796 			CARP_DEBUG("%s: INIT -> BACKUP\n", cifp->if_xname);
1797 			carp_set_state(sc, BACKUP);
1798 			carp_setroute(sc, RTM_DELETE);
1799 			carp_setrun(sc, 0);
1800 		}
1801 		break;
1802 
1803 	case BACKUP:
1804 		callout_stop(&sc->sc_ad_tmo);
1805 		tv.tv_sec = 3 * sc->sc_advbase;
1806 		tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1807 		switch (af) {
1808 #ifdef INET
1809 		case AF_INET:
1810 			callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv),
1811 			    carp_master_down_timeout, sc);
1812 			break;
1813 #endif /* INET */
1814 #ifdef INET6
1815 		case AF_INET6:
1816 			callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv),
1817 			    carp_master_down_timeout, sc);
1818 			break;
1819 #endif /* INET6 */
1820 		default:
1821 			if (sc->sc_naddrs)
1822 				callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv),
1823 				    carp_master_down_timeout, sc);
1824 			if (sc->sc_naddrs6)
1825 				callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv),
1826 				    carp_master_down_timeout, sc);
1827 			break;
1828 		}
1829 		break;
1830 
1831 	case MASTER:
1832 		tv.tv_sec = sc->sc_advbase;
1833 		tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1834 		callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1835 		    carp_send_ad_timeout, sc);
1836 		break;
1837 	}
1838 }
1839 
1840 static void
1841 carp_multicast_cleanup(struct carp_softc *sc)
1842 {
1843 	struct ip_moptions *imo = &sc->sc_imo;
1844 
1845 	if (imo->imo_num_memberships == 0)
1846 		return;
1847 	KKASSERT(imo->imo_num_memberships == 1);
1848 
1849 	in_delmulti(imo->imo_membership[0]);
1850 	imo->imo_membership[0] = NULL;
1851 	imo->imo_num_memberships = 0;
1852 	imo->imo_multicast_ifp = NULL;
1853 }
1854 
1855 #ifdef INET6
1856 static void
1857 carp_multicast6_cleanup(struct carp_softc *sc)
1858 {
1859 	struct ip6_moptions *im6o = &sc->sc_im6o;
1860 
1861 	while (!LIST_EMPTY(&im6o->im6o_memberships)) {
1862 		struct in6_multi_mship *imm =
1863 		    LIST_FIRST(&im6o->im6o_memberships);
1864 
1865 		LIST_REMOVE(imm, i6mm_chain);
1866 		in6_leavegroup(imm);
1867 	}
1868 	im6o->im6o_multicast_ifp = NULL;
1869 }
1870 #endif
1871 
1872 static void
1873 carp_ioctl_getvhaddr_dispatch(netmsg_t msg)
1874 {
1875 	struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
1876 	struct carp_softc *sc = cmsg->nc_softc;
1877 	const struct carp_vhaddr *vha;
1878 	struct ifcarpvhaddr *carpa, *carpa0;
1879 	int count, len, error = 0;
1880 
1881 	count = 0;
1882 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
1883 		++count;
1884 
1885 	if (cmsg->nc_datalen == 0) {
1886 		cmsg->nc_datalen = count * sizeof(*carpa);
1887 		goto back;
1888 	} else if (count == 0 || cmsg->nc_datalen < sizeof(*carpa)) {
1889 		cmsg->nc_datalen = 0;
1890 		goto back;
1891 	}
1892 	len = min(cmsg->nc_datalen, sizeof(*carpa) * count);
1893 	KKASSERT(len >= sizeof(*carpa));
1894 
1895 	carpa0 = carpa = kmalloc(len, M_TEMP, M_WAITOK | M_NULLOK | M_ZERO);
1896 	if (carpa == NULL) {
1897 		error = ENOMEM;
1898 		goto back;
1899 	}
1900 
1901 	count = 0;
1902 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
1903 		if (len < sizeof(*carpa))
1904 			break;
1905 
1906 		carpa->carpa_flags = vha->vha_flags;
1907 		carpa->carpa_addr.sin_family = AF_INET;
1908 		carpa->carpa_addr.sin_addr = vha->vha_ia->ia_addr.sin_addr;
1909 
1910 		carpa->carpa_baddr.sin_family = AF_INET;
1911 		if (vha->vha_iaback == NULL) {
1912 			carpa->carpa_baddr.sin_addr.s_addr = INADDR_ANY;
1913 		} else {
1914 			carpa->carpa_baddr.sin_addr =
1915 			vha->vha_iaback->ia_addr.sin_addr;
1916 		}
1917 
1918 		++carpa;
1919 		++count;
1920 		len -= sizeof(*carpa);
1921 	}
1922 	cmsg->nc_datalen = sizeof(*carpa) * count;
1923 	KKASSERT(cmsg->nc_datalen > 0);
1924 
1925 	cmsg->nc_data = carpa0;
1926 
1927 back:
1928 	lwkt_replymsg(&cmsg->base.lmsg, error);
1929 }
1930 
1931 static int
1932 carp_ioctl_getvhaddr(struct carp_softc *sc, struct ifdrv *ifd)
1933 {
1934 	struct ifnet *ifp = &sc->arpcom.ac_if;
1935 	struct netmsg_carp cmsg;
1936 	int error;
1937 
1938 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
1939 	ifnet_deserialize_all(ifp);
1940 
1941 	bzero(&cmsg, sizeof(cmsg));
1942 	netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
1943 	    carp_ioctl_getvhaddr_dispatch);
1944 	cmsg.nc_softc = sc;
1945 	cmsg.nc_datalen = ifd->ifd_len;
1946 
1947 	error = lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
1948 
1949 	if (!error) {
1950 		if (cmsg.nc_data != NULL) {
1951 			error = copyout(cmsg.nc_data, ifd->ifd_data,
1952 			    cmsg.nc_datalen);
1953 			kfree(cmsg.nc_data, M_TEMP);
1954 		}
1955 		ifd->ifd_len = cmsg.nc_datalen;
1956 	} else {
1957 		KASSERT(cmsg.nc_data == NULL,
1958 		    ("%s temp vhaddr is alloc upon error", __func__));
1959 	}
1960 
1961 	ifnet_serialize_all(ifp);
1962 	return error;
1963 }
1964 
/*
 * Select a backing address for the virtual address vha and activate
 * the vhaddr on it.
 *
 * The current CPU's in_ifaddrheads list is searched for an address
 * that has the same subnet and subnet mask as the virtual address.
 * ia_del, if not NULL, is excluded from the search.
 *
 * The vhaddr is always deactivated first.  Returns ENOENT if no
 * backing address was found, EEXIST if the one found sits on an
 * interface other than the current parent (sc_carpdev), otherwise
 * whatever carp_activate_vhaddr() returns.
 */
static int
carp_config_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha,
    struct in_ifaddr *ia_del)
{
	struct ifnet *ifp;
	struct in_ifaddr *ia_if;
	const struct in_ifaddr *ia_vha;
	struct in_ifaddr_container *iac;
	int own, ia_match_carpdev;

	KKASSERT(vha->vha_ia != NULL);
	ia_vha = vha->vha_ia;

	ia_if = NULL;			/* best candidate so far */
	own = 0;			/* a candidate has the vhaddr's own address */
	ia_match_carpdev = 0;		/* a candidate on sc_carpdev was picked */
	TAILQ_FOREACH(iac, &in_ifaddrheads[mycpuid], ia_link) {
		struct in_ifaddr *ia = iac->ia;

		/* Skip the address that is about to go away */
		if (ia == ia_del)
			continue;

		/* Addresses on carp interfaces cannot back a vhaddr */
		if (ia->ia_ifp->if_type == IFT_CARP)
			continue;

		if ((ia->ia_ifp->if_flags & IFF_UP) == 0)
			continue;

		/* and, yeah, we need a multicast-capable iface too */
		if ((ia->ia_ifp->if_flags & IFF_MULTICAST) == 0)
			continue;

		if (ia_vha->ia_subnetmask == ia->ia_subnetmask &&
		    ia_vha->ia_subnet == ia->ia_subnet) {
			if (ia_vha->ia_addr.sin_addr.s_addr ==
			    ia->ia_addr.sin_addr.s_addr)
				own = 1;
			/*
			 * Preference order: the first match is taken
			 * unconditionally; after that an address on the
			 * current parent interface wins, and otherwise
			 * an address with a prefix route may replace
			 * the candidate as long as no address on the
			 * parent has been picked by the branch below.
			 */
			if (ia_if == NULL) {
				ia_if = ia;
			} else if (sc->sc_carpdev != NULL &&
			    sc->sc_carpdev == ia->ia_ifp) {
				ia_if = ia;
				if (ia_if->ia_flags & IFA_ROUTE) {
					/*
					 * Address with prefix route
					 * is preferred
					 */
					break;
				}
				ia_match_carpdev = 1;
			} else if (!ia_match_carpdev) {
				if (ia->ia_flags & IFA_ROUTE) {
					/*
					 * Address with prefix route
					 * is preferred over others.
					 */
					ia_if = ia;
				}
			}
		}
	}

	/* Deactivate first, whether or not a candidate was found */
	carp_deactivate_vhaddr(sc, vha, FALSE);
	if (!ia_if)
		return ENOENT;

	ifp = ia_if->ia_ifp;

	/* XXX Don't allow parent iface to be changed */
	if (sc->sc_carpdev != NULL && sc->sc_carpdev != ifp)
		return EEXIST;

	return carp_activate_vhaddr(sc, vha, ifp, ia_if, own);
}
2039 
2040 static void
2041 carp_add_addr(struct carp_softc *sc, struct ifaddr *carp_ifa)
2042 {
2043 	struct carp_vhaddr *vha_new;
2044 	struct in_ifaddr *carp_ia;
2045 #ifdef INVARIANTS
2046 	struct carp_vhaddr *vha;
2047 #endif
2048 
2049 	KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET);
2050 	carp_ia = ifatoia(carp_ifa);
2051 
2052 #ifdef INVARIANTS
2053 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
2054 		KKASSERT(vha->vha_ia != NULL && vha->vha_ia != carp_ia);
2055 #endif
2056 
2057 	vha_new = kmalloc(sizeof(*vha_new), M_CARP, M_WAITOK | M_ZERO);
2058 	vha_new->vha_ia = carp_ia;
2059 	carp_insert_vhaddr(sc, vha_new);
2060 
2061 	if (carp_config_vhaddr(sc, vha_new, NULL) != 0) {
2062 		/*
2063 		 * If the above configuration fails, it may only mean
2064 		 * that the new address is problematic.  However, the
2065 		 * carp(4) interface may already have several working
2066 		 * addresses.  Since the expected behaviour of
2067 		 * SIOC[AS]IFADDR is to put the NIC into working state,
2068 		 * we try starting the state machine manually here with
2069 		 * the hope that the carp(4)'s previously working
2070 		 * addresses still could be brought up.
2071 		 */
2072 		carp_hmac_prepare(sc);
2073 		carp_set_state(sc, INIT);
2074 		carp_setrun(sc, 0);
2075 	}
2076 }
2077 
2078 static void
2079 carp_del_addr(struct carp_softc *sc, struct ifaddr *carp_ifa)
2080 {
2081 	struct carp_vhaddr *vha;
2082 	struct in_ifaddr *carp_ia;
2083 
2084 	KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET);
2085 	carp_ia = ifatoia(carp_ifa);
2086 
2087 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
2088 		KKASSERT(vha->vha_ia != NULL);
2089 		if (vha->vha_ia == carp_ia)
2090 			break;
2091 	}
2092 	KASSERT(vha != NULL, ("no corresponding vhaddr %p", carp_ifa));
2093 
2094 	/*
2095 	 * Remove the vhaddr from the list before deactivating
2096 	 * the vhaddr, so that the HMAC could be correctly
2097 	 * updated in carp_deactivate_vhaddr()
2098 	 */
2099 	carp_remove_vhaddr(sc, vha);
2100 
2101 	carp_deactivate_vhaddr(sc, vha, FALSE);
2102 	kfree(vha, M_CARP);
2103 }
2104 
2105 static void
2106 carp_config_addr(struct carp_softc *sc, struct ifaddr *carp_ifa)
2107 {
2108 	struct carp_vhaddr *vha;
2109 	struct in_ifaddr *carp_ia;
2110 
2111 	KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET);
2112 	carp_ia = ifatoia(carp_ifa);
2113 
2114 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
2115 		KKASSERT(vha->vha_ia != NULL);
2116 		if (vha->vha_ia == carp_ia)
2117 			break;
2118 	}
2119 	KASSERT(vha != NULL, ("no corresponding vhaddr %p", carp_ifa));
2120 
2121 	/* Remove then reinsert, to keep the vhaddr list sorted */
2122 	carp_remove_vhaddr(sc, vha);
2123 	carp_insert_vhaddr(sc, vha);
2124 
2125 	if (carp_config_vhaddr(sc, vha, NULL) != 0) {
2126 		/* See the comment in carp_add_addr() */
2127 		carp_hmac_prepare(sc);
2128 		carp_set_state(sc, INIT);
2129 		carp_setrun(sc, 0);
2130 	}
2131 }
2132 
2133 #ifdef notyet
2134 
2135 #ifdef INET6
2136 static int
2137 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
2138 {
2139 	struct ifnet *ifp;
2140 	struct carp_if *cif;
2141 	struct in6_ifaddr *ia, *ia_if;
2142 	struct ip6_moptions *im6o = &sc->sc_im6o;
2143 	struct in6_multi_mship *imm;
2144 	struct in6_addr in6;
2145 	int own, error;
2146 
2147 	if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
2148 		carp_setrun(sc, 0);
2149 		return (0);
2150 	}
2151 
2152 	/* we have to do it by hands to check we won't match on us */
2153 	ia_if = NULL; own = 0;
2154 	for (ia = in6_ifaddr; ia; ia = ia->ia_next) {
2155 		int i;
2156 
2157 		for (i = 0; i < 4; i++) {
2158 			if ((sin6->sin6_addr.s6_addr32[i] &
2159 			    ia->ia_prefixmask.sin6_addr.s6_addr32[i]) !=
2160 			    (ia->ia_addr.sin6_addr.s6_addr32[i] &
2161 			    ia->ia_prefixmask.sin6_addr.s6_addr32[i]))
2162 				break;
2163 		}
2164 		/* and, yeah, we need a multicast-capable iface too */
2165 		if (ia->ia_ifp != &sc->sc_if &&
2166 		    (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
2167 		    (i == 4)) {
2168 			if (!ia_if)
2169 				ia_if = ia;
2170 			if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
2171 			    &ia->ia_addr.sin6_addr))
2172 				own++;
2173 		}
2174 	}
2175 
2176 	if (!ia_if)
2177 		return (EADDRNOTAVAIL);
2178 	ia = ia_if;
2179 	ifp = ia->ia_ifp;
2180 
2181 	if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 ||
2182 	    (im6o->im6o_multicast_ifp && im6o->im6o_multicast_ifp != ifp))
2183 		return (EADDRNOTAVAIL);
2184 
2185 	if (!sc->sc_naddrs6) {
2186 		im6o->im6o_multicast_ifp = ifp;
2187 
2188 		/* join CARP multicast address */
2189 		bzero(&in6, sizeof(in6));
2190 		in6.s6_addr16[0] = htons(0xff02);
2191 		in6.s6_addr8[15] = 0x12;
2192 		if (in6_setscope(&in6, ifp, NULL) != 0)
2193 			goto cleanup;
2194 		if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL)
2195 			goto cleanup;
2196 		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
2197 
2198 		/* join solicited multicast address */
2199 		bzero(&in6, sizeof(in6));
2200 		in6.s6_addr16[0] = htons(0xff02);
2201 		in6.s6_addr32[1] = 0;
2202 		in6.s6_addr32[2] = htonl(1);
2203 		in6.s6_addr32[3] = sin6->sin6_addr.s6_addr32[3];
2204 		in6.s6_addr8[12] = 0xff;
2205 		if (in6_setscope(&in6, ifp, NULL) != 0)
2206 			goto cleanup;
2207 		if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL)
2208 			goto cleanup;
2209 		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
2210 	}
2211 
2212 #ifdef foo
2213 	if (!ifp->if_carp) {
2214 		cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO);
2215 
2216 		if ((error = ifpromisc(ifp, 1))) {
2217 			kfree(cif, M_CARP);
2218 			goto cleanup;
2219 		}
2220 
2221 		TAILQ_INIT(&cif->vhif_vrs);
2222 		ifp->if_carp = cif;
2223 	} else {
2224 		struct carp_softc *vr;
2225 
2226 		cif = ifp->if_carp;
2227 		TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
2228 			if (vr != sc && vr->sc_vhid == sc->sc_vhid) {
2229 				error = EINVAL;
2230 				goto cleanup;
2231 			}
2232 		}
2233 	}
2234 #endif
2235 	sc->sc_ia6 = ia;
2236 	sc->sc_carpdev = ifp;
2237 
2238 #ifdef foo
2239 	{ /* XXX prevent endless loop if already in queue */
2240 	struct carp_softc *vr, *after = NULL;
2241 	int myself = 0;
2242 	cif = ifp->if_carp;
2243 
2244 	TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
2245 		if (vr == sc)
2246 			myself = 1;
2247 		if (vr->sc_vhid < sc->sc_vhid)
2248 			after = vr;
2249 	}
2250 
2251 	if (!myself) {
2252 		/* We're trying to keep things in order */
2253 		if (after == NULL)
2254 			TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
2255 		else
2256 			TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
2257 	}
2258 	}
2259 #endif
2260 
2261 	sc->sc_naddrs6++;
2262 	if (own)
2263 		sc->sc_advskew = 0;
2264 	carp_sc_state(sc);
2265 	carp_setrun(sc, 0);
2266 
2267 	return (0);
2268 
2269 cleanup:
2270 	/* clean up multicast memberships */
2271 	if (!sc->sc_naddrs6) {
2272 		while (!LIST_EMPTY(&im6o->im6o_memberships)) {
2273 			imm = LIST_FIRST(&im6o->im6o_memberships);
2274 			LIST_REMOVE(imm, i6mm_chain);
2275 			in6_leavegroup(imm);
2276 		}
2277 	}
2278 	return (error);
2279 }
2280 
2281 static int
2282 carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
2283 {
2284 	int error = 0;
2285 
2286 	if (!--sc->sc_naddrs6) {
2287 		struct carp_if *cif = sc->sc_carpdev->if_carp;
2288 		struct ip6_moptions *im6o = &sc->sc_im6o;
2289 
2290 		callout_stop(&sc->sc_ad_tmo);
2291 		sc->sc_vhid = -1;
2292 		while (!LIST_EMPTY(&im6o->im6o_memberships)) {
2293 			struct in6_multi_mship *imm =
2294 			    LIST_FIRST(&im6o->im6o_memberships);
2295 
2296 			LIST_REMOVE(imm, i6mm_chain);
2297 			in6_leavegroup(imm);
2298 		}
2299 		im6o->im6o_multicast_ifp = NULL;
2300 #ifdef foo
2301 		TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
2302 		if (TAILQ_EMPTY(&cif->vhif_vrs)) {
2303 			sc->sc_carpdev->if_carp = NULL;
2304 			kfree(cif, M_IFADDR);
2305 		}
2306 #endif
2307 	}
2308 	return (error);
2309 }
2310 #endif /* INET6 */
2311 
2312 #endif
2313 
2314 static int
2315 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr, struct ucred *cr)
2316 {
2317 	struct carp_softc *sc = ifp->if_softc;
2318 	struct ifreq *ifr = (struct ifreq *)addr;
2319 	struct ifdrv *ifd = (struct ifdrv *)addr;
2320 	int error = 0;
2321 
2322 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
2323 
2324 	switch (cmd) {
2325 	case SIOCSIFFLAGS:
2326 		if (ifp->if_flags & IFF_UP) {
2327 			if ((ifp->if_flags & IFF_RUNNING) == 0)
2328 				carp_init(sc);
2329 		} else if (ifp->if_flags & IFF_RUNNING) {
2330 			carp_ioctl_stop(sc);
2331 		}
2332 		break;
2333 
2334 	case SIOCSIFCAP:
2335 		carp_ioctl_ifcap(sc, ifr->ifr_reqcap);
2336 		break;
2337 
2338 	case SIOCSVH:
2339 		error = carp_ioctl_setvh(sc, ifr->ifr_data, cr);
2340 		break;
2341 
2342 	case SIOCGVH:
2343 		error = carp_ioctl_getvh(sc, ifr->ifr_data, cr);
2344 		break;
2345 
2346 	case SIOCGDRVSPEC:
2347 		switch (ifd->ifd_cmd) {
2348 		case CARPGDEVNAME:
2349 			error = carp_ioctl_getdevname(sc, ifd);
2350 			break;
2351 
2352 		case CARPGVHADDR:
2353 			error = carp_ioctl_getvhaddr(sc, ifd);
2354 			break;
2355 
2356 		default:
2357 			error = EINVAL;
2358 			break;
2359 		}
2360 		break;
2361 
2362 	default:
2363 		error = ether_ioctl(ifp, cmd, addr);
2364 		break;
2365 	}
2366 
2367 	return error;
2368 }
2369 
2370 static void
2371 carp_ioctl_stop_dispatch(netmsg_t msg)
2372 {
2373 	struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2374 	struct carp_softc *sc = cmsg->nc_softc;
2375 
2376 	carp_stop(sc, FALSE);
2377 	lwkt_replymsg(&cmsg->base.lmsg, 0);
2378 }
2379 
2380 static void
2381 carp_ioctl_stop(struct carp_softc *sc)
2382 {
2383 	struct ifnet *ifp = &sc->arpcom.ac_if;
2384 	struct netmsg_carp cmsg;
2385 
2386 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
2387 
2388 	ifnet_deserialize_all(ifp);
2389 
2390 	bzero(&cmsg, sizeof(cmsg));
2391 	netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2392 	    carp_ioctl_stop_dispatch);
2393 	cmsg.nc_softc = sc;
2394 
2395 	lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2396 
2397 	ifnet_serialize_all(ifp);
2398 }
2399 
2400 static void
2401 carp_ioctl_setvh_dispatch(netmsg_t msg)
2402 {
2403 	struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2404 	struct carp_softc *sc = cmsg->nc_softc;
2405 	struct ifnet *ifp = &sc->arpcom.ac_if;
2406 	const struct carpreq *carpr = cmsg->nc_data;
2407 	int error;
2408 
2409 	error = 1;
2410 	if ((ifp->if_flags & IFF_RUNNING) &&
2411 	    sc->sc_state != INIT && carpr->carpr_state != sc->sc_state) {
2412 		switch (carpr->carpr_state) {
2413 		case BACKUP:
2414 			callout_stop(&sc->sc_ad_tmo);
2415 			carp_set_state(sc, BACKUP);
2416 			carp_setrun(sc, 0);
2417 			carp_setroute(sc, RTM_DELETE);
2418 			break;
2419 
2420 		case MASTER:
2421 			carp_master_down(sc);
2422 			break;
2423 
2424 		default:
2425 			break;
2426 		}
2427 	}
2428 	if (carpr->carpr_vhid > 0) {
2429 		if (carpr->carpr_vhid > 255) {
2430 			error = EINVAL;
2431 			goto back;
2432 		}
2433 		if (sc->sc_carpdev) {
2434 			struct carp_if *cif = sc->sc_carpdev->if_carp;
2435 			struct carp_softc_container *scc;
2436 
2437 			TAILQ_FOREACH(scc, cif, scc_link) {
2438 				struct carp_softc *vr = scc->scc_softc;
2439 
2440 				if (vr != sc &&
2441 				    vr->sc_vhid == carpr->carpr_vhid) {
2442 					error = EEXIST;
2443 					goto back;
2444 				}
2445 			}
2446 		}
2447 		sc->sc_vhid = carpr->carpr_vhid;
2448 
2449 		IF_LLADDR(ifp)[5] = sc->sc_vhid;
2450 		bcopy(IF_LLADDR(ifp), sc->arpcom.ac_enaddr,
2451 		    ETHER_ADDR_LEN);
2452 
2453 		error--;
2454 	}
2455 	if (carpr->carpr_advbase > 0 || carpr->carpr_advskew > 0) {
2456 		if (carpr->carpr_advskew >= 255) {
2457 			error = EINVAL;
2458 			goto back;
2459 		}
2460 		if (carpr->carpr_advbase > 255) {
2461 			error = EINVAL;
2462 			goto back;
2463 		}
2464 		sc->sc_advbase = carpr->carpr_advbase;
2465 		sc->sc_advskew = carpr->carpr_advskew;
2466 		error--;
2467 	}
2468 	bcopy(carpr->carpr_key, sc->sc_key, sizeof(sc->sc_key));
2469 	if (error > 0) {
2470 		error = EINVAL;
2471 	} else {
2472 		error = 0;
2473 		carp_setrun(sc, 0);
2474 	}
2475 back:
2476 	carp_hmac_prepare(sc);
2477 
2478 	lwkt_replymsg(&cmsg->base.lmsg, error);
2479 }
2480 
2481 static int
2482 carp_ioctl_setvh(struct carp_softc *sc, void *udata, struct ucred *cr)
2483 {
2484 	struct ifnet *ifp = &sc->arpcom.ac_if;
2485 	struct netmsg_carp cmsg;
2486 	struct carpreq carpr;
2487 	int error;
2488 
2489 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
2490 	ifnet_deserialize_all(ifp);
2491 
2492 	error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY);
2493 	if (error)
2494 		goto back;
2495 
2496 	error = copyin(udata, &carpr, sizeof(carpr));
2497 	if (error)
2498 		goto back;
2499 
2500 	bzero(&cmsg, sizeof(cmsg));
2501 	netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2502 	    carp_ioctl_setvh_dispatch);
2503 	cmsg.nc_softc = sc;
2504 	cmsg.nc_data = &carpr;
2505 
2506 	error = lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2507 
2508 back:
2509 	ifnet_serialize_all(ifp);
2510 	return error;
2511 }
2512 
2513 static void
2514 carp_ioctl_ifcap_dispatch(netmsg_t msg)
2515 {
2516 	struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2517 	struct carp_softc *sc = cmsg->nc_softc;
2518 	struct ifnet *ifp = &sc->arpcom.ac_if;
2519 	int reqcap = *((const int *)(cmsg->nc_data));
2520 	int mask;
2521 
2522 	mask = reqcap ^ ifp->if_capenable;
2523 	if (mask & IFCAP_TXCSUM) {
2524 		ifp->if_capenable ^= IFCAP_TXCSUM;
2525 		if ((ifp->if_capenable & IFCAP_TXCSUM) &&
2526 		    sc->sc_carpdev != NULL) {
2527 			ifp->if_hwassist |=
2528 			    (sc->sc_carpdev->if_hwassist &
2529 			     (CSUM_IP | CSUM_UDP | CSUM_TCP));
2530 		} else {
2531 			ifp->if_hwassist &= ~(CSUM_IP | CSUM_UDP | CSUM_TCP);
2532 		}
2533 	}
2534 	if (mask & IFCAP_TSO) {
2535 		ifp->if_capenable ^= IFCAP_TSO;
2536 		if ((ifp->if_capenable & IFCAP_TSO) &&
2537 		    sc->sc_carpdev != NULL) {
2538 			ifp->if_hwassist |=
2539 			    (sc->sc_carpdev->if_hwassist & CSUM_TSO);
2540 		} else {
2541 			ifp->if_hwassist &= ~CSUM_TSO;
2542 		}
2543 	}
2544 
2545 	lwkt_replymsg(&cmsg->base.lmsg, 0);
2546 }
2547 
2548 static void
2549 carp_ioctl_ifcap(struct carp_softc *sc, int reqcap)
2550 {
2551 	struct ifnet *ifp = &sc->arpcom.ac_if;
2552 	struct netmsg_carp cmsg;
2553 
2554 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
2555 	ifnet_deserialize_all(ifp);
2556 
2557 	bzero(&cmsg, sizeof(cmsg));
2558 	netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2559 	    carp_ioctl_ifcap_dispatch);
2560 	cmsg.nc_softc = sc;
2561 	cmsg.nc_data = &reqcap;
2562 
2563 	lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2564 
2565 	ifnet_serialize_all(ifp);
2566 }
2567 
2568 static void
2569 carp_ioctl_getvh_dispatch(netmsg_t msg)
2570 {
2571 	struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2572 	struct carp_softc *sc = cmsg->nc_softc;
2573 	struct carpreq *carpr = cmsg->nc_data;
2574 
2575 	carpr->carpr_state = sc->sc_state;
2576 	carpr->carpr_vhid = sc->sc_vhid;
2577 	carpr->carpr_advbase = sc->sc_advbase;
2578 	carpr->carpr_advskew = sc->sc_advskew;
2579 	bcopy(sc->sc_key, carpr->carpr_key, sizeof(carpr->carpr_key));
2580 
2581 	lwkt_replymsg(&cmsg->base.lmsg, 0);
2582 }
2583 
2584 static int
2585 carp_ioctl_getvh(struct carp_softc *sc, void *udata, struct ucred *cr)
2586 {
2587 	struct ifnet *ifp = &sc->arpcom.ac_if;
2588 	struct netmsg_carp cmsg;
2589 	struct carpreq carpr;
2590 	int error;
2591 
2592 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
2593 	ifnet_deserialize_all(ifp);
2594 
2595 	bzero(&cmsg, sizeof(cmsg));
2596 	netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2597 	    carp_ioctl_getvh_dispatch);
2598 	cmsg.nc_softc = sc;
2599 	cmsg.nc_data = &carpr;
2600 
2601 	lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2602 
2603 	error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY);
2604 	if (error)
2605 		bzero(carpr.carpr_key, sizeof(carpr.carpr_key));
2606 
2607 	error = copyout(&carpr, udata, sizeof(carpr));
2608 
2609 	ifnet_serialize_all(ifp);
2610 	return error;
2611 }
2612 
2613 static void
2614 carp_ioctl_getdevname_dispatch(netmsg_t msg)
2615 {
2616 	struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2617 	struct carp_softc *sc = cmsg->nc_softc;
2618 	char *devname = cmsg->nc_data;
2619 
2620 	bzero(devname, IFNAMSIZ);
2621 	if (sc->sc_carpdev != NULL)
2622 		strlcpy(devname, sc->sc_carpdev->if_xname, IFNAMSIZ);
2623 
2624 	lwkt_replymsg(&cmsg->base.lmsg, 0);
2625 }
2626 
2627 static int
2628 carp_ioctl_getdevname(struct carp_softc *sc, struct ifdrv *ifd)
2629 {
2630 	struct ifnet *ifp = &sc->arpcom.ac_if;
2631 	struct netmsg_carp cmsg;
2632 	char devname[IFNAMSIZ];
2633 	int error;
2634 
2635 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
2636 
2637 	if (ifd->ifd_len != sizeof(devname))
2638 		return EINVAL;
2639 
2640 	ifnet_deserialize_all(ifp);
2641 
2642 	bzero(&cmsg, sizeof(cmsg));
2643 	netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2644 	    carp_ioctl_getdevname_dispatch);
2645 	cmsg.nc_softc = sc;
2646 	cmsg.nc_data = devname;
2647 
2648 	lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2649 
2650 	error = copyout(devname, ifd->ifd_data, sizeof(devname));
2651 
2652 	ifnet_serialize_all(ifp);
2653 	return error;
2654 }
2655 
2656 static void
2657 carp_init_dispatch(netmsg_t msg)
2658 {
2659 	struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2660 	struct carp_softc *sc = cmsg->nc_softc;
2661 
2662 	sc->sc_if.if_flags |= IFF_RUNNING;
2663 	carp_hmac_prepare(sc);
2664 	carp_set_state(sc, INIT);
2665 	carp_setrun(sc, 0);
2666 
2667 	lwkt_replymsg(&cmsg->base.lmsg, 0);
2668 }
2669 
2670 static void
2671 carp_init(void *xsc)
2672 {
2673 	struct carp_softc *sc = xsc;
2674 	struct ifnet *ifp = &sc->arpcom.ac_if;
2675 	struct netmsg_carp cmsg;
2676 
2677 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
2678 
2679 	ifnet_deserialize_all(ifp);
2680 
2681 	bzero(&cmsg, sizeof(cmsg));
2682 	netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2683 	    carp_init_dispatch);
2684 	cmsg.nc_softc = sc;
2685 
2686 	lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2687 
2688 	ifnet_serialize_all(ifp);
2689 }
2690 
2691 static int
2692 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
2693     struct rtentry *rt)
2694 {
2695 	struct carp_softc *sc = ifp->if_softc;
2696 	struct ifnet *carpdev;
2697 	int error = 0;
2698 
2699 	carpdev = sc->sc_carpdev;
2700 	if (carpdev != NULL) {
2701 		if (m->m_flags & M_MCAST)
2702 			IFNET_STAT_INC(ifp, omcasts, 1);
2703 		IFNET_STAT_INC(ifp, obytes, m->m_pkthdr.len + ETHER_HDR_LEN);
2704 		IFNET_STAT_INC(ifp, opackets, 1);
2705 
2706 		/*
2707 		 * NOTE:
2708 		 * CARP's ifp is passed to backing device's
2709 		 * if_output method.
2710 		 */
2711 		carpdev->if_output(ifp, m, dst, rt);
2712 	} else {
2713 		IFNET_STAT_INC(ifp, oerrors, 1);
2714 		m_freem(m);
2715 		error = ENETUNREACH;
2716 	}
2717 	return error;
2718 }
2719 
2720 /*
2721  * Start output on carp interface. This function should never be called.
2722  */
2723 static void
2724 carp_start(struct ifnet *ifp, struct ifaltq_subque *ifsq __unused)
2725 {
2726 	panic("%s: start called", ifp->if_xname);
2727 }
2728 
2729 static void
2730 carp_set_state(struct carp_softc *sc, int state)
2731 {
2732 	struct ifnet *cifp = &sc->sc_if;
2733 
2734 	if (sc->sc_state == state)
2735 		return;
2736 	sc->sc_state = state;
2737 
2738 	switch (sc->sc_state) {
2739 	case BACKUP:
2740 		cifp->if_link_state = LINK_STATE_DOWN;
2741 		break;
2742 
2743 	case MASTER:
2744 		cifp->if_link_state = LINK_STATE_UP;
2745 		break;
2746 
2747 	default:
2748 		cifp->if_link_state = LINK_STATE_UNKNOWN;
2749 		break;
2750 	}
2751 	rt_ifmsg(cifp);
2752 }
2753 
2754 void
2755 carp_group_demote_adj(struct ifnet *ifp, int adj)
2756 {
2757 	struct ifg_list	*ifgl;
2758 	int *dm;
2759 
2760 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
2761 		if (!strcmp(ifgl->ifgl_group->ifg_group, IFG_ALL))
2762 			continue;
2763 		dm = &ifgl->ifgl_group->ifg_carp_demoted;
2764 
2765 		if (*dm + adj >= 0)
2766 			*dm += adj;
2767 		else
2768 			*dm = 0;
2769 
2770 		if (adj > 0 && *dm == 1)
2771 			carp_send_ad_all();
2772 		CARP_LOG("%s demoted group %s to %d", ifp->if_xname,
2773                     ifgl->ifgl_group->ifg_group, *dm);
2774 	}
2775 }
2776 
2777 #ifdef foo
2778 void
2779 carp_carpdev_state(void *v)
2780 {
2781 	struct carp_if *cif = v;
2782 	struct carp_softc *sc;
2783 
2784 	TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list)
2785 		carp_sc_state(sc);
2786 }
2787 
2788 static void
2789 carp_sc_state(struct carp_softc *sc)
2790 {
2791 	if (!(sc->sc_carpdev->if_flags & IFF_UP)) {
2792 		callout_stop(&sc->sc_ad_tmo);
2793 		callout_stop(&sc->sc_md_tmo);
2794 		callout_stop(&sc->sc_md6_tmo);
2795 		carp_set_state(sc, INIT);
2796 		carp_setrun(sc, 0);
2797 		if (!sc->sc_suppress) {
2798 			carp_suppress_preempt++;
2799 			if (carp_suppress_preempt == 1)
2800 				carp_send_ad_all();
2801 		}
2802 		sc->sc_suppress = 1;
2803 	} else {
2804 		carp_set_state(sc, INIT);
2805 		carp_setrun(sc, 0);
2806 		if (sc->sc_suppress)
2807 			carp_suppress_preempt--;
2808 		sc->sc_suppress = 0;
2809 	}
2810 }
2811 #endif
2812 
2813 static void
2814 carp_stop(struct carp_softc *sc, boolean_t detach)
2815 {
2816 	sc->sc_if.if_flags &= ~IFF_RUNNING;
2817 
2818 	callout_stop(&sc->sc_ad_tmo);
2819 	callout_stop(&sc->sc_md_tmo);
2820 	callout_stop(&sc->sc_md6_tmo);
2821 
2822 	if (!detach && sc->sc_state == MASTER)
2823 		carp_send_ad(sc);
2824 
2825 	if (sc->sc_suppress)
2826 		carp_suppress_preempt--;
2827 	sc->sc_suppress = 0;
2828 
2829 	if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS)
2830 		carp_suppress_preempt--;
2831 	sc->sc_sendad_errors = 0;
2832 	sc->sc_sendad_success = 0;
2833 
2834 	carp_set_state(sc, INIT);
2835 	carp_setrun(sc, 0);
2836 }
2837 
2838 static void
2839 carp_suspend(struct carp_softc *sc, boolean_t detach)
2840 {
2841 	struct ifnet *cifp = &sc->sc_if;
2842 
2843 	carp_stop(sc, detach);
2844 
2845 	/* Retain the running state, if we are not dead yet */
2846 	if (!sc->sc_dead && (cifp->if_flags & IFF_UP))
2847 		cifp->if_flags |= IFF_RUNNING;
2848 }
2849 
2850 static int
2851 carp_activate_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha,
2852     struct ifnet *ifp, struct in_ifaddr *ia_if, int own)
2853 {
2854 	struct ip_moptions *imo = &sc->sc_imo;
2855 	struct carp_if *ocif = ifp->if_carp;
2856 	int error;
2857 
2858 	KKASSERT(vha->vha_ia != NULL);
2859 
2860 	KASSERT(ia_if != NULL, ("NULL backing address"));
2861 	KASSERT(vha->vha_iaback == NULL, ("%p is already activated", vha));
2862 	KASSERT((vha->vha_flags & CARP_VHAF_OWNER) == 0,
2863 		("inactive vhaddr %p is the address owner", vha));
2864 
2865 	KASSERT(sc->sc_carpdev == NULL || sc->sc_carpdev == ifp,
2866 		("%s is already on %s", sc->sc_if.if_xname,
2867 		 sc->sc_carpdev->if_xname));
2868 
2869 	if (ocif == NULL) {
2870 		KASSERT(sc->sc_carpdev == NULL,
2871 			("%s is already on %s", sc->sc_if.if_xname,
2872 			 sc->sc_carpdev->if_xname));
2873 
2874 		error = ifpromisc(ifp, 1);
2875 		if (error)
2876 			return error;
2877 	} else {
2878 		struct carp_softc_container *scc;
2879 
2880 		TAILQ_FOREACH(scc, ocif, scc_link) {
2881 			struct carp_softc *vr = scc->scc_softc;
2882 
2883 			if (vr != sc && vr->sc_vhid == sc->sc_vhid)
2884 				return EINVAL;
2885 		}
2886 	}
2887 
2888 	ifp->if_carp = carp_if_insert(ocif, sc);
2889 	KASSERT(ifp->if_carp != NULL, ("%s carp_if_insert failed", __func__));
2890 
2891 	sc->sc_ia = ia_if;
2892 	sc->sc_carpdev = ifp;
2893 	sc->arpcom.ac_if.if_hwassist = 0;
2894 	if (sc->arpcom.ac_if.if_capenable & IFCAP_TXCSUM) {
2895 		sc->arpcom.ac_if.if_hwassist |=
2896 		    (ifp->if_hwassist & (CSUM_IP | CSUM_UDP | CSUM_TCP));
2897 	}
2898 	if (sc->arpcom.ac_if.if_capenable & IFCAP_TSO)
2899 		sc->arpcom.ac_if.if_hwassist |= (ifp->if_hwassist & CSUM_TSO);
2900 
2901 	/*
2902 	 * Make sure that all protocol threads see the sc_carpdev and
2903 	 * if_carp changes
2904 	 */
2905 	netmsg_service_sync();
2906 
2907 	if (ocif != NULL && ifp->if_carp != ocif) {
2908 		/*
2909 		 * The old carp list could be safely free now,
2910 		 * since no one can access it.
2911 		 */
2912 		carp_if_free(ocif);
2913 	}
2914 
2915 	vha->vha_iaback = ia_if;
2916 	sc->sc_naddrs++;
2917 
2918 	if (own) {
2919 		vha->vha_flags |= CARP_VHAF_OWNER;
2920 
2921 		/* XXX save user configured advskew? */
2922 		sc->sc_advskew = 0;
2923 	}
2924 
2925 	carp_addroute_vhaddr(sc, vha);
2926 
2927 	/*
2928 	 * Join the multicast group only after the backing interface
2929 	 * has been hooked with the CARP interface.
2930 	 */
2931 	KASSERT(imo->imo_multicast_ifp == NULL ||
2932 		imo->imo_multicast_ifp == &sc->sc_if,
2933 		("%s didn't leave mcast group on %s",
2934 		 sc->sc_if.if_xname, imo->imo_multicast_ifp->if_xname));
2935 
2936 	if (imo->imo_num_memberships == 0) {
2937 		struct in_addr addr;
2938 
2939 		addr.s_addr = htonl(INADDR_CARP_GROUP);
2940 		imo->imo_membership[0] = in_addmulti(&addr, &sc->sc_if);
2941 		if (imo->imo_membership[0] == NULL) {
2942 			carp_deactivate_vhaddr(sc, vha, FALSE);
2943 			return ENOBUFS;
2944 		}
2945 
2946 		imo->imo_num_memberships++;
2947 		imo->imo_multicast_ifp = &sc->sc_if;
2948 		imo->imo_multicast_ttl = CARP_DFLTTL;
2949 		imo->imo_multicast_loop = 0;
2950 	}
2951 
2952 	carp_hmac_prepare(sc);
2953 	carp_set_state(sc, INIT);
2954 	carp_setrun(sc, 0);
2955 	return 0;
2956 }
2957 
2958 static void
2959 carp_deactivate_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha,
2960     boolean_t del_iaback)
2961 {
2962 	KKASSERT(vha->vha_ia != NULL);
2963 
2964 	carp_hmac_prepare(sc);
2965 
2966 	if (vha->vha_iaback == NULL) {
2967 		KASSERT((vha->vha_flags & CARP_VHAF_OWNER) == 0,
2968 			("inactive vhaddr %p is the address owner", vha));
2969 		return;
2970 	}
2971 
2972 	vha->vha_flags &= ~CARP_VHAF_OWNER;
2973 	carp_delroute_vhaddr(sc, vha, del_iaback);
2974 
2975 	KKASSERT(sc->sc_naddrs > 0);
2976 	vha->vha_iaback = NULL;
2977 	sc->sc_naddrs--;
2978 	if (!sc->sc_naddrs) {
2979 		if (sc->sc_naddrs6) {
2980 			carp_multicast_cleanup(sc);
2981 			sc->sc_ia = NULL;
2982 		} else {
2983 			carp_detach(sc, FALSE, del_iaback);
2984 		}
2985 	}
2986 }
2987 
2988 static void
2989 carp_link_addrs(struct carp_softc *sc, struct ifnet *ifp, struct ifaddr *ifa_if)
2990 {
2991 	struct carp_vhaddr *vha;
2992 	struct in_ifaddr *ia_if;
2993 
2994 	KKASSERT(ifa_if->ifa_addr->sa_family == AF_INET);
2995 	ia_if = ifatoia(ifa_if);
2996 
2997 	/*
2998 	 * Test each inactive vhaddr against the newly added address.
2999 	 * If the newly added address could be the backing address,
3000 	 * then activate the matching vhaddr.
3001 	 */
3002 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
3003 		const struct in_ifaddr *ia;
3004 		int own;
3005 
3006 		if (vha->vha_iaback != NULL)
3007 			continue;
3008 
3009 		ia = vha->vha_ia;
3010 		if (ia->ia_subnetmask != ia_if->ia_subnetmask ||
3011 		    ia->ia_subnet != ia_if->ia_subnet)
3012 			continue;
3013 
3014 		own = 0;
3015 		if (ia->ia_addr.sin_addr.s_addr ==
3016 		    ia_if->ia_addr.sin_addr.s_addr)
3017 			own = 1;
3018 
3019 		carp_activate_vhaddr(sc, vha, ifp, ia_if, own);
3020 	}
3021 }
3022 
3023 static void
3024 carp_unlink_addrs(struct carp_softc *sc, struct ifnet *ifp,
3025 		  struct ifaddr *ifa_if)
3026 {
3027 	struct carp_vhaddr *vha;
3028 	struct in_ifaddr *ia_if;
3029 
3030 	KKASSERT(ifa_if->ifa_addr->sa_family == AF_INET);
3031 	ia_if = ifatoia(ifa_if);
3032 
3033 	/*
3034 	 * Ad src address is deleted; set it to NULL.
3035 	 * Following loop will try pick up a new ad src address
3036 	 * if one of the vhaddr could retain its backing address.
3037 	 */
3038 	if (sc->sc_ia == ia_if)
3039 		sc->sc_ia = NULL;
3040 
3041 	/*
3042 	 * Test each active vhaddr against the deleted address.
3043 	 * If the deleted address is vhaddr address's backing
3044 	 * address, then deactivate the vhaddr.
3045 	 */
3046 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
3047 		if (vha->vha_iaback == NULL)
3048 			continue;
3049 
3050 		if (vha->vha_iaback == ia_if)
3051 			carp_deactivate_vhaddr(sc, vha, TRUE);
3052 		else if (sc->sc_ia == NULL)
3053 			sc->sc_ia = vha->vha_iaback;
3054 	}
3055 }
3056 
3057 static void
3058 carp_update_addrs(struct carp_softc *sc, struct ifaddr *ifa_del)
3059 {
3060 	struct carp_vhaddr *vha;
3061 
3062 	KKASSERT(sc->sc_carpdev == NULL);
3063 
3064 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
3065 		carp_config_vhaddr(sc, vha, ifatoia(ifa_del));
3066 }
3067 
3068 static void
3069 carp_ifaddr(void *arg __unused, struct ifnet *ifp,
3070 	    enum ifaddr_event event, struct ifaddr *ifa)
3071 {
3072 	struct carp_softc *sc;
3073 
3074 	if (ifa->ifa_addr->sa_family != AF_INET)
3075 		return;
3076 
3077 	KASSERT(&curthread->td_msgport == netisr_cpuport(0),
3078 	    ("not in netisr0"));
3079 
3080 	if (ifp->if_type == IFT_CARP) {
3081 		/*
3082 		 * Address is changed on carp(4) interface
3083 		 */
3084 		switch (event) {
3085 		case IFADDR_EVENT_ADD:
3086 			carp_add_addr(ifp->if_softc, ifa);
3087 			break;
3088 
3089 		case IFADDR_EVENT_CHANGE:
3090 			carp_config_addr(ifp->if_softc, ifa);
3091 			break;
3092 
3093 		case IFADDR_EVENT_DELETE:
3094 			carp_del_addr(ifp->if_softc, ifa);
3095 			break;
3096 		}
3097 		return;
3098 	}
3099 
3100 	/*
3101 	 * Address is changed on non-carp(4) interface
3102 	 */
3103 	if ((ifp->if_flags & IFF_MULTICAST) == 0)
3104 		return;
3105 
3106 	LIST_FOREACH(sc, &carpif_list, sc_next) {
3107 		if (sc->sc_carpdev != NULL && sc->sc_carpdev != ifp) {
3108 			/* Not the parent iface; skip */
3109 			continue;
3110 		}
3111 
3112 		switch (event) {
3113 		case IFADDR_EVENT_ADD:
3114 			carp_link_addrs(sc, ifp, ifa);
3115 			break;
3116 
3117 		case IFADDR_EVENT_DELETE:
3118 			if (sc->sc_carpdev != NULL) {
3119 				carp_unlink_addrs(sc, ifp, ifa);
3120 				if (sc->sc_carpdev == NULL) {
3121 					/*
3122 					 * We no longer have the parent
3123 					 * interface, however, certain
3124 					 * virtual addresses, which are
3125 					 * not used because they can't
3126 					 * match the previous parent
3127 					 * interface's addresses, may now
3128 					 * match different interface's
3129 					 * addresses.
3130 					 */
3131 					carp_update_addrs(sc, ifa);
3132 				}
3133 			} else {
3134 				/*
3135 				 * The carp(4) interface didn't have a
3136 				 * parent iface, so it is not possible
3137 				 * that it will contain any address to
3138 				 * be unlinked.
3139 				 */
3140 			}
3141 			break;
3142 
3143 		case IFADDR_EVENT_CHANGE:
3144 			if (sc->sc_carpdev == NULL) {
3145 				/*
3146 				 * The carp(4) interface didn't have a
3147 				 * parent iface, so it is not possible
3148 				 * that it will contain any address to
3149 				 * be updated.
3150 				 */
3151 				carp_link_addrs(sc, ifp, ifa);
3152 			} else {
3153 				/*
3154 				 * First try breaking tie with the old
3155 				 * address.  Then see whether we could
3156 				 * link certain vhaddr to the new address.
3157 				 * If that fails, i.e. carpdev is NULL,
3158 				 * we try a global update.
3159 				 *
3160 				 * NOTE: The above order is critical.
3161 				 */
3162 				carp_unlink_addrs(sc, ifp, ifa);
3163 				carp_link_addrs(sc, ifp, ifa);
3164 				if (sc->sc_carpdev == NULL) {
3165 					/*
3166 					 * See the comment in the above
3167 					 * IFADDR_EVENT_DELETE block.
3168 					 */
3169 					carp_update_addrs(sc, NULL);
3170 				}
3171 			}
3172 			break;
3173 		}
3174 	}
3175 }
3176 
3177 void
3178 carp_proto_ctlinput(netmsg_t msg)
3179 {
3180 	int cmd = msg->ctlinput.nm_cmd;
3181 	struct sockaddr *sa = msg->ctlinput.nm_arg;
3182 	struct in_ifaddr_container *iac;
3183 
3184 	/* We only process PRC_IFDOWN and PRC_IFUP commands */
3185 	if (cmd != PRC_IFDOWN && cmd != PRC_IFUP)
3186 		goto done;
3187 
3188 	TAILQ_FOREACH(iac, &in_ifaddrheads[mycpuid], ia_link) {
3189 		struct in_ifaddr *ia = iac->ia;
3190 		struct ifnet *ifp = ia->ia_ifp;
3191 
3192 		if (ifp->if_type == IFT_CARP)
3193 			continue;
3194 
3195 		if (ia->ia_ifa.ifa_addr == sa) {
3196 			if (cmd == PRC_IFDOWN) {
3197 				carp_ifaddr(NULL, ifp, IFADDR_EVENT_DELETE,
3198 				    &ia->ia_ifa);
3199 			} else if (cmd == PRC_IFUP) {
3200 				carp_ifaddr(NULL, ifp, IFADDR_EVENT_ADD,
3201 				    &ia->ia_ifa);
3202 			}
3203 			break;
3204 		}
3205 	}
3206 done:
3207 	lwkt_replymsg(&msg->lmsg, 0);
3208 }
3209 
3210 struct ifnet *
3211 carp_parent(struct ifnet *cifp)
3212 {
3213 	struct carp_softc *sc;
3214 
3215 	KKASSERT(cifp->if_type == IFT_CARP);
3216 	sc = cifp->if_softc;
3217 
3218 	return sc->sc_carpdev;
3219 }
3220 
/*
 * Route flags used with rtinit() for an in_ifaddr: RTF_HOST if its
 * interface is loopback or point-to-point, 0 otherwise.
 */
#define rtinitflags(x)							\
	((((x)->ia_ifp->if_flags &					\
	   (IFF_LOOPBACK | IFF_POINTOPOINT)) != 0) ? RTF_HOST : 0)
3224 
3225 static int
3226 carp_addroute_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha)
3227 {
3228 	struct in_ifaddr *ia, *iaback;
3229 
3230 	if (sc->sc_state != MASTER)
3231 		return 0;
3232 
3233 	ia = vha->vha_ia;
3234 	KKASSERT(ia != NULL);
3235 
3236 	iaback = vha->vha_iaback;
3237 	KKASSERT(iaback != NULL);
3238 
3239 	return rtchange(&iaback->ia_ifa, &ia->ia_ifa);
3240 }
3241 
3242 static void
3243 carp_delroute_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha,
3244     boolean_t del_iaback)
3245 {
3246 	struct in_ifaddr *ia, *iaback;
3247 
3248 	ia = vha->vha_ia;
3249 	KKASSERT(ia != NULL);
3250 
3251 	iaback = vha->vha_iaback;
3252 	KKASSERT(iaback != NULL);
3253 
3254 	if (!del_iaback && (iaback->ia_ifp->if_flags & IFF_UP)) {
3255 		rtchange(&ia->ia_ifa, &iaback->ia_ifa);
3256 		return;
3257 	}
3258 
3259 	rtinit(&ia->ia_ifa, RTM_DELETE, rtinitflags(ia));
3260 	in_ifadown_force(&ia->ia_ifa, 1);
3261 	ia->ia_flags &= ~IFA_ROUTE;
3262 }
3263 
3264 static int
3265 carp_modevent(module_t mod, int type, void *data)
3266 {
3267 	switch (type) {
3268 	case MOD_LOAD:
3269 		LIST_INIT(&carpif_list);
3270 		carp_ifdetach_event =
3271 		EVENTHANDLER_REGISTER(ifnet_detach_event, carp_ifdetach, NULL,
3272 				      EVENTHANDLER_PRI_ANY);
3273 		carp_ifaddr_event =
3274 		EVENTHANDLER_REGISTER(ifaddr_event, carp_ifaddr, NULL,
3275 				      EVENTHANDLER_PRI_FIRST);
3276 		if_clone_attach(&carp_cloner);
3277 		break;
3278 
3279 	case MOD_UNLOAD:
3280 		EVENTHANDLER_DEREGISTER(ifnet_detach_event,
3281 					carp_ifdetach_event);
3282 		EVENTHANDLER_DEREGISTER(ifaddr_event,
3283 					carp_ifaddr_event);
3284 		if_clone_detach(&carp_cloner);
3285 		break;
3286 
3287 	default:
3288 		return (EINVAL);
3289 	}
3290 	return (0);
3291 }
3292 
3293 static moduledata_t carp_mod = {
3294 	"carp",
3295 	carp_modevent,
3296 	0
3297 };
3298 DECLARE_MODULE(carp, carp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
3299