xref: /dragonfly/sys/netinet/ip_carp.c (revision dc71b7ab)
1 /*
2  * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
3  * Copyright (c) 2003 Ryan McBride. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
18  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
23  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
24  * THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 /*
27  * $FreeBSD: src/sys/netinet/ip_carp.c,v 1.48 2007/02/02 09:39:09 glebius Exp $
28  */
29 
30 #include "opt_carp.h"
31 #include "opt_inet.h"
32 #include "opt_inet6.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/kernel.h>
37 #include <sys/in_cksum.h>
38 #include <sys/limits.h>
39 #include <sys/malloc.h>
40 #include <sys/mbuf.h>
41 #include <sys/msgport2.h>
42 #include <sys/time.h>
43 #include <sys/proc.h>
44 #include <sys/priv.h>
45 #include <sys/sockio.h>
46 #include <sys/socket.h>
47 #include <sys/sysctl.h>
48 #include <sys/syslog.h>
49 #include <sys/thread.h>
50 
51 #include <machine/stdarg.h>
52 #include <crypto/sha1.h>
53 
54 #include <net/bpf.h>
55 #include <net/ethernet.h>
56 #include <net/if.h>
57 #include <net/if_dl.h>
58 #include <net/if_types.h>
59 #include <net/route.h>
60 #include <net/if_clone.h>
61 #include <net/if_var.h>
62 #include <net/ifq_var.h>
63 #include <net/netmsg2.h>
64 #include <net/netisr2.h>
65 
66 #ifdef INET
67 #include <netinet/in.h>
68 #include <netinet/in_var.h>
69 #include <netinet/in_systm.h>
70 #include <netinet/ip.h>
71 #include <netinet/ip_var.h>
72 #include <netinet/if_ether.h>
73 #endif
74 
75 #ifdef INET6
76 #include <netinet/icmp6.h>
77 #include <netinet/ip6.h>
78 #include <netinet6/ip6_var.h>
79 #include <netinet6/scope6_var.h>
80 #include <netinet6/nd6.h>
81 #endif
82 
83 #include <netinet/ip_carp.h>
84 
85 /*
86  * Note about carp's MP safe approach:
87  *
88  * Brief: carp_softc (softc), carp_softc_container (scc)
89  *
90  * - All configuration operation, e.g. ioctl, add/delete inet addresses
91  *   is serialized by netisr0; not by carp's serializer
92  *
93  * - Backing interface's if_carp and carp_softc's relationship:
94  *
95  *                +---------+
96  *     if_carp -->| carp_if |
97  *                +---------+
98  *                     |
99  *                     |
100  *                     V      +---------+
101  *                  +-----+   |         |
102  *                  | scc |-->|  softc  |
103  *                  +-----+   |         |
104  *                     |      +---------+
105  *                     |
106  *                     V      +---------+
107  *                  +-----+   |         |
108  *                  | scc |-->|  softc  |
109  *                  +-----+   |         |
110  *                            +---------+
111  *
112  * - if_carp creation, modification and deletion all happen in netisr0,
113  *   as stated previously.  Since if_carp is accessed by multiple netisrs,
114  *   the modification to if_carp is conducted in the following way:
115  *
116  *   Adding carp_softc:
117  *
118  *   1) Duplicate the old carp_if to new carp_if (ncif), and insert the
119  *      to-be-added carp_softc to the new carp_if (ncif):
120  *
121  *        if_carp                     ncif
122  *           |                         |
123  *           V                         V
124  *      +---------+               +---------+
125  *      | carp_if |               | carp_if |
126  *      +---------+               +---------+
127  *           |                         |
128  *           |                         |
129  *           V        +-------+        V
130  *        +-----+     |       |     +-----+
131  *        | scc |---->| softc |<----| scc |
132  *        +-----+     |       |     +-----+
133  *           |        +-------+        |
134  *           |                         |
135  *           V        +-------+        V
136  *        +-----+     |       |     +-----+
137  *        | scc |---->| softc |<----| scc |
138  *        +-----+     |       |     +-----+
139  *                    +-------+        |
140  *                                     |
141  *                    +-------+        V
142  *                    |       |     +-----+
143  *                    | softc |<----| scc |
144  *                    |       |     +-----+
145  *                    +-------+
146  *
147  *   2) Save if_carp into ocif and switch if_carp to ncif:
148  *
149  *          ocif                    if_carp
150  *           |                         |
151  *           V                         V
152  *      +---------+               +---------+
153  *      | carp_if |               | carp_if |
154  *      +---------+               +---------+
155  *           |                         |
156  *           |                         |
157  *           V        +-------+        V
158  *        +-----+     |       |     +-----+
159  *        | scc |---->| softc |<----| scc |
160  *        +-----+     |       |     +-----+
161  *           |        +-------+        |
162  *           |                         |
163  *           V        +-------+        V
164  *        +-----+     |       |     +-----+
165  *        | scc |---->| softc |<----| scc |
166  *        +-----+     |       |     +-----+
167  *                    +-------+        |
168  *                                     |
169  *                    +-------+        V
170  *                    |       |     +-----+
171  *                    | softc |<----| scc |
172  *                    |       |     +-----+
173  *                    +-------+
174  *
175  *   3) Run netmsg_service_sync(), which will make sure that
176  *      ocif is no longer accessed (all network operations
177  *      happen only in network threads).
178  *   4) Free ocif -- only carp_if and scc are freed.
179  *
180  *
181  *   Removing carp_softc:
182  *
183  *   1) Duplicate the old carp_if to new carp_if (ncif); the to-be-deleted
184  *      carp_softc will not be duplicated.
185  *
186  *        if_carp                     ncif
187  *           |                         |
188  *           V                         V
189  *      +---------+               +---------+
190  *      | carp_if |               | carp_if |
191  *      +---------+               +---------+
192  *           |                         |
193  *           |                         |
194  *           V        +-------+        V
195  *        +-----+     |       |     +-----+
196  *        | scc |---->| softc |<----| scc |
197  *        +-----+     |       |     +-----+
198  *           |        +-------+        |
199  *           |                         |
200  *           V        +-------+        |
201  *        +-----+     |       |        |
202  *        | scc |---->| softc |        |
203  *        +-----+     |       |        |
204  *           |        +-------+        |
205  *           |                         |
206  *           V        +-------+        V
207  *        +-----+     |       |     +-----+
208  *        | scc |---->| softc |<----| scc |
209  *        +-----+     |       |     +-----+
210  *                    +-------+
211  *
212  *   2) Save if_carp into ocif and switch if_carp to ncif:
213  *
214  *          ocif                    if_carp
215  *           |                         |
216  *           V                         V
217  *      +---------+               +---------+
218  *      | carp_if |               | carp_if |
219  *      +---------+               +---------+
220  *           |                         |
221  *           |                         |
222  *           V        +-------+        V
223  *        +-----+     |       |     +-----+
224  *        | scc |---->| softc |<----| scc |
225  *        +-----+     |       |     +-----+
226  *           |        +-------+        |
227  *           |                         |
228  *           V        +-------+        |
229  *        +-----+     |       |        |
230  *        | scc |---->| softc |        |
231  *        +-----+     |       |        |
232  *           |        +-------+        |
233  *           |                         |
234  *           V        +-------+        V
235  *        +-----+     |       |     +-----+
236  *        | scc |---->| softc |<----| scc |
237  *        +-----+     |       |     +-----+
238  *                    +-------+
239  *
240  *   3) Run netmsg_service_sync(), which will make sure that
241  *      ocif is no longer accessed (all network operations
242  *      happen only in network threads).
243  *   4) Free ocif -- only carp_if and scc are freed.
244  *
245  * - if_carp accessing:
246  *   The accessing code should cache the if_carp in a local temporary
247  *   variable and accessing the temporary variable along the code path
248  *   instead of accessing if_carp later on.
249  */
250 
/* Base name of carp interfaces; used by the cloner and if_initname(). */
#define	CARP_IFNAME		"carp"
/* True only when both IFF_UP and IFF_RUNNING are set on the interface. */
#define CARP_IS_RUNNING(ifp)	\
	(((ifp)->if_flags & (IFF_UP | IFF_RUNNING)) == (IFF_UP | IFF_RUNNING))
254 
struct carp_softc;

/*
 * One virtual (carp) IPv4 address.  Entries are linked on
 * carp_softc.sc_vha_list through vha_link; carp_insert_vhaddr() keeps
 * that list sorted by address.
 */
struct carp_vhaddr {
	uint32_t		vha_flags;	/* CARP_VHAF_ */
	struct in_ifaddr	*vha_ia;	/* carp address */
	struct in_ifaddr	*vha_iaback;	/* backing address */
	TAILQ_ENTRY(carp_vhaddr) vha_link;
};
TAILQ_HEAD(carp_vhaddr_list, carp_vhaddr);
264 
/*
 * Network message used to run carp operations in a netisr thread.
 * Senders fill in only the fields the dispatch routine needs.
 */
struct netmsg_carp {
	struct netmsg_base	base;
	struct ifnet		*nc_carpdev;	/* backing interface */
	struct carp_softc	*nc_softc;	/* target carp softc */
	/* NOTE(review): presumably the ioctl argument buffer -- confirm */
	void			*nc_data;
	size_t			nc_datalen;
};
272 
/*
 * Per-interface carp state.  The embedded arpcom comes first and its
 * ifnet is reachable through the sc_if alias defined below.
 */
struct carp_softc {
	struct arpcom		 arpcom;
	struct ifnet		*sc_carpdev;	/* parent interface */
	struct carp_vhaddr_list	 sc_vha_list;	/* virtual addr list */

	const struct in_ifaddr	*sc_ia;		/* primary iface address v4 */
	struct ip_moptions 	 sc_imo;

#ifdef INET6
	struct in6_ifaddr 	*sc_ia6;	/* primary iface address v6 */
	struct ip6_moptions 	 sc_im6o;
#endif /* INET6 */

	enum { INIT = 0, BACKUP, MASTER }
				 sc_state;
	boolean_t		 sc_dead;	/* set when being destroyed */

	int			 sc_suppress;

	int			 sc_sendad_errors;
#define	CARP_SENDAD_MAX_ERRORS	3
	int			 sc_sendad_success;
#define	CARP_SENDAD_MIN_SUCCESS 3

	int			 sc_vhid;	/* -1 until configured */
	int			 sc_advskew;
	int			 sc_naddrs;	/* actually used IPv4 vha */
	int			 sc_naddrs6;
	int			 sc_advbase;	/* seconds */
	int			 sc_init_counter;
	uint64_t		 sc_counter;

	/* authentication */
#define CARP_HMAC_PAD	64
	unsigned char		 sc_key[CARP_KEY_LEN];
	unsigned char		 sc_pad[CARP_HMAC_PAD];	/* HMAC pad (opad) */
	SHA1_CTX		 sc_sha1;	/* precomputed inner hash */

	struct callout		 sc_ad_tmo;	/* advertisement timeout */
	struct netmsg_carp	 sc_ad_msg;	/* adv timeout netmsg */
	struct callout		 sc_md_tmo;	/* ip4 master down timeout */
	struct callout 		 sc_md6_tmo;	/* ip6 master down timeout */
	struct netmsg_carp	 sc_md_msg;	/* master down timeout netmsg */

	LIST_ENTRY(carp_softc)	 sc_next;	/* Interface clue */
};

/* Shorthand for the ifnet embedded in the softc's arpcom. */
#define sc_if	arpcom.ac_if
321 
/*
 * List node pointing at a carp_softc.  A carp_if is a tail queue of
 * these nodes; see the MP-safety note at the top of the file for how
 * such lists are duplicated and replaced.
 */
struct carp_softc_container {
	TAILQ_ENTRY(carp_softc_container) scc_link;
	struct carp_softc	*scc_softc;
};
TAILQ_HEAD(carp_if, carp_softc_container);
327 
SYSCTL_DECL(_net_inet_carp);

/*
 * Tunables exported under net.inet.carp, indexed by the CARPCTL_* ids.
 * NOTE(review): which initializer slot belongs to which option depends
 * on the CARPCTL_* values in netinet/ip_carp.h -- confirm there.
 */
static int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0 }; /* XXX for now */
SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW,
    &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets");
SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW,
    &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode");
SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW,
    &carp_opts[CARPCTL_LOG], 0, "log bad carp packets");
SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW,
    &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses");

/* Read-only from userland. */
static int carp_suppress_preempt = 0;
SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD,
    &carp_suppress_preempt, 0, "Preemption is suppressed");

/* Global packet/error counters, bumped on the input path. */
static struct carpstats carpstats;
SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW,
    &carpstats, carpstats,
    "CARP statistics (struct carpstats, netinet/ip_carp.h)");
348 
/* Log at LOG_INFO when the net.inet.carp.log tunable is greater than 0. */
#define	CARP_LOG(...)	do {				\
	if (carp_opts[CARPCTL_LOG] > 0)			\
		log(LOG_INFO, __VA_ARGS__);		\
} while (0)

/* Log at LOG_DEBUG when the net.inet.carp.log tunable is greater than 1. */
#define	CARP_DEBUG(...)	do {				\
	if (carp_opts[CARPCTL_LOG] > 1)			\
		log(LOG_DEBUG, __VA_ARGS__);		\
} while (0)
358 
/* Held around insertions into and removals from carpif_list. */
static struct lwkt_token carp_listtok = LWKT_TOKEN_INITIALIZER(carp_list_token);

/*
 * Forward declarations of file-local functions.
 */

/* HMAC, routing and input helpers */
static void	carp_hmac_prepare(struct carp_softc *);
static void	carp_hmac_generate(struct carp_softc *, uint32_t *,
		    unsigned char *);
static int	carp_hmac_verify(struct carp_softc *, uint32_t *,
		    unsigned char *);
static void	carp_setroute(struct carp_softc *, int);
static void	carp_proto_input_c(struct carp_softc *, struct mbuf *,
		    struct carp_header *, sa_family_t);
static int 	carp_clone_create(struct if_clone *, int, caddr_t);
static int 	carp_clone_destroy(struct ifnet *);
static void	carp_detach(struct carp_softc *, boolean_t, boolean_t);
static void	carp_prepare_ad(struct carp_softc *, struct carp_header *);
static void	carp_send_ad_all(void);
static void	carp_send_ad_timeout(void *);
static void	carp_send_ad(struct carp_softc *);
static void	carp_send_arp(struct carp_softc *);
static void	carp_master_down_timeout(void *);
static void	carp_master_down(struct carp_softc *);
static void	carp_setrun(struct carp_softc *, sa_family_t);
static void	carp_set_state(struct carp_softc *, int);
static struct ifnet *carp_forus(struct carp_if *, const uint8_t *);

/* ifnet methods */
static void	carp_init(void *);
static int	carp_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
static int	carp_output(struct ifnet *, struct mbuf *, struct sockaddr *,
		    struct rtentry *);
static void	carp_start(struct ifnet *, struct ifaltq_subque *);

/* address management */
static void	carp_multicast_cleanup(struct carp_softc *);
static void	carp_add_addr(struct carp_softc *, struct ifaddr *);
static void	carp_del_addr(struct carp_softc *, struct ifaddr *);
static void	carp_config_addr(struct carp_softc *, struct ifaddr *);
static void	carp_link_addrs(struct carp_softc *, struct ifnet *,
		    struct ifaddr *);
static void	carp_unlink_addrs(struct carp_softc *, struct ifnet *,
		    struct ifaddr *);
static void	carp_update_addrs(struct carp_softc *, struct ifaddr *);

static int	carp_config_vhaddr(struct carp_softc *, struct carp_vhaddr *,
		    struct in_ifaddr *);
static int	carp_activate_vhaddr(struct carp_softc *, struct carp_vhaddr *,
		    struct ifnet *, struct in_ifaddr *, int);
static void	carp_deactivate_vhaddr(struct carp_softc *,
		    struct carp_vhaddr *, boolean_t);
static int	carp_addroute_vhaddr(struct carp_softc *, struct carp_vhaddr *);
static void	carp_delroute_vhaddr(struct carp_softc *, struct carp_vhaddr *,
		    boolean_t);

#ifdef foo
static void	carp_sc_state(struct carp_softc *);
#endif
#ifdef INET6
static void	carp_send_na(struct carp_softc *);
#ifdef notyet
static int	carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
static int	carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *);
#endif
static void	carp_multicast6_cleanup(struct carp_softc *);
#endif
static void	carp_stop(struct carp_softc *, boolean_t);
static void	carp_suspend(struct carp_softc *, boolean_t);
static void	carp_ioctl_stop(struct carp_softc *);
static int	carp_ioctl_setvh(struct carp_softc *, void *, struct ucred *);
static int	carp_ioctl_getvh(struct carp_softc *, void *, struct ucred *);
static int	carp_ioctl_getdevname(struct carp_softc *, struct ifdrv *);
static int	carp_ioctl_getvhaddr(struct carp_softc *, struct ifdrv *);

/* carp_if (per backing interface softc list) maintenance */
static struct carp_if *carp_if_remove(struct carp_if *, struct carp_softc *);
static struct carp_if *carp_if_insert(struct carp_if *, struct carp_softc *);
static void	carp_if_free(struct carp_if *);

/* event handlers */
static void	carp_ifaddr(void *, struct ifnet *, enum ifaddr_event,
			    struct ifaddr *);
static void	carp_ifdetach(void *, struct ifnet *);

/* netmsg dispatch routines */
static void	carp_ifdetach_dispatch(netmsg_t);
static void	carp_clone_destroy_dispatch(netmsg_t);
static void	carp_init_dispatch(netmsg_t);
static void	carp_ioctl_stop_dispatch(netmsg_t);
static void	carp_ioctl_setvh_dispatch(netmsg_t);
static void	carp_ioctl_getvh_dispatch(netmsg_t);
static void	carp_ioctl_getdevname_dispatch(netmsg_t);
static void	carp_ioctl_getvhaddr_dispatch(netmsg_t);
static void	carp_send_ad_timeout_dispatch(netmsg_t);
static void	carp_master_down_timeout_dispatch(netmsg_t);
446 
/* Malloc type for every allocation made in this file. */
static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces");

/* All carp softcs; modified while holding carp_listtok. */
static LIST_HEAD(, carp_softc) carpif_list;

/* Cloner for "carp" interfaces, units 0 through IF_MAXUNIT. */
static struct if_clone carp_cloner =
IF_CLONE_INITIALIZER(CARP_IFNAME, carp_clone_create, carp_clone_destroy,
		     0, IF_MAXUNIT);

/*
 * Link-level address handed to ether_ifattach() at clone time.
 * NOTE(review): the last octet is presumably replaced by the vhid once
 * it is configured -- confirm in the setvh path.
 */
static uint8_t	carp_etheraddr[ETHER_ADDR_LEN] = { 0, 0, 0x5e, 0, 1, 0 };

/* Event handler tags; registration is not in this part of the file. */
static eventhandler_tag carp_ifdetach_event;
static eventhandler_tag carp_ifaddr_event;
459 
460 static __inline void
461 carp_insert_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha_new)
462 {
463 	struct carp_vhaddr *vha;
464 	u_long new_addr, addr;
465 
466 	KKASSERT((vha_new->vha_flags & CARP_VHAF_ONLIST) == 0);
467 
468 	/*
469 	 * Virtual address list is sorted; smaller one first
470 	 */
471 	new_addr = ntohl(vha_new->vha_ia->ia_addr.sin_addr.s_addr);
472 
473 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
474 		addr = ntohl(vha->vha_ia->ia_addr.sin_addr.s_addr);
475 
476 		if (addr > new_addr)
477 			break;
478 	}
479 	if (vha == NULL)
480 		TAILQ_INSERT_TAIL(&sc->sc_vha_list, vha_new, vha_link);
481 	else
482 		TAILQ_INSERT_BEFORE(vha, vha_new, vha_link);
483 	vha_new->vha_flags |= CARP_VHAF_ONLIST;
484 }
485 
486 static __inline void
487 carp_remove_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha)
488 {
489 	KKASSERT(vha->vha_flags & CARP_VHAF_ONLIST);
490 	vha->vha_flags &= ~CARP_VHAF_ONLIST;
491 	TAILQ_REMOVE(&sc->sc_vha_list, vha, vha_link);
492 }
493 
494 static void
495 carp_hmac_prepare(struct carp_softc *sc)
496 {
497 	uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
498 	uint8_t vhid = sc->sc_vhid & 0xff;
499 	int i;
500 #ifdef INET6
501 	struct ifaddr_container *ifac;
502 	struct in6_addr in6;
503 #endif
504 #ifdef INET
505 	struct carp_vhaddr *vha;
506 #endif
507 
508 	/* XXX: possible race here */
509 
510 	/* compute ipad from key */
511 	bzero(sc->sc_pad, sizeof(sc->sc_pad));
512 	bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key));
513 	for (i = 0; i < sizeof(sc->sc_pad); i++)
514 		sc->sc_pad[i] ^= 0x36;
515 
516 	/* precompute first part of inner hash */
517 	SHA1Init(&sc->sc_sha1);
518 	SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
519 	SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version));
520 	SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type));
521 	SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid));
522 #ifdef INET
523 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
524 		SHA1Update(&sc->sc_sha1,
525 		    (const uint8_t *)&vha->vha_ia->ia_addr.sin_addr,
526 		    sizeof(struct in_addr));
527 	}
528 #endif /* INET */
529 #ifdef INET6
530 	TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) {
531 		struct ifaddr *ifa = ifac->ifa;
532 
533 		if (ifa->ifa_addr->sa_family == AF_INET6) {
534 			in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
535 			in6_clearscope(&in6);
536 			SHA1Update(&sc->sc_sha1, (void *)&in6, sizeof(in6));
537 		}
538 	}
539 #endif /* INET6 */
540 
541 	/* convert ipad to opad */
542 	for (i = 0; i < sizeof(sc->sc_pad); i++)
543 		sc->sc_pad[i] ^= 0x36 ^ 0x5c;
544 }
545 
546 static void
547 carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2],
548     unsigned char md[20])
549 {
550 	SHA1_CTX sha1ctx;
551 
552 	/* fetch first half of inner hash */
553 	bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx));
554 
555 	SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter));
556 	SHA1Final(md, &sha1ctx);
557 
558 	/* outer hash */
559 	SHA1Init(&sha1ctx);
560 	SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad));
561 	SHA1Update(&sha1ctx, md, 20);
562 	SHA1Final(md, &sha1ctx);
563 }
564 
/*
 * Check a received digest against the one expected for this counter.
 *
 * sc      - softc providing the prepared HMAC state
 * counter - counter carried by the received advertisement
 * md      - 20-byte digest carried by the received advertisement
 *
 * Returns 0 when the digests match and non-zero otherwise.
 *
 * md comes from the network, so the comparison always looks at all 20
 * bytes instead of stopping at the first mismatch.  That keeps the time
 * taken independent of how many leading bytes of a forged digest are
 * correct.
 */
static int
carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2],
    unsigned char md[20])
{
	unsigned char md2[20];
	unsigned char diff = 0;
	size_t i;

	carp_hmac_generate(sc, counter, md2);

	for (i = 0; i < sizeof(md2); i++)
		diff |= md[i] ^ md2[i];

	return (diff != 0);
}
574 
575 static void
576 carp_setroute(struct carp_softc *sc, int cmd)
577 {
578 #ifdef INET6
579 	struct ifaddr_container *ifac;
580 #endif
581 	struct carp_vhaddr *vha;
582 
583 	KKASSERT(cmd == RTM_DELETE || cmd == RTM_ADD);
584 
585 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
586 		if (vha->vha_iaback == NULL)
587 			continue;
588 		if (cmd == RTM_DELETE)
589 			carp_delroute_vhaddr(sc, vha, FALSE);
590 		else
591 			carp_addroute_vhaddr(sc, vha);
592 	}
593 
594 #ifdef INET6
595 	TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) {
596 		struct ifaddr *ifa = ifac->ifa;
597 
598 		if (ifa->ifa_addr->sa_family == AF_INET6) {
599 			if (cmd == RTM_ADD)
600 				in6_ifaddloop(ifa);
601 			else
602 				in6_ifremloop(ifa);
603 		}
604 	}
605 #endif /* INET6 */
606 }
607 
608 static int
609 carp_clone_create(struct if_clone *ifc, int unit, caddr_t param __unused)
610 {
611 	struct carp_softc *sc;
612 	struct ifnet *ifp;
613 
614 	sc = kmalloc(sizeof(*sc), M_CARP, M_WAITOK | M_ZERO);
615 	ifp = &sc->sc_if;
616 
617 	sc->sc_suppress = 0;
618 	sc->sc_advbase = CARP_DFLTINTV;
619 	sc->sc_vhid = -1;	/* required setting */
620 	sc->sc_advskew = 0;
621 	sc->sc_init_counter = 1;
622 	sc->sc_naddrs = 0;
623 	sc->sc_naddrs6 = 0;
624 
625 	TAILQ_INIT(&sc->sc_vha_list);
626 
627 #ifdef INET6
628 	sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
629 #endif
630 
631 	callout_init_mp(&sc->sc_ad_tmo);
632 	netmsg_init(&sc->sc_ad_msg.base, NULL, &netisr_adone_rport,
633 	    MSGF_DROPABLE | MSGF_PRIORITY, carp_send_ad_timeout_dispatch);
634 	sc->sc_ad_msg.nc_softc = sc;
635 
636 	callout_init_mp(&sc->sc_md_tmo);
637 	callout_init_mp(&sc->sc_md6_tmo);
638 	netmsg_init(&sc->sc_md_msg.base, NULL, &netisr_adone_rport,
639 	    MSGF_DROPABLE | MSGF_PRIORITY, carp_master_down_timeout_dispatch);
640 	sc->sc_md_msg.nc_softc = sc;
641 
642 	if_initname(ifp, CARP_IFNAME, unit);
643 	ifp->if_softc = sc;
644 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
645 	ifp->if_init = carp_init;
646 	ifp->if_ioctl = carp_ioctl;
647 	ifp->if_start = carp_start;
648 	ifq_set_maxlen(&ifp->if_snd, ifqmaxlen);
649 	ifq_set_ready(&ifp->if_snd);
650 
651 	ether_ifattach(ifp, carp_etheraddr, NULL);
652 
653 	ifp->if_type = IFT_CARP;
654 	ifp->if_output = carp_output;
655 
656 	lwkt_gettoken(&carp_listtok);
657 	LIST_INSERT_HEAD(&carpif_list, sc, sc_next);
658 	lwkt_reltoken(&carp_listtok);
659 
660 	return (0);
661 }
662 
/*
 * Netmsg handler for carp_clone_destroy(), which sends the message to
 * netisr_cpuport(0).  Tears down the protocol side of the softc and
 * replies to the sender when done.
 */
static void
carp_clone_destroy_dispatch(netmsg_t msg)
{
	struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
	struct carp_softc *sc = cmsg->nc_softc;

	/*
	 * Mark the softc dead before detaching: carp_detach() does not
	 * deactivate the virtual addresses of a dead softc.
	 */
	sc->sc_dead = TRUE;
	carp_detach(sc, TRUE, FALSE);

	/* Stop all three timers synchronously */
	callout_stop_sync(&sc->sc_ad_tmo);
	callout_stop_sync(&sc->sc_md_tmo);
	callout_stop_sync(&sc->sc_md6_tmo);

	/* Drop the timeout netmsgs embedded in the softc */
	crit_enter();
	lwkt_dropmsg(&sc->sc_ad_msg.base.lmsg);
	lwkt_dropmsg(&sc->sc_md_msg.base.lmsg);
	crit_exit();

	lwkt_replymsg(&cmsg->base.lmsg, 0);
}
683 
684 static int
685 carp_clone_destroy(struct ifnet *ifp)
686 {
687 	struct carp_softc *sc = ifp->if_softc;
688 	struct netmsg_carp cmsg;
689 
690 	bzero(&cmsg, sizeof(cmsg));
691 	netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
692 	    carp_clone_destroy_dispatch);
693 	cmsg.nc_softc = sc;
694 
695 	lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
696 
697 	lwkt_gettoken(&carp_listtok);
698 	LIST_REMOVE(sc, sc_next);
699 	lwkt_reltoken(&carp_listtok);
700 
701 	bpfdetach(ifp);
702 	if_detach(ifp);
703 
704 	KASSERT(sc->sc_naddrs == 0, ("certain inet address is still active"));
705 	kfree(sc, M_CARP);
706 
707 	return 0;
708 }
709 
710 static struct carp_if *
711 carp_if_remove(struct carp_if *ocif, struct carp_softc *sc)
712 {
713 	struct carp_softc_container *oscc, *scc;
714 	struct carp_if *cif;
715 	int count = 0;
716 #ifdef INVARIANTS
717 	int found = 0;
718 #endif
719 
720 	TAILQ_FOREACH(oscc, ocif, scc_link) {
721 		++count;
722 #ifdef INVARIANTS
723 		if (oscc->scc_softc == sc)
724 			found = 1;
725 #endif
726 	}
727 	KASSERT(found, ("%s carp_softc is not on carp_if", __func__));
728 
729 	if (count == 1) {
730 		/* Last one is going to be unlinked */
731 		return NULL;
732 	}
733 
734 	cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO);
735 	TAILQ_INIT(cif);
736 
737 	TAILQ_FOREACH(oscc, ocif, scc_link) {
738 		if (oscc->scc_softc == sc)
739 			continue;
740 
741 		scc = kmalloc(sizeof(*scc), M_CARP, M_WAITOK | M_ZERO);
742 		scc->scc_softc = oscc->scc_softc;
743 		TAILQ_INSERT_TAIL(cif, scc, scc_link);
744 	}
745 
746 	return cif;
747 }
748 
749 static struct carp_if *
750 carp_if_insert(struct carp_if *ocif, struct carp_softc *sc)
751 {
752 	struct carp_softc_container *oscc;
753 	int onlist;
754 
755 	onlist = 0;
756 	if (ocif != NULL) {
757 		TAILQ_FOREACH(oscc, ocif, scc_link) {
758 			if (oscc->scc_softc == sc)
759 				onlist = 1;
760 		}
761 	}
762 
763 #ifdef INVARIANTS
764 	if (sc->sc_carpdev != NULL) {
765 		KASSERT(onlist, ("%s is not on %s carp list",
766 		    sc->sc_if.if_xname, sc->sc_carpdev->if_xname));
767 	} else {
768 		KASSERT(!onlist, ("%s is already on carp list",
769 		    sc->sc_if.if_xname));
770 	}
771 #endif
772 
773 	if (!onlist) {
774 		struct carp_if *cif;
775 		struct carp_softc_container *new_scc, *scc;
776 		int inserted = 0;
777 
778 		cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO);
779 		TAILQ_INIT(cif);
780 
781 		new_scc = kmalloc(sizeof(*new_scc), M_CARP, M_WAITOK | M_ZERO);
782 		new_scc->scc_softc = sc;
783 
784 		if (ocif != NULL) {
785 			TAILQ_FOREACH(oscc, ocif, scc_link) {
786 				if (!inserted &&
787 				    oscc->scc_softc->sc_vhid > sc->sc_vhid) {
788 					TAILQ_INSERT_TAIL(cif, new_scc,
789 					    scc_link);
790 					inserted = 1;
791 				}
792 
793 				scc = kmalloc(sizeof(*scc), M_CARP,
794 				    M_WAITOK | M_ZERO);
795 				scc->scc_softc = oscc->scc_softc;
796 				TAILQ_INSERT_TAIL(cif, scc, scc_link);
797 			}
798 		}
799 		if (!inserted)
800 			TAILQ_INSERT_TAIL(cif, new_scc, scc_link);
801 
802 		return cif;
803 	} else {
804 		return ocif;
805 	}
806 }
807 
808 static void
809 carp_if_free(struct carp_if *cif)
810 {
811 	struct carp_softc_container *scc;
812 
813 	while ((scc = TAILQ_FIRST(cif)) != NULL) {
814 		TAILQ_REMOVE(cif, scc, scc_link);
815 		kfree(scc, M_CARP);
816 	}
817 	kfree(cif, M_CARP);
818 }
819 
/*
 * Detach sc from its backing interface, if it has one.
 *
 * sc         - softc to detach
 * detach     - passed to carp_suspend(); when TRUE and the softc is not
 *              marked dead, every virtual address is deactivated too
 * del_iaback - passed to carp_deactivate_vhaddr()
 *
 * The backing interface's if_carp list is replaced following the
 * duplicate / switch / netmsg_service_sync() / free sequence described
 * in the note at the top of this file.
 */
static void
carp_detach(struct carp_softc *sc, boolean_t detach, boolean_t del_iaback)
{
	carp_suspend(sc, detach);

	carp_multicast_cleanup(sc);
#ifdef INET6
	carp_multicast6_cleanup(sc);
#endif

	if (!sc->sc_dead && detach) {
		struct carp_vhaddr *vha;

		TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
			carp_deactivate_vhaddr(sc, vha, del_iaback);
		KKASSERT(sc->sc_naddrs == 0);
	}

	if (sc->sc_carpdev != NULL) {
		struct ifnet *ifp = sc->sc_carpdev;
		struct carp_if *ocif = ifp->if_carp;

		/* Switch to a copy of the list without sc (NULL if empty) */
		ifp->if_carp = carp_if_remove(ocif, sc);
		KASSERT(ifp->if_carp != ocif,
		    ("%s carp_if_remove failed", __func__));

		sc->sc_carpdev = NULL;
		sc->sc_ia = NULL;

		/*
		 * Make sure that all protocol threads see the
		 * sc_carpdev and if_carp changes
		 */
		netmsg_service_sync();

		if (ifp->if_carp == NULL) {
			/*
			 * No more carp interfaces using
			 * ifp as the backing interface,
			 * move it out of promiscuous mode.
			 */
			ifpromisc(ifp, 0);
		}

		/*
		 * The old carp list could be safely free now,
		 * since no one can access it.
		 */
		carp_if_free(ocif);
	}
}
871 
872 static void
873 carp_ifdetach_dispatch(netmsg_t msg)
874 {
875 	struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
876 	struct ifnet *ifp = cmsg->nc_carpdev;
877 
878 	while (ifp->if_carp) {
879 		struct carp_softc_container *scc;
880 
881 		scc = TAILQ_FIRST((struct carp_if *)(ifp->if_carp));
882 		carp_detach(scc->scc_softc, TRUE, TRUE);
883 	}
884 	lwkt_replymsg(&cmsg->base.lmsg, 0);
885 }
886 
887 /* Detach an interface from the carp. */
888 static void
889 carp_ifdetach(void *arg __unused, struct ifnet *ifp)
890 {
891 	struct netmsg_carp cmsg;
892 
893 	ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp);
894 
895 	bzero(&cmsg, sizeof(cmsg));
896 	netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
897 	    carp_ifdetach_dispatch);
898 	cmsg.nc_carpdev = ifp;
899 
900 	lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
901 }
902 
903 /*
904  * process input packet.
905  * we have rearranged checks order compared to the rfc,
906  * but it seems more efficient this way or not possible otherwise.
907  */
908 int
909 carp_proto_input(struct mbuf **mp, int *offp, int proto)
910 {
911 	struct mbuf *m = *mp;
912 	struct ip *ip = mtod(m, struct ip *);
913 	struct ifnet *ifp = m->m_pkthdr.rcvif;
914 	struct carp_header *ch;
915 	struct carp_softc *sc;
916 	int len, iphlen;
917 
918 	iphlen = *offp;
919 	*mp = NULL;
920 
921 	carpstats.carps_ipackets++;
922 
923 	if (!carp_opts[CARPCTL_ALLOW]) {
924 		m_freem(m);
925 		goto back;
926 	}
927 
928 	/* Check if received on a valid carp interface */
929 	if (ifp->if_type != IFT_CARP) {
930 		carpstats.carps_badif++;
931 		CARP_LOG("carp_proto_input: packet received on non-carp "
932 		    "interface: %s\n", ifp->if_xname);
933 		m_freem(m);
934 		goto back;
935 	}
936 
937 	if (!CARP_IS_RUNNING(ifp)) {
938 		carpstats.carps_badif++;
939 		CARP_LOG("carp_proto_input: packet received on stopped carp "
940 		    "interface: %s\n", ifp->if_xname);
941 		m_freem(m);
942 		goto back;
943 	}
944 
945 	sc = ifp->if_softc;
946 	if (sc->sc_carpdev == NULL) {
947 		carpstats.carps_badif++;
948 		CARP_LOG("carp_proto_input: packet received on defunc carp "
949 		    "interface: %s\n", ifp->if_xname);
950 		m_freem(m);
951 		goto back;
952 	}
953 
954 	if (!IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
955 		carpstats.carps_badif++;
956 		CARP_LOG("carp_proto_input: non-mcast packet on "
957 		    "interface: %s\n", ifp->if_xname);
958 		m_freem(m);
959 		goto back;
960 	}
961 
962 	/* Verify that the IP TTL is CARP_DFLTTL. */
963 	if (ip->ip_ttl != CARP_DFLTTL) {
964 		carpstats.carps_badttl++;
965 		CARP_LOG("carp_proto_input: received ttl %d != %d on %s\n",
966 		    ip->ip_ttl, CARP_DFLTTL, ifp->if_xname);
967 		m_freem(m);
968 		goto back;
969 	}
970 
971 	/* Minimal CARP packet size */
972 	len = iphlen + sizeof(*ch);
973 
974 	/*
975 	 * Verify that the received packet length is
976 	 * not less than the CARP header
977 	 */
978 	if (m->m_pkthdr.len < len) {
979 		carpstats.carps_badlen++;
980 		CARP_LOG("packet too short %d on %s\n", m->m_pkthdr.len,
981 		    ifp->if_xname);
982 		m_freem(m);
983 		goto back;
984 	}
985 
986 	/* Make sure that CARP header is contiguous */
987 	if (len > m->m_len) {
988 		m = m_pullup(m, len);
989 		if (m == NULL) {
990 			carpstats.carps_hdrops++;
991 			CARP_LOG("carp_proto_input: m_pullup failed\n");
992 			goto back;
993 		}
994 		ip = mtod(m, struct ip *);
995 	}
996 	ch = (struct carp_header *)((uint8_t *)ip + iphlen);
997 
998 	/* Verify the CARP checksum */
999 	if (in_cksum_skip(m, len, iphlen)) {
1000 		carpstats.carps_badsum++;
1001 		CARP_LOG("carp_proto_input: checksum failed on %s\n",
1002 		    ifp->if_xname);
1003 		m_freem(m);
1004 		goto back;
1005 	}
1006 	carp_proto_input_c(sc, m, ch, AF_INET);
1007 back:
1008 	return(IPPROTO_DONE);
1009 }
1010 
1011 #ifdef INET6
1012 int
1013 carp6_proto_input(struct mbuf **mp, int *offp, int proto)
1014 {
1015 	struct mbuf *m = *mp;
1016 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
1017 	struct ifnet *ifp = m->m_pkthdr.rcvif;
1018 	struct carp_header *ch;
1019 	struct carp_softc *sc;
1020 	u_int len;
1021 
1022 	carpstats.carps_ipackets6++;
1023 
1024 	if (!carp_opts[CARPCTL_ALLOW]) {
1025 		m_freem(m);
1026 		goto back;
1027 	}
1028 
1029 	/* check if received on a valid carp interface */
1030 	if (ifp->if_type != IFT_CARP) {
1031 		carpstats.carps_badif++;
1032 		CARP_LOG("carp6_proto_input: packet received on non-carp "
1033 		    "interface: %s\n", ifp->if_xname);
1034 		m_freem(m);
1035 		goto back;
1036 	}
1037 
1038 	if (!CARP_IS_RUNNING(ifp)) {
1039 		carpstats.carps_badif++;
1040 		CARP_LOG("carp_proto_input: packet received on stopped carp "
1041 		    "interface: %s\n", ifp->if_xname);
1042 		m_freem(m);
1043 		goto back;
1044 	}
1045 
1046 	sc = ifp->if_softc;
1047 	if (sc->sc_carpdev == NULL) {
1048 		carpstats.carps_badif++;
1049 		CARP_LOG("carp6_proto_input: packet received on defunc-carp "
1050 		    "interface: %s\n", ifp->if_xname);
1051 		m_freem(m);
1052 		goto back;
1053 	}
1054 
1055 	/* verify that the IP TTL is 255 */
1056 	if (ip6->ip6_hlim != CARP_DFLTTL) {
1057 		carpstats.carps_badttl++;
1058 		CARP_LOG("carp6_proto_input: received ttl %d != 255 on %s\n",
1059 		    ip6->ip6_hlim, ifp->if_xname);
1060 		m_freem(m);
1061 		goto back;
1062 	}
1063 
1064 	/* verify that we have a complete carp packet */
1065 	len = m->m_len;
1066 	IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch));
1067 	if (ch == NULL) {
1068 		carpstats.carps_badlen++;
1069 		CARP_LOG("carp6_proto_input: packet size %u too small\n", len);
1070 		goto back;
1071 	}
1072 
1073 	/* verify the CARP checksum */
1074 	if (in_cksum_range(m, 0, *offp, sizeof(*ch))) {
1075 		carpstats.carps_badsum++;
1076 		CARP_LOG("carp6_proto_input: checksum failed, on %s\n",
1077 		    ifp->if_xname);
1078 		m_freem(m);
1079 		goto back;
1080 	}
1081 
1082 	carp_proto_input_c(sc, m, ch, AF_INET6);
1083 back:
1084 	return (IPPROTO_DONE);
1085 }
1086 #endif /* INET6 */
1087 
1088 static void
1089 carp_proto_input_c(struct carp_softc *sc, struct mbuf *m,
1090     struct carp_header *ch, sa_family_t af)
1091 {
1092 	struct ifnet *cifp;
1093 	uint64_t tmp_counter;
1094 	struct timeval sc_tv, ch_tv;
1095 
1096 	if (sc->sc_vhid != ch->carp_vhid) {
1097 		/*
1098 		 * CARP uses multicast, however, multicast packets
1099 		 * are tapped to all CARP interfaces on the physical
1100 		 * interface receiving the CARP packets, so we don't
1101 		 * update any stats here.
1102 		 */
1103 		m_freem(m);
1104 		return;
1105 	}
1106 	cifp = &sc->sc_if;
1107 
1108 	/* verify the CARP version. */
1109 	if (ch->carp_version != CARP_VERSION) {
1110 		carpstats.carps_badver++;
1111 		CARP_LOG("%s; invalid version %d\n", cifp->if_xname,
1112 			 ch->carp_version);
1113 		m_freem(m);
1114 		return;
1115 	}
1116 
1117 	/* verify the hash */
1118 	if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
1119 		carpstats.carps_badauth++;
1120 		CARP_LOG("%s: incorrect hash\n", cifp->if_xname);
1121 		m_freem(m);
1122 		return;
1123 	}
1124 
1125 	tmp_counter = ntohl(ch->carp_counter[0]);
1126 	tmp_counter = tmp_counter<<32;
1127 	tmp_counter += ntohl(ch->carp_counter[1]);
1128 
1129 	/* XXX Replay protection goes here */
1130 
1131 	sc->sc_init_counter = 0;
1132 	sc->sc_counter = tmp_counter;
1133 
1134 	sc_tv.tv_sec = sc->sc_advbase;
1135 	if (carp_suppress_preempt && sc->sc_advskew <  240)
1136 		sc_tv.tv_usec = 240 * 1000000 / 256;
1137 	else
1138 		sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1139 	ch_tv.tv_sec = ch->carp_advbase;
1140 	ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
1141 
1142 	switch (sc->sc_state) {
1143 	case INIT:
1144 		break;
1145 
1146 	case MASTER:
1147 		/*
1148 		 * If we receive an advertisement from a master who's going to
1149 		 * be more frequent than us, go into BACKUP state.
1150 		 */
1151 		if (timevalcmp(&sc_tv, &ch_tv, >) ||
1152 		    timevalcmp(&sc_tv, &ch_tv, ==)) {
1153 			callout_stop(&sc->sc_ad_tmo);
1154 			CARP_DEBUG("%s: MASTER -> BACKUP "
1155 			   "(more frequent advertisement received)\n",
1156 			   cifp->if_xname);
1157 			carp_set_state(sc, BACKUP);
1158 			carp_setrun(sc, 0);
1159 			carp_setroute(sc, RTM_DELETE);
1160 		}
1161 		break;
1162 
1163 	case BACKUP:
1164 		/*
1165 		 * If we're pre-empting masters who advertise slower than us,
1166 		 * and this one claims to be slower, treat him as down.
1167 		 */
1168 		if (carp_opts[CARPCTL_PREEMPT] &&
1169 		    timevalcmp(&sc_tv, &ch_tv, <)) {
1170 			CARP_DEBUG("%s: BACKUP -> MASTER "
1171 			    "(preempting a slower master)\n", cifp->if_xname);
1172 			carp_master_down(sc);
1173 			break;
1174 		}
1175 
1176 		/*
1177 		 *  If the master is going to advertise at such a low frequency
1178 		 *  that he's guaranteed to time out, we'd might as well just
1179 		 *  treat him as timed out now.
1180 		 */
1181 		sc_tv.tv_sec = sc->sc_advbase * 3;
1182 		if (timevalcmp(&sc_tv, &ch_tv, <)) {
1183 			CARP_DEBUG("%s: BACKUP -> MASTER (master timed out)\n",
1184 				   cifp->if_xname);
1185 			carp_master_down(sc);
1186 			break;
1187 		}
1188 
1189 		/*
1190 		 * Otherwise, we reset the counter and wait for the next
1191 		 * advertisement.
1192 		 */
1193 		carp_setrun(sc, af);
1194 		break;
1195 	}
1196 	m_freem(m);
1197 }
1198 
1199 struct mbuf *
1200 carp_input(void *v, struct mbuf *m)
1201 {
1202 	struct carp_if *cif = v;
1203 	struct ether_header *eh;
1204 	struct carp_softc_container *scc;
1205 	struct ifnet *ifp;
1206 
1207 	eh = mtod(m, struct ether_header *);
1208 
1209 	ifp = carp_forus(cif, eh->ether_dhost);
1210 	if (ifp != NULL) {
1211 		ether_reinput_oncpu(ifp, m, REINPUT_RUNBPF);
1212 		return NULL;
1213 	}
1214 
1215 	if ((m->m_flags & (M_BCAST | M_MCAST)) == 0)
1216 		return m;
1217 
1218 	/*
1219 	 * XXX Should really check the list of multicast addresses
1220 	 * for each CARP interface _before_ copying.
1221 	 */
1222 	TAILQ_FOREACH(scc, cif, scc_link) {
1223 		struct carp_softc *sc = scc->scc_softc;
1224 		struct mbuf *m0;
1225 
1226 		if ((sc->sc_if.if_flags & IFF_UP) == 0)
1227 			continue;
1228 
1229 		m0 = m_dup(m, MB_DONTWAIT);
1230 		if (m0 == NULL)
1231 			continue;
1232 
1233 		ether_reinput_oncpu(&sc->sc_if, m0, REINPUT_RUNBPF);
1234 	}
1235 	return m;
1236 }
1237 
1238 static void
1239 carp_prepare_ad(struct carp_softc *sc, struct carp_header *ch)
1240 {
1241 	if (sc->sc_init_counter) {
1242 		/* this could also be seconds since unix epoch */
1243 		sc->sc_counter = karc4random();
1244 		sc->sc_counter = sc->sc_counter << 32;
1245 		sc->sc_counter += karc4random();
1246 	} else {
1247 		sc->sc_counter++;
1248 	}
1249 
1250 	ch->carp_counter[0] = htonl((sc->sc_counter >> 32) & 0xffffffff);
1251 	ch->carp_counter[1] = htonl(sc->sc_counter & 0xffffffff);
1252 
1253 	carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
1254 }
1255 
1256 static void
1257 carp_send_ad_all(void)
1258 {
1259 	struct carp_softc *sc;
1260 
1261 	LIST_FOREACH(sc, &carpif_list, sc_next) {
1262 		if (sc->sc_carpdev == NULL)
1263 			continue;
1264 
1265 		if (CARP_IS_RUNNING(&sc->sc_if) && sc->sc_state == MASTER)
1266 			carp_send_ad(sc);
1267 	}
1268 }
1269 
1270 static void
1271 carp_send_ad_timeout(void *xsc)
1272 {
1273 	struct carp_softc *sc = xsc;
1274 	struct netmsg_carp *cmsg = &sc->sc_ad_msg;
1275 
1276 	KASSERT(mycpuid == 0, ("%s not on cpu0 but on cpu%d",
1277 	    __func__, mycpuid));
1278 
1279 	crit_enter();
1280 	if (cmsg->base.lmsg.ms_flags & MSGF_DONE)
1281 		lwkt_sendmsg(netisr_cpuport(0), &cmsg->base.lmsg);
1282 	crit_exit();
1283 }
1284 
1285 static void
1286 carp_send_ad_timeout_dispatch(netmsg_t msg)
1287 {
1288 	struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
1289 	struct carp_softc *sc = cmsg->nc_softc;
1290 
1291 	/* Reply ASAP */
1292 	crit_enter();
1293 	lwkt_replymsg(&cmsg->base.lmsg, 0);
1294 	crit_exit();
1295 
1296 	carp_send_ad(sc);
1297 }
1298 
1299 static void
1300 carp_send_ad(struct carp_softc *sc)
1301 {
1302 	struct ifnet *cifp = &sc->sc_if;
1303 	struct carp_header ch;
1304 	struct timeval tv;
1305 	struct carp_header *ch_ptr;
1306 	struct mbuf *m;
1307 	int len, advbase, advskew;
1308 
1309 	if (!CARP_IS_RUNNING(cifp)) {
1310 		/* Bow out */
1311 		advbase = 255;
1312 		advskew = 255;
1313 	} else {
1314 		advbase = sc->sc_advbase;
1315 		if (!carp_suppress_preempt || sc->sc_advskew > 240)
1316 			advskew = sc->sc_advskew;
1317 		else
1318 			advskew = 240;
1319 		tv.tv_sec = advbase;
1320 		tv.tv_usec = advskew * 1000000 / 256;
1321 	}
1322 
1323 	ch.carp_version = CARP_VERSION;
1324 	ch.carp_type = CARP_ADVERTISEMENT;
1325 	ch.carp_vhid = sc->sc_vhid;
1326 	ch.carp_advbase = advbase;
1327 	ch.carp_advskew = advskew;
1328 	ch.carp_authlen = 7;	/* XXX DEFINE */
1329 	ch.carp_pad1 = 0;	/* must be zero */
1330 	ch.carp_cksum = 0;
1331 
1332 #ifdef INET
1333 	if (sc->sc_ia != NULL) {
1334 		struct ip *ip;
1335 
1336 		MGETHDR(m, MB_DONTWAIT, MT_HEADER);
1337 		if (m == NULL) {
1338 			IFNET_STAT_INC(cifp, oerrors, 1);
1339 			carpstats.carps_onomem++;
1340 			/* XXX maybe less ? */
1341 			if (advbase != 255 || advskew != 255)
1342 				callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1343 				    carp_send_ad_timeout, sc);
1344 			return;
1345 		}
1346 		len = sizeof(*ip) + sizeof(ch);
1347 		m->m_pkthdr.len = len;
1348 		m->m_pkthdr.rcvif = NULL;
1349 		m->m_len = len;
1350 		MH_ALIGN(m, m->m_len);
1351 		m->m_flags |= M_MCAST;
1352 		ip = mtod(m, struct ip *);
1353 		ip->ip_v = IPVERSION;
1354 		ip->ip_hl = sizeof(*ip) >> 2;
1355 		ip->ip_tos = IPTOS_LOWDELAY;
1356 		ip->ip_len = len;
1357 		ip->ip_id = ip_newid();
1358 		ip->ip_off = IP_DF;
1359 		ip->ip_ttl = CARP_DFLTTL;
1360 		ip->ip_p = IPPROTO_CARP;
1361 		ip->ip_sum = 0;
1362 		ip->ip_src = sc->sc_ia->ia_addr.sin_addr;
1363 		ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP);
1364 
1365 		ch_ptr = (struct carp_header *)(&ip[1]);
1366 		bcopy(&ch, ch_ptr, sizeof(ch));
1367 		carp_prepare_ad(sc, ch_ptr);
1368 		ch_ptr->carp_cksum = in_cksum_skip(m, len, sizeof(*ip));
1369 
1370 		getmicrotime(&cifp->if_lastchange);
1371 		IFNET_STAT_INC(cifp, opackets, 1);
1372 		IFNET_STAT_INC(cifp, obytes, len);
1373 		carpstats.carps_opackets++;
1374 
1375 		if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) {
1376 			IFNET_STAT_INC(cifp, oerrors, 1);
1377 			if (sc->sc_sendad_errors < INT_MAX)
1378 				sc->sc_sendad_errors++;
1379 			if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
1380 				carp_suppress_preempt++;
1381 				if (carp_suppress_preempt == 1) {
1382 					carp_send_ad_all();
1383 				}
1384 			}
1385 			sc->sc_sendad_success = 0;
1386 		} else {
1387 			if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
1388 				if (++sc->sc_sendad_success >=
1389 				    CARP_SENDAD_MIN_SUCCESS) {
1390 					carp_suppress_preempt--;
1391 					sc->sc_sendad_errors = 0;
1392 				}
1393 			} else {
1394 				sc->sc_sendad_errors = 0;
1395 			}
1396 		}
1397 	}
1398 #endif /* INET */
1399 #ifdef INET6
1400 	if (sc->sc_ia6) {
1401 		struct ip6_hdr *ip6;
1402 
1403 		MGETHDR(m, MB_DONTWAIT, MT_HEADER);
1404 		if (m == NULL) {
1405 			IFNET_STAT_INC(cifp, oerrors, 1);
1406 			carpstats.carps_onomem++;
1407 			/* XXX maybe less ? */
1408 			if (advbase != 255 || advskew != 255)
1409 				callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1410 				    carp_send_ad_timeout, sc);
1411 			return;
1412 		}
1413 		len = sizeof(*ip6) + sizeof(ch);
1414 		m->m_pkthdr.len = len;
1415 		m->m_pkthdr.rcvif = NULL;
1416 		m->m_len = len;
1417 		MH_ALIGN(m, m->m_len);
1418 		m->m_flags |= M_MCAST;
1419 		ip6 = mtod(m, struct ip6_hdr *);
1420 		bzero(ip6, sizeof(*ip6));
1421 		ip6->ip6_vfc |= IPV6_VERSION;
1422 		ip6->ip6_hlim = CARP_DFLTTL;
1423 		ip6->ip6_nxt = IPPROTO_CARP;
1424 		bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src,
1425 		    sizeof(struct in6_addr));
1426 		/* set the multicast destination */
1427 
1428 		ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
1429 		ip6->ip6_dst.s6_addr8[15] = 0x12;
1430 		if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) {
1431 			IFNET_STAT_INC(cifp, oerrors, 1);
1432 			m_freem(m);
1433 			CARP_LOG("%s: in6_setscope failed\n", __func__);
1434 			return;
1435 		}
1436 
1437 		ch_ptr = (struct carp_header *)(&ip6[1]);
1438 		bcopy(&ch, ch_ptr, sizeof(ch));
1439 		carp_prepare_ad(sc, ch_ptr);
1440 		ch_ptr->carp_cksum = in_cksum_skip(m, len, sizeof(*ip6));
1441 
1442 		getmicrotime(&cifp->if_lastchange);
1443 		IFNET_STAT_INC(cifp, opackets, 1);
1444 		IFNET_STAT_INC(cifp, obytes, len);
1445 		carpstats.carps_opackets6++;
1446 
1447 		if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) {
1448 			IFNET_STAT_INC(cifp, oerrors, 1);
1449 			if (sc->sc_sendad_errors < INT_MAX)
1450 				sc->sc_sendad_errors++;
1451 			if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
1452 				carp_suppress_preempt++;
1453 				if (carp_suppress_preempt == 1) {
1454 					carp_send_ad_all();
1455 				}
1456 			}
1457 			sc->sc_sendad_success = 0;
1458 		} else {
1459 			if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
1460 				if (++sc->sc_sendad_success >=
1461 				    CARP_SENDAD_MIN_SUCCESS) {
1462 					carp_suppress_preempt--;
1463 					sc->sc_sendad_errors = 0;
1464 				}
1465 			} else {
1466 				sc->sc_sendad_errors = 0;
1467 			}
1468 		}
1469 	}
1470 #endif /* INET6 */
1471 
1472 	if (advbase != 255 || advskew != 255)
1473 		callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1474 		    carp_send_ad_timeout, sc);
1475 }
1476 
1477 /*
1478  * Broadcast a gratuitous ARP request containing
1479  * the virtual router MAC address for each IP address
1480  * associated with the virtual router.
1481  */
1482 static void
1483 carp_send_arp(struct carp_softc *sc)
1484 {
1485 	const struct carp_vhaddr *vha;
1486 
1487 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
1488 		if (vha->vha_iaback == NULL)
1489 			continue;
1490 		arp_gratuitous(&sc->sc_if, &vha->vha_ia->ia_ifa);
1491 	}
1492 }
1493 
1494 #ifdef INET6
1495 static void
1496 carp_send_na(struct carp_softc *sc)
1497 {
1498 	struct ifaddr_container *ifac;
1499 	struct in6_addr *in6;
1500 	static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
1501 
1502 	TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) {
1503 		struct ifaddr *ifa = ifac->ifa;
1504 
1505 		if (ifa->ifa_addr->sa_family != AF_INET6)
1506 			continue;
1507 
1508 		in6 = &ifatoia6(ifa)->ia_addr.sin6_addr;
1509 		nd6_na_output(sc->sc_carpdev, &mcast, in6,
1510 		    ND_NA_FLAG_OVERRIDE, 1, NULL);
1511 		DELAY(1000);	/* XXX */
1512 	}
1513 }
1514 #endif /* INET6 */
1515 
1516 static __inline const struct carp_vhaddr *
1517 carp_find_addr(const struct carp_softc *sc, const struct in_addr *addr)
1518 {
1519 	struct carp_vhaddr *vha;
1520 
1521 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
1522 		if (vha->vha_iaback == NULL)
1523 			continue;
1524 
1525 		if (vha->vha_ia->ia_addr.sin_addr.s_addr == addr->s_addr)
1526 			return vha;
1527 	}
1528 	return NULL;
1529 }
1530 
1531 #ifdef notyet
1532 static int
1533 carp_iamatch_balance(const struct carp_if *cif, const struct in_addr *itaddr,
1534 		     const struct in_addr *isaddr, uint8_t **enaddr)
1535 {
1536 	const struct carp_softc *vh;
1537 	int index, count = 0;
1538 
1539 	/*
1540 	 * XXX proof of concept implementation.
1541 	 * We use the source ip to decide which virtual host should
1542 	 * handle the request. If we're master of that virtual host,
1543 	 * then we respond, otherwise, just drop the arp packet on
1544 	 * the floor.
1545 	 */
1546 
1547 	TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1548 		if (!CARP_IS_RUNNING(&vh->sc_if))
1549 			continue;
1550 
1551 		if (carp_find_addr(vh, itaddr) != NULL)
1552 			count++;
1553 	}
1554 	if (count == 0)
1555 		return 0;
1556 
1557 	/* this should be a hash, like pf_hash() */
1558 	index = ntohl(isaddr->s_addr) % count;
1559 	count = 0;
1560 
1561 	TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1562 		if (!CARP_IS_RUNNING(&vh->sc_if))
1563 			continue;
1564 
1565 		if (carp_find_addr(vh, itaddr) == NULL)
1566 			continue;
1567 
1568 		if (count == index) {
1569 			if (vh->sc_state == MASTER) {
1570 				*enaddr = IF_LLADDR(&vh->sc_if);
1571 				return 1;
1572 			} else {
1573 				return 0;
1574 			}
1575 		}
1576 		count++;
1577 	}
1578 	return 0;
1579 }
1580 #endif
1581 
1582 int
1583 carp_iamatch(const struct in_ifaddr *ia)
1584 {
1585 	const struct carp_softc *sc = ia->ia_ifp->if_softc;
1586 
1587 	KASSERT(&curthread->td_msgport == netisr_cpuport(0),
1588 	    ("not in netisr0"));
1589 
1590 #ifdef notyet
1591 	if (carp_opts[CARPCTL_ARPBALANCE])
1592 		return carp_iamatch_balance(cif, itaddr, isaddr, enaddr);
1593 #endif
1594 
1595 	if (!CARP_IS_RUNNING(&sc->sc_if) || sc->sc_state != MASTER)
1596 		return 0;
1597 
1598 	return 1;
1599 }
1600 
1601 #ifdef INET6
1602 struct ifaddr *
1603 carp_iamatch6(void *v, struct in6_addr *taddr)
1604 {
1605 #ifdef foo
1606 	struct carp_if *cif = v;
1607 	struct carp_softc *vh;
1608 
1609 	TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1610 		struct ifaddr_container *ifac;
1611 
1612 		TAILQ_FOREACH(ifac, &vh->sc_if.if_addrheads[mycpuid],
1613 			      ifa_link) {
1614 			struct ifaddr *ifa = ifac->ifa;
1615 
1616 			if (IN6_ARE_ADDR_EQUAL(taddr,
1617 			    &ifatoia6(ifa)->ia_addr.sin6_addr) &&
1618 			    CARP_IS_RUNNING(&vh->sc_if) &&
1619 			    vh->sc_state == MASTER) {
1620 				return (ifa);
1621 			}
1622 		}
1623 	}
1624 #endif
1625 	return (NULL);
1626 }
1627 
1628 void *
1629 carp_macmatch6(void *v, struct mbuf *m, const struct in6_addr *taddr)
1630 {
1631 #ifdef foo
1632 	struct m_tag *mtag;
1633 	struct carp_if *cif = v;
1634 	struct carp_softc *sc;
1635 
1636 	TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
1637 		struct ifaddr_container *ifac;
1638 
1639 		TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid],
1640 			      ifa_link) {
1641 			struct ifaddr *ifa = ifac->ifa;
1642 
1643 			if (IN6_ARE_ADDR_EQUAL(taddr,
1644 			    &ifatoia6(ifa)->ia_addr.sin6_addr) &&
1645 			    CARP_IS_RUNNING(&sc->sc_if)) {
1646 				struct ifnet *ifp = &sc->sc_if;
1647 
1648 				mtag = m_tag_get(PACKET_TAG_CARP,
1649 				    sizeof(struct ifnet *), MB_DONTWAIT);
1650 				if (mtag == NULL) {
1651 					/* better a bit than nothing */
1652 					return (IF_LLADDR(ifp));
1653 				}
1654 				bcopy(&ifp, (caddr_t)(mtag + 1),
1655 				    sizeof(struct ifnet *));
1656 				m_tag_prepend(m, mtag);
1657 
1658 				return (IF_LLADDR(ifp));
1659 			}
1660 		}
1661 	}
1662 #endif
1663 	return (NULL);
1664 }
1665 #endif
1666 
1667 static struct ifnet *
1668 carp_forus(struct carp_if *cif, const uint8_t *dhost)
1669 {
1670 	struct carp_softc_container *scc;
1671 
1672 	if (memcmp(dhost, carp_etheraddr, ETHER_ADDR_LEN - 1) != 0)
1673 		return NULL;
1674 
1675 	TAILQ_FOREACH(scc, cif, scc_link) {
1676 		struct carp_softc *sc = scc->scc_softc;
1677 		struct ifnet *ifp = &sc->sc_if;
1678 
1679 		if (CARP_IS_RUNNING(ifp) && sc->sc_state == MASTER &&
1680 		    !bcmp(dhost, IF_LLADDR(ifp), ETHER_ADDR_LEN))
1681 			return ifp;
1682 	}
1683 	return NULL;
1684 }
1685 
1686 static void
1687 carp_master_down_timeout(void *xsc)
1688 {
1689 	struct carp_softc *sc = xsc;
1690 	struct netmsg_carp *cmsg = &sc->sc_md_msg;
1691 
1692 	KASSERT(mycpuid == 0, ("%s not on cpu0 but on cpu%d",
1693 	    __func__, mycpuid));
1694 
1695 	crit_enter();
1696 	if (cmsg->base.lmsg.ms_flags & MSGF_DONE)
1697 		lwkt_sendmsg(netisr_cpuport(0), &cmsg->base.lmsg);
1698 	crit_exit();
1699 }
1700 
1701 static void
1702 carp_master_down_timeout_dispatch(netmsg_t msg)
1703 {
1704 	struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
1705 	struct carp_softc *sc = cmsg->nc_softc;
1706 
1707 	/* Reply ASAP */
1708 	crit_enter();
1709 	lwkt_replymsg(&cmsg->base.lmsg, 0);
1710 	crit_exit();
1711 
1712 	CARP_DEBUG("%s: BACKUP -> MASTER (master timed out)\n",
1713 		   sc->sc_if.if_xname);
1714 	carp_master_down(sc);
1715 }
1716 
1717 static void
1718 carp_master_down(struct carp_softc *sc)
1719 {
1720 	switch (sc->sc_state) {
1721 	case INIT:
1722 		kprintf("%s: master_down event in INIT state\n",
1723 			sc->sc_if.if_xname);
1724 		break;
1725 
1726 	case MASTER:
1727 		break;
1728 
1729 	case BACKUP:
1730 		carp_set_state(sc, MASTER);
1731 		carp_send_ad(sc);
1732 		carp_send_arp(sc);
1733 #ifdef INET6
1734 		carp_send_na(sc);
1735 #endif /* INET6 */
1736 		carp_setrun(sc, 0);
1737 		carp_setroute(sc, RTM_ADD);
1738 		break;
1739 	}
1740 }
1741 
1742 /*
1743  * When in backup state, af indicates whether to reset the master down timer
1744  * for v4 or v6. If it's set to zero, reset the ones which are already pending.
1745  */
1746 static void
1747 carp_setrun(struct carp_softc *sc, sa_family_t af)
1748 {
1749 	struct ifnet *cifp = &sc->sc_if;
1750 	struct timeval tv;
1751 
1752 	if (sc->sc_carpdev == NULL) {
1753 		carp_set_state(sc, INIT);
1754 		return;
1755 	}
1756 
1757 	if ((cifp->if_flags & IFF_RUNNING) && sc->sc_vhid > 0 &&
1758 	    (sc->sc_naddrs || sc->sc_naddrs6)) {
1759 		/* Nothing */
1760 	} else {
1761 		carp_setroute(sc, RTM_DELETE);
1762 		return;
1763 	}
1764 
1765 	switch (sc->sc_state) {
1766 	case INIT:
1767 		if (carp_opts[CARPCTL_PREEMPT] && !carp_suppress_preempt) {
1768 			carp_send_ad(sc);
1769 			carp_send_arp(sc);
1770 #ifdef INET6
1771 			carp_send_na(sc);
1772 #endif /* INET6 */
1773 			CARP_DEBUG("%s: INIT -> MASTER (preempting)\n",
1774 				   cifp->if_xname);
1775 			carp_set_state(sc, MASTER);
1776 			carp_setroute(sc, RTM_ADD);
1777 		} else {
1778 			CARP_DEBUG("%s: INIT -> BACKUP\n", cifp->if_xname);
1779 			carp_set_state(sc, BACKUP);
1780 			carp_setroute(sc, RTM_DELETE);
1781 			carp_setrun(sc, 0);
1782 		}
1783 		break;
1784 
1785 	case BACKUP:
1786 		callout_stop(&sc->sc_ad_tmo);
1787 		tv.tv_sec = 3 * sc->sc_advbase;
1788 		tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1789 		switch (af) {
1790 #ifdef INET
1791 		case AF_INET:
1792 			callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv),
1793 			    carp_master_down_timeout, sc);
1794 			break;
1795 #endif /* INET */
1796 #ifdef INET6
1797 		case AF_INET6:
1798 			callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv),
1799 			    carp_master_down_timeout, sc);
1800 			break;
1801 #endif /* INET6 */
1802 		default:
1803 			if (sc->sc_naddrs)
1804 				callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv),
1805 				    carp_master_down_timeout, sc);
1806 			if (sc->sc_naddrs6)
1807 				callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv),
1808 				    carp_master_down_timeout, sc);
1809 			break;
1810 		}
1811 		break;
1812 
1813 	case MASTER:
1814 		tv.tv_sec = sc->sc_advbase;
1815 		tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1816 		callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1817 		    carp_send_ad_timeout, sc);
1818 		break;
1819 	}
1820 }
1821 
1822 static void
1823 carp_multicast_cleanup(struct carp_softc *sc)
1824 {
1825 	struct ip_moptions *imo = &sc->sc_imo;
1826 
1827 	if (imo->imo_num_memberships == 0)
1828 		return;
1829 	KKASSERT(imo->imo_num_memberships == 1);
1830 
1831 	in_delmulti(imo->imo_membership[0]);
1832 	imo->imo_membership[0] = NULL;
1833 	imo->imo_num_memberships = 0;
1834 	imo->imo_multicast_ifp = NULL;
1835 }
1836 
1837 #ifdef INET6
1838 static void
1839 carp_multicast6_cleanup(struct carp_softc *sc)
1840 {
1841 	struct ip6_moptions *im6o = &sc->sc_im6o;
1842 
1843 	while (!LIST_EMPTY(&im6o->im6o_memberships)) {
1844 		struct in6_multi_mship *imm =
1845 		    LIST_FIRST(&im6o->im6o_memberships);
1846 
1847 		LIST_REMOVE(imm, i6mm_chain);
1848 		in6_leavegroup(imm);
1849 	}
1850 	im6o->im6o_multicast_ifp = NULL;
1851 }
1852 #endif
1853 
1854 static void
1855 carp_ioctl_getvhaddr_dispatch(netmsg_t msg)
1856 {
1857 	struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
1858 	struct carp_softc *sc = cmsg->nc_softc;
1859 	const struct carp_vhaddr *vha;
1860 	struct ifcarpvhaddr *carpa, *carpa0;
1861 	int count, len, error = 0;
1862 
1863 	count = 0;
1864 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
1865 		++count;
1866 
1867 	if (cmsg->nc_datalen == 0) {
1868 		cmsg->nc_datalen = count * sizeof(*carpa);
1869 		goto back;
1870 	} else if (count == 0 || cmsg->nc_datalen < sizeof(*carpa)) {
1871 		cmsg->nc_datalen = 0;
1872 		goto back;
1873 	}
1874 	len = min(cmsg->nc_datalen, sizeof(*carpa) * count);
1875 	KKASSERT(len >= sizeof(*carpa));
1876 
1877 	carpa0 = carpa = kmalloc(len, M_TEMP, M_WAITOK | M_NULLOK | M_ZERO);
1878 	if (carpa == NULL) {
1879 		error = ENOMEM;
1880 		goto back;
1881 	}
1882 
1883 	count = 0;
1884 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
1885 		if (len < sizeof(*carpa))
1886 			break;
1887 
1888 		carpa->carpa_flags = vha->vha_flags;
1889 		carpa->carpa_addr.sin_family = AF_INET;
1890 		carpa->carpa_addr.sin_addr = vha->vha_ia->ia_addr.sin_addr;
1891 
1892 		carpa->carpa_baddr.sin_family = AF_INET;
1893 		if (vha->vha_iaback == NULL) {
1894 			carpa->carpa_baddr.sin_addr.s_addr = INADDR_ANY;
1895 		} else {
1896 			carpa->carpa_baddr.sin_addr =
1897 			vha->vha_iaback->ia_addr.sin_addr;
1898 		}
1899 
1900 		++carpa;
1901 		++count;
1902 		len -= sizeof(*carpa);
1903 	}
1904 	cmsg->nc_datalen = sizeof(*carpa) * count;
1905 	KKASSERT(cmsg->nc_datalen > 0);
1906 
1907 	cmsg->nc_data = carpa0;
1908 
1909 back:
1910 	lwkt_replymsg(&cmsg->base.lmsg, error);
1911 }
1912 
1913 static int
1914 carp_ioctl_getvhaddr(struct carp_softc *sc, struct ifdrv *ifd)
1915 {
1916 	struct ifnet *ifp = &sc->arpcom.ac_if;
1917 	struct netmsg_carp cmsg;
1918 	int error;
1919 
1920 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
1921 	ifnet_deserialize_all(ifp);
1922 
1923 	bzero(&cmsg, sizeof(cmsg));
1924 	netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
1925 	    carp_ioctl_getvhaddr_dispatch);
1926 	cmsg.nc_softc = sc;
1927 	cmsg.nc_datalen = ifd->ifd_len;
1928 
1929 	error = lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
1930 
1931 	if (!error) {
1932 		if (cmsg.nc_data != NULL) {
1933 			error = copyout(cmsg.nc_data, ifd->ifd_data,
1934 			    cmsg.nc_datalen);
1935 			kfree(cmsg.nc_data, M_TEMP);
1936 		}
1937 		ifd->ifd_len = cmsg.nc_datalen;
1938 	} else {
1939 		KASSERT(cmsg.nc_data == NULL,
1940 		    ("%s temp vhaddr is alloc upon error", __func__));
1941 	}
1942 
1943 	ifnet_serialize_all(ifp);
1944 	return error;
1945 }
1946 
1947 static int
1948 carp_config_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha,
1949     struct in_ifaddr *ia_del)
1950 {
1951 	struct ifnet *ifp;
1952 	struct in_ifaddr *ia_if;
1953 	const struct in_ifaddr *ia_vha;
1954 	struct in_ifaddr_container *iac;
1955 	int own, ia_match_carpdev;
1956 
1957 	KKASSERT(vha->vha_ia != NULL);
1958 	ia_vha = vha->vha_ia;
1959 
1960 	ia_if = NULL;
1961 	own = 0;
1962 	ia_match_carpdev = 0;
1963 	TAILQ_FOREACH(iac, &in_ifaddrheads[mycpuid], ia_link) {
1964 		struct in_ifaddr *ia = iac->ia;
1965 
1966 		if (ia == ia_del)
1967 			continue;
1968 
1969 		if (ia->ia_ifp->if_type == IFT_CARP)
1970 			continue;
1971 
1972 		if ((ia->ia_ifp->if_flags & IFF_UP) == 0)
1973 			continue;
1974 
1975 		/* and, yeah, we need a multicast-capable iface too */
1976 		if ((ia->ia_ifp->if_flags & IFF_MULTICAST) == 0)
1977 			continue;
1978 
1979 		if (ia_vha->ia_subnetmask == ia->ia_subnetmask &&
1980 		    ia_vha->ia_subnet == ia->ia_subnet) {
1981 			if (ia_vha->ia_addr.sin_addr.s_addr ==
1982 			    ia->ia_addr.sin_addr.s_addr)
1983 				own = 1;
1984 			if (ia_if == NULL) {
1985 				ia_if = ia;
1986 			} else if (sc->sc_carpdev != NULL &&
1987 			    sc->sc_carpdev == ia->ia_ifp) {
1988 				ia_if = ia;
1989 				if (ia_if->ia_flags & IFA_ROUTE) {
1990 					/*
1991 					 * Address with prefix route
1992 					 * is prefered
1993 					 */
1994 					break;
1995 				}
1996 				ia_match_carpdev = 1;
1997 			} else if (!ia_match_carpdev) {
1998 				if (ia->ia_flags & IFA_ROUTE) {
1999 					/*
2000 					 * Address with prefix route
2001 					 * is prefered over others.
2002 					 */
2003 					ia_if = ia;
2004 				}
2005 			}
2006 		}
2007 	}
2008 
2009 	carp_deactivate_vhaddr(sc, vha, FALSE);
2010 	if (!ia_if)
2011 		return ENOENT;
2012 
2013 	ifp = ia_if->ia_ifp;
2014 
2015 	/* XXX Don't allow parent iface to be changed */
2016 	if (sc->sc_carpdev != NULL && sc->sc_carpdev != ifp)
2017 		return EEXIST;
2018 
2019 	return carp_activate_vhaddr(sc, vha, ifp, ia_if, own);
2020 }
2021 
2022 static void
2023 carp_add_addr(struct carp_softc *sc, struct ifaddr *carp_ifa)
2024 {
2025 	struct carp_vhaddr *vha_new;
2026 	struct in_ifaddr *carp_ia;
2027 #ifdef INVARIANTS
2028 	struct carp_vhaddr *vha;
2029 #endif
2030 
2031 	KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET);
2032 	carp_ia = ifatoia(carp_ifa);
2033 
2034 #ifdef INVARIANTS
2035 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
2036 		KKASSERT(vha->vha_ia != NULL && vha->vha_ia != carp_ia);
2037 #endif
2038 
2039 	vha_new = kmalloc(sizeof(*vha_new), M_CARP, M_WAITOK | M_ZERO);
2040 	vha_new->vha_ia = carp_ia;
2041 	carp_insert_vhaddr(sc, vha_new);
2042 
2043 	if (carp_config_vhaddr(sc, vha_new, NULL) != 0) {
2044 		/*
2045 		 * If the above configuration fails, it may only mean
2046 		 * that the new address is problematic.  However, the
2047 		 * carp(4) interface may already have several working
2048 		 * addresses.  Since the expected behaviour of
2049 		 * SIOC[AS]IFADDR is to put the NIC into working state,
2050 		 * we try starting the state machine manually here with
2051 		 * the hope that the carp(4)'s previously working
2052 		 * addresses still could be brought up.
2053 		 */
2054 		carp_hmac_prepare(sc);
2055 		carp_set_state(sc, INIT);
2056 		carp_setrun(sc, 0);
2057 	}
2058 }
2059 
2060 static void
2061 carp_del_addr(struct carp_softc *sc, struct ifaddr *carp_ifa)
2062 {
2063 	struct carp_vhaddr *vha;
2064 	struct in_ifaddr *carp_ia;
2065 
2066 	KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET);
2067 	carp_ia = ifatoia(carp_ifa);
2068 
2069 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
2070 		KKASSERT(vha->vha_ia != NULL);
2071 		if (vha->vha_ia == carp_ia)
2072 			break;
2073 	}
2074 	KASSERT(vha != NULL, ("no corresponding vhaddr %p", carp_ifa));
2075 
2076 	/*
2077 	 * Remove the vhaddr from the list before deactivating
2078 	 * the vhaddr, so that the HMAC could be correctly
2079 	 * updated in carp_deactivate_vhaddr()
2080 	 */
2081 	carp_remove_vhaddr(sc, vha);
2082 
2083 	carp_deactivate_vhaddr(sc, vha, FALSE);
2084 	kfree(vha, M_CARP);
2085 }
2086 
2087 static void
2088 carp_config_addr(struct carp_softc *sc, struct ifaddr *carp_ifa)
2089 {
2090 	struct carp_vhaddr *vha;
2091 	struct in_ifaddr *carp_ia;
2092 
2093 	KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET);
2094 	carp_ia = ifatoia(carp_ifa);
2095 
2096 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
2097 		KKASSERT(vha->vha_ia != NULL);
2098 		if (vha->vha_ia == carp_ia)
2099 			break;
2100 	}
2101 	KASSERT(vha != NULL, ("no corresponding vhaddr %p", carp_ifa));
2102 
2103 	/* Remove then reinsert, to keep the vhaddr list sorted */
2104 	carp_remove_vhaddr(sc, vha);
2105 	carp_insert_vhaddr(sc, vha);
2106 
2107 	if (carp_config_vhaddr(sc, vha, NULL) != 0) {
2108 		/* See the comment in carp_add_addr() */
2109 		carp_hmac_prepare(sc);
2110 		carp_set_state(sc, INIT);
2111 		carp_setrun(sc, 0);
2112 	}
2113 }
2114 
2115 #ifdef notyet
2116 
2117 #ifdef INET6
2118 static int
2119 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
2120 {
2121 	struct ifnet *ifp;
2122 	struct carp_if *cif;
2123 	struct in6_ifaddr *ia, *ia_if;
2124 	struct ip6_moptions *im6o = &sc->sc_im6o;
2125 	struct in6_multi_mship *imm;
2126 	struct in6_addr in6;
2127 	int own, error;
2128 
2129 	if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
2130 		carp_setrun(sc, 0);
2131 		return (0);
2132 	}
2133 
2134 	/* we have to do it by hands to check we won't match on us */
2135 	ia_if = NULL; own = 0;
2136 	for (ia = in6_ifaddr; ia; ia = ia->ia_next) {
2137 		int i;
2138 
2139 		for (i = 0; i < 4; i++) {
2140 			if ((sin6->sin6_addr.s6_addr32[i] &
2141 			    ia->ia_prefixmask.sin6_addr.s6_addr32[i]) !=
2142 			    (ia->ia_addr.sin6_addr.s6_addr32[i] &
2143 			    ia->ia_prefixmask.sin6_addr.s6_addr32[i]))
2144 				break;
2145 		}
2146 		/* and, yeah, we need a multicast-capable iface too */
2147 		if (ia->ia_ifp != &sc->sc_if &&
2148 		    (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
2149 		    (i == 4)) {
2150 			if (!ia_if)
2151 				ia_if = ia;
2152 			if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
2153 			    &ia->ia_addr.sin6_addr))
2154 				own++;
2155 		}
2156 	}
2157 
2158 	if (!ia_if)
2159 		return (EADDRNOTAVAIL);
2160 	ia = ia_if;
2161 	ifp = ia->ia_ifp;
2162 
2163 	if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 ||
2164 	    (im6o->im6o_multicast_ifp && im6o->im6o_multicast_ifp != ifp))
2165 		return (EADDRNOTAVAIL);
2166 
2167 	if (!sc->sc_naddrs6) {
2168 		im6o->im6o_multicast_ifp = ifp;
2169 
2170 		/* join CARP multicast address */
2171 		bzero(&in6, sizeof(in6));
2172 		in6.s6_addr16[0] = htons(0xff02);
2173 		in6.s6_addr8[15] = 0x12;
2174 		if (in6_setscope(&in6, ifp, NULL) != 0)
2175 			goto cleanup;
2176 		if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL)
2177 			goto cleanup;
2178 		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
2179 
2180 		/* join solicited multicast address */
2181 		bzero(&in6, sizeof(in6));
2182 		in6.s6_addr16[0] = htons(0xff02);
2183 		in6.s6_addr32[1] = 0;
2184 		in6.s6_addr32[2] = htonl(1);
2185 		in6.s6_addr32[3] = sin6->sin6_addr.s6_addr32[3];
2186 		in6.s6_addr8[12] = 0xff;
2187 		if (in6_setscope(&in6, ifp, NULL) != 0)
2188 			goto cleanup;
2189 		if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL)
2190 			goto cleanup;
2191 		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
2192 	}
2193 
2194 #ifdef foo
2195 	if (!ifp->if_carp) {
2196 		cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO);
2197 
2198 		if ((error = ifpromisc(ifp, 1))) {
2199 			kfree(cif, M_CARP);
2200 			goto cleanup;
2201 		}
2202 
2203 		TAILQ_INIT(&cif->vhif_vrs);
2204 		ifp->if_carp = cif;
2205 	} else {
2206 		struct carp_softc *vr;
2207 
2208 		cif = ifp->if_carp;
2209 		TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
2210 			if (vr != sc && vr->sc_vhid == sc->sc_vhid) {
2211 				error = EINVAL;
2212 				goto cleanup;
2213 			}
2214 		}
2215 	}
2216 #endif
2217 	sc->sc_ia6 = ia;
2218 	sc->sc_carpdev = ifp;
2219 
2220 #ifdef foo
2221 	{ /* XXX prevent endless loop if already in queue */
2222 	struct carp_softc *vr, *after = NULL;
2223 	int myself = 0;
2224 	cif = ifp->if_carp;
2225 
2226 	TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
2227 		if (vr == sc)
2228 			myself = 1;
2229 		if (vr->sc_vhid < sc->sc_vhid)
2230 			after = vr;
2231 	}
2232 
2233 	if (!myself) {
2234 		/* We're trying to keep things in order */
2235 		if (after == NULL)
2236 			TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
2237 		else
2238 			TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
2239 	}
2240 	}
2241 #endif
2242 
2243 	sc->sc_naddrs6++;
2244 	if (own)
2245 		sc->sc_advskew = 0;
2246 	carp_sc_state(sc);
2247 	carp_setrun(sc, 0);
2248 
2249 	return (0);
2250 
2251 cleanup:
2252 	/* clean up multicast memberships */
2253 	if (!sc->sc_naddrs6) {
2254 		while (!LIST_EMPTY(&im6o->im6o_memberships)) {
2255 			imm = LIST_FIRST(&im6o->im6o_memberships);
2256 			LIST_REMOVE(imm, i6mm_chain);
2257 			in6_leavegroup(imm);
2258 		}
2259 	}
2260 	return (error);
2261 }
2262 
2263 static int
2264 carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
2265 {
2266 	int error = 0;
2267 
2268 	if (!--sc->sc_naddrs6) {
2269 		struct carp_if *cif = sc->sc_carpdev->if_carp;
2270 		struct ip6_moptions *im6o = &sc->sc_im6o;
2271 
2272 		callout_stop(&sc->sc_ad_tmo);
2273 		sc->sc_vhid = -1;
2274 		while (!LIST_EMPTY(&im6o->im6o_memberships)) {
2275 			struct in6_multi_mship *imm =
2276 			    LIST_FIRST(&im6o->im6o_memberships);
2277 
2278 			LIST_REMOVE(imm, i6mm_chain);
2279 			in6_leavegroup(imm);
2280 		}
2281 		im6o->im6o_multicast_ifp = NULL;
2282 #ifdef foo
2283 		TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
2284 		if (TAILQ_EMPTY(&cif->vhif_vrs)) {
2285 			sc->sc_carpdev->if_carp = NULL;
2286 			kfree(cif, M_IFADDR);
2287 		}
2288 #endif
2289 	}
2290 	return (error);
2291 }
2292 #endif /* INET6 */
2293 
2294 #endif
2295 
2296 static int
2297 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr, struct ucred *cr)
2298 {
2299 	struct carp_softc *sc = ifp->if_softc;
2300 	struct ifreq *ifr = (struct ifreq *)addr;
2301 	struct ifdrv *ifd = (struct ifdrv *)addr;
2302 	int error = 0;
2303 
2304 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
2305 
2306 	switch (cmd) {
2307 	case SIOCSIFFLAGS:
2308 		if (ifp->if_flags & IFF_UP) {
2309 			if ((ifp->if_flags & IFF_RUNNING) == 0)
2310 				carp_init(sc);
2311 		} else if (ifp->if_flags & IFF_RUNNING) {
2312 			carp_ioctl_stop(sc);
2313 		}
2314 		break;
2315 
2316 	case SIOCSVH:
2317 		error = carp_ioctl_setvh(sc, ifr->ifr_data, cr);
2318 		break;
2319 
2320 	case SIOCGVH:
2321 		error = carp_ioctl_getvh(sc, ifr->ifr_data, cr);
2322 		break;
2323 
2324 	case SIOCGDRVSPEC:
2325 		switch (ifd->ifd_cmd) {
2326 		case CARPGDEVNAME:
2327 			error = carp_ioctl_getdevname(sc, ifd);
2328 			break;
2329 
2330 		case CARPGVHADDR:
2331 			error = carp_ioctl_getvhaddr(sc, ifd);
2332 			break;
2333 
2334 		default:
2335 			error = EINVAL;
2336 			break;
2337 		}
2338 		break;
2339 
2340 	default:
2341 		error = ether_ioctl(ifp, cmd, addr);
2342 		break;
2343 	}
2344 
2345 	return error;
2346 }
2347 
2348 static void
2349 carp_ioctl_stop_dispatch(netmsg_t msg)
2350 {
2351 	struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2352 	struct carp_softc *sc = cmsg->nc_softc;
2353 
2354 	carp_stop(sc, FALSE);
2355 	lwkt_replymsg(&cmsg->base.lmsg, 0);
2356 }
2357 
2358 static void
2359 carp_ioctl_stop(struct carp_softc *sc)
2360 {
2361 	struct ifnet *ifp = &sc->arpcom.ac_if;
2362 	struct netmsg_carp cmsg;
2363 
2364 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
2365 
2366 	ifnet_deserialize_all(ifp);
2367 
2368 	bzero(&cmsg, sizeof(cmsg));
2369 	netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2370 	    carp_ioctl_stop_dispatch);
2371 	cmsg.nc_softc = sc;
2372 
2373 	lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2374 
2375 	ifnet_serialize_all(ifp);
2376 }
2377 
2378 static void
2379 carp_ioctl_setvh_dispatch(netmsg_t msg)
2380 {
2381 	struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2382 	struct carp_softc *sc = cmsg->nc_softc;
2383 	struct ifnet *ifp = &sc->arpcom.ac_if;
2384 	const struct carpreq *carpr = cmsg->nc_data;
2385 	int error;
2386 
2387 	error = 1;
2388 	if ((ifp->if_flags & IFF_RUNNING) &&
2389 	    sc->sc_state != INIT && carpr->carpr_state != sc->sc_state) {
2390 		switch (carpr->carpr_state) {
2391 		case BACKUP:
2392 			callout_stop(&sc->sc_ad_tmo);
2393 			carp_set_state(sc, BACKUP);
2394 			carp_setrun(sc, 0);
2395 			carp_setroute(sc, RTM_DELETE);
2396 			break;
2397 
2398 		case MASTER:
2399 			carp_master_down(sc);
2400 			break;
2401 
2402 		default:
2403 			break;
2404 		}
2405 	}
2406 	if (carpr->carpr_vhid > 0) {
2407 		if (carpr->carpr_vhid > 255) {
2408 			error = EINVAL;
2409 			goto back;
2410 		}
2411 		if (sc->sc_carpdev) {
2412 			struct carp_if *cif = sc->sc_carpdev->if_carp;
2413 			struct carp_softc_container *scc;
2414 
2415 			TAILQ_FOREACH(scc, cif, scc_link) {
2416 				struct carp_softc *vr = scc->scc_softc;
2417 
2418 				if (vr != sc &&
2419 				    vr->sc_vhid == carpr->carpr_vhid) {
2420 					error = EEXIST;
2421 					goto back;
2422 				}
2423 			}
2424 		}
2425 		sc->sc_vhid = carpr->carpr_vhid;
2426 
2427 		IF_LLADDR(ifp)[5] = sc->sc_vhid;
2428 		bcopy(IF_LLADDR(ifp), sc->arpcom.ac_enaddr,
2429 		    ETHER_ADDR_LEN);
2430 
2431 		error--;
2432 	}
2433 	if (carpr->carpr_advbase > 0 || carpr->carpr_advskew > 0) {
2434 		if (carpr->carpr_advskew >= 255) {
2435 			error = EINVAL;
2436 			goto back;
2437 		}
2438 		if (carpr->carpr_advbase > 255) {
2439 			error = EINVAL;
2440 			goto back;
2441 		}
2442 		sc->sc_advbase = carpr->carpr_advbase;
2443 		sc->sc_advskew = carpr->carpr_advskew;
2444 		error--;
2445 	}
2446 	bcopy(carpr->carpr_key, sc->sc_key, sizeof(sc->sc_key));
2447 	if (error > 0) {
2448 		error = EINVAL;
2449 	} else {
2450 		error = 0;
2451 		carp_setrun(sc, 0);
2452 	}
2453 back:
2454 	carp_hmac_prepare(sc);
2455 
2456 	lwkt_replymsg(&cmsg->base.lmsg, error);
2457 }
2458 
2459 static int
2460 carp_ioctl_setvh(struct carp_softc *sc, void *udata, struct ucred *cr)
2461 {
2462 	struct ifnet *ifp = &sc->arpcom.ac_if;
2463 	struct netmsg_carp cmsg;
2464 	struct carpreq carpr;
2465 	int error;
2466 
2467 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
2468 	ifnet_deserialize_all(ifp);
2469 
2470 	error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY);
2471 	if (error)
2472 		goto back;
2473 
2474 	error = copyin(udata, &carpr, sizeof(carpr));
2475 	if (error)
2476 		goto back;
2477 
2478 	bzero(&cmsg, sizeof(cmsg));
2479 	netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2480 	    carp_ioctl_setvh_dispatch);
2481 	cmsg.nc_softc = sc;
2482 	cmsg.nc_data = &carpr;
2483 
2484 	error = lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2485 
2486 back:
2487 	ifnet_serialize_all(ifp);
2488 	return error;
2489 }
2490 
2491 static void
2492 carp_ioctl_getvh_dispatch(netmsg_t msg)
2493 {
2494 	struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2495 	struct carp_softc *sc = cmsg->nc_softc;
2496 	struct carpreq *carpr = cmsg->nc_data;
2497 
2498 	carpr->carpr_state = sc->sc_state;
2499 	carpr->carpr_vhid = sc->sc_vhid;
2500 	carpr->carpr_advbase = sc->sc_advbase;
2501 	carpr->carpr_advskew = sc->sc_advskew;
2502 	bcopy(sc->sc_key, carpr->carpr_key, sizeof(carpr->carpr_key));
2503 
2504 	lwkt_replymsg(&cmsg->base.lmsg, 0);
2505 }
2506 
2507 static int
2508 carp_ioctl_getvh(struct carp_softc *sc, void *udata, struct ucred *cr)
2509 {
2510 	struct ifnet *ifp = &sc->arpcom.ac_if;
2511 	struct netmsg_carp cmsg;
2512 	struct carpreq carpr;
2513 	int error;
2514 
2515 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
2516 	ifnet_deserialize_all(ifp);
2517 
2518 	bzero(&cmsg, sizeof(cmsg));
2519 	netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2520 	    carp_ioctl_getvh_dispatch);
2521 	cmsg.nc_softc = sc;
2522 	cmsg.nc_data = &carpr;
2523 
2524 	lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2525 
2526 	error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY);
2527 	if (error)
2528 		bzero(carpr.carpr_key, sizeof(carpr.carpr_key));
2529 
2530 	error = copyout(&carpr, udata, sizeof(carpr));
2531 
2532 	ifnet_serialize_all(ifp);
2533 	return error;
2534 }
2535 
2536 static void
2537 carp_ioctl_getdevname_dispatch(netmsg_t msg)
2538 {
2539 	struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2540 	struct carp_softc *sc = cmsg->nc_softc;
2541 	char *devname = cmsg->nc_data;
2542 
2543 	bzero(devname, IFNAMSIZ);
2544 	if (sc->sc_carpdev != NULL)
2545 		strlcpy(devname, sc->sc_carpdev->if_xname, IFNAMSIZ);
2546 
2547 	lwkt_replymsg(&cmsg->base.lmsg, 0);
2548 }
2549 
2550 static int
2551 carp_ioctl_getdevname(struct carp_softc *sc, struct ifdrv *ifd)
2552 {
2553 	struct ifnet *ifp = &sc->arpcom.ac_if;
2554 	struct netmsg_carp cmsg;
2555 	char devname[IFNAMSIZ];
2556 	int error;
2557 
2558 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
2559 
2560 	if (ifd->ifd_len != sizeof(devname))
2561 		return EINVAL;
2562 
2563 	ifnet_deserialize_all(ifp);
2564 
2565 	bzero(&cmsg, sizeof(cmsg));
2566 	netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2567 	    carp_ioctl_getdevname_dispatch);
2568 	cmsg.nc_softc = sc;
2569 	cmsg.nc_data = devname;
2570 
2571 	lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2572 
2573 	error = copyout(devname, ifd->ifd_data, sizeof(devname));
2574 
2575 	ifnet_serialize_all(ifp);
2576 	return error;
2577 }
2578 
2579 static void
2580 carp_init_dispatch(netmsg_t msg)
2581 {
2582 	struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2583 	struct carp_softc *sc = cmsg->nc_softc;
2584 
2585 	sc->sc_if.if_flags |= IFF_RUNNING;
2586 	carp_hmac_prepare(sc);
2587 	carp_set_state(sc, INIT);
2588 	carp_setrun(sc, 0);
2589 
2590 	lwkt_replymsg(&cmsg->base.lmsg, 0);
2591 }
2592 
2593 static void
2594 carp_init(void *xsc)
2595 {
2596 	struct carp_softc *sc = xsc;
2597 	struct ifnet *ifp = &sc->arpcom.ac_if;
2598 	struct netmsg_carp cmsg;
2599 
2600 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
2601 
2602 	ifnet_deserialize_all(ifp);
2603 
2604 	bzero(&cmsg, sizeof(cmsg));
2605 	netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2606 	    carp_init_dispatch);
2607 	cmsg.nc_softc = sc;
2608 
2609 	lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2610 
2611 	ifnet_serialize_all(ifp);
2612 }
2613 
2614 static int
2615 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
2616     struct rtentry *rt)
2617 {
2618 	struct carp_softc *sc = ifp->if_softc;
2619 	struct ifnet *carpdev;
2620 	int error = 0;
2621 
2622 	carpdev = sc->sc_carpdev;
2623 	if (carpdev != NULL) {
2624 		/*
2625 		 * NOTE:
2626 		 * CARP's ifp is passed to backing device's
2627 		 * if_output method.
2628 		 */
2629 		carpdev->if_output(ifp, m, dst, rt);
2630 	} else {
2631 		m_freem(m);
2632 		error = ENETUNREACH;
2633 	}
2634 	return error;
2635 }
2636 
2637 /*
2638  * Start output on carp interface. This function should never be called.
2639  */
2640 static void
2641 carp_start(struct ifnet *ifp, struct ifaltq_subque *ifsq __unused)
2642 {
2643 	panic("%s: start called", ifp->if_xname);
2644 }
2645 
2646 static void
2647 carp_set_state(struct carp_softc *sc, int state)
2648 {
2649 	struct ifnet *cifp = &sc->sc_if;
2650 
2651 	if (sc->sc_state == state)
2652 		return;
2653 	sc->sc_state = state;
2654 
2655 	switch (sc->sc_state) {
2656 	case BACKUP:
2657 		cifp->if_link_state = LINK_STATE_DOWN;
2658 		break;
2659 
2660 	case MASTER:
2661 		cifp->if_link_state = LINK_STATE_UP;
2662 		break;
2663 
2664 	default:
2665 		cifp->if_link_state = LINK_STATE_UNKNOWN;
2666 		break;
2667 	}
2668 	rt_ifmsg(cifp);
2669 }
2670 
2671 void
2672 carp_group_demote_adj(struct ifnet *ifp, int adj)
2673 {
2674 	struct ifg_list	*ifgl;
2675 	int *dm;
2676 
2677 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
2678 		if (!strcmp(ifgl->ifgl_group->ifg_group, IFG_ALL))
2679 			continue;
2680 		dm = &ifgl->ifgl_group->ifg_carp_demoted;
2681 
2682 		if (*dm + adj >= 0)
2683 			*dm += adj;
2684 		else
2685 			*dm = 0;
2686 
2687 		if (adj > 0 && *dm == 1)
2688 			carp_send_ad_all();
2689 		CARP_LOG("%s demoted group %s to %d", ifp->if_xname,
2690                     ifgl->ifgl_group->ifg_group, *dm);
2691 	}
2692 }
2693 
2694 #ifdef foo
2695 void
2696 carp_carpdev_state(void *v)
2697 {
2698 	struct carp_if *cif = v;
2699 	struct carp_softc *sc;
2700 
2701 	TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list)
2702 		carp_sc_state(sc);
2703 }
2704 
2705 static void
2706 carp_sc_state(struct carp_softc *sc)
2707 {
2708 	if (!(sc->sc_carpdev->if_flags & IFF_UP)) {
2709 		callout_stop(&sc->sc_ad_tmo);
2710 		callout_stop(&sc->sc_md_tmo);
2711 		callout_stop(&sc->sc_md6_tmo);
2712 		carp_set_state(sc, INIT);
2713 		carp_setrun(sc, 0);
2714 		if (!sc->sc_suppress) {
2715 			carp_suppress_preempt++;
2716 			if (carp_suppress_preempt == 1)
2717 				carp_send_ad_all();
2718 		}
2719 		sc->sc_suppress = 1;
2720 	} else {
2721 		carp_set_state(sc, INIT);
2722 		carp_setrun(sc, 0);
2723 		if (sc->sc_suppress)
2724 			carp_suppress_preempt--;
2725 		sc->sc_suppress = 0;
2726 	}
2727 }
2728 #endif
2729 
2730 static void
2731 carp_stop(struct carp_softc *sc, boolean_t detach)
2732 {
2733 	sc->sc_if.if_flags &= ~IFF_RUNNING;
2734 
2735 	callout_stop(&sc->sc_ad_tmo);
2736 	callout_stop(&sc->sc_md_tmo);
2737 	callout_stop(&sc->sc_md6_tmo);
2738 
2739 	if (!detach && sc->sc_state == MASTER)
2740 		carp_send_ad(sc);
2741 
2742 	if (sc->sc_suppress)
2743 		carp_suppress_preempt--;
2744 	sc->sc_suppress = 0;
2745 
2746 	if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS)
2747 		carp_suppress_preempt--;
2748 	sc->sc_sendad_errors = 0;
2749 	sc->sc_sendad_success = 0;
2750 
2751 	carp_set_state(sc, INIT);
2752 	carp_setrun(sc, 0);
2753 }
2754 
2755 static void
2756 carp_suspend(struct carp_softc *sc, boolean_t detach)
2757 {
2758 	struct ifnet *cifp = &sc->sc_if;
2759 
2760 	carp_stop(sc, detach);
2761 
2762 	/* Retain the running state, if we are not dead yet */
2763 	if (!sc->sc_dead && (cifp->if_flags & IFF_UP))
2764 		cifp->if_flags |= IFF_RUNNING;
2765 }
2766 
2767 static int
2768 carp_activate_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha,
2769     struct ifnet *ifp, struct in_ifaddr *ia_if, int own)
2770 {
2771 	struct ip_moptions *imo = &sc->sc_imo;
2772 	struct carp_if *ocif = ifp->if_carp;
2773 	int error;
2774 
2775 	KKASSERT(vha->vha_ia != NULL);
2776 
2777 	KASSERT(ia_if != NULL, ("NULL backing address"));
2778 	KASSERT(vha->vha_iaback == NULL, ("%p is already activated", vha));
2779 	KASSERT((vha->vha_flags & CARP_VHAF_OWNER) == 0,
2780 		("inactive vhaddr %p is the address owner", vha));
2781 
2782 	KASSERT(sc->sc_carpdev == NULL || sc->sc_carpdev == ifp,
2783 		("%s is already on %s", sc->sc_if.if_xname,
2784 		 sc->sc_carpdev->if_xname));
2785 
2786 	if (ocif == NULL) {
2787 		KASSERT(sc->sc_carpdev == NULL,
2788 			("%s is already on %s", sc->sc_if.if_xname,
2789 			 sc->sc_carpdev->if_xname));
2790 
2791 		error = ifpromisc(ifp, 1);
2792 		if (error)
2793 			return error;
2794 	} else {
2795 		struct carp_softc_container *scc;
2796 
2797 		TAILQ_FOREACH(scc, ocif, scc_link) {
2798 			struct carp_softc *vr = scc->scc_softc;
2799 
2800 			if (vr != sc && vr->sc_vhid == sc->sc_vhid)
2801 				return EINVAL;
2802 		}
2803 	}
2804 
2805 	ifp->if_carp = carp_if_insert(ocif, sc);
2806 	KASSERT(ifp->if_carp != NULL, ("%s carp_if_insert failed", __func__));
2807 
2808 	sc->sc_ia = ia_if;
2809 	sc->sc_carpdev = ifp;
2810 
2811 	/*
2812 	 * Make sure that all protocol threads see the sc_carpdev and
2813 	 * if_carp changes
2814 	 */
2815 	netmsg_service_sync();
2816 
2817 	if (ocif != NULL && ifp->if_carp != ocif) {
2818 		/*
2819 		 * The old carp list could be safely free now,
2820 		 * since no one can access it.
2821 		 */
2822 		carp_if_free(ocif);
2823 	}
2824 
2825 	vha->vha_iaback = ia_if;
2826 	sc->sc_naddrs++;
2827 
2828 	if (own) {
2829 		vha->vha_flags |= CARP_VHAF_OWNER;
2830 
2831 		/* XXX save user configured advskew? */
2832 		sc->sc_advskew = 0;
2833 	}
2834 
2835 	carp_addroute_vhaddr(sc, vha);
2836 
2837 	/*
2838 	 * Join the multicast group only after the backing interface
2839 	 * has been hooked with the CARP interface.
2840 	 */
2841 	KASSERT(imo->imo_multicast_ifp == NULL ||
2842 		imo->imo_multicast_ifp == &sc->sc_if,
2843 		("%s didn't leave mcast group on %s",
2844 		 sc->sc_if.if_xname, imo->imo_multicast_ifp->if_xname));
2845 
2846 	if (imo->imo_num_memberships == 0) {
2847 		struct in_addr addr;
2848 
2849 		addr.s_addr = htonl(INADDR_CARP_GROUP);
2850 		imo->imo_membership[0] = in_addmulti(&addr, &sc->sc_if);
2851 		if (imo->imo_membership[0] == NULL) {
2852 			carp_deactivate_vhaddr(sc, vha, FALSE);
2853 			return ENOBUFS;
2854 		}
2855 
2856 		imo->imo_num_memberships++;
2857 		imo->imo_multicast_ifp = &sc->sc_if;
2858 		imo->imo_multicast_ttl = CARP_DFLTTL;
2859 		imo->imo_multicast_loop = 0;
2860 	}
2861 
2862 	carp_hmac_prepare(sc);
2863 	carp_set_state(sc, INIT);
2864 	carp_setrun(sc, 0);
2865 	return 0;
2866 }
2867 
2868 static void
2869 carp_deactivate_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha,
2870     boolean_t del_iaback)
2871 {
2872 	KKASSERT(vha->vha_ia != NULL);
2873 
2874 	carp_hmac_prepare(sc);
2875 
2876 	if (vha->vha_iaback == NULL) {
2877 		KASSERT((vha->vha_flags & CARP_VHAF_OWNER) == 0,
2878 			("inactive vhaddr %p is the address owner", vha));
2879 		return;
2880 	}
2881 
2882 	vha->vha_flags &= ~CARP_VHAF_OWNER;
2883 	carp_delroute_vhaddr(sc, vha, del_iaback);
2884 
2885 	KKASSERT(sc->sc_naddrs > 0);
2886 	vha->vha_iaback = NULL;
2887 	sc->sc_naddrs--;
2888 	if (!sc->sc_naddrs) {
2889 		if (sc->sc_naddrs6) {
2890 			carp_multicast_cleanup(sc);
2891 			sc->sc_ia = NULL;
2892 		} else {
2893 			carp_detach(sc, FALSE, del_iaback);
2894 		}
2895 	}
2896 }
2897 
2898 static void
2899 carp_link_addrs(struct carp_softc *sc, struct ifnet *ifp, struct ifaddr *ifa_if)
2900 {
2901 	struct carp_vhaddr *vha;
2902 	struct in_ifaddr *ia_if;
2903 
2904 	KKASSERT(ifa_if->ifa_addr->sa_family == AF_INET);
2905 	ia_if = ifatoia(ifa_if);
2906 
2907 	/*
2908 	 * Test each inactive vhaddr against the newly added address.
2909 	 * If the newly added address could be the backing address,
2910 	 * then activate the matching vhaddr.
2911 	 */
2912 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
2913 		const struct in_ifaddr *ia;
2914 		int own;
2915 
2916 		if (vha->vha_iaback != NULL)
2917 			continue;
2918 
2919 		ia = vha->vha_ia;
2920 		if (ia->ia_subnetmask != ia_if->ia_subnetmask ||
2921 		    ia->ia_subnet != ia_if->ia_subnet)
2922 			continue;
2923 
2924 		own = 0;
2925 		if (ia->ia_addr.sin_addr.s_addr ==
2926 		    ia_if->ia_addr.sin_addr.s_addr)
2927 			own = 1;
2928 
2929 		carp_activate_vhaddr(sc, vha, ifp, ia_if, own);
2930 	}
2931 }
2932 
2933 static void
2934 carp_unlink_addrs(struct carp_softc *sc, struct ifnet *ifp,
2935 		  struct ifaddr *ifa_if)
2936 {
2937 	struct carp_vhaddr *vha;
2938 	struct in_ifaddr *ia_if;
2939 
2940 	KKASSERT(ifa_if->ifa_addr->sa_family == AF_INET);
2941 	ia_if = ifatoia(ifa_if);
2942 
2943 	/*
2944 	 * Ad src address is deleted; set it to NULL.
2945 	 * Following loop will try pick up a new ad src address
2946 	 * if one of the vhaddr could retain its backing address.
2947 	 */
2948 	if (sc->sc_ia == ia_if)
2949 		sc->sc_ia = NULL;
2950 
2951 	/*
2952 	 * Test each active vhaddr against the deleted address.
2953 	 * If the deleted address is vhaddr address's backing
2954 	 * address, then deactivate the vhaddr.
2955 	 */
2956 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
2957 		if (vha->vha_iaback == NULL)
2958 			continue;
2959 
2960 		if (vha->vha_iaback == ia_if)
2961 			carp_deactivate_vhaddr(sc, vha, TRUE);
2962 		else if (sc->sc_ia == NULL)
2963 			sc->sc_ia = vha->vha_iaback;
2964 	}
2965 }
2966 
2967 static void
2968 carp_update_addrs(struct carp_softc *sc, struct ifaddr *ifa_del)
2969 {
2970 	struct carp_vhaddr *vha;
2971 
2972 	KKASSERT(sc->sc_carpdev == NULL);
2973 
2974 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
2975 		carp_config_vhaddr(sc, vha, ifatoia(ifa_del));
2976 }
2977 
2978 static void
2979 carp_ifaddr(void *arg __unused, struct ifnet *ifp,
2980 	    enum ifaddr_event event, struct ifaddr *ifa)
2981 {
2982 	struct carp_softc *sc;
2983 
2984 	if (ifa->ifa_addr->sa_family != AF_INET)
2985 		return;
2986 
2987 	KASSERT(&curthread->td_msgport == netisr_cpuport(0),
2988 	    ("not in netisr0"));
2989 
2990 	if (ifp->if_type == IFT_CARP) {
2991 		/*
2992 		 * Address is changed on carp(4) interface
2993 		 */
2994 		switch (event) {
2995 		case IFADDR_EVENT_ADD:
2996 			carp_add_addr(ifp->if_softc, ifa);
2997 			break;
2998 
2999 		case IFADDR_EVENT_CHANGE:
3000 			carp_config_addr(ifp->if_softc, ifa);
3001 			break;
3002 
3003 		case IFADDR_EVENT_DELETE:
3004 			carp_del_addr(ifp->if_softc, ifa);
3005 			break;
3006 		}
3007 		return;
3008 	}
3009 
3010 	/*
3011 	 * Address is changed on non-carp(4) interface
3012 	 */
3013 	if ((ifp->if_flags & IFF_MULTICAST) == 0)
3014 		return;
3015 
3016 	LIST_FOREACH(sc, &carpif_list, sc_next) {
3017 		if (sc->sc_carpdev != NULL && sc->sc_carpdev != ifp) {
3018 			/* Not the parent iface; skip */
3019 			continue;
3020 		}
3021 
3022 		switch (event) {
3023 		case IFADDR_EVENT_ADD:
3024 			carp_link_addrs(sc, ifp, ifa);
3025 			break;
3026 
3027 		case IFADDR_EVENT_DELETE:
3028 			if (sc->sc_carpdev != NULL) {
3029 				carp_unlink_addrs(sc, ifp, ifa);
3030 				if (sc->sc_carpdev == NULL) {
3031 					/*
3032 					 * We no longer have the parent
3033 					 * interface, however, certain
3034 					 * virtual addresses, which are
3035 					 * not used because they can't
3036 					 * match the previous parent
3037 					 * interface's addresses, may now
3038 					 * match different interface's
3039 					 * addresses.
3040 					 */
3041 					carp_update_addrs(sc, ifa);
3042 				}
3043 			} else {
3044 				/*
3045 				 * The carp(4) interface didn't have a
3046 				 * parent iface, so it is not possible
3047 				 * that it will contain any address to
3048 				 * be unlinked.
3049 				 */
3050 			}
3051 			break;
3052 
3053 		case IFADDR_EVENT_CHANGE:
3054 			if (sc->sc_carpdev == NULL) {
3055 				/*
3056 				 * The carp(4) interface didn't have a
3057 				 * parent iface, so it is not possible
3058 				 * that it will contain any address to
3059 				 * be updated.
3060 				 */
3061 				carp_link_addrs(sc, ifp, ifa);
3062 			} else {
3063 				/*
3064 				 * First try breaking tie with the old
3065 				 * address.  Then see whether we could
3066 				 * link certain vhaddr to the new address.
3067 				 * If that fails, i.e. carpdev is NULL,
3068 				 * we try a global update.
3069 				 *
3070 				 * NOTE: The above order is critical.
3071 				 */
3072 				carp_unlink_addrs(sc, ifp, ifa);
3073 				carp_link_addrs(sc, ifp, ifa);
3074 				if (sc->sc_carpdev == NULL) {
3075 					/*
3076 					 * See the comment in the above
3077 					 * IFADDR_EVENT_DELETE block.
3078 					 */
3079 					carp_update_addrs(sc, NULL);
3080 				}
3081 			}
3082 			break;
3083 		}
3084 	}
3085 }
3086 
3087 void
3088 carp_proto_ctlinput(netmsg_t msg)
3089 {
3090 	int cmd = msg->ctlinput.nm_cmd;
3091 	struct sockaddr *sa = msg->ctlinput.nm_arg;
3092 	struct in_ifaddr_container *iac;
3093 
3094 	TAILQ_FOREACH(iac, &in_ifaddrheads[mycpuid], ia_link) {
3095 		struct in_ifaddr *ia = iac->ia;
3096 		struct ifnet *ifp = ia->ia_ifp;
3097 
3098 		if (ifp->if_type == IFT_CARP)
3099 			continue;
3100 
3101 		if (ia->ia_ifa.ifa_addr == sa) {
3102 			if (cmd == PRC_IFDOWN) {
3103 				carp_ifaddr(NULL, ifp, IFADDR_EVENT_DELETE,
3104 				    &ia->ia_ifa);
3105 			} else if (cmd == PRC_IFUP) {
3106 				carp_ifaddr(NULL, ifp, IFADDR_EVENT_ADD,
3107 				    &ia->ia_ifa);
3108 			}
3109 			break;
3110 		}
3111 	}
3112 
3113 	lwkt_replymsg(&msg->lmsg, 0);
3114 }
3115 
3116 struct ifnet *
3117 carp_parent(struct ifnet *cifp)
3118 {
3119 	struct carp_softc *sc;
3120 
3121 	KKASSERT(cifp->if_type == IFT_CARP);
3122 	sc = cifp->if_softc;
3123 
3124 	return sc->sc_carpdev;
3125 }
3126 
/*
 * Flags for rtinit(): RTF_HOST if the interface of the in_ifaddr x is
 * a loopback or point-to-point interface, 0 otherwise.
 */
#define rtinitflags(x) \
	((((x)->ia_ifp->if_flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) != 0) \
		 ? RTF_HOST : 0)
3130 
3131 static int
3132 carp_addroute_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha)
3133 {
3134 	struct in_ifaddr *ia, *iaback;
3135 
3136 	if (sc->sc_state != MASTER)
3137 		return 0;
3138 
3139 	ia = vha->vha_ia;
3140 	KKASSERT(ia != NULL);
3141 
3142 	iaback = vha->vha_iaback;
3143 	KKASSERT(iaback != NULL);
3144 
3145 	return rtchange(&iaback->ia_ifa, &ia->ia_ifa);
3146 }
3147 
3148 static void
3149 carp_delroute_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha,
3150     boolean_t del_iaback)
3151 {
3152 	struct in_ifaddr *ia, *iaback;
3153 
3154 	ia = vha->vha_ia;
3155 	KKASSERT(ia != NULL);
3156 
3157 	iaback = vha->vha_iaback;
3158 	KKASSERT(iaback != NULL);
3159 
3160 	if (!del_iaback && (iaback->ia_ifp->if_flags & IFF_UP)) {
3161 		rtchange(&ia->ia_ifa, &iaback->ia_ifa);
3162 		return;
3163 	}
3164 
3165 	rtinit(&ia->ia_ifa, RTM_DELETE, rtinitflags(ia));
3166 	in_ifadown_force(&ia->ia_ifa, 1);
3167 	ia->ia_flags &= ~IFA_ROUTE;
3168 }
3169 
3170 static int
3171 carp_modevent(module_t mod, int type, void *data)
3172 {
3173 	switch (type) {
3174 	case MOD_LOAD:
3175 		LIST_INIT(&carpif_list);
3176 		carp_ifdetach_event =
3177 		EVENTHANDLER_REGISTER(ifnet_detach_event, carp_ifdetach, NULL,
3178 				      EVENTHANDLER_PRI_ANY);
3179 		carp_ifaddr_event =
3180 		EVENTHANDLER_REGISTER(ifaddr_event, carp_ifaddr, NULL,
3181 				      EVENTHANDLER_PRI_FIRST);
3182 		if_clone_attach(&carp_cloner);
3183 		break;
3184 
3185 	case MOD_UNLOAD:
3186 		EVENTHANDLER_DEREGISTER(ifnet_detach_event,
3187 					carp_ifdetach_event);
3188 		EVENTHANDLER_DEREGISTER(ifaddr_event,
3189 					carp_ifaddr_event);
3190 		if_clone_detach(&carp_cloner);
3191 		break;
3192 
3193 	default:
3194 		return (EINVAL);
3195 	}
3196 	return (0);
3197 }
3198 
3199 static moduledata_t carp_mod = {
3200 	"carp",
3201 	carp_modevent,
3202 	0
3203 };
3204 DECLARE_MODULE(carp, carp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
3205