xref: /dragonfly/sys/netinet/ip_carp.c (revision e98bdfd3)
1 /*
2  * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
3  * Copyright (c) 2003 Ryan McBride. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
18  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
23  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
24  * THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 /*
27  * $FreeBSD: src/sys/netinet/ip_carp.c,v 1.48 2007/02/02 09:39:09 glebius Exp $
28  */
29 
30 #include "opt_carp.h"
31 #include "opt_inet.h"
32 #include "opt_inet6.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/kernel.h>
37 #include <sys/in_cksum.h>
38 #include <sys/limits.h>
39 #include <sys/malloc.h>
40 #include <sys/mbuf.h>
41 #include <sys/msgport2.h>
42 #include <sys/time.h>
43 #include <sys/proc.h>
44 #include <sys/priv.h>
45 #include <sys/sockio.h>
46 #include <sys/socket.h>
47 #include <sys/sysctl.h>
48 #include <sys/syslog.h>
49 #include <sys/thread.h>
50 
51 #include <machine/stdarg.h>
52 #include <crypto/sha1.h>
53 
54 #include <net/bpf.h>
55 #include <net/ethernet.h>
56 #include <net/if.h>
57 #include <net/if_dl.h>
58 #include <net/if_types.h>
59 #include <net/route.h>
60 #include <net/if_clone.h>
61 #include <net/if_var.h>
62 #include <net/ifq_var.h>
63 #include <net/netmsg2.h>
64 #include <net/netisr2.h>
65 
66 #ifdef INET
67 #include <netinet/in.h>
68 #include <netinet/in_var.h>
69 #include <netinet/in_systm.h>
70 #include <netinet/ip.h>
71 #include <netinet/ip_var.h>
72 #include <netinet/if_ether.h>
73 #endif
74 
75 #ifdef INET6
76 #include <netinet/icmp6.h>
77 #include <netinet/ip6.h>
78 #include <netinet6/ip6_var.h>
79 #include <netinet6/scope6_var.h>
80 #include <netinet6/nd6.h>
81 #endif
82 
83 #include <netinet/ip_carp.h>
84 
85 /*
86  * Note about carp's MP safe approach:
87  *
88  * Brief: carp_softc (softc), carp_softc_container (scc)
89  *
90  * - All configuration operations, e.g. ioctl, adding/deleting inet addresses,
91  *   are serialized by netisr0, not by carp's serializer
92  *
93  * - Backing interface's if_carp and carp_softc's relationship:
94  *
95  *                +---------+
96  *     if_carp -->| carp_if |
97  *                +---------+
98  *                     |
99  *                     |
100  *                     V      +---------+
101  *                  +-----+   |         |
102  *                  | scc |-->|  softc  |
103  *                  +-----+   |         |
104  *                     |      +---------+
105  *                     |
106  *                     V      +---------+
107  *                  +-----+   |         |
108  *                  | scc |-->|  softc  |
109  *                  +-----+   |         |
110  *                            +---------+
111  *
112  * - if_carp creation, modification and deletion all happen in netisr0,
113  *   as stated previously.  Since if_carp is accessed by multiple netisrs,
114  *   the modification to if_carp is conducted in the following way:
115  *
116  *   Adding carp_softc:
117  *
118  *   1) Duplicate the old carp_if to new carp_if (ncif), and insert the
119  *      to-be-added carp_softc to the new carp_if (ncif):
120  *
121  *        if_carp                     ncif
122  *           |                         |
123  *           V                         V
124  *      +---------+               +---------+
125  *      | carp_if |               | carp_if |
126  *      +---------+               +---------+
127  *           |                         |
128  *           |                         |
129  *           V        +-------+        V
130  *        +-----+     |       |     +-----+
131  *        | scc |---->| softc |<----| scc |
132  *        +-----+     |       |     +-----+
133  *           |        +-------+        |
134  *           |                         |
135  *           V        +-------+        V
136  *        +-----+     |       |     +-----+
137  *        | scc |---->| softc |<----| scc |
138  *        +-----+     |       |     +-----+
139  *                    +-------+        |
140  *                                     |
141  *                    +-------+        V
142  *                    |       |     +-----+
143  *                    | softc |<----| scc |
144  *                    |       |     +-----+
145  *                    +-------+
146  *
147  *   2) Save if_carp into ocif and switch if_carp to ncif:
148  *
149  *          ocif                    if_carp
150  *           |                         |
151  *           V                         V
152  *      +---------+               +---------+
153  *      | carp_if |               | carp_if |
154  *      +---------+               +---------+
155  *           |                         |
156  *           |                         |
157  *           V        +-------+        V
158  *        +-----+     |       |     +-----+
159  *        | scc |---->| softc |<----| scc |
160  *        +-----+     |       |     +-----+
161  *           |        +-------+        |
162  *           |                         |
163  *           V        +-------+        V
164  *        +-----+     |       |     +-----+
165  *        | scc |---->| softc |<----| scc |
166  *        +-----+     |       |     +-----+
167  *                    +-------+        |
168  *                                     |
169  *                    +-------+        V
170  *                    |       |     +-----+
171  *                    | softc |<----| scc |
172  *                    |       |     +-----+
173  *                    +-------+
174  *
175  *   3) Run netmsg_service_sync(), which will make sure that
176  *      ocif is no longer accessed (all network operations
177  *      happen only in network threads).
178  *   4) Free ocif -- only carp_if and scc are freed.
179  *
180  *
181  *   Removing carp_softc:
182  *
183  *   1) Duplicate the old carp_if to new carp_if (ncif); the to-be-deleted
184  *      carp_softc will not be duplicated.
185  *
186  *        if_carp                     ncif
187  *           |                         |
188  *           V                         V
189  *      +---------+               +---------+
190  *      | carp_if |               | carp_if |
191  *      +---------+               +---------+
192  *           |                         |
193  *           |                         |
194  *           V        +-------+        V
195  *        +-----+     |       |     +-----+
196  *        | scc |---->| softc |<----| scc |
197  *        +-----+     |       |     +-----+
198  *           |        +-------+        |
199  *           |                         |
200  *           V        +-------+        |
201  *        +-----+     |       |        |
202  *        | scc |---->| softc |        |
203  *        +-----+     |       |        |
204  *           |        +-------+        |
205  *           |                         |
206  *           V        +-------+        V
207  *        +-----+     |       |     +-----+
208  *        | scc |---->| softc |<----| scc |
209  *        +-----+     |       |     +-----+
210  *                    +-------+
211  *
212  *   2) Save if_carp into ocif and switch if_carp to ncif:
213  *
214  *          ocif                    if_carp
215  *           |                         |
216  *           V                         V
217  *      +---------+               +---------+
218  *      | carp_if |               | carp_if |
219  *      +---------+               +---------+
220  *           |                         |
221  *           |                         |
222  *           V        +-------+        V
223  *        +-----+     |       |     +-----+
224  *        | scc |---->| softc |<----| scc |
225  *        +-----+     |       |     +-----+
226  *           |        +-------+        |
227  *           |                         |
228  *           V        +-------+        |
229  *        +-----+     |       |        |
230  *        | scc |---->| softc |        |
231  *        +-----+     |       |        |
232  *           |        +-------+        |
233  *           |                         |
234  *           V        +-------+        V
235  *        +-----+     |       |     +-----+
236  *        | scc |---->| softc |<----| scc |
237  *        +-----+     |       |     +-----+
238  *                    +-------+
239  *
240  *   3) Run netmsg_service_sync(), which will make sure that
241  *      ocif is no longer accessed (all network operations
242  *      happen only in network threads).
243  *   4) Free ocif -- only carp_if and scc are freed.
244  *
245  * - Accessing if_carp:
246  *   The accessing code should cache if_carp in a local temporary
247  *   variable and use that temporary variable along the code path
248  *   instead of re-reading if_carp later on.
249  */
250 
/* Base name handed to if_initname() and the cloner for carp interfaces. */
#define	CARP_IFNAME		"carp"
/* True only when both IFF_UP and IFF_RUNNING are set in ifp->if_flags. */
#define CARP_IS_RUNNING(ifp)	\
	(((ifp)->if_flags & (IFF_UP | IFF_RUNNING)) == (IFF_UP | IFF_RUNNING))

/* Forward declaration; the full definition follows further down. */
struct carp_softc;
256 
/*
 * One IPv4 virtual address configured on a carp interface.  Entries are
 * linked on carp_softc.sc_vha_list through vha_link.
 */
struct carp_vhaddr {
	uint32_t		vha_flags;	/* CARP_VHAF_ */
	struct in_ifaddr	*vha_ia;	/* carp address */
	struct in_ifaddr	*vha_iaback;	/* backing address */
	TAILQ_ENTRY(carp_vhaddr) vha_link;
};
/* List head type for carp_vhaddr entries. */
TAILQ_HEAD(carp_vhaddr_list, carp_vhaddr);
264 
/*
 * Message used to run carp operations in a netisr thread.  'base' must
 * stay first: the dispatch handlers cast the incoming netmsg_t back to
 * struct netmsg_carp.
 */
struct netmsg_carp {
	struct netmsg_base	base;
	struct ifnet		*nc_carpdev;	/* backing interface argument */
	struct carp_softc	*nc_softc;	/* carp instance argument */
	void			*nc_data;	/* NOTE(review): presumably an
						 * operation-specific payload;
						 * not used in this part of the
						 * file -- confirm */
	size_t			nc_datalen;	/* NOTE(review): presumably the
						 * size of nc_data -- confirm */
};
272 
/*
 * Per carp interface state.  The embedded arpcom holds the ifnet itself
 * (reachable through the sc_if shorthand defined after the structure).
 */
struct carp_softc {
	struct arpcom		 arpcom;
	struct ifnet		*sc_carpdev;	/* parent interface */
	struct carp_vhaddr_list	 sc_vha_list;	/* virtual addr list */

	const struct in_ifaddr	*sc_ia;		/* primary iface address v4 */
	struct ip_moptions 	 sc_imo;

#ifdef INET6
	struct in6_ifaddr 	*sc_ia6;	/* primary iface address v6 */
	struct ip6_moptions 	 sc_im6o;
#endif /* INET6 */

	enum { INIT = 0, BACKUP, MASTER }
				 sc_state;
	boolean_t		 sc_dead;	/* set when being destroyed */

	int			 sc_suppress;

	int			 sc_sendad_errors;
#define	CARP_SENDAD_MAX_ERRORS	3
	int			 sc_sendad_success;
#define	CARP_SENDAD_MIN_SUCCESS 3

	int			 sc_vhid;	/* -1 until configured */
	int			 sc_advskew;
	int			 sc_naddrs;	/* actually used IPv4 vha */
	int			 sc_naddrs6;
	int			 sc_advbase;	/* seconds */
	int			 sc_init_counter;
	uint64_t		 sc_counter;

	/* authentication */
#define CARP_HMAC_PAD	64
	unsigned char		 sc_key[CARP_KEY_LEN];
	unsigned char		 sc_pad[CARP_HMAC_PAD];	/* HMAC ipad/opad */
	SHA1_CTX		 sc_sha1;	/* precomputed inner hash */

	struct callout		 sc_ad_tmo;	/* advertisement timeout */
	struct netmsg_carp	 sc_ad_msg;	/* adv timeout netmsg */
	struct callout		 sc_md_tmo;	/* ip4 master down timeout */
	struct callout 		 sc_md6_tmo;	/* ip6 master down timeout */
	struct netmsg_carp	 sc_md_msg;	/* master down timeout netmsg */

	LIST_ENTRY(carp_softc)	 sc_next;	/* Interface clue */
};

/* Shorthand for the ifnet embedded in the softc's arpcom. */
#define sc_if	arpcom.ac_if
321 
/*
 * List node that points at a carp_softc.  A carp_if (the list hung off
 * the backing interface's if_carp) is made of these nodes, so a softc
 * can be referenced from an old and a new carp_if at the same time.
 */
struct carp_softc_container {
	TAILQ_ENTRY(carp_softc_container) scc_link;
	struct carp_softc	*scc_softc;
};
/* Per backing interface list of carp_softc_container nodes. */
TAILQ_HEAD(carp_if, carp_softc_container);
327 
/* net.inet.carp sysctl tree; the node itself is declared elsewhere. */
SYSCTL_DECL(_net_inet_carp);

/* Tunables indexed by the CARPCTL_* identifiers used below. */
static int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0 }; /* XXX for now */
SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW,
    &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets");
SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW,
    &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode");
SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW,
    &carp_opts[CARPCTL_LOG], 0, "log bad carp packets");
SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW,
    &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses");

/* Read-only from userland. */
static int carp_suppress_preempt = 0;
SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD,
    &carp_suppress_preempt, 0, "Preemption is suppressed");

/* Read-only from userland. */
static int carp_prio_ad = 1;
SYSCTL_INT(_net_inet_carp, OID_AUTO, prio_ad, CTLFLAG_RD,
    &carp_prio_ad, 0, "Prioritize advertisement packet");

/* Global packet counters, exported as a whole structure. */
static struct carpstats carpstats;
SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW,
    &carpstats, carpstats,
    "CARP statistics (struct carpstats, netinet/ip_carp.h)");

/* Log at LOG_INFO when the "log" knob is greater than 0. */
#define	CARP_LOG(...)	do {				\
	if (carp_opts[CARPCTL_LOG] > 0)			\
		log(LOG_INFO, __VA_ARGS__);		\
} while (0)

/* Log at LOG_DEBUG when the "log" knob is greater than 1. */
#define	CARP_DEBUG(...)	do {				\
	if (carp_opts[CARPCTL_LOG] > 1)			\
		log(LOG_DEBUG, __VA_ARGS__);		\
} while (0)
362 
/* Token held while inserting into or removing from carpif_list. */
static struct lwkt_token carp_listtok = LWKT_TOKEN_INITIALIZER(carp_list_token);

/* HMAC-SHA1 helpers used for advertisement authentication */
static void	carp_hmac_prepare(struct carp_softc *);
static void	carp_hmac_generate(struct carp_softc *, uint32_t *,
		    unsigned char *);
static int	carp_hmac_verify(struct carp_softc *, uint32_t *,
		    unsigned char *);
static void	carp_setroute(struct carp_softc *, int);
static void	carp_proto_input_c(struct carp_softc *, struct mbuf *,
		    struct carp_header *, sa_family_t);
/* Interface cloning and teardown */
static int 	carp_clone_create(struct if_clone *, int, caddr_t);
static int 	carp_clone_destroy(struct ifnet *);
static void	carp_detach(struct carp_softc *, boolean_t, boolean_t);
/* Advertisement, timers and state handling */
static void	carp_prepare_ad(struct carp_softc *, struct carp_header *);
static void	carp_send_ad_all(void);
static void	carp_send_ad_timeout(void *);
static void	carp_send_ad(struct carp_softc *);
static void	carp_send_arp(struct carp_softc *);
static void	carp_master_down_timeout(void *);
static void	carp_master_down(struct carp_softc *);
static void	carp_setrun(struct carp_softc *, sa_family_t);
static void	carp_set_state(struct carp_softc *, int);
static struct ifnet *carp_forus(struct carp_if *, const uint8_t *);

/* ifnet methods installed by carp_clone_create() */
static void	carp_init(void *);
static int	carp_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
static int	carp_output(struct ifnet *, struct mbuf *, struct sockaddr *,
		    struct rtentry *);
static void	carp_start(struct ifnet *, struct ifaltq_subque *);

/* Address management */
static void	carp_multicast_cleanup(struct carp_softc *);
static void	carp_add_addr(struct carp_softc *, struct ifaddr *);
static void	carp_del_addr(struct carp_softc *, struct ifaddr *);
static void	carp_config_addr(struct carp_softc *, struct ifaddr *);
static void	carp_link_addrs(struct carp_softc *, struct ifnet *,
		    struct ifaddr *);
static void	carp_unlink_addrs(struct carp_softc *, struct ifnet *,
		    struct ifaddr *);
static void	carp_update_addrs(struct carp_softc *, struct ifaddr *);

/* Virtual address (carp_vhaddr) handling */
static int	carp_config_vhaddr(struct carp_softc *, struct carp_vhaddr *,
		    struct in_ifaddr *);
static int	carp_activate_vhaddr(struct carp_softc *, struct carp_vhaddr *,
		    struct ifnet *, struct in_ifaddr *, int);
static void	carp_deactivate_vhaddr(struct carp_softc *,
		    struct carp_vhaddr *, boolean_t);
static int	carp_addroute_vhaddr(struct carp_softc *, struct carp_vhaddr *);
static void	carp_delroute_vhaddr(struct carp_softc *, struct carp_vhaddr *,
		    boolean_t);

#ifdef foo
static void	carp_sc_state(struct carp_softc *);
#endif
#ifdef INET6
static void	carp_send_na(struct carp_softc *);
#ifdef notyet
static int	carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
static int	carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *);
#endif
static void	carp_multicast6_cleanup(struct carp_softc *);
#endif
/* Stop/suspend and ioctl back ends */
static void	carp_stop(struct carp_softc *, boolean_t);
static void	carp_suspend(struct carp_softc *, boolean_t);
static void	carp_ioctl_stop(struct carp_softc *);
static int	carp_ioctl_setvh(struct carp_softc *, void *, struct ucred *);
static void	carp_ioctl_ifcap(struct carp_softc *, int);
static int	carp_ioctl_getvh(struct carp_softc *, void *, struct ucred *);
static int	carp_ioctl_getdevname(struct carp_softc *, struct ifdrv *);
static int	carp_ioctl_getvhaddr(struct carp_softc *, struct ifdrv *);

/* carp_if (per backing interface softc list) duplication helpers */
static struct carp_if *carp_if_remove(struct carp_if *, struct carp_softc *);
static struct carp_if *carp_if_insert(struct carp_if *, struct carp_softc *);
static void	carp_if_free(struct carp_if *);

/* Event handlers */
static void	carp_ifaddr(void *, struct ifnet *, enum ifaddr_event,
			    struct ifaddr *);
static void	carp_ifdetach(void *, struct ifnet *);

/* netmsg handlers */
static void	carp_ifdetach_dispatch(netmsg_t);
static void	carp_clone_destroy_dispatch(netmsg_t);
static void	carp_init_dispatch(netmsg_t);
static void	carp_ioctl_stop_dispatch(netmsg_t);
static void	carp_ioctl_setvh_dispatch(netmsg_t);
static void	carp_ioctl_ifcap_dispatch(netmsg_t);
static void	carp_ioctl_getvh_dispatch(netmsg_t);
static void	carp_ioctl_getdevname_dispatch(netmsg_t);
static void	carp_ioctl_getvhaddr_dispatch(netmsg_t);
static void	carp_send_ad_timeout_dispatch(netmsg_t);
static void	carp_master_down_timeout_dispatch(netmsg_t);
452 
/* Malloc type used for every allocation made by this file. */
static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces");

/* All carp softcs, linked through sc_next; protected by carp_listtok. */
static LIST_HEAD(, carp_softc) carpif_list;

/* Cloner for "carpN" interfaces; units range from 0 to IF_MAXUNIT. */
static struct if_clone carp_cloner =
IF_CLONE_INITIALIZER(CARP_IFNAME, carp_clone_create, carp_clone_destroy,
		     0, IF_MAXUNIT);

/* Link-level address handed to ether_ifattach() for new carp interfaces. */
static const uint8_t	carp_etheraddr[ETHER_ADDR_LEN] =
	{ 0, 0, 0x5e, 0, 1, 0 };

/*
 * Event handler registration tags.
 * NOTE(review): presumably set when carp_ifdetach()/carp_ifaddr() are
 * registered; the registration is not in this part of the file -- confirm.
 */
static eventhandler_tag carp_ifdetach_event;
static eventhandler_tag carp_ifaddr_event;
466 
467 static __inline void
468 carp_insert_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha_new)
469 {
470 	struct carp_vhaddr *vha;
471 	u_long new_addr, addr;
472 
473 	KKASSERT((vha_new->vha_flags & CARP_VHAF_ONLIST) == 0);
474 
475 	/*
476 	 * Virtual address list is sorted; smaller one first
477 	 */
478 	new_addr = ntohl(vha_new->vha_ia->ia_addr.sin_addr.s_addr);
479 
480 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
481 		addr = ntohl(vha->vha_ia->ia_addr.sin_addr.s_addr);
482 
483 		if (addr > new_addr)
484 			break;
485 	}
486 	if (vha == NULL)
487 		TAILQ_INSERT_TAIL(&sc->sc_vha_list, vha_new, vha_link);
488 	else
489 		TAILQ_INSERT_BEFORE(vha, vha_new, vha_link);
490 	vha_new->vha_flags |= CARP_VHAF_ONLIST;
491 }
492 
493 static __inline void
494 carp_remove_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha)
495 {
496 	KKASSERT(vha->vha_flags & CARP_VHAF_ONLIST);
497 	vha->vha_flags &= ~CARP_VHAF_ONLIST;
498 	TAILQ_REMOVE(&sc->sc_vha_list, vha, vha_link);
499 }
500 
501 static void
502 carp_hmac_prepare(struct carp_softc *sc)
503 {
504 	uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
505 	uint8_t vhid = sc->sc_vhid & 0xff;
506 	int i;
507 #ifdef INET6
508 	struct ifaddr_container *ifac;
509 	struct in6_addr in6;
510 #endif
511 #ifdef INET
512 	struct carp_vhaddr *vha;
513 #endif
514 
515 	/* XXX: possible race here */
516 
517 	/* compute ipad from key */
518 	bzero(sc->sc_pad, sizeof(sc->sc_pad));
519 	bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key));
520 	for (i = 0; i < sizeof(sc->sc_pad); i++)
521 		sc->sc_pad[i] ^= 0x36;
522 
523 	/* precompute first part of inner hash */
524 	SHA1Init(&sc->sc_sha1);
525 	SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
526 	SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version));
527 	SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type));
528 	SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid));
529 #ifdef INET
530 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
531 		SHA1Update(&sc->sc_sha1,
532 		    (const uint8_t *)&vha->vha_ia->ia_addr.sin_addr,
533 		    sizeof(struct in_addr));
534 	}
535 #endif /* INET */
536 #ifdef INET6
537 	TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) {
538 		struct ifaddr *ifa = ifac->ifa;
539 
540 		if (ifa->ifa_addr->sa_family == AF_INET6) {
541 			in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
542 			in6_clearscope(&in6);
543 			SHA1Update(&sc->sc_sha1, (void *)&in6, sizeof(in6));
544 		}
545 	}
546 #endif /* INET6 */
547 
548 	/* convert ipad to opad */
549 	for (i = 0; i < sizeof(sc->sc_pad); i++)
550 		sc->sc_pad[i] ^= 0x36 ^ 0x5c;
551 }
552 
553 static void
554 carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2],
555     unsigned char md[20])
556 {
557 	SHA1_CTX sha1ctx;
558 
559 	/* fetch first half of inner hash */
560 	bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx));
561 
562 	SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter));
563 	SHA1Final(md, &sha1ctx);
564 
565 	/* outer hash */
566 	SHA1Init(&sha1ctx);
567 	SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad));
568 	SHA1Update(&sha1ctx, md, 20);
569 	SHA1Final(md, &sha1ctx);
570 }
571 
572 static int
573 carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2],
574     unsigned char md[20])
575 {
576 	unsigned char md2[20];
577 
578 	carp_hmac_generate(sc, counter, md2);
579 	return (bcmp(md, md2, sizeof(md2)));
580 }
581 
582 static void
583 carp_setroute(struct carp_softc *sc, int cmd)
584 {
585 #ifdef INET6
586 	struct ifaddr_container *ifac;
587 #endif
588 	struct carp_vhaddr *vha;
589 
590 	KKASSERT(cmd == RTM_DELETE || cmd == RTM_ADD);
591 
592 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
593 		if (vha->vha_iaback == NULL)
594 			continue;
595 		if (cmd == RTM_DELETE)
596 			carp_delroute_vhaddr(sc, vha, FALSE);
597 		else
598 			carp_addroute_vhaddr(sc, vha);
599 	}
600 
601 #ifdef INET6
602 	TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) {
603 		struct ifaddr *ifa = ifac->ifa;
604 
605 		if (ifa->ifa_addr->sa_family == AF_INET6) {
606 			if (cmd == RTM_ADD)
607 				in6_ifaddloop(ifa);
608 			else
609 				in6_ifremloop(ifa);
610 		}
611 	}
612 #endif /* INET6 */
613 }
614 
615 static int
616 carp_clone_create(struct if_clone *ifc, int unit, caddr_t param __unused)
617 {
618 	struct carp_softc *sc;
619 	struct ifnet *ifp;
620 
621 	sc = kmalloc(sizeof(*sc), M_CARP, M_WAITOK | M_ZERO);
622 	ifp = &sc->sc_if;
623 
624 	sc->sc_suppress = 0;
625 	sc->sc_advbase = CARP_DFLTINTV;
626 	sc->sc_vhid = -1;	/* required setting */
627 	sc->sc_advskew = 0;
628 	sc->sc_init_counter = 1;
629 	sc->sc_naddrs = 0;
630 	sc->sc_naddrs6 = 0;
631 
632 	TAILQ_INIT(&sc->sc_vha_list);
633 
634 #ifdef INET6
635 	sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
636 #endif
637 
638 	callout_init_mp(&sc->sc_ad_tmo);
639 	netmsg_init(&sc->sc_ad_msg.base, NULL, &netisr_adone_rport,
640 	    MSGF_DROPABLE | MSGF_PRIORITY, carp_send_ad_timeout_dispatch);
641 	sc->sc_ad_msg.nc_softc = sc;
642 
643 	callout_init_mp(&sc->sc_md_tmo);
644 	callout_init_mp(&sc->sc_md6_tmo);
645 	netmsg_init(&sc->sc_md_msg.base, NULL, &netisr_adone_rport,
646 	    MSGF_DROPABLE | MSGF_PRIORITY, carp_master_down_timeout_dispatch);
647 	sc->sc_md_msg.nc_softc = sc;
648 
649 	if_initname(ifp, CARP_IFNAME, unit);
650 	ifp->if_softc = sc;
651 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
652 	ifp->if_init = carp_init;
653 	ifp->if_ioctl = carp_ioctl;
654 	ifp->if_start = carp_start;
655 
656 	ifp->if_capabilities = IFCAP_TXCSUM | IFCAP_TSO;
657 	ifp->if_capenable = ifp->if_capabilities;
658 	/*
659 	 * Leave if_hwassist as it is; if_hwassist will be
660 	 * setup when this carp interface has parent.
661 	 */
662 
663 	ifq_set_maxlen(&ifp->if_snd, ifqmaxlen);
664 	ifq_set_ready(&ifp->if_snd);
665 
666 	ether_ifattach(ifp, carp_etheraddr, NULL);
667 
668 	ifp->if_type = IFT_CARP;
669 	ifp->if_output = carp_output;
670 
671 	lwkt_gettoken(&carp_listtok);
672 	LIST_INSERT_HEAD(&carpif_list, sc, sc_next);
673 	lwkt_reltoken(&carp_listtok);
674 
675 	return (0);
676 }
677 
678 static void
679 carp_clone_destroy_dispatch(netmsg_t msg)
680 {
681 	struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
682 	struct carp_softc *sc = cmsg->nc_softc;
683 
684 	sc->sc_dead = TRUE;
685 	carp_detach(sc, TRUE, FALSE);
686 
687 	callout_stop_sync(&sc->sc_ad_tmo);
688 	callout_stop_sync(&sc->sc_md_tmo);
689 	callout_stop_sync(&sc->sc_md6_tmo);
690 
691 	crit_enter();
692 	lwkt_dropmsg(&sc->sc_ad_msg.base.lmsg);
693 	lwkt_dropmsg(&sc->sc_md_msg.base.lmsg);
694 	crit_exit();
695 
696 	lwkt_replymsg(&cmsg->base.lmsg, 0);
697 }
698 
699 static int
700 carp_clone_destroy(struct ifnet *ifp)
701 {
702 	struct carp_softc *sc = ifp->if_softc;
703 	struct netmsg_carp cmsg;
704 
705 	bzero(&cmsg, sizeof(cmsg));
706 	netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
707 	    carp_clone_destroy_dispatch);
708 	cmsg.nc_softc = sc;
709 
710 	lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
711 
712 	lwkt_gettoken(&carp_listtok);
713 	LIST_REMOVE(sc, sc_next);
714 	lwkt_reltoken(&carp_listtok);
715 
716 	bpfdetach(ifp);
717 	if_detach(ifp);
718 
719 	KASSERT(sc->sc_naddrs == 0, ("certain inet address is still active"));
720 	kfree(sc, M_CARP);
721 
722 	return 0;
723 }
724 
725 static struct carp_if *
726 carp_if_remove(struct carp_if *ocif, struct carp_softc *sc)
727 {
728 	struct carp_softc_container *oscc, *scc;
729 	struct carp_if *cif;
730 	int count = 0;
731 #ifdef INVARIANTS
732 	int found = 0;
733 #endif
734 
735 	TAILQ_FOREACH(oscc, ocif, scc_link) {
736 		++count;
737 #ifdef INVARIANTS
738 		if (oscc->scc_softc == sc)
739 			found = 1;
740 #endif
741 	}
742 	KASSERT(found, ("%s carp_softc is not on carp_if", __func__));
743 
744 	if (count == 1) {
745 		/* Last one is going to be unlinked */
746 		return NULL;
747 	}
748 
749 	cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO);
750 	TAILQ_INIT(cif);
751 
752 	TAILQ_FOREACH(oscc, ocif, scc_link) {
753 		if (oscc->scc_softc == sc)
754 			continue;
755 
756 		scc = kmalloc(sizeof(*scc), M_CARP, M_WAITOK | M_ZERO);
757 		scc->scc_softc = oscc->scc_softc;
758 		TAILQ_INSERT_TAIL(cif, scc, scc_link);
759 	}
760 
761 	return cif;
762 }
763 
764 static struct carp_if *
765 carp_if_insert(struct carp_if *ocif, struct carp_softc *sc)
766 {
767 	struct carp_softc_container *oscc;
768 	int onlist;
769 
770 	onlist = 0;
771 	if (ocif != NULL) {
772 		TAILQ_FOREACH(oscc, ocif, scc_link) {
773 			if (oscc->scc_softc == sc)
774 				onlist = 1;
775 		}
776 	}
777 
778 #ifdef INVARIANTS
779 	if (sc->sc_carpdev != NULL) {
780 		KASSERT(onlist, ("%s is not on %s carp list",
781 		    sc->sc_if.if_xname, sc->sc_carpdev->if_xname));
782 	} else {
783 		KASSERT(!onlist, ("%s is already on carp list",
784 		    sc->sc_if.if_xname));
785 	}
786 #endif
787 
788 	if (!onlist) {
789 		struct carp_if *cif;
790 		struct carp_softc_container *new_scc, *scc;
791 		int inserted = 0;
792 
793 		cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO);
794 		TAILQ_INIT(cif);
795 
796 		new_scc = kmalloc(sizeof(*new_scc), M_CARP, M_WAITOK | M_ZERO);
797 		new_scc->scc_softc = sc;
798 
799 		if (ocif != NULL) {
800 			TAILQ_FOREACH(oscc, ocif, scc_link) {
801 				if (!inserted &&
802 				    oscc->scc_softc->sc_vhid > sc->sc_vhid) {
803 					TAILQ_INSERT_TAIL(cif, new_scc,
804 					    scc_link);
805 					inserted = 1;
806 				}
807 
808 				scc = kmalloc(sizeof(*scc), M_CARP,
809 				    M_WAITOK | M_ZERO);
810 				scc->scc_softc = oscc->scc_softc;
811 				TAILQ_INSERT_TAIL(cif, scc, scc_link);
812 			}
813 		}
814 		if (!inserted)
815 			TAILQ_INSERT_TAIL(cif, new_scc, scc_link);
816 
817 		return cif;
818 	} else {
819 		return ocif;
820 	}
821 }
822 
823 static void
824 carp_if_free(struct carp_if *cif)
825 {
826 	struct carp_softc_container *scc;
827 
828 	while ((scc = TAILQ_FIRST(cif)) != NULL) {
829 		TAILQ_REMOVE(cif, scc, scc_link);
830 		kfree(scc, M_CARP);
831 	}
832 	kfree(cif, M_CARP);
833 }
834 
835 static void
836 carp_detach(struct carp_softc *sc, boolean_t detach, boolean_t del_iaback)
837 {
838 	carp_suspend(sc, detach);
839 
840 	carp_multicast_cleanup(sc);
841 #ifdef INET6
842 	carp_multicast6_cleanup(sc);
843 #endif
844 
845 	if (!sc->sc_dead && detach) {
846 		struct carp_vhaddr *vha;
847 
848 		TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
849 			carp_deactivate_vhaddr(sc, vha, del_iaback);
850 		KKASSERT(sc->sc_naddrs == 0);
851 	}
852 
853 	if (sc->sc_carpdev != NULL) {
854 		struct ifnet *ifp = sc->sc_carpdev;
855 		struct carp_if *ocif = ifp->if_carp;
856 
857 		ifp->if_carp = carp_if_remove(ocif, sc);
858 		KASSERT(ifp->if_carp != ocif,
859 		    ("%s carp_if_remove failed", __func__));
860 
861 		sc->sc_carpdev = NULL;
862 		sc->sc_ia = NULL;
863 		sc->arpcom.ac_if.if_hwassist = 0;
864 
865 		/*
866 		 * Make sure that all protocol threads see the
867 		 * sc_carpdev and if_carp changes
868 		 */
869 		netmsg_service_sync();
870 
871 		if (ifp->if_carp == NULL) {
872 			/*
873 			 * No more carp interfaces using
874 			 * ifp as the backing interface,
875 			 * move it out of promiscous mode.
876 			 */
877 			ifpromisc(ifp, 0);
878 		}
879 
880 		/*
881 		 * The old carp list could be safely free now,
882 		 * since no one can access it.
883 		 */
884 		carp_if_free(ocif);
885 	}
886 }
887 
888 static void
889 carp_ifdetach_dispatch(netmsg_t msg)
890 {
891 	struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
892 	struct ifnet *ifp = cmsg->nc_carpdev;
893 
894 	while (ifp->if_carp) {
895 		struct carp_softc_container *scc;
896 
897 		scc = TAILQ_FIRST((struct carp_if *)(ifp->if_carp));
898 		carp_detach(scc->scc_softc, TRUE, TRUE);
899 	}
900 	lwkt_replymsg(&cmsg->base.lmsg, 0);
901 }
902 
903 /* Detach an interface from the carp. */
904 static void
905 carp_ifdetach(void *arg __unused, struct ifnet *ifp)
906 {
907 	struct netmsg_carp cmsg;
908 
909 	ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp);
910 
911 	bzero(&cmsg, sizeof(cmsg));
912 	netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
913 	    carp_ifdetach_dispatch);
914 	cmsg.nc_carpdev = ifp;
915 
916 	lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
917 }
918 
919 /*
920  * process input packet.
921  * we have rearranged checks order compared to the rfc,
922  * but it seems more efficient this way or not possible otherwise.
923  */
924 int
925 carp_proto_input(struct mbuf **mp, int *offp, int proto)
926 {
927 	struct mbuf *m = *mp;
928 	struct ip *ip = mtod(m, struct ip *);
929 	struct ifnet *ifp = m->m_pkthdr.rcvif;
930 	struct carp_header *ch;
931 	struct carp_softc *sc;
932 	int len, iphlen;
933 
934 	iphlen = *offp;
935 	*mp = NULL;
936 
937 	carpstats.carps_ipackets++;
938 
939 	if (!carp_opts[CARPCTL_ALLOW]) {
940 		m_freem(m);
941 		goto back;
942 	}
943 
944 	/* Check if received on a valid carp interface */
945 	if (ifp->if_type != IFT_CARP) {
946 		carpstats.carps_badif++;
947 		CARP_LOG("carp_proto_input: packet received on non-carp "
948 		    "interface: %s\n", ifp->if_xname);
949 		m_freem(m);
950 		goto back;
951 	}
952 
953 	if (!CARP_IS_RUNNING(ifp)) {
954 		carpstats.carps_badif++;
955 		CARP_LOG("carp_proto_input: packet received on stopped carp "
956 		    "interface: %s\n", ifp->if_xname);
957 		m_freem(m);
958 		goto back;
959 	}
960 
961 	sc = ifp->if_softc;
962 	if (sc->sc_carpdev == NULL) {
963 		carpstats.carps_badif++;
964 		CARP_LOG("carp_proto_input: packet received on defunc carp "
965 		    "interface: %s\n", ifp->if_xname);
966 		m_freem(m);
967 		goto back;
968 	}
969 
970 	if (!IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
971 		carpstats.carps_badif++;
972 		CARP_LOG("carp_proto_input: non-mcast packet on "
973 		    "interface: %s\n", ifp->if_xname);
974 		m_freem(m);
975 		goto back;
976 	}
977 
978 	/* Verify that the IP TTL is CARP_DFLTTL. */
979 	if (ip->ip_ttl != CARP_DFLTTL) {
980 		carpstats.carps_badttl++;
981 		CARP_LOG("carp_proto_input: received ttl %d != %d on %s\n",
982 		    ip->ip_ttl, CARP_DFLTTL, ifp->if_xname);
983 		m_freem(m);
984 		goto back;
985 	}
986 
987 	/* Minimal CARP packet size */
988 	len = iphlen + sizeof(*ch);
989 
990 	/*
991 	 * Verify that the received packet length is
992 	 * not less than the CARP header
993 	 */
994 	if (m->m_pkthdr.len < len) {
995 		carpstats.carps_badlen++;
996 		CARP_LOG("packet too short %d on %s\n", m->m_pkthdr.len,
997 		    ifp->if_xname);
998 		m_freem(m);
999 		goto back;
1000 	}
1001 
1002 	/* Make sure that CARP header is contiguous */
1003 	if (len > m->m_len) {
1004 		m = m_pullup(m, len);
1005 		if (m == NULL) {
1006 			carpstats.carps_hdrops++;
1007 			CARP_LOG("carp_proto_input: m_pullup failed\n");
1008 			goto back;
1009 		}
1010 		ip = mtod(m, struct ip *);
1011 	}
1012 	ch = (struct carp_header *)((uint8_t *)ip + iphlen);
1013 
1014 	/* Verify the CARP checksum */
1015 	if (in_cksum_skip(m, len, iphlen)) {
1016 		carpstats.carps_badsum++;
1017 		CARP_LOG("carp_proto_input: checksum failed on %s\n",
1018 		    ifp->if_xname);
1019 		m_freem(m);
1020 		goto back;
1021 	}
1022 	carp_proto_input_c(sc, m, ch, AF_INET);
1023 back:
1024 	return(IPPROTO_DONE);
1025 }
1026 
#ifdef INET6
/*
 * Process an IPv6 CARP input packet.
 *
 * mp:    packet to process.
 * offp:  offset of the CARP header within the packet.
 * proto: unused.
 *
 * The packet is dropped (and the matching carpstats counter bumped)
 * unless CARP input is allowed, it was received on a running carp(4)
 * interface that has a parent interface, its hop limit is CARP_DFLTTL,
 * a complete CARP header is present and the CARP checksum verifies.
 * Accepted packets are handed over to carp_proto_input_c().
 *
 * Always returns IPPROTO_DONE.
 */
int
carp6_proto_input(struct mbuf **mp, int *offp, int proto)
{
	struct mbuf *m = *mp;
	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
	struct ifnet *ifp = m->m_pkthdr.rcvif;
	struct carp_header *ch;
	struct carp_softc *sc;
	u_int len;

	carpstats.carps_ipackets6++;

	if (!carp_opts[CARPCTL_ALLOW]) {
		m_freem(m);
		goto back;
	}

	/* check if received on a valid carp interface */
	if (ifp->if_type != IFT_CARP) {
		carpstats.carps_badif++;
		CARP_LOG("carp6_proto_input: packet received on non-carp "
		    "interface: %s\n", ifp->if_xname);
		m_freem(m);
		goto back;
	}

	if (!CARP_IS_RUNNING(ifp)) {
		carpstats.carps_badif++;
		/*
		 * This message used to carry the IPv4 function name,
		 * which made IPv6 drops look like IPv4 ones in the log.
		 */
		CARP_LOG("carp6_proto_input: packet received on stopped carp "
		    "interface: %s\n", ifp->if_xname);
		m_freem(m);
		goto back;
	}

	sc = ifp->if_softc;
	if (sc->sc_carpdev == NULL) {
		carpstats.carps_badif++;
		CARP_LOG("carp6_proto_input: packet received on defunc-carp "
		    "interface: %s\n", ifp->if_xname);
		m_freem(m);
		goto back;
	}

	/* verify that the IPv6 hop limit is CARP_DFLTTL */
	if (ip6->ip6_hlim != CARP_DFLTTL) {
		carpstats.carps_badttl++;
		CARP_LOG("carp6_proto_input: received ttl %d != 255 on %s\n",
		    ip6->ip6_hlim, ifp->if_xname);
		m_freem(m);
		goto back;
	}

	/*
	 * verify that we have a complete carp packet
	 *
	 * The length of the first mbuf is saved beforehand, because it
	 * is only used for the log message after the mbuf may be gone.
	 * NOTE(review): the mbuf is not freed here when ch is NULL;
	 * presumably IP6_EXTHDR_GET has already released it -- confirm
	 * against the macro's definition.
	 */
	len = m->m_len;
	IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch));
	if (ch == NULL) {
		carpstats.carps_badlen++;
		CARP_LOG("carp6_proto_input: packet size %u too small\n", len);
		goto back;
	}

	/* verify the CARP checksum, computed over the CARP header only */
	if (in_cksum_range(m, 0, *offp, sizeof(*ch))) {
		carpstats.carps_badsum++;
		CARP_LOG("carp6_proto_input: checksum failed, on %s\n",
		    ifp->if_xname);
		m_freem(m);
		goto back;
	}

	carp_proto_input_c(sc, m, ch, AF_INET6);
back:
	return (IPPROTO_DONE);
}
#endif /* INET6 */
1103 
1104 static void
1105 carp_proto_input_c(struct carp_softc *sc, struct mbuf *m,
1106     struct carp_header *ch, sa_family_t af)
1107 {
1108 	struct ifnet *cifp;
1109 	uint64_t tmp_counter;
1110 	struct timeval sc_tv, ch_tv;
1111 
1112 	if (sc->sc_vhid != ch->carp_vhid) {
1113 		/*
1114 		 * CARP uses multicast, however, multicast packets
1115 		 * are tapped to all CARP interfaces on the physical
1116 		 * interface receiving the CARP packets, so we don't
1117 		 * update any stats here.
1118 		 */
1119 		m_freem(m);
1120 		return;
1121 	}
1122 	cifp = &sc->sc_if;
1123 
1124 	/* verify the CARP version. */
1125 	if (ch->carp_version != CARP_VERSION) {
1126 		carpstats.carps_badver++;
1127 		CARP_LOG("%s; invalid version %d\n", cifp->if_xname,
1128 			 ch->carp_version);
1129 		m_freem(m);
1130 		return;
1131 	}
1132 
1133 	/* verify the hash */
1134 	if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
1135 		carpstats.carps_badauth++;
1136 		CARP_LOG("%s: incorrect hash\n", cifp->if_xname);
1137 		m_freem(m);
1138 		return;
1139 	}
1140 
1141 	tmp_counter = ntohl(ch->carp_counter[0]);
1142 	tmp_counter = tmp_counter<<32;
1143 	tmp_counter += ntohl(ch->carp_counter[1]);
1144 
1145 	/* XXX Replay protection goes here */
1146 
1147 	sc->sc_init_counter = 0;
1148 	sc->sc_counter = tmp_counter;
1149 
1150 	sc_tv.tv_sec = sc->sc_advbase;
1151 	if (carp_suppress_preempt && sc->sc_advskew <  240)
1152 		sc_tv.tv_usec = 240 * 1000000 / 256;
1153 	else
1154 		sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1155 	ch_tv.tv_sec = ch->carp_advbase;
1156 	ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
1157 
1158 	switch (sc->sc_state) {
1159 	case INIT:
1160 		break;
1161 
1162 	case MASTER:
1163 		/*
1164 		 * If we receive an advertisement from a master who's going to
1165 		 * be more frequent than us, go into BACKUP state.
1166 		 */
1167 		if (timevalcmp(&sc_tv, &ch_tv, >) ||
1168 		    timevalcmp(&sc_tv, &ch_tv, ==)) {
1169 			callout_stop(&sc->sc_ad_tmo);
1170 			CARP_DEBUG("%s: MASTER -> BACKUP "
1171 			   "(more frequent advertisement received)\n",
1172 			   cifp->if_xname);
1173 			carp_set_state(sc, BACKUP);
1174 			carp_setrun(sc, 0);
1175 			carp_setroute(sc, RTM_DELETE);
1176 		}
1177 		break;
1178 
1179 	case BACKUP:
1180 		/*
1181 		 * If we're pre-empting masters who advertise slower than us,
1182 		 * and this one claims to be slower, treat him as down.
1183 		 */
1184 		if (carp_opts[CARPCTL_PREEMPT] &&
1185 		    timevalcmp(&sc_tv, &ch_tv, <)) {
1186 			CARP_DEBUG("%s: BACKUP -> MASTER "
1187 			    "(preempting a slower master)\n", cifp->if_xname);
1188 			carp_master_down(sc);
1189 			break;
1190 		}
1191 
1192 		/*
1193 		 *  If the master is going to advertise at such a low frequency
1194 		 *  that he's guaranteed to time out, we'd might as well just
1195 		 *  treat him as timed out now.
1196 		 */
1197 		sc_tv.tv_sec = sc->sc_advbase * 3;
1198 		if (timevalcmp(&sc_tv, &ch_tv, <)) {
1199 			CARP_DEBUG("%s: BACKUP -> MASTER (master timed out)\n",
1200 				   cifp->if_xname);
1201 			carp_master_down(sc);
1202 			break;
1203 		}
1204 
1205 		/*
1206 		 * Otherwise, we reset the counter and wait for the next
1207 		 * advertisement.
1208 		 */
1209 		carp_setrun(sc, af);
1210 		break;
1211 	}
1212 	m_freem(m);
1213 }
1214 
1215 struct mbuf *
1216 carp_input(void *v, struct mbuf *m)
1217 {
1218 	struct carp_if *cif = v;
1219 	struct ether_header *eh;
1220 	struct carp_softc_container *scc;
1221 	struct ifnet *ifp;
1222 
1223 	eh = mtod(m, struct ether_header *);
1224 
1225 	ifp = carp_forus(cif, eh->ether_dhost);
1226 	if (ifp != NULL) {
1227 		ether_reinput_oncpu(ifp, m, REINPUT_RUNBPF);
1228 		return NULL;
1229 	}
1230 
1231 	if ((m->m_flags & (M_BCAST | M_MCAST)) == 0)
1232 		return m;
1233 
1234 	/*
1235 	 * XXX Should really check the list of multicast addresses
1236 	 * for each CARP interface _before_ copying.
1237 	 */
1238 	TAILQ_FOREACH(scc, cif, scc_link) {
1239 		struct carp_softc *sc = scc->scc_softc;
1240 		struct mbuf *m0;
1241 
1242 		if ((sc->sc_if.if_flags & IFF_UP) == 0)
1243 			continue;
1244 
1245 		m0 = m_dup(m, M_NOWAIT);
1246 		if (m0 == NULL)
1247 			continue;
1248 
1249 		ether_reinput_oncpu(&sc->sc_if, m0, REINPUT_RUNBPF);
1250 	}
1251 	return m;
1252 }
1253 
1254 static void
1255 carp_prepare_ad(struct carp_softc *sc, struct carp_header *ch)
1256 {
1257 	if (sc->sc_init_counter) {
1258 		/* this could also be seconds since unix epoch */
1259 		sc->sc_counter = karc4random();
1260 		sc->sc_counter = sc->sc_counter << 32;
1261 		sc->sc_counter += karc4random();
1262 	} else {
1263 		sc->sc_counter++;
1264 	}
1265 
1266 	ch->carp_counter[0] = htonl((sc->sc_counter >> 32) & 0xffffffff);
1267 	ch->carp_counter[1] = htonl(sc->sc_counter & 0xffffffff);
1268 
1269 	carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
1270 }
1271 
1272 static void
1273 carp_send_ad_all(void)
1274 {
1275 	struct carp_softc *sc;
1276 
1277 	LIST_FOREACH(sc, &carpif_list, sc_next) {
1278 		if (sc->sc_carpdev == NULL)
1279 			continue;
1280 
1281 		if (CARP_IS_RUNNING(&sc->sc_if) && sc->sc_state == MASTER)
1282 			carp_send_ad(sc);
1283 	}
1284 }
1285 
1286 static void
1287 carp_send_ad_timeout(void *xsc)
1288 {
1289 	struct carp_softc *sc = xsc;
1290 	struct netmsg_carp *cmsg = &sc->sc_ad_msg;
1291 
1292 	KASSERT(mycpuid == 0, ("%s not on cpu0 but on cpu%d",
1293 	    __func__, mycpuid));
1294 
1295 	crit_enter();
1296 	if (cmsg->base.lmsg.ms_flags & MSGF_DONE)
1297 		lwkt_sendmsg_oncpu(netisr_cpuport(0), &cmsg->base.lmsg);
1298 	crit_exit();
1299 }
1300 
1301 static void
1302 carp_send_ad_timeout_dispatch(netmsg_t msg)
1303 {
1304 	struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
1305 	struct carp_softc *sc = cmsg->nc_softc;
1306 
1307 	/* Reply ASAP */
1308 	crit_enter();
1309 	lwkt_replymsg(&cmsg->base.lmsg, 0);
1310 	crit_exit();
1311 
1312 	carp_send_ad(sc);
1313 }
1314 
1315 static void
1316 carp_send_ad(struct carp_softc *sc)
1317 {
1318 	struct ifnet *cifp = &sc->sc_if;
1319 	struct carp_header ch;
1320 	struct timeval tv;
1321 	struct carp_header *ch_ptr;
1322 	struct mbuf *m;
1323 	int len, advbase, advskew;
1324 
1325 	if (!CARP_IS_RUNNING(cifp)) {
1326 		/* Bow out */
1327 		advbase = 255;
1328 		advskew = 255;
1329 	} else {
1330 		advbase = sc->sc_advbase;
1331 		if (!carp_suppress_preempt || sc->sc_advskew > 240)
1332 			advskew = sc->sc_advskew;
1333 		else
1334 			advskew = 240;
1335 		tv.tv_sec = advbase;
1336 		tv.tv_usec = advskew * 1000000 / 256;
1337 	}
1338 
1339 	ch.carp_version = CARP_VERSION;
1340 	ch.carp_type = CARP_ADVERTISEMENT;
1341 	ch.carp_vhid = sc->sc_vhid;
1342 	ch.carp_advbase = advbase;
1343 	ch.carp_advskew = advskew;
1344 	ch.carp_authlen = 7;	/* XXX DEFINE */
1345 	ch.carp_pad1 = 0;	/* must be zero */
1346 	ch.carp_cksum = 0;
1347 
1348 #ifdef INET
1349 	if (sc->sc_ia != NULL) {
1350 		struct ip *ip;
1351 
1352 		MGETHDR(m, M_NOWAIT, MT_HEADER);
1353 		if (m == NULL) {
1354 			IFNET_STAT_INC(cifp, oerrors, 1);
1355 			carpstats.carps_onomem++;
1356 			/* XXX maybe less ? */
1357 			if (advbase != 255 || advskew != 255)
1358 				callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1359 				    carp_send_ad_timeout, sc);
1360 			return;
1361 		}
1362 		len = sizeof(*ip) + sizeof(ch);
1363 		m->m_pkthdr.len = len;
1364 		m->m_pkthdr.rcvif = NULL;
1365 		m->m_len = len;
1366 		MH_ALIGN(m, m->m_len);
1367 		m->m_flags |= M_MCAST;
1368 		if (carp_prio_ad)
1369 			m->m_flags |= M_PRIO;
1370 		ip = mtod(m, struct ip *);
1371 		ip->ip_v = IPVERSION;
1372 		ip->ip_hl = sizeof(*ip) >> 2;
1373 		ip->ip_tos = IPTOS_LOWDELAY;
1374 		ip->ip_len = len;
1375 		ip->ip_id = ip_newid();
1376 		ip->ip_off = IP_DF;
1377 		ip->ip_ttl = CARP_DFLTTL;
1378 		ip->ip_p = IPPROTO_CARP;
1379 		ip->ip_sum = 0;
1380 		ip->ip_src = sc->sc_ia->ia_addr.sin_addr;
1381 		ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP);
1382 
1383 		ch_ptr = (struct carp_header *)(&ip[1]);
1384 		bcopy(&ch, ch_ptr, sizeof(ch));
1385 		carp_prepare_ad(sc, ch_ptr);
1386 		ch_ptr->carp_cksum = in_cksum_skip(m, len, sizeof(*ip));
1387 
1388 		getmicrotime(&cifp->if_lastchange);
1389 		IFNET_STAT_INC(cifp, opackets, 1);
1390 		IFNET_STAT_INC(cifp, obytes, len);
1391 		carpstats.carps_opackets++;
1392 
1393 		if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) {
1394 			IFNET_STAT_INC(cifp, oerrors, 1);
1395 			if (sc->sc_sendad_errors < INT_MAX)
1396 				sc->sc_sendad_errors++;
1397 			if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
1398 				carp_suppress_preempt++;
1399 				if (carp_suppress_preempt == 1) {
1400 					carp_send_ad_all();
1401 				}
1402 			}
1403 			sc->sc_sendad_success = 0;
1404 		} else {
1405 			if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
1406 				if (++sc->sc_sendad_success >=
1407 				    CARP_SENDAD_MIN_SUCCESS) {
1408 					carp_suppress_preempt--;
1409 					sc->sc_sendad_errors = 0;
1410 				}
1411 			} else {
1412 				sc->sc_sendad_errors = 0;
1413 			}
1414 		}
1415 	}
1416 #endif /* INET */
1417 #ifdef INET6
1418 	if (sc->sc_ia6) {
1419 		struct ip6_hdr *ip6;
1420 
1421 		MGETHDR(m, M_NOWAIT, MT_HEADER);
1422 		if (m == NULL) {
1423 			IFNET_STAT_INC(cifp, oerrors, 1);
1424 			carpstats.carps_onomem++;
1425 			/* XXX maybe less ? */
1426 			if (advbase != 255 || advskew != 255)
1427 				callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1428 				    carp_send_ad_timeout, sc);
1429 			return;
1430 		}
1431 		len = sizeof(*ip6) + sizeof(ch);
1432 		m->m_pkthdr.len = len;
1433 		m->m_pkthdr.rcvif = NULL;
1434 		m->m_len = len;
1435 		MH_ALIGN(m, m->m_len);
1436 		m->m_flags |= M_MCAST;
1437 		ip6 = mtod(m, struct ip6_hdr *);
1438 		bzero(ip6, sizeof(*ip6));
1439 		ip6->ip6_vfc |= IPV6_VERSION;
1440 		ip6->ip6_hlim = CARP_DFLTTL;
1441 		ip6->ip6_nxt = IPPROTO_CARP;
1442 		bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src,
1443 		    sizeof(struct in6_addr));
1444 		/* set the multicast destination */
1445 
1446 		ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
1447 		ip6->ip6_dst.s6_addr8[15] = 0x12;
1448 		if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) {
1449 			IFNET_STAT_INC(cifp, oerrors, 1);
1450 			m_freem(m);
1451 			CARP_LOG("%s: in6_setscope failed\n", __func__);
1452 			return;
1453 		}
1454 
1455 		ch_ptr = (struct carp_header *)(&ip6[1]);
1456 		bcopy(&ch, ch_ptr, sizeof(ch));
1457 		carp_prepare_ad(sc, ch_ptr);
1458 		ch_ptr->carp_cksum = in_cksum_skip(m, len, sizeof(*ip6));
1459 
1460 		getmicrotime(&cifp->if_lastchange);
1461 		IFNET_STAT_INC(cifp, opackets, 1);
1462 		IFNET_STAT_INC(cifp, obytes, len);
1463 		carpstats.carps_opackets6++;
1464 
1465 		if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) {
1466 			IFNET_STAT_INC(cifp, oerrors, 1);
1467 			if (sc->sc_sendad_errors < INT_MAX)
1468 				sc->sc_sendad_errors++;
1469 			if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
1470 				carp_suppress_preempt++;
1471 				if (carp_suppress_preempt == 1) {
1472 					carp_send_ad_all();
1473 				}
1474 			}
1475 			sc->sc_sendad_success = 0;
1476 		} else {
1477 			if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
1478 				if (++sc->sc_sendad_success >=
1479 				    CARP_SENDAD_MIN_SUCCESS) {
1480 					carp_suppress_preempt--;
1481 					sc->sc_sendad_errors = 0;
1482 				}
1483 			} else {
1484 				sc->sc_sendad_errors = 0;
1485 			}
1486 		}
1487 	}
1488 #endif /* INET6 */
1489 
1490 	if (advbase != 255 || advskew != 255)
1491 		callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1492 		    carp_send_ad_timeout, sc);
1493 }
1494 
1495 /*
1496  * Broadcast a gratuitous ARP request containing
1497  * the virtual router MAC address for each IP address
1498  * associated with the virtual router.
1499  */
1500 static void
1501 carp_send_arp(struct carp_softc *sc)
1502 {
1503 	const struct carp_vhaddr *vha;
1504 
1505 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
1506 		if (vha->vha_iaback == NULL)
1507 			continue;
1508 		arp_gratuitous(&sc->sc_if, &vha->vha_ia->ia_ifa);
1509 	}
1510 }
1511 
#ifdef INET6
/*
 * Send a neighbor advertisement with the override flag set to the
 * link-local all-nodes address for every AF_INET6 address found on the
 * carp(4) interface's address list of the current cpu.  The
 * advertisements are sent through the parent interface (sc_carpdev),
 * with a DELAY(1000) after each of them.
 */
static void
carp_send_na(struct carp_softc *sc)
{
	static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
	struct ifaddr_container *ifac;

	TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) {
		struct ifaddr *ifa = ifac->ifa;

		if (ifa->ifa_addr->sa_family == AF_INET6) {
			nd6_na_output(sc->sc_carpdev, &mcast,
			    &ifatoia6(ifa)->ia_addr.sin6_addr,
			    ND_NA_FLAG_OVERRIDE, 1, NULL);
			DELAY(1000);	/* XXX */
		}
	}
}
#endif /* INET6 */
1533 
1534 static __inline const struct carp_vhaddr *
1535 carp_find_addr(const struct carp_softc *sc, const struct in_addr *addr)
1536 {
1537 	struct carp_vhaddr *vha;
1538 
1539 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
1540 		if (vha->vha_iaback == NULL)
1541 			continue;
1542 
1543 		if (vha->vha_ia->ia_addr.sin_addr.s_addr == addr->s_addr)
1544 			return vha;
1545 	}
1546 	return NULL;
1547 }
1548 
#ifdef notyet
/*
 * ARP balancing; only compiled if "notyet" is defined.
 *
 * XXX proof of concept implementation.
 * We use the source ip to decide which virtual host should
 * handle the request. If we're master of that virtual host,
 * then we respond, otherwise, just drop the arp packet on
 * the floor.
 *
 * Returns 1 and stores the link level address of the selected virtual
 * host in *enaddr if we should answer, 0 otherwise.
 */
static int
carp_iamatch_balance(const struct carp_if *cif, const struct in_addr *itaddr,
		     const struct in_addr *isaddr, uint8_t **enaddr)
{
	const struct carp_softc *vh;
	int eligible = 0, pick, seen;

	/* First pass: count the running virtual hosts owning itaddr */
	TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
		if (CARP_IS_RUNNING(&vh->sc_if) &&
		    carp_find_addr(vh, itaddr) != NULL)
			eligible++;
	}
	if (eligible == 0)
		return 0;

	/* this should be a hash, like pf_hash() */
	pick = ntohl(isaddr->s_addr) % eligible;

	/* Second pass: locate the pick'th of those virtual hosts */
	seen = 0;
	TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
		if (!CARP_IS_RUNNING(&vh->sc_if) ||
		    carp_find_addr(vh, itaddr) == NULL)
			continue;

		if (seen == pick) {
			if (vh->sc_state != MASTER)
				return 0;
			*enaddr = IF_LLADDR(&vh->sc_if);
			return 1;
		}
		seen++;
	}
	return 0;
}
#endif
1599 
1600 int
1601 carp_iamatch(const struct in_ifaddr *ia)
1602 {
1603 	const struct carp_softc *sc = ia->ia_ifp->if_softc;
1604 
1605 	ASSERT_IN_NETISR(0);
1606 
1607 #ifdef notyet
1608 	if (carp_opts[CARPCTL_ARPBALANCE])
1609 		return carp_iamatch_balance(cif, itaddr, isaddr, enaddr);
1610 #endif
1611 
1612 	if (!CARP_IS_RUNNING(&sc->sc_if) || sc->sc_state != MASTER)
1613 		return 0;
1614 
1615 	return 1;
1616 }
1617 
1618 #ifdef INET6
1619 struct ifaddr *
1620 carp_iamatch6(void *v, struct in6_addr *taddr)
1621 {
1622 #ifdef foo
1623 	struct carp_if *cif = v;
1624 	struct carp_softc *vh;
1625 
1626 	TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1627 		struct ifaddr_container *ifac;
1628 
1629 		TAILQ_FOREACH(ifac, &vh->sc_if.if_addrheads[mycpuid],
1630 			      ifa_link) {
1631 			struct ifaddr *ifa = ifac->ifa;
1632 
1633 			if (IN6_ARE_ADDR_EQUAL(taddr,
1634 			    &ifatoia6(ifa)->ia_addr.sin6_addr) &&
1635 			    CARP_IS_RUNNING(&vh->sc_if) &&
1636 			    vh->sc_state == MASTER) {
1637 				return (ifa);
1638 			}
1639 		}
1640 	}
1641 #endif
1642 	return (NULL);
1643 }
1644 
1645 void *
1646 carp_macmatch6(void *v, struct mbuf *m, const struct in6_addr *taddr)
1647 {
1648 #ifdef foo
1649 	struct m_tag *mtag;
1650 	struct carp_if *cif = v;
1651 	struct carp_softc *sc;
1652 
1653 	TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
1654 		struct ifaddr_container *ifac;
1655 
1656 		TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid],
1657 			      ifa_link) {
1658 			struct ifaddr *ifa = ifac->ifa;
1659 
1660 			if (IN6_ARE_ADDR_EQUAL(taddr,
1661 			    &ifatoia6(ifa)->ia_addr.sin6_addr) &&
1662 			    CARP_IS_RUNNING(&sc->sc_if)) {
1663 				struct ifnet *ifp = &sc->sc_if;
1664 
1665 				mtag = m_tag_get(PACKET_TAG_CARP,
1666 				    sizeof(struct ifnet *), M_NOWAIT);
1667 				if (mtag == NULL) {
1668 					/* better a bit than nothing */
1669 					return (IF_LLADDR(ifp));
1670 				}
1671 				bcopy(&ifp, (caddr_t)(mtag + 1),
1672 				    sizeof(struct ifnet *));
1673 				m_tag_prepend(m, mtag);
1674 
1675 				return (IF_LLADDR(ifp));
1676 			}
1677 		}
1678 	}
1679 #endif
1680 	return (NULL);
1681 }
1682 #endif
1683 
1684 static struct ifnet *
1685 carp_forus(struct carp_if *cif, const uint8_t *dhost)
1686 {
1687 	struct carp_softc_container *scc;
1688 
1689 	if (memcmp(dhost, carp_etheraddr, ETHER_ADDR_LEN - 1) != 0)
1690 		return NULL;
1691 
1692 	TAILQ_FOREACH(scc, cif, scc_link) {
1693 		struct carp_softc *sc = scc->scc_softc;
1694 		struct ifnet *ifp = &sc->sc_if;
1695 
1696 		if (CARP_IS_RUNNING(ifp) && sc->sc_state == MASTER &&
1697 		    !bcmp(dhost, IF_LLADDR(ifp), ETHER_ADDR_LEN))
1698 			return ifp;
1699 	}
1700 	return NULL;
1701 }
1702 
1703 static void
1704 carp_master_down_timeout(void *xsc)
1705 {
1706 	struct carp_softc *sc = xsc;
1707 	struct netmsg_carp *cmsg = &sc->sc_md_msg;
1708 
1709 	KASSERT(mycpuid == 0, ("%s not on cpu0 but on cpu%d",
1710 	    __func__, mycpuid));
1711 
1712 	crit_enter();
1713 	if (cmsg->base.lmsg.ms_flags & MSGF_DONE)
1714 		lwkt_sendmsg_oncpu(netisr_cpuport(0), &cmsg->base.lmsg);
1715 	crit_exit();
1716 }
1717 
1718 static void
1719 carp_master_down_timeout_dispatch(netmsg_t msg)
1720 {
1721 	struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
1722 	struct carp_softc *sc = cmsg->nc_softc;
1723 
1724 	/* Reply ASAP */
1725 	crit_enter();
1726 	lwkt_replymsg(&cmsg->base.lmsg, 0);
1727 	crit_exit();
1728 
1729 	CARP_DEBUG("%s: BACKUP -> MASTER (master timed out)\n",
1730 		   sc->sc_if.if_xname);
1731 	carp_master_down(sc);
1732 }
1733 
1734 static void
1735 carp_master_down(struct carp_softc *sc)
1736 {
1737 	switch (sc->sc_state) {
1738 	case INIT:
1739 		kprintf("%s: master_down event in INIT state\n",
1740 			sc->sc_if.if_xname);
1741 		break;
1742 
1743 	case MASTER:
1744 		break;
1745 
1746 	case BACKUP:
1747 		carp_set_state(sc, MASTER);
1748 		carp_send_ad(sc);
1749 		carp_send_arp(sc);
1750 #ifdef INET6
1751 		carp_send_na(sc);
1752 #endif /* INET6 */
1753 		carp_setrun(sc, 0);
1754 		carp_setroute(sc, RTM_ADD);
1755 		break;
1756 	}
1757 }
1758 
1759 /*
1760  * When in backup state, af indicates whether to reset the master down timer
1761  * for v4 or v6. If it's set to zero, reset the ones which are already pending.
1762  */
1763 static void
1764 carp_setrun(struct carp_softc *sc, sa_family_t af)
1765 {
1766 	struct ifnet *cifp = &sc->sc_if;
1767 	struct timeval tv;
1768 
1769 	if (sc->sc_carpdev == NULL) {
1770 		carp_set_state(sc, INIT);
1771 		return;
1772 	}
1773 
1774 	if ((cifp->if_flags & IFF_RUNNING) && sc->sc_vhid > 0 &&
1775 	    (sc->sc_naddrs || sc->sc_naddrs6)) {
1776 		/* Nothing */
1777 	} else {
1778 		carp_setroute(sc, RTM_DELETE);
1779 		return;
1780 	}
1781 
1782 	switch (sc->sc_state) {
1783 	case INIT:
1784 		if (carp_opts[CARPCTL_PREEMPT] && !carp_suppress_preempt) {
1785 			carp_send_ad(sc);
1786 			carp_send_arp(sc);
1787 #ifdef INET6
1788 			carp_send_na(sc);
1789 #endif /* INET6 */
1790 			CARP_DEBUG("%s: INIT -> MASTER (preempting)\n",
1791 				   cifp->if_xname);
1792 			carp_set_state(sc, MASTER);
1793 			carp_setroute(sc, RTM_ADD);
1794 		} else {
1795 			CARP_DEBUG("%s: INIT -> BACKUP\n", cifp->if_xname);
1796 			carp_set_state(sc, BACKUP);
1797 			carp_setroute(sc, RTM_DELETE);
1798 			carp_setrun(sc, 0);
1799 		}
1800 		break;
1801 
1802 	case BACKUP:
1803 		callout_stop(&sc->sc_ad_tmo);
1804 		tv.tv_sec = 3 * sc->sc_advbase;
1805 		tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1806 		switch (af) {
1807 #ifdef INET
1808 		case AF_INET:
1809 			callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv),
1810 			    carp_master_down_timeout, sc);
1811 			break;
1812 #endif /* INET */
1813 #ifdef INET6
1814 		case AF_INET6:
1815 			callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv),
1816 			    carp_master_down_timeout, sc);
1817 			break;
1818 #endif /* INET6 */
1819 		default:
1820 			if (sc->sc_naddrs)
1821 				callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv),
1822 				    carp_master_down_timeout, sc);
1823 			if (sc->sc_naddrs6)
1824 				callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv),
1825 				    carp_master_down_timeout, sc);
1826 			break;
1827 		}
1828 		break;
1829 
1830 	case MASTER:
1831 		tv.tv_sec = sc->sc_advbase;
1832 		tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1833 		callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1834 		    carp_send_ad_timeout, sc);
1835 		break;
1836 	}
1837 }
1838 
1839 static void
1840 carp_multicast_cleanup(struct carp_softc *sc)
1841 {
1842 	struct ip_moptions *imo = &sc->sc_imo;
1843 
1844 	if (imo->imo_num_memberships == 0)
1845 		return;
1846 	KKASSERT(imo->imo_num_memberships == 1);
1847 
1848 	in_delmulti(imo->imo_membership[0]);
1849 	imo->imo_membership[0] = NULL;
1850 	imo->imo_num_memberships = 0;
1851 	imo->imo_multicast_ifp = NULL;
1852 }
1853 
#ifdef INET6
/*
 * Leave all IPv6 multicast groups recorded in the softc's multicast
 * options and reset the options' multicast interface.
 */
static void
carp_multicast6_cleanup(struct carp_softc *sc)
{
	struct ip6_moptions *im6o = &sc->sc_im6o;
	struct in6_multi_mship *imm;

	/* Unlink each membership from the list before leaving its group */
	while ((imm = LIST_FIRST(&im6o->im6o_memberships)) != NULL) {
		LIST_REMOVE(imm, i6mm_chain);
		in6_leavegroup(imm);
	}
	im6o->im6o_multicast_ifp = NULL;
}
#endif
1870 
1871 static void
1872 carp_ioctl_getvhaddr_dispatch(netmsg_t msg)
1873 {
1874 	struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
1875 	struct carp_softc *sc = cmsg->nc_softc;
1876 	const struct carp_vhaddr *vha;
1877 	struct ifcarpvhaddr *carpa, *carpa0;
1878 	int count, len, error = 0;
1879 
1880 	count = 0;
1881 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
1882 		++count;
1883 
1884 	if (cmsg->nc_datalen == 0) {
1885 		cmsg->nc_datalen = count * sizeof(*carpa);
1886 		goto back;
1887 	} else if (count == 0 || cmsg->nc_datalen < sizeof(*carpa)) {
1888 		cmsg->nc_datalen = 0;
1889 		goto back;
1890 	}
1891 	len = min(cmsg->nc_datalen, sizeof(*carpa) * count);
1892 	KKASSERT(len >= sizeof(*carpa));
1893 
1894 	carpa0 = carpa = kmalloc(len, M_TEMP, M_WAITOK | M_NULLOK | M_ZERO);
1895 	if (carpa == NULL) {
1896 		error = ENOMEM;
1897 		goto back;
1898 	}
1899 
1900 	count = 0;
1901 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
1902 		if (len < sizeof(*carpa))
1903 			break;
1904 
1905 		carpa->carpa_flags = vha->vha_flags;
1906 		carpa->carpa_addr.sin_family = AF_INET;
1907 		carpa->carpa_addr.sin_addr = vha->vha_ia->ia_addr.sin_addr;
1908 
1909 		carpa->carpa_baddr.sin_family = AF_INET;
1910 		if (vha->vha_iaback == NULL) {
1911 			carpa->carpa_baddr.sin_addr.s_addr = INADDR_ANY;
1912 		} else {
1913 			carpa->carpa_baddr.sin_addr =
1914 			vha->vha_iaback->ia_addr.sin_addr;
1915 		}
1916 
1917 		++carpa;
1918 		++count;
1919 		len -= sizeof(*carpa);
1920 	}
1921 	cmsg->nc_datalen = sizeof(*carpa) * count;
1922 	KKASSERT(cmsg->nc_datalen > 0);
1923 
1924 	cmsg->nc_data = carpa0;
1925 
1926 back:
1927 	lwkt_replymsg(&cmsg->base.lmsg, error);
1928 }
1929 
1930 static int
1931 carp_ioctl_getvhaddr(struct carp_softc *sc, struct ifdrv *ifd)
1932 {
1933 	struct ifnet *ifp = &sc->arpcom.ac_if;
1934 	struct netmsg_carp cmsg;
1935 	int error;
1936 
1937 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
1938 	ifnet_deserialize_all(ifp);
1939 
1940 	bzero(&cmsg, sizeof(cmsg));
1941 	netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
1942 	    carp_ioctl_getvhaddr_dispatch);
1943 	cmsg.nc_softc = sc;
1944 	cmsg.nc_datalen = ifd->ifd_len;
1945 
1946 	error = lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
1947 
1948 	if (!error) {
1949 		if (cmsg.nc_data != NULL) {
1950 			error = copyout(cmsg.nc_data, ifd->ifd_data,
1951 			    cmsg.nc_datalen);
1952 			kfree(cmsg.nc_data, M_TEMP);
1953 		}
1954 		ifd->ifd_len = cmsg.nc_datalen;
1955 	} else {
1956 		KASSERT(cmsg.nc_data == NULL,
1957 		    ("%s temp vhaddr is alloc upon error", __func__));
1958 	}
1959 
1960 	ifnet_serialize_all(ifp);
1961 	return error;
1962 }
1963 
1964 static int
1965 carp_config_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha,
1966     struct in_ifaddr *ia_del)
1967 {
1968 	struct ifnet *ifp;
1969 	struct in_ifaddr *ia_if;
1970 	const struct in_ifaddr *ia_vha;
1971 	struct in_ifaddr_container *iac;
1972 	int own, ia_match_carpdev;
1973 
1974 	KKASSERT(vha->vha_ia != NULL);
1975 	ia_vha = vha->vha_ia;
1976 
1977 	ia_if = NULL;
1978 	own = 0;
1979 	ia_match_carpdev = 0;
1980 	TAILQ_FOREACH(iac, &in_ifaddrheads[mycpuid], ia_link) {
1981 		struct in_ifaddr *ia = iac->ia;
1982 
1983 		if (ia == ia_del)
1984 			continue;
1985 
1986 		if (ia->ia_ifp->if_type == IFT_CARP)
1987 			continue;
1988 
1989 		if ((ia->ia_ifp->if_flags & IFF_UP) == 0)
1990 			continue;
1991 
1992 		/* and, yeah, we need a multicast-capable iface too */
1993 		if ((ia->ia_ifp->if_flags & IFF_MULTICAST) == 0)
1994 			continue;
1995 
1996 		if (ia_vha->ia_subnetmask == ia->ia_subnetmask &&
1997 		    ia_vha->ia_subnet == ia->ia_subnet) {
1998 			if (ia_vha->ia_addr.sin_addr.s_addr ==
1999 			    ia->ia_addr.sin_addr.s_addr)
2000 				own = 1;
2001 			if (ia_if == NULL) {
2002 				ia_if = ia;
2003 			} else if (sc->sc_carpdev != NULL &&
2004 			    sc->sc_carpdev == ia->ia_ifp) {
2005 				ia_if = ia;
2006 				if (ia_if->ia_flags & IFA_ROUTE) {
2007 					/*
2008 					 * Address with prefix route
2009 					 * is prefered
2010 					 */
2011 					break;
2012 				}
2013 				ia_match_carpdev = 1;
2014 			} else if (!ia_match_carpdev) {
2015 				if (ia->ia_flags & IFA_ROUTE) {
2016 					/*
2017 					 * Address with prefix route
2018 					 * is prefered over others.
2019 					 */
2020 					ia_if = ia;
2021 				}
2022 			}
2023 		}
2024 	}
2025 
2026 	carp_deactivate_vhaddr(sc, vha, FALSE);
2027 	if (!ia_if)
2028 		return ENOENT;
2029 
2030 	ifp = ia_if->ia_ifp;
2031 
2032 	/* XXX Don't allow parent iface to be changed */
2033 	if (sc->sc_carpdev != NULL && sc->sc_carpdev != ifp)
2034 		return EEXIST;
2035 
2036 	return carp_activate_vhaddr(sc, vha, ifp, ia_if, own);
2037 }
2038 
2039 static void
2040 carp_add_addr(struct carp_softc *sc, struct ifaddr *carp_ifa)
2041 {
2042 	struct carp_vhaddr *vha_new;
2043 	struct in_ifaddr *carp_ia;
2044 #ifdef INVARIANTS
2045 	struct carp_vhaddr *vha;
2046 #endif
2047 
2048 	KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET);
2049 	carp_ia = ifatoia(carp_ifa);
2050 
2051 #ifdef INVARIANTS
2052 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
2053 		KKASSERT(vha->vha_ia != NULL && vha->vha_ia != carp_ia);
2054 #endif
2055 
2056 	vha_new = kmalloc(sizeof(*vha_new), M_CARP, M_WAITOK | M_ZERO);
2057 	vha_new->vha_ia = carp_ia;
2058 	carp_insert_vhaddr(sc, vha_new);
2059 
2060 	if (carp_config_vhaddr(sc, vha_new, NULL) != 0) {
2061 		/*
2062 		 * If the above configuration fails, it may only mean
2063 		 * that the new address is problematic.  However, the
2064 		 * carp(4) interface may already have several working
2065 		 * addresses.  Since the expected behaviour of
2066 		 * SIOC[AS]IFADDR is to put the NIC into working state,
2067 		 * we try starting the state machine manually here with
2068 		 * the hope that the carp(4)'s previously working
2069 		 * addresses still could be brought up.
2070 		 */
2071 		carp_hmac_prepare(sc);
2072 		carp_set_state(sc, INIT);
2073 		carp_setrun(sc, 0);
2074 	}
2075 }
2076 
2077 static void
2078 carp_del_addr(struct carp_softc *sc, struct ifaddr *carp_ifa)
2079 {
2080 	struct carp_vhaddr *vha;
2081 	struct in_ifaddr *carp_ia;
2082 
2083 	KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET);
2084 	carp_ia = ifatoia(carp_ifa);
2085 
2086 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
2087 		KKASSERT(vha->vha_ia != NULL);
2088 		if (vha->vha_ia == carp_ia)
2089 			break;
2090 	}
2091 	KASSERT(vha != NULL, ("no corresponding vhaddr %p", carp_ifa));
2092 
2093 	/*
2094 	 * Remove the vhaddr from the list before deactivating
2095 	 * the vhaddr, so that the HMAC could be correctly
2096 	 * updated in carp_deactivate_vhaddr()
2097 	 */
2098 	carp_remove_vhaddr(sc, vha);
2099 
2100 	carp_deactivate_vhaddr(sc, vha, FALSE);
2101 	kfree(vha, M_CARP);
2102 }
2103 
2104 static void
2105 carp_config_addr(struct carp_softc *sc, struct ifaddr *carp_ifa)
2106 {
2107 	struct carp_vhaddr *vha;
2108 	struct in_ifaddr *carp_ia;
2109 
2110 	KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET);
2111 	carp_ia = ifatoia(carp_ifa);
2112 
2113 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
2114 		KKASSERT(vha->vha_ia != NULL);
2115 		if (vha->vha_ia == carp_ia)
2116 			break;
2117 	}
2118 	KASSERT(vha != NULL, ("no corresponding vhaddr %p", carp_ifa));
2119 
2120 	/* Remove then reinsert, to keep the vhaddr list sorted */
2121 	carp_remove_vhaddr(sc, vha);
2122 	carp_insert_vhaddr(sc, vha);
2123 
2124 	if (carp_config_vhaddr(sc, vha, NULL) != 0) {
2125 		/* See the comment in carp_add_addr() */
2126 		carp_hmac_prepare(sc);
2127 		carp_set_state(sc, INIT);
2128 		carp_setrun(sc, 0);
2129 	}
2130 }
2131 
2132 #ifdef notyet
2133 
2134 #ifdef INET6
2135 static int
2136 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
2137 {
2138 	struct ifnet *ifp;
2139 	struct carp_if *cif;
2140 	struct in6_ifaddr *ia, *ia_if;
2141 	struct ip6_moptions *im6o = &sc->sc_im6o;
2142 	struct in6_multi_mship *imm;
2143 	struct in6_addr in6;
2144 	int own, error;
2145 
2146 	if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
2147 		carp_setrun(sc, 0);
2148 		return (0);
2149 	}
2150 
2151 	/* we have to do it by hands to check we won't match on us */
2152 	ia_if = NULL; own = 0;
2153 	for (ia = in6_ifaddr; ia; ia = ia->ia_next) {
2154 		int i;
2155 
2156 		for (i = 0; i < 4; i++) {
2157 			if ((sin6->sin6_addr.s6_addr32[i] &
2158 			    ia->ia_prefixmask.sin6_addr.s6_addr32[i]) !=
2159 			    (ia->ia_addr.sin6_addr.s6_addr32[i] &
2160 			    ia->ia_prefixmask.sin6_addr.s6_addr32[i]))
2161 				break;
2162 		}
2163 		/* and, yeah, we need a multicast-capable iface too */
2164 		if (ia->ia_ifp != &sc->sc_if &&
2165 		    (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
2166 		    (i == 4)) {
2167 			if (!ia_if)
2168 				ia_if = ia;
2169 			if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
2170 			    &ia->ia_addr.sin6_addr))
2171 				own++;
2172 		}
2173 	}
2174 
2175 	if (!ia_if)
2176 		return (EADDRNOTAVAIL);
2177 	ia = ia_if;
2178 	ifp = ia->ia_ifp;
2179 
2180 	if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 ||
2181 	    (im6o->im6o_multicast_ifp && im6o->im6o_multicast_ifp != ifp))
2182 		return (EADDRNOTAVAIL);
2183 
2184 	if (!sc->sc_naddrs6) {
2185 		im6o->im6o_multicast_ifp = ifp;
2186 
2187 		/* join CARP multicast address */
2188 		bzero(&in6, sizeof(in6));
2189 		in6.s6_addr16[0] = htons(0xff02);
2190 		in6.s6_addr8[15] = 0x12;
2191 		if (in6_setscope(&in6, ifp, NULL) != 0)
2192 			goto cleanup;
2193 		if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL)
2194 			goto cleanup;
2195 		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
2196 
2197 		/* join solicited multicast address */
2198 		bzero(&in6, sizeof(in6));
2199 		in6.s6_addr16[0] = htons(0xff02);
2200 		in6.s6_addr32[1] = 0;
2201 		in6.s6_addr32[2] = htonl(1);
2202 		in6.s6_addr32[3] = sin6->sin6_addr.s6_addr32[3];
2203 		in6.s6_addr8[12] = 0xff;
2204 		if (in6_setscope(&in6, ifp, NULL) != 0)
2205 			goto cleanup;
2206 		if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL)
2207 			goto cleanup;
2208 		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
2209 	}
2210 
2211 #ifdef foo
2212 	if (!ifp->if_carp) {
2213 		cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO);
2214 
2215 		if ((error = ifpromisc(ifp, 1))) {
2216 			kfree(cif, M_CARP);
2217 			goto cleanup;
2218 		}
2219 
2220 		TAILQ_INIT(&cif->vhif_vrs);
2221 		ifp->if_carp = cif;
2222 	} else {
2223 		struct carp_softc *vr;
2224 
2225 		cif = ifp->if_carp;
2226 		TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
2227 			if (vr != sc && vr->sc_vhid == sc->sc_vhid) {
2228 				error = EINVAL;
2229 				goto cleanup;
2230 			}
2231 		}
2232 	}
2233 #endif
2234 	sc->sc_ia6 = ia;
2235 	sc->sc_carpdev = ifp;
2236 
2237 #ifdef foo
2238 	{ /* XXX prevent endless loop if already in queue */
2239 	struct carp_softc *vr, *after = NULL;
2240 	int myself = 0;
2241 	cif = ifp->if_carp;
2242 
2243 	TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
2244 		if (vr == sc)
2245 			myself = 1;
2246 		if (vr->sc_vhid < sc->sc_vhid)
2247 			after = vr;
2248 	}
2249 
2250 	if (!myself) {
2251 		/* We're trying to keep things in order */
2252 		if (after == NULL)
2253 			TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
2254 		else
2255 			TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
2256 	}
2257 	}
2258 #endif
2259 
2260 	sc->sc_naddrs6++;
2261 	if (own)
2262 		sc->sc_advskew = 0;
2263 	carp_sc_state(sc);
2264 	carp_setrun(sc, 0);
2265 
2266 	return (0);
2267 
2268 cleanup:
2269 	/* clean up multicast memberships */
2270 	if (!sc->sc_naddrs6) {
2271 		while (!LIST_EMPTY(&im6o->im6o_memberships)) {
2272 			imm = LIST_FIRST(&im6o->im6o_memberships);
2273 			LIST_REMOVE(imm, i6mm_chain);
2274 			in6_leavegroup(imm);
2275 		}
2276 	}
2277 	return (error);
2278 }
2279 
2280 static int
2281 carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
2282 {
2283 	int error = 0;
2284 
2285 	if (!--sc->sc_naddrs6) {
2286 		struct carp_if *cif = sc->sc_carpdev->if_carp;
2287 		struct ip6_moptions *im6o = &sc->sc_im6o;
2288 
2289 		callout_stop(&sc->sc_ad_tmo);
2290 		sc->sc_vhid = -1;
2291 		while (!LIST_EMPTY(&im6o->im6o_memberships)) {
2292 			struct in6_multi_mship *imm =
2293 			    LIST_FIRST(&im6o->im6o_memberships);
2294 
2295 			LIST_REMOVE(imm, i6mm_chain);
2296 			in6_leavegroup(imm);
2297 		}
2298 		im6o->im6o_multicast_ifp = NULL;
2299 #ifdef foo
2300 		TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
2301 		if (TAILQ_EMPTY(&cif->vhif_vrs)) {
2302 			sc->sc_carpdev->if_carp = NULL;
2303 			kfree(cif, M_IFADDR);
2304 		}
2305 #endif
2306 	}
2307 	return (error);
2308 }
2309 #endif /* INET6 */
2310 
2311 #endif
2312 
2313 static int
2314 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr, struct ucred *cr)
2315 {
2316 	struct carp_softc *sc = ifp->if_softc;
2317 	struct ifreq *ifr = (struct ifreq *)addr;
2318 	struct ifdrv *ifd = (struct ifdrv *)addr;
2319 	int error = 0;
2320 
2321 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
2322 
2323 	switch (cmd) {
2324 	case SIOCSIFFLAGS:
2325 		if (ifp->if_flags & IFF_UP) {
2326 			if ((ifp->if_flags & IFF_RUNNING) == 0)
2327 				carp_init(sc);
2328 		} else if (ifp->if_flags & IFF_RUNNING) {
2329 			carp_ioctl_stop(sc);
2330 		}
2331 		break;
2332 
2333 	case SIOCSIFCAP:
2334 		carp_ioctl_ifcap(sc, ifr->ifr_reqcap);
2335 		break;
2336 
2337 	case SIOCSVH:
2338 		error = carp_ioctl_setvh(sc, ifr->ifr_data, cr);
2339 		break;
2340 
2341 	case SIOCGVH:
2342 		error = carp_ioctl_getvh(sc, ifr->ifr_data, cr);
2343 		break;
2344 
2345 	case SIOCGDRVSPEC:
2346 		switch (ifd->ifd_cmd) {
2347 		case CARPGDEVNAME:
2348 			error = carp_ioctl_getdevname(sc, ifd);
2349 			break;
2350 
2351 		case CARPGVHADDR:
2352 			error = carp_ioctl_getvhaddr(sc, ifd);
2353 			break;
2354 
2355 		default:
2356 			error = EINVAL;
2357 			break;
2358 		}
2359 		break;
2360 
2361 	default:
2362 		error = ether_ioctl(ifp, cmd, addr);
2363 		break;
2364 	}
2365 
2366 	return error;
2367 }
2368 
2369 static void
2370 carp_ioctl_stop_dispatch(netmsg_t msg)
2371 {
2372 	struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2373 	struct carp_softc *sc = cmsg->nc_softc;
2374 
2375 	carp_stop(sc, FALSE);
2376 	lwkt_replymsg(&cmsg->base.lmsg, 0);
2377 }
2378 
2379 static void
2380 carp_ioctl_stop(struct carp_softc *sc)
2381 {
2382 	struct ifnet *ifp = &sc->arpcom.ac_if;
2383 	struct netmsg_carp cmsg;
2384 
2385 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
2386 
2387 	ifnet_deserialize_all(ifp);
2388 
2389 	bzero(&cmsg, sizeof(cmsg));
2390 	netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2391 	    carp_ioctl_stop_dispatch);
2392 	cmsg.nc_softc = sc;
2393 
2394 	lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2395 
2396 	ifnet_serialize_all(ifp);
2397 }
2398 
2399 static void
2400 carp_ioctl_setvh_dispatch(netmsg_t msg)
2401 {
2402 	struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2403 	struct carp_softc *sc = cmsg->nc_softc;
2404 	struct ifnet *ifp = &sc->arpcom.ac_if;
2405 	const struct carpreq *carpr = cmsg->nc_data;
2406 	int error;
2407 
2408 	error = 1;
2409 	if ((ifp->if_flags & IFF_RUNNING) &&
2410 	    sc->sc_state != INIT && carpr->carpr_state != sc->sc_state) {
2411 		switch (carpr->carpr_state) {
2412 		case BACKUP:
2413 			callout_stop(&sc->sc_ad_tmo);
2414 			carp_set_state(sc, BACKUP);
2415 			carp_setrun(sc, 0);
2416 			carp_setroute(sc, RTM_DELETE);
2417 			break;
2418 
2419 		case MASTER:
2420 			carp_master_down(sc);
2421 			break;
2422 
2423 		default:
2424 			break;
2425 		}
2426 	}
2427 	if (carpr->carpr_vhid > 0) {
2428 		if (carpr->carpr_vhid > 255) {
2429 			error = EINVAL;
2430 			goto back;
2431 		}
2432 		if (sc->sc_carpdev) {
2433 			struct carp_if *cif = sc->sc_carpdev->if_carp;
2434 			struct carp_softc_container *scc;
2435 
2436 			TAILQ_FOREACH(scc, cif, scc_link) {
2437 				struct carp_softc *vr = scc->scc_softc;
2438 
2439 				if (vr != sc &&
2440 				    vr->sc_vhid == carpr->carpr_vhid) {
2441 					error = EEXIST;
2442 					goto back;
2443 				}
2444 			}
2445 		}
2446 		sc->sc_vhid = carpr->carpr_vhid;
2447 
2448 		IF_LLADDR(ifp)[5] = sc->sc_vhid;
2449 		bcopy(IF_LLADDR(ifp), sc->arpcom.ac_enaddr,
2450 		    ETHER_ADDR_LEN);
2451 
2452 		error--;
2453 	}
2454 	if (carpr->carpr_advbase > 0 || carpr->carpr_advskew > 0) {
2455 		if (carpr->carpr_advskew >= 255) {
2456 			error = EINVAL;
2457 			goto back;
2458 		}
2459 		if (carpr->carpr_advbase > 255) {
2460 			error = EINVAL;
2461 			goto back;
2462 		}
2463 		sc->sc_advbase = carpr->carpr_advbase;
2464 		sc->sc_advskew = carpr->carpr_advskew;
2465 		error--;
2466 	}
2467 	bcopy(carpr->carpr_key, sc->sc_key, sizeof(sc->sc_key));
2468 	if (error > 0) {
2469 		error = EINVAL;
2470 	} else {
2471 		error = 0;
2472 		carp_setrun(sc, 0);
2473 	}
2474 back:
2475 	carp_hmac_prepare(sc);
2476 
2477 	lwkt_replymsg(&cmsg->base.lmsg, error);
2478 }
2479 
2480 static int
2481 carp_ioctl_setvh(struct carp_softc *sc, void *udata, struct ucred *cr)
2482 {
2483 	struct ifnet *ifp = &sc->arpcom.ac_if;
2484 	struct netmsg_carp cmsg;
2485 	struct carpreq carpr;
2486 	int error;
2487 
2488 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
2489 	ifnet_deserialize_all(ifp);
2490 
2491 	error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY);
2492 	if (error)
2493 		goto back;
2494 
2495 	error = copyin(udata, &carpr, sizeof(carpr));
2496 	if (error)
2497 		goto back;
2498 
2499 	bzero(&cmsg, sizeof(cmsg));
2500 	netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2501 	    carp_ioctl_setvh_dispatch);
2502 	cmsg.nc_softc = sc;
2503 	cmsg.nc_data = &carpr;
2504 
2505 	error = lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2506 
2507 back:
2508 	ifnet_serialize_all(ifp);
2509 	return error;
2510 }
2511 
2512 static void
2513 carp_ioctl_ifcap_dispatch(netmsg_t msg)
2514 {
2515 	struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2516 	struct carp_softc *sc = cmsg->nc_softc;
2517 	struct ifnet *ifp = &sc->arpcom.ac_if;
2518 	int reqcap = *((const int *)(cmsg->nc_data));
2519 	int mask;
2520 
2521 	mask = reqcap ^ ifp->if_capenable;
2522 	if (mask & IFCAP_TXCSUM) {
2523 		ifp->if_capenable ^= IFCAP_TXCSUM;
2524 		if ((ifp->if_capenable & IFCAP_TXCSUM) &&
2525 		    sc->sc_carpdev != NULL) {
2526 			ifp->if_hwassist |=
2527 			    (sc->sc_carpdev->if_hwassist &
2528 			     (CSUM_IP | CSUM_UDP | CSUM_TCP));
2529 		} else {
2530 			ifp->if_hwassist &= ~(CSUM_IP | CSUM_UDP | CSUM_TCP);
2531 		}
2532 	}
2533 	if (mask & IFCAP_TSO) {
2534 		ifp->if_capenable ^= IFCAP_TSO;
2535 		if ((ifp->if_capenable & IFCAP_TSO) &&
2536 		    sc->sc_carpdev != NULL) {
2537 			ifp->if_hwassist |=
2538 			    (sc->sc_carpdev->if_hwassist & CSUM_TSO);
2539 		} else {
2540 			ifp->if_hwassist &= ~CSUM_TSO;
2541 		}
2542 	}
2543 
2544 	lwkt_replymsg(&cmsg->base.lmsg, 0);
2545 }
2546 
2547 static void
2548 carp_ioctl_ifcap(struct carp_softc *sc, int reqcap)
2549 {
2550 	struct ifnet *ifp = &sc->arpcom.ac_if;
2551 	struct netmsg_carp cmsg;
2552 
2553 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
2554 	ifnet_deserialize_all(ifp);
2555 
2556 	bzero(&cmsg, sizeof(cmsg));
2557 	netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2558 	    carp_ioctl_ifcap_dispatch);
2559 	cmsg.nc_softc = sc;
2560 	cmsg.nc_data = &reqcap;
2561 
2562 	lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2563 
2564 	ifnet_serialize_all(ifp);
2565 }
2566 
2567 static void
2568 carp_ioctl_getvh_dispatch(netmsg_t msg)
2569 {
2570 	struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2571 	struct carp_softc *sc = cmsg->nc_softc;
2572 	struct carpreq *carpr = cmsg->nc_data;
2573 
2574 	carpr->carpr_state = sc->sc_state;
2575 	carpr->carpr_vhid = sc->sc_vhid;
2576 	carpr->carpr_advbase = sc->sc_advbase;
2577 	carpr->carpr_advskew = sc->sc_advskew;
2578 	bcopy(sc->sc_key, carpr->carpr_key, sizeof(carpr->carpr_key));
2579 
2580 	lwkt_replymsg(&cmsg->base.lmsg, 0);
2581 }
2582 
2583 static int
2584 carp_ioctl_getvh(struct carp_softc *sc, void *udata, struct ucred *cr)
2585 {
2586 	struct ifnet *ifp = &sc->arpcom.ac_if;
2587 	struct netmsg_carp cmsg;
2588 	struct carpreq carpr;
2589 	int error;
2590 
2591 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
2592 	ifnet_deserialize_all(ifp);
2593 
2594 	bzero(&cmsg, sizeof(cmsg));
2595 	netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2596 	    carp_ioctl_getvh_dispatch);
2597 	cmsg.nc_softc = sc;
2598 	cmsg.nc_data = &carpr;
2599 
2600 	lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2601 
2602 	error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY);
2603 	if (error)
2604 		bzero(carpr.carpr_key, sizeof(carpr.carpr_key));
2605 
2606 	error = copyout(&carpr, udata, sizeof(carpr));
2607 
2608 	ifnet_serialize_all(ifp);
2609 	return error;
2610 }
2611 
2612 static void
2613 carp_ioctl_getdevname_dispatch(netmsg_t msg)
2614 {
2615 	struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2616 	struct carp_softc *sc = cmsg->nc_softc;
2617 	char *devname = cmsg->nc_data;
2618 
2619 	bzero(devname, IFNAMSIZ);
2620 	if (sc->sc_carpdev != NULL)
2621 		strlcpy(devname, sc->sc_carpdev->if_xname, IFNAMSIZ);
2622 
2623 	lwkt_replymsg(&cmsg->base.lmsg, 0);
2624 }
2625 
2626 static int
2627 carp_ioctl_getdevname(struct carp_softc *sc, struct ifdrv *ifd)
2628 {
2629 	struct ifnet *ifp = &sc->arpcom.ac_if;
2630 	struct netmsg_carp cmsg;
2631 	char devname[IFNAMSIZ];
2632 	int error;
2633 
2634 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
2635 
2636 	if (ifd->ifd_len != sizeof(devname))
2637 		return EINVAL;
2638 
2639 	ifnet_deserialize_all(ifp);
2640 
2641 	bzero(&cmsg, sizeof(cmsg));
2642 	netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2643 	    carp_ioctl_getdevname_dispatch);
2644 	cmsg.nc_softc = sc;
2645 	cmsg.nc_data = devname;
2646 
2647 	lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2648 
2649 	error = copyout(devname, ifd->ifd_data, sizeof(devname));
2650 
2651 	ifnet_serialize_all(ifp);
2652 	return error;
2653 }
2654 
2655 static void
2656 carp_init_dispatch(netmsg_t msg)
2657 {
2658 	struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2659 	struct carp_softc *sc = cmsg->nc_softc;
2660 
2661 	sc->sc_if.if_flags |= IFF_RUNNING;
2662 	carp_hmac_prepare(sc);
2663 	carp_set_state(sc, INIT);
2664 	carp_setrun(sc, 0);
2665 
2666 	lwkt_replymsg(&cmsg->base.lmsg, 0);
2667 }
2668 
2669 static void
2670 carp_init(void *xsc)
2671 {
2672 	struct carp_softc *sc = xsc;
2673 	struct ifnet *ifp = &sc->arpcom.ac_if;
2674 	struct netmsg_carp cmsg;
2675 
2676 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
2677 
2678 	ifnet_deserialize_all(ifp);
2679 
2680 	bzero(&cmsg, sizeof(cmsg));
2681 	netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2682 	    carp_init_dispatch);
2683 	cmsg.nc_softc = sc;
2684 
2685 	lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2686 
2687 	ifnet_serialize_all(ifp);
2688 }
2689 
2690 static int
2691 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
2692     struct rtentry *rt)
2693 {
2694 	struct carp_softc *sc = ifp->if_softc;
2695 	struct ifnet *carpdev;
2696 	int error = 0;
2697 
2698 	carpdev = sc->sc_carpdev;
2699 	if (carpdev != NULL) {
2700 		if (m->m_flags & M_MCAST)
2701 			IFNET_STAT_INC(ifp, omcasts, 1);
2702 		IFNET_STAT_INC(ifp, obytes, m->m_pkthdr.len + ETHER_HDR_LEN);
2703 		IFNET_STAT_INC(ifp, opackets, 1);
2704 
2705 		/*
2706 		 * NOTE:
2707 		 * CARP's ifp is passed to backing device's
2708 		 * if_output method.
2709 		 */
2710 		carpdev->if_output(ifp, m, dst, rt);
2711 	} else {
2712 		IFNET_STAT_INC(ifp, oerrors, 1);
2713 		m_freem(m);
2714 		error = ENETUNREACH;
2715 	}
2716 	return error;
2717 }
2718 
2719 /*
2720  * Start output on carp interface. This function should never be called.
2721  */
2722 static void
2723 carp_start(struct ifnet *ifp, struct ifaltq_subque *ifsq __unused)
2724 {
2725 	panic("%s: start called", ifp->if_xname);
2726 }
2727 
2728 static void
2729 carp_set_state(struct carp_softc *sc, int state)
2730 {
2731 	struct ifnet *cifp = &sc->sc_if;
2732 
2733 	if (sc->sc_state == state)
2734 		return;
2735 	sc->sc_state = state;
2736 
2737 	switch (sc->sc_state) {
2738 	case BACKUP:
2739 		cifp->if_link_state = LINK_STATE_DOWN;
2740 		break;
2741 
2742 	case MASTER:
2743 		cifp->if_link_state = LINK_STATE_UP;
2744 		break;
2745 
2746 	default:
2747 		cifp->if_link_state = LINK_STATE_UNKNOWN;
2748 		break;
2749 	}
2750 	rt_ifmsg(cifp);
2751 }
2752 
2753 void
2754 carp_group_demote_adj(struct ifnet *ifp, int adj)
2755 {
2756 	struct ifg_list	*ifgl;
2757 	int *dm;
2758 
2759 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
2760 		if (!strcmp(ifgl->ifgl_group->ifg_group, IFG_ALL))
2761 			continue;
2762 		dm = &ifgl->ifgl_group->ifg_carp_demoted;
2763 
2764 		if (*dm + adj >= 0)
2765 			*dm += adj;
2766 		else
2767 			*dm = 0;
2768 
2769 		if (adj > 0 && *dm == 1)
2770 			carp_send_ad_all();
2771 		CARP_LOG("%s demoted group %s to %d", ifp->if_xname,
2772                     ifgl->ifgl_group->ifg_group, *dm);
2773 	}
2774 }
2775 
2776 #ifdef foo
2777 void
2778 carp_carpdev_state(void *v)
2779 {
2780 	struct carp_if *cif = v;
2781 	struct carp_softc *sc;
2782 
2783 	TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list)
2784 		carp_sc_state(sc);
2785 }
2786 
2787 static void
2788 carp_sc_state(struct carp_softc *sc)
2789 {
2790 	if (!(sc->sc_carpdev->if_flags & IFF_UP)) {
2791 		callout_stop(&sc->sc_ad_tmo);
2792 		callout_stop(&sc->sc_md_tmo);
2793 		callout_stop(&sc->sc_md6_tmo);
2794 		carp_set_state(sc, INIT);
2795 		carp_setrun(sc, 0);
2796 		if (!sc->sc_suppress) {
2797 			carp_suppress_preempt++;
2798 			if (carp_suppress_preempt == 1)
2799 				carp_send_ad_all();
2800 		}
2801 		sc->sc_suppress = 1;
2802 	} else {
2803 		carp_set_state(sc, INIT);
2804 		carp_setrun(sc, 0);
2805 		if (sc->sc_suppress)
2806 			carp_suppress_preempt--;
2807 		sc->sc_suppress = 0;
2808 	}
2809 }
2810 #endif
2811 
2812 static void
2813 carp_stop(struct carp_softc *sc, boolean_t detach)
2814 {
2815 	sc->sc_if.if_flags &= ~IFF_RUNNING;
2816 
2817 	callout_stop(&sc->sc_ad_tmo);
2818 	callout_stop(&sc->sc_md_tmo);
2819 	callout_stop(&sc->sc_md6_tmo);
2820 
2821 	if (!detach && sc->sc_state == MASTER)
2822 		carp_send_ad(sc);
2823 
2824 	if (sc->sc_suppress)
2825 		carp_suppress_preempt--;
2826 	sc->sc_suppress = 0;
2827 
2828 	if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS)
2829 		carp_suppress_preempt--;
2830 	sc->sc_sendad_errors = 0;
2831 	sc->sc_sendad_success = 0;
2832 
2833 	carp_set_state(sc, INIT);
2834 	carp_setrun(sc, 0);
2835 }
2836 
2837 static void
2838 carp_suspend(struct carp_softc *sc, boolean_t detach)
2839 {
2840 	struct ifnet *cifp = &sc->sc_if;
2841 
2842 	carp_stop(sc, detach);
2843 
2844 	/* Retain the running state, if we are not dead yet */
2845 	if (!sc->sc_dead && (cifp->if_flags & IFF_UP))
2846 		cifp->if_flags |= IFF_RUNNING;
2847 }
2848 
2849 static int
2850 carp_activate_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha,
2851     struct ifnet *ifp, struct in_ifaddr *ia_if, int own)
2852 {
2853 	struct ip_moptions *imo = &sc->sc_imo;
2854 	struct carp_if *ocif = ifp->if_carp;
2855 	int error;
2856 
2857 	KKASSERT(vha->vha_ia != NULL);
2858 
2859 	KASSERT(ia_if != NULL, ("NULL backing address"));
2860 	KASSERT(vha->vha_iaback == NULL, ("%p is already activated", vha));
2861 	KASSERT((vha->vha_flags & CARP_VHAF_OWNER) == 0,
2862 		("inactive vhaddr %p is the address owner", vha));
2863 
2864 	KASSERT(sc->sc_carpdev == NULL || sc->sc_carpdev == ifp,
2865 		("%s is already on %s", sc->sc_if.if_xname,
2866 		 sc->sc_carpdev->if_xname));
2867 
2868 	if (ocif == NULL) {
2869 		KASSERT(sc->sc_carpdev == NULL,
2870 			("%s is already on %s", sc->sc_if.if_xname,
2871 			 sc->sc_carpdev->if_xname));
2872 
2873 		error = ifpromisc(ifp, 1);
2874 		if (error)
2875 			return error;
2876 	} else {
2877 		struct carp_softc_container *scc;
2878 
2879 		TAILQ_FOREACH(scc, ocif, scc_link) {
2880 			struct carp_softc *vr = scc->scc_softc;
2881 
2882 			if (vr != sc && vr->sc_vhid == sc->sc_vhid)
2883 				return EINVAL;
2884 		}
2885 	}
2886 
2887 	ifp->if_carp = carp_if_insert(ocif, sc);
2888 	KASSERT(ifp->if_carp != NULL, ("%s carp_if_insert failed", __func__));
2889 
2890 	sc->sc_ia = ia_if;
2891 	sc->sc_carpdev = ifp;
2892 	sc->arpcom.ac_if.if_hwassist = 0;
2893 	if (sc->arpcom.ac_if.if_capenable & IFCAP_TXCSUM) {
2894 		sc->arpcom.ac_if.if_hwassist |=
2895 		    (ifp->if_hwassist & (CSUM_IP | CSUM_UDP | CSUM_TCP));
2896 	}
2897 	if (sc->arpcom.ac_if.if_capenable & IFCAP_TSO)
2898 		sc->arpcom.ac_if.if_hwassist |= (ifp->if_hwassist & CSUM_TSO);
2899 
2900 	/*
2901 	 * Make sure that all protocol threads see the sc_carpdev and
2902 	 * if_carp changes
2903 	 */
2904 	netmsg_service_sync();
2905 
2906 	if (ocif != NULL && ifp->if_carp != ocif) {
2907 		/*
2908 		 * The old carp list could be safely free now,
2909 		 * since no one can access it.
2910 		 */
2911 		carp_if_free(ocif);
2912 	}
2913 
2914 	vha->vha_iaback = ia_if;
2915 	sc->sc_naddrs++;
2916 
2917 	if (own) {
2918 		vha->vha_flags |= CARP_VHAF_OWNER;
2919 
2920 		/* XXX save user configured advskew? */
2921 		sc->sc_advskew = 0;
2922 	}
2923 
2924 	carp_addroute_vhaddr(sc, vha);
2925 
2926 	/*
2927 	 * Join the multicast group only after the backing interface
2928 	 * has been hooked with the CARP interface.
2929 	 */
2930 	KASSERT(imo->imo_multicast_ifp == NULL ||
2931 		imo->imo_multicast_ifp == &sc->sc_if,
2932 		("%s didn't leave mcast group on %s",
2933 		 sc->sc_if.if_xname, imo->imo_multicast_ifp->if_xname));
2934 
2935 	if (imo->imo_num_memberships == 0) {
2936 		struct in_addr addr;
2937 
2938 		addr.s_addr = htonl(INADDR_CARP_GROUP);
2939 		imo->imo_membership[0] = in_addmulti(&addr, &sc->sc_if);
2940 		if (imo->imo_membership[0] == NULL) {
2941 			carp_deactivate_vhaddr(sc, vha, FALSE);
2942 			return ENOBUFS;
2943 		}
2944 
2945 		imo->imo_num_memberships++;
2946 		imo->imo_multicast_ifp = &sc->sc_if;
2947 		imo->imo_multicast_ttl = CARP_DFLTTL;
2948 		imo->imo_multicast_loop = 0;
2949 	}
2950 
2951 	carp_hmac_prepare(sc);
2952 	carp_set_state(sc, INIT);
2953 	carp_setrun(sc, 0);
2954 	return 0;
2955 }
2956 
2957 static void
2958 carp_deactivate_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha,
2959     boolean_t del_iaback)
2960 {
2961 	KKASSERT(vha->vha_ia != NULL);
2962 
2963 	carp_hmac_prepare(sc);
2964 
2965 	if (vha->vha_iaback == NULL) {
2966 		KASSERT((vha->vha_flags & CARP_VHAF_OWNER) == 0,
2967 			("inactive vhaddr %p is the address owner", vha));
2968 		return;
2969 	}
2970 
2971 	vha->vha_flags &= ~CARP_VHAF_OWNER;
2972 	carp_delroute_vhaddr(sc, vha, del_iaback);
2973 
2974 	KKASSERT(sc->sc_naddrs > 0);
2975 	vha->vha_iaback = NULL;
2976 	sc->sc_naddrs--;
2977 	if (!sc->sc_naddrs) {
2978 		if (sc->sc_naddrs6) {
2979 			carp_multicast_cleanup(sc);
2980 			sc->sc_ia = NULL;
2981 		} else {
2982 			carp_detach(sc, FALSE, del_iaback);
2983 		}
2984 	}
2985 }
2986 
2987 static void
2988 carp_link_addrs(struct carp_softc *sc, struct ifnet *ifp, struct ifaddr *ifa_if)
2989 {
2990 	struct carp_vhaddr *vha;
2991 	struct in_ifaddr *ia_if;
2992 
2993 	KKASSERT(ifa_if->ifa_addr->sa_family == AF_INET);
2994 	ia_if = ifatoia(ifa_if);
2995 
2996 	/*
2997 	 * Test each inactive vhaddr against the newly added address.
2998 	 * If the newly added address could be the backing address,
2999 	 * then activate the matching vhaddr.
3000 	 */
3001 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
3002 		const struct in_ifaddr *ia;
3003 		int own;
3004 
3005 		if (vha->vha_iaback != NULL)
3006 			continue;
3007 
3008 		ia = vha->vha_ia;
3009 		if (ia->ia_subnetmask != ia_if->ia_subnetmask ||
3010 		    ia->ia_subnet != ia_if->ia_subnet)
3011 			continue;
3012 
3013 		own = 0;
3014 		if (ia->ia_addr.sin_addr.s_addr ==
3015 		    ia_if->ia_addr.sin_addr.s_addr)
3016 			own = 1;
3017 
3018 		carp_activate_vhaddr(sc, vha, ifp, ia_if, own);
3019 	}
3020 }
3021 
3022 static void
3023 carp_unlink_addrs(struct carp_softc *sc, struct ifnet *ifp,
3024 		  struct ifaddr *ifa_if)
3025 {
3026 	struct carp_vhaddr *vha;
3027 	struct in_ifaddr *ia_if;
3028 
3029 	KKASSERT(ifa_if->ifa_addr->sa_family == AF_INET);
3030 	ia_if = ifatoia(ifa_if);
3031 
3032 	/*
3033 	 * Ad src address is deleted; set it to NULL.
3034 	 * Following loop will try pick up a new ad src address
3035 	 * if one of the vhaddr could retain its backing address.
3036 	 */
3037 	if (sc->sc_ia == ia_if)
3038 		sc->sc_ia = NULL;
3039 
3040 	/*
3041 	 * Test each active vhaddr against the deleted address.
3042 	 * If the deleted address is vhaddr address's backing
3043 	 * address, then deactivate the vhaddr.
3044 	 */
3045 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
3046 		if (vha->vha_iaback == NULL)
3047 			continue;
3048 
3049 		if (vha->vha_iaback == ia_if)
3050 			carp_deactivate_vhaddr(sc, vha, TRUE);
3051 		else if (sc->sc_ia == NULL)
3052 			sc->sc_ia = vha->vha_iaback;
3053 	}
3054 }
3055 
3056 static void
3057 carp_update_addrs(struct carp_softc *sc, struct ifaddr *ifa_del)
3058 {
3059 	struct carp_vhaddr *vha;
3060 
3061 	KKASSERT(sc->sc_carpdev == NULL);
3062 
3063 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
3064 		carp_config_vhaddr(sc, vha, ifatoia(ifa_del));
3065 }
3066 
3067 static void
3068 carp_ifaddr(void *arg __unused, struct ifnet *ifp,
3069 	    enum ifaddr_event event, struct ifaddr *ifa)
3070 {
3071 	struct carp_softc *sc;
3072 
3073 	if (ifa->ifa_addr->sa_family != AF_INET)
3074 		return;
3075 
3076 	ASSERT_IN_NETISR(0);
3077 
3078 	if (ifp->if_type == IFT_CARP) {
3079 		/*
3080 		 * Address is changed on carp(4) interface
3081 		 */
3082 		switch (event) {
3083 		case IFADDR_EVENT_ADD:
3084 			carp_add_addr(ifp->if_softc, ifa);
3085 			break;
3086 
3087 		case IFADDR_EVENT_CHANGE:
3088 			carp_config_addr(ifp->if_softc, ifa);
3089 			break;
3090 
3091 		case IFADDR_EVENT_DELETE:
3092 			carp_del_addr(ifp->if_softc, ifa);
3093 			break;
3094 		}
3095 		return;
3096 	}
3097 
3098 	/*
3099 	 * Address is changed on non-carp(4) interface
3100 	 */
3101 	if ((ifp->if_flags & IFF_MULTICAST) == 0)
3102 		return;
3103 
3104 	LIST_FOREACH(sc, &carpif_list, sc_next) {
3105 		if (sc->sc_carpdev != NULL && sc->sc_carpdev != ifp) {
3106 			/* Not the parent iface; skip */
3107 			continue;
3108 		}
3109 
3110 		switch (event) {
3111 		case IFADDR_EVENT_ADD:
3112 			carp_link_addrs(sc, ifp, ifa);
3113 			break;
3114 
3115 		case IFADDR_EVENT_DELETE:
3116 			if (sc->sc_carpdev != NULL) {
3117 				carp_unlink_addrs(sc, ifp, ifa);
3118 				if (sc->sc_carpdev == NULL) {
3119 					/*
3120 					 * We no longer have the parent
3121 					 * interface, however, certain
3122 					 * virtual addresses, which are
3123 					 * not used because they can't
3124 					 * match the previous parent
3125 					 * interface's addresses, may now
3126 					 * match different interface's
3127 					 * addresses.
3128 					 */
3129 					carp_update_addrs(sc, ifa);
3130 				}
3131 			} else {
3132 				/*
3133 				 * The carp(4) interface didn't have a
3134 				 * parent iface, so it is not possible
3135 				 * that it will contain any address to
3136 				 * be unlinked.
3137 				 */
3138 			}
3139 			break;
3140 
3141 		case IFADDR_EVENT_CHANGE:
3142 			if (sc->sc_carpdev == NULL) {
3143 				/*
3144 				 * The carp(4) interface didn't have a
3145 				 * parent iface, so it is not possible
3146 				 * that it will contain any address to
3147 				 * be updated.
3148 				 */
3149 				carp_link_addrs(sc, ifp, ifa);
3150 			} else {
3151 				/*
3152 				 * First try breaking tie with the old
3153 				 * address.  Then see whether we could
3154 				 * link certain vhaddr to the new address.
3155 				 * If that fails, i.e. carpdev is NULL,
3156 				 * we try a global update.
3157 				 *
3158 				 * NOTE: The above order is critical.
3159 				 */
3160 				carp_unlink_addrs(sc, ifp, ifa);
3161 				carp_link_addrs(sc, ifp, ifa);
3162 				if (sc->sc_carpdev == NULL) {
3163 					/*
3164 					 * See the comment in the above
3165 					 * IFADDR_EVENT_DELETE block.
3166 					 */
3167 					carp_update_addrs(sc, NULL);
3168 				}
3169 			}
3170 			break;
3171 		}
3172 	}
3173 }
3174 
3175 void
3176 carp_proto_ctlinput(netmsg_t msg)
3177 {
3178 	int cmd = msg->ctlinput.nm_cmd;
3179 	struct sockaddr *sa = msg->ctlinput.nm_arg;
3180 	struct in_ifaddr_container *iac;
3181 
3182 	/* We only process PRC_IFDOWN and PRC_IFUP commands */
3183 	if (cmd != PRC_IFDOWN && cmd != PRC_IFUP)
3184 		goto done;
3185 
3186 	TAILQ_FOREACH(iac, &in_ifaddrheads[mycpuid], ia_link) {
3187 		struct in_ifaddr *ia = iac->ia;
3188 		struct ifnet *ifp = ia->ia_ifp;
3189 
3190 		if (ifp->if_type == IFT_CARP)
3191 			continue;
3192 
3193 		if (ia->ia_ifa.ifa_addr == sa) {
3194 			if (cmd == PRC_IFDOWN) {
3195 				carp_ifaddr(NULL, ifp, IFADDR_EVENT_DELETE,
3196 				    &ia->ia_ifa);
3197 			} else if (cmd == PRC_IFUP) {
3198 				carp_ifaddr(NULL, ifp, IFADDR_EVENT_ADD,
3199 				    &ia->ia_ifa);
3200 			}
3201 			break;
3202 		}
3203 	}
3204 done:
3205 	lwkt_replymsg(&msg->lmsg, 0);
3206 }
3207 
3208 struct ifnet *
3209 carp_parent(struct ifnet *cifp)
3210 {
3211 	struct carp_softc *sc;
3212 
3213 	KKASSERT(cifp->if_type == IFT_CARP);
3214 	sc = cifp->if_softc;
3215 
3216 	return sc->sc_carpdev;
3217 }
3218 
/* RTF_HOST for addresses on loopback or point-to-point interfaces, else 0 */
#define rtinitflags(x) \
	((((x)->ia_ifp->if_flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) != 0) \
		 ? RTF_HOST : 0)
3222 
3223 static int
3224 carp_addroute_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha)
3225 {
3226 	struct in_ifaddr *ia, *iaback;
3227 
3228 	if (sc->sc_state != MASTER)
3229 		return 0;
3230 
3231 	ia = vha->vha_ia;
3232 	KKASSERT(ia != NULL);
3233 
3234 	iaback = vha->vha_iaback;
3235 	KKASSERT(iaback != NULL);
3236 
3237 	return rtchange(&iaback->ia_ifa, &ia->ia_ifa);
3238 }
3239 
3240 static void
3241 carp_delroute_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha,
3242     boolean_t del_iaback)
3243 {
3244 	struct in_ifaddr *ia, *iaback;
3245 
3246 	ia = vha->vha_ia;
3247 	KKASSERT(ia != NULL);
3248 
3249 	iaback = vha->vha_iaback;
3250 	KKASSERT(iaback != NULL);
3251 
3252 	if (!del_iaback && (iaback->ia_ifp->if_flags & IFF_UP)) {
3253 		rtchange(&ia->ia_ifa, &iaback->ia_ifa);
3254 		return;
3255 	}
3256 
3257 	rtinit(&ia->ia_ifa, RTM_DELETE, rtinitflags(ia));
3258 	in_ifadown_force(&ia->ia_ifa, 1);
3259 	ia->ia_flags &= ~IFA_ROUTE;
3260 }
3261 
3262 static int
3263 carp_modevent(module_t mod, int type, void *data)
3264 {
3265 	switch (type) {
3266 	case MOD_LOAD:
3267 		LIST_INIT(&carpif_list);
3268 		carp_ifdetach_event =
3269 		EVENTHANDLER_REGISTER(ifnet_detach_event, carp_ifdetach, NULL,
3270 				      EVENTHANDLER_PRI_ANY);
3271 		carp_ifaddr_event =
3272 		EVENTHANDLER_REGISTER(ifaddr_event, carp_ifaddr, NULL,
3273 				      EVENTHANDLER_PRI_FIRST);
3274 		if_clone_attach(&carp_cloner);
3275 		break;
3276 
3277 	case MOD_UNLOAD:
3278 		EVENTHANDLER_DEREGISTER(ifnet_detach_event,
3279 					carp_ifdetach_event);
3280 		EVENTHANDLER_DEREGISTER(ifaddr_event,
3281 					carp_ifaddr_event);
3282 		if_clone_detach(&carp_cloner);
3283 		break;
3284 
3285 	default:
3286 		return (EINVAL);
3287 	}
3288 	return (0);
3289 }
3290 
3291 static moduledata_t carp_mod = {
3292 	"carp",
3293 	carp_modevent,
3294 	0
3295 };
3296 DECLARE_MODULE(carp, carp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
3297