xref: /openbsd/sys/netinet/ip_carp.c (revision 55cc5ba3)
1 /*	$OpenBSD: ip_carp.c,v 1.352 2021/02/08 12:30:10 bluhm Exp $	*/
2 
3 /*
4  * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
5  * Copyright (c) 2003 Ryan McBride. All rights reserved.
6  * Copyright (c) 2006-2008 Marco Pfatschbacher. All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
21  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
25  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
26  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
27  * THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 /*
31  * TODO:
32  *	- iface reconfigure
33  *	- support for hardware checksum calculations;
34  *
35  */
36 
37 #include "ether.h"
38 
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/mbuf.h>
42 #include <sys/socket.h>
43 #include <sys/socketvar.h>
44 #include <sys/timeout.h>
45 #include <sys/ioctl.h>
46 #include <sys/errno.h>
47 #include <sys/device.h>
48 #include <sys/kernel.h>
49 #include <sys/sysctl.h>
50 #include <sys/syslog.h>
51 #include <sys/refcnt.h>
52 
53 #include <net/if.h>
54 #include <net/if_var.h>
55 #include <net/if_types.h>
56 #include <net/netisr.h>
57 
58 #include <crypto/sha1.h>
59 
60 #include <netinet/in.h>
61 #include <netinet/in_var.h>
62 #include <netinet/ip.h>
63 #include <netinet/ip_var.h>
64 #include <netinet/if_ether.h>
65 #include <netinet/ip_ipsp.h>
66 
67 #include <net/if_dl.h>
68 
69 #ifdef INET6
70 #include <netinet6/in6_var.h>
71 #include <netinet/icmp6.h>
72 #include <netinet/ip6.h>
73 #include <netinet6/ip6_var.h>
74 #include <netinet6/nd6.h>
75 #include <netinet6/in6_ifattach.h>
76 #endif
77 
78 #include "bpfilter.h"
79 #if NBPFILTER > 0
80 #include <net/bpf.h>
81 #endif
82 
83 #include "vlan.h"
84 #if NVLAN > 0
85 #include <net/if_vlan_var.h>
86 #endif
87 
88 #include <netinet/ip_carp.h>
89 
/*
 * Bookkeeping record for one multicast address tracked by a carp(4)
 * interface.  Entries are linked on the softc's carp_mc_listhead.
 */
struct carp_mc_entry {
	LIST_ENTRY(carp_mc_entry)	mc_entries;	/* carp_mc_listhead linkage */
	union {
		/* presumably the ether_multi record that was joined */
		struct ether_multi	*mcu_enm;
	} mc_u;
	/* presumably the address the entry was created for */
	struct sockaddr_storage		mc_addr;
};
#define	mc_enm	mc_u.mcu_enm	/* shorthand for the only union member */
98 
/*
 * Indices into carp_vhost_entry.vhe_sha1[]: one precomputed HMAC context
 * per variant.  For HMAC_NOV6LL carp_hmac_prepare_ctx() leaves
 * scope-embedded IPv6 addresses out of the hash; for HMAC_ORIG they are
 * hashed with the embedded scope cleared.
 */
enum { HMAC_ORIG=0, HMAC_NOV6LL=1, HMAC_MAX=2 };

/*
 * Per-vhid state of a carp(4) interface.  Entries live on the parent
 * softc's carp_vhosts list and hold a reference on that softc
 * (taken in carp_new_vhost()).
 */
struct carp_vhost_entry {
	SRPL_ENTRY(carp_vhost_entry) vhost_entries;	/* carp_vhosts linkage */
	struct refcnt vhost_refcnt;

	struct carp_softc *parent_sc;	/* owning interface */
	int vhe_leader;		/* set on the first entry added to a softc */
	int vhid;
	int advskew;
	enum { INIT = 0, BACKUP, MASTER }	state;
	struct timeout ad_tmo;	/* advertisement timeout */
	struct timeout md_tmo;	/* master down timeout */
	struct timeout md6_tmo;	/* master down timeout */

	/* random once set; copied into carp_counter of each advertisement */
	u_int64_t vhe_replay_cookie;

	/* authentication */
#define CARP_HMAC_PAD	64
	/* HMAC pad block; holds the opad once carp_hmac_prepare_ctx() is done */
	unsigned char vhe_pad[CARP_HMAC_PAD];
	/* precomputed start of the inner hash, one per HMAC variant */
	SHA1_CTX vhe_sha1[HMAC_MAX];

	u_int8_t vhe_enaddr[ETHER_ADDR_LEN];
};

/* Reference hooks used by the SRP list holding carp_vhost_entry items. */
void	carp_vh_ref(void *, void *);
void	carp_vh_unref(void *, void *);

struct srpl_rc carp_vh_rc =
    SRPL_RC_INITIALIZER(carp_vh_ref, carp_vh_unref, NULL);
129 
/*
 * Software state of one carp(4) interface.  The embedded arpcom holds the
 * ifnet (sc_if); sc_carpdevidx aliases that ifnet's if_carpdevidx, the
 * index looked up with if_get() to reach the parent interface.
 */
struct carp_softc {
	struct arpcom sc_ac;
#define	sc_if		sc_ac.ac_if
#define	sc_carpdevidx	sc_ac.ac_if.if_carpdevidx
	struct task sc_atask;		/* runs carp_addr_updated() */
	struct task sc_ltask;		/* runs carp_carpdev_state() */
	struct task sc_dtask;		/* runs carpdetach() */
	struct ip_moptions sc_imo;	/* handed to ip_output() for ads */
#ifdef INET6
	struct ip6_moptions sc_im6o;
#endif /* INET6 */

	SRPL_ENTRY(carp_softc) sc_list;	/* linkage on the parent's if_carp */
	struct refcnt sc_refcnt;

	int sc_suppress;
	int sc_bow_out;		/* when set, ads carry advbase/advskew 255 */
	int sc_demote_cnt;

	int sc_sendad_errors;
#define CARP_SENDAD_MAX_ERRORS(sc) (3 * (sc)->sc_vhe_count)
	int sc_sendad_success;
#define CARP_SENDAD_MIN_SUCCESS(sc) (3 * (sc)->sc_vhe_count)

	char sc_curlladdr[ETHER_ADDR_LEN];

	SRPL_HEAD(, carp_vhost_entry) carp_vhosts;
	int sc_vhe_count;	/* number of entries on carp_vhosts */
	u_int8_t sc_vhids[CARP_MAXNODES];
	u_int8_t sc_advskews[CARP_MAXNODES];
	u_int8_t sc_balancing;

	int sc_naddrs;
	int sc_naddrs6;
	int sc_advbase;		/* seconds */

	/* authentication */
	unsigned char sc_key[CARP_KEY_LEN];

	/* arpbalance hash key, derived in carp_hmac_prepare_ctx() */
	u_int32_t sc_hashkey[2];
	u_int32_t sc_lsmask;		/* load sharing mask */
	int sc_lscount;			/* # load sharing interfaces (max 32) */
	int sc_delayed_arp;		/* delayed ARP request countdown */
	int sc_realmac;			/* using real mac */

	struct in_addr sc_peer;		/* IPv4 destination of advertisements */

	LIST_HEAD(__carp_mchead, carp_mc_entry)	carp_mc_listhead;
	struct carp_vhost_entry *cur_vhe; /* current active vhe */
};

/* Reference hooks used by the SRP list holding carp_softc items. */
void	carp_sc_ref(void *, void *);
void	carp_sc_unref(void *, void *);

struct srpl_rc carp_sc_rc =
    SRPL_RC_INITIALIZER(carp_sc_ref, carp_sc_unref, NULL);
186 
/*
 * Tunables indexed by CARPCTL_* id and exposed through carp_sysctl().
 * NOTE(review): which initialiser belongs to which id depends on the
 * CARPCTL_* numbering in ip_carp.h -- confirm there.
 */
int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, LOG_CRIT };	/* XXX for now */
/* Statistics counters; allocated in carpattach(). */
struct cpumem *carpcounters;

/* Non-zero while carp_send_ad_all() is running; blocks re-entry. */
int	carp_send_all_recur = 0;
191 
/*
 * CARP_LOG(l, sc, s): log a message at priority `l' if the configured
 * carp_opts[CARPCTL_LOG] level is at least `l'.  The message is prefixed
 * with the interface name of `sc', or with "carp: " when `sc' is NULL.
 * `s' must be a complete parenthesised argument list, because it is
 * pasted directly after addlog.
 *
 * `l' and `sc' are parenthesised in the expansion so that expression
 * arguments keep their meaning; both are still evaluated more than once,
 * so they must be free of side effects.
 */
#define	CARP_LOG(l, sc, s)						\
	do {								\
		if (carp_opts[CARPCTL_LOG] >= (l)) {			\
			if ((sc) != NULL)				\
				log((l), "%s: ",			\
				    (sc)->sc_if.if_xname);		\
			else						\
				log((l), "carp: ");			\
			addlog s;					\
			addlog("\n");					\
		}							\
	} while (0)
204 
/*
 * Prototypes for the functions defined in this file, followed by the
 * interface cloner and two small helpers.
 */
void	carp_hmac_prepare(struct carp_softc *);
void	carp_hmac_prepare_ctx(struct carp_vhost_entry *, u_int8_t);
void	carp_hmac_generate(struct carp_vhost_entry *, u_int32_t *,
	    unsigned char *, u_int8_t);
int	carp_hmac_verify(struct carp_vhost_entry *, u_int32_t *,
	    unsigned char *);
void	carp_proto_input_c(struct ifnet *, struct mbuf *,
	    struct carp_header *, int, sa_family_t);
int	carp_proto_input_if(struct ifnet *, struct mbuf **, int *, int);
#ifdef INET6
int	carp6_proto_input_if(struct ifnet *, struct mbuf **, int *, int);
#endif
void	carpattach(int);
void	carpdetach(void *);
void	carp_prepare_ad(struct mbuf *, struct carp_vhost_entry *,
	    struct carp_header *);
void	carp_send_ad_all(void);
void	carp_vhe_send_ad_all(struct carp_softc *);
void	carp_timer_ad(void *);
void	carp_send_ad(struct carp_vhost_entry *);
void	carp_send_arp(struct carp_softc *);
void	carp_timer_down(void *);
void	carp_master_down(struct carp_vhost_entry *);
int	carp_ioctl(struct ifnet *, u_long, caddr_t);
int	carp_vhids_ioctl(struct carp_softc *, struct carpreq *);
int	carp_check_dup_vhids(struct carp_softc *, struct srpl *,
	    struct carpreq *);
void	carp_ifgroup_ioctl(struct ifnet *, u_long, caddr_t);
void	carp_ifgattr_ioctl(struct ifnet *, u_long, caddr_t);
void	carp_start(struct ifnet *);
int	carp_enqueue(struct ifnet *, struct mbuf *);
void	carp_transmit(struct carp_softc *, struct ifnet *, struct mbuf *);
void	carp_setrun_all(struct carp_softc *, sa_family_t);
void	carp_setrun(struct carp_vhost_entry *, sa_family_t);
void	carp_set_state_all(struct carp_softc *, int);
void	carp_set_state(struct carp_vhost_entry *, int);
void	carp_multicast_cleanup(struct carp_softc *);
int	carp_set_ifp(struct carp_softc *, struct ifnet *);
void	carp_set_enaddr(struct carp_softc *);
void	carp_set_vhe_enaddr(struct carp_vhost_entry *);
void	carp_addr_updated(void *);
int	carp_set_addr(struct carp_softc *, struct sockaddr_in *);
int	carp_join_multicast(struct carp_softc *);
#ifdef INET6
void	carp_send_na(struct carp_softc *);
int	carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
int	carp_join_multicast6(struct carp_softc *);
#endif
int	carp_clone_create(struct if_clone *, int);
int	carp_clone_destroy(struct ifnet *);
int	carp_ether_addmulti(struct carp_softc *, struct ifreq *);
int	carp_ether_delmulti(struct carp_softc *, struct ifreq *);
void	carp_ether_purgemulti(struct carp_softc *);
int	carp_group_demote_count(struct carp_softc *);
void	carp_update_lsmask(struct carp_softc *);
int	carp_new_vhost(struct carp_softc *, int, int);
void	carp_destroy_vhosts(struct carp_softc *);
void	carp_del_all_timeouts(struct carp_softc *);
int	carp_vhe_match(struct carp_softc *, uint8_t *);

/* Cloner that creates and destroys "carpN" interfaces. */
struct if_clone carp_cloner =
    IF_CLONE_INITIALIZER("carp", carp_clone_create, carp_clone_destroy);

/* Internet checksum over the first _l bytes of mbuf chain _m. */
#define carp_cksum(_m, _l)	((u_int16_t)in_cksum((_m), (_l)))
/* Value stored in m_pkthdr.pf.prio of outgoing advertisements. */
#define CARP_IFQ_PRIO	6
270 
271 void
272 carp_hmac_prepare(struct carp_softc *sc)
273 {
274 	struct carp_vhost_entry *vhe;
275 	u_int8_t i;
276 
277 	KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
278 
279 	SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
280 		for (i = 0; i < HMAC_MAX; i++) {
281 			carp_hmac_prepare_ctx(vhe, i);
282 		}
283 	}
284 }
285 
286 void
287 carp_hmac_prepare_ctx(struct carp_vhost_entry *vhe, u_int8_t ctx)
288 {
289 	struct carp_softc *sc = vhe->parent_sc;
290 
291 	u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
292 	u_int8_t vhid = vhe->vhid & 0xff;
293 	SHA1_CTX sha1ctx;
294 	u_int32_t kmd[5];
295 	struct ifaddr *ifa;
296 	int i, found;
297 	struct in_addr last, cur, in;
298 #ifdef INET6
299 	struct in6_addr last6, cur6, in6;
300 #endif /* INET6 */
301 
302 	/* compute ipad from key */
303 	memset(vhe->vhe_pad, 0, sizeof(vhe->vhe_pad));
304 	bcopy(sc->sc_key, vhe->vhe_pad, sizeof(sc->sc_key));
305 	for (i = 0; i < sizeof(vhe->vhe_pad); i++)
306 		vhe->vhe_pad[i] ^= 0x36;
307 
308 	/* precompute first part of inner hash */
309 	SHA1Init(&vhe->vhe_sha1[ctx]);
310 	SHA1Update(&vhe->vhe_sha1[ctx], vhe->vhe_pad, sizeof(vhe->vhe_pad));
311 	SHA1Update(&vhe->vhe_sha1[ctx], (void *)&version, sizeof(version));
312 	SHA1Update(&vhe->vhe_sha1[ctx], (void *)&type, sizeof(type));
313 
314 	/* generate a key for the arpbalance hash, before the vhid is hashed */
315 	if (vhe->vhe_leader) {
316 		bcopy(&vhe->vhe_sha1[ctx], &sha1ctx, sizeof(sha1ctx));
317 		SHA1Final((unsigned char *)kmd, &sha1ctx);
318 		sc->sc_hashkey[0] = kmd[0] ^ kmd[1];
319 		sc->sc_hashkey[1] = kmd[2] ^ kmd[3];
320 	}
321 
322 	/* the rest of the precomputation */
323 	if (!sc->sc_realmac && vhe->vhe_leader &&
324 	    memcmp(sc->sc_ac.ac_enaddr, vhe->vhe_enaddr, ETHER_ADDR_LEN) != 0)
325 		SHA1Update(&vhe->vhe_sha1[ctx], sc->sc_ac.ac_enaddr,
326 		    ETHER_ADDR_LEN);
327 
328 	SHA1Update(&vhe->vhe_sha1[ctx], (void *)&vhid, sizeof(vhid));
329 
330 	/* Hash the addresses from smallest to largest, not interface order */
331 	cur.s_addr = 0;
332 	do {
333 		found = 0;
334 		last = cur;
335 		cur.s_addr = 0xffffffff;
336 		TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
337 			if (ifa->ifa_addr->sa_family != AF_INET)
338 				continue;
339 			in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
340 			if (ntohl(in.s_addr) > ntohl(last.s_addr) &&
341 			    ntohl(in.s_addr) < ntohl(cur.s_addr)) {
342 				cur.s_addr = in.s_addr;
343 				found++;
344 			}
345 		}
346 		if (found)
347 			SHA1Update(&vhe->vhe_sha1[ctx],
348 			    (void *)&cur, sizeof(cur));
349 	} while (found);
350 #ifdef INET6
351 	memset(&cur6, 0x00, sizeof(cur6));
352 	do {
353 		found = 0;
354 		last6 = cur6;
355 		memset(&cur6, 0xff, sizeof(cur6));
356 		TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
357 			if (ifa->ifa_addr->sa_family != AF_INET6)
358 				continue;
359 			in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
360 			if (IN6_IS_SCOPE_EMBED(&in6)) {
361 				if (ctx == HMAC_NOV6LL)
362 					continue;
363 				in6.s6_addr16[1] = 0;
364 			}
365 			if (memcmp(&in6, &last6, sizeof(in6)) > 0 &&
366 			    memcmp(&in6, &cur6, sizeof(in6)) < 0) {
367 				cur6 = in6;
368 				found++;
369 			}
370 		}
371 		if (found)
372 			SHA1Update(&vhe->vhe_sha1[ctx],
373 			    (void *)&cur6, sizeof(cur6));
374 	} while (found);
375 #endif /* INET6 */
376 
377 	/* convert ipad to opad */
378 	for (i = 0; i < sizeof(vhe->vhe_pad); i++)
379 		vhe->vhe_pad[i] ^= 0x36 ^ 0x5c;
380 }
381 
382 void
383 carp_hmac_generate(struct carp_vhost_entry *vhe, u_int32_t counter[2],
384     unsigned char md[20], u_int8_t ctx)
385 {
386 	SHA1_CTX sha1ctx;
387 
388 	/* fetch first half of inner hash */
389 	bcopy(&vhe->vhe_sha1[ctx], &sha1ctx, sizeof(sha1ctx));
390 
391 	SHA1Update(&sha1ctx, (void *)counter, sizeof(vhe->vhe_replay_cookie));
392 	SHA1Final(md, &sha1ctx);
393 
394 	/* outer hash */
395 	SHA1Init(&sha1ctx);
396 	SHA1Update(&sha1ctx, vhe->vhe_pad, sizeof(vhe->vhe_pad));
397 	SHA1Update(&sha1ctx, md, 20);
398 	SHA1Final(md, &sha1ctx);
399 }
400 
401 int
402 carp_hmac_verify(struct carp_vhost_entry *vhe, u_int32_t counter[2],
403     unsigned char md[20])
404 {
405 	unsigned char md2[20];
406 	u_int8_t i;
407 
408 	for (i = 0; i < HMAC_MAX; i++) {
409 		carp_hmac_generate(vhe, counter, md2, i);
410 		if (!timingsafe_bcmp(md, md2, sizeof(md2)))
411 			return (0);
412 	}
413 	return (1);
414 }
415 
416 int
417 carp_proto_input(struct mbuf **mp, int *offp, int proto, int af)
418 {
419 	struct ifnet *ifp;
420 
421 	ifp = if_get((*mp)->m_pkthdr.ph_ifidx);
422 	if (ifp == NULL) {
423 		m_freemp(mp);
424 		return IPPROTO_DONE;
425 	}
426 
427 	proto = carp_proto_input_if(ifp, mp, offp, proto);
428 	if_put(ifp);
429 	return proto;
430 }
431 
432 /*
433  * process input packet.
434  * we have rearranged checks order compared to the rfc,
435  * but it seems more efficient this way or not possible otherwise.
436  */
437 int
438 carp_proto_input_if(struct ifnet *ifp, struct mbuf **mp, int *offp, int proto)
439 {
440 	struct mbuf *m = *mp;
441 	struct ip *ip = mtod(m, struct ip *);
442 	struct carp_softc *sc = NULL;
443 	struct carp_header *ch;
444 	int iplen, len, ismulti;
445 
446 	carpstat_inc(carps_ipackets);
447 
448 	if (!carp_opts[CARPCTL_ALLOW]) {
449 		m_freem(m);
450 		return IPPROTO_DONE;
451 	}
452 
453 	ismulti = IN_MULTICAST(ip->ip_dst.s_addr);
454 
455 	/* check if received on a valid carp interface */
456 	switch (ifp->if_type) {
457 	case IFT_CARP:
458 		break;
459 	case IFT_ETHER:
460 		if (ismulti || !SRPL_EMPTY_LOCKED(&ifp->if_carp))
461 			break;
462 		/* FALLTHROUGH */
463 	default:
464 		carpstat_inc(carps_badif);
465 		CARP_LOG(LOG_INFO, sc,
466 		    ("packet received on non-carp interface: %s",
467 		     ifp->if_xname));
468 		m_freem(m);
469 		return IPPROTO_DONE;
470 	}
471 
472 	/* verify that the IP TTL is 255.  */
473 	if (ip->ip_ttl != CARP_DFLTTL) {
474 		carpstat_inc(carps_badttl);
475 		CARP_LOG(LOG_NOTICE, sc, ("received ttl %d != %d on %s",
476 		    ip->ip_ttl, CARP_DFLTTL, ifp->if_xname));
477 		m_freem(m);
478 		return IPPROTO_DONE;
479 	}
480 
481 	/*
482 	 * verify that the received packet length is
483 	 * equal to the CARP header
484 	 */
485 	iplen = ip->ip_hl << 2;
486 	len = iplen + sizeof(*ch);
487 	if (len > m->m_pkthdr.len) {
488 		carpstat_inc(carps_badlen);
489 		CARP_LOG(LOG_INFO, sc, ("packet too short %d on %s",
490 		    m->m_pkthdr.len, ifp->if_xname));
491 		m_freem(m);
492 		return IPPROTO_DONE;
493 	}
494 
495 	if ((m = *mp = m_pullup(m, len)) == NULL) {
496 		carpstat_inc(carps_hdrops);
497 		return IPPROTO_DONE;
498 	}
499 	ip = mtod(m, struct ip *);
500 	ch = (struct carp_header *)(mtod(m, caddr_t) + iplen);
501 
502 	/* verify the CARP checksum */
503 	m->m_data += iplen;
504 	if (carp_cksum(m, len - iplen)) {
505 		carpstat_inc(carps_badsum);
506 		CARP_LOG(LOG_INFO, sc, ("checksum failed on %s",
507 		    ifp->if_xname));
508 		m_freem(m);
509 		return IPPROTO_DONE;
510 	}
511 	m->m_data -= iplen;
512 
513 	KERNEL_LOCK();
514 	carp_proto_input_c(ifp, m, ch, ismulti, AF_INET);
515 	KERNEL_UNLOCK();
516 	return IPPROTO_DONE;
517 }
518 
519 #ifdef INET6
520 int
521 carp6_proto_input(struct mbuf **mp, int *offp, int proto, int af)
522 {
523 	struct ifnet *ifp;
524 
525 	ifp = if_get((*mp)->m_pkthdr.ph_ifidx);
526 	if (ifp == NULL) {
527 		m_freemp(mp);
528 		return IPPROTO_DONE;
529 	}
530 
531 	proto = carp6_proto_input_if(ifp, mp, offp, proto);
532 	if_put(ifp);
533 	return proto;
534 }
535 
536 int
537 carp6_proto_input_if(struct ifnet *ifp, struct mbuf **mp, int *offp, int proto)
538 {
539 	struct mbuf *m = *mp;
540 	struct carp_softc *sc = NULL;
541 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
542 	struct carp_header *ch;
543 	u_int len;
544 
545 	carpstat_inc(carps_ipackets6);
546 
547 	if (!carp_opts[CARPCTL_ALLOW]) {
548 		m_freem(m);
549 		return IPPROTO_DONE;
550 	}
551 
552 	/* check if received on a valid carp interface */
553 	if (ifp->if_type != IFT_CARP) {
554 		carpstat_inc(carps_badif);
555 		CARP_LOG(LOG_INFO, sc, ("packet received on non-carp interface: %s",
556 		    ifp->if_xname));
557 		m_freem(m);
558 		return IPPROTO_DONE;
559 	}
560 
561 	/* verify that the IP TTL is 255 */
562 	if (ip6->ip6_hlim != CARP_DFLTTL) {
563 		carpstat_inc(carps_badttl);
564 		CARP_LOG(LOG_NOTICE, sc, ("received ttl %d != %d on %s",
565 		    ip6->ip6_hlim, CARP_DFLTTL, ifp->if_xname));
566 		m_freem(m);
567 		return IPPROTO_DONE;
568 	}
569 
570 	/* verify that we have a complete carp packet */
571 	len = m->m_len;
572 	if ((m = *mp = m_pullup(m, *offp + sizeof(*ch))) == NULL) {
573 		carpstat_inc(carps_badlen);
574 		CARP_LOG(LOG_INFO, sc, ("packet size %u too small", len));
575 		return IPPROTO_DONE;
576 	}
577 	ch = (struct carp_header *)(mtod(m, caddr_t) + *offp);
578 
579 	/* verify the CARP checksum */
580 	m->m_data += *offp;
581 	if (carp_cksum(m, sizeof(*ch))) {
582 		carpstat_inc(carps_badsum);
583 		CARP_LOG(LOG_INFO, sc, ("checksum failed, on %s",
584 		    ifp->if_xname));
585 		m_freem(m);
586 		return IPPROTO_DONE;
587 	}
588 	m->m_data -= *offp;
589 
590 	KERNEL_LOCK();
591 	carp_proto_input_c(ifp, m, ch, 1, AF_INET6);
592 	KERNEL_UNLOCK();
593 	return IPPROTO_DONE;
594 }
595 #endif /* INET6 */
596 
597 void
598 carp_proto_input_c(struct ifnet *ifp, struct mbuf *m, struct carp_header *ch,
599     int ismulti, sa_family_t af)
600 {
601 	struct carp_softc *sc;
602 	struct ifnet *ifp0;
603 	struct carp_vhost_entry *vhe;
604 	struct timeval sc_tv, ch_tv;
605 	struct srpl *cif;
606 
607 	KERNEL_ASSERT_LOCKED(); /* touching if_carp + carp_vhosts */
608 
609 	ifp0 = if_get(ifp->if_carpdevidx);
610 
611 	if (ifp->if_type == IFT_CARP) {
612 		/*
613 		 * If the parent of this carp(4) got destroyed while
614 		 * `m' was being processed, silently drop it.
615 		 */
616 		if (ifp0 == NULL)
617 			goto rele;
618 		cif = &ifp0->if_carp;
619 	} else
620 		cif = &ifp->if_carp;
621 
622 	SRPL_FOREACH_LOCKED(sc, cif, sc_list) {
623 		if (af == AF_INET &&
624 		    ismulti != IN_MULTICAST(sc->sc_peer.s_addr))
625 			continue;
626 		SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
627 			if (vhe->vhid == ch->carp_vhid)
628 				goto found;
629 		}
630 	}
631  found:
632 
633 	if (!sc || (sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
634 	    (IFF_UP|IFF_RUNNING)) {
635 		carpstat_inc(carps_badvhid);
636 		goto rele;
637 	}
638 
639 	getmicrotime(&sc->sc_if.if_lastchange);
640 
641 	/* verify the CARP version. */
642 	if (ch->carp_version != CARP_VERSION) {
643 		carpstat_inc(carps_badver);
644 		sc->sc_if.if_ierrors++;
645 		CARP_LOG(LOG_NOTICE, sc, ("invalid version %d != %d",
646 		    ch->carp_version, CARP_VERSION));
647 		goto rele;
648 	}
649 
650 	/* verify the hash */
651 	if (carp_hmac_verify(vhe, ch->carp_counter, ch->carp_md)) {
652 		carpstat_inc(carps_badauth);
653 		sc->sc_if.if_ierrors++;
654 		CARP_LOG(LOG_INFO, sc, ("incorrect hash"));
655 		goto rele;
656 	}
657 
658 	if (!memcmp(&vhe->vhe_replay_cookie, ch->carp_counter,
659 	    sizeof(ch->carp_counter))) {
660 		struct ifnet *ifp2;
661 
662 		ifp2 = if_get(sc->sc_carpdevidx);
663 		/* Do not log duplicates from non simplex interfaces */
664 		if (ifp2 && ifp2->if_flags & IFF_SIMPLEX) {
665 			carpstat_inc(carps_badauth);
666 			sc->sc_if.if_ierrors++;
667 			CARP_LOG(LOG_WARNING, sc,
668 			    ("replay or network loop detected"));
669 		}
670 		if_put(ifp2);
671 		goto rele;
672 	}
673 
674 	sc_tv.tv_sec = sc->sc_advbase;
675 	sc_tv.tv_usec = vhe->advskew * 1000000 / 256;
676 	ch_tv.tv_sec = ch->carp_advbase;
677 	ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
678 
679 	switch (vhe->state) {
680 	case INIT:
681 		break;
682 	case MASTER:
683 		/*
684 		 * If we receive an advertisement from a master who's going to
685 		 * be more frequent than us, and whose demote count is not higher
686 		 * than ours, go into BACKUP state. If his demote count is lower,
687 		 * also go into BACKUP.
688 		 */
689 		if (((timercmp(&sc_tv, &ch_tv, >) ||
690 		    timercmp(&sc_tv, &ch_tv, ==)) &&
691 		    (ch->carp_demote <= carp_group_demote_count(sc))) ||
692 		    ch->carp_demote < carp_group_demote_count(sc)) {
693 			timeout_del(&vhe->ad_tmo);
694 			carp_set_state(vhe, BACKUP);
695 			carp_setrun(vhe, 0);
696 		}
697 		break;
698 	case BACKUP:
699 		/*
700 		 * If we're pre-empting masters who advertise slower than us,
701 		 * and do not have a better demote count, treat them as down.
702 		 *
703 		 */
704 		if (carp_opts[CARPCTL_PREEMPT] &&
705 		    timercmp(&sc_tv, &ch_tv, <) &&
706 		    ch->carp_demote >= carp_group_demote_count(sc)) {
707 			carp_master_down(vhe);
708 			break;
709 		}
710 
711 		/*
712 		 * Take over masters advertising with a higher demote count,
713 		 * regardless of CARPCTL_PREEMPT.
714 		 */
715 		if (ch->carp_demote > carp_group_demote_count(sc)) {
716 			carp_master_down(vhe);
717 			break;
718 		}
719 
720 		/*
721 		 *  If the master is going to advertise at such a low frequency
722 		 *  that he's guaranteed to time out, we'd might as well just
723 		 *  treat him as timed out now.
724 		 */
725 		sc_tv.tv_sec = sc->sc_advbase * 3;
726 		if (sc->sc_advbase && timercmp(&sc_tv, &ch_tv, <)) {
727 			carp_master_down(vhe);
728 			break;
729 		}
730 
731 		/*
732 		 * Otherwise, we reset the counter and wait for the next
733 		 * advertisement.
734 		 */
735 		carp_setrun(vhe, af);
736 		break;
737 	}
738 
739 rele:
740 	if_put(ifp0);
741 	m_freem(m);
742 	return;
743 }
744 
745 int
746 carp_sysctl_carpstat(void *oldp, size_t *oldlenp, void *newp)
747 {
748 	struct carpstats carpstat;
749 
750 	CTASSERT(sizeof(carpstat) == (carps_ncounters * sizeof(uint64_t)));
751 	memset(&carpstat, 0, sizeof carpstat);
752 	counters_read(carpcounters, (uint64_t *)&carpstat, carps_ncounters);
753 	return (sysctl_rdstruct(oldp, oldlenp, newp,
754 	    &carpstat, sizeof(carpstat)));
755 }
756 
757 int
758 carp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
759     size_t newlen)
760 {
761 	int error;
762 
763 	/* All sysctl names at this level are terminal. */
764 	if (namelen != 1)
765 		return (ENOTDIR);
766 
767 	switch (name[0]) {
768 	case CARPCTL_STATS:
769 		return (carp_sysctl_carpstat(oldp, oldlenp, newp));
770 	default:
771 		if (name[0] <= 0 || name[0] >= CARPCTL_MAXID)
772 			return (ENOPROTOOPT);
773 		NET_LOCK();
774 		error = sysctl_int(oldp, oldlenp, newp, newlen,
775 		    &carp_opts[name[0]]);
776 		NET_UNLOCK();
777 		return (error);
778 	}
779 }
780 
781 /*
782  * Interface side of the CARP implementation.
783  */
784 
785 /* ARGSUSED */
786 void
787 carpattach(int n)
788 {
789 	if_creategroup("carp");  /* keep around even if empty */
790 	if_clone_attach(&carp_cloner);
791 	carpcounters = counters_alloc(carps_ncounters);
792 }
793 
794 int
795 carp_clone_create(struct if_clone *ifc, int unit)
796 {
797 	struct carp_softc *sc;
798 	struct ifnet *ifp;
799 
800 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
801 	refcnt_init(&sc->sc_refcnt);
802 
803 	SRPL_INIT(&sc->carp_vhosts);
804 	sc->sc_vhe_count = 0;
805 	if (carp_new_vhost(sc, 0, 0)) {
806 		free(sc, M_DEVBUF, sizeof(*sc));
807 		return (ENOMEM);
808 	}
809 
810 	task_set(&sc->sc_atask, carp_addr_updated, sc);
811 	task_set(&sc->sc_ltask, carp_carpdev_state, sc);
812 	task_set(&sc->sc_dtask, carpdetach, sc);
813 
814 	sc->sc_suppress = 0;
815 	sc->sc_advbase = CARP_DFLTINTV;
816 	sc->sc_naddrs = sc->sc_naddrs6 = 0;
817 #ifdef INET6
818 	sc->sc_im6o.im6o_hlim = CARP_DFLTTL;
819 #endif /* INET6 */
820 	sc->sc_imo.imo_membership = mallocarray(IP_MIN_MEMBERSHIPS,
821 	    sizeof(struct in_multi *), M_IPMOPTS, M_WAITOK|M_ZERO);
822 	sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
823 
824 	LIST_INIT(&sc->carp_mc_listhead);
825 	ifp = &sc->sc_if;
826 	ifp->if_softc = sc;
827 	snprintf(ifp->if_xname, sizeof ifp->if_xname, "%s%d", ifc->ifc_name,
828 	    unit);
829 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
830 	ifp->if_ioctl = carp_ioctl;
831 	ifp->if_start = carp_start;
832 	ifp->if_enqueue = carp_enqueue;
833 	ifp->if_xflags = IFXF_CLONED;
834 	if_counters_alloc(ifp);
835 	if_attach(ifp);
836 	ether_ifattach(ifp);
837 	ifp->if_type = IFT_CARP;
838 	ifp->if_sadl->sdl_type = IFT_CARP;
839 	ifp->if_output = carp_output;
840 	ifp->if_priority = IF_CARP_DEFAULT_PRIORITY;
841 	ifp->if_link_state = LINK_STATE_INVALID;
842 
843 	/* Hook carp_addr_updated to cope with address and route changes. */
844 	if_addrhook_add(&sc->sc_if, &sc->sc_atask);
845 
846 	return (0);
847 }
848 
849 int
850 carp_new_vhost(struct carp_softc *sc, int vhid, int advskew)
851 {
852 	struct carp_vhost_entry *vhe, *vhe0;
853 
854 	vhe = malloc(sizeof(*vhe), M_DEVBUF, M_NOWAIT | M_ZERO);
855 	if (vhe == NULL)
856 		return (ENOMEM);
857 
858 	refcnt_init(&vhe->vhost_refcnt);
859 	carp_sc_ref(NULL, sc); /* give a sc ref to the vhe */
860 	vhe->parent_sc = sc;
861 	vhe->vhid = vhid;
862 	vhe->advskew = advskew;
863 	vhe->state = INIT;
864 	timeout_set_proc(&vhe->ad_tmo, carp_timer_ad, vhe);
865 	timeout_set_proc(&vhe->md_tmo, carp_timer_down, vhe);
866 	timeout_set_proc(&vhe->md6_tmo, carp_timer_down, vhe);
867 
868 	KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
869 
870 	/* mark the first vhe as leader */
871 	if (SRPL_EMPTY_LOCKED(&sc->carp_vhosts)) {
872 		vhe->vhe_leader = 1;
873 		SRPL_INSERT_HEAD_LOCKED(&carp_vh_rc, &sc->carp_vhosts,
874 		    vhe, vhost_entries);
875 		sc->sc_vhe_count = 1;
876 		return (0);
877 	}
878 
879 	SRPL_FOREACH_LOCKED(vhe0, &sc->carp_vhosts, vhost_entries) {
880 		if (SRPL_NEXT_LOCKED(vhe0, vhost_entries) == NULL)
881 			break;
882 	}
883 
884 	SRPL_INSERT_AFTER_LOCKED(&carp_vh_rc, vhe0, vhe, vhost_entries);
885 	sc->sc_vhe_count++;
886 
887 	return (0);
888 }
889 
890 int
891 carp_clone_destroy(struct ifnet *ifp)
892 {
893 	struct carp_softc *sc = ifp->if_softc;
894 
895 	if_addrhook_del(&sc->sc_if, &sc->sc_atask);
896 
897 	NET_LOCK();
898 	carpdetach(sc);
899 	NET_UNLOCK();
900 
901 	ether_ifdetach(ifp);
902 	if_detach(ifp);
903 	carp_destroy_vhosts(ifp->if_softc);
904 	refcnt_finalize(&sc->sc_refcnt, "carpdtor");
905 	free(sc->sc_imo.imo_membership, M_IPMOPTS,
906 	    sc->sc_imo.imo_max_memberships * sizeof(struct in_multi *));
907 	free(sc, M_DEVBUF, sizeof(*sc));
908 	return (0);
909 }
910 
911 void
912 carp_del_all_timeouts(struct carp_softc *sc)
913 {
914 	struct carp_vhost_entry *vhe;
915 
916 	KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
917 	SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
918 		timeout_del(&vhe->ad_tmo);
919 		timeout_del(&vhe->md_tmo);
920 		timeout_del(&vhe->md6_tmo);
921 	}
922 }
923 
924 void
925 carpdetach(void *arg)
926 {
927 	struct carp_softc *sc = arg;
928 	struct ifnet *ifp0;
929 	struct srpl *cif;
930 
931 	carp_del_all_timeouts(sc);
932 
933 	if (sc->sc_demote_cnt)
934 		carp_group_demote_adj(&sc->sc_if, -sc->sc_demote_cnt, "detach");
935 	sc->sc_suppress = 0;
936 	sc->sc_sendad_errors = 0;
937 
938 	carp_set_state_all(sc, INIT);
939 	sc->sc_if.if_flags &= ~IFF_UP;
940 	carp_setrun_all(sc, 0);
941 	carp_multicast_cleanup(sc);
942 
943 	ifp0 = if_get(sc->sc_carpdevidx);
944 	if (ifp0 == NULL)
945 		return;
946 
947 	KERNEL_ASSERT_LOCKED(); /* touching if_carp */
948 
949 	cif = &ifp0->if_carp;
950 
951 	SRPL_REMOVE_LOCKED(&carp_sc_rc, cif, sc, carp_softc, sc_list);
952 	sc->sc_carpdevidx = 0;
953 
954 	if_linkstatehook_del(ifp0, &sc->sc_ltask);
955 	if_detachhook_del(ifp0, &sc->sc_dtask);
956 	ifpromisc(ifp0, 0);
957 	if_put(ifp0);
958 }
959 
960 void
961 carp_destroy_vhosts(struct carp_softc *sc)
962 {
963 	/* XXX bow out? */
964 	struct carp_vhost_entry *vhe;
965 
966 	KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
967 
968 	while ((vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts)) != NULL) {
969 		SRPL_REMOVE_LOCKED(&carp_vh_rc, &sc->carp_vhosts, vhe,
970 		    carp_vhost_entry, vhost_entries);
971 		carp_vh_unref(NULL, vhe); /* drop last ref */
972 	}
973 	sc->sc_vhe_count = 0;
974 }
975 
976 void
977 carp_prepare_ad(struct mbuf *m, struct carp_vhost_entry *vhe,
978     struct carp_header *ch)
979 {
980 	if (!vhe->vhe_replay_cookie) {
981 		arc4random_buf(&vhe->vhe_replay_cookie,
982 		    sizeof(vhe->vhe_replay_cookie));
983 	}
984 
985 	bcopy(&vhe->vhe_replay_cookie, ch->carp_counter,
986 	    sizeof(ch->carp_counter));
987 
988 	/*
989 	 * For the time being, do not include the IPv6 linklayer addresses
990 	 * in the HMAC.
991 	 */
992 	carp_hmac_generate(vhe, ch->carp_counter, ch->carp_md, HMAC_NOV6LL);
993 }
994 
995 void
996 carp_send_ad_all(void)
997 {
998 	struct ifnet *ifp0;
999 	struct srpl *cif;
1000 	struct carp_softc *vh;
1001 
1002 	KERNEL_ASSERT_LOCKED(); /* touching if_carp */
1003 
1004 	if (carp_send_all_recur > 0)
1005 		return;
1006 	++carp_send_all_recur;
1007 	TAILQ_FOREACH(ifp0, &ifnet, if_list) {
1008 		if (ifp0->if_type != IFT_ETHER)
1009 			continue;
1010 
1011 		cif = &ifp0->if_carp;
1012 		SRPL_FOREACH_LOCKED(vh, cif, sc_list) {
1013 			if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
1014 			    (IFF_UP|IFF_RUNNING)) {
1015 				carp_vhe_send_ad_all(vh);
1016 			}
1017 		}
1018 	}
1019 	--carp_send_all_recur;
1020 }
1021 
1022 void
1023 carp_vhe_send_ad_all(struct carp_softc *sc)
1024 {
1025 	struct carp_vhost_entry *vhe;
1026 
1027 	KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
1028 
1029 	SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
1030 		if (vhe->state == MASTER)
1031 			carp_send_ad(vhe);
1032 	}
1033 }
1034 
/*
 * Advertisement timeout handler: `v' is the vhost entry the timeout was
 * set up for; send its advertisement under the net lock.
 */
void
carp_timer_ad(void *v)
{
	struct carp_vhost_entry *vhe = v;

	NET_LOCK();
	carp_send_ad(vhe);
	NET_UNLOCK();
}
1042 
1043 void
1044 carp_send_ad(struct carp_vhost_entry *vhe)
1045 {
1046 	struct carp_header ch;
1047 	struct timeval tv;
1048 	struct carp_softc *sc = vhe->parent_sc;
1049 	struct carp_header *ch_ptr;
1050 	struct mbuf *m;
1051 	int error, len, advbase, advskew;
1052 	struct ifnet *ifp;
1053 	struct ifaddr *ifa;
1054 	struct sockaddr sa;
1055 
1056 	NET_ASSERT_LOCKED();
1057 
1058 	if ((ifp = if_get(sc->sc_carpdevidx)) == NULL) {
1059 		sc->sc_if.if_oerrors++;
1060 		return;
1061 	}
1062 
1063 	/* bow out if we've gone to backup (the carp interface is going down) */
1064 	if (sc->sc_bow_out) {
1065 		advbase = 255;
1066 		advskew = 255;
1067 	} else {
1068 		advbase = sc->sc_advbase;
1069 		advskew = vhe->advskew;
1070 		tv.tv_sec = advbase;
1071 		if (advbase == 0 && advskew == 0)
1072 			tv.tv_usec = 1 * 1000000 / 256;
1073 		else
1074 			tv.tv_usec = advskew * 1000000 / 256;
1075 	}
1076 
1077 	ch.carp_version = CARP_VERSION;
1078 	ch.carp_type = CARP_ADVERTISEMENT;
1079 	ch.carp_vhid = vhe->vhid;
1080 	ch.carp_demote = carp_group_demote_count(sc) & 0xff;
1081 	ch.carp_advbase = advbase;
1082 	ch.carp_advskew = advskew;
1083 	ch.carp_authlen = 7;	/* XXX DEFINE */
1084 	ch.carp_cksum = 0;
1085 
1086 	sc->cur_vhe = vhe; /* we need the vhe later on the output path */
1087 
1088 	if (sc->sc_naddrs) {
1089 		struct ip *ip;
1090 
1091 		MGETHDR(m, M_DONTWAIT, MT_HEADER);
1092 		if (m == NULL) {
1093 			sc->sc_if.if_oerrors++;
1094 			carpstat_inc(carps_onomem);
1095 			/* XXX maybe less ? */
1096 			goto retry_later;
1097 		}
1098 		len = sizeof(*ip) + sizeof(ch);
1099 		m->m_pkthdr.pf.prio = CARP_IFQ_PRIO;
1100 		m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain;
1101 		m->m_pkthdr.len = len;
1102 		m->m_len = len;
1103 		m_align(m, len);
1104 		ip = mtod(m, struct ip *);
1105 		ip->ip_v = IPVERSION;
1106 		ip->ip_hl = sizeof(*ip) >> 2;
1107 		ip->ip_tos = IPTOS_LOWDELAY;
1108 		ip->ip_len = htons(len);
1109 		ip->ip_id = htons(ip_randomid());
1110 		ip->ip_off = htons(IP_DF);
1111 		ip->ip_ttl = CARP_DFLTTL;
1112 		ip->ip_p = IPPROTO_CARP;
1113 		ip->ip_sum = 0;
1114 
1115 		memset(&sa, 0, sizeof(sa));
1116 		sa.sa_family = AF_INET;
1117 		/* Prefer addresses on the parent interface as source for AD. */
1118 		ifa = ifaof_ifpforaddr(&sa, ifp);
1119 		if (ifa == NULL)
1120 			ifa = ifaof_ifpforaddr(&sa, &sc->sc_if);
1121 		KASSERT(ifa != NULL);
1122 		ip->ip_src.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
1123 		ip->ip_dst.s_addr = sc->sc_peer.s_addr;
1124 		if (IN_MULTICAST(ip->ip_dst.s_addr))
1125 			m->m_flags |= M_MCAST;
1126 
1127 		ch_ptr = (struct carp_header *)(ip + 1);
1128 		bcopy(&ch, ch_ptr, sizeof(ch));
1129 		carp_prepare_ad(m, vhe, ch_ptr);
1130 
1131 		m->m_data += sizeof(*ip);
1132 		ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip));
1133 		m->m_data -= sizeof(*ip);
1134 
1135 		getmicrotime(&sc->sc_if.if_lastchange);
1136 		carpstat_inc(carps_opackets);
1137 
1138 		error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo,
1139 		    NULL, 0);
1140 		if (error &&
1141 		    /* when unicast, the peer's down is not our fault */
1142 		    !(!IN_MULTICAST(sc->sc_peer.s_addr) && error == EHOSTDOWN)){
1143 			if (error == ENOBUFS)
1144 				carpstat_inc(carps_onomem);
1145 			else
1146 				CARP_LOG(LOG_WARNING, sc,
1147 				    ("ip_output failed: %d", error));
1148 			sc->sc_if.if_oerrors++;
1149 			if (sc->sc_sendad_errors < INT_MAX)
1150 				sc->sc_sendad_errors++;
1151 			if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS(sc))
1152 				carp_group_demote_adj(&sc->sc_if, 1,
1153 				    "> snderrors");
1154 			sc->sc_sendad_success = 0;
1155 		} else {
1156 			if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS(sc)) {
1157 				if (++sc->sc_sendad_success >=
1158 				    CARP_SENDAD_MIN_SUCCESS(sc)) {
1159 					carp_group_demote_adj(&sc->sc_if, -1,
1160 					    "< snderrors");
1161 					sc->sc_sendad_errors = 0;
1162 				}
1163 			} else
1164 				sc->sc_sendad_errors = 0;
1165 		}
1166 		if (vhe->vhe_leader) {
1167 			if (sc->sc_delayed_arp > 0)
1168 				sc->sc_delayed_arp--;
1169 			if (sc->sc_delayed_arp == 0) {
1170 				carp_send_arp(sc);
1171 				sc->sc_delayed_arp = -1;
1172 			}
1173 		}
1174 	}
1175 #ifdef INET6
1176 	if (sc->sc_naddrs6) {
1177 		struct ip6_hdr *ip6;
1178 
1179 		MGETHDR(m, M_DONTWAIT, MT_HEADER);
1180 		if (m == NULL) {
1181 			sc->sc_if.if_oerrors++;
1182 			carpstat_inc(carps_onomem);
1183 			/* XXX maybe less ? */
1184 			goto retry_later;
1185 		}
1186 		len = sizeof(*ip6) + sizeof(ch);
1187 		m->m_pkthdr.pf.prio = CARP_IFQ_PRIO;
1188 		m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain;
1189 		m->m_pkthdr.len = len;
1190 		m->m_len = len;
1191 		m_align(m, len);
1192 		m->m_flags |= M_MCAST;
1193 		ip6 = mtod(m, struct ip6_hdr *);
1194 		memset(ip6, 0, sizeof(*ip6));
1195 		ip6->ip6_vfc |= IPV6_VERSION;
1196 		ip6->ip6_hlim = CARP_DFLTTL;
1197 		ip6->ip6_nxt = IPPROTO_CARP;
1198 
1199 		/* set the source address */
1200 		memset(&sa, 0, sizeof(sa));
1201 		sa.sa_family = AF_INET6;
1202 		/* Prefer addresses on the parent interface as source for AD. */
1203 		ifa = ifaof_ifpforaddr(&sa, ifp);
1204 		if (ifa == NULL)
1205 			ifa = ifaof_ifpforaddr(&sa, &sc->sc_if);
1206 		KASSERT(ifa != NULL);
1207 		bcopy(ifatoia6(ifa)->ia_addr.sin6_addr.s6_addr,
1208 		    &ip6->ip6_src, sizeof(struct in6_addr));
1209 		/* set the multicast destination */
1210 
1211 		ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
1212 		ip6->ip6_dst.s6_addr16[1] = htons(ifp->if_index);
1213 		ip6->ip6_dst.s6_addr8[15] = 0x12;
1214 
1215 		ch_ptr = (struct carp_header *)(ip6 + 1);
1216 		bcopy(&ch, ch_ptr, sizeof(ch));
1217 		carp_prepare_ad(m, vhe, ch_ptr);
1218 
1219 		m->m_data += sizeof(*ip6);
1220 		ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6));
1221 		m->m_data -= sizeof(*ip6);
1222 
1223 		getmicrotime(&sc->sc_if.if_lastchange);
1224 		carpstat_inc(carps_opackets6);
1225 
1226 		error = ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL);
1227 		if (error) {
1228 			if (error == ENOBUFS)
1229 				carpstat_inc(carps_onomem);
1230 			else
1231 				CARP_LOG(LOG_WARNING, sc,
1232 				    ("ip6_output failed: %d", error));
1233 			sc->sc_if.if_oerrors++;
1234 			if (sc->sc_sendad_errors < INT_MAX)
1235 				sc->sc_sendad_errors++;
1236 			if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS(sc))
1237 				carp_group_demote_adj(&sc->sc_if, 1,
1238 					    "> snd6errors");
1239 			sc->sc_sendad_success = 0;
1240 		} else {
1241 			if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS(sc)) {
1242 				if (++sc->sc_sendad_success >=
1243 				    CARP_SENDAD_MIN_SUCCESS(sc)) {
1244 					carp_group_demote_adj(&sc->sc_if, -1,
1245 					    "< snd6errors");
1246 					sc->sc_sendad_errors = 0;
1247 				}
1248 			} else
1249 				sc->sc_sendad_errors = 0;
1250 		}
1251 	}
1252 #endif /* INET6 */
1253 
1254 retry_later:
1255 	sc->cur_vhe = NULL;
1256 	if (advbase != 255 || advskew != 255)
1257 		timeout_add_tv(&vhe->ad_tmo, &tv);
1258 	if_put(ifp);
1259 }
1260 
1261 /*
1262  * Broadcast a gratuitous ARP request containing
1263  * the virtual router MAC address for each IP address
1264  * associated with the virtual router.
1265  */
1266 void
1267 carp_send_arp(struct carp_softc *sc)
1268 {
1269 	struct ifaddr *ifa;
1270 	in_addr_t in;
1271 
1272 	TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
1273 
1274 		if (ifa->ifa_addr->sa_family != AF_INET)
1275 			continue;
1276 
1277 		in = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
1278 		arprequest(&sc->sc_if, &in, &in, sc->sc_ac.ac_enaddr);
1279 	}
1280 }
1281 
1282 #ifdef INET6
1283 void
1284 carp_send_na(struct carp_softc *sc)
1285 {
1286 	struct ifaddr *ifa;
1287 	struct in6_addr *in6;
1288 	static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
1289 
1290 	TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
1291 
1292 		if (ifa->ifa_addr->sa_family != AF_INET6)
1293 			continue;
1294 
1295 		in6 = &ifatoia6(ifa)->ia_addr.sin6_addr;
1296 		nd6_na_output(&sc->sc_if, &mcast, in6,
1297 		    ND_NA_FLAG_OVERRIDE |
1298 		    (ip6_forwarding ? ND_NA_FLAG_ROUTER : 0), 1, NULL);
1299 	}
1300 }
1301 #endif /* INET6 */
1302 
1303 void
1304 carp_update_lsmask(struct carp_softc *sc)
1305 {
1306 	struct carp_vhost_entry *vhe;
1307 	int count;
1308 
1309 	if (sc->sc_balancing == CARP_BAL_NONE)
1310 		return;
1311 
1312 	sc->sc_lsmask = 0;
1313 	count = 0;
1314 
1315 	KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
1316 	SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
1317 		if (vhe->state == MASTER && count < sizeof(sc->sc_lsmask) * 8)
1318 			sc->sc_lsmask |= 1 << count;
1319 		count++;
1320 	}
1321 	sc->sc_lscount = count;
1322 	CARP_LOG(LOG_DEBUG, sc, ("carp_update_lsmask: %x", sc->sc_lsmask));
1323 }
1324 
1325 int
1326 carp_iamatch(struct ifnet *ifp)
1327 {
1328 	struct carp_softc *sc = ifp->if_softc;
1329 	struct carp_vhost_entry *vhe;
1330 	struct srp_ref sr;
1331 	int match = 0;
1332 
1333 	vhe = SRPL_FIRST(&sr, &sc->carp_vhosts);
1334 	if (vhe->state == MASTER)
1335 		match = 1;
1336 	SRPL_LEAVE(&sr);
1337 
1338 	return (match);
1339 }
1340 
1341 int
1342 carp_ourether(struct ifnet *ifp, uint8_t *ena)
1343 {
1344 	struct srpl *cif = &ifp->if_carp;
1345 	struct carp_softc *sc;
1346 	struct srp_ref sr;
1347 	int match = 0;
1348 
1349 	KASSERT(ifp->if_type == IFT_ETHER);
1350 
1351 	SRPL_FOREACH(sc, &sr, cif, sc_list) {
1352 		if ((sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
1353 		    (IFF_UP|IFF_RUNNING))
1354 			continue;
1355 		if (carp_vhe_match(sc, ena)) {
1356 			match = 1;
1357 			break;
1358 		}
1359 	}
1360 	SRPL_LEAVE(&sr);
1361 
1362 	return (match);
1363 }
1364 
1365 int
1366 carp_vhe_match(struct carp_softc *sc, uint8_t *ena)
1367 {
1368 	struct carp_vhost_entry *vhe;
1369 	struct srp_ref sr;
1370 	int match = 0;
1371 
1372 	vhe = SRPL_FIRST(&sr, &sc->carp_vhosts);
1373 	match = (vhe->state == MASTER || sc->sc_balancing >= CARP_BAL_IP) &&
1374 	    !memcmp(ena, sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN);
1375 	SRPL_LEAVE(&sr);
1376 
1377 	return (match);
1378 }
1379 
1380 struct mbuf *
1381 carp_input(struct ifnet *ifp0, struct mbuf *m)
1382 {
1383 	struct ether_header *eh;
1384 	struct srpl *cif;
1385 	struct carp_softc *sc;
1386 	struct srp_ref sr;
1387 
1388 	eh = mtod(m, struct ether_header *);
1389 	cif = &ifp0->if_carp;
1390 
1391 	SRPL_FOREACH(sc, &sr, cif, sc_list) {
1392 		if ((sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
1393 		    (IFF_UP|IFF_RUNNING))
1394 			continue;
1395 
1396 		if (carp_vhe_match(sc, eh->ether_dhost)) {
1397 			/*
1398 			 * These packets look like layer 2 multicast but they
1399 			 * are unicast at layer 3. With help of the tag the
1400 			 * mbuf's M_MCAST flag can be removed by carp_lsdrop()
1401 			 * after we have passed layer 2.
1402 			 */
1403 			if (sc->sc_balancing == CARP_BAL_IP) {
1404 				struct m_tag *mtag;
1405 				mtag = m_tag_get(PACKET_TAG_CARP_BAL_IP, 0,
1406 				    M_NOWAIT);
1407 				if (mtag == NULL) {
1408 					m_freem(m);
1409 					goto out;
1410 				}
1411 				m_tag_prepend(m, mtag);
1412 			}
1413 			break;
1414 		}
1415 	}
1416 
1417 	if (sc == NULL) {
1418 		SRPL_LEAVE(&sr);
1419 
1420 		if (!ETHER_IS_MULTICAST(eh->ether_dhost))
1421 			return (m);
1422 
1423 		/*
1424 		 * XXX Should really check the list of multicast addresses
1425 		 * for each CARP interface _before_ copying.
1426 		 */
1427 		SRPL_FOREACH(sc, &sr, cif, sc_list) {
1428 			struct mbuf *m0;
1429 
1430 			if (!(sc->sc_if.if_flags & IFF_UP))
1431 				continue;
1432 
1433 			m0 = m_dup_pkt(m, ETHER_ALIGN, M_DONTWAIT);
1434 			if (m0 == NULL)
1435 				continue;
1436 
1437 			if_vinput(&sc->sc_if, m0);
1438 		}
1439 		SRPL_LEAVE(&sr);
1440 
1441 		return (m);
1442 	}
1443 
1444 	if_vinput(&sc->sc_if, m);
1445 out:
1446 	SRPL_LEAVE(&sr);
1447 
1448 	return (NULL);
1449 }
1450 
1451 int
1452 carp_lsdrop(struct ifnet *ifp, struct mbuf *m, sa_family_t af, u_int32_t *src,
1453     u_int32_t *dst, int drop)
1454 {
1455 	struct carp_softc *sc;
1456 	u_int32_t fold;
1457 	struct m_tag *mtag;
1458 
1459 	if (ifp->if_type != IFT_CARP)
1460 		return 0;
1461 	sc = ifp->if_softc;
1462 	if (sc->sc_balancing == CARP_BAL_NONE)
1463 		return 0;
1464 
1465 	/*
1466 	 * Remove M_MCAST flag from mbuf of balancing ip traffic, since the fact
1467 	 * that it is layer 2 multicast does not implicate that it is also layer
1468 	 * 3 multicast.
1469 	 */
1470 	if (m->m_flags & M_MCAST &&
1471 	    (mtag = m_tag_find(m, PACKET_TAG_CARP_BAL_IP, NULL))) {
1472 		m_tag_delete(m, mtag);
1473 		m->m_flags &= ~M_MCAST;
1474 	}
1475 
1476 	/*
1477 	 * Return without making a drop decision. This allows to clear the
1478 	 * M_MCAST flag and do nothing else.
1479 	 */
1480 	if (!drop)
1481 		return 0;
1482 
1483 	/*
1484 	 * Never drop carp advertisements.
1485 	 * XXX Bad idea to pass all broadcast / multicast traffic?
1486 	 */
1487 	if (m->m_flags & (M_BCAST|M_MCAST))
1488 		return 0;
1489 
1490 	fold = src[0] ^ dst[0];
1491 #ifdef INET6
1492 	if (af == AF_INET6) {
1493 		int i;
1494 		for (i = 1; i < 4; i++)
1495 			fold ^= src[i] ^ dst[i];
1496 	}
1497 #endif
1498 	if (sc->sc_lscount == 0) /* just to be safe */
1499 		return 1;
1500 
1501 	return ((1 << (ntohl(fold) % sc->sc_lscount)) & sc->sc_lsmask) == 0;
1502 }
1503 
/*
 * Callback wrapper around carp_master_down(): runs it for the vhost entry
 * passed as the opaque argument while holding the net lock.
 */
void
carp_timer_down(void *v)
{
	struct carp_vhost_entry *vhe = v;

	NET_LOCK();
	carp_master_down(vhe);
	NET_UNLOCK();
}
1511 
1512 void
1513 carp_master_down(struct carp_vhost_entry *vhe)
1514 {
1515 	struct carp_softc *sc = vhe->parent_sc;
1516 
1517 	NET_ASSERT_LOCKED();
1518 
1519 	switch (vhe->state) {
1520 	case INIT:
1521 		printf("%s: master_down event in INIT state\n",
1522 		    sc->sc_if.if_xname);
1523 		break;
1524 	case MASTER:
1525 		break;
1526 	case BACKUP:
1527 		carp_set_state(vhe, MASTER);
1528 		carp_send_ad(vhe);
1529 		if (sc->sc_balancing == CARP_BAL_NONE && vhe->vhe_leader) {
1530 			carp_send_arp(sc);
1531 			/* Schedule a delayed ARP to deal w/ some L3 switches */
1532 			sc->sc_delayed_arp = 2;
1533 #ifdef INET6
1534 			carp_send_na(sc);
1535 #endif /* INET6 */
1536 		}
1537 		carp_setrun(vhe, 0);
1538 		carpstat_inc(carps_preempt);
1539 		break;
1540 	}
1541 }
1542 
1543 void
1544 carp_setrun_all(struct carp_softc *sc, sa_family_t af)
1545 {
1546 	struct carp_vhost_entry *vhe;
1547 
1548 	KERNEL_ASSERT_LOCKED(); /* touching carp_vhost */
1549 	SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
1550 		carp_setrun(vhe, af);
1551 	}
1552 }
1553 
1554 /*
1555  * When in backup state, af indicates whether to reset the master down timer
1556  * for v4 or v6. If it's set to zero, reset the ones which are already pending.
1557  */
1558 void
1559 carp_setrun(struct carp_vhost_entry *vhe, sa_family_t af)
1560 {
1561 	struct ifnet *ifp;
1562 	struct timeval tv;
1563 	struct carp_softc *sc = vhe->parent_sc;
1564 
1565 	if ((ifp = if_get(sc->sc_carpdevidx)) == NULL) {
1566 		sc->sc_if.if_flags &= ~IFF_RUNNING;
1567 		carp_set_state_all(sc, INIT);
1568 		return;
1569 	}
1570 
1571 	if (memcmp(((struct arpcom *)ifp)->ac_enaddr,
1572 	    sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN) == 0)
1573 		sc->sc_realmac = 1;
1574 	else
1575 		sc->sc_realmac = 0;
1576 
1577 	if_put(ifp);
1578 
1579 	if (sc->sc_if.if_flags & IFF_UP && vhe->vhid > 0 &&
1580 	    (sc->sc_naddrs || sc->sc_naddrs6) && !sc->sc_suppress) {
1581 		sc->sc_if.if_flags |= IFF_RUNNING;
1582 	} else {
1583 		sc->sc_if.if_flags &= ~IFF_RUNNING;
1584 		return;
1585 	}
1586 
1587 	switch (vhe->state) {
1588 	case INIT:
1589 		carp_set_state(vhe, BACKUP);
1590 		carp_setrun(vhe, 0);
1591 		break;
1592 	case BACKUP:
1593 		timeout_del(&vhe->ad_tmo);
1594 		tv.tv_sec = 3 * sc->sc_advbase;
1595 		if (sc->sc_advbase == 0 && vhe->advskew == 0)
1596 			tv.tv_usec = 3 * 1000000 / 256;
1597 		else if (sc->sc_advbase == 0)
1598 			tv.tv_usec = 3 * vhe->advskew * 1000000 / 256;
1599 		else
1600 			tv.tv_usec = vhe->advskew * 1000000 / 256;
1601 		if (vhe->vhe_leader)
1602 			sc->sc_delayed_arp = -1;
1603 		switch (af) {
1604 		case AF_INET:
1605 			timeout_add_tv(&vhe->md_tmo, &tv);
1606 			break;
1607 #ifdef INET6
1608 		case AF_INET6:
1609 			timeout_add_tv(&vhe->md6_tmo, &tv);
1610 			break;
1611 #endif /* INET6 */
1612 		default:
1613 			if (sc->sc_naddrs)
1614 				timeout_add_tv(&vhe->md_tmo, &tv);
1615 			if (sc->sc_naddrs6)
1616 				timeout_add_tv(&vhe->md6_tmo, &tv);
1617 			break;
1618 		}
1619 		break;
1620 	case MASTER:
1621 		tv.tv_sec = sc->sc_advbase;
1622 		if (sc->sc_advbase == 0 && vhe->advskew == 0)
1623 			tv.tv_usec = 1 * 1000000 / 256;
1624 		else
1625 			tv.tv_usec = vhe->advskew * 1000000 / 256;
1626 		timeout_add_tv(&vhe->ad_tmo, &tv);
1627 		break;
1628 	}
1629 }
1630 
1631 void
1632 carp_multicast_cleanup(struct carp_softc *sc)
1633 {
1634 	struct ip_moptions *imo = &sc->sc_imo;
1635 #ifdef INET6
1636 	struct ip6_moptions *im6o = &sc->sc_im6o;
1637 #endif
1638 	u_int16_t n = imo->imo_num_memberships;
1639 
1640 	/* Clean up our own multicast memberships */
1641 	while (n-- > 0) {
1642 		if (imo->imo_membership[n] != NULL) {
1643 			in_delmulti(imo->imo_membership[n]);
1644 			imo->imo_membership[n] = NULL;
1645 		}
1646 	}
1647 	imo->imo_num_memberships = 0;
1648 	imo->imo_ifidx = 0;
1649 
1650 #ifdef INET6
1651 	while (!LIST_EMPTY(&im6o->im6o_memberships)) {
1652 		struct in6_multi_mship *imm =
1653 		    LIST_FIRST(&im6o->im6o_memberships);
1654 
1655 		LIST_REMOVE(imm, i6mm_chain);
1656 		in6_leavegroup(imm);
1657 	}
1658 	im6o->im6o_ifidx = 0;
1659 #endif
1660 
1661 	/* And any other multicast memberships */
1662 	carp_ether_purgemulti(sc);
1663 }
1664 
1665 int
1666 carp_set_ifp(struct carp_softc *sc, struct ifnet *ifp0)
1667 {
1668 	struct srpl *cif;
1669 	struct carp_softc *vr, *last = NULL, *after = NULL;
1670 	int myself = 0, error = 0;
1671 
1672 	KASSERT(ifp0->if_index != sc->sc_carpdevidx);
1673 	KERNEL_ASSERT_LOCKED(); /* touching if_carp */
1674 
1675 	if ((ifp0->if_flags & IFF_MULTICAST) == 0)
1676 		return (EADDRNOTAVAIL);
1677 
1678 	if (ifp0->if_type != IFT_ETHER)
1679 		return (EINVAL);
1680 
1681 	cif = &ifp0->if_carp;
1682 	if (carp_check_dup_vhids(sc, cif, NULL))
1683 		return (EINVAL);
1684 
1685 	if ((error = ifpromisc(ifp0, 1)))
1686 		return (error);
1687 
1688 	/* detach from old interface */
1689 	if (sc->sc_carpdevidx != 0)
1690 		carpdetach(sc);
1691 
1692 	/* attach carp interface to physical interface */
1693 	if_detachhook_add(ifp0, &sc->sc_dtask);
1694 	if_linkstatehook_add(ifp0, &sc->sc_ltask);
1695 
1696 	sc->sc_carpdevidx = ifp0->if_index;
1697 	sc->sc_if.if_capabilities = ifp0->if_capabilities &
1698 	    IFCAP_CSUM_MASK;
1699 
1700 	SRPL_FOREACH_LOCKED(vr, cif, sc_list) {
1701 		struct carp_vhost_entry *vrhead, *schead;
1702 		last = vr;
1703 
1704 		if (vr == sc)
1705 			myself = 1;
1706 
1707 		vrhead = SRPL_FIRST_LOCKED(&vr->carp_vhosts);
1708 		schead = SRPL_FIRST_LOCKED(&sc->carp_vhosts);
1709 		if (vrhead->vhid < schead->vhid)
1710 			after = vr;
1711 	}
1712 
1713 	if (!myself) {
1714 		/* We're trying to keep things in order */
1715 		if (last == NULL) {
1716 			SRPL_INSERT_HEAD_LOCKED(&carp_sc_rc, cif,
1717 			    sc, sc_list);
1718 		} else if (after == NULL) {
1719 			SRPL_INSERT_AFTER_LOCKED(&carp_sc_rc, last,
1720 			    sc, sc_list);
1721 		} else {
1722 			SRPL_INSERT_AFTER_LOCKED(&carp_sc_rc, after,
1723 			    sc, sc_list);
1724 		}
1725 	}
1726 	if (sc->sc_naddrs || sc->sc_naddrs6)
1727 		sc->sc_if.if_flags |= IFF_UP;
1728 	carp_set_enaddr(sc);
1729 
1730 	carp_carpdev_state(sc);
1731 
1732 	return (0);
1733 }
1734 
1735 void
1736 carp_set_vhe_enaddr(struct carp_vhost_entry *vhe)
1737 {
1738 	struct carp_softc *sc = vhe->parent_sc;
1739 
1740 	if (vhe->vhid != 0 && sc->sc_carpdevidx != 0) {
1741 		if (vhe->vhe_leader && sc->sc_balancing == CARP_BAL_IP)
1742 			vhe->vhe_enaddr[0] = 1;
1743 		else
1744 			vhe->vhe_enaddr[0] = 0;
1745 		vhe->vhe_enaddr[1] = 0;
1746 		vhe->vhe_enaddr[2] = 0x5e;
1747 		vhe->vhe_enaddr[3] = 0;
1748 		vhe->vhe_enaddr[4] = 1;
1749 		vhe->vhe_enaddr[5] = vhe->vhid;
1750 	} else
1751 		memset(vhe->vhe_enaddr, 0, ETHER_ADDR_LEN);
1752 }
1753 
1754 void
1755 carp_set_enaddr(struct carp_softc *sc)
1756 {
1757 	struct carp_vhost_entry *vhe;
1758 
1759 	KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
1760 	SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries)
1761 		carp_set_vhe_enaddr(vhe);
1762 
1763 	vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts);
1764 
1765 	/*
1766 	 * Use the carp lladdr if the running one isn't manually set.
1767 	 * Only compare static parts of the lladdr.
1768 	 */
1769 	if ((memcmp(sc->sc_ac.ac_enaddr + 1, vhe->vhe_enaddr + 1,
1770 	    ETHER_ADDR_LEN - 2) == 0) ||
1771 	    (!sc->sc_ac.ac_enaddr[0] && !sc->sc_ac.ac_enaddr[1] &&
1772 	    !sc->sc_ac.ac_enaddr[2] && !sc->sc_ac.ac_enaddr[3] &&
1773 	    !sc->sc_ac.ac_enaddr[4] && !sc->sc_ac.ac_enaddr[5]))
1774 		bcopy(vhe->vhe_enaddr, sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN);
1775 
1776 	/* Make sure the enaddr has changed before further twiddling. */
1777 	if (memcmp(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN) != 0) {
1778 		bcopy(sc->sc_ac.ac_enaddr, LLADDR(sc->sc_if.if_sadl),
1779 		    ETHER_ADDR_LEN);
1780 		bcopy(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN);
1781 #ifdef INET6
1782 		/*
1783 		 * (re)attach a link-local address which matches
1784 		 * our new MAC address.
1785 		 */
1786 		if (sc->sc_naddrs6)
1787 			in6_ifattach_linklocal(&sc->sc_if, NULL);
1788 #endif
1789 		carp_set_state_all(sc, INIT);
1790 		carp_setrun_all(sc, 0);
1791 	}
1792 }
1793 
1794 void
1795 carp_addr_updated(void *v)
1796 {
1797 	struct carp_softc *sc = (struct carp_softc *) v;
1798 	struct ifaddr *ifa;
1799 	int new_naddrs = 0, new_naddrs6 = 0;
1800 
1801 	TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
1802 		if (ifa->ifa_addr->sa_family == AF_INET)
1803 			new_naddrs++;
1804 #ifdef INET6
1805 		else if (ifa->ifa_addr->sa_family == AF_INET6)
1806 			new_naddrs6++;
1807 #endif /* INET6 */
1808 	}
1809 
1810 	/* We received address changes from if_addrhooks callback */
1811 	if (new_naddrs != sc->sc_naddrs || new_naddrs6 != sc->sc_naddrs6) {
1812 
1813 		sc->sc_naddrs = new_naddrs;
1814 		sc->sc_naddrs6 = new_naddrs6;
1815 
1816 		/* Re-establish multicast membership removed by in_control */
1817 		if (IN_MULTICAST(sc->sc_peer.s_addr)) {
1818 			if (!in_hasmulti(&sc->sc_peer, &sc->sc_if)) {
1819 				struct in_multi **imm =
1820 				    sc->sc_imo.imo_membership;
1821 				u_int16_t maxmem =
1822 				    sc->sc_imo.imo_max_memberships;
1823 
1824 				memset(&sc->sc_imo, 0, sizeof(sc->sc_imo));
1825 				sc->sc_imo.imo_membership = imm;
1826 				sc->sc_imo.imo_max_memberships = maxmem;
1827 
1828 				if (sc->sc_carpdevidx != 0 &&
1829 				    sc->sc_naddrs > 0)
1830 					carp_join_multicast(sc);
1831 			}
1832 		}
1833 
1834 		if (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) {
1835 			sc->sc_if.if_flags &= ~IFF_UP;
1836 			carp_set_state_all(sc, INIT);
1837 		} else
1838 			carp_hmac_prepare(sc);
1839 	}
1840 
1841 	carp_setrun_all(sc, 0);
1842 }
1843 
1844 int
1845 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin)
1846 {
1847 	struct in_addr *in = &sin->sin_addr;
1848 	int error;
1849 
1850 	KASSERT(sc->sc_carpdevidx != 0);
1851 
1852 	/* XXX is this necessary? */
1853 	if (in->s_addr == INADDR_ANY) {
1854 		carp_setrun_all(sc, 0);
1855 		return (0);
1856 	}
1857 
1858 	if (sc->sc_naddrs == 0 && (error = carp_join_multicast(sc)) != 0)
1859 		return (error);
1860 
1861 	carp_set_state_all(sc, INIT);
1862 
1863 	return (0);
1864 }
1865 
1866 int
1867 carp_join_multicast(struct carp_softc *sc)
1868 {
1869 	struct ip_moptions *imo = &sc->sc_imo;
1870 	struct in_multi *imm;
1871 	struct in_addr addr;
1872 
1873 	if (!IN_MULTICAST(sc->sc_peer.s_addr))
1874 		return (0);
1875 
1876 	addr.s_addr = sc->sc_peer.s_addr;
1877 	if ((imm = in_addmulti(&addr, &sc->sc_if)) == NULL)
1878 		return (ENOBUFS);
1879 
1880 	imo->imo_membership[0] = imm;
1881 	imo->imo_num_memberships = 1;
1882 	imo->imo_ifidx = sc->sc_if.if_index;
1883 	imo->imo_ttl = CARP_DFLTTL;
1884 	imo->imo_loop = 0;
1885 	return (0);
1886 }
1887 
1888 
1889 #ifdef INET6
1890 int
1891 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
1892 {
1893 	int error;
1894 
1895 	KASSERT(sc->sc_carpdevidx != 0);
1896 
1897 	if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
1898 		carp_setrun_all(sc, 0);
1899 		return (0);
1900 	}
1901 
1902 	if (sc->sc_naddrs6 == 0 && (error = carp_join_multicast6(sc)) != 0)
1903 		return (error);
1904 
1905 	carp_set_state_all(sc, INIT);
1906 
1907 	return (0);
1908 }
1909 
1910 int
1911 carp_join_multicast6(struct carp_softc *sc)
1912 {
1913 	struct in6_multi_mship *imm, *imm2;
1914 	struct ip6_moptions *im6o = &sc->sc_im6o;
1915 	struct sockaddr_in6 addr6;
1916 	int error;
1917 
1918 	/* Join IPv6 CARP multicast group */
1919 	memset(&addr6, 0, sizeof(addr6));
1920 	addr6.sin6_family = AF_INET6;
1921 	addr6.sin6_len = sizeof(addr6);
1922 	addr6.sin6_addr.s6_addr16[0] = htons(0xff02);
1923 	addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index);
1924 	addr6.sin6_addr.s6_addr8[15] = 0x12;
1925 	if ((imm = in6_joingroup(&sc->sc_if,
1926 	    &addr6.sin6_addr, &error)) == NULL) {
1927 		return (error);
1928 	}
1929 	/* join solicited multicast address */
1930 	memset(&addr6.sin6_addr, 0, sizeof(addr6.sin6_addr));
1931 	addr6.sin6_addr.s6_addr16[0] = htons(0xff02);
1932 	addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index);
1933 	addr6.sin6_addr.s6_addr32[1] = 0;
1934 	addr6.sin6_addr.s6_addr32[2] = htonl(1);
1935 	addr6.sin6_addr.s6_addr32[3] = 0;
1936 	addr6.sin6_addr.s6_addr8[12] = 0xff;
1937 	if ((imm2 = in6_joingroup(&sc->sc_if,
1938 	    &addr6.sin6_addr, &error)) == NULL) {
1939 		in6_leavegroup(imm);
1940 		return (error);
1941 	}
1942 
1943 	/* apply v6 multicast membership */
1944 	im6o->im6o_ifidx = sc->sc_if.if_index;
1945 	if (imm)
1946 		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm,
1947 		    i6mm_chain);
1948 	if (imm2)
1949 		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm2,
1950 		    i6mm_chain);
1951 
1952 	return (0);
1953 }
1954 
1955 #endif /* INET6 */
1956 
1957 int
1958 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
1959 {
1960 	struct proc *p = curproc;	/* XXX */
1961 	struct carp_softc *sc = ifp->if_softc;
1962 	struct carp_vhost_entry *vhe;
1963 	struct carpreq carpr;
1964 	struct ifaddr *ifa = (struct ifaddr *)addr;
1965 	struct ifreq *ifr = (struct ifreq *)addr;
1966 	struct ifnet *ifp0 = NULL;
1967 	int i, error = 0;
1968 
1969 	switch (cmd) {
1970 	case SIOCSIFADDR:
1971 		if (sc->sc_carpdevidx == 0)
1972 			return (EINVAL);
1973 
1974 		switch (ifa->ifa_addr->sa_family) {
1975 		case AF_INET:
1976 			sc->sc_if.if_flags |= IFF_UP;
1977 			error = carp_set_addr(sc, satosin(ifa->ifa_addr));
1978 			break;
1979 #ifdef INET6
1980 		case AF_INET6:
1981 			sc->sc_if.if_flags |= IFF_UP;
1982 			error = carp_set_addr6(sc, satosin6(ifa->ifa_addr));
1983 			break;
1984 #endif /* INET6 */
1985 		default:
1986 			error = EAFNOSUPPORT;
1987 			break;
1988 		}
1989 		break;
1990 
1991 	case SIOCSIFFLAGS:
1992 		KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
1993 		vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts);
1994 		if (vhe->state != INIT && !(ifr->ifr_flags & IFF_UP)) {
1995 			carp_del_all_timeouts(sc);
1996 
1997 			/* we need the interface up to bow out */
1998 			sc->sc_if.if_flags |= IFF_UP;
1999 			sc->sc_bow_out = 1;
2000 			carp_vhe_send_ad_all(sc);
2001 			sc->sc_bow_out = 0;
2002 
2003 			sc->sc_if.if_flags &= ~IFF_UP;
2004 			carp_set_state_all(sc, INIT);
2005 			carp_setrun_all(sc, 0);
2006 		} else if (vhe->state == INIT && (ifr->ifr_flags & IFF_UP)) {
2007 			sc->sc_if.if_flags |= IFF_UP;
2008 			carp_setrun_all(sc, 0);
2009 		}
2010 		break;
2011 
2012 	case SIOCSVH:
2013 		KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
2014 		vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts);
2015 		if ((error = suser(p)) != 0)
2016 			break;
2017 		if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
2018 			break;
2019 		error = 1;
2020 		if (carpr.carpr_carpdev[0] != '\0' &&
2021 		    (ifp0 = if_unit(carpr.carpr_carpdev)) == NULL)
2022 			return (EINVAL);
2023 		if (carpr.carpr_peer.s_addr == 0)
2024 			sc->sc_peer.s_addr = INADDR_CARP_GROUP;
2025 		else
2026 			sc->sc_peer.s_addr = carpr.carpr_peer.s_addr;
2027 		if (ifp0 != NULL && ifp0->if_index != sc->sc_carpdevidx) {
2028 			if ((error = carp_set_ifp(sc, ifp0))) {
2029 				if_put(ifp0);
2030 				return (error);
2031 			}
2032 		}
2033 		if_put(ifp0);
2034 		if (vhe->state != INIT && carpr.carpr_state != vhe->state) {
2035 			switch (carpr.carpr_state) {
2036 			case BACKUP:
2037 				timeout_del(&vhe->ad_tmo);
2038 				carp_set_state_all(sc, BACKUP);
2039 				carp_setrun_all(sc, 0);
2040 				break;
2041 			case MASTER:
2042 				KERNEL_ASSERT_LOCKED();
2043 				/* touching carp_vhosts */
2044 				SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts,
2045 				    vhost_entries)
2046 					carp_master_down(vhe);
2047 				break;
2048 			default:
2049 				break;
2050 			}
2051 		}
2052 		if ((error = carp_vhids_ioctl(sc, &carpr)))
2053 			return (error);
2054 		if (carpr.carpr_advbase >= 0) {
2055 			if (carpr.carpr_advbase > 255) {
2056 				error = EINVAL;
2057 				break;
2058 			}
2059 			sc->sc_advbase = carpr.carpr_advbase;
2060 			error--;
2061 		}
2062 		if (memcmp(sc->sc_advskews, carpr.carpr_advskews,
2063 		    sizeof(sc->sc_advskews))) {
2064 			i = 0;
2065 			KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
2066 			SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts,
2067 			    vhost_entries)
2068 				vhe->advskew = carpr.carpr_advskews[i++];
2069 			bcopy(carpr.carpr_advskews, sc->sc_advskews,
2070 			    sizeof(sc->sc_advskews));
2071 		}
2072 		if (sc->sc_balancing != carpr.carpr_balancing) {
2073 			if (carpr.carpr_balancing > CARP_BAL_MAXID) {
2074 				error = EINVAL;
2075 				break;
2076 			}
2077 			sc->sc_balancing = carpr.carpr_balancing;
2078 			carp_set_enaddr(sc);
2079 			carp_update_lsmask(sc);
2080 		}
2081 		bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key));
2082 		if (error > 0)
2083 			error = EINVAL;
2084 		else {
2085 			error = 0;
2086 			carp_hmac_prepare(sc);
2087 			carp_setrun_all(sc, 0);
2088 		}
2089 		break;
2090 
2091 	case SIOCGVH:
2092 		memset(&carpr, 0, sizeof(carpr));
2093 		if ((ifp0 = if_get(sc->sc_carpdevidx)) != NULL)
2094 			strlcpy(carpr.carpr_carpdev, ifp0->if_xname, IFNAMSIZ);
2095 		if_put(ifp0);
2096 		i = 0;
2097 		KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
2098 		SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
2099 			carpr.carpr_vhids[i] = vhe->vhid;
2100 			carpr.carpr_advskews[i] = vhe->advskew;
2101 			carpr.carpr_states[i] = vhe->state;
2102 			i++;
2103 		}
2104 		carpr.carpr_advbase = sc->sc_advbase;
2105 		carpr.carpr_balancing = sc->sc_balancing;
2106 		if (suser(p) == 0)
2107 			bcopy(sc->sc_key, carpr.carpr_key,
2108 			    sizeof(carpr.carpr_key));
2109 		carpr.carpr_peer.s_addr = sc->sc_peer.s_addr;
2110 		error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
2111 		break;
2112 
2113 	case SIOCADDMULTI:
2114 		error = carp_ether_addmulti(sc, ifr);
2115 		break;
2116 
2117 	case SIOCDELMULTI:
2118 		error = carp_ether_delmulti(sc, ifr);
2119 		break;
2120 	case SIOCAIFGROUP:
2121 	case SIOCDIFGROUP:
2122 		if (sc->sc_demote_cnt)
2123 			carp_ifgroup_ioctl(ifp, cmd, addr);
2124 		break;
2125 	case SIOCSIFGATTR:
2126 		carp_ifgattr_ioctl(ifp, cmd, addr);
2127 		break;
2128 	default:
2129 		error = ENOTTY;
2130 	}
2131 
2132 	if (memcmp(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN) != 0)
2133 		carp_set_enaddr(sc);
2134 	return (error);
2135 }
2136 
2137 int
2138 carp_check_dup_vhids(struct carp_softc *sc, struct srpl *cif,
2139     struct carpreq *carpr)
2140 {
2141 	struct carp_softc *vr;
2142 	struct carp_vhost_entry *vhe, *vhe0;
2143 	int i;
2144 
2145 	KERNEL_ASSERT_LOCKED(); /* touching if_carp + carp_vhosts */
2146 
2147 	SRPL_FOREACH_LOCKED(vr, cif, sc_list) {
2148 		if (vr == sc)
2149 			continue;
2150 		SRPL_FOREACH_LOCKED(vhe, &vr->carp_vhosts, vhost_entries) {
2151 			if (carpr) {
2152 				for (i = 0; carpr->carpr_vhids[i]; i++) {
2153 					if (vhe->vhid == carpr->carpr_vhids[i])
2154 						return (EINVAL);
2155 				}
2156 			}
2157 			SRPL_FOREACH_LOCKED(vhe0, &sc->carp_vhosts,
2158 			    vhost_entries) {
2159 				if (vhe->vhid == vhe0->vhid)
2160 					return (EINVAL);
2161 			}
2162 		}
2163 	}
2164 	return (0);
2165 }
2166 
2167 int
2168 carp_vhids_ioctl(struct carp_softc *sc, struct carpreq *carpr)
2169 {
2170 	int i, j;
2171 	u_int8_t taken_vhids[256];
2172 
2173 	if (carpr->carpr_vhids[0] == 0 ||
2174 	    !memcmp(sc->sc_vhids, carpr->carpr_vhids, sizeof(sc->sc_vhids)))
2175 		return (0);
2176 
2177 	memset(taken_vhids, 0, sizeof(taken_vhids));
2178 	for (i = 0; carpr->carpr_vhids[i]; i++) {
2179 		struct ifnet *ifp;
2180 
2181 		if (taken_vhids[carpr->carpr_vhids[i]])
2182 			return (EINVAL);
2183 		taken_vhids[carpr->carpr_vhids[i]] = 1;
2184 
2185 		if ((ifp = if_get(sc->sc_carpdevidx)) != NULL) {
2186 			struct srpl *cif;
2187 			cif = &ifp->if_carp;
2188 			if (carp_check_dup_vhids(sc, cif, carpr)) {
2189 				if_put(ifp);
2190 				return (EINVAL);
2191 			}
2192 		}
2193 		if_put(ifp);
2194 		if (carpr->carpr_advskews[i] >= 255)
2195 			return (EINVAL);
2196 	}
2197 	/* set sane balancing defaults */
2198 	if (i <= 1)
2199 		carpr->carpr_balancing = CARP_BAL_NONE;
2200 	else if (carpr->carpr_balancing == CARP_BAL_NONE &&
2201 	    sc->sc_balancing == CARP_BAL_NONE)
2202 		carpr->carpr_balancing = CARP_BAL_IP;
2203 
2204 	/* destroy all */
2205 	carp_del_all_timeouts(sc);
2206 	carp_destroy_vhosts(sc);
2207 	memset(sc->sc_vhids, 0, sizeof(sc->sc_vhids));
2208 
2209 	/* sort vhosts list by vhid */
2210 	for (j = 1; j <= 255; j++) {
2211 		for (i = 0; carpr->carpr_vhids[i]; i++) {
2212 			if (carpr->carpr_vhids[i] != j)
2213 				continue;
2214 			if (carp_new_vhost(sc, carpr->carpr_vhids[i],
2215 			    carpr->carpr_advskews[i]))
2216 				return (ENOMEM);
2217 			sc->sc_vhids[i] = carpr->carpr_vhids[i];
2218 			sc->sc_advskews[i] = carpr->carpr_advskews[i];
2219 		}
2220 	}
2221 	carp_set_enaddr(sc);
2222 	carp_set_state_all(sc, INIT);
2223 	return (0);
2224 }
2225 
2226 void
2227 carp_ifgroup_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
2228 {
2229 	struct ifgroupreq *ifgr = (struct ifgroupreq *)addr;
2230 	struct ifg_list	*ifgl;
2231 	int *dm, adj;
2232 
2233 	if (!strcmp(ifgr->ifgr_group, IFG_ALL))
2234 		return;
2235 	adj = ((struct carp_softc *)ifp->if_softc)->sc_demote_cnt;
2236 	if (cmd == SIOCDIFGROUP)
2237 		adj = adj * -1;
2238 
2239 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2240 		if (!strcmp(ifgl->ifgl_group->ifg_group, ifgr->ifgr_group)) {
2241 			dm = &ifgl->ifgl_group->ifg_carp_demoted;
2242 			if (*dm + adj >= 0)
2243 				*dm += adj;
2244 			else
2245 				*dm = 0;
2246 		}
2247 }
2248 
2249 void
2250 carp_ifgattr_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
2251 {
2252 	struct ifgroupreq *ifgr = (struct ifgroupreq *)addr;
2253 	struct carp_softc *sc = ifp->if_softc;
2254 
2255 	if (ifgr->ifgr_attrib.ifg_carp_demoted > 0 && (sc->sc_if.if_flags &
2256 	    (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING))
2257 		carp_vhe_send_ad_all(sc);
2258 }
2259 
2260 void
2261 carp_start(struct ifnet *ifp)
2262 {
2263 	struct carp_softc *sc = ifp->if_softc;
2264 	struct ifnet *ifp0;
2265 	struct mbuf *m;
2266 
2267 	if ((ifp0 = if_get(sc->sc_carpdevidx)) == NULL) {
2268 		ifq_purge(&ifp->if_snd);
2269 		return;
2270 	}
2271 
2272 	while ((m = ifq_dequeue(&ifp->if_snd)) != NULL)
2273 		carp_transmit(sc, ifp0, m);
2274 	if_put(ifp0);
2275 }
2276 
2277 void
2278 carp_transmit(struct carp_softc *sc, struct ifnet *ifp0, struct mbuf *m)
2279 {
2280 	struct ifnet *ifp = &sc->sc_if;
2281 
2282 #if NBPFILTER > 0
2283 	{
2284 		caddr_t if_bpf = ifp->if_bpf;
2285 		if (if_bpf)
2286 			bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT);
2287 	}
2288 #endif /* NBPFILTER > 0 */
2289 
2290 	if (!ISSET(ifp0->if_flags, IFF_RUNNING)) {
2291 		counters_inc(ifp->if_counters, ifc_oerrors);
2292 		m_freem(m);
2293 		return;
2294 	}
2295 
2296 	/*
2297 	 * Do not leak the multicast address when sending
2298 	 * advertisements in 'ip' and 'ip-stealth' balacing
2299 	 * modes.
2300 	 */
2301 	if (sc->sc_balancing == CARP_BAL_IP ||
2302 	    sc->sc_balancing == CARP_BAL_IPSTEALTH) {
2303 		struct ether_header *eh = mtod(m, struct ether_header *);
2304 		memcpy(eh->ether_shost, sc->sc_ac.ac_enaddr,
2305 		    sizeof(eh->ether_shost));
2306 	}
2307 
2308 	if (if_enqueue(ifp0, m))
2309 		counters_inc(ifp->if_counters, ifc_oerrors);
2310 }
2311 
2312 int
2313 carp_enqueue(struct ifnet *ifp, struct mbuf *m)
2314 {
2315 	struct carp_softc *sc = ifp->if_softc;
2316 	struct ifnet *ifp0;
2317 
2318 	/* no ifq_is_priq, cos hfsc on carp doesn't make sense */
2319 
2320 	/*
2321 	 * If the parent of this carp(4) got destroyed while
2322 	 * `m' was being processed, silently drop it.
2323 	 */
2324 	if ((ifp0 = if_get(sc->sc_carpdevidx)) == NULL) {
2325 		m_freem(m);
2326 		return (0);
2327 	}
2328 
2329 	counters_pkt(ifp->if_counters,
2330 	    ifc_opackets, ifc_obytes, m->m_pkthdr.len);
2331 	carp_transmit(sc, ifp0, m);
2332 	if_put(ifp0);
2333 
2334 	return (0);
2335 }
2336 
2337 int
2338 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
2339     struct rtentry *rt)
2340 {
2341 	struct carp_softc *sc = ((struct carp_softc *)ifp->if_softc);
2342 	struct carp_vhost_entry *vhe;
2343 	struct srp_ref sr;
2344 	int ismaster;
2345 
2346 	if (sc->cur_vhe == NULL) {
2347 		vhe = SRPL_FIRST(&sr, &sc->carp_vhosts);
2348 		ismaster = (vhe->state == MASTER);
2349 		SRPL_LEAVE(&sr);
2350 	} else {
2351 		ismaster = (sc->cur_vhe->state == MASTER);
2352 	}
2353 
2354 	if ((sc->sc_balancing == CARP_BAL_NONE && !ismaster)) {
2355 		m_freem(m);
2356 		return (ENETUNREACH);
2357 	}
2358 
2359 	return (ether_output(ifp, m, sa, rt));
2360 }
2361 
2362 void
2363 carp_set_state_all(struct carp_softc *sc, int state)
2364 {
2365 	struct carp_vhost_entry *vhe;
2366 
2367 	KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
2368 
2369 	SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
2370 		if (vhe->state == state)
2371 			continue;
2372 
2373 		carp_set_state(vhe, state);
2374 	}
2375 }
2376 
2377 void
2378 carp_set_state(struct carp_vhost_entry *vhe, int state)
2379 {
2380 	struct carp_softc *sc = vhe->parent_sc;
2381 	static const char *carp_states[] = { CARP_STATES };
2382 	int loglevel;
2383 	struct carp_vhost_entry *vhe0;
2384 
2385 	KASSERT(vhe->state != state);
2386 
2387 	if (vhe->state == INIT || state == INIT)
2388 		loglevel = LOG_WARNING;
2389 	else
2390 		loglevel = LOG_CRIT;
2391 
2392 	if (sc->sc_vhe_count > 1)
2393 		CARP_LOG(loglevel, sc,
2394 		    ("state transition (vhid %d): %s -> %s", vhe->vhid,
2395 		    carp_states[vhe->state], carp_states[state]));
2396 	else
2397 		CARP_LOG(loglevel, sc,
2398 		    ("state transition: %s -> %s",
2399 		    carp_states[vhe->state], carp_states[state]));
2400 
2401 	vhe->state = state;
2402 	carp_update_lsmask(sc);
2403 
2404 	KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
2405 
2406 	sc->sc_if.if_link_state = LINK_STATE_INVALID;
2407 	SRPL_FOREACH_LOCKED(vhe0, &sc->carp_vhosts, vhost_entries) {
2408 		/*
2409 		 * Link must be up if at least one vhe is in state MASTER to
2410 		 * bring or keep route up.
2411 		 */
2412 		if (vhe0->state == MASTER) {
2413 			sc->sc_if.if_link_state = LINK_STATE_UP;
2414 			break;
2415 		} else if (vhe0->state == BACKUP) {
2416 			sc->sc_if.if_link_state = LINK_STATE_DOWN;
2417 		}
2418 	}
2419 	if_link_state_change(&sc->sc_if);
2420 }
2421 
2422 void
2423 carp_group_demote_adj(struct ifnet *ifp, int adj, char *reason)
2424 {
2425 	struct ifg_list	*ifgl;
2426 	int *dm, need_ad;
2427 	struct carp_softc *nil = NULL;
2428 
2429 	if (ifp->if_type == IFT_CARP) {
2430 		dm = &((struct carp_softc *)ifp->if_softc)->sc_demote_cnt;
2431 		if (*dm + adj >= 0)
2432 			*dm += adj;
2433 		else
2434 			*dm = 0;
2435 	}
2436 
2437 	need_ad = 0;
2438 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
2439 		if (!strcmp(ifgl->ifgl_group->ifg_group, IFG_ALL))
2440 			continue;
2441 		dm = &ifgl->ifgl_group->ifg_carp_demoted;
2442 
2443 		if (*dm + adj >= 0)
2444 			*dm += adj;
2445 		else
2446 			*dm = 0;
2447 
2448 		if (adj > 0 && *dm == 1)
2449 			need_ad = 1;
2450 		CARP_LOG(LOG_ERR, nil,
2451 		    ("%s demoted group %s by %d to %d (%s)",
2452 		    ifp->if_xname, ifgl->ifgl_group->ifg_group,
2453 		    adj, *dm, reason));
2454 	}
2455 	if (need_ad)
2456 		carp_send_ad_all();
2457 }
2458 
2459 int
2460 carp_group_demote_count(struct carp_softc *sc)
2461 {
2462 	struct ifg_list	*ifgl;
2463 	int count = 0;
2464 
2465 	TAILQ_FOREACH(ifgl, &sc->sc_if.if_groups, ifgl_next)
2466 		count += ifgl->ifgl_group->ifg_carp_demoted;
2467 
2468 	if (count == 0 && sc->sc_demote_cnt)
2469 		count = sc->sc_demote_cnt;
2470 
2471 	return (count > 255 ? 255 : count);
2472 }
2473 
2474 void
2475 carp_carpdev_state(void *v)
2476 {
2477 	struct carp_softc *sc = v;
2478 	struct ifnet *ifp0;
2479 	int suppressed = sc->sc_suppress;
2480 
2481 	if ((ifp0 = if_get(sc->sc_carpdevidx)) == NULL)
2482 		return;
2483 
2484 	if (ifp0->if_link_state == LINK_STATE_DOWN ||
2485 	    !(ifp0->if_flags & IFF_UP)) {
2486 		sc->sc_if.if_flags &= ~IFF_RUNNING;
2487 		carp_del_all_timeouts(sc);
2488 		carp_set_state_all(sc, INIT);
2489 		sc->sc_suppress = 1;
2490 		carp_setrun_all(sc, 0);
2491 		if (!suppressed)
2492 			carp_group_demote_adj(&sc->sc_if, 1, "carpdev");
2493 	} else if (suppressed) {
2494 		carp_set_state_all(sc, INIT);
2495 		sc->sc_suppress = 0;
2496 		carp_setrun_all(sc, 0);
2497 		carp_group_demote_adj(&sc->sc_if, -1, "carpdev");
2498 	}
2499 
2500 	if_put(ifp0);
2501 }
2502 
2503 int
2504 carp_ether_addmulti(struct carp_softc *sc, struct ifreq *ifr)
2505 {
2506 	struct ifnet *ifp0;
2507 	struct carp_mc_entry *mc;
2508 	u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN];
2509 	int error;
2510 
2511 	ifp0 = if_get(sc->sc_carpdevidx);
2512 	if (ifp0 == NULL)
2513 		return (EINVAL);
2514 
2515 	error = ether_addmulti(ifr, (struct arpcom *)&sc->sc_ac);
2516 	if (error != ENETRESET) {
2517 		if_put(ifp0);
2518 		return (error);
2519 	}
2520 
2521 	/*
2522 	 * This is new multicast address.  We have to tell parent
2523 	 * about it.  Also, remember this multicast address so that
2524 	 * we can delete them on unconfigure.
2525 	 */
2526 	mc = malloc(sizeof(*mc), M_DEVBUF, M_NOWAIT);
2527 	if (mc == NULL) {
2528 		error = ENOMEM;
2529 		goto alloc_failed;
2530 	}
2531 
2532 	/*
2533 	 * As ether_addmulti() returns ENETRESET, following two
2534 	 * statement shouldn't fail.
2535 	 */
2536 	(void)ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi);
2537 	ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, mc->mc_enm);
2538 	memcpy(&mc->mc_addr, &ifr->ifr_addr, ifr->ifr_addr.sa_len);
2539 	LIST_INSERT_HEAD(&sc->carp_mc_listhead, mc, mc_entries);
2540 
2541 	error = (*ifp0->if_ioctl)(ifp0, SIOCADDMULTI, (caddr_t)ifr);
2542 	if (error != 0)
2543 		goto ioctl_failed;
2544 
2545 	if_put(ifp0);
2546 
2547 	return (error);
2548 
2549  ioctl_failed:
2550 	LIST_REMOVE(mc, mc_entries);
2551 	free(mc, M_DEVBUF, sizeof(*mc));
2552  alloc_failed:
2553 	(void)ether_delmulti(ifr, (struct arpcom *)&sc->sc_ac);
2554 	if_put(ifp0);
2555 
2556 	return (error);
2557 }
2558 
2559 int
2560 carp_ether_delmulti(struct carp_softc *sc, struct ifreq *ifr)
2561 {
2562 	struct ifnet *ifp0;
2563 	struct ether_multi *enm;
2564 	struct carp_mc_entry *mc;
2565 	u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN];
2566 	int error;
2567 
2568 	ifp0 = if_get(sc->sc_carpdevidx);
2569 	if (ifp0 == NULL)
2570 		return (EINVAL);
2571 
2572 	/*
2573 	 * Find a key to lookup carp_mc_entry.  We have to do this
2574 	 * before calling ether_delmulti for obvious reason.
2575 	 */
2576 	if ((error = ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi)) != 0)
2577 		goto rele;
2578 	ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, enm);
2579 	if (enm == NULL) {
2580 		error = EINVAL;
2581 		goto rele;
2582 	}
2583 
2584 	LIST_FOREACH(mc, &sc->carp_mc_listhead, mc_entries)
2585 		if (mc->mc_enm == enm)
2586 			break;
2587 
2588 	/* We won't delete entries we didn't add */
2589 	if (mc == NULL) {
2590 		error = EINVAL;
2591 		goto rele;
2592 	}
2593 
2594 	error = ether_delmulti(ifr, (struct arpcom *)&sc->sc_ac);
2595 	if (error != ENETRESET)
2596 		goto rele;
2597 
2598 	/* We no longer use this multicast address.  Tell parent so. */
2599 	error = (*ifp0->if_ioctl)(ifp0, SIOCDELMULTI, (caddr_t)ifr);
2600 	if (error == 0) {
2601 		/* And forget about this address. */
2602 		LIST_REMOVE(mc, mc_entries);
2603 		free(mc, M_DEVBUF, sizeof(*mc));
2604 	} else
2605 		(void)ether_addmulti(ifr, (struct arpcom *)&sc->sc_ac);
2606 rele:
2607 	if_put(ifp0);
2608 	return (error);
2609 }
2610 
2611 /*
2612  * Delete any multicast address we have asked to add from parent
2613  * interface.  Called when the carp is being unconfigured.
2614  */
2615 void
2616 carp_ether_purgemulti(struct carp_softc *sc)
2617 {
2618 	struct ifnet *ifp0;		/* Parent. */
2619 	struct carp_mc_entry *mc;
2620 	union {
2621 		struct ifreq ifreq;
2622 		struct {
2623 			char ifr_name[IFNAMSIZ];
2624 			struct sockaddr_storage ifr_ss;
2625 		} ifreq_storage;
2626 	} u;
2627 	struct ifreq *ifr = &u.ifreq;
2628 
2629 	if ((ifp0 = if_get(sc->sc_carpdevidx)) == NULL)
2630 		return;
2631 
2632 	memcpy(ifr->ifr_name, ifp0->if_xname, IFNAMSIZ);
2633 	while ((mc = LIST_FIRST(&sc->carp_mc_listhead)) != NULL) {
2634 		memcpy(&ifr->ifr_addr, &mc->mc_addr, mc->mc_addr.ss_len);
2635 		(void)(*ifp0->if_ioctl)(ifp0, SIOCDELMULTI, (caddr_t)ifr);
2636 		LIST_REMOVE(mc, mc_entries);
2637 		free(mc, M_DEVBUF, sizeof(*mc));
2638 	}
2639 
2640 	if_put(ifp0);
2641 }
2642 
2643 void
2644 carp_vh_ref(void *null, void *v)
2645 {
2646 	struct carp_vhost_entry *vhe = v;
2647 
2648 	refcnt_take(&vhe->vhost_refcnt);
2649 }
2650 
2651 void
2652 carp_vh_unref(void *null, void *v)
2653 {
2654 	struct carp_vhost_entry *vhe = v;
2655 
2656 	if (refcnt_rele(&vhe->vhost_refcnt)) {
2657 		carp_sc_unref(NULL, vhe->parent_sc);
2658 		free(vhe, M_DEVBUF, sizeof(*vhe));
2659 	}
2660 }
2661 
2662 void
2663 carp_sc_ref(void *null, void *s)
2664 {
2665 	struct carp_softc *sc = s;
2666 
2667 	refcnt_take(&sc->sc_refcnt);
2668 }
2669 
2670 void
2671 carp_sc_unref(void *null, void *s)
2672 {
2673 	struct carp_softc *sc = s;
2674 
2675 	refcnt_rele_wake(&sc->sc_refcnt);
2676 }
2677