xref: /openbsd/sys/netinet/ip_carp.c (revision 17df1aa7)
1 /*	$OpenBSD: ip_carp.c,v 1.175 2010/04/25 17:38:53 mpf Exp $	*/
2 
3 /*
4  * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
5  * Copyright (c) 2003 Ryan McBride. All rights reserved.
6  * Copyright (c) 2006-2008 Marco Pfatschbacher. All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
21  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
25  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
26  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
27  * THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 /*
31  * TODO:
32  *	- iface reconfigure
33  *	- support for hardware checksum calculations;
34  *
35  */
36 
37 #include "ether.h"
38 
39 #include <sys/param.h>
40 #include <sys/proc.h>
41 #include <sys/systm.h>
42 #include <sys/mbuf.h>
43 #include <sys/socket.h>
44 #include <sys/socketvar.h>
45 #include <sys/ioctl.h>
46 #include <sys/errno.h>
47 #include <sys/device.h>
48 #include <sys/kernel.h>
49 #include <sys/sysctl.h>
50 #include <sys/syslog.h>
51 
52 #include <machine/cpu.h>
53 
54 #include <net/if.h>
55 #include <net/if_types.h>
56 #include <net/if_llc.h>
57 #include <net/route.h>
58 #include <net/netisr.h>
59 
60 /* for arc4random() */
61 #include <dev/rndvar.h>
62 
63 #if NFDDI > 0
64 #include <net/if_fddi.h>
65 #endif
66 
67 #include <crypto/sha1.h>
68 
69 #ifdef INET
70 #include <netinet/in.h>
71 #include <netinet/in_systm.h>
72 #include <netinet/in_var.h>
73 #include <netinet/ip.h>
74 #include <netinet/ip_var.h>
75 #include <netinet/if_ether.h>
76 #include <netinet/ip_ipsp.h>
77 
78 #include <net/if_enc.h>
79 #include <net/if_dl.h>
80 #endif
81 
82 #ifdef INET6
83 #include <netinet/icmp6.h>
84 #include <netinet/ip6.h>
85 #include <netinet6/ip6_var.h>
86 #include <netinet6/nd6.h>
87 #include <netinet6/in6_ifattach.h>
88 #endif
89 
90 #include "bpfilter.h"
91 #if NBPFILTER > 0
92 #include <net/bpf.h>
93 #endif
94 
95 #include <netinet/ip_carp.h>
96 
97 struct carp_mc_entry {
98 	LIST_ENTRY(carp_mc_entry)	mc_entries;
99 	union {
100 		struct ether_multi	*mcu_enm;
101 	} mc_u;
102 	struct sockaddr_storage		mc_addr;
103 };
104 #define	mc_enm	mc_u.mcu_enm
105 
/*
 * Indexes into carp_vhost_entry.vhe_sha1[]; HMAC_MAX is the number of
 * precomputed contexts.
 */
enum {
	HMAC_ORIG = 0,		/* scope-embedded IPv6 addresses are hashed */
	HMAC_NOV6LL = 1,	/* scope-embedded IPv6 addresses are skipped */
	HMAC_MAX = 2
};
107 
108 struct carp_vhost_entry {
109 	LIST_ENTRY(carp_vhost_entry)	vhost_entries;
110 	struct carp_softc *parent_sc;
111 	int vhe_leader;
112 	int vhid;
113 	int advskew;
114 	enum { INIT = 0, BACKUP, MASTER }	state;
115 	struct timeout ad_tmo;	/* advertisement timeout */
116 	struct timeout md_tmo;	/* master down timeout */
117 	struct timeout md6_tmo;	/* master down timeout */
118 
119 	u_int64_t vhe_replay_cookie;
120 
121 	/* authentication */
122 #define CARP_HMAC_PAD	64
123 	unsigned char vhe_pad[CARP_HMAC_PAD];
124 	SHA1_CTX vhe_sha1[HMAC_MAX];
125 
126 	u_int8_t vhe_enaddr[ETHER_ADDR_LEN];
127 	struct sockaddr_dl vhe_sdl;	/* for IPv6 ndp balancing */
128 };
129 
130 struct carp_softc {
131 	struct arpcom sc_ac;
132 #define	sc_if		sc_ac.ac_if
133 #define	sc_carpdev	sc_ac.ac_if.if_carpdev
134 	void *ah_cookie;
135 	void *lh_cookie;
136 	struct ip_moptions sc_imo;
137 #ifdef INET6
138 	struct ip6_moptions sc_im6o;
139 #endif /* INET6 */
140 	TAILQ_ENTRY(carp_softc) sc_list;
141 
142 	int sc_suppress;
143 	int sc_bow_out;
144 	int sc_demote_cnt;
145 
146 	int sc_sendad_errors;
147 #define CARP_SENDAD_MAX_ERRORS(sc) (3 * (sc)->sc_vhe_count)
148 	int sc_sendad_success;
149 #define CARP_SENDAD_MIN_SUCCESS(sc) (3 * (sc)->sc_vhe_count)
150 
151 	char sc_curlladdr[ETHER_ADDR_LEN];
152 
153 	LIST_HEAD(__carp_vhosthead, carp_vhost_entry)	carp_vhosts;
154 	int sc_vhe_count;
155 	u_int8_t sc_vhids[CARP_MAXNODES];
156 	u_int8_t sc_advskews[CARP_MAXNODES];
157 	u_int8_t sc_balancing;
158 
159 	int sc_naddrs;
160 	int sc_naddrs6;
161 	int sc_advbase;		/* seconds */
162 
163 	/* authentication */
164 	unsigned char sc_key[CARP_KEY_LEN];
165 
166 	u_int32_t sc_hashkey[2];
167 	u_int32_t sc_lsmask;		/* load sharing mask */
168 	int sc_lscount;			/* # load sharing interfaces (max 32) */
169 	int sc_delayed_arp;		/* delayed ARP request countdown */
170 
171 	struct in_addr sc_peer;
172 
173 	LIST_HEAD(__carp_mchead, carp_mc_entry)	carp_mc_listhead;
174 	struct carp_vhost_entry *cur_vhe; /* current active vhe */
175 };
176 
177 int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, LOG_CRIT };	/* XXX for now */
178 struct carpstats carpstats;
179 
180 struct carp_if {
181 	TAILQ_HEAD(, carp_softc) vhif_vrs;
182 	int vhif_nvrs;
183 
184 	struct ifnet *vhif_ifp;
185 };
186 
/*
 * CARP_LOG(l, sc, s): emit a log line at priority `l' when the
 * CARPCTL_LOG threshold is at least `l'.
 *
 *	l	syslog priority
 *	sc	carp_softc pointer used for the "<ifname>: " prefix, or
 *		NULL to prefix the line with "carp: "
 *	s	parenthesized argument list handed to addlog(), e.g.
 *		("bad ttl %d", ttl)
 *
 * The macro arguments are parenthesized where they appear inside
 * expressions so that compound arguments expand safely.
 */
#define	CARP_LOG(l, sc, s)						\
	do {								\
		if (carp_opts[CARPCTL_LOG] >= (l)) {			\
			if ((sc) != NULL)				\
				log((l), "%s: ",			\
				    (sc)->sc_if.if_xname);		\
			else						\
				log((l), "carp: ");			\
			addlog s;					\
			addlog("\n");					\
		}							\
	} while (0)
199 
200 void	carp_hmac_prepare(struct carp_softc *);
201 void	carp_hmac_prepare_ctx(struct carp_vhost_entry *, u_int8_t);
202 void	carp_hmac_generate(struct carp_vhost_entry *, u_int32_t *,
203 	    unsigned char *, u_int8_t);
204 int	carp_hmac_verify(struct carp_vhost_entry *, u_int32_t *,
205 	    unsigned char *);
206 void	carp_setroute(struct carp_softc *, int);
207 void	carp_proto_input_c(struct mbuf *, struct carp_header *, int,
208 	    sa_family_t);
209 void	carpattach(int);
210 void	carpdetach(struct carp_softc *);
211 int	carp_prepare_ad(struct mbuf *, struct carp_vhost_entry *,
212 	    struct carp_header *);
213 void	carp_send_ad_all(void);
214 void	carp_vhe_send_ad_all(struct carp_softc *);
215 void	carp_send_ad(void *);
216 void	carp_send_arp(struct carp_softc *);
217 void	carp_master_down(void *);
218 int	carp_ioctl(struct ifnet *, u_long, caddr_t);
219 int	carp_vhids_ioctl(struct carp_softc *, struct carpreq *);
220 int	carp_check_dup_vhids(struct carp_softc *, struct carp_if *,
221 	    struct carpreq *);
222 void	carp_ifgroup_ioctl(struct ifnet *, u_long, caddr_t);
223 void	carp_ifgattr_ioctl(struct ifnet *, u_long, caddr_t);
224 void	carp_start(struct ifnet *);
225 void	carp_setrun_all(struct carp_softc *, sa_family_t);
226 void	carp_setrun(struct carp_vhost_entry *, sa_family_t);
227 void	carp_set_state_all(struct carp_softc *, int);
228 void	carp_set_state(struct carp_vhost_entry *, int);
229 void	carp_multicast_cleanup(struct carp_softc *);
230 int	carp_set_ifp(struct carp_softc *, struct ifnet *);
231 void	carp_set_enaddr(struct carp_softc *);
232 void	carp_set_vhe_enaddr(struct carp_vhost_entry *);
233 void	carp_addr_updated(void *);
234 u_int32_t	carp_hash(struct carp_softc *, u_char *);
235 int	carp_set_addr(struct carp_softc *, struct sockaddr_in *);
236 int	carp_join_multicast(struct carp_softc *);
237 #ifdef INET6
238 void	carp_send_na(struct carp_softc *);
239 int	carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
240 int	carp_join_multicast6(struct carp_softc *);
241 #endif
242 int	carp_clone_create(struct if_clone *, int);
243 int	carp_clone_destroy(struct ifnet *);
244 int	carp_ether_addmulti(struct carp_softc *, struct ifreq *);
245 int	carp_ether_delmulti(struct carp_softc *, struct ifreq *);
246 void	carp_ether_purgemulti(struct carp_softc *);
247 int	carp_group_demote_count(struct carp_softc *);
248 void	carp_update_lsmask(struct carp_softc *);
249 int	carp_new_vhost(struct carp_softc *, int, int);
250 void	carp_destroy_vhosts(struct carp_softc *);
251 void	carp_del_all_timeouts(struct carp_softc *);
252 
253 struct if_clone carp_cloner =
254     IF_CLONE_INITIALIZER("carp", carp_clone_create, carp_clone_destroy);
255 
256 #define carp_cksum(_m, _l)	((u_int16_t)in_cksum((_m), (_l)))
257 
258 void
259 carp_hmac_prepare(struct carp_softc *sc)
260 {
261 	struct carp_vhost_entry *vhe;
262 	u_int8_t i;
263 
264 	LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) {
265 		for (i = 0; i < HMAC_MAX; i++) {
266 			carp_hmac_prepare_ctx(vhe, i);
267 		}
268 	}
269 }
270 
271 void
272 carp_hmac_prepare_ctx(struct carp_vhost_entry *vhe, u_int8_t ctx)
273 {
274 	struct carp_softc *sc = vhe->parent_sc;
275 
276 	u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
277 	u_int8_t vhid = vhe->vhid & 0xff;
278 	SHA1_CTX sha1ctx;
279 	u_int32_t kmd[5];
280 	struct ifaddr *ifa;
281 	int i, found;
282 	struct in_addr last, cur, in;
283 #ifdef INET6
284 	struct in6_addr last6, cur6, in6;
285 #endif /* INET6 */
286 
287 	/* compute ipad from key */
288 	bzero(vhe->vhe_pad, sizeof(vhe->vhe_pad));
289 	bcopy(sc->sc_key, vhe->vhe_pad, sizeof(sc->sc_key));
290 	for (i = 0; i < sizeof(vhe->vhe_pad); i++)
291 		vhe->vhe_pad[i] ^= 0x36;
292 
293 	/* precompute first part of inner hash */
294 	SHA1Init(&vhe->vhe_sha1[ctx]);
295 	SHA1Update(&vhe->vhe_sha1[ctx], vhe->vhe_pad, sizeof(vhe->vhe_pad));
296 	SHA1Update(&vhe->vhe_sha1[ctx], (void *)&version, sizeof(version));
297 	SHA1Update(&vhe->vhe_sha1[ctx], (void *)&type, sizeof(type));
298 
299 	/* generate a key for the arpbalance hash, before the vhid is hashed */
300 	if (vhe->vhe_leader) {
301 		bcopy(&vhe->vhe_sha1[ctx], &sha1ctx, sizeof(sha1ctx));
302 		SHA1Final((unsigned char *)kmd, &sha1ctx);
303 		sc->sc_hashkey[0] = kmd[0] ^ kmd[1];
304 		sc->sc_hashkey[1] = kmd[2] ^ kmd[3];
305 	}
306 
307 	/* the rest of the precomputation */
308 	if (vhe->vhe_leader && bcmp(sc->sc_ac.ac_enaddr, vhe->vhe_enaddr,
309 	    ETHER_ADDR_LEN) != 0)
310 		SHA1Update(&vhe->vhe_sha1[ctx], sc->sc_ac.ac_enaddr,
311 		    ETHER_ADDR_LEN);
312 
313 	SHA1Update(&vhe->vhe_sha1[ctx], (void *)&vhid, sizeof(vhid));
314 
315 	/* Hash the addresses from smallest to largest, not interface order */
316 #ifdef INET
317 	cur.s_addr = 0;
318 	do {
319 		found = 0;
320 		last = cur;
321 		cur.s_addr = 0xffffffff;
322 		TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
323 			in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
324 			if (ifa->ifa_addr->sa_family == AF_INET &&
325 			    ntohl(in.s_addr) > ntohl(last.s_addr) &&
326 			    ntohl(in.s_addr) < ntohl(cur.s_addr)) {
327 				cur.s_addr = in.s_addr;
328 				found++;
329 			}
330 		}
331 		if (found)
332 			SHA1Update(&vhe->vhe_sha1[ctx],
333 			    (void *)&cur, sizeof(cur));
334 	} while (found);
335 #endif /* INET */
336 #ifdef INET6
337 	memset(&cur6, 0x00, sizeof(cur6));
338 	do {
339 		found = 0;
340 		last6 = cur6;
341 		memset(&cur6, 0xff, sizeof(cur6));
342 		TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
343 			in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
344 			if (IN6_IS_SCOPE_EMBED(&in6)) {
345 				if (ctx == HMAC_NOV6LL)
346 					continue;
347 				in6.s6_addr16[1] = 0;
348 			}
349 			if (ifa->ifa_addr->sa_family == AF_INET6 &&
350 			    memcmp(&in6, &last6, sizeof(in6)) > 0 &&
351 			    memcmp(&in6, &cur6, sizeof(in6)) < 0) {
352 				cur6 = in6;
353 				found++;
354 			}
355 		}
356 		if (found)
357 			SHA1Update(&vhe->vhe_sha1[ctx],
358 			    (void *)&cur6, sizeof(cur6));
359 	} while (found);
360 #endif /* INET6 */
361 
362 	/* convert ipad to opad */
363 	for (i = 0; i < sizeof(vhe->vhe_pad); i++)
364 		vhe->vhe_pad[i] ^= 0x36 ^ 0x5c;
365 }
366 
367 void
368 carp_hmac_generate(struct carp_vhost_entry *vhe, u_int32_t counter[2],
369     unsigned char md[20], u_int8_t ctx)
370 {
371 	SHA1_CTX sha1ctx;
372 
373 	/* fetch first half of inner hash */
374 	bcopy(&vhe->vhe_sha1[ctx], &sha1ctx, sizeof(sha1ctx));
375 
376 	SHA1Update(&sha1ctx, (void *)counter, sizeof(vhe->vhe_replay_cookie));
377 	SHA1Final(md, &sha1ctx);
378 
379 	/* outer hash */
380 	SHA1Init(&sha1ctx);
381 	SHA1Update(&sha1ctx, vhe->vhe_pad, sizeof(vhe->vhe_pad));
382 	SHA1Update(&sha1ctx, md, 20);
383 	SHA1Final(md, &sha1ctx);
384 }
385 
386 int
387 carp_hmac_verify(struct carp_vhost_entry *vhe, u_int32_t counter[2],
388     unsigned char md[20])
389 {
390 	unsigned char md2[20];
391 	u_int8_t i;
392 
393 	for (i = 0; i < HMAC_MAX; i++) {
394 		carp_hmac_generate(vhe, counter, md2, i);
395 		if (!bcmp(md, md2, sizeof(md2)))
396 			return (0);
397 	}
398 	return (1);
399 }
400 
401 void
402 carp_setroute(struct carp_softc *sc, int cmd)
403 {
404 	struct ifaddr *ifa;
405 	int s;
406 
407 	/* XXX this mess needs fixing */
408 
409 	s = splsoftnet();
410 	TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
411 		switch (ifa->ifa_addr->sa_family) {
412 		case AF_INET: {
413 			int error;
414 			struct sockaddr sa;
415 			struct rtentry *rt;
416 			struct radix_node_head *rnh;
417 			struct radix_node *rn;
418 			struct rt_addrinfo info;
419 			int hr_otherif, nr_ourif;
420 			struct sockaddr_rtlabel	sa_rl;
421 			const char *label;
422 
423 			/* Remove the existing host route, if any */
424 			bzero(&info, sizeof(info));
425 			info.rti_info[RTAX_DST] = ifa->ifa_addr;
426 			info.rti_flags = RTF_HOST;
427 			error = rtrequest1(RTM_DELETE, &info, RTP_CONNECTED,
428 			    NULL, sc->sc_if.if_rdomain);
429 			rt_missmsg(RTM_DELETE, &info, info.rti_flags, NULL,
430 			    error, sc->sc_if.if_rdomain);
431 
432 			/* Check for our address on another interface */
433 			/* XXX cries for proper API */
434 			rnh = rt_gettable(ifa->ifa_addr->sa_family, 0);
435 			rn = rnh->rnh_matchaddr(ifa->ifa_addr, rnh);
436 			rt = (struct rtentry *)rn;
437 			hr_otherif = (rt && rt->rt_ifp != &sc->sc_if &&
438 			    rt->rt_flags & (RTF_CLONING|RTF_CLONED));
439 
440 			/* Check for a network route on our interface */
441 			bcopy(ifa->ifa_addr, &sa, sizeof(sa));
442 			satosin(&sa)->sin_addr.s_addr = satosin(ifa->ifa_netmask
443 			    )->sin_addr.s_addr & satosin(&sa)->sin_addr.s_addr;
444 			rt = (struct rtentry *)rt_lookup(&sa,
445 			    ifa->ifa_netmask, sc->sc_if.if_rdomain);
446 			nr_ourif = (rt && rt->rt_ifp == &sc->sc_if);
447 
448 			/* Restore the route label */
449 			bzero(&sa_rl, sizeof(sa_rl));
450 			if (rt && rt->rt_labelid) {
451 				sa_rl.sr_len = sizeof(sa_rl);
452 				sa_rl.sr_family = AF_UNSPEC;
453 				label = rtlabel_id2name(rt->rt_labelid);
454 				if (label != NULL)
455 					strlcpy(sa_rl.sr_label, label,
456 					    sizeof(sa_rl.sr_label));
457 			}
458 
459 			switch (cmd) {
460 			case RTM_ADD:
461 				if (hr_otherif) {
462 					ifa->ifa_rtrequest = NULL;
463 					ifa->ifa_flags &= ~RTF_CLONING;
464 					bzero(&info, sizeof(info));
465 					info.rti_info[RTAX_DST] = ifa->ifa_addr;
466 					info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
467 					info.rti_flags = RTF_UP | RTF_HOST;
468 					error = rtrequest1(RTM_ADD, &info,
469 					    RTP_CONNECTED, NULL,
470 					    sc->sc_if.if_rdomain);
471 					rt_missmsg(RTM_ADD, &info,
472 					    info.rti_flags, &sc->sc_if,
473 					    error, sc->sc_if.if_rdomain);
474 				}
475 				if (!hr_otherif || nr_ourif || !rt) {
476 					if (nr_ourif && !(rt->rt_flags &
477 					    RTF_CLONING)) {
478 						bzero(&info, sizeof(info));
479 						info.rti_info[RTAX_DST] = &sa;
480 						info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
481 						error = rtrequest1(RTM_DELETE,
482 						    &info, RTP_CONNECTED, NULL,
483 						    sc->sc_if.if_rdomain);
484 						rt_missmsg(RTM_DELETE, &info, info.rti_flags, NULL,
485 						    error, sc->sc_if.if_rdomain);
486 					}
487 
488 					ifa->ifa_rtrequest = arp_rtrequest;
489 					ifa->ifa_flags |= RTF_CLONING;
490 
491 					bzero(&info, sizeof(info));
492 					info.rti_info[RTAX_DST] = &sa;
493 					info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
494 					info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
495 					info.rti_info[RTAX_LABEL] =
496 					    (struct sockaddr *)&sa_rl;
497 					error = rtrequest1(RTM_ADD, &info,
498 					    RTP_CONNECTED, NULL,
499 					    sc->sc_if.if_rdomain);
500 					if (error == 0)
501 						ifa->ifa_flags |= IFA_ROUTE;
502 					rt_missmsg(RTM_ADD, &info, info.rti_flags,
503 					    &sc->sc_if, error, sc->sc_if.if_rdomain);
504 				}
505 				break;
506 			case RTM_DELETE:
507 				break;
508 			default:
509 				break;
510 			}
511 			break;
512 		}
513 
514 #ifdef INET6
515 		case AF_INET6:
516 			if (sc->sc_balancing >= CARP_BAL_IP)
517 				continue;
518 			if (cmd == RTM_ADD)
519 				in6_ifaddloop(ifa);
520 			else
521 				in6_ifremloop(ifa);
522 			break;
523 #endif /* INET6 */
524 		default:
525 			break;
526 		}
527 	}
528 	splx(s);
529 }
530 
531 /*
532  * process input packet.
533  * we have rearranged checks order compared to the rfc,
534  * but it seems more efficient this way or not possible otherwise.
535  */
536 void
537 carp_proto_input(struct mbuf *m, ...)
538 {
539 	struct ip *ip = mtod(m, struct ip *);
540 	struct ifnet *ifp = m->m_pkthdr.rcvif;
541 	struct carp_softc *sc = NULL;
542 	struct carp_header *ch;
543 	int iplen, len, hlen, ismulti;
544 	va_list ap;
545 
546 	va_start(ap, m);
547 	hlen = va_arg(ap, int);
548 	va_end(ap);
549 
550 	carpstats.carps_ipackets++;
551 
552 	if (!carp_opts[CARPCTL_ALLOW]) {
553 		m_freem(m);
554 		return;
555 	}
556 
557 	ismulti = IN_MULTICAST(ip->ip_dst.s_addr);
558 
559 	/* check if received on a valid carp interface */
560 	if (!((ifp->if_type == IFT_CARP && ismulti) ||
561 	    (ifp->if_type != IFT_CARP && !ismulti && ifp->if_carp != NULL))) {
562 		carpstats.carps_badif++;
563 		CARP_LOG(LOG_INFO, sc, ("packet received on non-carp interface: %s",
564 		    m->m_pkthdr.rcvif->if_xname));
565 		m_freem(m);
566 		return;
567 	}
568 
569 	/* verify that the IP TTL is 255.  */
570 	if (ip->ip_ttl != CARP_DFLTTL) {
571 		carpstats.carps_badttl++;
572 		CARP_LOG(LOG_NOTICE, sc, ("received ttl %d != %d on %s", ip->ip_ttl,
573 		    CARP_DFLTTL, m->m_pkthdr.rcvif->if_xname));
574 		m_freem(m);
575 		return;
576 	}
577 
578 	/*
579 	 * verify that the received packet length is
580 	 * equal to the CARP header
581 	 */
582 	iplen = ip->ip_hl << 2;
583 	len = iplen + sizeof(*ch);
584 	if (len > m->m_pkthdr.len) {
585 		carpstats.carps_badlen++;
586 		CARP_LOG(LOG_INFO, sc, ("packet too short %d on %s", m->m_pkthdr.len,
587 		    m->m_pkthdr.rcvif->if_xname));
588 		m_freem(m);
589 		return;
590 	}
591 
592 	if ((m = m_pullup2(m, len)) == NULL) {
593 		carpstats.carps_hdrops++;
594 		return;
595 	}
596 	ip = mtod(m, struct ip *);
597 	ch = (void *)ip + iplen;
598 
599 	/* verify the CARP checksum */
600 	m->m_data += iplen;
601 	if (carp_cksum(m, len - iplen)) {
602 		carpstats.carps_badsum++;
603 		CARP_LOG(LOG_INFO, sc, ("checksum failed on %s",
604 		    m->m_pkthdr.rcvif->if_xname));
605 		m_freem(m);
606 		return;
607 	}
608 	m->m_data -= iplen;
609 
610 	carp_proto_input_c(m, ch, ismulti, AF_INET);
611 }
612 
613 #ifdef INET6
614 int
615 carp6_proto_input(struct mbuf **mp, int *offp, int proto)
616 {
617 	struct mbuf *m = *mp;
618 	struct carp_softc *sc = NULL;
619 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
620 	struct carp_header *ch;
621 	u_int len;
622 
623 	carpstats.carps_ipackets6++;
624 
625 	if (!carp_opts[CARPCTL_ALLOW]) {
626 		m_freem(m);
627 		return (IPPROTO_DONE);
628 	}
629 
630 	/* check if received on a valid carp interface */
631 	if (m->m_pkthdr.rcvif->if_type != IFT_CARP) {
632 		carpstats.carps_badif++;
633 		CARP_LOG(LOG_INFO, sc, ("packet received on non-carp interface: %s",
634 		    m->m_pkthdr.rcvif->if_xname));
635 		m_freem(m);
636 		return (IPPROTO_DONE);
637 	}
638 
639 	/* verify that the IP TTL is 255 */
640 	if (ip6->ip6_hlim != CARP_DFLTTL) {
641 		carpstats.carps_badttl++;
642 		CARP_LOG(LOG_NOTICE, sc, ("received ttl %d != %d on %s", ip6->ip6_hlim,
643 		    CARP_DFLTTL, m->m_pkthdr.rcvif->if_xname));
644 		m_freem(m);
645 		return (IPPROTO_DONE);
646 	}
647 
648 	/* verify that we have a complete carp packet */
649 	len = m->m_len;
650 	IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch));
651 	if (ch == NULL) {
652 		carpstats.carps_badlen++;
653 		CARP_LOG(LOG_INFO, sc, ("packet size %u too small", len));
654 		return (IPPROTO_DONE);
655 	}
656 
657 
658 	/* verify the CARP checksum */
659 	m->m_data += *offp;
660 	if (carp_cksum(m, sizeof(*ch))) {
661 		carpstats.carps_badsum++;
662 		CARP_LOG(LOG_INFO, sc, ("checksum failed, on %s",
663 		    m->m_pkthdr.rcvif->if_xname));
664 		m_freem(m);
665 		return (IPPROTO_DONE);
666 	}
667 	m->m_data -= *offp;
668 
669 	carp_proto_input_c(m, ch, 1, AF_INET6);
670 	return (IPPROTO_DONE);
671 }
672 #endif /* INET6 */
673 
674 void
675 carp_proto_input_c(struct mbuf *m, struct carp_header *ch, int ismulti,
676     sa_family_t af)
677 {
678 	struct ifnet *ifp = m->m_pkthdr.rcvif;
679 	struct carp_softc *sc;
680 	struct carp_vhost_entry *vhe;
681 	struct timeval sc_tv, ch_tv;
682 	struct carp_if *cif;
683 
684 	if (ifp->if_type == IFT_CARP)
685 		cif = (struct carp_if *)ifp->if_carpdev->if_carp;
686 	else
687 		cif = (struct carp_if *)ifp->if_carp;
688 
689 	TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
690 		if (af == AF_INET &&
691 		    ismulti != IN_MULTICAST(sc->sc_peer.s_addr))
692 			continue;
693 		LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) {
694 			if (vhe->vhid == ch->carp_vhid)
695 				goto found;
696 		}
697 	}
698  found:
699 
700 	if (!sc || (sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
701 	    (IFF_UP|IFF_RUNNING)) {
702 		carpstats.carps_badvhid++;
703 		m_freem(m);
704 		return;
705 	}
706 
707 	getmicrotime(&sc->sc_if.if_lastchange);
708 	sc->sc_if.if_ipackets++;
709 	sc->sc_if.if_ibytes += m->m_pkthdr.len;
710 
711 	/* verify the CARP version. */
712 	if (ch->carp_version != CARP_VERSION) {
713 		carpstats.carps_badver++;
714 		sc->sc_if.if_ierrors++;
715 		CARP_LOG(LOG_NOTICE, sc, ("invalid version %d != %d",
716 		    ch->carp_version, CARP_VERSION));
717 		m_freem(m);
718 		return;
719 	}
720 
721 	/* verify the hash */
722 	if (carp_hmac_verify(vhe, ch->carp_counter, ch->carp_md)) {
723 		carpstats.carps_badauth++;
724 		sc->sc_if.if_ierrors++;
725 		CARP_LOG(LOG_INFO, sc, ("incorrect hash"));
726 		m_freem(m);
727 		return;
728 	}
729 
730 	if (!bcmp(&vhe->vhe_replay_cookie, ch->carp_counter,
731 	    sizeof(ch->carp_counter))) {
732 		/* Do not log duplicates from non simplex interfaces */
733 		if (sc->sc_carpdev->if_flags & IFF_SIMPLEX) {
734 			carpstats.carps_badauth++;
735 			sc->sc_if.if_ierrors++;
736 			CARP_LOG(LOG_WARNING, sc,
737 			    ("replay or network loop detected"));
738 		}
739 		m_freem(m);
740 		return;
741 	}
742 
743 	sc_tv.tv_sec = sc->sc_advbase;
744 	sc_tv.tv_usec = vhe->advskew * 1000000 / 256;
745 	ch_tv.tv_sec = ch->carp_advbase;
746 	ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
747 
748 	switch (vhe->state) {
749 	case INIT:
750 		break;
751 	case MASTER:
752 		/*
753 		 * If we receive an advertisement from a master who's going to
754 		 * be more frequent than us, and whose demote count is not higher
755 		 * than ours, go into BACKUP state. If his demote count is lower,
756 		 * also go into BACKUP.
757 		 */
758 		if (((timercmp(&sc_tv, &ch_tv, >) ||
759 		    timercmp(&sc_tv, &ch_tv, ==)) &&
760 		    (ch->carp_demote <= carp_group_demote_count(sc))) ||
761 		    ch->carp_demote < carp_group_demote_count(sc)) {
762 			timeout_del(&vhe->ad_tmo);
763 			carp_set_state(vhe, BACKUP);
764 			carp_setrun(vhe, 0);
765 			if (vhe->vhe_leader)
766 				carp_setroute(sc, RTM_DELETE);
767 		}
768 		break;
769 	case BACKUP:
770 		/*
771 		 * If we're pre-empting masters who advertise slower than us,
772 		 * and do not have a better demote count, treat them as down.
773 		 *
774 		 */
775 		if (carp_opts[CARPCTL_PREEMPT] &&
776 		    timercmp(&sc_tv, &ch_tv, <) &&
777 		    ch->carp_demote >= carp_group_demote_count(sc)) {
778 			carp_master_down(vhe);
779 			break;
780 		}
781 
782 		/*
783 		 * Take over masters advertising with a higher demote count,
784 		 * regardless of CARPCTL_PREEMPT.
785 		 */
786 		if (ch->carp_demote > carp_group_demote_count(sc)) {
787 			carp_master_down(vhe);
788 			break;
789 		}
790 
791 		/*
792 		 *  If the master is going to advertise at such a low frequency
793 		 *  that he's guaranteed to time out, we'd might as well just
794 		 *  treat him as timed out now.
795 		 */
796 		sc_tv.tv_sec = sc->sc_advbase * 3;
797 		if (timercmp(&sc_tv, &ch_tv, <)) {
798 			carp_master_down(vhe);
799 			break;
800 		}
801 
802 		/*
803 		 * Otherwise, we reset the counter and wait for the next
804 		 * advertisement.
805 		 */
806 		carp_setrun(vhe, af);
807 		break;
808 	}
809 
810 	m_freem(m);
811 	return;
812 }
813 
814 int
815 carp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
816     size_t newlen)
817 {
818 	/* All sysctl names at this level are terminal. */
819 	if (namelen != 1)
820 		return (ENOTDIR);
821 
822 	switch (name[0]) {
823 	case CARPCTL_STATS:
824 		if (newp != NULL)
825 			return (EPERM);
826 		return (sysctl_struct(oldp, oldlenp, newp, newlen,
827 		    &carpstats, sizeof(carpstats)));
828 	default:
829 		if (name[0] <= 0 || name[0] >= CARPCTL_MAXID)
830 			return (ENOPROTOOPT);
831 		return sysctl_int(oldp, oldlenp, newp, newlen,
832 		    &carp_opts[name[0]]);
833 	}
834 }
835 
836 /*
837  * Interface side of the CARP implementation.
838  */
839 
840 /* ARGSUSED */
841 void
842 carpattach(int n)
843 {
844 	struct ifg_group	*ifg;
845 
846 	if ((ifg = if_creategroup("carp")) != NULL)
847 		ifg->ifg_refcnt++;	/* keep around even if empty */
848 	if_clone_attach(&carp_cloner);
849 }
850 
851 int
852 carp_clone_create(ifc, unit)
853 	struct if_clone *ifc;
854 	int unit;
855 {
856 	struct carp_softc *sc;
857 	struct ifnet *ifp;
858 
859 	sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT|M_ZERO);
860 	if (!sc)
861 		return (ENOMEM);
862 
863 	LIST_INIT(&sc->carp_vhosts);
864 	sc->sc_vhe_count = 0;
865 	if (carp_new_vhost(sc, 0, 0)) {
866 		free(sc, M_DEVBUF);
867 		return (ENOMEM);
868 	}
869 
870 	sc->sc_suppress = 0;
871 	sc->sc_advbase = CARP_DFLTINTV;
872 	sc->sc_naddrs = sc->sc_naddrs6 = 0;
873 #ifdef INET6
874 	sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
875 #endif /* INET6 */
876 	sc->sc_imo.imo_membership = (struct in_multi **)malloc(
877 	    (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS,
878 	    M_WAITOK|M_ZERO);
879 	sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
880 
881 	LIST_INIT(&sc->carp_mc_listhead);
882 	ifp = &sc->sc_if;
883 	ifp->if_softc = sc;
884 	snprintf(ifp->if_xname, sizeof ifp->if_xname, "%s%d", ifc->ifc_name,
885 	    unit);
886 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
887 	ifp->if_ioctl = carp_ioctl;
888 	ifp->if_start = carp_start;
889 	ifp->if_output = carp_output;
890 	ifp->if_type = IFT_CARP;
891 	ifp->if_addrlen = ETHER_ADDR_LEN;
892 	ifp->if_hdrlen = ETHER_HDR_LEN;
893 	ifp->if_mtu = ETHERMTU;
894 	IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
895 	IFQ_SET_READY(&ifp->if_snd);
896 	if_attach(ifp);
897 
898 	if_alloc_sadl(ifp);
899 	LIST_INIT(&sc->sc_ac.ac_multiaddrs);
900 #if NBPFILTER > 0
901 	bpfattach(&ifp->if_bpf, ifp, DLT_EN10MB, ETHER_HDR_LEN);
902 #endif
903 	return (0);
904 }
905 
906 int
907 carp_new_vhost(struct carp_softc *sc, int vhid, int advskew)
908 {
909 	struct carp_vhost_entry *vhe, *vhe0;
910 
911 	vhe = malloc(sizeof(*vhe), M_DEVBUF, M_NOWAIT | M_ZERO);
912 	if (vhe == NULL)
913 		return (ENOMEM);
914 
915 	vhe->parent_sc = sc;
916 	vhe->vhid = vhid;
917 	vhe->advskew = advskew;
918 	timeout_set(&vhe->ad_tmo, carp_send_ad, vhe);
919 	timeout_set(&vhe->md_tmo, carp_master_down, vhe);
920 	timeout_set(&vhe->md6_tmo, carp_master_down, vhe);
921 
922 	/* mark the first vhe as leader */
923 	if (LIST_EMPTY(&sc->carp_vhosts)) {
924 		vhe->vhe_leader = 1;
925 		LIST_INSERT_HEAD(&sc->carp_vhosts, vhe, vhost_entries);
926 		sc->sc_vhe_count = 1;
927 		return (0);
928 	}
929 
930 	LIST_FOREACH(vhe0, &sc->carp_vhosts, vhost_entries)
931 		if (LIST_NEXT(vhe0, vhost_entries) == NULL)
932 			break;
933 	LIST_INSERT_AFTER(vhe0, vhe, vhost_entries);
934 	sc->sc_vhe_count++;
935 
936 	return (0);
937 }
938 
939 int
940 carp_clone_destroy(struct ifnet *ifp)
941 {
942 	struct carp_softc *sc = ifp->if_softc;
943 
944 	carpdetach(sc);
945 	ether_ifdetach(ifp);
946 	if_detach(ifp);
947 	carp_destroy_vhosts(ifp->if_softc);
948 	free(sc->sc_imo.imo_membership, M_IPMOPTS);
949 	free(sc, M_DEVBUF);
950 
951 	return (0);
952 }
953 
954 void
955 carp_del_all_timeouts(struct carp_softc *sc)
956 {
957 	struct carp_vhost_entry *vhe;
958 
959 	LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) {
960 		timeout_del(&vhe->ad_tmo);
961 		timeout_del(&vhe->md_tmo);
962 		timeout_del(&vhe->md6_tmo);
963 	}
964 }
965 
966 void
967 carpdetach(struct carp_softc *sc)
968 {
969 	struct carp_if *cif;
970 	int s;
971 
972 	carp_del_all_timeouts(sc);
973 
974 	if (sc->sc_demote_cnt)
975 		carp_group_demote_adj(&sc->sc_if, sc->sc_demote_cnt, "detach");
976 	sc->sc_suppress = 0;
977 	sc->sc_sendad_errors = 0;
978 
979 	carp_set_state_all(sc, INIT);
980 	sc->sc_if.if_flags &= ~IFF_UP;
981 	carp_setrun_all(sc, 0);
982 	carp_multicast_cleanup(sc);
983 
984 	s = splnet();
985 	if (sc->sc_carpdev != NULL) {
986 		if (sc->lh_cookie != NULL)
987 			hook_disestablish(sc->sc_carpdev->if_linkstatehooks,
988 			    sc->lh_cookie);
989 		cif = (struct carp_if *)sc->sc_carpdev->if_carp;
990 		TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
991 		if (!--cif->vhif_nvrs) {
992 			ifpromisc(sc->sc_carpdev, 0);
993 			sc->sc_carpdev->if_carp = NULL;
994 			free(cif, M_IFADDR);
995 		}
996 	}
997 	sc->sc_carpdev = NULL;
998 	splx(s);
999 }
1000 
1001 /* Detach an interface from the carp. */
1002 void
1003 carp_ifdetach(struct ifnet *ifp)
1004 {
1005 	struct carp_softc *sc, *nextsc;
1006 	struct carp_if *cif = (struct carp_if *)ifp->if_carp;
1007 
1008 	for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) {
1009 		nextsc = TAILQ_NEXT(sc, sc_list);
1010 		carpdetach(sc);
1011 	}
1012 }
1013 
1014 void
1015 carp_destroy_vhosts(struct carp_softc *sc)
1016 {
1017 	/* XXX bow out? */
1018 	struct carp_vhost_entry *vhe, *nvhe;
1019 
1020 	for (vhe = LIST_FIRST(&sc->carp_vhosts);
1021 	     vhe != LIST_END(&sc->carp_vhosts); vhe = nvhe) {
1022 		nvhe = LIST_NEXT(vhe, vhost_entries);
1023 		free(vhe, M_DEVBUF);
1024 	}
1025 	LIST_INIT(&sc->carp_vhosts);
1026 	sc->sc_vhe_count = 0;
1027 }
1028 
1029 int
1030 carp_prepare_ad(struct mbuf *m, struct carp_vhost_entry *vhe,
1031     struct carp_header *ch)
1032 {
1033 	if (!vhe->vhe_replay_cookie) {
1034 		arc4random_buf(&vhe->vhe_replay_cookie,
1035 		    sizeof(vhe->vhe_replay_cookie));
1036 	}
1037 
1038 	bcopy(&vhe->vhe_replay_cookie, ch->carp_counter,
1039 	    sizeof(ch->carp_counter));
1040 
1041 	/*
1042 	 * For the time being, do not include the IPv6 linklayer addresses
1043 	 * in the HMAC.
1044 	 */
1045 	carp_hmac_generate(vhe, ch->carp_counter, ch->carp_md, HMAC_NOV6LL);
1046 
1047 	return (0);
1048 }
1049 
1050 void
1051 carp_send_ad_all(void)
1052 {
1053 	struct ifnet *ifp;
1054 	struct carp_if *cif;
1055 	struct carp_softc *vh;
1056 
1057 	TAILQ_FOREACH(ifp, &ifnet, if_list) {
1058 		if (ifp->if_carp == NULL || ifp->if_type == IFT_CARP)
1059 			continue;
1060 
1061 		cif = (struct carp_if *)ifp->if_carp;
1062 		TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1063 			if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
1064 			    (IFF_UP|IFF_RUNNING)) {
1065 				carp_vhe_send_ad_all(vh);
1066 			}
1067 		}
1068 	}
1069 }
1070 
1071 void
1072 carp_vhe_send_ad_all(struct carp_softc *sc)
1073 {
1074 	struct carp_vhost_entry *vhe;
1075 
1076 	LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) {
1077 		if (vhe->state == MASTER)
1078 			carp_send_ad(vhe);
1079 	}
1080 }
1081 
1082 void
1083 carp_send_ad(void *v)
1084 {
1085 	struct carp_header ch;
1086 	struct timeval tv;
1087 	struct carp_vhost_entry *vhe = v;
1088 	struct carp_softc *sc = vhe->parent_sc;
1089 	struct carp_header *ch_ptr;
1090 
1091 	struct mbuf *m;
1092 	int error, len, advbase, advskew, s;
1093 	struct ifaddr *ifa;
1094 	struct sockaddr sa;
1095 
1096 	if (sc->sc_carpdev == NULL) {
1097 		sc->sc_if.if_oerrors++;
1098 		return;
1099 	}
1100 
1101 	s = splsoftnet();
1102 
1103 	/* bow out if we've gone to backup (the carp interface is going down) */
1104 	if (sc->sc_bow_out) {
1105 		advbase = 255;
1106 		advskew = 255;
1107 	} else {
1108 		advbase = sc->sc_advbase;
1109 		advskew = vhe->advskew;
1110 		tv.tv_sec = advbase;
1111 		tv.tv_usec = advskew * 1000000 / 256;
1112 	}
1113 
1114 	ch.carp_version = CARP_VERSION;
1115 	ch.carp_type = CARP_ADVERTISEMENT;
1116 	ch.carp_vhid = vhe->vhid;
1117 	ch.carp_demote = carp_group_demote_count(sc) & 0xff;
1118 	ch.carp_advbase = advbase;
1119 	ch.carp_advskew = advskew;
1120 	ch.carp_authlen = 7;	/* XXX DEFINE */
1121 	ch.carp_cksum = 0;
1122 
1123 	sc->cur_vhe = vhe; /* we need the vhe later on the output path */
1124 
1125 #ifdef INET
1126 	if (sc->sc_naddrs) {
1127 		struct ip *ip;
1128 
1129 		MGETHDR(m, M_DONTWAIT, MT_HEADER);
1130 		if (m == NULL) {
1131 			sc->sc_if.if_oerrors++;
1132 			carpstats.carps_onomem++;
1133 			/* XXX maybe less ? */
1134 			goto retry_later;
1135 		}
1136 		len = sizeof(*ip) + sizeof(ch);
1137 		m->m_pkthdr.len = len;
1138 		m->m_pkthdr.rcvif = NULL;
1139 		m->m_len = len;
1140 		MH_ALIGN(m, m->m_len);
1141 		ip = mtod(m, struct ip *);
1142 		ip->ip_v = IPVERSION;
1143 		ip->ip_hl = sizeof(*ip) >> 2;
1144 		ip->ip_tos = IPTOS_LOWDELAY;
1145 		ip->ip_len = htons(len);
1146 		ip->ip_id = htons(ip_randomid());
1147 		ip->ip_off = htons(IP_DF);
1148 		ip->ip_ttl = CARP_DFLTTL;
1149 		ip->ip_p = IPPROTO_CARP;
1150 		ip->ip_sum = 0;
1151 
1152 		bzero(&sa, sizeof(sa));
1153 		sa.sa_family = AF_INET;
1154 		ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
1155 		if (ifa == NULL)
1156 			ip->ip_src.s_addr = 0;
1157 		else
1158 			ip->ip_src.s_addr =
1159 			    ifatoia(ifa)->ia_addr.sin_addr.s_addr;
1160 		ip->ip_dst.s_addr = sc->sc_peer.s_addr;
1161 		if (IN_MULTICAST(ip->ip_dst.s_addr))
1162 			m->m_flags |= M_MCAST;
1163 
1164 		ch_ptr = (void *)ip + sizeof(*ip);
1165 		bcopy(&ch, ch_ptr, sizeof(ch));
1166 		if (carp_prepare_ad(m, vhe, ch_ptr))
1167 			goto retry_later;
1168 
1169 		m->m_data += sizeof(*ip);
1170 		ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip));
1171 		m->m_data -= sizeof(*ip);
1172 
1173 		getmicrotime(&sc->sc_if.if_lastchange);
1174 		sc->sc_if.if_opackets++;
1175 		sc->sc_if.if_obytes += len;
1176 		carpstats.carps_opackets++;
1177 
1178 		error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo,
1179 		    NULL);
1180 		if (error) {
1181 			if (error == ENOBUFS)
1182 				carpstats.carps_onomem++;
1183 			else
1184 				CARP_LOG(LOG_WARNING, sc,
1185 				    ("ip_output failed: %d", error));
1186 			sc->sc_if.if_oerrors++;
1187 			if (sc->sc_sendad_errors < INT_MAX)
1188 				sc->sc_sendad_errors++;
1189 			if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS(sc))
1190 				carp_group_demote_adj(&sc->sc_if, 1,
1191 				    "> snderrors");
1192 			sc->sc_sendad_success = 0;
1193 		} else {
1194 			if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS(sc)) {
1195 				if (++sc->sc_sendad_success >=
1196 				    CARP_SENDAD_MIN_SUCCESS(sc)) {
1197 					carp_group_demote_adj(&sc->sc_if, -1,
1198 					    "< snderrors");
1199 					sc->sc_sendad_errors = 0;
1200 				}
1201 			} else
1202 				sc->sc_sendad_errors = 0;
1203 		}
1204 		if (vhe->vhe_leader) {
1205 			if (sc->sc_delayed_arp > 0)
1206 				sc->sc_delayed_arp--;
1207 			if (sc->sc_delayed_arp == 0) {
1208 				carp_send_arp(sc);
1209 				sc->sc_delayed_arp = -1;
1210 			}
1211 		}
1212 	}
1213 #endif /* INET */
1214 #ifdef INET6
1215 	if (sc->sc_naddrs6) {
1216 		struct ip6_hdr *ip6;
1217 
1218 		MGETHDR(m, M_DONTWAIT, MT_HEADER);
1219 		if (m == NULL) {
1220 			sc->sc_if.if_oerrors++;
1221 			carpstats.carps_onomem++;
1222 			/* XXX maybe less ? */
1223 			goto retry_later;
1224 		}
1225 		len = sizeof(*ip6) + sizeof(ch);
1226 		m->m_pkthdr.len = len;
1227 		m->m_pkthdr.rcvif = NULL;
1228 		m->m_len = len;
1229 		MH_ALIGN(m, m->m_len);
1230 		m->m_flags |= M_MCAST;
1231 		ip6 = mtod(m, struct ip6_hdr *);
1232 		bzero(ip6, sizeof(*ip6));
1233 		ip6->ip6_vfc |= IPV6_VERSION;
1234 		ip6->ip6_hlim = CARP_DFLTTL;
1235 		ip6->ip6_nxt = IPPROTO_CARP;
1236 
1237 		/* set the source address */
1238 		bzero(&sa, sizeof(sa));
1239 		sa.sa_family = AF_INET6;
1240 		ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev);
1241 		if (ifa == NULL)	/* This should never happen with IPv6 */
1242 			bzero(&ip6->ip6_src, sizeof(struct in6_addr));
1243 		else
1244 			bcopy(ifatoia6(ifa)->ia_addr.sin6_addr.s6_addr,
1245 			    &ip6->ip6_src, sizeof(struct in6_addr));
1246 		/* set the multicast destination */
1247 
1248 		ip6->ip6_dst.s6_addr8[0] = 0xff;
1249 		ip6->ip6_dst.s6_addr8[1] = 0x02;
1250 		ip6->ip6_dst.s6_addr8[15] = 0x12;
1251 
1252 		ch_ptr = (void *)ip6 + sizeof(*ip6);
1253 		bcopy(&ch, ch_ptr, sizeof(ch));
1254 		if (carp_prepare_ad(m, vhe, ch_ptr))
1255 			goto retry_later;
1256 
1257 		m->m_data += sizeof(*ip6);
1258 		ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6));
1259 		m->m_data -= sizeof(*ip6);
1260 
1261 		getmicrotime(&sc->sc_if.if_lastchange);
1262 		sc->sc_if.if_opackets++;
1263 		sc->sc_if.if_obytes += len;
1264 		carpstats.carps_opackets6++;
1265 
1266 		error = ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL);
1267 		if (error) {
1268 			if (error == ENOBUFS)
1269 				carpstats.carps_onomem++;
1270 			else
1271 				CARP_LOG(LOG_WARNING, sc,
1272 				    ("ip6_output failed: %d", error));
1273 			sc->sc_if.if_oerrors++;
1274 			if (sc->sc_sendad_errors < INT_MAX)
1275 				sc->sc_sendad_errors++;
1276 			if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS(sc))
1277 				carp_group_demote_adj(&sc->sc_if, 1,
1278 					    "> snd6errors");
1279 			sc->sc_sendad_success = 0;
1280 		} else {
1281 			if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS(sc)) {
1282 				if (++sc->sc_sendad_success >=
1283 				    CARP_SENDAD_MIN_SUCCESS(sc)) {
1284 					carp_group_demote_adj(&sc->sc_if, -1,
1285 					    "< snd6errors");
1286 					sc->sc_sendad_errors = 0;
1287 				}
1288 			} else
1289 				sc->sc_sendad_errors = 0;
1290 		}
1291 	}
1292 #endif /* INET6 */
1293 
1294 retry_later:
1295 	sc->cur_vhe = NULL;
1296 	splx(s);
1297 	if (advbase != 255 || advskew != 255)
1298 		timeout_add(&vhe->ad_tmo, tvtohz(&tv));
1299 }
1300 
1301 /*
1302  * Broadcast a gratuitous ARP request containing
1303  * the virtual router MAC address for each IP address
1304  * associated with the virtual router.
1305  */
1306 void
1307 carp_send_arp(struct carp_softc *sc)
1308 {
1309 	struct ifaddr *ifa;
1310 	in_addr_t in;
1311 	int s = splsoftnet();
1312 
1313 	TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
1314 
1315 		if (ifa->ifa_addr->sa_family != AF_INET)
1316 			continue;
1317 
1318 		in = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
1319 		arprequest(sc->sc_carpdev, &in, &in, sc->sc_ac.ac_enaddr);
1320 		DELAY(1000);	/* XXX */
1321 	}
1322 	splx(s);
1323 }
1324 
1325 #ifdef INET6
1326 void
1327 carp_send_na(struct carp_softc *sc)
1328 {
1329 	struct ifaddr *ifa;
1330 	struct in6_addr *in6;
1331 	static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
1332 	int s = splsoftnet();
1333 
1334 	TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
1335 
1336 		if (ifa->ifa_addr->sa_family != AF_INET6)
1337 			continue;
1338 
1339 		in6 = &ifatoia6(ifa)->ia_addr.sin6_addr;
1340 		nd6_na_output(sc->sc_carpdev, &mcast, in6,
1341 		    ND_NA_FLAG_OVERRIDE, 1, NULL);
1342 		DELAY(1000);	/* XXX */
1343 	}
1344 	splx(s);
1345 }
1346 #endif /* INET6 */
1347 
/*
 * Based on bridge_hash() in if_bridge.c
 *
 * Mixes the three values a, b and c in place.  All three arguments must
 * be modifiable lvalues; each one is evaluated several times, so they
 * must not have side effects.  Arguments are parenthesised so that any
 * lvalue expression can be passed safely.
 */
#define	mix(a,b,c) \
	do {								\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 13);		\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 8);		\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 13);		\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 12);		\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 16);		\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 5);		\
		(a) -= (b); (a) -= (c); (a) ^= ((c) >> 3);		\
		(b) -= (c); (b) -= (a); (b) ^= ((a) << 10);		\
		(c) -= (a); (c) -= (b); (c) ^= ((b) >> 15);		\
	} while (0)
1363 
1364 u_int32_t
1365 carp_hash(struct carp_softc *sc, u_char *src)
1366 {
1367 	u_int32_t a = 0x9e3779b9, b = sc->sc_hashkey[0], c = sc->sc_hashkey[1];
1368 
1369 	c += sc->sc_key[3] << 24;
1370 	c += sc->sc_key[2] << 16;
1371 	c += sc->sc_key[1] << 8;
1372 	c += sc->sc_key[0];
1373 	b += src[5] << 8;
1374 	b += src[4];
1375 	a += src[3] << 24;
1376 	a += src[2] << 16;
1377 	a += src[1] << 8;
1378 	a += src[0];
1379 
1380 	mix(a, b, c);
1381 	return (c);
1382 }
1383 
1384 void
1385 carp_update_lsmask(struct carp_softc *sc)
1386 {
1387 	struct carp_vhost_entry *vhe;
1388 	int count;
1389 
1390 	if (!sc->sc_balancing)
1391 		return;
1392 
1393 	sc->sc_lsmask = 0;
1394 	count = 0;
1395 
1396 	LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) {
1397 		if (vhe->state == MASTER && count < sizeof(sc->sc_lsmask) * 8)
1398 			sc->sc_lsmask |= 1 << count;
1399 		count++;
1400 	}
1401 	sc->sc_lscount = count;
1402 	CARP_LOG(LOG_DEBUG, sc, ("carp_update_lsmask: %x", sc->sc_lsmask));
1403 }
1404 
1405 int
1406 carp_iamatch(struct in_ifaddr *ia, u_char *src, u_int8_t **sha,
1407     u_int8_t **ether_shost)
1408 {
1409 	struct carp_softc *sc = ia->ia_ifp->if_softc;
1410 	struct carp_vhost_entry *vhe = LIST_FIRST(&sc->carp_vhosts);
1411 
1412 	if (sc->sc_balancing == CARP_BAL_ARP) {
1413 		int lshash;
1414 		/*
1415 		 * We use the source MAC address to decide which virtual host
1416 		 * should handle the request. If we're master of that virtual
1417 		 * host, then we respond, otherwise, just drop the arp packet
1418 		 * on the floor.
1419 		 */
1420 
1421 		if (sc->sc_lscount == 0) /* just to be safe */
1422 			return (0);
1423 		lshash = carp_hash(sc, src) % sc->sc_lscount;
1424 		if ((1 << lshash) & sc->sc_lsmask) {
1425 			int i = 0;
1426 			LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) {
1427 				if (i++ == lshash)
1428 					break;
1429 			}
1430 			if (vhe == NULL)
1431 				return (0);
1432 			*sha = vhe->vhe_enaddr;
1433 			return (1);
1434 		}
1435 	} else if (sc->sc_balancing == CARP_BAL_IPSTEALTH ||
1436 	    sc->sc_balancing == CARP_BAL_IP) {
1437 		if (vhe->state == MASTER) {
1438 			*ether_shost = ((struct arpcom *)sc->sc_carpdev)->
1439 			    ac_enaddr;
1440 			return (1);
1441 		}
1442 	} else {
1443 		if (vhe->state == MASTER)
1444 			return (1);
1445 	}
1446 
1447 	return (0);
1448 }
1449 
1450 #ifdef INET6
1451 int
1452 carp_iamatch6(struct ifnet *ifp, u_char *src, struct sockaddr_dl **sdl)
1453 {
1454 	struct carp_softc *sc = ifp->if_softc;
1455 	struct carp_vhost_entry *vhe = LIST_FIRST(&sc->carp_vhosts);
1456 
1457 	if (sc->sc_balancing == CARP_BAL_ARP) {
1458 		int lshash;
1459 		/*
1460 		 * We use the source MAC address to decide which virtual host
1461 		 * should handle the request. If we're master of that virtual
1462 		 * host, then we respond, otherwise, just drop the ndp packet
1463 		 * on the floor.
1464 		 */
1465 
1466 		/* can happen if optional src lladdr is not provided */
1467 		if (src == NULL)
1468 			return (0);
1469 		if (sc->sc_lscount == 0) /* just to be safe */
1470 			return (0);
1471 		lshash = carp_hash(sc, src) % sc->sc_lscount;
1472 		if ((1 << lshash) & sc->sc_lsmask) {
1473 			int i = 0;
1474 			LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) {
1475 				if (i++ == lshash)
1476 					break;
1477 			}
1478 			if (vhe == NULL)
1479 				return (0);
1480 			*sdl = &vhe->vhe_sdl;
1481 			return (1);
1482 		}
1483 	} else {
1484 		if (vhe->state == MASTER)
1485 			return (1);
1486 	}
1487 
1488 	return (0);
1489 }
1490 #endif /* INET6 */
1491 
1492 struct ifnet *
1493 carp_ourether(void *v, struct ether_header *eh, int src)
1494 {
1495 	struct carp_if *cif = (struct carp_if *)v;
1496 	struct carp_softc *vh;
1497 	u_int8_t *ena;
1498 
1499 	if (src)
1500 		ena = (u_int8_t *)&eh->ether_shost;
1501 	else
1502 		ena = (u_int8_t *)&eh->ether_dhost;
1503 
1504 	TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1505 		struct carp_vhost_entry *vhe;
1506 		if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
1507 		    (IFF_UP|IFF_RUNNING))
1508 			continue;
1509 		if (vh->sc_balancing == CARP_BAL_ARP) {
1510 			LIST_FOREACH(vhe, &vh->carp_vhosts, vhost_entries)
1511 				if (vhe->state == MASTER &&
1512 				    !bcmp(ena, vhe->vhe_enaddr, ETHER_ADDR_LEN))
1513 					return (&vh->sc_if);
1514 		} else {
1515 			vhe = LIST_FIRST(&vh->carp_vhosts);
1516 			if ((vhe->state == MASTER ||
1517 			    vh->sc_balancing >= CARP_BAL_IP) &&
1518 			    !bcmp(ena, vh->sc_ac.ac_enaddr, ETHER_ADDR_LEN))
1519 				return (&vh->sc_if);
1520 		}
1521 	}
1522 	return (NULL);
1523 }
1524 
1525 void
1526 carp_rewrite_lladdr(struct ifnet *ifp, u_int8_t *s_enaddr)
1527 {
1528 	struct carp_softc *sc = ifp->if_softc;
1529 
1530 	if (sc->sc_balancing != CARP_BAL_IPSTEALTH &&
1531 	    sc->sc_balancing != CARP_BAL_IP && sc->cur_vhe) {
1532 		if (sc->cur_vhe->vhe_leader)
1533 			bcopy((caddr_t)sc->sc_ac.ac_enaddr,
1534 			    (caddr_t)s_enaddr, ETHER_ADDR_LEN);
1535 		else
1536 			bcopy((caddr_t)sc->cur_vhe->vhe_enaddr,
1537 			    (caddr_t)s_enaddr, ETHER_ADDR_LEN);
1538 	}
1539 }
1540 
1541 int
1542 carp_our_mcastaddr(struct ifnet *ifp, u_int8_t *d_enaddr)
1543 {
1544 	struct carp_softc *sc = ifp->if_softc;
1545 
1546 	if (sc->sc_balancing != CARP_BAL_IP)
1547 		return (0);
1548 
1549 	return(!bcmp(sc->sc_ac.ac_enaddr, d_enaddr, ETHER_ADDR_LEN));
1550 }
1551 
1552 
1553 int
1554 carp_input(struct mbuf *m, u_int8_t *shost, u_int8_t *dhost, u_int16_t etype)
1555 {
1556 	struct ether_header eh;
1557 	struct carp_if *cif = (struct carp_if *)m->m_pkthdr.rcvif->if_carp;
1558 	struct ifnet *ifp;
1559 
1560 	bcopy(shost, &eh.ether_shost, sizeof(eh.ether_shost));
1561 	bcopy(dhost, &eh.ether_dhost, sizeof(eh.ether_dhost));
1562 	eh.ether_type = etype;
1563 
1564 	if ((ifp = carp_ourether(cif, &eh, 0)))
1565 		;
1566 	else if (m->m_flags & (M_BCAST|M_MCAST)) {
1567 		struct carp_softc *vh;
1568 		struct mbuf *m0;
1569 
1570 		/*
1571 		 * XXX Should really check the list of multicast addresses
1572 		 * for each CARP interface _before_ copying.
1573 		 */
1574 		TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1575 			m0 = m_copym2(m, 0, M_COPYALL, M_DONTWAIT);
1576 			if (m0 == NULL)
1577 				continue;
1578 			m0->m_pkthdr.rcvif = &vh->sc_if;
1579 			ether_input(&vh->sc_if, &eh, m0);
1580 		}
1581 		return (1);
1582 	}
1583 
1584 	if (ifp == NULL)
1585 		return (1);
1586 
1587 	m->m_pkthdr.rcvif = ifp;
1588 
1589 #if NBPFILTER > 0
1590 	if (ifp->if_bpf)
1591 		bpf_mtap_hdr(ifp->if_bpf, (char *)&eh, ETHER_HDR_LEN, m,
1592 		    BPF_DIRECTION_IN);
1593 #endif
1594 	ifp->if_ipackets++;
1595 	ether_input(ifp, &eh, m);
1596 
1597 	return (0);
1598 }
1599 
1600 int
1601 carp_lsdrop(struct mbuf *m, sa_family_t af, u_int32_t *src, u_int32_t *dst)
1602 {
1603 	struct carp_softc *sc = m->m_pkthdr.rcvif->if_softc;
1604 	int match;
1605 	u_int32_t fold;
1606 
1607 	if (sc->sc_balancing < CARP_BAL_IP)
1608 		return (0);
1609 	/*
1610 	 * Never drop carp advertisements.
1611 	 * XXX Bad idea to pass all broadcast / multicast traffic?
1612 	 */
1613 	if (m->m_flags & (M_BCAST|M_MCAST))
1614 		return (0);
1615 
1616 	fold = src[0] ^ dst[0];
1617 #ifdef INET6
1618 	if (af == AF_INET6) {
1619 		int i;
1620 		for (i = 1; i < 4; i++)
1621 			fold ^= src[i] ^ dst[i];
1622 	}
1623 #endif
1624 	if (sc->sc_lscount == 0) /* just to be safe */
1625 		return (1);
1626 	match = (1 << (ntohl(fold) % sc->sc_lscount)) & sc->sc_lsmask;
1627 
1628 	return (!match);
1629 }
1630 
1631 void
1632 carp_master_down(void *v)
1633 {
1634 	struct carp_vhost_entry *vhe = v;
1635 	struct carp_softc *sc = vhe->parent_sc;
1636 
1637 	switch (vhe->state) {
1638 	case INIT:
1639 		printf("%s: master_down event in INIT state\n",
1640 		    sc->sc_if.if_xname);
1641 		break;
1642 	case MASTER:
1643 		break;
1644 	case BACKUP:
1645 		carp_set_state(vhe, MASTER);
1646 		carp_send_ad(vhe);
1647 		if (sc->sc_balancing == CARP_BAL_NONE && vhe->vhe_leader) {
1648 			carp_send_arp(sc);
1649 			/* Schedule a delayed ARP to deal w/ some L3 switches */
1650 			sc->sc_delayed_arp = 2;
1651 #ifdef INET6
1652 			carp_send_na(sc);
1653 #endif /* INET6 */
1654 		}
1655 		carp_setrun(vhe, 0);
1656 		if (vhe->vhe_leader)
1657 			carp_setroute(sc, RTM_ADD);
1658 		carpstats.carps_preempt++;
1659 		break;
1660 	}
1661 }
1662 
1663 void
1664 carp_setrun_all(struct carp_softc *sc, sa_family_t af)
1665 {
1666 	struct carp_vhost_entry *vhe;
1667 	LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) {
1668 		carp_setrun(vhe, af);
1669 	}
1670 }
1671 
1672 /*
1673  * When in backup state, af indicates whether to reset the master down timer
1674  * for v4 or v6. If it's set to zero, reset the ones which are already pending.
1675  */
1676 void
1677 carp_setrun(struct carp_vhost_entry *vhe, sa_family_t af)
1678 {
1679 	struct timeval tv;
1680 	struct carp_softc *sc = vhe->parent_sc;
1681 
1682 	if (sc->sc_carpdev == NULL) {
1683 		sc->sc_if.if_flags &= ~IFF_RUNNING;
1684 		carp_set_state_all(sc, INIT);
1685 		return;
1686 	}
1687 
1688 	if (sc->sc_if.if_flags & IFF_UP && vhe->vhid > 0 &&
1689 	    (sc->sc_naddrs || sc->sc_naddrs6) && !sc->sc_suppress) {
1690 		sc->sc_if.if_flags |= IFF_RUNNING;
1691 	} else {
1692 		sc->sc_if.if_flags &= ~IFF_RUNNING;
1693 		if (vhe->vhe_leader)
1694 			carp_setroute(sc, RTM_DELETE);
1695 		return;
1696 	}
1697 
1698 	switch (vhe->state) {
1699 	case INIT:
1700 		carp_set_state(vhe, BACKUP);
1701 		if (vhe->vhe_leader)
1702 			carp_setroute(sc, RTM_DELETE);
1703 		carp_setrun(vhe, 0);
1704 		break;
1705 	case BACKUP:
1706 		timeout_del(&vhe->ad_tmo);
1707 		tv.tv_sec = 3 * sc->sc_advbase;
1708 		tv.tv_usec = vhe->advskew * 1000000 / 256;
1709 		if (vhe->vhe_leader)
1710 			sc->sc_delayed_arp = -1;
1711 		switch (af) {
1712 #ifdef INET
1713 		case AF_INET:
1714 			timeout_add(&vhe->md_tmo, tvtohz(&tv));
1715 			break;
1716 #endif /* INET */
1717 #ifdef INET6
1718 		case AF_INET6:
1719 			timeout_add(&vhe->md6_tmo, tvtohz(&tv));
1720 			break;
1721 #endif /* INET6 */
1722 		default:
1723 			if (sc->sc_naddrs)
1724 				timeout_add(&vhe->md_tmo, tvtohz(&tv));
1725 			if (sc->sc_naddrs6)
1726 				timeout_add(&vhe->md6_tmo, tvtohz(&tv));
1727 			break;
1728 		}
1729 		break;
1730 	case MASTER:
1731 		tv.tv_sec = sc->sc_advbase;
1732 		tv.tv_usec = vhe->advskew * 1000000 / 256;
1733 		timeout_add(&vhe->ad_tmo, tvtohz(&tv));
1734 		break;
1735 	}
1736 }
1737 
1738 void
1739 carp_multicast_cleanup(struct carp_softc *sc)
1740 {
1741 	struct ip_moptions *imo = &sc->sc_imo;
1742 #ifdef INET6
1743 	struct ip6_moptions *im6o = &sc->sc_im6o;
1744 #endif
1745 	u_int16_t n = imo->imo_num_memberships;
1746 
1747 	/* Clean up our own multicast memberships */
1748 	while (n-- > 0) {
1749 		if (imo->imo_membership[n] != NULL) {
1750 			in_delmulti(imo->imo_membership[n]);
1751 			imo->imo_membership[n] = NULL;
1752 		}
1753 	}
1754 	imo->imo_num_memberships = 0;
1755 	imo->imo_multicast_ifp = NULL;
1756 
1757 #ifdef INET6
1758 	while (!LIST_EMPTY(&im6o->im6o_memberships)) {
1759 		struct in6_multi_mship *imm =
1760 		    LIST_FIRST(&im6o->im6o_memberships);
1761 
1762 		LIST_REMOVE(imm, i6mm_chain);
1763 		in6_leavegroup(imm);
1764 	}
1765 	im6o->im6o_multicast_ifp = NULL;
1766 #endif
1767 
1768 	/* And any other multicast memberships */
1769 	carp_ether_purgemulti(sc);
1770 }
1771 
1772 int
1773 carp_set_ifp(struct carp_softc *sc, struct ifnet *ifp)
1774 {
1775 	struct carp_if *cif, *ncif = NULL;
1776 	struct carp_softc *vr, *after = NULL;
1777 	int myself = 0, error = 0;
1778 	int s;
1779 
1780 	if (ifp == sc->sc_carpdev)
1781 		return (0);
1782 
1783 	if (ifp != NULL) {
1784 		if ((ifp->if_flags & IFF_MULTICAST) == 0)
1785 			return (EADDRNOTAVAIL);
1786 
1787 		if (ifp->if_type == IFT_CARP)
1788 			return (EINVAL);
1789 
1790 		if (ifp->if_carp == NULL) {
1791 			ncif = malloc(sizeof(*cif), M_IFADDR, M_NOWAIT);
1792 			if (ncif == NULL)
1793 				return (ENOBUFS);
1794 			if ((error = ifpromisc(ifp, 1))) {
1795 				free(ncif, M_IFADDR);
1796 				return (error);
1797 			}
1798 
1799 			ncif->vhif_ifp = ifp;
1800 			TAILQ_INIT(&ncif->vhif_vrs);
1801 		} else {
1802 			cif = (struct carp_if *)ifp->if_carp;
1803 			if (carp_check_dup_vhids(sc, cif, NULL))
1804 				return (EINVAL);
1805 		}
1806 
1807 		/* detach from old interface */
1808 		if (sc->sc_carpdev != NULL)
1809 			carpdetach(sc);
1810 
1811 		/* join multicast groups */
1812 		if (sc->sc_naddrs < 0 &&
1813 		    (error = carp_join_multicast(sc)) != 0) {
1814 			if (ncif != NULL)
1815 				free(ncif, M_IFADDR);
1816 			return (error);
1817 		}
1818 
1819 #ifdef INET6
1820 		if (sc->sc_naddrs6 < 0 &&
1821 		    (error = carp_join_multicast6(sc)) != 0) {
1822 			if (ncif != NULL)
1823 				free(ncif, M_IFADDR);
1824 			carp_multicast_cleanup(sc);
1825 			return (error);
1826 		}
1827 #endif
1828 
1829 		/* attach carp interface to physical interface */
1830 		if (ncif != NULL)
1831 			ifp->if_carp = (caddr_t)ncif;
1832 		sc->sc_carpdev = ifp;
1833 		cif = (struct carp_if *)ifp->if_carp;
1834 		TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
1835 			if (vr == sc)
1836 				myself = 1;
1837 			if (LIST_FIRST(&vr->carp_vhosts)->vhid <
1838 			    LIST_FIRST(&sc->carp_vhosts)->vhid)
1839 				after = vr;
1840 		}
1841 
1842 		if (!myself) {
1843 			/* We're trying to keep things in order */
1844 			if (after == NULL) {
1845 				TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
1846 			} else {
1847 				TAILQ_INSERT_AFTER(&cif->vhif_vrs, after,
1848 				    sc, sc_list);
1849 			}
1850 			cif->vhif_nvrs++;
1851 		}
1852 		if (sc->sc_naddrs || sc->sc_naddrs6)
1853 			sc->sc_if.if_flags |= IFF_UP;
1854 		carp_set_enaddr(sc);
1855 		s = splnet();
1856 		sc->lh_cookie = hook_establish(ifp->if_linkstatehooks, 1,
1857 		    carp_carpdev_state, ifp);
1858 		carp_carpdev_state(ifp);
1859 		splx(s);
1860 	} else {
1861 		carpdetach(sc);
1862 		sc->sc_if.if_flags &= ~(IFF_UP|IFF_RUNNING);
1863 	}
1864 	return (0);
1865 }
1866 
1867 void
1868 carp_set_vhe_enaddr(struct carp_vhost_entry *vhe)
1869 {
1870 	struct carp_softc *sc = vhe->parent_sc;
1871 
1872 	if (vhe->vhid != 0 && sc->sc_carpdev) {
1873 		if (vhe->vhe_leader && sc->sc_balancing == CARP_BAL_IP)
1874 			vhe->vhe_enaddr[0] = 1;
1875 		else
1876 			vhe->vhe_enaddr[0] = 0;
1877 		vhe->vhe_enaddr[1] = 0;
1878 		vhe->vhe_enaddr[2] = 0x5e;
1879 		vhe->vhe_enaddr[3] = 0;
1880 		vhe->vhe_enaddr[4] = 1;
1881 		vhe->vhe_enaddr[5] = vhe->vhid;
1882 
1883 		vhe->vhe_sdl.sdl_family = AF_LINK;
1884 		vhe->vhe_sdl.sdl_alen = ETHER_ADDR_LEN;
1885 		bcopy(vhe->vhe_enaddr, vhe->vhe_sdl.sdl_data, ETHER_ADDR_LEN);
1886 	} else
1887 		bzero(vhe->vhe_enaddr, ETHER_ADDR_LEN);
1888 }
1889 
1890 void
1891 carp_set_enaddr(struct carp_softc *sc)
1892 {
1893 	struct carp_vhost_entry *vhe;
1894 
1895 	LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries)
1896 		carp_set_vhe_enaddr(vhe);
1897 
1898 	vhe = LIST_FIRST(&sc->carp_vhosts);
1899 
1900 	/*
1901 	 * Use the carp lladdr if the running one isn't manually set.
1902 	 * Only compare static parts of the lladdr.
1903 	 */
1904 	if ((bcmp(sc->sc_ac.ac_enaddr + 1, vhe->vhe_enaddr + 1,
1905 	    ETHER_ADDR_LEN - 2) == 0) ||
1906 	    (!sc->sc_ac.ac_enaddr[0] && !sc->sc_ac.ac_enaddr[1] &&
1907 	    !sc->sc_ac.ac_enaddr[2] && !sc->sc_ac.ac_enaddr[3] &&
1908 	    !sc->sc_ac.ac_enaddr[4] && !sc->sc_ac.ac_enaddr[5]))
1909 		bcopy(vhe->vhe_enaddr, sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN);
1910 
1911 	/* Make sure the enaddr has changed before further twiddling. */
1912 	if (bcmp(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN) != 0) {
1913 		bcopy(sc->sc_ac.ac_enaddr, LLADDR(sc->sc_if.if_sadl),
1914 		    ETHER_ADDR_LEN);
1915 		bcopy(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN);
1916 #ifdef INET6
1917 		/*
1918 		 * (re)attach a link-local address which matches
1919 		 * our new MAC address.
1920 		 */
1921 		in6_ifattach_linklocal(&sc->sc_if, NULL);
1922 #endif
1923 		carp_set_state_all(sc, INIT);
1924 		carp_setrun_all(sc, 0);
1925 	}
1926 }
1927 
1928 void
1929 carp_addr_updated(void *v)
1930 {
1931 	struct carp_softc *sc = (struct carp_softc *) v;
1932 	struct ifaddr *ifa;
1933 	int new_naddrs = 0, new_naddrs6 = 0;
1934 
1935 	TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
1936 		if (ifa->ifa_addr->sa_family == AF_INET)
1937 			new_naddrs++;
1938 		else if (ifa->ifa_addr->sa_family == AF_INET6 &&
1939 		    !IN6_IS_ADDR_LINKLOCAL(&ifatoia6(ifa)->ia_addr.sin6_addr))
1940 				new_naddrs6++;
1941 	}
1942 
1943 	/* Handle a callback after SIOCDIFADDR */
1944 	if (new_naddrs < sc->sc_naddrs || new_naddrs6 < sc->sc_naddrs6) {
1945 		struct in_addr mc_addr;
1946 		struct in_multi *inm;
1947 
1948 		sc->sc_naddrs = new_naddrs;
1949 		sc->sc_naddrs6 = new_naddrs6;
1950 
1951 		/* Re-establish multicast membership removed by in_control */
1952 		if (IN_MULTICAST(sc->sc_peer.s_addr)) {
1953 			mc_addr.s_addr = sc->sc_peer.s_addr;
1954 			IN_LOOKUP_MULTI(mc_addr, &sc->sc_if, inm);
1955 			if (inm == NULL) {
1956 				struct in_multi **imm =
1957 				    sc->sc_imo.imo_membership;
1958 				u_int16_t maxmem =
1959 				    sc->sc_imo.imo_max_memberships;
1960 
1961 				bzero(&sc->sc_imo, sizeof(sc->sc_imo));
1962 				sc->sc_imo.imo_membership = imm;
1963 				sc->sc_imo.imo_max_memberships = maxmem;
1964 
1965 				if (sc->sc_carpdev != NULL && sc->sc_naddrs > 0)
1966 					carp_join_multicast(sc);
1967 			}
1968 		}
1969 
1970 		if (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) {
1971 			sc->sc_if.if_flags &= ~IFF_UP;
1972 			carp_set_state_all(sc, INIT);
1973 		} else
1974 			carp_hmac_prepare(sc);
1975 	}
1976 
1977 	carp_setrun_all(sc, 0);
1978 }
1979 
1980 int
1981 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin)
1982 {
1983 	struct ifnet *ifp = sc->sc_carpdev;
1984 	struct in_ifaddr *ia, *ia_if;
1985 	int error = 0;
1986 
1987 	if (sin->sin_addr.s_addr == 0) {
1988 		if (!(sc->sc_if.if_flags & IFF_UP))
1989 			carp_set_state_all(sc, INIT);
1990 		if (sc->sc_naddrs)
1991 			sc->sc_if.if_flags |= IFF_UP;
1992 		carp_setrun_all(sc, 0);
1993 		return (0);
1994 	}
1995 
1996 	/* we have to do this by hand to ensure we don't match on ourselves */
1997 	ia_if = NULL;
1998 	for (ia = TAILQ_FIRST(&in_ifaddr); ia;
1999 	    ia = TAILQ_NEXT(ia, ia_list)) {
2000 
2001 		/* and, yeah, we need a multicast-capable iface too */
2002 		if (ia->ia_ifp != &sc->sc_if &&
2003 		    ia->ia_ifp->if_type != IFT_CARP &&
2004 		    (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
2005 		    ia->ia_ifp->if_rdomain == sc->sc_if.if_rdomain &&
2006 		    (sin->sin_addr.s_addr & ia->ia_netmask) ==
2007 		    ia->ia_net) {
2008 			if (!ia_if)
2009 				ia_if = ia;
2010 		}
2011 	}
2012 
2013 	if (ia_if) {
2014 		ia = ia_if;
2015 		if (ifp) {
2016 			if (ifp != ia->ia_ifp)
2017 				return (EADDRNOTAVAIL);
2018 		} else {
2019 			ifp = ia->ia_ifp;
2020 		}
2021 	}
2022 
2023 	if ((error = carp_set_ifp(sc, ifp)))
2024 		return (error);
2025 
2026 	if (sc->sc_carpdev == NULL)
2027 		return (EADDRNOTAVAIL);
2028 
2029 	if (sc->sc_naddrs == 0 && (error = carp_join_multicast(sc)) != 0)
2030 		return (error);
2031 
2032 	sc->sc_naddrs++;
2033 	if (sc->sc_carpdev != NULL)
2034 		sc->sc_if.if_flags |= IFF_UP;
2035 
2036 	carp_set_state_all(sc, INIT);
2037 
2038 	/*
2039 	 * Hook if_addrhooks so that we get a callback after in_ifinit has run,
2040 	 * to correct any inappropriate routes that it inserted.
2041 	 */
2042 	if (sc->ah_cookie == NULL)
2043 		sc->ah_cookie = hook_establish(sc->sc_if.if_addrhooks, 0,
2044 		    carp_addr_updated, sc);
2045 
2046 	return (0);
2047 }
2048 
2049 int
2050 carp_join_multicast(struct carp_softc *sc)
2051 {
2052 	struct ip_moptions *imo = &sc->sc_imo;
2053 	struct in_multi *imm;
2054 	struct in_addr addr;
2055 
2056 	if (!IN_MULTICAST(sc->sc_peer.s_addr))
2057 		return (0);
2058 
2059 	addr.s_addr = sc->sc_peer.s_addr;
2060 	if ((imm = in_addmulti(&addr, &sc->sc_if)) == NULL)
2061 		return (ENOBUFS);
2062 
2063 	imo->imo_membership[0] = imm;
2064 	imo->imo_num_memberships = 1;
2065 	imo->imo_multicast_ifp = &sc->sc_if;
2066 	imo->imo_multicast_ttl = CARP_DFLTTL;
2067 	imo->imo_multicast_loop = 0;
2068 	return (0);
2069 }
2070 
2071 
2072 #ifdef INET6
2073 int
2074 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
2075 {
2076 	struct ifnet *ifp = sc->sc_carpdev;
2077 	struct in6_ifaddr *ia, *ia_if;
2078 	int error = 0;
2079 
2080 	if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
2081 		if (!(sc->sc_if.if_flags & IFF_UP))
2082 			carp_set_state_all(sc, INIT);
2083 		if (sc->sc_naddrs6)
2084 			sc->sc_if.if_flags |= IFF_UP;
2085 		carp_setrun_all(sc, 0);
2086 		return (0);
2087 	}
2088 
2089 	/* we have to do this by hand to ensure we don't match on ourselves */
2090 	ia_if = NULL;
2091 	for (ia = in6_ifaddr; ia; ia = ia->ia_next) {
2092 		int i;
2093 
2094 		for (i = 0; i < 4; i++) {
2095 			if ((sin6->sin6_addr.s6_addr32[i] &
2096 			    ia->ia_prefixmask.sin6_addr.s6_addr32[i]) !=
2097 			    (ia->ia_addr.sin6_addr.s6_addr32[i] &
2098 			    ia->ia_prefixmask.sin6_addr.s6_addr32[i]))
2099 				break;
2100 		}
2101 		/* and, yeah, we need a multicast-capable iface too */
2102 		if (ia->ia_ifp != &sc->sc_if &&
2103 		    ia->ia_ifp->if_type != IFT_CARP &&
2104 		    (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
2105 		    (i == 4)) {
2106 			if (!ia_if)
2107 				ia_if = ia;
2108 		}
2109 	}
2110 
2111 	if (ia_if) {
2112 		ia = ia_if;
2113 		if (sc->sc_carpdev) {
2114 			if (sc->sc_carpdev != ia->ia_ifp)
2115 				return (EADDRNOTAVAIL);
2116 		} else {
2117 			ifp = ia->ia_ifp;
2118 		}
2119 	}
2120 
2121 	if ((error = carp_set_ifp(sc, ifp)))
2122 		return (error);
2123 
2124 	if (sc->sc_carpdev == NULL)
2125 		return (EADDRNOTAVAIL);
2126 
2127 	if (sc->sc_naddrs6 == 0 && (error = carp_join_multicast6(sc)) != 0)
2128 		return (error);
2129 
2130 	if (!IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))
2131 		sc->sc_naddrs6++;
2132 	if (sc->sc_carpdev != NULL && sc->sc_naddrs6)
2133 		sc->sc_if.if_flags |= IFF_UP;
2134 	carp_set_state_all(sc, INIT);
2135 	carp_setrun_all(sc, 0);
2136 
2137 	return (0);
2138 }
2139 
2140 int
2141 carp_join_multicast6(struct carp_softc *sc)
2142 {
2143 	struct in6_multi_mship *imm, *imm2;
2144 	struct ip6_moptions *im6o = &sc->sc_im6o;
2145 	struct sockaddr_in6 addr6;
2146 	int error;
2147 
2148 	/* Join IPv6 CARP multicast group */
2149 	bzero(&addr6, sizeof(addr6));
2150 	addr6.sin6_family = AF_INET6;
2151 	addr6.sin6_len = sizeof(addr6);
2152 	addr6.sin6_addr.s6_addr16[0] = htons(0xff02);
2153 	addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index);
2154 	addr6.sin6_addr.s6_addr8[15] = 0x12;
2155 	if ((imm = in6_joingroup(&sc->sc_if,
2156 	    &addr6.sin6_addr, &error)) == NULL) {
2157 		return (error);
2158 	}
2159 	/* join solicited multicast address */
2160 	bzero(&addr6.sin6_addr, sizeof(addr6.sin6_addr));
2161 	addr6.sin6_addr.s6_addr16[0] = htons(0xff02);
2162 	addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index);
2163 	addr6.sin6_addr.s6_addr32[1] = 0;
2164 	addr6.sin6_addr.s6_addr32[2] = htonl(1);
2165 	addr6.sin6_addr.s6_addr32[3] = 0;
2166 	addr6.sin6_addr.s6_addr8[12] = 0xff;
2167 	if ((imm2 = in6_joingroup(&sc->sc_if,
2168 	    &addr6.sin6_addr, &error)) == NULL) {
2169 		in6_leavegroup(imm);
2170 		return (error);
2171 	}
2172 
2173 	/* apply v6 multicast membership */
2174 	im6o->im6o_multicast_ifp = &sc->sc_if;
2175 	if (imm)
2176 		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm,
2177 		    i6mm_chain);
2178 	if (imm2)
2179 		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm2,
2180 		    i6mm_chain);
2181 
2182 	return (0);
2183 }
2184 
2185 #endif /* INET6 */
2186 
/*
 * ioctl handler of the carp interface.
 *
 * ifp	carp interface the request is for (its if_softc is the carp_softc)
 * cmd	ioctl command
 * addr	command argument; interpreted as a struct ifaddr for SIOCSIFADDR
 *	and as a struct ifreq for the other commands handled here
 *
 * Returns 0 on success or an errno value.  Unknown commands yield ENOTTY.
 *
 * Unless a case returns early, the common tail at the end re-derives the
 * interface link-layer address when ac_enaddr and sc_curlladdr differ and
 * then calls carp_hmac_prepare().  Note that several error paths in the
 * SIOCSVH case use `return' and therefore skip that tail.
 */
int
carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
{
	struct proc *p = curproc;	/* XXX */
	struct carp_softc *sc = ifp->if_softc;
	struct carp_vhost_entry *vhe;
	struct carpreq carpr;
	struct ifaddr *ifa = (struct ifaddr *)addr;
	struct ifreq *ifr = (struct ifreq *)addr;
	struct ifnet *cdev = NULL;
	int i, error = 0;

	switch (cmd) {
	case SIOCSIFADDR:
		/* An address is being assigned: mark the interface up. */
		switch (ifa->ifa_addr->sa_family) {
#ifdef INET
		case AF_INET:
			sc->sc_if.if_flags |= IFF_UP;
			/*
			 * emulate arp_ifinit() without doing a gratuitous arp
			 * request so that the routes are setup correctly.
			 */
			ifa->ifa_rtrequest = arp_rtrequest;
			ifa->ifa_flags |= RTF_CLONING;

			error = carp_set_addr(sc, satosin(ifa->ifa_addr));
			break;
#endif /* INET */
#ifdef INET6
		case AF_INET6:
			sc->sc_if.if_flags |= IFF_UP;
			error = carp_set_addr6(sc, satosin6(ifa->ifa_addr));
			break;
#endif /* INET6 */
		default:
			error = EAFNOSUPPORT;
			break;
		}
		break;

	case SIOCSIFFLAGS:
		/* The state of the first vhost entry decides what to do. */
		vhe = LIST_FIRST(&sc->carp_vhosts);
		if (vhe->state != INIT && !(ifr->ifr_flags & IFF_UP)) {
			/* Running interface is being taken down. */
			carp_del_all_timeouts(sc);

			/* we need the interface up to bow out */
			sc->sc_if.if_flags |= IFF_UP;
			sc->sc_bow_out = 1;
			carp_vhe_send_ad_all(sc);
			sc->sc_bow_out = 0;

			sc->sc_if.if_flags &= ~IFF_UP;
			carp_set_state_all(sc, INIT);
			carp_setrun_all(sc, 0);
		} else if (vhe->state == INIT && (ifr->ifr_flags & IFF_UP)) {
			/* Idle interface is being brought up. */
			sc->sc_if.if_flags |= IFF_UP;
			carp_setrun_all(sc, 0);
		}
		break;

	case SIOCSVH:
		/* Set the carp configuration; requires privilege. */
		vhe = LIST_FIRST(&sc->carp_vhosts);
		if ((error = suser(p, p->p_acflag)) != 0)
			break;
		if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
			break;
		/*
		 * NOTE(review): this initial value does not survive; error is
		 * reassigned by the carp_set_ifp() and carp_vhids_ioctl()
		 * calls below, both of which must yield 0 for execution to
		 * continue.  The `error > 0' test at the end of this case can
		 * therefore not be true on that path.
		 */
		error = 1;
		/* A non-empty parent name must refer to an existing interface. */
		if (carpr.carpr_carpdev[0] != '\0' &&
		    (cdev = ifunit(carpr.carpr_carpdev)) == NULL)
			return (EINVAL);
		/* No peer given: fall back to the carp multicast group. */
		if (carpr.carpr_peer.s_addr == 0)
			sc->sc_peer.s_addr = INADDR_CARP_GROUP;
		else
			sc->sc_peer.s_addr = carpr.carpr_peer.s_addr;
		if ((error = carp_set_ifp(sc, cdev)))
			return (error);
		/* Forced state change, only honoured once we left INIT. */
		if (vhe->state != INIT && carpr.carpr_state != vhe->state) {
			switch (carpr.carpr_state) {
			case BACKUP:
				timeout_del(&vhe->ad_tmo);
				carp_set_state_all(sc, BACKUP);
				carp_setrun_all(sc, 0);
				carp_setroute(sc, RTM_DELETE);
				break;
			case MASTER:
				LIST_FOREACH(vhe, &sc->carp_vhosts,
				    vhost_entries)
					carp_master_down(vhe);
				break;
			default:
				break;
			}
		}
		if ((error = carp_vhids_ioctl(sc, &carpr)))
			return (error);
		/* advbase is only changed when a positive value is given. */
		if (carpr.carpr_advbase > 0) {
			if (carpr.carpr_advbase > 255) {
				error = EINVAL;
				break;
			}
			sc->sc_advbase = carpr.carpr_advbase;
			error--;
		}
		/* Propagate changed advskews to the vhost entries, in list order. */
		if (bcmp(sc->sc_advskews, carpr.carpr_advskews,
		    sizeof(sc->sc_advskews))) {
			i = 0;
			LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries)
				vhe->advskew = carpr.carpr_advskews[i++];
			bcopy(carpr.carpr_advskews, sc->sc_advskews,
			    sizeof(sc->sc_advskews));
		}
		/* A new balancing mode changes the lladdr and the lsmask. */
		if (sc->sc_balancing != carpr.carpr_balancing) {
			if (carpr.carpr_balancing > CARP_BAL_MAXID) {
				error = EINVAL;
				break;
			}
			sc->sc_balancing = carpr.carpr_balancing;
			carp_set_enaddr(sc);
			carp_update_lsmask(sc);
		}
		bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key));
		if (error > 0)
			error = EINVAL;
		else {
			error = 0;
			carp_setrun_all(sc, 0);
		}
		break;

	case SIOCGVH:
		/* Report the current configuration back to userland. */
		bzero(&carpr, sizeof(carpr));
		if (sc->sc_carpdev != NULL)
			strlcpy(carpr.carpr_carpdev, sc->sc_carpdev->if_xname,
			    IFNAMSIZ);
		i = 0;
		LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) {
			carpr.carpr_vhids[i] = vhe->vhid;
			carpr.carpr_advskews[i] = vhe->advskew;
			carpr.carpr_states[i] = vhe->state;
			i++;
		}
		carpr.carpr_advbase = sc->sc_advbase;
		carpr.carpr_balancing = sc->sc_balancing;
		/* The key is only copied out when suser() succeeds. */
		if (suser(p, p->p_acflag) == 0)
			bcopy(sc->sc_key, carpr.carpr_key,
			    sizeof(carpr.carpr_key));
		carpr.carpr_peer.s_addr = sc->sc_peer.s_addr;
		error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
		break;

	case SIOCADDMULTI:
		error = carp_ether_addmulti(sc, ifr);
		break;

	case SIOCDELMULTI:
		error = carp_ether_delmulti(sc, ifr);
		break;
	case SIOCAIFGROUP:
	case SIOCDIFGROUP:
		/* Group demotion only needs adjusting if we are demoted. */
		if (sc->sc_demote_cnt)
			carp_ifgroup_ioctl(ifp, cmd, addr);
		break;
	case SIOCSIFGATTR:
		carp_ifgattr_ioctl(ifp, cmd, addr);
		break;
	default:
		error = ENOTTY;
	}

	/* Common tail: resync the lladdr if it changed, refresh the HMAC. */
	if (bcmp(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN) != 0)
		carp_set_enaddr(sc);
	carp_hmac_prepare(sc);
	return (error);
}
2361 
2362 int
2363 carp_check_dup_vhids(struct carp_softc *sc, struct carp_if *cif,
2364     struct carpreq *carpr)
2365 {
2366 	struct carp_softc *vr;
2367 	struct carp_vhost_entry *vhe, *vhe0;
2368 	int i;
2369 
2370 	TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
2371 		if (vr == sc)
2372 			continue;
2373 		LIST_FOREACH(vhe, &vr->carp_vhosts, vhost_entries) {
2374 			if (carpr) {
2375 				for (i = 0; carpr->carpr_vhids[i]; i++) {
2376 					if (vhe->vhid == carpr->carpr_vhids[i])
2377 						return (EINVAL);
2378 				}
2379 			}
2380 			LIST_FOREACH(vhe0, &sc->carp_vhosts, vhost_entries) {
2381 				if (vhe->vhid == vhe0->vhid)
2382 					return (EINVAL);
2383 			}
2384 		}
2385 	}
2386 	return (0);
2387 }
2388 
2389 int
2390 carp_vhids_ioctl(struct carp_softc *sc, struct carpreq *carpr)
2391 {
2392 	int i, j;
2393 	u_int8_t taken_vhids[256];
2394 
2395 	if (carpr->carpr_vhids[0] == 0 ||
2396 	    !bcmp(sc->sc_vhids, carpr->carpr_vhids, sizeof(sc->sc_vhids)))
2397 		return (0);
2398 
2399 	bzero(taken_vhids, sizeof(taken_vhids));
2400 	for (i = 0; carpr->carpr_vhids[i]; i++) {
2401 		if (taken_vhids[carpr->carpr_vhids[i]])
2402 			return (EINVAL);
2403 		taken_vhids[carpr->carpr_vhids[i]] = 1;
2404 
2405 		if (sc->sc_carpdev) {
2406 			struct carp_if *cif;
2407 			cif = (struct carp_if *)sc->sc_carpdev->if_carp;
2408 			if (carp_check_dup_vhids(sc, cif, carpr))
2409 				return (EINVAL);
2410 		}
2411 		if (carpr->carpr_advskews[i] >= 255)
2412 			return (EINVAL);
2413 	}
2414 	/* set sane balancing defaults */
2415 	if (i <= 1)
2416 		carpr->carpr_balancing = CARP_BAL_NONE;
2417 	else if (carpr->carpr_balancing == CARP_BAL_NONE &&
2418 	    sc->sc_balancing == CARP_BAL_NONE)
2419 		carpr->carpr_balancing = CARP_BAL_IP;
2420 
2421 	/* destroy all */
2422 	carp_del_all_timeouts(sc);
2423 	carp_destroy_vhosts(sc);
2424 	bzero(sc->sc_vhids, sizeof(sc->sc_vhids));
2425 
2426 	/* sort vhosts list by vhid */
2427 	for (j = 1; j <= 255; j++) {
2428 		for (i = 0; carpr->carpr_vhids[i]; i++) {
2429 			if (carpr->carpr_vhids[i] != j)
2430 				continue;
2431 			if (carp_new_vhost(sc, carpr->carpr_vhids[i],
2432 			    carpr->carpr_advskews[i]))
2433 				return (ENOMEM);
2434 			sc->sc_vhids[i] = carpr->carpr_vhids[i];
2435 			sc->sc_advskews[i] = carpr->carpr_advskews[i];
2436 		}
2437 	}
2438 	carp_set_enaddr(sc);
2439 	carp_set_state_all(sc, INIT);
2440 	return (0);
2441 }
2442 
2443 void
2444 carp_ifgroup_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
2445 {
2446 	struct ifgroupreq *ifgr = (struct ifgroupreq *)addr;
2447 	struct ifg_list	*ifgl;
2448 	int *dm, adj;
2449 
2450 	if (!strcmp(ifgr->ifgr_group, IFG_ALL))
2451 		return;
2452 	adj = ((struct carp_softc *)ifp->if_softc)->sc_demote_cnt;
2453 	if (cmd == SIOCDIFGROUP)
2454 		adj = adj * -1;
2455 
2456 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2457 		if (!strcmp(ifgl->ifgl_group->ifg_group, ifgr->ifgr_group)) {
2458 			dm = &ifgl->ifgl_group->ifg_carp_demoted;
2459 			if (*dm + adj >= 0)
2460 				*dm += adj;
2461 			else
2462 				*dm = 0;
2463 		}
2464 }
2465 
2466 void
2467 carp_ifgattr_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
2468 {
2469 	struct ifgroupreq *ifgr = (struct ifgroupreq *)addr;
2470 	struct carp_softc *sc = ifp->if_softc;
2471 
2472 	if (ifgr->ifgr_attrib.ifg_carp_demoted > 0 && (sc->sc_if.if_flags &
2473 	    (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING))
2474 		carp_vhe_send_ad_all(sc);
2475 }
2476 
/*
 * if_start handler of the carp interface.  It is not expected to be
 * called; it does nothing except report the call on DEBUG kernels.
 */
void
carp_start(struct ifnet *ifp)
{
#if defined(DEBUG)
	printf("%s: start called\n", ifp->if_xname);
#endif /* DEBUG */
}
2487 
2488 int
2489 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
2490     struct rtentry *rt)
2491 {
2492 	struct carp_softc *sc = ((struct carp_softc *)ifp->if_softc);
2493 	struct carp_vhost_entry *vhe;
2494 
2495 	vhe = sc->cur_vhe ? sc->cur_vhe : LIST_FIRST(&sc->carp_vhosts);
2496 
2497 	if (sc->sc_carpdev != NULL &&
2498 	    (sc->sc_balancing || vhe->state == MASTER))
2499 		return (sc->sc_carpdev->if_output(ifp, m, sa, rt));
2500 	else {
2501 		m_freem(m);
2502 		return (ENETUNREACH);
2503 	}
2504 }
2505 
2506 void
2507 carp_set_state_all(struct carp_softc *sc, int state)
2508 {
2509 	struct carp_vhost_entry *vhe;
2510 
2511 	LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries)
2512 		carp_set_state(vhe, state);
2513 }
2514 
2515 void
2516 carp_set_state(struct carp_vhost_entry *vhe, int state)
2517 {
2518 	struct carp_softc *sc = vhe->parent_sc;
2519 	static const char *carp_states[] = { CARP_STATES };
2520 	int loglevel;
2521 
2522 	if (vhe->state == state)
2523 		return;
2524 	if (vhe->state == INIT || state == INIT)
2525 		loglevel = LOG_WARNING;
2526 	else
2527 		loglevel = LOG_CRIT;
2528 
2529 	if (sc->sc_vhe_count > 1)
2530 		CARP_LOG(loglevel, sc,
2531 		    ("state transition (vhid %d): %s -> %s", vhe->vhid,
2532 		    carp_states[vhe->state], carp_states[state]));
2533 	else
2534 		CARP_LOG(loglevel, sc,
2535 		    ("state transition: %s -> %s",
2536 		    carp_states[vhe->state], carp_states[state]));
2537 
2538 	vhe->state = state;
2539 	carp_update_lsmask(sc);
2540 
2541 	/* only the master vhe creates link state messages */
2542 	if (!vhe->vhe_leader)
2543 		return;
2544 
2545 	switch (state) {
2546 	case BACKUP:
2547 		sc->sc_if.if_link_state = LINK_STATE_DOWN;
2548 		break;
2549 	case MASTER:
2550 		sc->sc_if.if_link_state = LINK_STATE_UP;
2551 		break;
2552 	default:
2553 		sc->sc_if.if_link_state = LINK_STATE_UNKNOWN;
2554 		break;
2555 	}
2556 	if_link_state_change(&sc->sc_if);
2557 }
2558 
2559 void
2560 carp_group_demote_adj(struct ifnet *ifp, int adj, char *reason)
2561 {
2562 	struct ifg_list	*ifgl;
2563 	int *dm;
2564 	struct carp_softc *nil = NULL;
2565 
2566 	if (ifp->if_type == IFT_CARP) {
2567 		dm = &((struct carp_softc *)ifp->if_softc)->sc_demote_cnt;
2568 		if (*dm + adj >= 0)
2569 			*dm += adj;
2570 		else
2571 			*dm = 0;
2572 	}
2573 
2574 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
2575 		if (!strcmp(ifgl->ifgl_group->ifg_group, IFG_ALL))
2576 			continue;
2577 		dm = &ifgl->ifgl_group->ifg_carp_demoted;
2578 
2579 		if (*dm + adj >= 0)
2580 			*dm += adj;
2581 		else
2582 			*dm = 0;
2583 
2584 		if (adj > 0 && *dm == 1)
2585 			carp_send_ad_all();
2586 		CARP_LOG(LOG_NOTICE, nil,
2587 		    ("%s demoted group %s by %d to %d (%s)",
2588 		    ifp->if_xname, ifgl->ifgl_group->ifg_group,
2589 		    adj, *dm, reason));
2590 	}
2591 }
2592 
2593 int
2594 carp_group_demote_count(struct carp_softc *sc)
2595 {
2596 	struct ifg_list	*ifgl;
2597 	int count = 0;
2598 
2599 	TAILQ_FOREACH(ifgl, &sc->sc_if.if_groups, ifgl_next)
2600 		count += ifgl->ifgl_group->ifg_carp_demoted;
2601 
2602 	if (count == 0 && sc->sc_demote_cnt)
2603 		count = sc->sc_demote_cnt;
2604 
2605 	return (count > 255 ? 255 : count);
2606 }
2607 
2608 void
2609 carp_carpdev_state(void *v)
2610 {
2611 	struct carp_if *cif;
2612 	struct carp_softc *sc;
2613 	struct ifnet *ifp = v;
2614 
2615 	if (ifp->if_type == IFT_CARP)
2616 		return;
2617 
2618 	cif = (struct carp_if *)ifp->if_carp;
2619 
2620 	TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
2621 		int suppressed = sc->sc_suppress;
2622 
2623 		if (sc->sc_carpdev->if_link_state == LINK_STATE_DOWN ||
2624 		    !(sc->sc_carpdev->if_flags & IFF_UP)) {
2625 			sc->sc_if.if_flags &= ~IFF_RUNNING;
2626 			carp_del_all_timeouts(sc);
2627 			carp_set_state_all(sc, INIT);
2628 			sc->sc_suppress = 1;
2629 			carp_setrun_all(sc, 0);
2630 			if (!suppressed)
2631 				carp_group_demote_adj(&sc->sc_if, 1, "carpdev");
2632 		} else if (suppressed) {
2633 			carp_set_state_all(sc, INIT);
2634 			sc->sc_suppress = 0;
2635 			carp_setrun_all(sc, 0);
2636 			carp_group_demote_adj(&sc->sc_if, -1, "carpdev");
2637 		}
2638 	}
2639 }
2640 
2641 int
2642 carp_ether_addmulti(struct carp_softc *sc, struct ifreq *ifr)
2643 {
2644 	struct ifnet *ifp;
2645 	struct carp_mc_entry *mc;
2646 	u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN];
2647 	int error;
2648 
2649 	ifp = sc->sc_carpdev;
2650 	if (ifp == NULL)
2651 		return (EINVAL);
2652 
2653 	error = ether_addmulti(ifr, (struct arpcom *)&sc->sc_ac);
2654 	if (error != ENETRESET)
2655 		return (error);
2656 
2657 	/*
2658 	 * This is new multicast address.  We have to tell parent
2659 	 * about it.  Also, remember this multicast address so that
2660 	 * we can delete them on unconfigure.
2661 	 */
2662 	mc = malloc(sizeof(struct carp_mc_entry), M_DEVBUF, M_NOWAIT);
2663 	if (mc == NULL) {
2664 		error = ENOMEM;
2665 		goto alloc_failed;
2666 	}
2667 
2668 	/*
2669 	 * As ether_addmulti() returns ENETRESET, following two
2670 	 * statement shouldn't fail.
2671 	 */
2672 	(void)ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi);
2673 	ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, mc->mc_enm);
2674 	memcpy(&mc->mc_addr, &ifr->ifr_addr, ifr->ifr_addr.sa_len);
2675 	LIST_INSERT_HEAD(&sc->carp_mc_listhead, mc, mc_entries);
2676 
2677 	error = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)ifr);
2678 	if (error != 0)
2679 		goto ioctl_failed;
2680 
2681 	return (error);
2682 
2683  ioctl_failed:
2684 	LIST_REMOVE(mc, mc_entries);
2685 	free(mc, M_DEVBUF);
2686  alloc_failed:
2687 	(void)ether_delmulti(ifr, (struct arpcom *)&sc->sc_ac);
2688 
2689 	return (error);
2690 }
2691 
2692 int
2693 carp_ether_delmulti(struct carp_softc *sc, struct ifreq *ifr)
2694 {
2695 	struct ifnet *ifp;
2696 	struct ether_multi *enm;
2697 	struct carp_mc_entry *mc;
2698 	u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN];
2699 	int error;
2700 
2701 	ifp = sc->sc_carpdev;
2702 	if (ifp == NULL)
2703 		return (EINVAL);
2704 
2705 	/*
2706 	 * Find a key to lookup carp_mc_entry.  We have to do this
2707 	 * before calling ether_delmulti for obvious reason.
2708 	 */
2709 	if ((error = ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi)) != 0)
2710 		return (error);
2711 	ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, enm);
2712 	if (enm == NULL)
2713 		return (EINVAL);
2714 
2715 	LIST_FOREACH(mc, &sc->carp_mc_listhead, mc_entries)
2716 		if (mc->mc_enm == enm)
2717 			break;
2718 
2719 	/* We won't delete entries we didn't add */
2720 	if (mc == NULL)
2721 		return (EINVAL);
2722 
2723 	error = ether_delmulti(ifr, (struct arpcom *)&sc->sc_ac);
2724 	if (error != ENETRESET)
2725 		return (error);
2726 
2727 	/* We no longer use this multicast address.  Tell parent so. */
2728 	error = (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)ifr);
2729 	if (error == 0) {
2730 		/* And forget about this address. */
2731 		LIST_REMOVE(mc, mc_entries);
2732 		free(mc, M_DEVBUF);
2733 	} else
2734 		(void)ether_addmulti(ifr, (struct arpcom *)&sc->sc_ac);
2735 	return (error);
2736 }
2737 
2738 /*
2739  * Delete any multicast address we have asked to add from parent
2740  * interface.  Called when the carp is being unconfigured.
2741  */
2742 void
2743 carp_ether_purgemulti(struct carp_softc *sc)
2744 {
2745 	struct ifnet *ifp = sc->sc_carpdev;		/* Parent. */
2746 	struct carp_mc_entry *mc;
2747 	union {
2748 		struct ifreq ifreq;
2749 		struct {
2750 			char ifr_name[IFNAMSIZ];
2751 			struct sockaddr_storage ifr_ss;
2752 		} ifreq_storage;
2753 	} u;
2754 	struct ifreq *ifr = &u.ifreq;
2755 
2756 	if (ifp == NULL)
2757 		return;
2758 
2759 	memcpy(ifr->ifr_name, ifp->if_xname, IFNAMSIZ);
2760 	while ((mc = LIST_FIRST(&sc->carp_mc_listhead)) != NULL) {
2761 		memcpy(&ifr->ifr_addr, &mc->mc_addr, mc->mc_addr.ss_len);
2762 		(void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)ifr);
2763 		LIST_REMOVE(mc, mc_entries);
2764 		free(mc, M_DEVBUF);
2765 	}
2766 }
2767