xref: /openbsd/sys/netinet/ip_carp.c (revision 10d5b13e)
1 /*	$OpenBSD: ip_carp.c,v 1.365 2024/12/19 22:10:35 mvs Exp $	*/
2 
3 /*
4  * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
5  * Copyright (c) 2003 Ryan McBride. All rights reserved.
6  * Copyright (c) 2006-2008 Marco Pfatschbacher. All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
21  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
25  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
26  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
27  * THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
30 /*
31  * TODO:
32  *	- iface reconfigure
33  *	- support for hardware checksum calculations;
34  *
35  */
36 
37 #include "ether.h"
38 
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/mbuf.h>
42 #include <sys/socket.h>
43 #include <sys/socketvar.h>
44 #include <sys/timeout.h>
45 #include <sys/ioctl.h>
46 #include <sys/errno.h>
47 #include <sys/device.h>
48 #include <sys/kernel.h>
49 #include <sys/sysctl.h>
50 #include <sys/syslog.h>
51 #include <sys/refcnt.h>
52 
53 #include <net/if.h>
54 #include <net/if_var.h>
55 #include <net/if_types.h>
56 #include <net/netisr.h>
57 #include <net/route.h>
58 
59 #include <crypto/sha1.h>
60 
61 #include <netinet/in.h>
62 #include <netinet/in_var.h>
63 #include <netinet/ip.h>
64 #include <netinet/ip_var.h>
65 #include <netinet/if_ether.h>
66 #include <netinet/ip_ipsp.h>
67 
68 #include <net/if_dl.h>
69 
70 #ifdef INET6
71 #include <netinet6/in6_var.h>
72 #include <netinet/icmp6.h>
73 #include <netinet/ip6.h>
74 #include <netinet6/ip6_var.h>
75 #include <netinet6/nd6.h>
76 #include <netinet6/in6_ifattach.h>
77 #endif
78 
79 #include "bpfilter.h"
80 #if NBPFILTER > 0
81 #include <net/bpf.h>
82 #endif
83 
84 #include "vlan.h"
85 #if NVLAN > 0
86 #include <net/if_vlan_var.h>
87 #endif
88 
89 #include <netinet/ip_carp.h>
90 
91 /*
92  * Locks used to protect data:
93  *	a	atomic
94  */
95 
/*
 * Element type of the per-softc carp_mc_listhead list.
 * NOTE(review): presumably one entry per Ethernet multicast address
 * joined through the carp interface -- confirm against
 * carp_ether_addmulti().
 */
struct carp_mc_entry {
	LIST_ENTRY(carp_mc_entry)	mc_entries;	/* list linkage */
	union {
		struct ether_multi	*mcu_enm;
	} mc_u;
	struct sockaddr_storage		mc_addr;
};
#define	mc_enm	mc_u.mcu_enm	/* shorthand for the only union member */
104 
/*
 * Indexes into carp_vhost_entry's vhe_sha1[].  The HMAC_ORIG context
 * hashes IPv6 addresses for which IN6_IS_SCOPE_EMBED() is true (with the
 * embedded scope cleared); the HMAC_NOV6LL context leaves them out.
 * HMAC_MAX is the number of contexts.
 */
enum { HMAC_ORIG=0, HMAC_NOV6LL=1, HMAC_MAX=2 };
106 
/*
 * Per virtual host (vhid) state of a carp interface.  Entries live on
 * the owning softc's carp_vhosts SRP list.
 */
struct carp_vhost_entry {
	SRPL_ENTRY(carp_vhost_entry) vhost_entries;
	struct refcnt vhost_refcnt;

	struct carp_softc *parent_sc;	/* owner; a softc ref is taken for it */
	int vhe_leader;			/* set on the first entry of a softc */
	int vhid;			/* matched against carp_vhid on input */
	int advskew;
	enum { INIT = 0, BACKUP, MASTER }	state;
	struct timeout ad_tmo;	/* advertisement timeout */
	struct timeout md_tmo;	/* master down timeout */
	struct timeout md6_tmo;	/* master down timeout; NOTE(review): presumably the IPv6 one */

	/* sent as carp_counter; filled with random bytes on first use */
	u_int64_t vhe_replay_cookie;

	/* authentication */
#define CARP_HMAC_PAD	64
	/* HMAC key pad: ipad while precomputing, left holding the opad */
	unsigned char vhe_pad[CARP_HMAC_PAD];
	/* precomputed inner hash state, one per HMAC_* context */
	SHA1_CTX vhe_sha1[HMAC_MAX];

	u_int8_t vhe_enaddr[ETHER_ADDR_LEN];
};
129 
/*
 * Reference take/release callbacks for carp_vhost_entry, bundled for use
 * with the SRPL_*() operations on carp_vhosts.
 */
void	carp_vh_ref(void *, void *);
void	carp_vh_unref(void *, void *);

struct srpl_rc carp_vh_rc =
    SRPL_RC_INITIALIZER(carp_vh_ref, carp_vh_unref, NULL);
135 
/*
 * Per carp(4) interface state.  The embedded arpcom carries the ifnet
 * (sc_if); sc_carpdevidx is the interface index of the parent device.
 */
struct carp_softc {
	struct arpcom sc_ac;
#define	sc_if		sc_ac.ac_if
#define	sc_carpdevidx	sc_ac.ac_if.if_carpdevidx
	struct task sc_atask;		/* runs carp_addr_updated() */
	struct task sc_ltask;		/* runs carp_carpdev_state() */
	struct task sc_dtask;		/* runs carpdetach() */
	struct ip_moptions sc_imo;
#ifdef INET6
	struct ip6_moptions sc_im6o;
	struct task sc_itask;		/* runs carp_if_linkstate() */
#endif /* INET6 */

	SRPL_ENTRY(carp_softc) sc_list;	/* linkage on the parent's if_carp */
	struct refcnt sc_refcnt;

	int sc_suppress;
	int sc_bow_out;			/* advertise advbase/advskew 255 */
	int sc_demote_cnt;

	int sc_sendad_errors;
#define CARP_SENDAD_MAX_ERRORS(sc) (3 * (sc)->sc_vhe_count)
	int sc_sendad_success;
#define CARP_SENDAD_MIN_SUCCESS(sc) (3 * (sc)->sc_vhe_count)

	char sc_curlladdr[ETHER_ADDR_LEN];

	SRPL_HEAD(, carp_vhost_entry) carp_vhosts;
	int sc_vhe_count;		/* number of entries on carp_vhosts */
	u_int8_t sc_vhids[CARP_MAXNODES];
	u_int8_t sc_advskews[CARP_MAXNODES];
	u_int8_t sc_balancing;

	int sc_naddrs;
	int sc_naddrs6;
	int sc_advbase;		/* seconds */

	/* authentication */
	unsigned char sc_key[CARP_KEY_LEN];

	u_int32_t sc_hashkey[2];	/* derived in carp_hmac_prepare_ctx() */
	u_int32_t sc_lsmask;		/* load sharing mask */
	int sc_lscount;			/* # load sharing interfaces (max 32) */
	int sc_delayed_arp;		/* delayed ARP request countdown */
#ifdef INET6
	int sc_send_na;			/* send NA when link state up */
#endif /* INET6 */
	int sc_realmac;			/* using real mac */

	struct in_addr sc_peer;

	LIST_HEAD(__carp_mchead, carp_mc_entry)	carp_mc_listhead;
	struct carp_vhost_entry *cur_vhe; /* current active vhe */
};
190 
/*
 * Reference take/release callbacks for carp_softc, bundled for use with
 * the SRPL_*() operations on a parent interface's if_carp list.
 */
void	carp_sc_ref(void *, void *);
void	carp_sc_unref(void *, void *);

struct srpl_rc carp_sc_rc =
    SRPL_RC_INITIALIZER(carp_sc_ref, carp_sc_unref, NULL);
196 
/* Tunables exported through carp_sysctl(); read with atomic loads. */
int carpctl_allow = 1;		/* [a] */
int carpctl_preempt = 0;	/* [a] */
int carpctl_log = LOG_CRIT;	/* [a] */

/* sysctl name -> variable table handed to sysctl_bounded_arr() */
const struct sysctl_bounded_args carpctl_vars[] = {
	{CARPCTL_ALLOW, &carpctl_allow, INT_MIN, INT_MAX},
	{CARPCTL_PREEMPT, &carpctl_preempt, INT_MIN, INT_MAX},
	{CARPCTL_LOG, &carpctl_log, INT_MIN, INT_MAX},
};
206 
struct cpumem *carpcounters;	/* statistics, allocated in carpattach() */

/* non-zero while carp_send_ad_all() is running; blocks re-entry */
int	carp_send_all_recur = 0;
210 
/*
 * Emit a log line at priority `l' if carpctl_log is at least `l'.
 * The line is prefixed with the interface name of `sc', or with "carp: "
 * when `sc' is NULL.  `s' is a parenthesised argument list for addlog(),
 * e.g. CARP_LOG(LOG_INFO, sc, ("bad thing %d", x)).
 */
#define	CARP_LOG(l, sc, s)						\
	do {								\
		if ((int)atomic_load_int(&carpctl_log) >= l) {		\
			if (sc)						\
				log(l, "%s: ",				\
				    (sc)->sc_if.if_xname);		\
			else						\
				log(l, "carp: ");			\
			addlog s;					\
			addlog("\n");					\
		}							\
	} while (0)
223 
/* Prototypes of the routines used within this file. */
void	carp_hmac_prepare(struct carp_softc *);
void	carp_hmac_prepare_ctx(struct carp_vhost_entry *, u_int8_t);
void	carp_hmac_generate(struct carp_vhost_entry *, u_int32_t *,
	    unsigned char *, u_int8_t);
int	carp_hmac_verify(struct carp_vhost_entry *, u_int32_t *,
	    unsigned char *);
void	carp_proto_input_c(struct ifnet *, struct mbuf *,
	    struct carp_header *, int, sa_family_t);
int	carp_proto_input_if(struct ifnet *, struct mbuf **, int *, int);
#ifdef INET6
int	carp6_proto_input_if(struct ifnet *, struct mbuf **, int *, int);
#endif
void	carpattach(int);
void	carpdetach(void *);
void	carp_prepare_ad(struct mbuf *, struct carp_vhost_entry *,
	    struct carp_header *);
void	carp_send_ad_all(void);
void	carp_vhe_send_ad_all(struct carp_softc *);
void	carp_timer_ad(void *);
void	carp_send_ad(struct carp_vhost_entry *);
void	carp_send_arp(struct carp_softc *);
void	carp_timer_down(void *);
void	carp_master_down(struct carp_vhost_entry *);
int	carp_ioctl(struct ifnet *, u_long, caddr_t);
int	carp_vhids_ioctl(struct carp_softc *, struct carpreq *);
int	carp_check_dup_vhids(struct carp_softc *, struct srpl *,
	    struct carpreq *);
void	carp_ifgroup_ioctl(struct ifnet *, u_long, caddr_t);
void	carp_ifgattr_ioctl(struct ifnet *, u_long, caddr_t);
void	carp_start(struct ifnet *);
int	carp_enqueue(struct ifnet *, struct mbuf *);
void	carp_transmit(struct carp_softc *, struct ifnet *, struct mbuf *);
void	carp_setrun_all(struct carp_softc *, sa_family_t);
void	carp_setrun(struct carp_vhost_entry *, sa_family_t);
void	carp_set_state_all(struct carp_softc *, int);
void	carp_set_state(struct carp_vhost_entry *, int);
void	carp_multicast_cleanup(struct carp_softc *);
int	carp_set_ifp(struct carp_softc *, struct ifnet *);
void	carp_set_enaddr(struct carp_softc *);
void	carp_set_vhe_enaddr(struct carp_vhost_entry *);
void	carp_addr_updated(void *);
int	carp_set_addr(struct carp_softc *, struct sockaddr_in *);
int	carp_join_multicast(struct carp_softc *);
#ifdef INET6
void	carp_send_na(struct carp_softc *);
int	carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
int	carp_join_multicast6(struct carp_softc *);
void	carp_if_linkstate(void *);
#endif
int	carp_clone_create(struct if_clone *, int);
int	carp_clone_destroy(struct ifnet *);
int	carp_ether_addmulti(struct carp_softc *, struct ifreq *);
int	carp_ether_delmulti(struct carp_softc *, struct ifreq *);
void	carp_ether_purgemulti(struct carp_softc *);
int	carp_group_demote_count(struct carp_softc *);
void	carp_update_lsmask(struct carp_softc *);
int	carp_new_vhost(struct carp_softc *, int, int);
void	carp_destroy_vhosts(struct carp_softc *);
void	carp_del_all_timeouts(struct carp_softc *);
int	carp_vhe_match(struct carp_softc *, uint64_t);
284 
/* Cloner registered by carpattach(); creates and destroys "carpN". */
struct if_clone carp_cloner =
    IF_CLONE_INITIALIZER("carp", carp_clone_create, carp_clone_destroy);

/* in_cksum() over the first _l bytes of mbuf chain _m, as 16 bits */
#define carp_cksum(_m, _l)	((u_int16_t)in_cksum((_m), (_l)))
#define CARP_IFQ_PRIO	6
290 
291 void
carp_hmac_prepare(struct carp_softc * sc)292 carp_hmac_prepare(struct carp_softc *sc)
293 {
294 	struct carp_vhost_entry *vhe;
295 	u_int8_t i;
296 
297 	KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
298 
299 	SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
300 		for (i = 0; i < HMAC_MAX; i++) {
301 			carp_hmac_prepare_ctx(vhe, i);
302 		}
303 	}
304 }
305 
306 void
carp_hmac_prepare_ctx(struct carp_vhost_entry * vhe,u_int8_t ctx)307 carp_hmac_prepare_ctx(struct carp_vhost_entry *vhe, u_int8_t ctx)
308 {
309 	struct carp_softc *sc = vhe->parent_sc;
310 
311 	u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
312 	u_int8_t vhid = vhe->vhid & 0xff;
313 	SHA1_CTX sha1ctx;
314 	u_int32_t kmd[5];
315 	struct ifaddr *ifa;
316 	int i, found;
317 	struct in_addr last, cur, in;
318 #ifdef INET6
319 	struct in6_addr last6, cur6, in6;
320 #endif /* INET6 */
321 
322 	/* compute ipad from key */
323 	memset(vhe->vhe_pad, 0, sizeof(vhe->vhe_pad));
324 	bcopy(sc->sc_key, vhe->vhe_pad, sizeof(sc->sc_key));
325 	for (i = 0; i < sizeof(vhe->vhe_pad); i++)
326 		vhe->vhe_pad[i] ^= 0x36;
327 
328 	/* precompute first part of inner hash */
329 	SHA1Init(&vhe->vhe_sha1[ctx]);
330 	SHA1Update(&vhe->vhe_sha1[ctx], vhe->vhe_pad, sizeof(vhe->vhe_pad));
331 	SHA1Update(&vhe->vhe_sha1[ctx], (void *)&version, sizeof(version));
332 	SHA1Update(&vhe->vhe_sha1[ctx], (void *)&type, sizeof(type));
333 
334 	/* generate a key for the arpbalance hash, before the vhid is hashed */
335 	if (vhe->vhe_leader) {
336 		bcopy(&vhe->vhe_sha1[ctx], &sha1ctx, sizeof(sha1ctx));
337 		SHA1Final((unsigned char *)kmd, &sha1ctx);
338 		sc->sc_hashkey[0] = kmd[0] ^ kmd[1];
339 		sc->sc_hashkey[1] = kmd[2] ^ kmd[3];
340 	}
341 
342 	/* the rest of the precomputation */
343 	if (!sc->sc_realmac && vhe->vhe_leader &&
344 	    memcmp(sc->sc_ac.ac_enaddr, vhe->vhe_enaddr, ETHER_ADDR_LEN) != 0)
345 		SHA1Update(&vhe->vhe_sha1[ctx], sc->sc_ac.ac_enaddr,
346 		    ETHER_ADDR_LEN);
347 
348 	SHA1Update(&vhe->vhe_sha1[ctx], (void *)&vhid, sizeof(vhid));
349 
350 	/* Hash the addresses from smallest to largest, not interface order */
351 	cur.s_addr = 0;
352 	do {
353 		found = 0;
354 		last = cur;
355 		cur.s_addr = 0xffffffff;
356 		TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
357 			if (ifa->ifa_addr->sa_family != AF_INET)
358 				continue;
359 			in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
360 			if (ntohl(in.s_addr) > ntohl(last.s_addr) &&
361 			    ntohl(in.s_addr) < ntohl(cur.s_addr)) {
362 				cur.s_addr = in.s_addr;
363 				found++;
364 			}
365 		}
366 		if (found)
367 			SHA1Update(&vhe->vhe_sha1[ctx],
368 			    (void *)&cur, sizeof(cur));
369 	} while (found);
370 #ifdef INET6
371 	memset(&cur6, 0x00, sizeof(cur6));
372 	do {
373 		found = 0;
374 		last6 = cur6;
375 		memset(&cur6, 0xff, sizeof(cur6));
376 		TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
377 			if (ifa->ifa_addr->sa_family != AF_INET6)
378 				continue;
379 			in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
380 			if (IN6_IS_SCOPE_EMBED(&in6)) {
381 				if (ctx == HMAC_NOV6LL)
382 					continue;
383 				in6.s6_addr16[1] = 0;
384 			}
385 			if (memcmp(&in6, &last6, sizeof(in6)) > 0 &&
386 			    memcmp(&in6, &cur6, sizeof(in6)) < 0) {
387 				cur6 = in6;
388 				found++;
389 			}
390 		}
391 		if (found)
392 			SHA1Update(&vhe->vhe_sha1[ctx],
393 			    (void *)&cur6, sizeof(cur6));
394 	} while (found);
395 #endif /* INET6 */
396 
397 	/* convert ipad to opad */
398 	for (i = 0; i < sizeof(vhe->vhe_pad); i++)
399 		vhe->vhe_pad[i] ^= 0x36 ^ 0x5c;
400 }
401 
402 void
carp_hmac_generate(struct carp_vhost_entry * vhe,u_int32_t counter[2],unsigned char md[20],u_int8_t ctx)403 carp_hmac_generate(struct carp_vhost_entry *vhe, u_int32_t counter[2],
404     unsigned char md[20], u_int8_t ctx)
405 {
406 	SHA1_CTX sha1ctx;
407 
408 	/* fetch first half of inner hash */
409 	bcopy(&vhe->vhe_sha1[ctx], &sha1ctx, sizeof(sha1ctx));
410 
411 	SHA1Update(&sha1ctx, (void *)counter, sizeof(vhe->vhe_replay_cookie));
412 	SHA1Final(md, &sha1ctx);
413 
414 	/* outer hash */
415 	SHA1Init(&sha1ctx);
416 	SHA1Update(&sha1ctx, vhe->vhe_pad, sizeof(vhe->vhe_pad));
417 	SHA1Update(&sha1ctx, md, 20);
418 	SHA1Final(md, &sha1ctx);
419 }
420 
421 int
carp_hmac_verify(struct carp_vhost_entry * vhe,u_int32_t counter[2],unsigned char md[20])422 carp_hmac_verify(struct carp_vhost_entry *vhe, u_int32_t counter[2],
423     unsigned char md[20])
424 {
425 	unsigned char md2[20];
426 	u_int8_t i;
427 
428 	for (i = 0; i < HMAC_MAX; i++) {
429 		carp_hmac_generate(vhe, counter, md2, i);
430 		if (!timingsafe_bcmp(md, md2, sizeof(md2)))
431 			return (0);
432 	}
433 	return (1);
434 }
435 
436 int
carp_proto_input(struct mbuf ** mp,int * offp,int proto,int af)437 carp_proto_input(struct mbuf **mp, int *offp, int proto, int af)
438 {
439 	struct ifnet *ifp;
440 
441 	ifp = if_get((*mp)->m_pkthdr.ph_ifidx);
442 	if (ifp == NULL) {
443 		m_freemp(mp);
444 		return IPPROTO_DONE;
445 	}
446 
447 	proto = carp_proto_input_if(ifp, mp, offp, proto);
448 	if_put(ifp);
449 	return proto;
450 }
451 
452 /*
453  * process input packet.
454  * we have rearranged checks order compared to the rfc,
455  * but it seems more efficient this way or not possible otherwise.
456  */
457 int
carp_proto_input_if(struct ifnet * ifp,struct mbuf ** mp,int * offp,int proto)458 carp_proto_input_if(struct ifnet *ifp, struct mbuf **mp, int *offp, int proto)
459 {
460 	struct mbuf *m = *mp;
461 	struct ip *ip = mtod(m, struct ip *);
462 	struct carp_softc *sc = NULL;
463 	struct carp_header *ch;
464 	int iplen, len, ismulti;
465 
466 	carpstat_inc(carps_ipackets);
467 
468 	if (!atomic_load_int(&carpctl_allow)) {
469 		m_freem(m);
470 		return IPPROTO_DONE;
471 	}
472 
473 	ismulti = IN_MULTICAST(ip->ip_dst.s_addr);
474 
475 	/* check if received on a valid carp interface */
476 	switch (ifp->if_type) {
477 	case IFT_CARP:
478 		break;
479 	case IFT_ETHER:
480 		if (ismulti || !SRPL_EMPTY_LOCKED(&ifp->if_carp))
481 			break;
482 		/* FALLTHROUGH */
483 	default:
484 		carpstat_inc(carps_badif);
485 		CARP_LOG(LOG_INFO, sc,
486 		    ("packet received on non-carp interface: %s",
487 		     ifp->if_xname));
488 		m_freem(m);
489 		return IPPROTO_DONE;
490 	}
491 
492 	/* verify that the IP TTL is 255.  */
493 	if (ip->ip_ttl != CARP_DFLTTL) {
494 		carpstat_inc(carps_badttl);
495 		CARP_LOG(LOG_NOTICE, sc, ("received ttl %d != %d on %s",
496 		    ip->ip_ttl, CARP_DFLTTL, ifp->if_xname));
497 		m_freem(m);
498 		return IPPROTO_DONE;
499 	}
500 
501 	/*
502 	 * verify that the received packet length is
503 	 * equal to the CARP header
504 	 */
505 	iplen = ip->ip_hl << 2;
506 	len = iplen + sizeof(*ch);
507 	if (len > m->m_pkthdr.len) {
508 		carpstat_inc(carps_badlen);
509 		CARP_LOG(LOG_INFO, sc, ("packet too short %d on %s",
510 		    m->m_pkthdr.len, ifp->if_xname));
511 		m_freem(m);
512 		return IPPROTO_DONE;
513 	}
514 
515 	if ((m = *mp = m_pullup(m, len)) == NULL) {
516 		carpstat_inc(carps_hdrops);
517 		return IPPROTO_DONE;
518 	}
519 	ip = mtod(m, struct ip *);
520 	ch = (struct carp_header *)(mtod(m, caddr_t) + iplen);
521 
522 	/* verify the CARP checksum */
523 	m->m_data += iplen;
524 	if (carp_cksum(m, len - iplen)) {
525 		carpstat_inc(carps_badsum);
526 		CARP_LOG(LOG_INFO, sc, ("checksum failed on %s",
527 		    ifp->if_xname));
528 		m_freem(m);
529 		return IPPROTO_DONE;
530 	}
531 	m->m_data -= iplen;
532 
533 	KERNEL_LOCK();
534 	carp_proto_input_c(ifp, m, ch, ismulti, AF_INET);
535 	KERNEL_UNLOCK();
536 	return IPPROTO_DONE;
537 }
538 
539 #ifdef INET6
540 int
carp6_proto_input(struct mbuf ** mp,int * offp,int proto,int af)541 carp6_proto_input(struct mbuf **mp, int *offp, int proto, int af)
542 {
543 	struct ifnet *ifp;
544 
545 	ifp = if_get((*mp)->m_pkthdr.ph_ifidx);
546 	if (ifp == NULL) {
547 		m_freemp(mp);
548 		return IPPROTO_DONE;
549 	}
550 
551 	proto = carp6_proto_input_if(ifp, mp, offp, proto);
552 	if_put(ifp);
553 	return proto;
554 }
555 
556 int
carp6_proto_input_if(struct ifnet * ifp,struct mbuf ** mp,int * offp,int proto)557 carp6_proto_input_if(struct ifnet *ifp, struct mbuf **mp, int *offp, int proto)
558 {
559 	struct mbuf *m = *mp;
560 	struct carp_softc *sc = NULL;
561 	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
562 	struct carp_header *ch;
563 	u_int len;
564 
565 	carpstat_inc(carps_ipackets6);
566 
567 	if (!atomic_load_int(&carpctl_allow)) {
568 		m_freem(m);
569 		return IPPROTO_DONE;
570 	}
571 
572 	/* check if received on a valid carp interface */
573 	if (ifp->if_type != IFT_CARP) {
574 		carpstat_inc(carps_badif);
575 		CARP_LOG(LOG_INFO, sc, ("packet received on non-carp interface: %s",
576 		    ifp->if_xname));
577 		m_freem(m);
578 		return IPPROTO_DONE;
579 	}
580 
581 	/* verify that the IP TTL is 255 */
582 	if (ip6->ip6_hlim != CARP_DFLTTL) {
583 		carpstat_inc(carps_badttl);
584 		CARP_LOG(LOG_NOTICE, sc, ("received ttl %d != %d on %s",
585 		    ip6->ip6_hlim, CARP_DFLTTL, ifp->if_xname));
586 		m_freem(m);
587 		return IPPROTO_DONE;
588 	}
589 
590 	/* verify that we have a complete carp packet */
591 	len = m->m_len;
592 	if ((m = *mp = m_pullup(m, *offp + sizeof(*ch))) == NULL) {
593 		carpstat_inc(carps_badlen);
594 		CARP_LOG(LOG_INFO, sc, ("packet size %u too small", len));
595 		return IPPROTO_DONE;
596 	}
597 	ch = (struct carp_header *)(mtod(m, caddr_t) + *offp);
598 
599 	/* verify the CARP checksum */
600 	m->m_data += *offp;
601 	if (carp_cksum(m, sizeof(*ch))) {
602 		carpstat_inc(carps_badsum);
603 		CARP_LOG(LOG_INFO, sc, ("checksum failed, on %s",
604 		    ifp->if_xname));
605 		m_freem(m);
606 		return IPPROTO_DONE;
607 	}
608 	m->m_data -= *offp;
609 
610 	KERNEL_LOCK();
611 	carp_proto_input_c(ifp, m, ch, 1, AF_INET6);
612 	KERNEL_UNLOCK();
613 	return IPPROTO_DONE;
614 }
615 #endif /* INET6 */
616 
617 void
carp_proto_input_c(struct ifnet * ifp,struct mbuf * m,struct carp_header * ch,int ismulti,sa_family_t af)618 carp_proto_input_c(struct ifnet *ifp, struct mbuf *m, struct carp_header *ch,
619     int ismulti, sa_family_t af)
620 {
621 	struct carp_softc *sc;
622 	struct ifnet *ifp0;
623 	struct carp_vhost_entry *vhe;
624 	struct timeval sc_tv, ch_tv;
625 	struct srpl *cif;
626 
627 	KERNEL_ASSERT_LOCKED(); /* touching if_carp + carp_vhosts */
628 
629 	ifp0 = if_get(ifp->if_carpdevidx);
630 
631 	if (ifp->if_type == IFT_CARP) {
632 		/*
633 		 * If the parent of this carp(4) got destroyed while
634 		 * `m' was being processed, silently drop it.
635 		 */
636 		if (ifp0 == NULL)
637 			goto rele;
638 		cif = &ifp0->if_carp;
639 	} else
640 		cif = &ifp->if_carp;
641 
642 	SRPL_FOREACH_LOCKED(sc, cif, sc_list) {
643 		if (af == AF_INET &&
644 		    ismulti != IN_MULTICAST(sc->sc_peer.s_addr))
645 			continue;
646 		SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
647 			if (vhe->vhid == ch->carp_vhid)
648 				goto found;
649 		}
650 	}
651  found:
652 
653 	if (!sc || (sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
654 	    (IFF_UP|IFF_RUNNING)) {
655 		carpstat_inc(carps_badvhid);
656 		goto rele;
657 	}
658 
659 	getmicrotime(&sc->sc_if.if_lastchange);
660 
661 	/* verify the CARP version. */
662 	if (ch->carp_version != CARP_VERSION) {
663 		carpstat_inc(carps_badver);
664 		sc->sc_if.if_ierrors++;
665 		CARP_LOG(LOG_NOTICE, sc, ("invalid version %d != %d",
666 		    ch->carp_version, CARP_VERSION));
667 		goto rele;
668 	}
669 
670 	/* verify the hash */
671 	if (carp_hmac_verify(vhe, ch->carp_counter, ch->carp_md)) {
672 		carpstat_inc(carps_badauth);
673 		sc->sc_if.if_ierrors++;
674 		CARP_LOG(LOG_INFO, sc, ("incorrect hash"));
675 		goto rele;
676 	}
677 
678 	if (!memcmp(&vhe->vhe_replay_cookie, ch->carp_counter,
679 	    sizeof(ch->carp_counter))) {
680 		struct ifnet *ifp2;
681 
682 		ifp2 = if_get(sc->sc_carpdevidx);
683 		/* Do not log duplicates from non simplex interfaces */
684 		if (ifp2 && ifp2->if_flags & IFF_SIMPLEX) {
685 			carpstat_inc(carps_badauth);
686 			sc->sc_if.if_ierrors++;
687 			CARP_LOG(LOG_WARNING, sc,
688 			    ("replay or network loop detected"));
689 		}
690 		if_put(ifp2);
691 		goto rele;
692 	}
693 
694 	sc_tv.tv_sec = sc->sc_advbase;
695 	sc_tv.tv_usec = vhe->advskew * 1000000 / 256;
696 	ch_tv.tv_sec = ch->carp_advbase;
697 	ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
698 
699 	switch (vhe->state) {
700 	case INIT:
701 		break;
702 	case MASTER:
703 		/*
704 		 * If we receive an advertisement from a master who's going to
705 		 * be more frequent than us, and whose demote count is not higher
706 		 * than ours, go into BACKUP state. If his demote count is lower,
707 		 * also go into BACKUP.
708 		 */
709 		if (((timercmp(&sc_tv, &ch_tv, >) ||
710 		    timercmp(&sc_tv, &ch_tv, ==)) &&
711 		    (ch->carp_demote <= carp_group_demote_count(sc))) ||
712 		    ch->carp_demote < carp_group_demote_count(sc)) {
713 			timeout_del(&vhe->ad_tmo);
714 			carp_set_state(vhe, BACKUP);
715 			carp_setrun(vhe, 0);
716 		}
717 		break;
718 	case BACKUP:
719 		/*
720 		 * If we're pre-empting masters who advertise slower than us,
721 		 * and do not have a better demote count, treat them as down.
722 		 *
723 		 */
724 		if (atomic_load_int(&carpctl_preempt) &&
725 		    timercmp(&sc_tv, &ch_tv, <) &&
726 		    ch->carp_demote >= carp_group_demote_count(sc)) {
727 			carp_master_down(vhe);
728 			break;
729 		}
730 
731 		/*
732 		 * Take over masters advertising with a higher demote count,
733 		 * regardless of CARPCTL_PREEMPT.
734 		 */
735 		if (ch->carp_demote > carp_group_demote_count(sc)) {
736 			carp_master_down(vhe);
737 			break;
738 		}
739 
740 		/*
741 		 *  If the master is going to advertise at such a low frequency
742 		 *  that he's guaranteed to time out, we'd might as well just
743 		 *  treat him as timed out now.
744 		 */
745 		sc_tv.tv_sec = sc->sc_advbase * 3;
746 		if (sc->sc_advbase && timercmp(&sc_tv, &ch_tv, <)) {
747 			carp_master_down(vhe);
748 			break;
749 		}
750 
751 		/*
752 		 * Otherwise, we reset the counter and wait for the next
753 		 * advertisement.
754 		 */
755 		carp_setrun(vhe, af);
756 		break;
757 	}
758 
759 rele:
760 	if_put(ifp0);
761 	m_freem(m);
762 	return;
763 }
764 
765 int
carp_sysctl_carpstat(void * oldp,size_t * oldlenp,void * newp)766 carp_sysctl_carpstat(void *oldp, size_t *oldlenp, void *newp)
767 {
768 	struct carpstats carpstat;
769 
770 	CTASSERT(sizeof(carpstat) == (carps_ncounters * sizeof(uint64_t)));
771 	memset(&carpstat, 0, sizeof carpstat);
772 	counters_read(carpcounters, (uint64_t *)&carpstat, carps_ncounters,
773 	    NULL);
774 	return (sysctl_rdstruct(oldp, oldlenp, newp,
775 	    &carpstat, sizeof(carpstat)));
776 }
777 
778 int
carp_sysctl(int * name,u_int namelen,void * oldp,size_t * oldlenp,void * newp,size_t newlen)779 carp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
780     size_t newlen)
781 {
782 	/* All sysctl names at this level are terminal. */
783 	if (namelen != 1)
784 		return (ENOTDIR);
785 
786 	switch (name[0]) {
787 	case CARPCTL_STATS:
788 		return (carp_sysctl_carpstat(oldp, oldlenp, newp));
789 	default:
790 		return (sysctl_bounded_arr(carpctl_vars, nitems(carpctl_vars),
791 		    name, namelen, oldp, oldlenp, newp, newlen));
792 	}
793 }
794 
795 /*
796  * Interface side of the CARP implementation.
797  */
798 
799 void
carpattach(int n)800 carpattach(int n)
801 {
802 	if_creategroup("carp");  /* keep around even if empty */
803 	if_clone_attach(&carp_cloner);
804 	carpcounters = counters_alloc(carps_ncounters);
805 }
806 
807 int
carp_clone_create(struct if_clone * ifc,int unit)808 carp_clone_create(struct if_clone *ifc, int unit)
809 {
810 	struct carp_softc *sc;
811 	struct ifnet *ifp;
812 
813 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
814 	refcnt_init(&sc->sc_refcnt);
815 
816 	SRPL_INIT(&sc->carp_vhosts);
817 	sc->sc_vhe_count = 0;
818 	if (carp_new_vhost(sc, 0, 0)) {
819 		free(sc, M_DEVBUF, sizeof(*sc));
820 		return (ENOMEM);
821 	}
822 
823 	task_set(&sc->sc_atask, carp_addr_updated, sc);
824 	task_set(&sc->sc_ltask, carp_carpdev_state, sc);
825 	task_set(&sc->sc_dtask, carpdetach, sc);
826 #ifdef INET6
827 	task_set(&sc->sc_itask, carp_if_linkstate, sc);
828 #endif /* INET6 */
829 
830 	sc->sc_suppress = 0;
831 	sc->sc_advbase = CARP_DFLTINTV;
832 	sc->sc_naddrs = sc->sc_naddrs6 = 0;
833 #ifdef INET6
834 	sc->sc_im6o.im6o_hlim = CARP_DFLTTL;
835 #endif /* INET6 */
836 	sc->sc_imo.imo_membership = mallocarray(IP_MIN_MEMBERSHIPS,
837 	    sizeof(struct in_multi *), M_IPMOPTS, M_WAITOK|M_ZERO);
838 	sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
839 
840 	LIST_INIT(&sc->carp_mc_listhead);
841 	ifp = &sc->sc_if;
842 	ifp->if_softc = sc;
843 	snprintf(ifp->if_xname, sizeof ifp->if_xname, "%s%d", ifc->ifc_name,
844 	    unit);
845 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
846 	ifp->if_ioctl = carp_ioctl;
847 	ifp->if_start = carp_start;
848 	ifp->if_enqueue = carp_enqueue;
849 	ifp->if_xflags = IFXF_CLONED;
850 	if_counters_alloc(ifp);
851 	if_attach(ifp);
852 	ether_ifattach(ifp);
853 	ifp->if_type = IFT_CARP;
854 	ifp->if_sadl->sdl_type = IFT_CARP;
855 	ifp->if_output = carp_output;
856 	ifp->if_priority = IF_CARP_DEFAULT_PRIORITY;
857 	ifp->if_link_state = LINK_STATE_INVALID;
858 
859 	/* Hook carp_addr_updated to cope with address and route changes. */
860 	if_addrhook_add(&sc->sc_if, &sc->sc_atask);
861 #ifdef INET6
862 	if_linkstatehook_add(&sc->sc_if, &sc->sc_itask);
863 #endif /* INET6 */
864 
865 	return (0);
866 }
867 
868 int
carp_new_vhost(struct carp_softc * sc,int vhid,int advskew)869 carp_new_vhost(struct carp_softc *sc, int vhid, int advskew)
870 {
871 	struct carp_vhost_entry *vhe, *vhe0;
872 
873 	vhe = malloc(sizeof(*vhe), M_DEVBUF, M_NOWAIT | M_ZERO);
874 	if (vhe == NULL)
875 		return (ENOMEM);
876 
877 	refcnt_init(&vhe->vhost_refcnt);
878 	carp_sc_ref(NULL, sc); /* give a sc ref to the vhe */
879 	vhe->parent_sc = sc;
880 	vhe->vhid = vhid;
881 	vhe->advskew = advskew;
882 	vhe->state = INIT;
883 	timeout_set_proc(&vhe->ad_tmo, carp_timer_ad, vhe);
884 	timeout_set_proc(&vhe->md_tmo, carp_timer_down, vhe);
885 	timeout_set_proc(&vhe->md6_tmo, carp_timer_down, vhe);
886 
887 	KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
888 
889 	/* mark the first vhe as leader */
890 	if (SRPL_EMPTY_LOCKED(&sc->carp_vhosts)) {
891 		vhe->vhe_leader = 1;
892 		SRPL_INSERT_HEAD_LOCKED(&carp_vh_rc, &sc->carp_vhosts,
893 		    vhe, vhost_entries);
894 		sc->sc_vhe_count = 1;
895 		return (0);
896 	}
897 
898 	SRPL_FOREACH_LOCKED(vhe0, &sc->carp_vhosts, vhost_entries) {
899 		if (SRPL_NEXT_LOCKED(vhe0, vhost_entries) == NULL)
900 			break;
901 	}
902 
903 	SRPL_INSERT_AFTER_LOCKED(&carp_vh_rc, vhe0, vhe, vhost_entries);
904 	sc->sc_vhe_count++;
905 
906 	return (0);
907 }
908 
909 int
carp_clone_destroy(struct ifnet * ifp)910 carp_clone_destroy(struct ifnet *ifp)
911 {
912 	struct carp_softc *sc = ifp->if_softc;
913 
914 	if_addrhook_del(&sc->sc_if, &sc->sc_atask);
915 #ifdef INET6
916 	if_linkstatehook_del(&sc->sc_if, &sc->sc_itask);
917 #endif /* INET6 */
918 
919 	NET_LOCK();
920 	carpdetach(sc);
921 	NET_UNLOCK();
922 
923 	ether_ifdetach(ifp);
924 	if_detach(ifp);
925 	carp_destroy_vhosts(ifp->if_softc);
926 	refcnt_finalize(&sc->sc_refcnt, "carpdtor");
927 	free(sc->sc_imo.imo_membership, M_IPMOPTS,
928 	    sc->sc_imo.imo_max_memberships * sizeof(struct in_multi *));
929 	free(sc, M_DEVBUF, sizeof(*sc));
930 	return (0);
931 }
932 
933 void
carp_del_all_timeouts(struct carp_softc * sc)934 carp_del_all_timeouts(struct carp_softc *sc)
935 {
936 	struct carp_vhost_entry *vhe;
937 
938 	KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
939 	SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
940 		timeout_del(&vhe->ad_tmo);
941 		timeout_del(&vhe->md_tmo);
942 		timeout_del(&vhe->md6_tmo);
943 	}
944 }
945 
946 void
carpdetach(void * arg)947 carpdetach(void *arg)
948 {
949 	struct carp_softc *sc = arg;
950 	struct ifnet *ifp0;
951 	struct srpl *cif;
952 
953 	carp_del_all_timeouts(sc);
954 
955 	if (sc->sc_demote_cnt)
956 		carp_group_demote_adj(&sc->sc_if, -sc->sc_demote_cnt, "detach");
957 	sc->sc_suppress = 0;
958 	sc->sc_sendad_errors = 0;
959 
960 	carp_set_state_all(sc, INIT);
961 	sc->sc_if.if_flags &= ~IFF_UP;
962 	carp_setrun_all(sc, 0);
963 	carp_multicast_cleanup(sc);
964 
965 	ifp0 = if_get(sc->sc_carpdevidx);
966 	if (ifp0 == NULL)
967 		return;
968 
969 	KERNEL_ASSERT_LOCKED(); /* touching if_carp */
970 
971 	cif = &ifp0->if_carp;
972 
973 	SRPL_REMOVE_LOCKED(&carp_sc_rc, cif, sc, carp_softc, sc_list);
974 	sc->sc_carpdevidx = 0;
975 
976 	if_linkstatehook_del(ifp0, &sc->sc_ltask);
977 	if_detachhook_del(ifp0, &sc->sc_dtask);
978 	ifpromisc(ifp0, 0);
979 	if_put(ifp0);
980 }
981 
982 void
carp_destroy_vhosts(struct carp_softc * sc)983 carp_destroy_vhosts(struct carp_softc *sc)
984 {
985 	/* XXX bow out? */
986 	struct carp_vhost_entry *vhe;
987 
988 	KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
989 
990 	while ((vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts)) != NULL) {
991 		SRPL_REMOVE_LOCKED(&carp_vh_rc, &sc->carp_vhosts, vhe,
992 		    carp_vhost_entry, vhost_entries);
993 		carp_vh_unref(NULL, vhe); /* drop last ref */
994 	}
995 	sc->sc_vhe_count = 0;
996 }
997 
998 void
carp_prepare_ad(struct mbuf * m,struct carp_vhost_entry * vhe,struct carp_header * ch)999 carp_prepare_ad(struct mbuf *m, struct carp_vhost_entry *vhe,
1000     struct carp_header *ch)
1001 {
1002 	if (!vhe->vhe_replay_cookie) {
1003 		arc4random_buf(&vhe->vhe_replay_cookie,
1004 		    sizeof(vhe->vhe_replay_cookie));
1005 	}
1006 
1007 	bcopy(&vhe->vhe_replay_cookie, ch->carp_counter,
1008 	    sizeof(ch->carp_counter));
1009 
1010 	/*
1011 	 * For the time being, do not include the IPv6 linklayer addresses
1012 	 * in the HMAC.
1013 	 */
1014 	carp_hmac_generate(vhe, ch->carp_counter, ch->carp_md, HMAC_NOV6LL);
1015 }
1016 
1017 void
carp_send_ad_all(void)1018 carp_send_ad_all(void)
1019 {
1020 	struct ifnet *ifp0;
1021 	struct srpl *cif;
1022 	struct carp_softc *vh;
1023 
1024 	KERNEL_ASSERT_LOCKED(); /* touching if_carp */
1025 
1026 	if (carp_send_all_recur > 0)
1027 		return;
1028 	++carp_send_all_recur;
1029 	TAILQ_FOREACH(ifp0, &ifnetlist, if_list) {
1030 		if (ifp0->if_type != IFT_ETHER)
1031 			continue;
1032 
1033 		cif = &ifp0->if_carp;
1034 		SRPL_FOREACH_LOCKED(vh, cif, sc_list) {
1035 			if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
1036 			    (IFF_UP|IFF_RUNNING)) {
1037 				carp_vhe_send_ad_all(vh);
1038 			}
1039 		}
1040 	}
1041 	--carp_send_all_recur;
1042 }
1043 
1044 void
carp_vhe_send_ad_all(struct carp_softc * sc)1045 carp_vhe_send_ad_all(struct carp_softc *sc)
1046 {
1047 	struct carp_vhost_entry *vhe;
1048 
1049 	KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
1050 
1051 	SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
1052 		if (vhe->state == MASTER)
1053 			carp_send_ad(vhe);
1054 	}
1055 }
1056 
/*
 * Callback wrapper around carp_send_ad(): v is passed through as the
 * vhost entry and the call is made with the net lock held.
 */
void
carp_timer_ad(void *v)
{
	struct carp_vhost_entry *vhe = v;

	NET_LOCK();
	carp_send_ad(vhe);
	NET_UNLOCK();
}
1064 
1065 void
carp_send_ad(struct carp_vhost_entry * vhe)1066 carp_send_ad(struct carp_vhost_entry *vhe)
1067 {
1068 	struct carp_header ch;
1069 	struct timeval tv;
1070 	struct carp_softc *sc = vhe->parent_sc;
1071 	struct carp_header *ch_ptr;
1072 	struct mbuf *m;
1073 	int error, len, advbase, advskew;
1074 	struct ifnet *ifp;
1075 	struct ifaddr *ifa;
1076 	struct sockaddr sa;
1077 
1078 	NET_ASSERT_LOCKED();
1079 
1080 	if ((ifp = if_get(sc->sc_carpdevidx)) == NULL) {
1081 		sc->sc_if.if_oerrors++;
1082 		return;
1083 	}
1084 
1085 	/* bow out if we've gone to backup (the carp interface is going down) */
1086 	if (sc->sc_bow_out) {
1087 		advbase = 255;
1088 		advskew = 255;
1089 	} else {
1090 		advbase = sc->sc_advbase;
1091 		advskew = vhe->advskew;
1092 		tv.tv_sec = advbase;
1093 		if (advbase == 0 && advskew == 0)
1094 			tv.tv_usec = 1 * 1000000 / 256;
1095 		else
1096 			tv.tv_usec = advskew * 1000000 / 256;
1097 	}
1098 
1099 	ch.carp_version = CARP_VERSION;
1100 	ch.carp_type = CARP_ADVERTISEMENT;
1101 	ch.carp_vhid = vhe->vhid;
1102 	ch.carp_demote = carp_group_demote_count(sc) & 0xff;
1103 	ch.carp_advbase = advbase;
1104 	ch.carp_advskew = advskew;
1105 	ch.carp_authlen = 7;	/* XXX DEFINE */
1106 	ch.carp_cksum = 0;
1107 
1108 	sc->cur_vhe = vhe; /* we need the vhe later on the output path */
1109 
1110 	if (sc->sc_naddrs) {
1111 		struct ip *ip;
1112 
1113 		MGETHDR(m, M_DONTWAIT, MT_HEADER);
1114 		if (m == NULL) {
1115 			sc->sc_if.if_oerrors++;
1116 			carpstat_inc(carps_onomem);
1117 			/* XXX maybe less ? */
1118 			goto retry_later;
1119 		}
1120 		len = sizeof(*ip) + sizeof(ch);
1121 		m->m_pkthdr.pf.prio = CARP_IFQ_PRIO;
1122 		m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain;
1123 		m->m_pkthdr.len = len;
1124 		m->m_len = len;
1125 		m_align(m, len);
1126 		ip = mtod(m, struct ip *);
1127 		ip->ip_v = IPVERSION;
1128 		ip->ip_hl = sizeof(*ip) >> 2;
1129 		ip->ip_tos = IPTOS_LOWDELAY;
1130 		ip->ip_len = htons(len);
1131 		ip->ip_id = htons(ip_randomid());
1132 		ip->ip_off = htons(IP_DF);
1133 		ip->ip_ttl = CARP_DFLTTL;
1134 		ip->ip_p = IPPROTO_CARP;
1135 		ip->ip_sum = 0;
1136 
1137 		memset(&sa, 0, sizeof(sa));
1138 		sa.sa_family = AF_INET;
1139 		/* Prefer addresses on the parent interface as source for AD. */
1140 		ifa = ifaof_ifpforaddr(&sa, ifp);
1141 		if (ifa == NULL)
1142 			ifa = ifaof_ifpforaddr(&sa, &sc->sc_if);
1143 		KASSERT(ifa != NULL);
1144 		ip->ip_src.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
1145 		ip->ip_dst.s_addr = sc->sc_peer.s_addr;
1146 		if (IN_MULTICAST(ip->ip_dst.s_addr))
1147 			m->m_flags |= M_MCAST;
1148 
1149 		ch_ptr = (struct carp_header *)(ip + 1);
1150 		bcopy(&ch, ch_ptr, sizeof(ch));
1151 		carp_prepare_ad(m, vhe, ch_ptr);
1152 
1153 		m->m_data += sizeof(*ip);
1154 		ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip));
1155 		m->m_data -= sizeof(*ip);
1156 
1157 		getmicrotime(&sc->sc_if.if_lastchange);
1158 		carpstat_inc(carps_opackets);
1159 
1160 		error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo,
1161 		    NULL, 0);
1162 		if (error &&
1163 		    /* when unicast, the peer's down is not our fault */
1164 		    !(!IN_MULTICAST(sc->sc_peer.s_addr) && error == EHOSTDOWN)){
1165 			if (error == ENOBUFS)
1166 				carpstat_inc(carps_onomem);
1167 			else
1168 				CARP_LOG(LOG_WARNING, sc,
1169 				    ("ip_output failed: %d", error));
1170 			sc->sc_if.if_oerrors++;
1171 			if (sc->sc_sendad_errors < INT_MAX)
1172 				sc->sc_sendad_errors++;
1173 			if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS(sc))
1174 				carp_group_demote_adj(&sc->sc_if, 1,
1175 				    "> snderrors");
1176 			sc->sc_sendad_success = 0;
1177 		} else {
1178 			if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS(sc)) {
1179 				if (++sc->sc_sendad_success >=
1180 				    CARP_SENDAD_MIN_SUCCESS(sc)) {
1181 					carp_group_demote_adj(&sc->sc_if, -1,
1182 					    "< snderrors");
1183 					sc->sc_sendad_errors = 0;
1184 				}
1185 			} else
1186 				sc->sc_sendad_errors = 0;
1187 		}
1188 		if (vhe->vhe_leader) {
1189 			if (sc->sc_delayed_arp > 0)
1190 				sc->sc_delayed_arp--;
1191 			if (sc->sc_delayed_arp == 0) {
1192 				carp_send_arp(sc);
1193 				sc->sc_delayed_arp = -1;
1194 			}
1195 		}
1196 	}
1197 #ifdef INET6
1198 	if (sc->sc_naddrs6) {
1199 		struct ip6_hdr *ip6;
1200 
1201 		MGETHDR(m, M_DONTWAIT, MT_HEADER);
1202 		if (m == NULL) {
1203 			sc->sc_if.if_oerrors++;
1204 			carpstat_inc(carps_onomem);
1205 			/* XXX maybe less ? */
1206 			goto retry_later;
1207 		}
1208 		len = sizeof(*ip6) + sizeof(ch);
1209 		m->m_pkthdr.pf.prio = CARP_IFQ_PRIO;
1210 		m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain;
1211 		m->m_pkthdr.len = len;
1212 		m->m_len = len;
1213 		m_align(m, len);
1214 		m->m_flags |= M_MCAST;
1215 		ip6 = mtod(m, struct ip6_hdr *);
1216 		memset(ip6, 0, sizeof(*ip6));
1217 		ip6->ip6_vfc |= IPV6_VERSION;
1218 		ip6->ip6_hlim = CARP_DFLTTL;
1219 		ip6->ip6_nxt = IPPROTO_CARP;
1220 
1221 		/* set the source address */
1222 		memset(&sa, 0, sizeof(sa));
1223 		sa.sa_family = AF_INET6;
1224 		/* Prefer addresses on the parent interface as source for AD. */
1225 		ifa = ifaof_ifpforaddr(&sa, ifp);
1226 		if (ifa == NULL)
1227 			ifa = ifaof_ifpforaddr(&sa, &sc->sc_if);
1228 		KASSERT(ifa != NULL);
1229 		bcopy(ifatoia6(ifa)->ia_addr.sin6_addr.s6_addr,
1230 		    &ip6->ip6_src, sizeof(struct in6_addr));
1231 		/* set the multicast destination */
1232 
1233 		ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
1234 		ip6->ip6_dst.s6_addr16[1] = htons(ifp->if_index);
1235 		ip6->ip6_dst.s6_addr8[15] = 0x12;
1236 
1237 		ch_ptr = (struct carp_header *)(ip6 + 1);
1238 		bcopy(&ch, ch_ptr, sizeof(ch));
1239 		carp_prepare_ad(m, vhe, ch_ptr);
1240 
1241 		m->m_data += sizeof(*ip6);
1242 		ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6));
1243 		m->m_data -= sizeof(*ip6);
1244 
1245 		getmicrotime(&sc->sc_if.if_lastchange);
1246 		carpstat_inc(carps_opackets6);
1247 
1248 		error = ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL);
1249 		if (error) {
1250 			if (error == ENOBUFS)
1251 				carpstat_inc(carps_onomem);
1252 			else
1253 				CARP_LOG(LOG_WARNING, sc,
1254 				    ("ip6_output failed: %d", error));
1255 			sc->sc_if.if_oerrors++;
1256 			if (sc->sc_sendad_errors < INT_MAX)
1257 				sc->sc_sendad_errors++;
1258 			if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS(sc))
1259 				carp_group_demote_adj(&sc->sc_if, 1,
1260 					    "> snd6errors");
1261 			sc->sc_sendad_success = 0;
1262 		} else {
1263 			if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS(sc)) {
1264 				if (++sc->sc_sendad_success >=
1265 				    CARP_SENDAD_MIN_SUCCESS(sc)) {
1266 					carp_group_demote_adj(&sc->sc_if, -1,
1267 					    "< snd6errors");
1268 					sc->sc_sendad_errors = 0;
1269 				}
1270 			} else
1271 				sc->sc_sendad_errors = 0;
1272 		}
1273 	}
1274 #endif /* INET6 */
1275 
1276 retry_later:
1277 	sc->cur_vhe = NULL;
1278 	if (advbase != 255 || advskew != 255)
1279 		timeout_add_tv(&vhe->ad_tmo, &tv);
1280 	if_put(ifp);
1281 }
1282 
1283 /*
1284  * Broadcast a gratuitous ARP request containing
1285  * the virtual router MAC address for each IP address
1286  * associated with the virtual router.
1287  */
1288 void
carp_send_arp(struct carp_softc * sc)1289 carp_send_arp(struct carp_softc *sc)
1290 {
1291 	struct ifaddr *ifa;
1292 	in_addr_t in;
1293 
1294 	TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
1295 
1296 		if (ifa->ifa_addr->sa_family != AF_INET)
1297 			continue;
1298 
1299 		in = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
1300 		arprequest(&sc->sc_if, &in, &in, sc->sc_ac.ac_enaddr);
1301 	}
1302 }
1303 
1304 #ifdef INET6
1305 void
carp_send_na(struct carp_softc * sc)1306 carp_send_na(struct carp_softc *sc)
1307 {
1308 	struct ifaddr *ifa;
1309 	struct in6_addr *in6, mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
1310 	int i_am_router = (atomic_load_int(&ip6_forwarding) != 0);
1311 	int flags = ND_NA_FLAG_OVERRIDE;
1312 
1313 	if (i_am_router)
1314 		flags |= ND_NA_FLAG_ROUTER;
1315 	mcast.s6_addr16[1] = htons(sc->sc_if.if_index);
1316 
1317 	TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
1318 
1319 		if (ifa->ifa_addr->sa_family != AF_INET6)
1320 			continue;
1321 
1322 		in6 = &ifatoia6(ifa)->ia_addr.sin6_addr;
1323 		nd6_na_output(&sc->sc_if, &mcast, in6, flags, 1, NULL);
1324 	}
1325 }
1326 #endif /* INET6 */
1327 
1328 void
carp_update_lsmask(struct carp_softc * sc)1329 carp_update_lsmask(struct carp_softc *sc)
1330 {
1331 	struct carp_vhost_entry *vhe;
1332 	int count;
1333 
1334 	if (sc->sc_balancing == CARP_BAL_NONE)
1335 		return;
1336 
1337 	sc->sc_lsmask = 0;
1338 	count = 0;
1339 
1340 	KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
1341 	SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
1342 		if (vhe->state == MASTER && count < sizeof(sc->sc_lsmask) * 8)
1343 			sc->sc_lsmask |= 1 << count;
1344 		count++;
1345 	}
1346 	sc->sc_lscount = count;
1347 	CARP_LOG(LOG_DEBUG, sc, ("carp_update_lsmask: %x", sc->sc_lsmask));
1348 }
1349 
1350 int
carp_iamatch(struct ifnet * ifp)1351 carp_iamatch(struct ifnet *ifp)
1352 {
1353 	struct carp_softc *sc = ifp->if_softc;
1354 	struct carp_vhost_entry *vhe;
1355 	struct srp_ref sr;
1356 	int match = 0;
1357 
1358 	vhe = SRPL_FIRST(&sr, &sc->carp_vhosts);
1359 	if (vhe->state == MASTER)
1360 		match = 1;
1361 	SRPL_LEAVE(&sr);
1362 
1363 	return (match);
1364 }
1365 
1366 int
carp_ourether(struct ifnet * ifp,uint8_t * ena)1367 carp_ourether(struct ifnet *ifp, uint8_t *ena)
1368 {
1369 	struct srpl *cif = &ifp->if_carp;
1370 	struct carp_softc *sc;
1371 	struct srp_ref sr;
1372 	int match = 0;
1373 	uint64_t dst = ether_addr_to_e64((struct ether_addr *)ena);
1374 
1375 	KASSERT(ifp->if_type == IFT_ETHER);
1376 
1377 	SRPL_FOREACH(sc, &sr, cif, sc_list) {
1378 		if ((sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
1379 		    (IFF_UP|IFF_RUNNING))
1380 			continue;
1381 		if (carp_vhe_match(sc, dst)) {
1382 			match = 1;
1383 			break;
1384 		}
1385 	}
1386 	SRPL_LEAVE(&sr);
1387 
1388 	return (match);
1389 }
1390 
1391 int
carp_vhe_match(struct carp_softc * sc,uint64_t dst)1392 carp_vhe_match(struct carp_softc *sc, uint64_t dst)
1393 {
1394 	struct carp_vhost_entry *vhe;
1395 	struct srp_ref sr;
1396 	int active = 0;
1397 
1398 	vhe = SRPL_FIRST(&sr, &sc->carp_vhosts);
1399 	active = (vhe->state == MASTER || sc->sc_balancing >= CARP_BAL_IP);
1400 	SRPL_LEAVE(&sr);
1401 
1402 	return (active && (dst ==
1403 	    ether_addr_to_e64((struct ether_addr *)sc->sc_ac.ac_enaddr)));
1404 }
1405 
1406 struct mbuf *
carp_input(struct ifnet * ifp0,struct mbuf * m,uint64_t dst)1407 carp_input(struct ifnet *ifp0, struct mbuf *m, uint64_t dst)
1408 {
1409 	struct srpl *cif;
1410 	struct carp_softc *sc;
1411 	struct srp_ref sr;
1412 
1413 	cif = &ifp0->if_carp;
1414 
1415 	SRPL_FOREACH(sc, &sr, cif, sc_list) {
1416 		if ((sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
1417 		    (IFF_UP|IFF_RUNNING))
1418 			continue;
1419 
1420 		if (carp_vhe_match(sc, dst)) {
1421 			/*
1422 			 * These packets look like layer 2 multicast but they
1423 			 * are unicast at layer 3. With help of the tag the
1424 			 * mbuf's M_MCAST flag can be removed by carp_lsdrop()
1425 			 * after we have passed layer 2.
1426 			 */
1427 			if (sc->sc_balancing == CARP_BAL_IP) {
1428 				struct m_tag *mtag;
1429 				mtag = m_tag_get(PACKET_TAG_CARP_BAL_IP, 0,
1430 				    M_NOWAIT);
1431 				if (mtag == NULL) {
1432 					m_freem(m);
1433 					goto out;
1434 				}
1435 				m_tag_prepend(m, mtag);
1436 			}
1437 			break;
1438 		}
1439 	}
1440 
1441 	if (sc == NULL) {
1442 		SRPL_LEAVE(&sr);
1443 
1444 		if (!ETH64_IS_MULTICAST(dst))
1445 			return (m);
1446 
1447 		/*
1448 		 * XXX Should really check the list of multicast addresses
1449 		 * for each CARP interface _before_ copying.
1450 		 */
1451 		SRPL_FOREACH(sc, &sr, cif, sc_list) {
1452 			struct mbuf *m0;
1453 
1454 			if (!(sc->sc_if.if_flags & IFF_UP))
1455 				continue;
1456 
1457 			m0 = m_dup_pkt(m, ETHER_ALIGN, M_DONTWAIT);
1458 			if (m0 == NULL)
1459 				continue;
1460 
1461 			if_vinput(&sc->sc_if, m0);
1462 		}
1463 		SRPL_LEAVE(&sr);
1464 
1465 		return (m);
1466 	}
1467 
1468 	if_vinput(&sc->sc_if, m);
1469 out:
1470 	SRPL_LEAVE(&sr);
1471 
1472 	return (NULL);
1473 }
1474 
1475 int
carp_lsdrop(struct ifnet * ifp,struct mbuf * m,sa_family_t af,u_int32_t * src,u_int32_t * dst,int drop)1476 carp_lsdrop(struct ifnet *ifp, struct mbuf *m, sa_family_t af, u_int32_t *src,
1477     u_int32_t *dst, int drop)
1478 {
1479 	struct carp_softc *sc;
1480 	u_int32_t fold;
1481 	struct m_tag *mtag;
1482 
1483 	if (ifp->if_type != IFT_CARP)
1484 		return 0;
1485 	sc = ifp->if_softc;
1486 	if (sc->sc_balancing == CARP_BAL_NONE)
1487 		return 0;
1488 
1489 	/*
1490 	 * Remove M_MCAST flag from mbuf of balancing ip traffic, since the fact
1491 	 * that it is layer 2 multicast does not implicate that it is also layer
1492 	 * 3 multicast.
1493 	 */
1494 	if (m->m_flags & M_MCAST &&
1495 	    (mtag = m_tag_find(m, PACKET_TAG_CARP_BAL_IP, NULL))) {
1496 		m_tag_delete(m, mtag);
1497 		m->m_flags &= ~M_MCAST;
1498 	}
1499 
1500 	/*
1501 	 * Return without making a drop decision. This allows to clear the
1502 	 * M_MCAST flag and do nothing else.
1503 	 */
1504 	if (!drop)
1505 		return 0;
1506 
1507 	/*
1508 	 * Never drop carp advertisements.
1509 	 * XXX Bad idea to pass all broadcast / multicast traffic?
1510 	 */
1511 	if (m->m_flags & (M_BCAST|M_MCAST))
1512 		return 0;
1513 
1514 	fold = src[0] ^ dst[0];
1515 #ifdef INET6
1516 	if (af == AF_INET6) {
1517 		int i;
1518 		for (i = 1; i < 4; i++)
1519 			fold ^= src[i] ^ dst[i];
1520 	}
1521 #endif
1522 	if (sc->sc_lscount == 0) /* just to be safe */
1523 		return 1;
1524 
1525 	return ((1 << (ntohl(fold) % sc->sc_lscount)) & sc->sc_lsmask) == 0;
1526 }
1527 
/*
 * Callback wrapper around carp_master_down(): v is passed through as
 * the vhost entry and the call is made with the net lock held.
 */
void
carp_timer_down(void *v)
{
	struct carp_vhost_entry *vhe = v;

	NET_LOCK();
	carp_master_down(vhe);
	NET_UNLOCK();
}
1535 
1536 void
carp_master_down(struct carp_vhost_entry * vhe)1537 carp_master_down(struct carp_vhost_entry *vhe)
1538 {
1539 	struct carp_softc *sc = vhe->parent_sc;
1540 
1541 	NET_ASSERT_LOCKED();
1542 
1543 	switch (vhe->state) {
1544 	case INIT:
1545 		printf("%s: master_down event in INIT state\n",
1546 		    sc->sc_if.if_xname);
1547 		break;
1548 	case MASTER:
1549 		break;
1550 	case BACKUP:
1551 		carp_set_state(vhe, MASTER);
1552 		carp_send_ad(vhe);
1553 		if (sc->sc_balancing == CARP_BAL_NONE && vhe->vhe_leader) {
1554 			carp_send_arp(sc);
1555 			/* Schedule a delayed ARP to deal w/ some L3 switches */
1556 			sc->sc_delayed_arp = 2;
1557 #ifdef INET6
1558 			/* routing entry is not ready yet.  do it later */
1559 			sc->sc_send_na = 1;
1560 #endif /* INET6 */
1561 		}
1562 		carp_setrun(vhe, 0);
1563 		carpstat_inc(carps_preempt);
1564 		break;
1565 	}
1566 }
1567 
1568 void
carp_setrun_all(struct carp_softc * sc,sa_family_t af)1569 carp_setrun_all(struct carp_softc *sc, sa_family_t af)
1570 {
1571 	struct carp_vhost_entry *vhe;
1572 
1573 	KERNEL_ASSERT_LOCKED(); /* touching carp_vhost */
1574 	SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
1575 		carp_setrun(vhe, af);
1576 	}
1577 }
1578 
1579 /*
1580  * When in backup state, af indicates whether to reset the master down timer
1581  * for v4 or v6. If it's set to zero, reset the ones which are already pending.
1582  */
1583 void
carp_setrun(struct carp_vhost_entry * vhe,sa_family_t af)1584 carp_setrun(struct carp_vhost_entry *vhe, sa_family_t af)
1585 {
1586 	struct ifnet *ifp;
1587 	struct timeval tv;
1588 	struct carp_softc *sc = vhe->parent_sc;
1589 
1590 	if ((ifp = if_get(sc->sc_carpdevidx)) == NULL) {
1591 		sc->sc_if.if_flags &= ~IFF_RUNNING;
1592 		carp_set_state_all(sc, INIT);
1593 		return;
1594 	}
1595 
1596 	if (memcmp(((struct arpcom *)ifp)->ac_enaddr,
1597 	    sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN) == 0)
1598 		sc->sc_realmac = 1;
1599 	else
1600 		sc->sc_realmac = 0;
1601 
1602 	if_put(ifp);
1603 
1604 	if (sc->sc_if.if_flags & IFF_UP && vhe->vhid > 0 &&
1605 	    (sc->sc_naddrs || sc->sc_naddrs6) && !sc->sc_suppress) {
1606 		sc->sc_if.if_flags |= IFF_RUNNING;
1607 	} else {
1608 		sc->sc_if.if_flags &= ~IFF_RUNNING;
1609 		return;
1610 	}
1611 
1612 	switch (vhe->state) {
1613 	case INIT:
1614 		carp_set_state(vhe, BACKUP);
1615 		carp_setrun(vhe, 0);
1616 		break;
1617 	case BACKUP:
1618 		timeout_del(&vhe->ad_tmo);
1619 		tv.tv_sec = 3 * sc->sc_advbase;
1620 		if (sc->sc_advbase == 0 && vhe->advskew == 0)
1621 			tv.tv_usec = 3 * 1000000 / 256;
1622 		else if (sc->sc_advbase == 0)
1623 			tv.tv_usec = 3 * vhe->advskew * 1000000 / 256;
1624 		else
1625 			tv.tv_usec = vhe->advskew * 1000000 / 256;
1626 		if (vhe->vhe_leader)
1627 			sc->sc_delayed_arp = -1;
1628 		switch (af) {
1629 		case AF_INET:
1630 			timeout_add_tv(&vhe->md_tmo, &tv);
1631 			break;
1632 #ifdef INET6
1633 		case AF_INET6:
1634 			timeout_add_tv(&vhe->md6_tmo, &tv);
1635 			break;
1636 #endif /* INET6 */
1637 		default:
1638 			if (sc->sc_naddrs)
1639 				timeout_add_tv(&vhe->md_tmo, &tv);
1640 			if (sc->sc_naddrs6)
1641 				timeout_add_tv(&vhe->md6_tmo, &tv);
1642 			break;
1643 		}
1644 		break;
1645 	case MASTER:
1646 		tv.tv_sec = sc->sc_advbase;
1647 		if (sc->sc_advbase == 0 && vhe->advskew == 0)
1648 			tv.tv_usec = 1 * 1000000 / 256;
1649 		else
1650 			tv.tv_usec = vhe->advskew * 1000000 / 256;
1651 		timeout_add_tv(&vhe->ad_tmo, &tv);
1652 		break;
1653 	}
1654 }
1655 
1656 void
carp_multicast_cleanup(struct carp_softc * sc)1657 carp_multicast_cleanup(struct carp_softc *sc)
1658 {
1659 	struct ip_moptions *imo = &sc->sc_imo;
1660 #ifdef INET6
1661 	struct ip6_moptions *im6o = &sc->sc_im6o;
1662 #endif
1663 	u_int16_t n = imo->imo_num_memberships;
1664 
1665 	/* Clean up our own multicast memberships */
1666 	while (n-- > 0) {
1667 		if (imo->imo_membership[n] != NULL) {
1668 			in_delmulti(imo->imo_membership[n]);
1669 			imo->imo_membership[n] = NULL;
1670 		}
1671 	}
1672 	imo->imo_num_memberships = 0;
1673 	imo->imo_ifidx = 0;
1674 
1675 #ifdef INET6
1676 	while (!LIST_EMPTY(&im6o->im6o_memberships)) {
1677 		struct in6_multi_mship *imm =
1678 		    LIST_FIRST(&im6o->im6o_memberships);
1679 
1680 		LIST_REMOVE(imm, i6mm_chain);
1681 		in6_leavegroup(imm);
1682 	}
1683 	im6o->im6o_ifidx = 0;
1684 #endif
1685 
1686 	/* And any other multicast memberships */
1687 	carp_ether_purgemulti(sc);
1688 }
1689 
1690 int
carp_set_ifp(struct carp_softc * sc,struct ifnet * ifp0)1691 carp_set_ifp(struct carp_softc *sc, struct ifnet *ifp0)
1692 {
1693 	struct srpl *cif;
1694 	struct carp_softc *vr, *last = NULL, *after = NULL;
1695 	int myself = 0, error = 0;
1696 
1697 	KASSERT(ifp0->if_index != sc->sc_carpdevidx);
1698 	KERNEL_ASSERT_LOCKED(); /* touching if_carp */
1699 
1700 	if ((ifp0->if_flags & IFF_MULTICAST) == 0)
1701 		return (EADDRNOTAVAIL);
1702 
1703 	if (ifp0->if_type != IFT_ETHER)
1704 		return (EINVAL);
1705 
1706 	cif = &ifp0->if_carp;
1707 	if (carp_check_dup_vhids(sc, cif, NULL))
1708 		return (EINVAL);
1709 
1710 	if ((error = ifpromisc(ifp0, 1)))
1711 		return (error);
1712 
1713 	/* detach from old interface */
1714 	if (sc->sc_carpdevidx != 0)
1715 		carpdetach(sc);
1716 
1717 	/* attach carp interface to physical interface */
1718 	if_detachhook_add(ifp0, &sc->sc_dtask);
1719 	if_linkstatehook_add(ifp0, &sc->sc_ltask);
1720 
1721 	sc->sc_carpdevidx = ifp0->if_index;
1722 	sc->sc_if.if_capabilities = ifp0->if_capabilities &
1723 	    (IFCAP_CSUM_MASK | IFCAP_TSOv4 | IFCAP_TSOv6);
1724 
1725 	SRPL_FOREACH_LOCKED(vr, cif, sc_list) {
1726 		struct carp_vhost_entry *vrhead, *schead;
1727 		last = vr;
1728 
1729 		if (vr == sc)
1730 			myself = 1;
1731 
1732 		vrhead = SRPL_FIRST_LOCKED(&vr->carp_vhosts);
1733 		schead = SRPL_FIRST_LOCKED(&sc->carp_vhosts);
1734 		if (vrhead->vhid < schead->vhid)
1735 			after = vr;
1736 	}
1737 
1738 	if (!myself) {
1739 		/* We're trying to keep things in order */
1740 		if (last == NULL) {
1741 			SRPL_INSERT_HEAD_LOCKED(&carp_sc_rc, cif,
1742 			    sc, sc_list);
1743 		} else if (after == NULL) {
1744 			SRPL_INSERT_AFTER_LOCKED(&carp_sc_rc, last,
1745 			    sc, sc_list);
1746 		} else {
1747 			SRPL_INSERT_AFTER_LOCKED(&carp_sc_rc, after,
1748 			    sc, sc_list);
1749 		}
1750 	}
1751 	if (sc->sc_naddrs || sc->sc_naddrs6)
1752 		sc->sc_if.if_flags |= IFF_UP;
1753 	carp_set_enaddr(sc);
1754 
1755 	carp_carpdev_state(sc);
1756 
1757 	return (0);
1758 }
1759 
1760 void
carp_set_vhe_enaddr(struct carp_vhost_entry * vhe)1761 carp_set_vhe_enaddr(struct carp_vhost_entry *vhe)
1762 {
1763 	struct carp_softc *sc = vhe->parent_sc;
1764 
1765 	if (vhe->vhid != 0 && sc->sc_carpdevidx != 0) {
1766 		if (vhe->vhe_leader && sc->sc_balancing == CARP_BAL_IP)
1767 			vhe->vhe_enaddr[0] = 1;
1768 		else
1769 			vhe->vhe_enaddr[0] = 0;
1770 		vhe->vhe_enaddr[1] = 0;
1771 		vhe->vhe_enaddr[2] = 0x5e;
1772 		vhe->vhe_enaddr[3] = 0;
1773 		vhe->vhe_enaddr[4] = 1;
1774 		vhe->vhe_enaddr[5] = vhe->vhid;
1775 	} else
1776 		memset(vhe->vhe_enaddr, 0, ETHER_ADDR_LEN);
1777 }
1778 
1779 void
carp_set_enaddr(struct carp_softc * sc)1780 carp_set_enaddr(struct carp_softc *sc)
1781 {
1782 	struct carp_vhost_entry *vhe;
1783 
1784 	KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
1785 	SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries)
1786 		carp_set_vhe_enaddr(vhe);
1787 
1788 	vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts);
1789 
1790 	/*
1791 	 * Use the carp lladdr if the running one isn't manually set.
1792 	 * Only compare static parts of the lladdr.
1793 	 */
1794 	if ((memcmp(sc->sc_ac.ac_enaddr + 1, vhe->vhe_enaddr + 1,
1795 	    ETHER_ADDR_LEN - 2) == 0) ||
1796 	    (!sc->sc_ac.ac_enaddr[0] && !sc->sc_ac.ac_enaddr[1] &&
1797 	    !sc->sc_ac.ac_enaddr[2] && !sc->sc_ac.ac_enaddr[3] &&
1798 	    !sc->sc_ac.ac_enaddr[4] && !sc->sc_ac.ac_enaddr[5]))
1799 		bcopy(vhe->vhe_enaddr, sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN);
1800 
1801 	/* Make sure the enaddr has changed before further twiddling. */
1802 	if (memcmp(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN) != 0) {
1803 		bcopy(sc->sc_ac.ac_enaddr, LLADDR(sc->sc_if.if_sadl),
1804 		    ETHER_ADDR_LEN);
1805 		bcopy(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN);
1806 #ifdef INET6
1807 		/*
1808 		 * (re)attach a link-local address which matches
1809 		 * our new MAC address.
1810 		 */
1811 		if (sc->sc_naddrs6)
1812 			in6_ifattach_linklocal(&sc->sc_if, NULL);
1813 #endif
1814 		carp_set_state_all(sc, INIT);
1815 		carp_setrun_all(sc, 0);
1816 	}
1817 }
1818 
1819 void
carp_addr_updated(void * v)1820 carp_addr_updated(void *v)
1821 {
1822 	struct carp_softc *sc = (struct carp_softc *) v;
1823 	struct ifaddr *ifa;
1824 	int new_naddrs = 0, new_naddrs6 = 0;
1825 
1826 	TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
1827 		if (ifa->ifa_addr->sa_family == AF_INET)
1828 			new_naddrs++;
1829 #ifdef INET6
1830 		else if (ifa->ifa_addr->sa_family == AF_INET6)
1831 			new_naddrs6++;
1832 #endif /* INET6 */
1833 	}
1834 
1835 	/* We received address changes from if_addrhooks callback */
1836 	if (new_naddrs != sc->sc_naddrs || new_naddrs6 != sc->sc_naddrs6) {
1837 
1838 		sc->sc_naddrs = new_naddrs;
1839 		sc->sc_naddrs6 = new_naddrs6;
1840 
1841 		/* Re-establish multicast membership removed by in_control */
1842 		if (IN_MULTICAST(sc->sc_peer.s_addr)) {
1843 			if (!in_hasmulti(&sc->sc_peer, &sc->sc_if)) {
1844 				struct in_multi **imm =
1845 				    sc->sc_imo.imo_membership;
1846 				u_int16_t maxmem =
1847 				    sc->sc_imo.imo_max_memberships;
1848 
1849 				memset(&sc->sc_imo, 0, sizeof(sc->sc_imo));
1850 				sc->sc_imo.imo_membership = imm;
1851 				sc->sc_imo.imo_max_memberships = maxmem;
1852 
1853 				if (sc->sc_carpdevidx != 0 &&
1854 				    sc->sc_naddrs > 0)
1855 					carp_join_multicast(sc);
1856 			}
1857 		}
1858 
1859 		if (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) {
1860 			sc->sc_if.if_flags &= ~IFF_UP;
1861 			carp_set_state_all(sc, INIT);
1862 		} else
1863 			carp_hmac_prepare(sc);
1864 	}
1865 
1866 	carp_setrun_all(sc, 0);
1867 }
1868 
1869 int
carp_set_addr(struct carp_softc * sc,struct sockaddr_in * sin)1870 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin)
1871 {
1872 	struct in_addr *in = &sin->sin_addr;
1873 	int error;
1874 
1875 	KASSERT(sc->sc_carpdevidx != 0);
1876 
1877 	/* XXX is this necessary? */
1878 	if (in->s_addr == INADDR_ANY) {
1879 		carp_setrun_all(sc, 0);
1880 		return (0);
1881 	}
1882 
1883 	if (sc->sc_naddrs == 0 && (error = carp_join_multicast(sc)) != 0)
1884 		return (error);
1885 
1886 	carp_set_state_all(sc, INIT);
1887 
1888 	return (0);
1889 }
1890 
1891 int
carp_join_multicast(struct carp_softc * sc)1892 carp_join_multicast(struct carp_softc *sc)
1893 {
1894 	struct ip_moptions *imo = &sc->sc_imo;
1895 	struct in_multi *imm;
1896 	struct in_addr addr;
1897 
1898 	if (!IN_MULTICAST(sc->sc_peer.s_addr))
1899 		return (0);
1900 
1901 	addr.s_addr = sc->sc_peer.s_addr;
1902 	if ((imm = in_addmulti(&addr, &sc->sc_if)) == NULL)
1903 		return (ENOBUFS);
1904 
1905 	imo->imo_membership[0] = imm;
1906 	imo->imo_num_memberships = 1;
1907 	imo->imo_ifidx = sc->sc_if.if_index;
1908 	imo->imo_ttl = CARP_DFLTTL;
1909 	imo->imo_loop = 0;
1910 	return (0);
1911 }
1912 
1913 
1914 #ifdef INET6
1915 int
carp_set_addr6(struct carp_softc * sc,struct sockaddr_in6 * sin6)1916 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
1917 {
1918 	int error;
1919 
1920 	KASSERT(sc->sc_carpdevidx != 0);
1921 
1922 	if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
1923 		carp_setrun_all(sc, 0);
1924 		return (0);
1925 	}
1926 
1927 	if (sc->sc_naddrs6 == 0 && (error = carp_join_multicast6(sc)) != 0)
1928 		return (error);
1929 
1930 	carp_set_state_all(sc, INIT);
1931 
1932 	return (0);
1933 }
1934 
1935 int
carp_join_multicast6(struct carp_softc * sc)1936 carp_join_multicast6(struct carp_softc *sc)
1937 {
1938 	struct in6_multi_mship *imm, *imm2;
1939 	struct ip6_moptions *im6o = &sc->sc_im6o;
1940 	struct sockaddr_in6 addr6;
1941 	int error;
1942 
1943 	/* Join IPv6 CARP multicast group */
1944 	memset(&addr6, 0, sizeof(addr6));
1945 	addr6.sin6_family = AF_INET6;
1946 	addr6.sin6_len = sizeof(addr6);
1947 	addr6.sin6_addr.s6_addr16[0] = htons(0xff02);
1948 	addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index);
1949 	addr6.sin6_addr.s6_addr8[15] = 0x12;
1950 	if ((imm = in6_joingroup(&sc->sc_if,
1951 	    &addr6.sin6_addr, &error)) == NULL) {
1952 		return (error);
1953 	}
1954 	/* join solicited multicast address */
1955 	memset(&addr6.sin6_addr, 0, sizeof(addr6.sin6_addr));
1956 	addr6.sin6_addr.s6_addr16[0] = htons(0xff02);
1957 	addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index);
1958 	addr6.sin6_addr.s6_addr32[1] = 0;
1959 	addr6.sin6_addr.s6_addr32[2] = htonl(1);
1960 	addr6.sin6_addr.s6_addr32[3] = 0;
1961 	addr6.sin6_addr.s6_addr8[12] = 0xff;
1962 	if ((imm2 = in6_joingroup(&sc->sc_if,
1963 	    &addr6.sin6_addr, &error)) == NULL) {
1964 		in6_leavegroup(imm);
1965 		return (error);
1966 	}
1967 
1968 	/* apply v6 multicast membership */
1969 	im6o->im6o_ifidx = sc->sc_if.if_index;
1970 	if (imm)
1971 		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm,
1972 		    i6mm_chain);
1973 	if (imm2)
1974 		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm2,
1975 		    i6mm_chain);
1976 
1977 	return (0);
1978 }
1979 
1980 void
carp_if_linkstate(void * v)1981 carp_if_linkstate(void *v)
1982 {
1983 	struct carp_softc *sc = v;
1984 
1985 	if (sc->sc_send_na) {
1986 		if (sc->sc_if.if_link_state == LINK_STATE_UP)
1987 			carp_send_na(sc);
1988 		sc->sc_send_na = 0;
1989 	}
1990 }
1991 #endif /* INET6 */
1992 
1993 int
carp_ioctl(struct ifnet * ifp,u_long cmd,caddr_t addr)1994 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
1995 {
1996 	struct proc *p = curproc;	/* XXX */
1997 	struct carp_softc *sc = ifp->if_softc;
1998 	struct carp_vhost_entry *vhe;
1999 	struct carpreq carpr;
2000 	struct ifaddr *ifa = (struct ifaddr *)addr;
2001 	struct ifreq *ifr = (struct ifreq *)addr;
2002 	struct ifnet *ifp0 = NULL;
2003 	int i, error = 0;
2004 
2005 	switch (cmd) {
2006 	case SIOCSIFADDR:
2007 		if (sc->sc_carpdevidx == 0)
2008 			return (EINVAL);
2009 
2010 		switch (ifa->ifa_addr->sa_family) {
2011 		case AF_INET:
2012 			sc->sc_if.if_flags |= IFF_UP;
2013 			error = carp_set_addr(sc, satosin(ifa->ifa_addr));
2014 			break;
2015 #ifdef INET6
2016 		case AF_INET6:
2017 			sc->sc_if.if_flags |= IFF_UP;
2018 			error = carp_set_addr6(sc, satosin6(ifa->ifa_addr));
2019 			break;
2020 #endif /* INET6 */
2021 		default:
2022 			error = EAFNOSUPPORT;
2023 			break;
2024 		}
2025 		break;
2026 
2027 	case SIOCSIFFLAGS:
2028 		KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
2029 		vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts);
2030 		if (vhe->state != INIT && !(ifr->ifr_flags & IFF_UP)) {
2031 			carp_del_all_timeouts(sc);
2032 
2033 			/* we need the interface up to bow out */
2034 			sc->sc_if.if_flags |= IFF_UP;
2035 			sc->sc_bow_out = 1;
2036 			carp_vhe_send_ad_all(sc);
2037 			sc->sc_bow_out = 0;
2038 
2039 			sc->sc_if.if_flags &= ~IFF_UP;
2040 			carp_set_state_all(sc, INIT);
2041 			carp_setrun_all(sc, 0);
2042 		} else if (vhe->state == INIT && (ifr->ifr_flags & IFF_UP)) {
2043 			sc->sc_if.if_flags |= IFF_UP;
2044 			carp_setrun_all(sc, 0);
2045 		}
2046 		break;
2047 
2048 	case SIOCSVH:
2049 		KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
2050 		vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts);
2051 		if ((error = suser(p)) != 0)
2052 			break;
2053 		if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
2054 			break;
2055 		error = 1;
2056 		if (carpr.carpr_carpdev[0] != '\0' &&
2057 		    (ifp0 = if_unit(carpr.carpr_carpdev)) == NULL)
2058 			return (EINVAL);
2059 		if (carpr.carpr_peer.s_addr == 0)
2060 			sc->sc_peer.s_addr = INADDR_CARP_GROUP;
2061 		else
2062 			sc->sc_peer.s_addr = carpr.carpr_peer.s_addr;
2063 		if (ifp0 != NULL && ifp0->if_index != sc->sc_carpdevidx) {
2064 			if ((error = carp_set_ifp(sc, ifp0))) {
2065 				if_put(ifp0);
2066 				return (error);
2067 			}
2068 		}
2069 		if_put(ifp0);
2070 		if (vhe->state != INIT && carpr.carpr_state != vhe->state) {
2071 			switch (carpr.carpr_state) {
2072 			case BACKUP:
2073 				timeout_del(&vhe->ad_tmo);
2074 				carp_set_state_all(sc, BACKUP);
2075 				carp_setrun_all(sc, 0);
2076 				break;
2077 			case MASTER:
2078 				KERNEL_ASSERT_LOCKED();
2079 				/* touching carp_vhosts */
2080 				SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts,
2081 				    vhost_entries)
2082 					carp_master_down(vhe);
2083 				break;
2084 			default:
2085 				break;
2086 			}
2087 		}
2088 		if ((error = carp_vhids_ioctl(sc, &carpr)))
2089 			return (error);
2090 		if (carpr.carpr_advbase >= 0) {
2091 			if (carpr.carpr_advbase > 255) {
2092 				error = EINVAL;
2093 				break;
2094 			}
2095 			sc->sc_advbase = carpr.carpr_advbase;
2096 			error--;
2097 		}
2098 		if (memcmp(sc->sc_advskews, carpr.carpr_advskews,
2099 		    sizeof(sc->sc_advskews))) {
2100 			i = 0;
2101 			KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
2102 			SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts,
2103 			    vhost_entries)
2104 				vhe->advskew = carpr.carpr_advskews[i++];
2105 			bcopy(carpr.carpr_advskews, sc->sc_advskews,
2106 			    sizeof(sc->sc_advskews));
2107 		}
2108 		if (sc->sc_balancing != carpr.carpr_balancing) {
2109 			if (carpr.carpr_balancing > CARP_BAL_MAXID) {
2110 				error = EINVAL;
2111 				break;
2112 			}
2113 			sc->sc_balancing = carpr.carpr_balancing;
2114 			carp_set_enaddr(sc);
2115 			carp_update_lsmask(sc);
2116 		}
2117 		bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key));
2118 		if (error > 0)
2119 			error = EINVAL;
2120 		else {
2121 			error = 0;
2122 			carp_hmac_prepare(sc);
2123 			carp_setrun_all(sc, 0);
2124 		}
2125 		break;
2126 
2127 	case SIOCGVH:
2128 		memset(&carpr, 0, sizeof(carpr));
2129 		if ((ifp0 = if_get(sc->sc_carpdevidx)) != NULL)
2130 			strlcpy(carpr.carpr_carpdev, ifp0->if_xname, IFNAMSIZ);
2131 		if_put(ifp0);
2132 		i = 0;
2133 		KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
2134 		SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
2135 			carpr.carpr_vhids[i] = vhe->vhid;
2136 			carpr.carpr_advskews[i] = vhe->advskew;
2137 			carpr.carpr_states[i] = vhe->state;
2138 			i++;
2139 		}
2140 		carpr.carpr_advbase = sc->sc_advbase;
2141 		carpr.carpr_balancing = sc->sc_balancing;
2142 		if (suser(p) == 0)
2143 			bcopy(sc->sc_key, carpr.carpr_key,
2144 			    sizeof(carpr.carpr_key));
2145 		carpr.carpr_peer.s_addr = sc->sc_peer.s_addr;
2146 		error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
2147 		break;
2148 
2149 	case SIOCADDMULTI:
2150 		error = carp_ether_addmulti(sc, ifr);
2151 		break;
2152 
2153 	case SIOCDELMULTI:
2154 		error = carp_ether_delmulti(sc, ifr);
2155 		break;
2156 	case SIOCAIFGROUP:
2157 	case SIOCDIFGROUP:
2158 		if (sc->sc_demote_cnt)
2159 			carp_ifgroup_ioctl(ifp, cmd, addr);
2160 		break;
2161 	case SIOCSIFGATTR:
2162 		carp_ifgattr_ioctl(ifp, cmd, addr);
2163 		break;
2164 	default:
2165 		error = ENOTTY;
2166 	}
2167 
2168 	if (memcmp(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN) != 0)
2169 		carp_set_enaddr(sc);
2170 	return (error);
2171 }
2172 
2173 int
carp_check_dup_vhids(struct carp_softc * sc,struct srpl * cif,struct carpreq * carpr)2174 carp_check_dup_vhids(struct carp_softc *sc, struct srpl *cif,
2175     struct carpreq *carpr)
2176 {
2177 	struct carp_softc *vr;
2178 	struct carp_vhost_entry *vhe, *vhe0;
2179 	int i;
2180 
2181 	KERNEL_ASSERT_LOCKED(); /* touching if_carp + carp_vhosts */
2182 
2183 	SRPL_FOREACH_LOCKED(vr, cif, sc_list) {
2184 		if (vr == sc)
2185 			continue;
2186 		SRPL_FOREACH_LOCKED(vhe, &vr->carp_vhosts, vhost_entries) {
2187 			if (carpr) {
2188 				for (i = 0; carpr->carpr_vhids[i]; i++) {
2189 					if (vhe->vhid == carpr->carpr_vhids[i])
2190 						return (EINVAL);
2191 				}
2192 			}
2193 			SRPL_FOREACH_LOCKED(vhe0, &sc->carp_vhosts,
2194 			    vhost_entries) {
2195 				if (vhe->vhid == vhe0->vhid)
2196 					return (EINVAL);
2197 			}
2198 		}
2199 	}
2200 	return (0);
2201 }
2202 
2203 int
carp_vhids_ioctl(struct carp_softc * sc,struct carpreq * carpr)2204 carp_vhids_ioctl(struct carp_softc *sc, struct carpreq *carpr)
2205 {
2206 	int i, j;
2207 	u_int8_t taken_vhids[256];
2208 
2209 	if (carpr->carpr_vhids[0] == 0 ||
2210 	    !memcmp(sc->sc_vhids, carpr->carpr_vhids, sizeof(sc->sc_vhids)))
2211 		return (0);
2212 
2213 	memset(taken_vhids, 0, sizeof(taken_vhids));
2214 	for (i = 0; carpr->carpr_vhids[i]; i++) {
2215 		struct ifnet *ifp;
2216 
2217 		if (taken_vhids[carpr->carpr_vhids[i]])
2218 			return (EINVAL);
2219 		taken_vhids[carpr->carpr_vhids[i]] = 1;
2220 
2221 		if ((ifp = if_get(sc->sc_carpdevidx)) != NULL) {
2222 			struct srpl *cif;
2223 			cif = &ifp->if_carp;
2224 			if (carp_check_dup_vhids(sc, cif, carpr)) {
2225 				if_put(ifp);
2226 				return (EINVAL);
2227 			}
2228 		}
2229 		if_put(ifp);
2230 		if (carpr->carpr_advskews[i] >= 255)
2231 			return (EINVAL);
2232 	}
2233 	/* set sane balancing defaults */
2234 	if (i <= 1)
2235 		carpr->carpr_balancing = CARP_BAL_NONE;
2236 	else if (carpr->carpr_balancing == CARP_BAL_NONE &&
2237 	    sc->sc_balancing == CARP_BAL_NONE)
2238 		carpr->carpr_balancing = CARP_BAL_IP;
2239 
2240 	/* destroy all */
2241 	carp_del_all_timeouts(sc);
2242 	carp_destroy_vhosts(sc);
2243 	memset(sc->sc_vhids, 0, sizeof(sc->sc_vhids));
2244 
2245 	/* sort vhosts list by vhid */
2246 	for (j = 1; j <= 255; j++) {
2247 		for (i = 0; carpr->carpr_vhids[i]; i++) {
2248 			if (carpr->carpr_vhids[i] != j)
2249 				continue;
2250 			if (carp_new_vhost(sc, carpr->carpr_vhids[i],
2251 			    carpr->carpr_advskews[i]))
2252 				return (ENOMEM);
2253 			sc->sc_vhids[i] = carpr->carpr_vhids[i];
2254 			sc->sc_advskews[i] = carpr->carpr_advskews[i];
2255 		}
2256 	}
2257 	carp_set_enaddr(sc);
2258 	carp_set_state_all(sc, INIT);
2259 	return (0);
2260 }
2261 
2262 void
carp_ifgroup_ioctl(struct ifnet * ifp,u_long cmd,caddr_t addr)2263 carp_ifgroup_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
2264 {
2265 	struct ifgroupreq *ifgr = (struct ifgroupreq *)addr;
2266 	struct ifg_list	*ifgl;
2267 	int *dm, adj;
2268 
2269 	if (!strcmp(ifgr->ifgr_group, IFG_ALL))
2270 		return;
2271 	adj = ((struct carp_softc *)ifp->if_softc)->sc_demote_cnt;
2272 	if (cmd == SIOCDIFGROUP)
2273 		adj = adj * -1;
2274 
2275 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2276 		if (!strcmp(ifgl->ifgl_group->ifg_group, ifgr->ifgr_group)) {
2277 			dm = &ifgl->ifgl_group->ifg_carp_demoted;
2278 			if (*dm + adj >= 0)
2279 				*dm += adj;
2280 			else
2281 				*dm = 0;
2282 		}
2283 }
2284 
2285 void
carp_ifgattr_ioctl(struct ifnet * ifp,u_long cmd,caddr_t addr)2286 carp_ifgattr_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
2287 {
2288 	struct ifgroupreq *ifgr = (struct ifgroupreq *)addr;
2289 	struct carp_softc *sc = ifp->if_softc;
2290 
2291 	if (ifgr->ifgr_attrib.ifg_carp_demoted > 0 && (sc->sc_if.if_flags &
2292 	    (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING))
2293 		carp_vhe_send_ad_all(sc);
2294 }
2295 
2296 void
carp_start(struct ifnet * ifp)2297 carp_start(struct ifnet *ifp)
2298 {
2299 	struct carp_softc *sc = ifp->if_softc;
2300 	struct ifnet *ifp0;
2301 	struct mbuf *m;
2302 
2303 	if ((ifp0 = if_get(sc->sc_carpdevidx)) == NULL) {
2304 		ifq_purge(&ifp->if_snd);
2305 		return;
2306 	}
2307 
2308 	while ((m = ifq_dequeue(&ifp->if_snd)) != NULL)
2309 		carp_transmit(sc, ifp0, m);
2310 	if_put(ifp0);
2311 }
2312 
2313 void
carp_transmit(struct carp_softc * sc,struct ifnet * ifp0,struct mbuf * m)2314 carp_transmit(struct carp_softc *sc, struct ifnet *ifp0, struct mbuf *m)
2315 {
2316 	struct ifnet *ifp = &sc->sc_if;
2317 
2318 #if NBPFILTER > 0
2319 	{
2320 		caddr_t if_bpf = ifp->if_bpf;
2321 		if (if_bpf)
2322 			bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT);
2323 	}
2324 #endif /* NBPFILTER > 0 */
2325 
2326 	if (!ISSET(ifp0->if_flags, IFF_RUNNING)) {
2327 		counters_inc(ifp->if_counters, ifc_oerrors);
2328 		m_freem(m);
2329 		return;
2330 	}
2331 
2332 	/*
2333 	 * Do not leak the multicast address when sending
2334 	 * advertisements in 'ip' and 'ip-stealth' balancing
2335 	 * modes.
2336 	 */
2337 	if (sc->sc_balancing == CARP_BAL_IP ||
2338 	    sc->sc_balancing == CARP_BAL_IPSTEALTH) {
2339 		struct ether_header *eh = mtod(m, struct ether_header *);
2340 		memcpy(eh->ether_shost, sc->sc_ac.ac_enaddr,
2341 		    sizeof(eh->ether_shost));
2342 	}
2343 
2344 	if (if_enqueue(ifp0, m))
2345 		counters_inc(ifp->if_counters, ifc_oerrors);
2346 }
2347 
2348 int
carp_enqueue(struct ifnet * ifp,struct mbuf * m)2349 carp_enqueue(struct ifnet *ifp, struct mbuf *m)
2350 {
2351 	struct carp_softc *sc = ifp->if_softc;
2352 	struct ifnet *ifp0;
2353 
2354 	/* no ifq_is_priq, cos hfsc on carp doesn't make sense */
2355 
2356 	/*
2357 	 * If the parent of this carp(4) got destroyed while
2358 	 * `m' was being processed, silently drop it.
2359 	 */
2360 	if ((ifp0 = if_get(sc->sc_carpdevidx)) == NULL) {
2361 		m_freem(m);
2362 		return (0);
2363 	}
2364 
2365 	counters_pkt(ifp->if_counters,
2366 	    ifc_opackets, ifc_obytes, m->m_pkthdr.len);
2367 	carp_transmit(sc, ifp0, m);
2368 	if_put(ifp0);
2369 
2370 	return (0);
2371 }
2372 
2373 int
carp_output(struct ifnet * ifp,struct mbuf * m,struct sockaddr * sa,struct rtentry * rt)2374 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
2375     struct rtentry *rt)
2376 {
2377 	struct carp_softc *sc = ((struct carp_softc *)ifp->if_softc);
2378 	struct carp_vhost_entry *vhe;
2379 	struct srp_ref sr;
2380 	int ismaster;
2381 
2382 	if (sc->cur_vhe == NULL) {
2383 		vhe = SRPL_FIRST(&sr, &sc->carp_vhosts);
2384 		ismaster = (vhe->state == MASTER);
2385 		SRPL_LEAVE(&sr);
2386 	} else {
2387 		ismaster = (sc->cur_vhe->state == MASTER);
2388 	}
2389 
2390 	if ((sc->sc_balancing == CARP_BAL_NONE && !ismaster)) {
2391 		m_freem(m);
2392 		return (ENETUNREACH);
2393 	}
2394 
2395 	return (ether_output(ifp, m, sa, rt));
2396 }
2397 
2398 void
carp_set_state_all(struct carp_softc * sc,int state)2399 carp_set_state_all(struct carp_softc *sc, int state)
2400 {
2401 	struct carp_vhost_entry *vhe;
2402 
2403 	KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
2404 
2405 	SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
2406 		if (vhe->state == state)
2407 			continue;
2408 
2409 		carp_set_state(vhe, state);
2410 	}
2411 }
2412 
2413 void
carp_set_state(struct carp_vhost_entry * vhe,int state)2414 carp_set_state(struct carp_vhost_entry *vhe, int state)
2415 {
2416 	struct carp_softc *sc = vhe->parent_sc;
2417 	static const char *carp_states[] = { CARP_STATES };
2418 	int loglevel;
2419 	struct carp_vhost_entry *vhe0;
2420 
2421 	KASSERT(vhe->state != state);
2422 
2423 	if (vhe->state == INIT || state == INIT)
2424 		loglevel = LOG_WARNING;
2425 	else
2426 		loglevel = LOG_CRIT;
2427 
2428 	if (sc->sc_vhe_count > 1)
2429 		CARP_LOG(loglevel, sc,
2430 		    ("state transition (vhid %d): %s -> %s", vhe->vhid,
2431 		    carp_states[vhe->state], carp_states[state]));
2432 	else
2433 		CARP_LOG(loglevel, sc,
2434 		    ("state transition: %s -> %s",
2435 		    carp_states[vhe->state], carp_states[state]));
2436 
2437 	vhe->state = state;
2438 	carp_update_lsmask(sc);
2439 
2440 	KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
2441 
2442 	sc->sc_if.if_link_state = LINK_STATE_INVALID;
2443 	SRPL_FOREACH_LOCKED(vhe0, &sc->carp_vhosts, vhost_entries) {
2444 		/*
2445 		 * Link must be up if at least one vhe is in state MASTER to
2446 		 * bring or keep route up.
2447 		 */
2448 		if (vhe0->state == MASTER) {
2449 			sc->sc_if.if_link_state = LINK_STATE_UP;
2450 			break;
2451 		} else if (vhe0->state == BACKUP) {
2452 			sc->sc_if.if_link_state = LINK_STATE_DOWN;
2453 		}
2454 	}
2455 	if_link_state_change(&sc->sc_if);
2456 }
2457 
2458 void
carp_group_demote_adj(struct ifnet * ifp,int adj,char * reason)2459 carp_group_demote_adj(struct ifnet *ifp, int adj, char *reason)
2460 {
2461 	struct ifg_list	*ifgl;
2462 	int *dm, need_ad;
2463 	struct carp_softc *nil = NULL;
2464 
2465 	if (ifp->if_type == IFT_CARP) {
2466 		dm = &((struct carp_softc *)ifp->if_softc)->sc_demote_cnt;
2467 		if (*dm + adj >= 0)
2468 			*dm += adj;
2469 		else
2470 			*dm = 0;
2471 	}
2472 
2473 	need_ad = 0;
2474 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
2475 		if (!strcmp(ifgl->ifgl_group->ifg_group, IFG_ALL))
2476 			continue;
2477 		dm = &ifgl->ifgl_group->ifg_carp_demoted;
2478 
2479 		if (*dm + adj >= 0)
2480 			*dm += adj;
2481 		else
2482 			*dm = 0;
2483 
2484 		if (adj > 0 && *dm == 1)
2485 			need_ad = 1;
2486 		CARP_LOG(LOG_ERR, nil,
2487 		    ("%s demoted group %s by %d to %d (%s)",
2488 		    ifp->if_xname, ifgl->ifgl_group->ifg_group,
2489 		    adj, *dm, reason));
2490 	}
2491 	if (need_ad)
2492 		carp_send_ad_all();
2493 }
2494 
2495 int
carp_group_demote_count(struct carp_softc * sc)2496 carp_group_demote_count(struct carp_softc *sc)
2497 {
2498 	struct ifg_list	*ifgl;
2499 	int count = 0;
2500 
2501 	TAILQ_FOREACH(ifgl, &sc->sc_if.if_groups, ifgl_next)
2502 		count += ifgl->ifgl_group->ifg_carp_demoted;
2503 
2504 	if (count == 0 && sc->sc_demote_cnt)
2505 		count = sc->sc_demote_cnt;
2506 
2507 	return (count > 255 ? 255 : count);
2508 }
2509 
2510 void
carp_carpdev_state(void * v)2511 carp_carpdev_state(void *v)
2512 {
2513 	struct carp_softc *sc = v;
2514 	struct ifnet *ifp0;
2515 	int suppressed = sc->sc_suppress;
2516 
2517 	if ((ifp0 = if_get(sc->sc_carpdevidx)) == NULL)
2518 		return;
2519 
2520 	if (ifp0->if_link_state == LINK_STATE_DOWN ||
2521 	    !(ifp0->if_flags & IFF_UP)) {
2522 		sc->sc_if.if_flags &= ~IFF_RUNNING;
2523 		carp_del_all_timeouts(sc);
2524 		carp_set_state_all(sc, INIT);
2525 		sc->sc_suppress = 1;
2526 		carp_setrun_all(sc, 0);
2527 		if (!suppressed)
2528 			carp_group_demote_adj(&sc->sc_if, 1, "carpdev");
2529 	} else if (suppressed) {
2530 		carp_set_state_all(sc, INIT);
2531 		sc->sc_suppress = 0;
2532 		carp_setrun_all(sc, 0);
2533 		carp_group_demote_adj(&sc->sc_if, -1, "carpdev");
2534 	}
2535 
2536 	if_put(ifp0);
2537 }
2538 
2539 int
carp_ether_addmulti(struct carp_softc * sc,struct ifreq * ifr)2540 carp_ether_addmulti(struct carp_softc *sc, struct ifreq *ifr)
2541 {
2542 	struct ifnet *ifp0;
2543 	struct carp_mc_entry *mc;
2544 	u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN];
2545 	int error;
2546 
2547 	ifp0 = if_get(sc->sc_carpdevidx);
2548 	if (ifp0 == NULL)
2549 		return (EINVAL);
2550 
2551 	error = ether_addmulti(ifr, (struct arpcom *)&sc->sc_ac);
2552 	if (error != ENETRESET) {
2553 		if_put(ifp0);
2554 		return (error);
2555 	}
2556 
2557 	/*
2558 	 * This is new multicast address.  We have to tell parent
2559 	 * about it.  Also, remember this multicast address so that
2560 	 * we can delete them on unconfigure.
2561 	 */
2562 	mc = malloc(sizeof(*mc), M_DEVBUF, M_NOWAIT);
2563 	if (mc == NULL) {
2564 		error = ENOMEM;
2565 		goto alloc_failed;
2566 	}
2567 
2568 	/*
2569 	 * As ether_addmulti() returns ENETRESET, following two
2570 	 * statement shouldn't fail.
2571 	 */
2572 	(void)ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi);
2573 	ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, mc->mc_enm);
2574 	memcpy(&mc->mc_addr, &ifr->ifr_addr, ifr->ifr_addr.sa_len);
2575 	LIST_INSERT_HEAD(&sc->carp_mc_listhead, mc, mc_entries);
2576 
2577 	error = (*ifp0->if_ioctl)(ifp0, SIOCADDMULTI, (caddr_t)ifr);
2578 	if (error != 0)
2579 		goto ioctl_failed;
2580 
2581 	if_put(ifp0);
2582 
2583 	return (error);
2584 
2585  ioctl_failed:
2586 	LIST_REMOVE(mc, mc_entries);
2587 	free(mc, M_DEVBUF, sizeof(*mc));
2588  alloc_failed:
2589 	(void)ether_delmulti(ifr, (struct arpcom *)&sc->sc_ac);
2590 	if_put(ifp0);
2591 
2592 	return (error);
2593 }
2594 
2595 int
carp_ether_delmulti(struct carp_softc * sc,struct ifreq * ifr)2596 carp_ether_delmulti(struct carp_softc *sc, struct ifreq *ifr)
2597 {
2598 	struct ifnet *ifp0;
2599 	struct ether_multi *enm;
2600 	struct carp_mc_entry *mc;
2601 	u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN];
2602 	int error;
2603 
2604 	ifp0 = if_get(sc->sc_carpdevidx);
2605 	if (ifp0 == NULL)
2606 		return (EINVAL);
2607 
2608 	/*
2609 	 * Find a key to lookup carp_mc_entry.  We have to do this
2610 	 * before calling ether_delmulti for obvious reason.
2611 	 */
2612 	if ((error = ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi)) != 0)
2613 		goto rele;
2614 	ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, enm);
2615 	if (enm == NULL) {
2616 		error = EINVAL;
2617 		goto rele;
2618 	}
2619 
2620 	LIST_FOREACH(mc, &sc->carp_mc_listhead, mc_entries)
2621 		if (mc->mc_enm == enm)
2622 			break;
2623 
2624 	/* We won't delete entries we didn't add */
2625 	if (mc == NULL) {
2626 		error = EINVAL;
2627 		goto rele;
2628 	}
2629 
2630 	error = ether_delmulti(ifr, (struct arpcom *)&sc->sc_ac);
2631 	if (error != ENETRESET)
2632 		goto rele;
2633 
2634 	/* We no longer use this multicast address.  Tell parent so. */
2635 	error = (*ifp0->if_ioctl)(ifp0, SIOCDELMULTI, (caddr_t)ifr);
2636 	if (error == 0) {
2637 		/* And forget about this address. */
2638 		LIST_REMOVE(mc, mc_entries);
2639 		free(mc, M_DEVBUF, sizeof(*mc));
2640 	} else
2641 		(void)ether_addmulti(ifr, (struct arpcom *)&sc->sc_ac);
2642 rele:
2643 	if_put(ifp0);
2644 	return (error);
2645 }
2646 
2647 /*
2648  * Delete any multicast address we have asked to add from parent
2649  * interface.  Called when the carp is being unconfigured.
2650  */
2651 void
carp_ether_purgemulti(struct carp_softc * sc)2652 carp_ether_purgemulti(struct carp_softc *sc)
2653 {
2654 	struct ifnet *ifp0;		/* Parent. */
2655 	struct carp_mc_entry *mc;
2656 	union {
2657 		struct ifreq ifreq;
2658 		struct {
2659 			char ifr_name[IFNAMSIZ];
2660 			struct sockaddr_storage ifr_ss;
2661 		} ifreq_storage;
2662 	} u;
2663 	struct ifreq *ifr = &u.ifreq;
2664 
2665 	if ((ifp0 = if_get(sc->sc_carpdevidx)) == NULL)
2666 		return;
2667 
2668 	memcpy(ifr->ifr_name, ifp0->if_xname, IFNAMSIZ);
2669 	while ((mc = LIST_FIRST(&sc->carp_mc_listhead)) != NULL) {
2670 		memcpy(&ifr->ifr_addr, &mc->mc_addr, mc->mc_addr.ss_len);
2671 		(void)(*ifp0->if_ioctl)(ifp0, SIOCDELMULTI, (caddr_t)ifr);
2672 		LIST_REMOVE(mc, mc_entries);
2673 		free(mc, M_DEVBUF, sizeof(*mc));
2674 	}
2675 
2676 	if_put(ifp0);
2677 }
2678 
2679 void
carp_vh_ref(void * null,void * v)2680 carp_vh_ref(void *null, void *v)
2681 {
2682 	struct carp_vhost_entry *vhe = v;
2683 
2684 	refcnt_take(&vhe->vhost_refcnt);
2685 }
2686 
2687 void
carp_vh_unref(void * null,void * v)2688 carp_vh_unref(void *null, void *v)
2689 {
2690 	struct carp_vhost_entry *vhe = v;
2691 
2692 	if (refcnt_rele(&vhe->vhost_refcnt)) {
2693 		carp_sc_unref(NULL, vhe->parent_sc);
2694 		free(vhe, M_DEVBUF, sizeof(*vhe));
2695 	}
2696 }
2697 
2698 void
carp_sc_ref(void * null,void * s)2699 carp_sc_ref(void *null, void *s)
2700 {
2701 	struct carp_softc *sc = s;
2702 
2703 	refcnt_take(&sc->sc_refcnt);
2704 }
2705 
2706 void
carp_sc_unref(void * null,void * s)2707 carp_sc_unref(void *null, void *s)
2708 {
2709 	struct carp_softc *sc = s;
2710 
2711 	refcnt_rele_wake(&sc->sc_refcnt);
2712 }
2713