1 /* $OpenBSD: ip_carp.c,v 1.365 2024/12/19 22:10:35 mvs Exp $ */
2
3 /*
4 * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
5 * Copyright (c) 2003 Ryan McBride. All rights reserved.
6 * Copyright (c) 2006-2008 Marco Pfatschbacher. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
21 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
25 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
26 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
27 * THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30 /*
31 * TODO:
32 * - iface reconfigure
33 * - support for hardware checksum calculations;
34 *
35 */
36
37 #include "ether.h"
38
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/mbuf.h>
42 #include <sys/socket.h>
43 #include <sys/socketvar.h>
44 #include <sys/timeout.h>
45 #include <sys/ioctl.h>
46 #include <sys/errno.h>
47 #include <sys/device.h>
48 #include <sys/kernel.h>
49 #include <sys/sysctl.h>
50 #include <sys/syslog.h>
51 #include <sys/refcnt.h>
52
53 #include <net/if.h>
54 #include <net/if_var.h>
55 #include <net/if_types.h>
56 #include <net/netisr.h>
57 #include <net/route.h>
58
59 #include <crypto/sha1.h>
60
61 #include <netinet/in.h>
62 #include <netinet/in_var.h>
63 #include <netinet/ip.h>
64 #include <netinet/ip_var.h>
65 #include <netinet/if_ether.h>
66 #include <netinet/ip_ipsp.h>
67
68 #include <net/if_dl.h>
69
70 #ifdef INET6
71 #include <netinet6/in6_var.h>
72 #include <netinet/icmp6.h>
73 #include <netinet/ip6.h>
74 #include <netinet6/ip6_var.h>
75 #include <netinet6/nd6.h>
76 #include <netinet6/in6_ifattach.h>
77 #endif
78
79 #include "bpfilter.h"
80 #if NBPFILTER > 0
81 #include <net/bpf.h>
82 #endif
83
84 #include "vlan.h"
85 #if NVLAN > 0
86 #include <net/if_vlan_var.h>
87 #endif
88
89 #include <netinet/ip_carp.h>
90
91 /*
92 * Locks used to protect data:
93 * a atomic
94 */
95
96 struct carp_mc_entry {
97 LIST_ENTRY(carp_mc_entry) mc_entries;
98 union {
99 struct ether_multi *mcu_enm;
100 } mc_u;
101 struct sockaddr_storage mc_addr;
102 };
103 #define mc_enm mc_u.mcu_enm
104
/*
 * Indices into carp_vhost_entry.vhe_sha1[]: one precomputed HMAC state
 * per variant.  HMAC_NOV6LL is the variant that leaves scope-embedded
 * IPv6 addresses out of the hash; HMAC_MAX is the number of variants.
 */
enum {
	HMAC_ORIG = 0,
	HMAC_NOV6LL = 1,
	HMAC_MAX = 2
};
106
107 struct carp_vhost_entry {
108 SRPL_ENTRY(carp_vhost_entry) vhost_entries;
109 struct refcnt vhost_refcnt;
110
111 struct carp_softc *parent_sc;
112 int vhe_leader;
113 int vhid;
114 int advskew;
115 enum { INIT = 0, BACKUP, MASTER } state;
116 struct timeout ad_tmo; /* advertisement timeout */
117 struct timeout md_tmo; /* master down timeout */
118 struct timeout md6_tmo; /* master down timeout */
119
120 u_int64_t vhe_replay_cookie;
121
122 /* authentication */
123 #define CARP_HMAC_PAD 64
124 unsigned char vhe_pad[CARP_HMAC_PAD];
125 SHA1_CTX vhe_sha1[HMAC_MAX];
126
127 u_int8_t vhe_enaddr[ETHER_ADDR_LEN];
128 };
129
130 void carp_vh_ref(void *, void *);
131 void carp_vh_unref(void *, void *);
132
133 struct srpl_rc carp_vh_rc =
134 SRPL_RC_INITIALIZER(carp_vh_ref, carp_vh_unref, NULL);
135
136 struct carp_softc {
137 struct arpcom sc_ac;
138 #define sc_if sc_ac.ac_if
139 #define sc_carpdevidx sc_ac.ac_if.if_carpdevidx
140 struct task sc_atask;
141 struct task sc_ltask;
142 struct task sc_dtask;
143 struct ip_moptions sc_imo;
144 #ifdef INET6
145 struct ip6_moptions sc_im6o;
146 struct task sc_itask;
147 #endif /* INET6 */
148
149 SRPL_ENTRY(carp_softc) sc_list;
150 struct refcnt sc_refcnt;
151
152 int sc_suppress;
153 int sc_bow_out;
154 int sc_demote_cnt;
155
156 int sc_sendad_errors;
157 #define CARP_SENDAD_MAX_ERRORS(sc) (3 * (sc)->sc_vhe_count)
158 int sc_sendad_success;
159 #define CARP_SENDAD_MIN_SUCCESS(sc) (3 * (sc)->sc_vhe_count)
160
161 char sc_curlladdr[ETHER_ADDR_LEN];
162
163 SRPL_HEAD(, carp_vhost_entry) carp_vhosts;
164 int sc_vhe_count;
165 u_int8_t sc_vhids[CARP_MAXNODES];
166 u_int8_t sc_advskews[CARP_MAXNODES];
167 u_int8_t sc_balancing;
168
169 int sc_naddrs;
170 int sc_naddrs6;
171 int sc_advbase; /* seconds */
172
173 /* authentication */
174 unsigned char sc_key[CARP_KEY_LEN];
175
176 u_int32_t sc_hashkey[2];
177 u_int32_t sc_lsmask; /* load sharing mask */
178 int sc_lscount; /* # load sharing interfaces (max 32) */
179 int sc_delayed_arp; /* delayed ARP request countdown */
180 #ifdef INET6
181 int sc_send_na; /* send NA when link state up */
182 #endif /* INET6 */
183 int sc_realmac; /* using real mac */
184
185 struct in_addr sc_peer;
186
187 LIST_HEAD(__carp_mchead, carp_mc_entry) carp_mc_listhead;
188 struct carp_vhost_entry *cur_vhe; /* current active vhe */
189 };
190
191 void carp_sc_ref(void *, void *);
192 void carp_sc_unref(void *, void *);
193
194 struct srpl_rc carp_sc_rc =
195 SRPL_RC_INITIALIZER(carp_sc_ref, carp_sc_unref, NULL);
196
197 int carpctl_allow = 1; /* [a] */
198 int carpctl_preempt = 0; /* [a] */
199 int carpctl_log = LOG_CRIT; /* [a] */
200
201 const struct sysctl_bounded_args carpctl_vars[] = {
202 {CARPCTL_ALLOW, &carpctl_allow, INT_MIN, INT_MAX},
203 {CARPCTL_PREEMPT, &carpctl_preempt, INT_MIN, INT_MAX},
204 {CARPCTL_LOG, &carpctl_log, INT_MIN, INT_MAX},
205 };
206
207 struct cpumem *carpcounters;
208
209 int carp_send_all_recur = 0;
210
/*
 * CARP_LOG(l, sc, s): log a message when the carpctl_log level is at
 * least `l'.
 *
 *  l   syslog priority, also passed to log()
 *  sc  carp softc whose interface name prefixes the message; when it is
 *      NULL the prefix is "carp: " instead
 *  s   parenthesised addlog() argument list, e.g. ("bad vhid %d", vhid)
 *
 * `l' is parenthesised in the expansion so that an expression argument
 * cannot change the meaning of the comparison.
 */
#define	CARP_LOG(l, sc, s)						\
	do {								\
		if ((int)atomic_load_int(&carpctl_log) >= (l)) {	\
			if (sc)						\
				log((l), "%s: ",			\
				    (sc)->sc_if.if_xname);		\
			else						\
				log((l), "carp: ");			\
			addlog s;					\
			addlog("\n");					\
		}							\
	} while (0)
223
224 void carp_hmac_prepare(struct carp_softc *);
225 void carp_hmac_prepare_ctx(struct carp_vhost_entry *, u_int8_t);
226 void carp_hmac_generate(struct carp_vhost_entry *, u_int32_t *,
227 unsigned char *, u_int8_t);
228 int carp_hmac_verify(struct carp_vhost_entry *, u_int32_t *,
229 unsigned char *);
230 void carp_proto_input_c(struct ifnet *, struct mbuf *,
231 struct carp_header *, int, sa_family_t);
232 int carp_proto_input_if(struct ifnet *, struct mbuf **, int *, int);
233 #ifdef INET6
234 int carp6_proto_input_if(struct ifnet *, struct mbuf **, int *, int);
235 #endif
236 void carpattach(int);
237 void carpdetach(void *);
238 void carp_prepare_ad(struct mbuf *, struct carp_vhost_entry *,
239 struct carp_header *);
240 void carp_send_ad_all(void);
241 void carp_vhe_send_ad_all(struct carp_softc *);
242 void carp_timer_ad(void *);
243 void carp_send_ad(struct carp_vhost_entry *);
244 void carp_send_arp(struct carp_softc *);
245 void carp_timer_down(void *);
246 void carp_master_down(struct carp_vhost_entry *);
247 int carp_ioctl(struct ifnet *, u_long, caddr_t);
248 int carp_vhids_ioctl(struct carp_softc *, struct carpreq *);
249 int carp_check_dup_vhids(struct carp_softc *, struct srpl *,
250 struct carpreq *);
251 void carp_ifgroup_ioctl(struct ifnet *, u_long, caddr_t);
252 void carp_ifgattr_ioctl(struct ifnet *, u_long, caddr_t);
253 void carp_start(struct ifnet *);
254 int carp_enqueue(struct ifnet *, struct mbuf *);
255 void carp_transmit(struct carp_softc *, struct ifnet *, struct mbuf *);
256 void carp_setrun_all(struct carp_softc *, sa_family_t);
257 void carp_setrun(struct carp_vhost_entry *, sa_family_t);
258 void carp_set_state_all(struct carp_softc *, int);
259 void carp_set_state(struct carp_vhost_entry *, int);
260 void carp_multicast_cleanup(struct carp_softc *);
261 int carp_set_ifp(struct carp_softc *, struct ifnet *);
262 void carp_set_enaddr(struct carp_softc *);
263 void carp_set_vhe_enaddr(struct carp_vhost_entry *);
264 void carp_addr_updated(void *);
265 int carp_set_addr(struct carp_softc *, struct sockaddr_in *);
266 int carp_join_multicast(struct carp_softc *);
267 #ifdef INET6
268 void carp_send_na(struct carp_softc *);
269 int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
270 int carp_join_multicast6(struct carp_softc *);
271 void carp_if_linkstate(void *);
272 #endif
273 int carp_clone_create(struct if_clone *, int);
274 int carp_clone_destroy(struct ifnet *);
275 int carp_ether_addmulti(struct carp_softc *, struct ifreq *);
276 int carp_ether_delmulti(struct carp_softc *, struct ifreq *);
277 void carp_ether_purgemulti(struct carp_softc *);
278 int carp_group_demote_count(struct carp_softc *);
279 void carp_update_lsmask(struct carp_softc *);
280 int carp_new_vhost(struct carp_softc *, int, int);
281 void carp_destroy_vhosts(struct carp_softc *);
282 void carp_del_all_timeouts(struct carp_softc *);
283 int carp_vhe_match(struct carp_softc *, uint64_t);
284
285 struct if_clone carp_cloner =
286 IF_CLONE_INITIALIZER("carp", carp_clone_create, carp_clone_destroy);
287
288 #define carp_cksum(_m, _l) ((u_int16_t)in_cksum((_m), (_l)))
289 #define CARP_IFQ_PRIO 6
290
291 void
carp_hmac_prepare(struct carp_softc * sc)292 carp_hmac_prepare(struct carp_softc *sc)
293 {
294 struct carp_vhost_entry *vhe;
295 u_int8_t i;
296
297 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
298
299 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
300 for (i = 0; i < HMAC_MAX; i++) {
301 carp_hmac_prepare_ctx(vhe, i);
302 }
303 }
304 }
305
306 void
carp_hmac_prepare_ctx(struct carp_vhost_entry * vhe,u_int8_t ctx)307 carp_hmac_prepare_ctx(struct carp_vhost_entry *vhe, u_int8_t ctx)
308 {
309 struct carp_softc *sc = vhe->parent_sc;
310
311 u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
312 u_int8_t vhid = vhe->vhid & 0xff;
313 SHA1_CTX sha1ctx;
314 u_int32_t kmd[5];
315 struct ifaddr *ifa;
316 int i, found;
317 struct in_addr last, cur, in;
318 #ifdef INET6
319 struct in6_addr last6, cur6, in6;
320 #endif /* INET6 */
321
322 /* compute ipad from key */
323 memset(vhe->vhe_pad, 0, sizeof(vhe->vhe_pad));
324 bcopy(sc->sc_key, vhe->vhe_pad, sizeof(sc->sc_key));
325 for (i = 0; i < sizeof(vhe->vhe_pad); i++)
326 vhe->vhe_pad[i] ^= 0x36;
327
328 /* precompute first part of inner hash */
329 SHA1Init(&vhe->vhe_sha1[ctx]);
330 SHA1Update(&vhe->vhe_sha1[ctx], vhe->vhe_pad, sizeof(vhe->vhe_pad));
331 SHA1Update(&vhe->vhe_sha1[ctx], (void *)&version, sizeof(version));
332 SHA1Update(&vhe->vhe_sha1[ctx], (void *)&type, sizeof(type));
333
334 /* generate a key for the arpbalance hash, before the vhid is hashed */
335 if (vhe->vhe_leader) {
336 bcopy(&vhe->vhe_sha1[ctx], &sha1ctx, sizeof(sha1ctx));
337 SHA1Final((unsigned char *)kmd, &sha1ctx);
338 sc->sc_hashkey[0] = kmd[0] ^ kmd[1];
339 sc->sc_hashkey[1] = kmd[2] ^ kmd[3];
340 }
341
342 /* the rest of the precomputation */
343 if (!sc->sc_realmac && vhe->vhe_leader &&
344 memcmp(sc->sc_ac.ac_enaddr, vhe->vhe_enaddr, ETHER_ADDR_LEN) != 0)
345 SHA1Update(&vhe->vhe_sha1[ctx], sc->sc_ac.ac_enaddr,
346 ETHER_ADDR_LEN);
347
348 SHA1Update(&vhe->vhe_sha1[ctx], (void *)&vhid, sizeof(vhid));
349
350 /* Hash the addresses from smallest to largest, not interface order */
351 cur.s_addr = 0;
352 do {
353 found = 0;
354 last = cur;
355 cur.s_addr = 0xffffffff;
356 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
357 if (ifa->ifa_addr->sa_family != AF_INET)
358 continue;
359 in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
360 if (ntohl(in.s_addr) > ntohl(last.s_addr) &&
361 ntohl(in.s_addr) < ntohl(cur.s_addr)) {
362 cur.s_addr = in.s_addr;
363 found++;
364 }
365 }
366 if (found)
367 SHA1Update(&vhe->vhe_sha1[ctx],
368 (void *)&cur, sizeof(cur));
369 } while (found);
370 #ifdef INET6
371 memset(&cur6, 0x00, sizeof(cur6));
372 do {
373 found = 0;
374 last6 = cur6;
375 memset(&cur6, 0xff, sizeof(cur6));
376 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
377 if (ifa->ifa_addr->sa_family != AF_INET6)
378 continue;
379 in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
380 if (IN6_IS_SCOPE_EMBED(&in6)) {
381 if (ctx == HMAC_NOV6LL)
382 continue;
383 in6.s6_addr16[1] = 0;
384 }
385 if (memcmp(&in6, &last6, sizeof(in6)) > 0 &&
386 memcmp(&in6, &cur6, sizeof(in6)) < 0) {
387 cur6 = in6;
388 found++;
389 }
390 }
391 if (found)
392 SHA1Update(&vhe->vhe_sha1[ctx],
393 (void *)&cur6, sizeof(cur6));
394 } while (found);
395 #endif /* INET6 */
396
397 /* convert ipad to opad */
398 for (i = 0; i < sizeof(vhe->vhe_pad); i++)
399 vhe->vhe_pad[i] ^= 0x36 ^ 0x5c;
400 }
401
402 void
carp_hmac_generate(struct carp_vhost_entry * vhe,u_int32_t counter[2],unsigned char md[20],u_int8_t ctx)403 carp_hmac_generate(struct carp_vhost_entry *vhe, u_int32_t counter[2],
404 unsigned char md[20], u_int8_t ctx)
405 {
406 SHA1_CTX sha1ctx;
407
408 /* fetch first half of inner hash */
409 bcopy(&vhe->vhe_sha1[ctx], &sha1ctx, sizeof(sha1ctx));
410
411 SHA1Update(&sha1ctx, (void *)counter, sizeof(vhe->vhe_replay_cookie));
412 SHA1Final(md, &sha1ctx);
413
414 /* outer hash */
415 SHA1Init(&sha1ctx);
416 SHA1Update(&sha1ctx, vhe->vhe_pad, sizeof(vhe->vhe_pad));
417 SHA1Update(&sha1ctx, md, 20);
418 SHA1Final(md, &sha1ctx);
419 }
420
421 int
carp_hmac_verify(struct carp_vhost_entry * vhe,u_int32_t counter[2],unsigned char md[20])422 carp_hmac_verify(struct carp_vhost_entry *vhe, u_int32_t counter[2],
423 unsigned char md[20])
424 {
425 unsigned char md2[20];
426 u_int8_t i;
427
428 for (i = 0; i < HMAC_MAX; i++) {
429 carp_hmac_generate(vhe, counter, md2, i);
430 if (!timingsafe_bcmp(md, md2, sizeof(md2)))
431 return (0);
432 }
433 return (1);
434 }
435
436 int
carp_proto_input(struct mbuf ** mp,int * offp,int proto,int af)437 carp_proto_input(struct mbuf **mp, int *offp, int proto, int af)
438 {
439 struct ifnet *ifp;
440
441 ifp = if_get((*mp)->m_pkthdr.ph_ifidx);
442 if (ifp == NULL) {
443 m_freemp(mp);
444 return IPPROTO_DONE;
445 }
446
447 proto = carp_proto_input_if(ifp, mp, offp, proto);
448 if_put(ifp);
449 return proto;
450 }
451
452 /*
453 * process input packet.
454 * we have rearranged checks order compared to the rfc,
455 * but it seems more efficient this way or not possible otherwise.
456 */
457 int
carp_proto_input_if(struct ifnet * ifp,struct mbuf ** mp,int * offp,int proto)458 carp_proto_input_if(struct ifnet *ifp, struct mbuf **mp, int *offp, int proto)
459 {
460 struct mbuf *m = *mp;
461 struct ip *ip = mtod(m, struct ip *);
462 struct carp_softc *sc = NULL;
463 struct carp_header *ch;
464 int iplen, len, ismulti;
465
466 carpstat_inc(carps_ipackets);
467
468 if (!atomic_load_int(&carpctl_allow)) {
469 m_freem(m);
470 return IPPROTO_DONE;
471 }
472
473 ismulti = IN_MULTICAST(ip->ip_dst.s_addr);
474
475 /* check if received on a valid carp interface */
476 switch (ifp->if_type) {
477 case IFT_CARP:
478 break;
479 case IFT_ETHER:
480 if (ismulti || !SRPL_EMPTY_LOCKED(&ifp->if_carp))
481 break;
482 /* FALLTHROUGH */
483 default:
484 carpstat_inc(carps_badif);
485 CARP_LOG(LOG_INFO, sc,
486 ("packet received on non-carp interface: %s",
487 ifp->if_xname));
488 m_freem(m);
489 return IPPROTO_DONE;
490 }
491
492 /* verify that the IP TTL is 255. */
493 if (ip->ip_ttl != CARP_DFLTTL) {
494 carpstat_inc(carps_badttl);
495 CARP_LOG(LOG_NOTICE, sc, ("received ttl %d != %d on %s",
496 ip->ip_ttl, CARP_DFLTTL, ifp->if_xname));
497 m_freem(m);
498 return IPPROTO_DONE;
499 }
500
501 /*
502 * verify that the received packet length is
503 * equal to the CARP header
504 */
505 iplen = ip->ip_hl << 2;
506 len = iplen + sizeof(*ch);
507 if (len > m->m_pkthdr.len) {
508 carpstat_inc(carps_badlen);
509 CARP_LOG(LOG_INFO, sc, ("packet too short %d on %s",
510 m->m_pkthdr.len, ifp->if_xname));
511 m_freem(m);
512 return IPPROTO_DONE;
513 }
514
515 if ((m = *mp = m_pullup(m, len)) == NULL) {
516 carpstat_inc(carps_hdrops);
517 return IPPROTO_DONE;
518 }
519 ip = mtod(m, struct ip *);
520 ch = (struct carp_header *)(mtod(m, caddr_t) + iplen);
521
522 /* verify the CARP checksum */
523 m->m_data += iplen;
524 if (carp_cksum(m, len - iplen)) {
525 carpstat_inc(carps_badsum);
526 CARP_LOG(LOG_INFO, sc, ("checksum failed on %s",
527 ifp->if_xname));
528 m_freem(m);
529 return IPPROTO_DONE;
530 }
531 m->m_data -= iplen;
532
533 KERNEL_LOCK();
534 carp_proto_input_c(ifp, m, ch, ismulti, AF_INET);
535 KERNEL_UNLOCK();
536 return IPPROTO_DONE;
537 }
538
539 #ifdef INET6
540 int
carp6_proto_input(struct mbuf ** mp,int * offp,int proto,int af)541 carp6_proto_input(struct mbuf **mp, int *offp, int proto, int af)
542 {
543 struct ifnet *ifp;
544
545 ifp = if_get((*mp)->m_pkthdr.ph_ifidx);
546 if (ifp == NULL) {
547 m_freemp(mp);
548 return IPPROTO_DONE;
549 }
550
551 proto = carp6_proto_input_if(ifp, mp, offp, proto);
552 if_put(ifp);
553 return proto;
554 }
555
556 int
carp6_proto_input_if(struct ifnet * ifp,struct mbuf ** mp,int * offp,int proto)557 carp6_proto_input_if(struct ifnet *ifp, struct mbuf **mp, int *offp, int proto)
558 {
559 struct mbuf *m = *mp;
560 struct carp_softc *sc = NULL;
561 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
562 struct carp_header *ch;
563 u_int len;
564
565 carpstat_inc(carps_ipackets6);
566
567 if (!atomic_load_int(&carpctl_allow)) {
568 m_freem(m);
569 return IPPROTO_DONE;
570 }
571
572 /* check if received on a valid carp interface */
573 if (ifp->if_type != IFT_CARP) {
574 carpstat_inc(carps_badif);
575 CARP_LOG(LOG_INFO, sc, ("packet received on non-carp interface: %s",
576 ifp->if_xname));
577 m_freem(m);
578 return IPPROTO_DONE;
579 }
580
581 /* verify that the IP TTL is 255 */
582 if (ip6->ip6_hlim != CARP_DFLTTL) {
583 carpstat_inc(carps_badttl);
584 CARP_LOG(LOG_NOTICE, sc, ("received ttl %d != %d on %s",
585 ip6->ip6_hlim, CARP_DFLTTL, ifp->if_xname));
586 m_freem(m);
587 return IPPROTO_DONE;
588 }
589
590 /* verify that we have a complete carp packet */
591 len = m->m_len;
592 if ((m = *mp = m_pullup(m, *offp + sizeof(*ch))) == NULL) {
593 carpstat_inc(carps_badlen);
594 CARP_LOG(LOG_INFO, sc, ("packet size %u too small", len));
595 return IPPROTO_DONE;
596 }
597 ch = (struct carp_header *)(mtod(m, caddr_t) + *offp);
598
599 /* verify the CARP checksum */
600 m->m_data += *offp;
601 if (carp_cksum(m, sizeof(*ch))) {
602 carpstat_inc(carps_badsum);
603 CARP_LOG(LOG_INFO, sc, ("checksum failed, on %s",
604 ifp->if_xname));
605 m_freem(m);
606 return IPPROTO_DONE;
607 }
608 m->m_data -= *offp;
609
610 KERNEL_LOCK();
611 carp_proto_input_c(ifp, m, ch, 1, AF_INET6);
612 KERNEL_UNLOCK();
613 return IPPROTO_DONE;
614 }
615 #endif /* INET6 */
616
617 void
carp_proto_input_c(struct ifnet * ifp,struct mbuf * m,struct carp_header * ch,int ismulti,sa_family_t af)618 carp_proto_input_c(struct ifnet *ifp, struct mbuf *m, struct carp_header *ch,
619 int ismulti, sa_family_t af)
620 {
621 struct carp_softc *sc;
622 struct ifnet *ifp0;
623 struct carp_vhost_entry *vhe;
624 struct timeval sc_tv, ch_tv;
625 struct srpl *cif;
626
627 KERNEL_ASSERT_LOCKED(); /* touching if_carp + carp_vhosts */
628
629 ifp0 = if_get(ifp->if_carpdevidx);
630
631 if (ifp->if_type == IFT_CARP) {
632 /*
633 * If the parent of this carp(4) got destroyed while
634 * `m' was being processed, silently drop it.
635 */
636 if (ifp0 == NULL)
637 goto rele;
638 cif = &ifp0->if_carp;
639 } else
640 cif = &ifp->if_carp;
641
642 SRPL_FOREACH_LOCKED(sc, cif, sc_list) {
643 if (af == AF_INET &&
644 ismulti != IN_MULTICAST(sc->sc_peer.s_addr))
645 continue;
646 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
647 if (vhe->vhid == ch->carp_vhid)
648 goto found;
649 }
650 }
651 found:
652
653 if (!sc || (sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
654 (IFF_UP|IFF_RUNNING)) {
655 carpstat_inc(carps_badvhid);
656 goto rele;
657 }
658
659 getmicrotime(&sc->sc_if.if_lastchange);
660
661 /* verify the CARP version. */
662 if (ch->carp_version != CARP_VERSION) {
663 carpstat_inc(carps_badver);
664 sc->sc_if.if_ierrors++;
665 CARP_LOG(LOG_NOTICE, sc, ("invalid version %d != %d",
666 ch->carp_version, CARP_VERSION));
667 goto rele;
668 }
669
670 /* verify the hash */
671 if (carp_hmac_verify(vhe, ch->carp_counter, ch->carp_md)) {
672 carpstat_inc(carps_badauth);
673 sc->sc_if.if_ierrors++;
674 CARP_LOG(LOG_INFO, sc, ("incorrect hash"));
675 goto rele;
676 }
677
678 if (!memcmp(&vhe->vhe_replay_cookie, ch->carp_counter,
679 sizeof(ch->carp_counter))) {
680 struct ifnet *ifp2;
681
682 ifp2 = if_get(sc->sc_carpdevidx);
683 /* Do not log duplicates from non simplex interfaces */
684 if (ifp2 && ifp2->if_flags & IFF_SIMPLEX) {
685 carpstat_inc(carps_badauth);
686 sc->sc_if.if_ierrors++;
687 CARP_LOG(LOG_WARNING, sc,
688 ("replay or network loop detected"));
689 }
690 if_put(ifp2);
691 goto rele;
692 }
693
694 sc_tv.tv_sec = sc->sc_advbase;
695 sc_tv.tv_usec = vhe->advskew * 1000000 / 256;
696 ch_tv.tv_sec = ch->carp_advbase;
697 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
698
699 switch (vhe->state) {
700 case INIT:
701 break;
702 case MASTER:
703 /*
704 * If we receive an advertisement from a master who's going to
705 * be more frequent than us, and whose demote count is not higher
706 * than ours, go into BACKUP state. If his demote count is lower,
707 * also go into BACKUP.
708 */
709 if (((timercmp(&sc_tv, &ch_tv, >) ||
710 timercmp(&sc_tv, &ch_tv, ==)) &&
711 (ch->carp_demote <= carp_group_demote_count(sc))) ||
712 ch->carp_demote < carp_group_demote_count(sc)) {
713 timeout_del(&vhe->ad_tmo);
714 carp_set_state(vhe, BACKUP);
715 carp_setrun(vhe, 0);
716 }
717 break;
718 case BACKUP:
719 /*
720 * If we're pre-empting masters who advertise slower than us,
721 * and do not have a better demote count, treat them as down.
722 *
723 */
724 if (atomic_load_int(&carpctl_preempt) &&
725 timercmp(&sc_tv, &ch_tv, <) &&
726 ch->carp_demote >= carp_group_demote_count(sc)) {
727 carp_master_down(vhe);
728 break;
729 }
730
731 /*
732 * Take over masters advertising with a higher demote count,
733 * regardless of CARPCTL_PREEMPT.
734 */
735 if (ch->carp_demote > carp_group_demote_count(sc)) {
736 carp_master_down(vhe);
737 break;
738 }
739
740 /*
741 * If the master is going to advertise at such a low frequency
742 * that he's guaranteed to time out, we'd might as well just
743 * treat him as timed out now.
744 */
745 sc_tv.tv_sec = sc->sc_advbase * 3;
746 if (sc->sc_advbase && timercmp(&sc_tv, &ch_tv, <)) {
747 carp_master_down(vhe);
748 break;
749 }
750
751 /*
752 * Otherwise, we reset the counter and wait for the next
753 * advertisement.
754 */
755 carp_setrun(vhe, af);
756 break;
757 }
758
759 rele:
760 if_put(ifp0);
761 m_freem(m);
762 return;
763 }
764
765 int
carp_sysctl_carpstat(void * oldp,size_t * oldlenp,void * newp)766 carp_sysctl_carpstat(void *oldp, size_t *oldlenp, void *newp)
767 {
768 struct carpstats carpstat;
769
770 CTASSERT(sizeof(carpstat) == (carps_ncounters * sizeof(uint64_t)));
771 memset(&carpstat, 0, sizeof carpstat);
772 counters_read(carpcounters, (uint64_t *)&carpstat, carps_ncounters,
773 NULL);
774 return (sysctl_rdstruct(oldp, oldlenp, newp,
775 &carpstat, sizeof(carpstat)));
776 }
777
778 int
carp_sysctl(int * name,u_int namelen,void * oldp,size_t * oldlenp,void * newp,size_t newlen)779 carp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
780 size_t newlen)
781 {
782 /* All sysctl names at this level are terminal. */
783 if (namelen != 1)
784 return (ENOTDIR);
785
786 switch (name[0]) {
787 case CARPCTL_STATS:
788 return (carp_sysctl_carpstat(oldp, oldlenp, newp));
789 default:
790 return (sysctl_bounded_arr(carpctl_vars, nitems(carpctl_vars),
791 name, namelen, oldp, oldlenp, newp, newlen));
792 }
793 }
794
795 /*
796 * Interface side of the CARP implementation.
797 */
798
799 void
carpattach(int n)800 carpattach(int n)
801 {
802 if_creategroup("carp"); /* keep around even if empty */
803 if_clone_attach(&carp_cloner);
804 carpcounters = counters_alloc(carps_ncounters);
805 }
806
807 int
carp_clone_create(struct if_clone * ifc,int unit)808 carp_clone_create(struct if_clone *ifc, int unit)
809 {
810 struct carp_softc *sc;
811 struct ifnet *ifp;
812
813 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
814 refcnt_init(&sc->sc_refcnt);
815
816 SRPL_INIT(&sc->carp_vhosts);
817 sc->sc_vhe_count = 0;
818 if (carp_new_vhost(sc, 0, 0)) {
819 free(sc, M_DEVBUF, sizeof(*sc));
820 return (ENOMEM);
821 }
822
823 task_set(&sc->sc_atask, carp_addr_updated, sc);
824 task_set(&sc->sc_ltask, carp_carpdev_state, sc);
825 task_set(&sc->sc_dtask, carpdetach, sc);
826 #ifdef INET6
827 task_set(&sc->sc_itask, carp_if_linkstate, sc);
828 #endif /* INET6 */
829
830 sc->sc_suppress = 0;
831 sc->sc_advbase = CARP_DFLTINTV;
832 sc->sc_naddrs = sc->sc_naddrs6 = 0;
833 #ifdef INET6
834 sc->sc_im6o.im6o_hlim = CARP_DFLTTL;
835 #endif /* INET6 */
836 sc->sc_imo.imo_membership = mallocarray(IP_MIN_MEMBERSHIPS,
837 sizeof(struct in_multi *), M_IPMOPTS, M_WAITOK|M_ZERO);
838 sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
839
840 LIST_INIT(&sc->carp_mc_listhead);
841 ifp = &sc->sc_if;
842 ifp->if_softc = sc;
843 snprintf(ifp->if_xname, sizeof ifp->if_xname, "%s%d", ifc->ifc_name,
844 unit);
845 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
846 ifp->if_ioctl = carp_ioctl;
847 ifp->if_start = carp_start;
848 ifp->if_enqueue = carp_enqueue;
849 ifp->if_xflags = IFXF_CLONED;
850 if_counters_alloc(ifp);
851 if_attach(ifp);
852 ether_ifattach(ifp);
853 ifp->if_type = IFT_CARP;
854 ifp->if_sadl->sdl_type = IFT_CARP;
855 ifp->if_output = carp_output;
856 ifp->if_priority = IF_CARP_DEFAULT_PRIORITY;
857 ifp->if_link_state = LINK_STATE_INVALID;
858
859 /* Hook carp_addr_updated to cope with address and route changes. */
860 if_addrhook_add(&sc->sc_if, &sc->sc_atask);
861 #ifdef INET6
862 if_linkstatehook_add(&sc->sc_if, &sc->sc_itask);
863 #endif /* INET6 */
864
865 return (0);
866 }
867
868 int
carp_new_vhost(struct carp_softc * sc,int vhid,int advskew)869 carp_new_vhost(struct carp_softc *sc, int vhid, int advskew)
870 {
871 struct carp_vhost_entry *vhe, *vhe0;
872
873 vhe = malloc(sizeof(*vhe), M_DEVBUF, M_NOWAIT | M_ZERO);
874 if (vhe == NULL)
875 return (ENOMEM);
876
877 refcnt_init(&vhe->vhost_refcnt);
878 carp_sc_ref(NULL, sc); /* give a sc ref to the vhe */
879 vhe->parent_sc = sc;
880 vhe->vhid = vhid;
881 vhe->advskew = advskew;
882 vhe->state = INIT;
883 timeout_set_proc(&vhe->ad_tmo, carp_timer_ad, vhe);
884 timeout_set_proc(&vhe->md_tmo, carp_timer_down, vhe);
885 timeout_set_proc(&vhe->md6_tmo, carp_timer_down, vhe);
886
887 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
888
889 /* mark the first vhe as leader */
890 if (SRPL_EMPTY_LOCKED(&sc->carp_vhosts)) {
891 vhe->vhe_leader = 1;
892 SRPL_INSERT_HEAD_LOCKED(&carp_vh_rc, &sc->carp_vhosts,
893 vhe, vhost_entries);
894 sc->sc_vhe_count = 1;
895 return (0);
896 }
897
898 SRPL_FOREACH_LOCKED(vhe0, &sc->carp_vhosts, vhost_entries) {
899 if (SRPL_NEXT_LOCKED(vhe0, vhost_entries) == NULL)
900 break;
901 }
902
903 SRPL_INSERT_AFTER_LOCKED(&carp_vh_rc, vhe0, vhe, vhost_entries);
904 sc->sc_vhe_count++;
905
906 return (0);
907 }
908
909 int
carp_clone_destroy(struct ifnet * ifp)910 carp_clone_destroy(struct ifnet *ifp)
911 {
912 struct carp_softc *sc = ifp->if_softc;
913
914 if_addrhook_del(&sc->sc_if, &sc->sc_atask);
915 #ifdef INET6
916 if_linkstatehook_del(&sc->sc_if, &sc->sc_itask);
917 #endif /* INET6 */
918
919 NET_LOCK();
920 carpdetach(sc);
921 NET_UNLOCK();
922
923 ether_ifdetach(ifp);
924 if_detach(ifp);
925 carp_destroy_vhosts(ifp->if_softc);
926 refcnt_finalize(&sc->sc_refcnt, "carpdtor");
927 free(sc->sc_imo.imo_membership, M_IPMOPTS,
928 sc->sc_imo.imo_max_memberships * sizeof(struct in_multi *));
929 free(sc, M_DEVBUF, sizeof(*sc));
930 return (0);
931 }
932
933 void
carp_del_all_timeouts(struct carp_softc * sc)934 carp_del_all_timeouts(struct carp_softc *sc)
935 {
936 struct carp_vhost_entry *vhe;
937
938 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
939 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
940 timeout_del(&vhe->ad_tmo);
941 timeout_del(&vhe->md_tmo);
942 timeout_del(&vhe->md6_tmo);
943 }
944 }
945
946 void
carpdetach(void * arg)947 carpdetach(void *arg)
948 {
949 struct carp_softc *sc = arg;
950 struct ifnet *ifp0;
951 struct srpl *cif;
952
953 carp_del_all_timeouts(sc);
954
955 if (sc->sc_demote_cnt)
956 carp_group_demote_adj(&sc->sc_if, -sc->sc_demote_cnt, "detach");
957 sc->sc_suppress = 0;
958 sc->sc_sendad_errors = 0;
959
960 carp_set_state_all(sc, INIT);
961 sc->sc_if.if_flags &= ~IFF_UP;
962 carp_setrun_all(sc, 0);
963 carp_multicast_cleanup(sc);
964
965 ifp0 = if_get(sc->sc_carpdevidx);
966 if (ifp0 == NULL)
967 return;
968
969 KERNEL_ASSERT_LOCKED(); /* touching if_carp */
970
971 cif = &ifp0->if_carp;
972
973 SRPL_REMOVE_LOCKED(&carp_sc_rc, cif, sc, carp_softc, sc_list);
974 sc->sc_carpdevidx = 0;
975
976 if_linkstatehook_del(ifp0, &sc->sc_ltask);
977 if_detachhook_del(ifp0, &sc->sc_dtask);
978 ifpromisc(ifp0, 0);
979 if_put(ifp0);
980 }
981
982 void
carp_destroy_vhosts(struct carp_softc * sc)983 carp_destroy_vhosts(struct carp_softc *sc)
984 {
985 /* XXX bow out? */
986 struct carp_vhost_entry *vhe;
987
988 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
989
990 while ((vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts)) != NULL) {
991 SRPL_REMOVE_LOCKED(&carp_vh_rc, &sc->carp_vhosts, vhe,
992 carp_vhost_entry, vhost_entries);
993 carp_vh_unref(NULL, vhe); /* drop last ref */
994 }
995 sc->sc_vhe_count = 0;
996 }
997
998 void
carp_prepare_ad(struct mbuf * m,struct carp_vhost_entry * vhe,struct carp_header * ch)999 carp_prepare_ad(struct mbuf *m, struct carp_vhost_entry *vhe,
1000 struct carp_header *ch)
1001 {
1002 if (!vhe->vhe_replay_cookie) {
1003 arc4random_buf(&vhe->vhe_replay_cookie,
1004 sizeof(vhe->vhe_replay_cookie));
1005 }
1006
1007 bcopy(&vhe->vhe_replay_cookie, ch->carp_counter,
1008 sizeof(ch->carp_counter));
1009
1010 /*
1011 * For the time being, do not include the IPv6 linklayer addresses
1012 * in the HMAC.
1013 */
1014 carp_hmac_generate(vhe, ch->carp_counter, ch->carp_md, HMAC_NOV6LL);
1015 }
1016
1017 void
carp_send_ad_all(void)1018 carp_send_ad_all(void)
1019 {
1020 struct ifnet *ifp0;
1021 struct srpl *cif;
1022 struct carp_softc *vh;
1023
1024 KERNEL_ASSERT_LOCKED(); /* touching if_carp */
1025
1026 if (carp_send_all_recur > 0)
1027 return;
1028 ++carp_send_all_recur;
1029 TAILQ_FOREACH(ifp0, &ifnetlist, if_list) {
1030 if (ifp0->if_type != IFT_ETHER)
1031 continue;
1032
1033 cif = &ifp0->if_carp;
1034 SRPL_FOREACH_LOCKED(vh, cif, sc_list) {
1035 if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) ==
1036 (IFF_UP|IFF_RUNNING)) {
1037 carp_vhe_send_ad_all(vh);
1038 }
1039 }
1040 }
1041 --carp_send_all_recur;
1042 }
1043
1044 void
carp_vhe_send_ad_all(struct carp_softc * sc)1045 carp_vhe_send_ad_all(struct carp_softc *sc)
1046 {
1047 struct carp_vhost_entry *vhe;
1048
1049 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
1050
1051 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
1052 if (vhe->state == MASTER)
1053 carp_send_ad(vhe);
1054 }
1055 }
1056
/*
 * Advertisement timeout handler: `v' is the vhost entry the timeout
 * was set up for; send its advertisement under the net lock.
 */
void
carp_timer_ad(void *v)
{
	struct carp_vhost_entry *vhe = v;

	NET_LOCK();
	carp_send_ad(vhe);
	NET_UNLOCK();
}
1064
/*
 * Build and transmit one CARP advertisement for the given vhost entry.
 *
 * An IPv4 advertisement is sent when the carp interface has IPv4 addresses
 * (sc_naddrs) and, under INET6, an IPv6 one when it has IPv6 addresses
 * (sc_naddrs6).  Send failures and successes are counted in
 * sc_sendad_errors / sc_sendad_success and drive a demotion adjustment
 * through carp_group_demote_adj().
 *
 * Unless this is a "bow out" advertisement (advbase and advskew both 255),
 * the advertisement timeout vhe->ad_tmo is re-armed before returning.
 *
 * Must be called with the net lock held.  If the parent interface cannot
 * be looked up, only if_oerrors is bumped and nothing is sent or re-armed.
 */
void
carp_send_ad(struct carp_vhost_entry *vhe)
{
	struct carp_header ch;
	struct timeval tv;
	struct carp_softc *sc = vhe->parent_sc;
	struct carp_header *ch_ptr;
	struct mbuf *m;
	int error, len, advbase, advskew;
	struct ifnet *ifp;
	struct ifaddr *ifa;
	struct sockaddr sa;

	NET_ASSERT_LOCKED();

	/* ifp is the parent interface; released with if_put() at the end. */
	if ((ifp = if_get(sc->sc_carpdevidx)) == NULL) {
		sc->sc_if.if_oerrors++;
		return;
	}

	/* bow out if we've gone to backup (the carp interface is going down) */
	if (sc->sc_bow_out) {
		/*
		 * tv is deliberately left unset here: it is only read at
		 * retry_later when advbase/advskew are not both 255.
		 */
		advbase = 255;
		advskew = 255;
	} else {
		advbase = sc->sc_advbase;
		advskew = vhe->advskew;
		/* Next advertisement in advbase seconds + advskew/256 s. */
		tv.tv_sec = advbase;
		if (advbase == 0 && advskew == 0)
			tv.tv_usec = 1 * 1000000 / 256;
		else
			tv.tv_usec = advskew * 1000000 / 256;
	}

	/* Header template shared by the IPv4 and IPv6 advertisements. */
	ch.carp_version = CARP_VERSION;
	ch.carp_type = CARP_ADVERTISEMENT;
	ch.carp_vhid = vhe->vhid;
	ch.carp_demote = carp_group_demote_count(sc) & 0xff;
	ch.carp_advbase = advbase;
	ch.carp_advskew = advskew;
	ch.carp_authlen = 7; /* XXX DEFINE */
	ch.carp_cksum = 0;

	sc->cur_vhe = vhe; /* we need the vhe later on the output path */

	if (sc->sc_naddrs) {
		struct ip *ip;

		MGETHDR(m, M_DONTWAIT, MT_HEADER);
		if (m == NULL) {
			sc->sc_if.if_oerrors++;
			carpstat_inc(carps_onomem);
			/* XXX maybe less ? */
			goto retry_later;
		}
		len = sizeof(*ip) + sizeof(ch);
		m->m_pkthdr.pf.prio = CARP_IFQ_PRIO;
		m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain;
		m->m_pkthdr.len = len;
		m->m_len = len;
		m_align(m, len);
		ip = mtod(m, struct ip *);
		ip->ip_v = IPVERSION;
		ip->ip_hl = sizeof(*ip) >> 2;
		ip->ip_tos = IPTOS_LOWDELAY;
		ip->ip_len = htons(len);
		ip->ip_id = htons(ip_randomid());
		ip->ip_off = htons(IP_DF);
		ip->ip_ttl = CARP_DFLTTL;
		ip->ip_p = IPPROTO_CARP;
		ip->ip_sum = 0;

		memset(&sa, 0, sizeof(sa));
		sa.sa_family = AF_INET;
		/* Prefer addresses on the parent interface as source for AD. */
		ifa = ifaof_ifpforaddr(&sa, ifp);
		if (ifa == NULL)
			ifa = ifaof_ifpforaddr(&sa, &sc->sc_if);
		KASSERT(ifa != NULL);
		ip->ip_src.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
		ip->ip_dst.s_addr = sc->sc_peer.s_addr;
		if (IN_MULTICAST(ip->ip_dst.s_addr))
			m->m_flags |= M_MCAST;

		/* The CARP header sits directly behind the IP header. */
		ch_ptr = (struct carp_header *)(ip + 1);
		bcopy(&ch, ch_ptr, sizeof(ch));
		carp_prepare_ad(m, vhe, ch_ptr);

		/*
		 * Temporarily step m_data past the IP header so that
		 * carp_cksum() is handed the CARP header only, then restore.
		 */
		m->m_data += sizeof(*ip);
		ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip));
		m->m_data -= sizeof(*ip);

		getmicrotime(&sc->sc_if.if_lastchange);
		carpstat_inc(carps_opackets);

		error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo,
		    NULL, 0);
		if (error &&
		    /* when unicast, the peer's down is not our fault */
		    !(!IN_MULTICAST(sc->sc_peer.s_addr) && error == EHOSTDOWN)){
			if (error == ENOBUFS)
				carpstat_inc(carps_onomem);
			else
				CARP_LOG(LOG_WARNING, sc,
				    ("ip_output failed: %d", error));
			sc->sc_if.if_oerrors++;
			/* Saturating failure counter. */
			if (sc->sc_sendad_errors < INT_MAX)
				sc->sc_sendad_errors++;
			/* Demote once, when the threshold is first reached. */
			if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS(sc))
				carp_group_demote_adj(&sc->sc_if, 1,
				    "> snderrors");
			sc->sc_sendad_success = 0;
		} else {
			/*
			 * If the error threshold had been reached, undo the
			 * demotion only after enough successful sends;
			 * otherwise simply reset the failure counter.
			 */
			if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS(sc)) {
				if (++sc->sc_sendad_success >=
				    CARP_SENDAD_MIN_SUCCESS(sc)) {
					carp_group_demote_adj(&sc->sc_if, -1,
					    "< snderrors");
					sc->sc_sendad_errors = 0;
				}
			} else
				sc->sc_sendad_errors = 0;
		}
		/*
		 * Delayed gratuitous ARP: count sc_delayed_arp down once per
		 * advertisement of the leader vhost, send the ARP when it
		 * reaches zero, then park the counter at -1.
		 */
		if (vhe->vhe_leader) {
			if (sc->sc_delayed_arp > 0)
				sc->sc_delayed_arp--;
			if (sc->sc_delayed_arp == 0) {
				carp_send_arp(sc);
				sc->sc_delayed_arp = -1;
			}
		}
	}
#ifdef INET6
	if (sc->sc_naddrs6) {
		struct ip6_hdr *ip6;

		MGETHDR(m, M_DONTWAIT, MT_HEADER);
		if (m == NULL) {
			sc->sc_if.if_oerrors++;
			carpstat_inc(carps_onomem);
			/* XXX maybe less ? */
			goto retry_later;
		}
		len = sizeof(*ip6) + sizeof(ch);
		m->m_pkthdr.pf.prio = CARP_IFQ_PRIO;
		m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain;
		m->m_pkthdr.len = len;
		m->m_len = len;
		m_align(m, len);
		m->m_flags |= M_MCAST;
		ip6 = mtod(m, struct ip6_hdr *);
		memset(ip6, 0, sizeof(*ip6));
		ip6->ip6_vfc |= IPV6_VERSION;
		ip6->ip6_hlim = CARP_DFLTTL;
		ip6->ip6_nxt = IPPROTO_CARP;

		/* set the source address */
		memset(&sa, 0, sizeof(sa));
		sa.sa_family = AF_INET6;
		/* Prefer addresses on the parent interface as source for AD. */
		ifa = ifaof_ifpforaddr(&sa, ifp);
		if (ifa == NULL)
			ifa = ifaof_ifpforaddr(&sa, &sc->sc_if);
		KASSERT(ifa != NULL);
		bcopy(ifatoia6(ifa)->ia_addr.sin6_addr.s6_addr,
		    &ip6->ip6_src, sizeof(struct in6_addr));
		/* set the multicast destination */

		/* ff02:<parent if_index>::12, the index embedded in word 1 */
		ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
		ip6->ip6_dst.s6_addr16[1] = htons(ifp->if_index);
		ip6->ip6_dst.s6_addr8[15] = 0x12;

		/* The CARP header sits directly behind the IPv6 header. */
		ch_ptr = (struct carp_header *)(ip6 + 1);
		bcopy(&ch, ch_ptr, sizeof(ch));
		carp_prepare_ad(m, vhe, ch_ptr);

		/* Same m_data adjustment as for IPv4 around carp_cksum(). */
		m->m_data += sizeof(*ip6);
		ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6));
		m->m_data -= sizeof(*ip6);

		getmicrotime(&sc->sc_if.if_lastchange);
		carpstat_inc(carps_opackets6);

		error = ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL);
		/* Same error/success accounting as the IPv4 path above. */
		if (error) {
			if (error == ENOBUFS)
				carpstat_inc(carps_onomem);
			else
				CARP_LOG(LOG_WARNING, sc,
				    ("ip6_output failed: %d", error));
			sc->sc_if.if_oerrors++;
			if (sc->sc_sendad_errors < INT_MAX)
				sc->sc_sendad_errors++;
			if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS(sc))
				carp_group_demote_adj(&sc->sc_if, 1,
				    "> snd6errors");
			sc->sc_sendad_success = 0;
		} else {
			if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS(sc)) {
				if (++sc->sc_sendad_success >=
				    CARP_SENDAD_MIN_SUCCESS(sc)) {
					carp_group_demote_adj(&sc->sc_if, -1,
					    "< snd6errors");
					sc->sc_sendad_errors = 0;
				}
			} else
				sc->sc_sendad_errors = 0;
		}
	}
#endif /* INET6 */

retry_later:
	sc->cur_vhe = NULL;
	/* Re-arm the advertisement timer, except after a bow-out AD. */
	if (advbase != 255 || advskew != 255)
		timeout_add_tv(&vhe->ad_tmo, &tv);
	if_put(ifp);
}
1282
1283 /*
1284 * Broadcast a gratuitous ARP request containing
1285 * the virtual router MAC address for each IP address
1286 * associated with the virtual router.
1287 */
1288 void
carp_send_arp(struct carp_softc * sc)1289 carp_send_arp(struct carp_softc *sc)
1290 {
1291 struct ifaddr *ifa;
1292 in_addr_t in;
1293
1294 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
1295
1296 if (ifa->ifa_addr->sa_family != AF_INET)
1297 continue;
1298
1299 in = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
1300 arprequest(&sc->sc_if, &in, &in, sc->sc_ac.ac_enaddr);
1301 }
1302 }
1303
1304 #ifdef INET6
1305 void
carp_send_na(struct carp_softc * sc)1306 carp_send_na(struct carp_softc *sc)
1307 {
1308 struct ifaddr *ifa;
1309 struct in6_addr *in6, mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
1310 int i_am_router = (atomic_load_int(&ip6_forwarding) != 0);
1311 int flags = ND_NA_FLAG_OVERRIDE;
1312
1313 if (i_am_router)
1314 flags |= ND_NA_FLAG_ROUTER;
1315 mcast.s6_addr16[1] = htons(sc->sc_if.if_index);
1316
1317 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
1318
1319 if (ifa->ifa_addr->sa_family != AF_INET6)
1320 continue;
1321
1322 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr;
1323 nd6_na_output(&sc->sc_if, &mcast, in6, flags, 1, NULL);
1324 }
1325 }
1326 #endif /* INET6 */
1327
1328 void
carp_update_lsmask(struct carp_softc * sc)1329 carp_update_lsmask(struct carp_softc *sc)
1330 {
1331 struct carp_vhost_entry *vhe;
1332 int count;
1333
1334 if (sc->sc_balancing == CARP_BAL_NONE)
1335 return;
1336
1337 sc->sc_lsmask = 0;
1338 count = 0;
1339
1340 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
1341 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
1342 if (vhe->state == MASTER && count < sizeof(sc->sc_lsmask) * 8)
1343 sc->sc_lsmask |= 1 << count;
1344 count++;
1345 }
1346 sc->sc_lscount = count;
1347 CARP_LOG(LOG_DEBUG, sc, ("carp_update_lsmask: %x", sc->sc_lsmask));
1348 }
1349
1350 int
carp_iamatch(struct ifnet * ifp)1351 carp_iamatch(struct ifnet *ifp)
1352 {
1353 struct carp_softc *sc = ifp->if_softc;
1354 struct carp_vhost_entry *vhe;
1355 struct srp_ref sr;
1356 int match = 0;
1357
1358 vhe = SRPL_FIRST(&sr, &sc->carp_vhosts);
1359 if (vhe->state == MASTER)
1360 match = 1;
1361 SRPL_LEAVE(&sr);
1362
1363 return (match);
1364 }
1365
1366 int
carp_ourether(struct ifnet * ifp,uint8_t * ena)1367 carp_ourether(struct ifnet *ifp, uint8_t *ena)
1368 {
1369 struct srpl *cif = &ifp->if_carp;
1370 struct carp_softc *sc;
1371 struct srp_ref sr;
1372 int match = 0;
1373 uint64_t dst = ether_addr_to_e64((struct ether_addr *)ena);
1374
1375 KASSERT(ifp->if_type == IFT_ETHER);
1376
1377 SRPL_FOREACH(sc, &sr, cif, sc_list) {
1378 if ((sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
1379 (IFF_UP|IFF_RUNNING))
1380 continue;
1381 if (carp_vhe_match(sc, dst)) {
1382 match = 1;
1383 break;
1384 }
1385 }
1386 SRPL_LEAVE(&sr);
1387
1388 return (match);
1389 }
1390
1391 int
carp_vhe_match(struct carp_softc * sc,uint64_t dst)1392 carp_vhe_match(struct carp_softc *sc, uint64_t dst)
1393 {
1394 struct carp_vhost_entry *vhe;
1395 struct srp_ref sr;
1396 int active = 0;
1397
1398 vhe = SRPL_FIRST(&sr, &sc->carp_vhosts);
1399 active = (vhe->state == MASTER || sc->sc_balancing >= CARP_BAL_IP);
1400 SRPL_LEAVE(&sr);
1401
1402 return (active && (dst ==
1403 ether_addr_to_e64((struct ether_addr *)sc->sc_ac.ac_enaddr)));
1404 }
1405
/*
 * Input hook for frames received on ifp0, the parent of zero or more carp
 * interfaces.  dst is the destination Ethernet address as a 64-bit value.
 *
 * - If an up-and-running carp interface matches dst, the mbuf is handed
 *   to that interface with if_vinput() and NULL is returned.
 * - If nothing matches and dst is multicast, a copy of the packet is fed
 *   to every carp interface that is IFF_UP and the original mbuf is
 *   returned to the caller.
 * - If nothing matches and dst is not multicast, m is returned untouched.
 *
 * When tagging for CARP_BAL_IP fails, the mbuf is freed and NULL returned.
 */
struct mbuf *
carp_input(struct ifnet *ifp0, struct mbuf *m, uint64_t dst)
{
	struct srpl *cif;
	struct carp_softc *sc;
	struct srp_ref sr;

	cif = &ifp0->if_carp;

	SRPL_FOREACH(sc, &sr, cif, sc_list) {
		if ((sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
		    (IFF_UP|IFF_RUNNING))
			continue;

		if (carp_vhe_match(sc, dst)) {
			/*
			 * These packets look like layer 2 multicast but they
			 * are unicast at layer 3. With help of the tag the
			 * mbuf's M_MCAST flag can be removed by carp_lsdrop()
			 * after we have passed layer 2.
			 */
			if (sc->sc_balancing == CARP_BAL_IP) {
				struct m_tag *mtag;
				mtag = m_tag_get(PACKET_TAG_CARP_BAL_IP, 0,
				    M_NOWAIT);
				if (mtag == NULL) {
					/* can't tag: drop; "out" leaves the SRP ref */
					m_freem(m);
					goto out;
				}
				m_tag_prepend(m, mtag);
			}
			/* leave the loop with sc != NULL and the SRP ref held */
			break;
		}
	}

	/* The loop ran to completion: no carp interface claimed dst. */
	if (sc == NULL) {
		SRPL_LEAVE(&sr);

		if (!ETH64_IS_MULTICAST(dst))
			return (m);

		/*
		 * XXX Should really check the list of multicast addresses
		 * for each CARP interface _before_ copying.
		 */
		SRPL_FOREACH(sc, &sr, cif, sc_list) {
			struct mbuf *m0;

			if (!(sc->sc_if.if_flags & IFF_UP))
				continue;

			/* a failed copy only skips this interface */
			m0 = m_dup_pkt(m, ETHER_ALIGN, M_DONTWAIT);
			if (m0 == NULL)
				continue;

			if_vinput(&sc->sc_if, m0);
		}
		SRPL_LEAVE(&sr);

		/* the original still belongs to the caller */
		return (m);
	}

	/* matched: the carp interface takes over the mbuf */
	if_vinput(&sc->sc_if, m);
out:
	SRPL_LEAVE(&sr);

	return (NULL);
}
1474
1475 int
carp_lsdrop(struct ifnet * ifp,struct mbuf * m,sa_family_t af,u_int32_t * src,u_int32_t * dst,int drop)1476 carp_lsdrop(struct ifnet *ifp, struct mbuf *m, sa_family_t af, u_int32_t *src,
1477 u_int32_t *dst, int drop)
1478 {
1479 struct carp_softc *sc;
1480 u_int32_t fold;
1481 struct m_tag *mtag;
1482
1483 if (ifp->if_type != IFT_CARP)
1484 return 0;
1485 sc = ifp->if_softc;
1486 if (sc->sc_balancing == CARP_BAL_NONE)
1487 return 0;
1488
1489 /*
1490 * Remove M_MCAST flag from mbuf of balancing ip traffic, since the fact
1491 * that it is layer 2 multicast does not implicate that it is also layer
1492 * 3 multicast.
1493 */
1494 if (m->m_flags & M_MCAST &&
1495 (mtag = m_tag_find(m, PACKET_TAG_CARP_BAL_IP, NULL))) {
1496 m_tag_delete(m, mtag);
1497 m->m_flags &= ~M_MCAST;
1498 }
1499
1500 /*
1501 * Return without making a drop decision. This allows to clear the
1502 * M_MCAST flag and do nothing else.
1503 */
1504 if (!drop)
1505 return 0;
1506
1507 /*
1508 * Never drop carp advertisements.
1509 * XXX Bad idea to pass all broadcast / multicast traffic?
1510 */
1511 if (m->m_flags & (M_BCAST|M_MCAST))
1512 return 0;
1513
1514 fold = src[0] ^ dst[0];
1515 #ifdef INET6
1516 if (af == AF_INET6) {
1517 int i;
1518 for (i = 1; i < 4; i++)
1519 fold ^= src[i] ^ dst[i];
1520 }
1521 #endif
1522 if (sc->sc_lscount == 0) /* just to be safe */
1523 return 1;
1524
1525 return ((1 << (ntohl(fold) % sc->sc_lscount)) & sc->sc_lsmask) == 0;
1526 }
1527
/*
 * Callback wrapper around carp_master_down(): runs it for the vhost entry
 * passed as the opaque argument with the net lock held.
 * NOTE(review): presumably the handler of the md_tmo/md6_tmo timeouts --
 * confirm where they are set up.
 */
void
carp_timer_down(void *v)
{
	struct carp_vhost_entry *vhe = v;

	NET_LOCK();
	carp_master_down(vhe);
	NET_UNLOCK();
}
1535
1536 void
carp_master_down(struct carp_vhost_entry * vhe)1537 carp_master_down(struct carp_vhost_entry *vhe)
1538 {
1539 struct carp_softc *sc = vhe->parent_sc;
1540
1541 NET_ASSERT_LOCKED();
1542
1543 switch (vhe->state) {
1544 case INIT:
1545 printf("%s: master_down event in INIT state\n",
1546 sc->sc_if.if_xname);
1547 break;
1548 case MASTER:
1549 break;
1550 case BACKUP:
1551 carp_set_state(vhe, MASTER);
1552 carp_send_ad(vhe);
1553 if (sc->sc_balancing == CARP_BAL_NONE && vhe->vhe_leader) {
1554 carp_send_arp(sc);
1555 /* Schedule a delayed ARP to deal w/ some L3 switches */
1556 sc->sc_delayed_arp = 2;
1557 #ifdef INET6
1558 /* routing entry is not ready yet. do it later */
1559 sc->sc_send_na = 1;
1560 #endif /* INET6 */
1561 }
1562 carp_setrun(vhe, 0);
1563 carpstat_inc(carps_preempt);
1564 break;
1565 }
1566 }
1567
1568 void
carp_setrun_all(struct carp_softc * sc,sa_family_t af)1569 carp_setrun_all(struct carp_softc *sc, sa_family_t af)
1570 {
1571 struct carp_vhost_entry *vhe;
1572
1573 KERNEL_ASSERT_LOCKED(); /* touching carp_vhost */
1574 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
1575 carp_setrun(vhe, af);
1576 }
1577 }
1578
1579 /*
1580 * When in backup state, af indicates whether to reset the master down timer
1581 * for v4 or v6. If it's set to zero, reset the ones which are already pending.
1582 */
1583 void
carp_setrun(struct carp_vhost_entry * vhe,sa_family_t af)1584 carp_setrun(struct carp_vhost_entry *vhe, sa_family_t af)
1585 {
1586 struct ifnet *ifp;
1587 struct timeval tv;
1588 struct carp_softc *sc = vhe->parent_sc;
1589
1590 if ((ifp = if_get(sc->sc_carpdevidx)) == NULL) {
1591 sc->sc_if.if_flags &= ~IFF_RUNNING;
1592 carp_set_state_all(sc, INIT);
1593 return;
1594 }
1595
1596 if (memcmp(((struct arpcom *)ifp)->ac_enaddr,
1597 sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN) == 0)
1598 sc->sc_realmac = 1;
1599 else
1600 sc->sc_realmac = 0;
1601
1602 if_put(ifp);
1603
1604 if (sc->sc_if.if_flags & IFF_UP && vhe->vhid > 0 &&
1605 (sc->sc_naddrs || sc->sc_naddrs6) && !sc->sc_suppress) {
1606 sc->sc_if.if_flags |= IFF_RUNNING;
1607 } else {
1608 sc->sc_if.if_flags &= ~IFF_RUNNING;
1609 return;
1610 }
1611
1612 switch (vhe->state) {
1613 case INIT:
1614 carp_set_state(vhe, BACKUP);
1615 carp_setrun(vhe, 0);
1616 break;
1617 case BACKUP:
1618 timeout_del(&vhe->ad_tmo);
1619 tv.tv_sec = 3 * sc->sc_advbase;
1620 if (sc->sc_advbase == 0 && vhe->advskew == 0)
1621 tv.tv_usec = 3 * 1000000 / 256;
1622 else if (sc->sc_advbase == 0)
1623 tv.tv_usec = 3 * vhe->advskew * 1000000 / 256;
1624 else
1625 tv.tv_usec = vhe->advskew * 1000000 / 256;
1626 if (vhe->vhe_leader)
1627 sc->sc_delayed_arp = -1;
1628 switch (af) {
1629 case AF_INET:
1630 timeout_add_tv(&vhe->md_tmo, &tv);
1631 break;
1632 #ifdef INET6
1633 case AF_INET6:
1634 timeout_add_tv(&vhe->md6_tmo, &tv);
1635 break;
1636 #endif /* INET6 */
1637 default:
1638 if (sc->sc_naddrs)
1639 timeout_add_tv(&vhe->md_tmo, &tv);
1640 if (sc->sc_naddrs6)
1641 timeout_add_tv(&vhe->md6_tmo, &tv);
1642 break;
1643 }
1644 break;
1645 case MASTER:
1646 tv.tv_sec = sc->sc_advbase;
1647 if (sc->sc_advbase == 0 && vhe->advskew == 0)
1648 tv.tv_usec = 1 * 1000000 / 256;
1649 else
1650 tv.tv_usec = vhe->advskew * 1000000 / 256;
1651 timeout_add_tv(&vhe->ad_tmo, &tv);
1652 break;
1653 }
1654 }
1655
1656 void
carp_multicast_cleanup(struct carp_softc * sc)1657 carp_multicast_cleanup(struct carp_softc *sc)
1658 {
1659 struct ip_moptions *imo = &sc->sc_imo;
1660 #ifdef INET6
1661 struct ip6_moptions *im6o = &sc->sc_im6o;
1662 #endif
1663 u_int16_t n = imo->imo_num_memberships;
1664
1665 /* Clean up our own multicast memberships */
1666 while (n-- > 0) {
1667 if (imo->imo_membership[n] != NULL) {
1668 in_delmulti(imo->imo_membership[n]);
1669 imo->imo_membership[n] = NULL;
1670 }
1671 }
1672 imo->imo_num_memberships = 0;
1673 imo->imo_ifidx = 0;
1674
1675 #ifdef INET6
1676 while (!LIST_EMPTY(&im6o->im6o_memberships)) {
1677 struct in6_multi_mship *imm =
1678 LIST_FIRST(&im6o->im6o_memberships);
1679
1680 LIST_REMOVE(imm, i6mm_chain);
1681 in6_leavegroup(imm);
1682 }
1683 im6o->im6o_ifidx = 0;
1684 #endif
1685
1686 /* And any other multicast memberships */
1687 carp_ether_purgemulti(sc);
1688 }
1689
1690 int
carp_set_ifp(struct carp_softc * sc,struct ifnet * ifp0)1691 carp_set_ifp(struct carp_softc *sc, struct ifnet *ifp0)
1692 {
1693 struct srpl *cif;
1694 struct carp_softc *vr, *last = NULL, *after = NULL;
1695 int myself = 0, error = 0;
1696
1697 KASSERT(ifp0->if_index != sc->sc_carpdevidx);
1698 KERNEL_ASSERT_LOCKED(); /* touching if_carp */
1699
1700 if ((ifp0->if_flags & IFF_MULTICAST) == 0)
1701 return (EADDRNOTAVAIL);
1702
1703 if (ifp0->if_type != IFT_ETHER)
1704 return (EINVAL);
1705
1706 cif = &ifp0->if_carp;
1707 if (carp_check_dup_vhids(sc, cif, NULL))
1708 return (EINVAL);
1709
1710 if ((error = ifpromisc(ifp0, 1)))
1711 return (error);
1712
1713 /* detach from old interface */
1714 if (sc->sc_carpdevidx != 0)
1715 carpdetach(sc);
1716
1717 /* attach carp interface to physical interface */
1718 if_detachhook_add(ifp0, &sc->sc_dtask);
1719 if_linkstatehook_add(ifp0, &sc->sc_ltask);
1720
1721 sc->sc_carpdevidx = ifp0->if_index;
1722 sc->sc_if.if_capabilities = ifp0->if_capabilities &
1723 (IFCAP_CSUM_MASK | IFCAP_TSOv4 | IFCAP_TSOv6);
1724
1725 SRPL_FOREACH_LOCKED(vr, cif, sc_list) {
1726 struct carp_vhost_entry *vrhead, *schead;
1727 last = vr;
1728
1729 if (vr == sc)
1730 myself = 1;
1731
1732 vrhead = SRPL_FIRST_LOCKED(&vr->carp_vhosts);
1733 schead = SRPL_FIRST_LOCKED(&sc->carp_vhosts);
1734 if (vrhead->vhid < schead->vhid)
1735 after = vr;
1736 }
1737
1738 if (!myself) {
1739 /* We're trying to keep things in order */
1740 if (last == NULL) {
1741 SRPL_INSERT_HEAD_LOCKED(&carp_sc_rc, cif,
1742 sc, sc_list);
1743 } else if (after == NULL) {
1744 SRPL_INSERT_AFTER_LOCKED(&carp_sc_rc, last,
1745 sc, sc_list);
1746 } else {
1747 SRPL_INSERT_AFTER_LOCKED(&carp_sc_rc, after,
1748 sc, sc_list);
1749 }
1750 }
1751 if (sc->sc_naddrs || sc->sc_naddrs6)
1752 sc->sc_if.if_flags |= IFF_UP;
1753 carp_set_enaddr(sc);
1754
1755 carp_carpdev_state(sc);
1756
1757 return (0);
1758 }
1759
1760 void
carp_set_vhe_enaddr(struct carp_vhost_entry * vhe)1761 carp_set_vhe_enaddr(struct carp_vhost_entry *vhe)
1762 {
1763 struct carp_softc *sc = vhe->parent_sc;
1764
1765 if (vhe->vhid != 0 && sc->sc_carpdevidx != 0) {
1766 if (vhe->vhe_leader && sc->sc_balancing == CARP_BAL_IP)
1767 vhe->vhe_enaddr[0] = 1;
1768 else
1769 vhe->vhe_enaddr[0] = 0;
1770 vhe->vhe_enaddr[1] = 0;
1771 vhe->vhe_enaddr[2] = 0x5e;
1772 vhe->vhe_enaddr[3] = 0;
1773 vhe->vhe_enaddr[4] = 1;
1774 vhe->vhe_enaddr[5] = vhe->vhid;
1775 } else
1776 memset(vhe->vhe_enaddr, 0, ETHER_ADDR_LEN);
1777 }
1778
1779 void
carp_set_enaddr(struct carp_softc * sc)1780 carp_set_enaddr(struct carp_softc *sc)
1781 {
1782 struct carp_vhost_entry *vhe;
1783
1784 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
1785 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries)
1786 carp_set_vhe_enaddr(vhe);
1787
1788 vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts);
1789
1790 /*
1791 * Use the carp lladdr if the running one isn't manually set.
1792 * Only compare static parts of the lladdr.
1793 */
1794 if ((memcmp(sc->sc_ac.ac_enaddr + 1, vhe->vhe_enaddr + 1,
1795 ETHER_ADDR_LEN - 2) == 0) ||
1796 (!sc->sc_ac.ac_enaddr[0] && !sc->sc_ac.ac_enaddr[1] &&
1797 !sc->sc_ac.ac_enaddr[2] && !sc->sc_ac.ac_enaddr[3] &&
1798 !sc->sc_ac.ac_enaddr[4] && !sc->sc_ac.ac_enaddr[5]))
1799 bcopy(vhe->vhe_enaddr, sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN);
1800
1801 /* Make sure the enaddr has changed before further twiddling. */
1802 if (memcmp(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN) != 0) {
1803 bcopy(sc->sc_ac.ac_enaddr, LLADDR(sc->sc_if.if_sadl),
1804 ETHER_ADDR_LEN);
1805 bcopy(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN);
1806 #ifdef INET6
1807 /*
1808 * (re)attach a link-local address which matches
1809 * our new MAC address.
1810 */
1811 if (sc->sc_naddrs6)
1812 in6_ifattach_linklocal(&sc->sc_if, NULL);
1813 #endif
1814 carp_set_state_all(sc, INIT);
1815 carp_setrun_all(sc, 0);
1816 }
1817 }
1818
1819 void
carp_addr_updated(void * v)1820 carp_addr_updated(void *v)
1821 {
1822 struct carp_softc *sc = (struct carp_softc *) v;
1823 struct ifaddr *ifa;
1824 int new_naddrs = 0, new_naddrs6 = 0;
1825
1826 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) {
1827 if (ifa->ifa_addr->sa_family == AF_INET)
1828 new_naddrs++;
1829 #ifdef INET6
1830 else if (ifa->ifa_addr->sa_family == AF_INET6)
1831 new_naddrs6++;
1832 #endif /* INET6 */
1833 }
1834
1835 /* We received address changes from if_addrhooks callback */
1836 if (new_naddrs != sc->sc_naddrs || new_naddrs6 != sc->sc_naddrs6) {
1837
1838 sc->sc_naddrs = new_naddrs;
1839 sc->sc_naddrs6 = new_naddrs6;
1840
1841 /* Re-establish multicast membership removed by in_control */
1842 if (IN_MULTICAST(sc->sc_peer.s_addr)) {
1843 if (!in_hasmulti(&sc->sc_peer, &sc->sc_if)) {
1844 struct in_multi **imm =
1845 sc->sc_imo.imo_membership;
1846 u_int16_t maxmem =
1847 sc->sc_imo.imo_max_memberships;
1848
1849 memset(&sc->sc_imo, 0, sizeof(sc->sc_imo));
1850 sc->sc_imo.imo_membership = imm;
1851 sc->sc_imo.imo_max_memberships = maxmem;
1852
1853 if (sc->sc_carpdevidx != 0 &&
1854 sc->sc_naddrs > 0)
1855 carp_join_multicast(sc);
1856 }
1857 }
1858
1859 if (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) {
1860 sc->sc_if.if_flags &= ~IFF_UP;
1861 carp_set_state_all(sc, INIT);
1862 } else
1863 carp_hmac_prepare(sc);
1864 }
1865
1866 carp_setrun_all(sc, 0);
1867 }
1868
1869 int
carp_set_addr(struct carp_softc * sc,struct sockaddr_in * sin)1870 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin)
1871 {
1872 struct in_addr *in = &sin->sin_addr;
1873 int error;
1874
1875 KASSERT(sc->sc_carpdevidx != 0);
1876
1877 /* XXX is this necessary? */
1878 if (in->s_addr == INADDR_ANY) {
1879 carp_setrun_all(sc, 0);
1880 return (0);
1881 }
1882
1883 if (sc->sc_naddrs == 0 && (error = carp_join_multicast(sc)) != 0)
1884 return (error);
1885
1886 carp_set_state_all(sc, INIT);
1887
1888 return (0);
1889 }
1890
1891 int
carp_join_multicast(struct carp_softc * sc)1892 carp_join_multicast(struct carp_softc *sc)
1893 {
1894 struct ip_moptions *imo = &sc->sc_imo;
1895 struct in_multi *imm;
1896 struct in_addr addr;
1897
1898 if (!IN_MULTICAST(sc->sc_peer.s_addr))
1899 return (0);
1900
1901 addr.s_addr = sc->sc_peer.s_addr;
1902 if ((imm = in_addmulti(&addr, &sc->sc_if)) == NULL)
1903 return (ENOBUFS);
1904
1905 imo->imo_membership[0] = imm;
1906 imo->imo_num_memberships = 1;
1907 imo->imo_ifidx = sc->sc_if.if_index;
1908 imo->imo_ttl = CARP_DFLTTL;
1909 imo->imo_loop = 0;
1910 return (0);
1911 }
1912
1913
1914 #ifdef INET6
1915 int
carp_set_addr6(struct carp_softc * sc,struct sockaddr_in6 * sin6)1916 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
1917 {
1918 int error;
1919
1920 KASSERT(sc->sc_carpdevidx != 0);
1921
1922 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
1923 carp_setrun_all(sc, 0);
1924 return (0);
1925 }
1926
1927 if (sc->sc_naddrs6 == 0 && (error = carp_join_multicast6(sc)) != 0)
1928 return (error);
1929
1930 carp_set_state_all(sc, INIT);
1931
1932 return (0);
1933 }
1934
1935 int
carp_join_multicast6(struct carp_softc * sc)1936 carp_join_multicast6(struct carp_softc *sc)
1937 {
1938 struct in6_multi_mship *imm, *imm2;
1939 struct ip6_moptions *im6o = &sc->sc_im6o;
1940 struct sockaddr_in6 addr6;
1941 int error;
1942
1943 /* Join IPv6 CARP multicast group */
1944 memset(&addr6, 0, sizeof(addr6));
1945 addr6.sin6_family = AF_INET6;
1946 addr6.sin6_len = sizeof(addr6);
1947 addr6.sin6_addr.s6_addr16[0] = htons(0xff02);
1948 addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index);
1949 addr6.sin6_addr.s6_addr8[15] = 0x12;
1950 if ((imm = in6_joingroup(&sc->sc_if,
1951 &addr6.sin6_addr, &error)) == NULL) {
1952 return (error);
1953 }
1954 /* join solicited multicast address */
1955 memset(&addr6.sin6_addr, 0, sizeof(addr6.sin6_addr));
1956 addr6.sin6_addr.s6_addr16[0] = htons(0xff02);
1957 addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index);
1958 addr6.sin6_addr.s6_addr32[1] = 0;
1959 addr6.sin6_addr.s6_addr32[2] = htonl(1);
1960 addr6.sin6_addr.s6_addr32[3] = 0;
1961 addr6.sin6_addr.s6_addr8[12] = 0xff;
1962 if ((imm2 = in6_joingroup(&sc->sc_if,
1963 &addr6.sin6_addr, &error)) == NULL) {
1964 in6_leavegroup(imm);
1965 return (error);
1966 }
1967
1968 /* apply v6 multicast membership */
1969 im6o->im6o_ifidx = sc->sc_if.if_index;
1970 if (imm)
1971 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm,
1972 i6mm_chain);
1973 if (imm2)
1974 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm2,
1975 i6mm_chain);
1976
1977 return (0);
1978 }
1979
1980 void
carp_if_linkstate(void * v)1981 carp_if_linkstate(void *v)
1982 {
1983 struct carp_softc *sc = v;
1984
1985 if (sc->sc_send_na) {
1986 if (sc->sc_if.if_link_state == LINK_STATE_UP)
1987 carp_send_na(sc);
1988 sc->sc_send_na = 0;
1989 }
1990 }
1991 #endif /* INET6 */
1992
/*
 * ioctl handler of the carp interface.
 *
 * ifp:  the carp interface.
 * cmd:  ioctl command; unknown commands yield ENOTTY.
 * addr: command argument, interpreted as struct ifaddr or struct ifreq
 *       depending on cmd.
 *
 * Returns 0 on success or an errno value.  Paths that leave through
 * "break" re-apply the Ethernet address with carp_set_enaddr() when it no
 * longer matches sc_curlladdr; the early "return" paths skip that step.
 */
int
carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
{
	struct proc *p = curproc;	/* XXX */
	struct carp_softc *sc = ifp->if_softc;
	struct carp_vhost_entry *vhe;
	struct carpreq carpr;
	struct ifaddr *ifa = (struct ifaddr *)addr;
	struct ifreq *ifr = (struct ifreq *)addr;
	struct ifnet *ifp0 = NULL;
	int i, error = 0;

	switch (cmd) {
	case SIOCSIFADDR:
		/* an address is only accepted once a parent is attached */
		if (sc->sc_carpdevidx == 0)
			return (EINVAL);

		switch (ifa->ifa_addr->sa_family) {
		case AF_INET:
			sc->sc_if.if_flags |= IFF_UP;
			error = carp_set_addr(sc, satosin(ifa->ifa_addr));
			break;
#ifdef INET6
		case AF_INET6:
			sc->sc_if.if_flags |= IFF_UP;
			error = carp_set_addr6(sc, satosin6(ifa->ifa_addr));
			break;
#endif /* INET6 */
		default:
			error = EAFNOSUPPORT;
			break;
		}
		break;

	case SIOCSIFFLAGS:
		KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
		vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts);
		if (vhe->state != INIT && !(ifr->ifr_flags & IFF_UP)) {
			/* going down: stop timers and tell the peers */
			carp_del_all_timeouts(sc);

			/* we need the interface up to bow out */
			sc->sc_if.if_flags |= IFF_UP;
			sc->sc_bow_out = 1;
			carp_vhe_send_ad_all(sc);
			sc->sc_bow_out = 0;

			sc->sc_if.if_flags &= ~IFF_UP;
			carp_set_state_all(sc, INIT);
			carp_setrun_all(sc, 0);
		} else if (vhe->state == INIT && (ifr->ifr_flags & IFF_UP)) {
			/* coming up from INIT */
			sc->sc_if.if_flags |= IFF_UP;
			carp_setrun_all(sc, 0);
		}
		break;

	case SIOCSVH:
		/* Set the carp configuration from a struct carpreq. */
		KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
		vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts);
		if ((error = suser(p)) != 0)
			break;
		if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
			break;
		/*
		 * NOTE(review): this initial value is overwritten by the
		 * assignment from carp_vhids_ioctl() below before it is
		 * tested, so the "error > 0" check at the end of this case
		 * looks unreachable -- confirm the intent.
		 */
		error = 1;
		if (carpr.carpr_carpdev[0] != '\0' &&
		    (ifp0 = if_unit(carpr.carpr_carpdev)) == NULL)
			return (EINVAL);
		/* a zero peer address selects the default carp group */
		if (carpr.carpr_peer.s_addr == 0)
			sc->sc_peer.s_addr = INADDR_CARP_GROUP;
		else
			sc->sc_peer.s_addr = carpr.carpr_peer.s_addr;
		/* switch parent interface if a different one was named */
		if (ifp0 != NULL && ifp0->if_index != sc->sc_carpdevidx) {
			if ((error = carp_set_ifp(sc, ifp0))) {
				if_put(ifp0);
				return (error);
			}
		}
		if_put(ifp0);
		/* forced state change, only honoured outside of INIT */
		if (vhe->state != INIT && carpr.carpr_state != vhe->state) {
			switch (carpr.carpr_state) {
			case BACKUP:
				timeout_del(&vhe->ad_tmo);
				carp_set_state_all(sc, BACKUP);
				carp_setrun_all(sc, 0);
				break;
			case MASTER:
				KERNEL_ASSERT_LOCKED();
				/* touching carp_vhosts */
				SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts,
				    vhost_entries)
					carp_master_down(vhe);
				break;
			default:
				break;
			}
		}
		if ((error = carp_vhids_ioctl(sc, &carpr)))
			return (error);
		/* a negative advbase leaves the current value untouched */
		if (carpr.carpr_advbase >= 0) {
			if (carpr.carpr_advbase > 255) {
				error = EINVAL;
				break;
			}
			sc->sc_advbase = carpr.carpr_advbase;
			error--;
		}
		/* push changed advskews to the vhost entries, in list order */
		if (memcmp(sc->sc_advskews, carpr.carpr_advskews,
		    sizeof(sc->sc_advskews))) {
			i = 0;
			KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
			SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts,
			    vhost_entries)
				vhe->advskew = carpr.carpr_advskews[i++];
			bcopy(carpr.carpr_advskews, sc->sc_advskews,
			    sizeof(sc->sc_advskews));
		}
		if (sc->sc_balancing != carpr.carpr_balancing) {
			if (carpr.carpr_balancing > CARP_BAL_MAXID) {
				error = EINVAL;
				break;
			}
			sc->sc_balancing = carpr.carpr_balancing;
			carp_set_enaddr(sc);
			carp_update_lsmask(sc);
		}
		bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key));
		if (error > 0)
			error = EINVAL;
		else {
			error = 0;
			carp_hmac_prepare(sc);
			carp_setrun_all(sc, 0);
		}
		break;

	case SIOCGVH:
		/* Report the carp configuration in a struct carpreq. */
		memset(&carpr, 0, sizeof(carpr));
		if ((ifp0 = if_get(sc->sc_carpdevidx)) != NULL)
			strlcpy(carpr.carpr_carpdev, ifp0->if_xname, IFNAMSIZ);
		if_put(ifp0);
		i = 0;
		KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
		SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
			carpr.carpr_vhids[i] = vhe->vhid;
			carpr.carpr_advskews[i] = vhe->advskew;
			carpr.carpr_states[i] = vhe->state;
			i++;
		}
		carpr.carpr_advbase = sc->sc_advbase;
		carpr.carpr_balancing = sc->sc_balancing;
		/* the key is only disclosed when suser() succeeds */
		if (suser(p) == 0)
			bcopy(sc->sc_key, carpr.carpr_key,
			    sizeof(carpr.carpr_key));
		carpr.carpr_peer.s_addr = sc->sc_peer.s_addr;
		error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
		break;

	case SIOCADDMULTI:
		error = carp_ether_addmulti(sc, ifr);
		break;

	case SIOCDELMULTI:
		error = carp_ether_delmulti(sc, ifr);
		break;
	case SIOCAIFGROUP:
	case SIOCDIFGROUP:
		/* group demotion only needs adjusting if we are demoted */
		if (sc->sc_demote_cnt)
			carp_ifgroup_ioctl(ifp, cmd, addr);
		break;
	case SIOCSIFGATTR:
		carp_ifgattr_ioctl(ifp, cmd, addr);
		break;
	default:
		error = ENOTTY;
	}

	/* pick up an Ethernet address that changed behind our back */
	if (memcmp(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN) != 0)
		carp_set_enaddr(sc);
	return (error);
}
2172
2173 int
carp_check_dup_vhids(struct carp_softc * sc,struct srpl * cif,struct carpreq * carpr)2174 carp_check_dup_vhids(struct carp_softc *sc, struct srpl *cif,
2175 struct carpreq *carpr)
2176 {
2177 struct carp_softc *vr;
2178 struct carp_vhost_entry *vhe, *vhe0;
2179 int i;
2180
2181 KERNEL_ASSERT_LOCKED(); /* touching if_carp + carp_vhosts */
2182
2183 SRPL_FOREACH_LOCKED(vr, cif, sc_list) {
2184 if (vr == sc)
2185 continue;
2186 SRPL_FOREACH_LOCKED(vhe, &vr->carp_vhosts, vhost_entries) {
2187 if (carpr) {
2188 for (i = 0; carpr->carpr_vhids[i]; i++) {
2189 if (vhe->vhid == carpr->carpr_vhids[i])
2190 return (EINVAL);
2191 }
2192 }
2193 SRPL_FOREACH_LOCKED(vhe0, &sc->carp_vhosts,
2194 vhost_entries) {
2195 if (vhe->vhid == vhe0->vhid)
2196 return (EINVAL);
2197 }
2198 }
2199 }
2200 return (0);
2201 }
2202
2203 int
carp_vhids_ioctl(struct carp_softc * sc,struct carpreq * carpr)2204 carp_vhids_ioctl(struct carp_softc *sc, struct carpreq *carpr)
2205 {
2206 int i, j;
2207 u_int8_t taken_vhids[256];
2208
2209 if (carpr->carpr_vhids[0] == 0 ||
2210 !memcmp(sc->sc_vhids, carpr->carpr_vhids, sizeof(sc->sc_vhids)))
2211 return (0);
2212
2213 memset(taken_vhids, 0, sizeof(taken_vhids));
2214 for (i = 0; carpr->carpr_vhids[i]; i++) {
2215 struct ifnet *ifp;
2216
2217 if (taken_vhids[carpr->carpr_vhids[i]])
2218 return (EINVAL);
2219 taken_vhids[carpr->carpr_vhids[i]] = 1;
2220
2221 if ((ifp = if_get(sc->sc_carpdevidx)) != NULL) {
2222 struct srpl *cif;
2223 cif = &ifp->if_carp;
2224 if (carp_check_dup_vhids(sc, cif, carpr)) {
2225 if_put(ifp);
2226 return (EINVAL);
2227 }
2228 }
2229 if_put(ifp);
2230 if (carpr->carpr_advskews[i] >= 255)
2231 return (EINVAL);
2232 }
2233 /* set sane balancing defaults */
2234 if (i <= 1)
2235 carpr->carpr_balancing = CARP_BAL_NONE;
2236 else if (carpr->carpr_balancing == CARP_BAL_NONE &&
2237 sc->sc_balancing == CARP_BAL_NONE)
2238 carpr->carpr_balancing = CARP_BAL_IP;
2239
2240 /* destroy all */
2241 carp_del_all_timeouts(sc);
2242 carp_destroy_vhosts(sc);
2243 memset(sc->sc_vhids, 0, sizeof(sc->sc_vhids));
2244
2245 /* sort vhosts list by vhid */
2246 for (j = 1; j <= 255; j++) {
2247 for (i = 0; carpr->carpr_vhids[i]; i++) {
2248 if (carpr->carpr_vhids[i] != j)
2249 continue;
2250 if (carp_new_vhost(sc, carpr->carpr_vhids[i],
2251 carpr->carpr_advskews[i]))
2252 return (ENOMEM);
2253 sc->sc_vhids[i] = carpr->carpr_vhids[i];
2254 sc->sc_advskews[i] = carpr->carpr_advskews[i];
2255 }
2256 }
2257 carp_set_enaddr(sc);
2258 carp_set_state_all(sc, INIT);
2259 return (0);
2260 }
2261
2262 void
carp_ifgroup_ioctl(struct ifnet * ifp,u_long cmd,caddr_t addr)2263 carp_ifgroup_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
2264 {
2265 struct ifgroupreq *ifgr = (struct ifgroupreq *)addr;
2266 struct ifg_list *ifgl;
2267 int *dm, adj;
2268
2269 if (!strcmp(ifgr->ifgr_group, IFG_ALL))
2270 return;
2271 adj = ((struct carp_softc *)ifp->if_softc)->sc_demote_cnt;
2272 if (cmd == SIOCDIFGROUP)
2273 adj = adj * -1;
2274
2275 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
2276 if (!strcmp(ifgl->ifgl_group->ifg_group, ifgr->ifgr_group)) {
2277 dm = &ifgl->ifgl_group->ifg_carp_demoted;
2278 if (*dm + adj >= 0)
2279 *dm += adj;
2280 else
2281 *dm = 0;
2282 }
2283 }
2284
2285 void
carp_ifgattr_ioctl(struct ifnet * ifp,u_long cmd,caddr_t addr)2286 carp_ifgattr_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
2287 {
2288 struct ifgroupreq *ifgr = (struct ifgroupreq *)addr;
2289 struct carp_softc *sc = ifp->if_softc;
2290
2291 if (ifgr->ifgr_attrib.ifg_carp_demoted > 0 && (sc->sc_if.if_flags &
2292 (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING))
2293 carp_vhe_send_ad_all(sc);
2294 }
2295
2296 void
carp_start(struct ifnet * ifp)2297 carp_start(struct ifnet *ifp)
2298 {
2299 struct carp_softc *sc = ifp->if_softc;
2300 struct ifnet *ifp0;
2301 struct mbuf *m;
2302
2303 if ((ifp0 = if_get(sc->sc_carpdevidx)) == NULL) {
2304 ifq_purge(&ifp->if_snd);
2305 return;
2306 }
2307
2308 while ((m = ifq_dequeue(&ifp->if_snd)) != NULL)
2309 carp_transmit(sc, ifp0, m);
2310 if_put(ifp0);
2311 }
2312
2313 void
carp_transmit(struct carp_softc * sc,struct ifnet * ifp0,struct mbuf * m)2314 carp_transmit(struct carp_softc *sc, struct ifnet *ifp0, struct mbuf *m)
2315 {
2316 struct ifnet *ifp = &sc->sc_if;
2317
2318 #if NBPFILTER > 0
2319 {
2320 caddr_t if_bpf = ifp->if_bpf;
2321 if (if_bpf)
2322 bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT);
2323 }
2324 #endif /* NBPFILTER > 0 */
2325
2326 if (!ISSET(ifp0->if_flags, IFF_RUNNING)) {
2327 counters_inc(ifp->if_counters, ifc_oerrors);
2328 m_freem(m);
2329 return;
2330 }
2331
2332 /*
2333 * Do not leak the multicast address when sending
2334 * advertisements in 'ip' and 'ip-stealth' balancing
2335 * modes.
2336 */
2337 if (sc->sc_balancing == CARP_BAL_IP ||
2338 sc->sc_balancing == CARP_BAL_IPSTEALTH) {
2339 struct ether_header *eh = mtod(m, struct ether_header *);
2340 memcpy(eh->ether_shost, sc->sc_ac.ac_enaddr,
2341 sizeof(eh->ether_shost));
2342 }
2343
2344 if (if_enqueue(ifp0, m))
2345 counters_inc(ifp->if_counters, ifc_oerrors);
2346 }
2347
2348 int
carp_enqueue(struct ifnet * ifp,struct mbuf * m)2349 carp_enqueue(struct ifnet *ifp, struct mbuf *m)
2350 {
2351 struct carp_softc *sc = ifp->if_softc;
2352 struct ifnet *ifp0;
2353
2354 /* no ifq_is_priq, cos hfsc on carp doesn't make sense */
2355
2356 /*
2357 * If the parent of this carp(4) got destroyed while
2358 * `m' was being processed, silently drop it.
2359 */
2360 if ((ifp0 = if_get(sc->sc_carpdevidx)) == NULL) {
2361 m_freem(m);
2362 return (0);
2363 }
2364
2365 counters_pkt(ifp->if_counters,
2366 ifc_opackets, ifc_obytes, m->m_pkthdr.len);
2367 carp_transmit(sc, ifp0, m);
2368 if_put(ifp0);
2369
2370 return (0);
2371 }
2372
2373 int
carp_output(struct ifnet * ifp,struct mbuf * m,struct sockaddr * sa,struct rtentry * rt)2374 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
2375 struct rtentry *rt)
2376 {
2377 struct carp_softc *sc = ((struct carp_softc *)ifp->if_softc);
2378 struct carp_vhost_entry *vhe;
2379 struct srp_ref sr;
2380 int ismaster;
2381
2382 if (sc->cur_vhe == NULL) {
2383 vhe = SRPL_FIRST(&sr, &sc->carp_vhosts);
2384 ismaster = (vhe->state == MASTER);
2385 SRPL_LEAVE(&sr);
2386 } else {
2387 ismaster = (sc->cur_vhe->state == MASTER);
2388 }
2389
2390 if ((sc->sc_balancing == CARP_BAL_NONE && !ismaster)) {
2391 m_freem(m);
2392 return (ENETUNREACH);
2393 }
2394
2395 return (ether_output(ifp, m, sa, rt));
2396 }
2397
2398 void
carp_set_state_all(struct carp_softc * sc,int state)2399 carp_set_state_all(struct carp_softc *sc, int state)
2400 {
2401 struct carp_vhost_entry *vhe;
2402
2403 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
2404
2405 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) {
2406 if (vhe->state == state)
2407 continue;
2408
2409 carp_set_state(vhe, state);
2410 }
2411 }
2412
2413 void
carp_set_state(struct carp_vhost_entry * vhe,int state)2414 carp_set_state(struct carp_vhost_entry *vhe, int state)
2415 {
2416 struct carp_softc *sc = vhe->parent_sc;
2417 static const char *carp_states[] = { CARP_STATES };
2418 int loglevel;
2419 struct carp_vhost_entry *vhe0;
2420
2421 KASSERT(vhe->state != state);
2422
2423 if (vhe->state == INIT || state == INIT)
2424 loglevel = LOG_WARNING;
2425 else
2426 loglevel = LOG_CRIT;
2427
2428 if (sc->sc_vhe_count > 1)
2429 CARP_LOG(loglevel, sc,
2430 ("state transition (vhid %d): %s -> %s", vhe->vhid,
2431 carp_states[vhe->state], carp_states[state]));
2432 else
2433 CARP_LOG(loglevel, sc,
2434 ("state transition: %s -> %s",
2435 carp_states[vhe->state], carp_states[state]));
2436
2437 vhe->state = state;
2438 carp_update_lsmask(sc);
2439
2440 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
2441
2442 sc->sc_if.if_link_state = LINK_STATE_INVALID;
2443 SRPL_FOREACH_LOCKED(vhe0, &sc->carp_vhosts, vhost_entries) {
2444 /*
2445 * Link must be up if at least one vhe is in state MASTER to
2446 * bring or keep route up.
2447 */
2448 if (vhe0->state == MASTER) {
2449 sc->sc_if.if_link_state = LINK_STATE_UP;
2450 break;
2451 } else if (vhe0->state == BACKUP) {
2452 sc->sc_if.if_link_state = LINK_STATE_DOWN;
2453 }
2454 }
2455 if_link_state_change(&sc->sc_if);
2456 }
2457
2458 void
carp_group_demote_adj(struct ifnet * ifp,int adj,char * reason)2459 carp_group_demote_adj(struct ifnet *ifp, int adj, char *reason)
2460 {
2461 struct ifg_list *ifgl;
2462 int *dm, need_ad;
2463 struct carp_softc *nil = NULL;
2464
2465 if (ifp->if_type == IFT_CARP) {
2466 dm = &((struct carp_softc *)ifp->if_softc)->sc_demote_cnt;
2467 if (*dm + adj >= 0)
2468 *dm += adj;
2469 else
2470 *dm = 0;
2471 }
2472
2473 need_ad = 0;
2474 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
2475 if (!strcmp(ifgl->ifgl_group->ifg_group, IFG_ALL))
2476 continue;
2477 dm = &ifgl->ifgl_group->ifg_carp_demoted;
2478
2479 if (*dm + adj >= 0)
2480 *dm += adj;
2481 else
2482 *dm = 0;
2483
2484 if (adj > 0 && *dm == 1)
2485 need_ad = 1;
2486 CARP_LOG(LOG_ERR, nil,
2487 ("%s demoted group %s by %d to %d (%s)",
2488 ifp->if_xname, ifgl->ifgl_group->ifg_group,
2489 adj, *dm, reason));
2490 }
2491 if (need_ad)
2492 carp_send_ad_all();
2493 }
2494
2495 int
carp_group_demote_count(struct carp_softc * sc)2496 carp_group_demote_count(struct carp_softc *sc)
2497 {
2498 struct ifg_list *ifgl;
2499 int count = 0;
2500
2501 TAILQ_FOREACH(ifgl, &sc->sc_if.if_groups, ifgl_next)
2502 count += ifgl->ifgl_group->ifg_carp_demoted;
2503
2504 if (count == 0 && sc->sc_demote_cnt)
2505 count = sc->sc_demote_cnt;
2506
2507 return (count > 255 ? 255 : count);
2508 }
2509
2510 void
carp_carpdev_state(void * v)2511 carp_carpdev_state(void *v)
2512 {
2513 struct carp_softc *sc = v;
2514 struct ifnet *ifp0;
2515 int suppressed = sc->sc_suppress;
2516
2517 if ((ifp0 = if_get(sc->sc_carpdevidx)) == NULL)
2518 return;
2519
2520 if (ifp0->if_link_state == LINK_STATE_DOWN ||
2521 !(ifp0->if_flags & IFF_UP)) {
2522 sc->sc_if.if_flags &= ~IFF_RUNNING;
2523 carp_del_all_timeouts(sc);
2524 carp_set_state_all(sc, INIT);
2525 sc->sc_suppress = 1;
2526 carp_setrun_all(sc, 0);
2527 if (!suppressed)
2528 carp_group_demote_adj(&sc->sc_if, 1, "carpdev");
2529 } else if (suppressed) {
2530 carp_set_state_all(sc, INIT);
2531 sc->sc_suppress = 0;
2532 carp_setrun_all(sc, 0);
2533 carp_group_demote_adj(&sc->sc_if, -1, "carpdev");
2534 }
2535
2536 if_put(ifp0);
2537 }
2538
2539 int
carp_ether_addmulti(struct carp_softc * sc,struct ifreq * ifr)2540 carp_ether_addmulti(struct carp_softc *sc, struct ifreq *ifr)
2541 {
2542 struct ifnet *ifp0;
2543 struct carp_mc_entry *mc;
2544 u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN];
2545 int error;
2546
2547 ifp0 = if_get(sc->sc_carpdevidx);
2548 if (ifp0 == NULL)
2549 return (EINVAL);
2550
2551 error = ether_addmulti(ifr, (struct arpcom *)&sc->sc_ac);
2552 if (error != ENETRESET) {
2553 if_put(ifp0);
2554 return (error);
2555 }
2556
2557 /*
2558 * This is new multicast address. We have to tell parent
2559 * about it. Also, remember this multicast address so that
2560 * we can delete them on unconfigure.
2561 */
2562 mc = malloc(sizeof(*mc), M_DEVBUF, M_NOWAIT);
2563 if (mc == NULL) {
2564 error = ENOMEM;
2565 goto alloc_failed;
2566 }
2567
2568 /*
2569 * As ether_addmulti() returns ENETRESET, following two
2570 * statement shouldn't fail.
2571 */
2572 (void)ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi);
2573 ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, mc->mc_enm);
2574 memcpy(&mc->mc_addr, &ifr->ifr_addr, ifr->ifr_addr.sa_len);
2575 LIST_INSERT_HEAD(&sc->carp_mc_listhead, mc, mc_entries);
2576
2577 error = (*ifp0->if_ioctl)(ifp0, SIOCADDMULTI, (caddr_t)ifr);
2578 if (error != 0)
2579 goto ioctl_failed;
2580
2581 if_put(ifp0);
2582
2583 return (error);
2584
2585 ioctl_failed:
2586 LIST_REMOVE(mc, mc_entries);
2587 free(mc, M_DEVBUF, sizeof(*mc));
2588 alloc_failed:
2589 (void)ether_delmulti(ifr, (struct arpcom *)&sc->sc_ac);
2590 if_put(ifp0);
2591
2592 return (error);
2593 }
2594
2595 int
carp_ether_delmulti(struct carp_softc * sc,struct ifreq * ifr)2596 carp_ether_delmulti(struct carp_softc *sc, struct ifreq *ifr)
2597 {
2598 struct ifnet *ifp0;
2599 struct ether_multi *enm;
2600 struct carp_mc_entry *mc;
2601 u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN];
2602 int error;
2603
2604 ifp0 = if_get(sc->sc_carpdevidx);
2605 if (ifp0 == NULL)
2606 return (EINVAL);
2607
2608 /*
2609 * Find a key to lookup carp_mc_entry. We have to do this
2610 * before calling ether_delmulti for obvious reason.
2611 */
2612 if ((error = ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi)) != 0)
2613 goto rele;
2614 ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, enm);
2615 if (enm == NULL) {
2616 error = EINVAL;
2617 goto rele;
2618 }
2619
2620 LIST_FOREACH(mc, &sc->carp_mc_listhead, mc_entries)
2621 if (mc->mc_enm == enm)
2622 break;
2623
2624 /* We won't delete entries we didn't add */
2625 if (mc == NULL) {
2626 error = EINVAL;
2627 goto rele;
2628 }
2629
2630 error = ether_delmulti(ifr, (struct arpcom *)&sc->sc_ac);
2631 if (error != ENETRESET)
2632 goto rele;
2633
2634 /* We no longer use this multicast address. Tell parent so. */
2635 error = (*ifp0->if_ioctl)(ifp0, SIOCDELMULTI, (caddr_t)ifr);
2636 if (error == 0) {
2637 /* And forget about this address. */
2638 LIST_REMOVE(mc, mc_entries);
2639 free(mc, M_DEVBUF, sizeof(*mc));
2640 } else
2641 (void)ether_addmulti(ifr, (struct arpcom *)&sc->sc_ac);
2642 rele:
2643 if_put(ifp0);
2644 return (error);
2645 }
2646
2647 /*
2648 * Delete any multicast address we have asked to add from parent
2649 * interface. Called when the carp is being unconfigured.
2650 */
2651 void
carp_ether_purgemulti(struct carp_softc * sc)2652 carp_ether_purgemulti(struct carp_softc *sc)
2653 {
2654 struct ifnet *ifp0; /* Parent. */
2655 struct carp_mc_entry *mc;
2656 union {
2657 struct ifreq ifreq;
2658 struct {
2659 char ifr_name[IFNAMSIZ];
2660 struct sockaddr_storage ifr_ss;
2661 } ifreq_storage;
2662 } u;
2663 struct ifreq *ifr = &u.ifreq;
2664
2665 if ((ifp0 = if_get(sc->sc_carpdevidx)) == NULL)
2666 return;
2667
2668 memcpy(ifr->ifr_name, ifp0->if_xname, IFNAMSIZ);
2669 while ((mc = LIST_FIRST(&sc->carp_mc_listhead)) != NULL) {
2670 memcpy(&ifr->ifr_addr, &mc->mc_addr, mc->mc_addr.ss_len);
2671 (void)(*ifp0->if_ioctl)(ifp0, SIOCDELMULTI, (caddr_t)ifr);
2672 LIST_REMOVE(mc, mc_entries);
2673 free(mc, M_DEVBUF, sizeof(*mc));
2674 }
2675
2676 if_put(ifp0);
2677 }
2678
2679 void
carp_vh_ref(void * null,void * v)2680 carp_vh_ref(void *null, void *v)
2681 {
2682 struct carp_vhost_entry *vhe = v;
2683
2684 refcnt_take(&vhe->vhost_refcnt);
2685 }
2686
2687 void
carp_vh_unref(void * null,void * v)2688 carp_vh_unref(void *null, void *v)
2689 {
2690 struct carp_vhost_entry *vhe = v;
2691
2692 if (refcnt_rele(&vhe->vhost_refcnt)) {
2693 carp_sc_unref(NULL, vhe->parent_sc);
2694 free(vhe, M_DEVBUF, sizeof(*vhe));
2695 }
2696 }
2697
2698 void
carp_sc_ref(void * null,void * s)2699 carp_sc_ref(void *null, void *s)
2700 {
2701 struct carp_softc *sc = s;
2702
2703 refcnt_take(&sc->sc_refcnt);
2704 }
2705
2706 void
carp_sc_unref(void * null,void * s)2707 carp_sc_unref(void *null, void *s)
2708 {
2709 struct carp_softc *sc = s;
2710
2711 refcnt_rele_wake(&sc->sc_refcnt);
2712 }
2713