1 /* $OpenBSD: ip_carp.c,v 1.352 2021/02/08 12:30:10 bluhm Exp $ */ 2 3 /* 4 * Copyright (c) 2002 Michael Shalayeff. All rights reserved. 5 * Copyright (c) 2003 Ryan McBride. All rights reserved. 6 * Copyright (c) 2006-2008 Marco Pfatschbacher. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 21 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 25 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 26 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 27 * THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30 /* 31 * TODO: 32 * - iface reconfigure 33 * - support for hardware checksum calculations; 34 * 35 */ 36 37 #include "ether.h" 38 39 #include <sys/param.h> 40 #include <sys/systm.h> 41 #include <sys/mbuf.h> 42 #include <sys/socket.h> 43 #include <sys/socketvar.h> 44 #include <sys/timeout.h> 45 #include <sys/ioctl.h> 46 #include <sys/errno.h> 47 #include <sys/device.h> 48 #include <sys/kernel.h> 49 #include <sys/sysctl.h> 50 #include <sys/syslog.h> 51 #include <sys/refcnt.h> 52 53 #include <net/if.h> 54 #include <net/if_var.h> 55 #include <net/if_types.h> 56 #include <net/netisr.h> 57 58 #include <crypto/sha1.h> 59 60 #include <netinet/in.h> 61 #include <netinet/in_var.h> 62 #include <netinet/ip.h> 63 #include <netinet/ip_var.h> 64 #include <netinet/if_ether.h> 65 #include <netinet/ip_ipsp.h> 66 67 #include <net/if_dl.h> 68 69 #ifdef INET6 70 #include <netinet6/in6_var.h> 71 #include <netinet/icmp6.h> 72 #include <netinet/ip6.h> 73 #include <netinet6/ip6_var.h> 74 #include <netinet6/nd6.h> 75 #include <netinet6/in6_ifattach.h> 76 #endif 77 78 #include "bpfilter.h" 79 #if NBPFILTER > 0 80 #include <net/bpf.h> 81 #endif 82 83 #include "vlan.h" 84 #if NVLAN > 0 85 #include <net/if_vlan_var.h> 86 #endif 87 88 #include <netinet/ip_carp.h> 89 90 struct carp_mc_entry { 91 LIST_ENTRY(carp_mc_entry) mc_entries; 92 union { 93 struct ether_multi *mcu_enm; 94 } mc_u; 95 struct sockaddr_storage mc_addr; 96 }; 97 #define mc_enm mc_u.mcu_enm 98 99 enum { HMAC_ORIG=0, HMAC_NOV6LL=1, HMAC_MAX=2 }; 100 101 struct carp_vhost_entry { 102 SRPL_ENTRY(carp_vhost_entry) vhost_entries; 103 struct refcnt vhost_refcnt; 104 105 struct carp_softc *parent_sc; 106 int vhe_leader; 107 int vhid; 108 int advskew; 109 enum { INIT = 0, BACKUP, MASTER } state; 110 struct timeout ad_tmo; /* advertisement timeout */ 111 struct timeout md_tmo; /* master down timeout */ 112 struct timeout md6_tmo; /* master down timeout */ 113 114 u_int64_t vhe_replay_cookie; 115 116 /* authentication */ 117 #define CARP_HMAC_PAD 64 118 unsigned char vhe_pad[CARP_HMAC_PAD]; 119 SHA1_CTX vhe_sha1[HMAC_MAX]; 120 121 u_int8_t vhe_enaddr[ETHER_ADDR_LEN]; 122 }; 123 124 void carp_vh_ref(void *, void *); 125 void carp_vh_unref(void *, void *); 126 127 struct srpl_rc carp_vh_rc = 128 SRPL_RC_INITIALIZER(carp_vh_ref, carp_vh_unref, NULL); 129 130 struct carp_softc { 131 struct arpcom sc_ac; 132 #define sc_if sc_ac.ac_if 133 #define sc_carpdevidx sc_ac.ac_if.if_carpdevidx 134 struct task sc_atask; 135 struct task sc_ltask; 136 struct task sc_dtask; 137 struct ip_moptions sc_imo; 138 #ifdef INET6 139 struct ip6_moptions sc_im6o; 140 #endif /* INET6 */ 141 142 SRPL_ENTRY(carp_softc) sc_list; 143 struct refcnt sc_refcnt; 144 145 int sc_suppress; 146 int sc_bow_out; 147 int sc_demote_cnt; 148 149 int sc_sendad_errors; 150 #define CARP_SENDAD_MAX_ERRORS(sc) (3 * (sc)->sc_vhe_count) 151 int sc_sendad_success; 152 #define CARP_SENDAD_MIN_SUCCESS(sc) (3 * (sc)->sc_vhe_count) 153 154 char sc_curlladdr[ETHER_ADDR_LEN]; 155 156 SRPL_HEAD(, carp_vhost_entry) carp_vhosts; 157 int sc_vhe_count; 158 u_int8_t sc_vhids[CARP_MAXNODES]; 159 u_int8_t sc_advskews[CARP_MAXNODES]; 160 u_int8_t sc_balancing; 161 162 int sc_naddrs; 163 int sc_naddrs6; 164 int sc_advbase; /* seconds */ 165 166 /* authentication */ 167 unsigned char sc_key[CARP_KEY_LEN]; 168 169 u_int32_t sc_hashkey[2]; 170 u_int32_t sc_lsmask; /* load sharing mask */ 171 int sc_lscount; /* # load sharing interfaces (max 32) */ 172 int sc_delayed_arp; /* delayed ARP request countdown */ 173 int sc_realmac; /* using real mac */ 174 175 struct in_addr sc_peer; 176 177 LIST_HEAD(__carp_mchead, carp_mc_entry) carp_mc_listhead; 178 struct carp_vhost_entry *cur_vhe; /* current active vhe */ 179 }; 180 181 void carp_sc_ref(void *, void *); 182 void carp_sc_unref(void *, void *); 183 184 struct srpl_rc carp_sc_rc = 185 SRPL_RC_INITIALIZER(carp_sc_ref, carp_sc_unref, NULL); 186 187 int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, LOG_CRIT }; /* XXX for now */ 188 struct cpumem *carpcounters; 189 190 int carp_send_all_recur = 0; 191 192 #define CARP_LOG(l, sc, s) \ 193 do { \ 194 if (carp_opts[CARPCTL_LOG] >= l) { \ 195 if (sc) \ 196 log(l, "%s: ", \ 197 (sc)->sc_if.if_xname); \ 198 else \ 199 log(l, "carp: "); \ 200 addlog s; \ 201 addlog("\n"); \ 202 } \ 203 } while (0) 204 205 void carp_hmac_prepare(struct carp_softc *); 206 void carp_hmac_prepare_ctx(struct carp_vhost_entry *, u_int8_t); 207 void carp_hmac_generate(struct carp_vhost_entry *, u_int32_t *, 208 unsigned char *, u_int8_t); 209 int carp_hmac_verify(struct carp_vhost_entry *, u_int32_t *, 210 unsigned char *); 211 void carp_proto_input_c(struct ifnet *, struct mbuf *, 212 struct carp_header *, int, sa_family_t); 213 int carp_proto_input_if(struct ifnet *, struct mbuf **, int *, int); 214 #ifdef INET6 215 int carp6_proto_input_if(struct ifnet *, struct mbuf **, int *, int); 216 #endif 217 void carpattach(int); 218 void carpdetach(void *); 219 void carp_prepare_ad(struct mbuf *, struct carp_vhost_entry *, 220 struct carp_header *); 221 void carp_send_ad_all(void); 222 void carp_vhe_send_ad_all(struct carp_softc *); 223 void carp_timer_ad(void *); 224 void carp_send_ad(struct carp_vhost_entry *); 225 void carp_send_arp(struct carp_softc *); 226 void carp_timer_down(void *); 227 void carp_master_down(struct carp_vhost_entry *); 228 int carp_ioctl(struct ifnet *, u_long, caddr_t); 229 int carp_vhids_ioctl(struct carp_softc *, struct carpreq *); 230 int carp_check_dup_vhids(struct carp_softc *, struct srpl *, 231 struct carpreq *); 232 void carp_ifgroup_ioctl(struct ifnet *, u_long, caddr_t); 233 void carp_ifgattr_ioctl(struct ifnet *, u_long, caddr_t); 234 void carp_start(struct ifnet *); 235 int carp_enqueue(struct ifnet *, struct mbuf *); 236 void carp_transmit(struct carp_softc *, struct ifnet *, struct mbuf *); 237 void carp_setrun_all(struct carp_softc *, sa_family_t); 238 void carp_setrun(struct carp_vhost_entry *, sa_family_t); 239 void carp_set_state_all(struct carp_softc *, int); 240 void carp_set_state(struct carp_vhost_entry *, int); 241 void carp_multicast_cleanup(struct carp_softc *); 242 int carp_set_ifp(struct carp_softc *, struct ifnet *); 243 void carp_set_enaddr(struct carp_softc *); 244 void carp_set_vhe_enaddr(struct carp_vhost_entry *); 245 void carp_addr_updated(void *); 246 int carp_set_addr(struct carp_softc *, struct sockaddr_in *); 247 int carp_join_multicast(struct carp_softc *); 248 #ifdef INET6 249 void carp_send_na(struct carp_softc *); 250 int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *); 251 int carp_join_multicast6(struct carp_softc *); 252 #endif 253 int carp_clone_create(struct if_clone *, int); 254 int carp_clone_destroy(struct ifnet *); 255 int carp_ether_addmulti(struct carp_softc *, struct ifreq *); 256 int carp_ether_delmulti(struct carp_softc *, struct ifreq *); 257 void carp_ether_purgemulti(struct carp_softc *); 258 int carp_group_demote_count(struct carp_softc *); 259 void carp_update_lsmask(struct carp_softc *); 260 int carp_new_vhost(struct carp_softc *, int, int); 261 void carp_destroy_vhosts(struct carp_softc *); 262 void carp_del_all_timeouts(struct carp_softc *); 263 int carp_vhe_match(struct carp_softc *, uint8_t *); 264 265 struct if_clone carp_cloner = 266 IF_CLONE_INITIALIZER("carp", carp_clone_create, carp_clone_destroy); 267 268 #define carp_cksum(_m, _l) ((u_int16_t)in_cksum((_m), (_l))) 269 #define CARP_IFQ_PRIO 6 270 271 void 272 carp_hmac_prepare(struct carp_softc *sc) 273 { 274 struct carp_vhost_entry *vhe; 275 u_int8_t i; 276 277 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 278 279 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) { 280 for (i = 0; i < HMAC_MAX; i++) { 281 carp_hmac_prepare_ctx(vhe, i); 282 } 283 } 284 } 285 286 void 287 carp_hmac_prepare_ctx(struct carp_vhost_entry *vhe, u_int8_t ctx) 288 { 289 struct carp_softc *sc = vhe->parent_sc; 290 291 u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT; 292 u_int8_t vhid = vhe->vhid & 0xff; 293 SHA1_CTX sha1ctx; 294 u_int32_t kmd[5]; 295 struct ifaddr *ifa; 296 int i, found; 297 struct in_addr last, cur, in; 298 #ifdef INET6 299 struct in6_addr last6, cur6, in6; 300 #endif /* INET6 */ 301 302 /* compute ipad from key */ 303 memset(vhe->vhe_pad, 0, sizeof(vhe->vhe_pad)); 304 bcopy(sc->sc_key, vhe->vhe_pad, sizeof(sc->sc_key)); 305 for (i = 0; i < sizeof(vhe->vhe_pad); i++) 306 vhe->vhe_pad[i] ^= 0x36; 307 308 /* precompute first part of inner hash */ 309 SHA1Init(&vhe->vhe_sha1[ctx]); 310 SHA1Update(&vhe->vhe_sha1[ctx], vhe->vhe_pad, sizeof(vhe->vhe_pad)); 311 SHA1Update(&vhe->vhe_sha1[ctx], (void *)&version, sizeof(version)); 312 SHA1Update(&vhe->vhe_sha1[ctx], (void *)&type, sizeof(type)); 313 314 /* generate a key for the arpbalance hash, before the vhid is hashed */ 315 if (vhe->vhe_leader) { 316 bcopy(&vhe->vhe_sha1[ctx], &sha1ctx, sizeof(sha1ctx)); 317 SHA1Final((unsigned char *)kmd, &sha1ctx); 318 sc->sc_hashkey[0] = kmd[0] ^ kmd[1]; 319 sc->sc_hashkey[1] = kmd[2] ^ kmd[3]; 320 } 321 322 /* the rest of the precomputation */ 323 if (!sc->sc_realmac && vhe->vhe_leader && 324 memcmp(sc->sc_ac.ac_enaddr, vhe->vhe_enaddr, ETHER_ADDR_LEN) != 0) 325 SHA1Update(&vhe->vhe_sha1[ctx], sc->sc_ac.ac_enaddr, 326 ETHER_ADDR_LEN); 327 328 SHA1Update(&vhe->vhe_sha1[ctx], (void *)&vhid, sizeof(vhid)); 329 330 /* Hash the addresses from smallest to largest, not interface order */ 331 cur.s_addr = 0; 332 do { 333 found = 0; 334 last = cur; 335 cur.s_addr = 0xffffffff; 336 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 337 if (ifa->ifa_addr->sa_family != AF_INET) 338 continue; 339 in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 340 if (ntohl(in.s_addr) > ntohl(last.s_addr) && 341 ntohl(in.s_addr) < ntohl(cur.s_addr)) { 342 cur.s_addr = in.s_addr; 343 found++; 344 } 345 } 346 if (found) 347 SHA1Update(&vhe->vhe_sha1[ctx], 348 (void *)&cur, sizeof(cur)); 349 } while (found); 350 #ifdef INET6 351 memset(&cur6, 0x00, sizeof(cur6)); 352 do { 353 found = 0; 354 last6 = cur6; 355 memset(&cur6, 0xff, sizeof(cur6)); 356 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 357 if (ifa->ifa_addr->sa_family != AF_INET6) 358 continue; 359 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 360 if (IN6_IS_SCOPE_EMBED(&in6)) { 361 if (ctx == HMAC_NOV6LL) 362 continue; 363 in6.s6_addr16[1] = 0; 364 } 365 if (memcmp(&in6, &last6, sizeof(in6)) > 0 && 366 memcmp(&in6, &cur6, sizeof(in6)) < 0) { 367 cur6 = in6; 368 found++; 369 } 370 } 371 if (found) 372 SHA1Update(&vhe->vhe_sha1[ctx], 373 (void *)&cur6, sizeof(cur6)); 374 } while (found); 375 #endif /* INET6 */ 376 377 /* convert ipad to opad */ 378 for (i = 0; i < sizeof(vhe->vhe_pad); i++) 379 vhe->vhe_pad[i] ^= 0x36 ^ 0x5c; 380 } 381 382 void 383 carp_hmac_generate(struct carp_vhost_entry *vhe, u_int32_t counter[2], 384 unsigned char md[20], u_int8_t ctx) 385 { 386 SHA1_CTX sha1ctx; 387 388 /* fetch first half of inner hash */ 389 bcopy(&vhe->vhe_sha1[ctx], &sha1ctx, sizeof(sha1ctx)); 390 391 SHA1Update(&sha1ctx, (void *)counter, sizeof(vhe->vhe_replay_cookie)); 392 SHA1Final(md, &sha1ctx); 393 394 /* outer hash */ 395 SHA1Init(&sha1ctx); 396 SHA1Update(&sha1ctx, vhe->vhe_pad, sizeof(vhe->vhe_pad)); 397 SHA1Update(&sha1ctx, md, 20); 398 SHA1Final(md, &sha1ctx); 399 } 400 401 int 402 carp_hmac_verify(struct carp_vhost_entry *vhe, u_int32_t counter[2], 403 unsigned char md[20]) 404 { 405 unsigned char md2[20]; 406 u_int8_t i; 407 408 for (i = 0; i < HMAC_MAX; i++) { 409 carp_hmac_generate(vhe, counter, md2, i); 410 if (!timingsafe_bcmp(md, md2, sizeof(md2))) 411 return (0); 412 } 413 return (1); 414 } 415 416 int 417 carp_proto_input(struct mbuf **mp, int *offp, int proto, int af) 418 { 419 struct ifnet *ifp; 420 421 ifp = if_get((*mp)->m_pkthdr.ph_ifidx); 422 if (ifp == NULL) { 423 m_freemp(mp); 424 return IPPROTO_DONE; 425 } 426 427 proto = carp_proto_input_if(ifp, mp, offp, proto); 428 if_put(ifp); 429 return proto; 430 } 431 432 /* 433 * process input packet. 434 * we have rearranged checks order compared to the rfc, 435 * but it seems more efficient this way or not possible otherwise. 436 */ 437 int 438 carp_proto_input_if(struct ifnet *ifp, struct mbuf **mp, int *offp, int proto) 439 { 440 struct mbuf *m = *mp; 441 struct ip *ip = mtod(m, struct ip *); 442 struct carp_softc *sc = NULL; 443 struct carp_header *ch; 444 int iplen, len, ismulti; 445 446 carpstat_inc(carps_ipackets); 447 448 if (!carp_opts[CARPCTL_ALLOW]) { 449 m_freem(m); 450 return IPPROTO_DONE; 451 } 452 453 ismulti = IN_MULTICAST(ip->ip_dst.s_addr); 454 455 /* check if received on a valid carp interface */ 456 switch (ifp->if_type) { 457 case IFT_CARP: 458 break; 459 case IFT_ETHER: 460 if (ismulti || !SRPL_EMPTY_LOCKED(&ifp->if_carp)) 461 break; 462 /* FALLTHROUGH */ 463 default: 464 carpstat_inc(carps_badif); 465 CARP_LOG(LOG_INFO, sc, 466 ("packet received on non-carp interface: %s", 467 ifp->if_xname)); 468 m_freem(m); 469 return IPPROTO_DONE; 470 } 471 472 /* verify that the IP TTL is 255. */ 473 if (ip->ip_ttl != CARP_DFLTTL) { 474 carpstat_inc(carps_badttl); 475 CARP_LOG(LOG_NOTICE, sc, ("received ttl %d != %d on %s", 476 ip->ip_ttl, CARP_DFLTTL, ifp->if_xname)); 477 m_freem(m); 478 return IPPROTO_DONE; 479 } 480 481 /* 482 * verify that the received packet length is 483 * equal to the CARP header 484 */ 485 iplen = ip->ip_hl << 2; 486 len = iplen + sizeof(*ch); 487 if (len > m->m_pkthdr.len) { 488 carpstat_inc(carps_badlen); 489 CARP_LOG(LOG_INFO, sc, ("packet too short %d on %s", 490 m->m_pkthdr.len, ifp->if_xname)); 491 m_freem(m); 492 return IPPROTO_DONE; 493 } 494 495 if ((m = *mp = m_pullup(m, len)) == NULL) { 496 carpstat_inc(carps_hdrops); 497 return IPPROTO_DONE; 498 } 499 ip = mtod(m, struct ip *); 500 ch = (struct carp_header *)(mtod(m, caddr_t) + iplen); 501 502 /* verify the CARP checksum */ 503 m->m_data += iplen; 504 if (carp_cksum(m, len - iplen)) { 505 carpstat_inc(carps_badsum); 506 CARP_LOG(LOG_INFO, sc, ("checksum failed on %s", 507 ifp->if_xname)); 508 m_freem(m); 509 return IPPROTO_DONE; 510 } 511 m->m_data -= iplen; 512 513 KERNEL_LOCK(); 514 carp_proto_input_c(ifp, m, ch, ismulti, AF_INET); 515 KERNEL_UNLOCK(); 516 return IPPROTO_DONE; 517 } 518 519 #ifdef INET6 520 int 521 carp6_proto_input(struct mbuf **mp, int *offp, int proto, int af) 522 { 523 struct ifnet *ifp; 524 525 ifp = if_get((*mp)->m_pkthdr.ph_ifidx); 526 if (ifp == NULL) { 527 m_freemp(mp); 528 return IPPROTO_DONE; 529 } 530 531 proto = carp6_proto_input_if(ifp, mp, offp, proto); 532 if_put(ifp); 533 return proto; 534 } 535 536 int 537 carp6_proto_input_if(struct ifnet *ifp, struct mbuf **mp, int *offp, int proto) 538 { 539 struct mbuf *m = *mp; 540 struct carp_softc *sc = NULL; 541 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 542 struct carp_header *ch; 543 u_int len; 544 545 carpstat_inc(carps_ipackets6); 546 547 if (!carp_opts[CARPCTL_ALLOW]) { 548 m_freem(m); 549 return IPPROTO_DONE; 550 } 551 552 /* check if received on a valid carp interface */ 553 if (ifp->if_type != IFT_CARP) { 554 carpstat_inc(carps_badif); 555 CARP_LOG(LOG_INFO, sc, ("packet received on non-carp interface: %s", 556 ifp->if_xname)); 557 m_freem(m); 558 return IPPROTO_DONE; 559 } 560 561 /* verify that the IP TTL is 255 */ 562 if (ip6->ip6_hlim != CARP_DFLTTL) { 563 carpstat_inc(carps_badttl); 564 CARP_LOG(LOG_NOTICE, sc, ("received ttl %d != %d on %s", 565 ip6->ip6_hlim, CARP_DFLTTL, ifp->if_xname)); 566 m_freem(m); 567 return IPPROTO_DONE; 568 } 569 570 /* verify that we have a complete carp packet */ 571 len = m->m_len; 572 if ((m = *mp = m_pullup(m, *offp + sizeof(*ch))) == NULL) { 573 carpstat_inc(carps_badlen); 574 CARP_LOG(LOG_INFO, sc, ("packet size %u too small", len)); 575 return IPPROTO_DONE; 576 } 577 ch = (struct carp_header *)(mtod(m, caddr_t) + *offp); 578 579 /* verify the CARP checksum */ 580 m->m_data += *offp; 581 if (carp_cksum(m, sizeof(*ch))) { 582 carpstat_inc(carps_badsum); 583 CARP_LOG(LOG_INFO, sc, ("checksum failed, on %s", 584 ifp->if_xname)); 585 m_freem(m); 586 return IPPROTO_DONE; 587 } 588 m->m_data -= *offp; 589 590 KERNEL_LOCK(); 591 carp_proto_input_c(ifp, m, ch, 1, AF_INET6); 592 KERNEL_UNLOCK(); 593 return IPPROTO_DONE; 594 } 595 #endif /* INET6 */ 596 597 void 598 carp_proto_input_c(struct ifnet *ifp, struct mbuf *m, struct carp_header *ch, 599 int ismulti, sa_family_t af) 600 { 601 struct carp_softc *sc; 602 struct ifnet *ifp0; 603 struct carp_vhost_entry *vhe; 604 struct timeval sc_tv, ch_tv; 605 struct srpl *cif; 606 607 KERNEL_ASSERT_LOCKED(); /* touching if_carp + carp_vhosts */ 608 609 ifp0 = if_get(ifp->if_carpdevidx); 610 611 if (ifp->if_type == IFT_CARP) { 612 /* 613 * If the parent of this carp(4) got destroyed while 614 * `m' was being processed, silently drop it. 615 */ 616 if (ifp0 == NULL) 617 goto rele; 618 cif = &ifp0->if_carp; 619 } else 620 cif = &ifp->if_carp; 621 622 SRPL_FOREACH_LOCKED(sc, cif, sc_list) { 623 if (af == AF_INET && 624 ismulti != IN_MULTICAST(sc->sc_peer.s_addr)) 625 continue; 626 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) { 627 if (vhe->vhid == ch->carp_vhid) 628 goto found; 629 } 630 } 631 found: 632 633 if (!sc || (sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) != 634 (IFF_UP|IFF_RUNNING)) { 635 carpstat_inc(carps_badvhid); 636 goto rele; 637 } 638 639 getmicrotime(&sc->sc_if.if_lastchange); 640 641 /* verify the CARP version. */ 642 if (ch->carp_version != CARP_VERSION) { 643 carpstat_inc(carps_badver); 644 sc->sc_if.if_ierrors++; 645 CARP_LOG(LOG_NOTICE, sc, ("invalid version %d != %d", 646 ch->carp_version, CARP_VERSION)); 647 goto rele; 648 } 649 650 /* verify the hash */ 651 if (carp_hmac_verify(vhe, ch->carp_counter, ch->carp_md)) { 652 carpstat_inc(carps_badauth); 653 sc->sc_if.if_ierrors++; 654 CARP_LOG(LOG_INFO, sc, ("incorrect hash")); 655 goto rele; 656 } 657 658 if (!memcmp(&vhe->vhe_replay_cookie, ch->carp_counter, 659 sizeof(ch->carp_counter))) { 660 struct ifnet *ifp2; 661 662 ifp2 = if_get(sc->sc_carpdevidx); 663 /* Do not log duplicates from non simplex interfaces */ 664 if (ifp2 && ifp2->if_flags & IFF_SIMPLEX) { 665 carpstat_inc(carps_badauth); 666 sc->sc_if.if_ierrors++; 667 CARP_LOG(LOG_WARNING, sc, 668 ("replay or network loop detected")); 669 } 670 if_put(ifp2); 671 goto rele; 672 } 673 674 sc_tv.tv_sec = sc->sc_advbase; 675 sc_tv.tv_usec = vhe->advskew * 1000000 / 256; 676 ch_tv.tv_sec = ch->carp_advbase; 677 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; 678 679 switch (vhe->state) { 680 case INIT: 681 break; 682 case MASTER: 683 /* 684 * If we receive an advertisement from a master who's going to 685 * be more frequent than us, and whose demote count is not higher 686 * than ours, go into BACKUP state. If his demote count is lower, 687 * also go into BACKUP. 688 */ 689 if (((timercmp(&sc_tv, &ch_tv, >) || 690 timercmp(&sc_tv, &ch_tv, ==)) && 691 (ch->carp_demote <= carp_group_demote_count(sc))) || 692 ch->carp_demote < carp_group_demote_count(sc)) { 693 timeout_del(&vhe->ad_tmo); 694 carp_set_state(vhe, BACKUP); 695 carp_setrun(vhe, 0); 696 } 697 break; 698 case BACKUP: 699 /* 700 * If we're pre-empting masters who advertise slower than us, 701 * and do not have a better demote count, treat them as down. 702 * 703 */ 704 if (carp_opts[CARPCTL_PREEMPT] && 705 timercmp(&sc_tv, &ch_tv, <) && 706 ch->carp_demote >= carp_group_demote_count(sc)) { 707 carp_master_down(vhe); 708 break; 709 } 710 711 /* 712 * Take over masters advertising with a higher demote count, 713 * regardless of CARPCTL_PREEMPT. 714 */ 715 if (ch->carp_demote > carp_group_demote_count(sc)) { 716 carp_master_down(vhe); 717 break; 718 } 719 720 /* 721 * If the master is going to advertise at such a low frequency 722 * that he's guaranteed to time out, we'd might as well just 723 * treat him as timed out now. 724 */ 725 sc_tv.tv_sec = sc->sc_advbase * 3; 726 if (sc->sc_advbase && timercmp(&sc_tv, &ch_tv, <)) { 727 carp_master_down(vhe); 728 break; 729 } 730 731 /* 732 * Otherwise, we reset the counter and wait for the next 733 * advertisement. 734 */ 735 carp_setrun(vhe, af); 736 break; 737 } 738 739 rele: 740 if_put(ifp0); 741 m_freem(m); 742 return; 743 } 744 745 int 746 carp_sysctl_carpstat(void *oldp, size_t *oldlenp, void *newp) 747 { 748 struct carpstats carpstat; 749 750 CTASSERT(sizeof(carpstat) == (carps_ncounters * sizeof(uint64_t))); 751 memset(&carpstat, 0, sizeof carpstat); 752 counters_read(carpcounters, (uint64_t *)&carpstat, carps_ncounters); 753 return (sysctl_rdstruct(oldp, oldlenp, newp, 754 &carpstat, sizeof(carpstat))); 755 } 756 757 int 758 carp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, 759 size_t newlen) 760 { 761 int error; 762 763 /* All sysctl names at this level are terminal. */ 764 if (namelen != 1) 765 return (ENOTDIR); 766 767 switch (name[0]) { 768 case CARPCTL_STATS: 769 return (carp_sysctl_carpstat(oldp, oldlenp, newp)); 770 default: 771 if (name[0] <= 0 || name[0] >= CARPCTL_MAXID) 772 return (ENOPROTOOPT); 773 NET_LOCK(); 774 error = sysctl_int(oldp, oldlenp, newp, newlen, 775 &carp_opts[name[0]]); 776 NET_UNLOCK(); 777 return (error); 778 } 779 } 780 781 /* 782 * Interface side of the CARP implementation. 783 */ 784 785 /* ARGSUSED */ 786 void 787 carpattach(int n) 788 { 789 if_creategroup("carp"); /* keep around even if empty */ 790 if_clone_attach(&carp_cloner); 791 carpcounters = counters_alloc(carps_ncounters); 792 } 793 794 int 795 carp_clone_create(struct if_clone *ifc, int unit) 796 { 797 struct carp_softc *sc; 798 struct ifnet *ifp; 799 800 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO); 801 refcnt_init(&sc->sc_refcnt); 802 803 SRPL_INIT(&sc->carp_vhosts); 804 sc->sc_vhe_count = 0; 805 if (carp_new_vhost(sc, 0, 0)) { 806 free(sc, M_DEVBUF, sizeof(*sc)); 807 return (ENOMEM); 808 } 809 810 task_set(&sc->sc_atask, carp_addr_updated, sc); 811 task_set(&sc->sc_ltask, carp_carpdev_state, sc); 812 task_set(&sc->sc_dtask, carpdetach, sc); 813 814 sc->sc_suppress = 0; 815 sc->sc_advbase = CARP_DFLTINTV; 816 sc->sc_naddrs = sc->sc_naddrs6 = 0; 817 #ifdef INET6 818 sc->sc_im6o.im6o_hlim = CARP_DFLTTL; 819 #endif /* INET6 */ 820 sc->sc_imo.imo_membership = mallocarray(IP_MIN_MEMBERSHIPS, 821 sizeof(struct in_multi *), M_IPMOPTS, M_WAITOK|M_ZERO); 822 sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS; 823 824 LIST_INIT(&sc->carp_mc_listhead); 825 ifp = &sc->sc_if; 826 ifp->if_softc = sc; 827 snprintf(ifp->if_xname, sizeof ifp->if_xname, "%s%d", ifc->ifc_name, 828 unit); 829 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 830 ifp->if_ioctl = carp_ioctl; 831 ifp->if_start = carp_start; 832 ifp->if_enqueue = carp_enqueue; 833 ifp->if_xflags = IFXF_CLONED; 834 if_counters_alloc(ifp); 835 if_attach(ifp); 836 ether_ifattach(ifp); 837 ifp->if_type = IFT_CARP; 838 ifp->if_sadl->sdl_type = IFT_CARP; 839 ifp->if_output = carp_output; 840 ifp->if_priority = IF_CARP_DEFAULT_PRIORITY; 841 ifp->if_link_state = LINK_STATE_INVALID; 842 843 /* Hook carp_addr_updated to cope with address and route changes. */ 844 if_addrhook_add(&sc->sc_if, &sc->sc_atask); 845 846 return (0); 847 } 848 849 int 850 carp_new_vhost(struct carp_softc *sc, int vhid, int advskew) 851 { 852 struct carp_vhost_entry *vhe, *vhe0; 853 854 vhe = malloc(sizeof(*vhe), M_DEVBUF, M_NOWAIT | M_ZERO); 855 if (vhe == NULL) 856 return (ENOMEM); 857 858 refcnt_init(&vhe->vhost_refcnt); 859 carp_sc_ref(NULL, sc); /* give a sc ref to the vhe */ 860 vhe->parent_sc = sc; 861 vhe->vhid = vhid; 862 vhe->advskew = advskew; 863 vhe->state = INIT; 864 timeout_set_proc(&vhe->ad_tmo, carp_timer_ad, vhe); 865 timeout_set_proc(&vhe->md_tmo, carp_timer_down, vhe); 866 timeout_set_proc(&vhe->md6_tmo, carp_timer_down, vhe); 867 868 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 869 870 /* mark the first vhe as leader */ 871 if (SRPL_EMPTY_LOCKED(&sc->carp_vhosts)) { 872 vhe->vhe_leader = 1; 873 SRPL_INSERT_HEAD_LOCKED(&carp_vh_rc, &sc->carp_vhosts, 874 vhe, vhost_entries); 875 sc->sc_vhe_count = 1; 876 return (0); 877 } 878 879 SRPL_FOREACH_LOCKED(vhe0, &sc->carp_vhosts, vhost_entries) { 880 if (SRPL_NEXT_LOCKED(vhe0, vhost_entries) == NULL) 881 break; 882 } 883 884 SRPL_INSERT_AFTER_LOCKED(&carp_vh_rc, vhe0, vhe, vhost_entries); 885 sc->sc_vhe_count++; 886 887 return (0); 888 } 889 890 int 891 carp_clone_destroy(struct ifnet *ifp) 892 { 893 struct carp_softc *sc = ifp->if_softc; 894 895 if_addrhook_del(&sc->sc_if, &sc->sc_atask); 896 897 NET_LOCK(); 898 carpdetach(sc); 899 NET_UNLOCK(); 900 901 ether_ifdetach(ifp); 902 if_detach(ifp); 903 carp_destroy_vhosts(ifp->if_softc); 904 refcnt_finalize(&sc->sc_refcnt, "carpdtor"); 905 free(sc->sc_imo.imo_membership, M_IPMOPTS, 906 sc->sc_imo.imo_max_memberships * sizeof(struct in_multi *)); 907 free(sc, M_DEVBUF, sizeof(*sc)); 908 return (0); 909 } 910 911 void 912 carp_del_all_timeouts(struct carp_softc *sc) 913 { 914 struct carp_vhost_entry *vhe; 915 916 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 917 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) { 918 timeout_del(&vhe->ad_tmo); 919 timeout_del(&vhe->md_tmo); 920 timeout_del(&vhe->md6_tmo); 921 } 922 } 923 924 void 925 carpdetach(void *arg) 926 { 927 struct carp_softc *sc = arg; 928 struct ifnet *ifp0; 929 struct srpl *cif; 930 931 carp_del_all_timeouts(sc); 932 933 if (sc->sc_demote_cnt) 934 carp_group_demote_adj(&sc->sc_if, -sc->sc_demote_cnt, "detach"); 935 sc->sc_suppress = 0; 936 sc->sc_sendad_errors = 0; 937 938 carp_set_state_all(sc, INIT); 939 sc->sc_if.if_flags &= ~IFF_UP; 940 carp_setrun_all(sc, 0); 941 carp_multicast_cleanup(sc); 942 943 ifp0 = if_get(sc->sc_carpdevidx); 944 if (ifp0 == NULL) 945 return; 946 947 KERNEL_ASSERT_LOCKED(); /* touching if_carp */ 948 949 cif = &ifp0->if_carp; 950 951 SRPL_REMOVE_LOCKED(&carp_sc_rc, cif, sc, carp_softc, sc_list); 952 sc->sc_carpdevidx = 0; 953 954 if_linkstatehook_del(ifp0, &sc->sc_ltask); 955 if_detachhook_del(ifp0, &sc->sc_dtask); 956 ifpromisc(ifp0, 0); 957 if_put(ifp0); 958 } 959 960 void 961 carp_destroy_vhosts(struct carp_softc *sc) 962 { 963 /* XXX bow out? */ 964 struct carp_vhost_entry *vhe; 965 966 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 967 968 while ((vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts)) != NULL) { 969 SRPL_REMOVE_LOCKED(&carp_vh_rc, &sc->carp_vhosts, vhe, 970 carp_vhost_entry, vhost_entries); 971 carp_vh_unref(NULL, vhe); /* drop last ref */ 972 } 973 sc->sc_vhe_count = 0; 974 } 975 976 void 977 carp_prepare_ad(struct mbuf *m, struct carp_vhost_entry *vhe, 978 struct carp_header *ch) 979 { 980 if (!vhe->vhe_replay_cookie) { 981 arc4random_buf(&vhe->vhe_replay_cookie, 982 sizeof(vhe->vhe_replay_cookie)); 983 } 984 985 bcopy(&vhe->vhe_replay_cookie, ch->carp_counter, 986 sizeof(ch->carp_counter)); 987 988 /* 989 * For the time being, do not include the IPv6 linklayer addresses 990 * in the HMAC. 991 */ 992 carp_hmac_generate(vhe, ch->carp_counter, ch->carp_md, HMAC_NOV6LL); 993 } 994 995 void 996 carp_send_ad_all(void) 997 { 998 struct ifnet *ifp0; 999 struct srpl *cif; 1000 struct carp_softc *vh; 1001 1002 KERNEL_ASSERT_LOCKED(); /* touching if_carp */ 1003 1004 if (carp_send_all_recur > 0) 1005 return; 1006 ++carp_send_all_recur; 1007 TAILQ_FOREACH(ifp0, &ifnet, if_list) { 1008 if (ifp0->if_type != IFT_ETHER) 1009 continue; 1010 1011 cif = &ifp0->if_carp; 1012 SRPL_FOREACH_LOCKED(vh, cif, sc_list) { 1013 if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) == 1014 (IFF_UP|IFF_RUNNING)) { 1015 carp_vhe_send_ad_all(vh); 1016 } 1017 } 1018 } 1019 --carp_send_all_recur; 1020 } 1021 1022 void 1023 carp_vhe_send_ad_all(struct carp_softc *sc) 1024 { 1025 struct carp_vhost_entry *vhe; 1026 1027 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 1028 1029 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) { 1030 if (vhe->state == MASTER) 1031 carp_send_ad(vhe); 1032 } 1033 } 1034 1035 void 1036 carp_timer_ad(void *v) 1037 { 1038 NET_LOCK(); 1039 carp_send_ad(v); 1040 NET_UNLOCK(); 1041 } 1042 1043 void 1044 carp_send_ad(struct carp_vhost_entry *vhe) 1045 { 1046 struct carp_header ch; 1047 struct timeval tv; 1048 struct carp_softc *sc = vhe->parent_sc; 1049 struct carp_header *ch_ptr; 1050 struct mbuf *m; 1051 int error, len, advbase, advskew; 1052 struct ifnet *ifp; 1053 struct ifaddr *ifa; 1054 struct sockaddr sa; 1055 1056 NET_ASSERT_LOCKED(); 1057 1058 if ((ifp = if_get(sc->sc_carpdevidx)) == NULL) { 1059 sc->sc_if.if_oerrors++; 1060 return; 1061 } 1062 1063 /* bow out if we've gone to backup (the carp interface is going down) */ 1064 if (sc->sc_bow_out) { 1065 advbase = 255; 1066 advskew = 255; 1067 } else { 1068 advbase = sc->sc_advbase; 1069 advskew = vhe->advskew; 1070 tv.tv_sec = advbase; 1071 if (advbase == 0 && advskew == 0) 1072 tv.tv_usec = 1 * 1000000 / 256; 1073 else 1074 tv.tv_usec = advskew * 1000000 / 256; 1075 } 1076 1077 ch.carp_version = CARP_VERSION; 1078 ch.carp_type = CARP_ADVERTISEMENT; 1079 ch.carp_vhid = vhe->vhid; 1080 ch.carp_demote = carp_group_demote_count(sc) & 0xff; 1081 ch.carp_advbase = advbase; 1082 ch.carp_advskew = advskew; 1083 ch.carp_authlen = 7; /* XXX DEFINE */ 1084 ch.carp_cksum = 0; 1085 1086 sc->cur_vhe = vhe; /* we need the vhe later on the output path */ 1087 1088 if (sc->sc_naddrs) { 1089 struct ip *ip; 1090 1091 MGETHDR(m, M_DONTWAIT, MT_HEADER); 1092 if (m == NULL) { 1093 sc->sc_if.if_oerrors++; 1094 carpstat_inc(carps_onomem); 1095 /* XXX maybe less ? */ 1096 goto retry_later; 1097 } 1098 len = sizeof(*ip) + sizeof(ch); 1099 m->m_pkthdr.pf.prio = CARP_IFQ_PRIO; 1100 m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain; 1101 m->m_pkthdr.len = len; 1102 m->m_len = len; 1103 m_align(m, len); 1104 ip = mtod(m, struct ip *); 1105 ip->ip_v = IPVERSION; 1106 ip->ip_hl = sizeof(*ip) >> 2; 1107 ip->ip_tos = IPTOS_LOWDELAY; 1108 ip->ip_len = htons(len); 1109 ip->ip_id = htons(ip_randomid()); 1110 ip->ip_off = htons(IP_DF); 1111 ip->ip_ttl = CARP_DFLTTL; 1112 ip->ip_p = IPPROTO_CARP; 1113 ip->ip_sum = 0; 1114 1115 memset(&sa, 0, sizeof(sa)); 1116 sa.sa_family = AF_INET; 1117 /* Prefer addresses on the parent interface as source for AD. */ 1118 ifa = ifaof_ifpforaddr(&sa, ifp); 1119 if (ifa == NULL) 1120 ifa = ifaof_ifpforaddr(&sa, &sc->sc_if); 1121 KASSERT(ifa != NULL); 1122 ip->ip_src.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 1123 ip->ip_dst.s_addr = sc->sc_peer.s_addr; 1124 if (IN_MULTICAST(ip->ip_dst.s_addr)) 1125 m->m_flags |= M_MCAST; 1126 1127 ch_ptr = (struct carp_header *)(ip + 1); 1128 bcopy(&ch, ch_ptr, sizeof(ch)); 1129 carp_prepare_ad(m, vhe, ch_ptr); 1130 1131 m->m_data += sizeof(*ip); 1132 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip)); 1133 m->m_data -= sizeof(*ip); 1134 1135 getmicrotime(&sc->sc_if.if_lastchange); 1136 carpstat_inc(carps_opackets); 1137 1138 error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, 1139 NULL, 0); 1140 if (error && 1141 /* when unicast, the peer's down is not our fault */ 1142 !(!IN_MULTICAST(sc->sc_peer.s_addr) && error == EHOSTDOWN)){ 1143 if (error == ENOBUFS) 1144 carpstat_inc(carps_onomem); 1145 else 1146 CARP_LOG(LOG_WARNING, sc, 1147 ("ip_output failed: %d", error)); 1148 sc->sc_if.if_oerrors++; 1149 if (sc->sc_sendad_errors < INT_MAX) 1150 sc->sc_sendad_errors++; 1151 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS(sc)) 1152 carp_group_demote_adj(&sc->sc_if, 1, 1153 "> snderrors"); 1154 sc->sc_sendad_success = 0; 1155 } else { 1156 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS(sc)) { 1157 if (++sc->sc_sendad_success >= 1158 CARP_SENDAD_MIN_SUCCESS(sc)) { 1159 carp_group_demote_adj(&sc->sc_if, -1, 1160 "< snderrors"); 1161 sc->sc_sendad_errors = 0; 1162 } 1163 } else 1164 sc->sc_sendad_errors = 0; 1165 } 1166 if (vhe->vhe_leader) { 1167 if (sc->sc_delayed_arp > 0) 1168 sc->sc_delayed_arp--; 1169 if (sc->sc_delayed_arp == 0) { 1170 carp_send_arp(sc); 1171 sc->sc_delayed_arp = -1; 1172 } 1173 } 1174 } 1175 #ifdef INET6 1176 if (sc->sc_naddrs6) { 1177 struct ip6_hdr *ip6; 1178 1179 MGETHDR(m, M_DONTWAIT, MT_HEADER); 1180 if (m == NULL) { 1181 sc->sc_if.if_oerrors++; 1182 carpstat_inc(carps_onomem); 1183 /* XXX maybe less ? */ 1184 goto retry_later; 1185 } 1186 len = sizeof(*ip6) + sizeof(ch); 1187 m->m_pkthdr.pf.prio = CARP_IFQ_PRIO; 1188 m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain; 1189 m->m_pkthdr.len = len; 1190 m->m_len = len; 1191 m_align(m, len); 1192 m->m_flags |= M_MCAST; 1193 ip6 = mtod(m, struct ip6_hdr *); 1194 memset(ip6, 0, sizeof(*ip6)); 1195 ip6->ip6_vfc |= IPV6_VERSION; 1196 ip6->ip6_hlim = CARP_DFLTTL; 1197 ip6->ip6_nxt = IPPROTO_CARP; 1198 1199 /* set the source address */ 1200 memset(&sa, 0, sizeof(sa)); 1201 sa.sa_family = AF_INET6; 1202 /* Prefer addresses on the parent interface as source for AD. */ 1203 ifa = ifaof_ifpforaddr(&sa, ifp); 1204 if (ifa == NULL) 1205 ifa = ifaof_ifpforaddr(&sa, &sc->sc_if); 1206 KASSERT(ifa != NULL); 1207 bcopy(ifatoia6(ifa)->ia_addr.sin6_addr.s6_addr, 1208 &ip6->ip6_src, sizeof(struct in6_addr)); 1209 /* set the multicast destination */ 1210 1211 ip6->ip6_dst.s6_addr16[0] = htons(0xff02); 1212 ip6->ip6_dst.s6_addr16[1] = htons(ifp->if_index); 1213 ip6->ip6_dst.s6_addr8[15] = 0x12; 1214 1215 ch_ptr = (struct carp_header *)(ip6 + 1); 1216 bcopy(&ch, ch_ptr, sizeof(ch)); 1217 carp_prepare_ad(m, vhe, ch_ptr); 1218 1219 m->m_data += sizeof(*ip6); 1220 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6)); 1221 m->m_data -= sizeof(*ip6); 1222 1223 getmicrotime(&sc->sc_if.if_lastchange); 1224 carpstat_inc(carps_opackets6); 1225 1226 error = ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL); 1227 if (error) { 1228 if (error == ENOBUFS) 1229 carpstat_inc(carps_onomem); 1230 else 1231 CARP_LOG(LOG_WARNING, sc, 1232 ("ip6_output failed: %d", error)); 1233 sc->sc_if.if_oerrors++; 1234 if (sc->sc_sendad_errors < INT_MAX) 1235 sc->sc_sendad_errors++; 1236 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS(sc)) 1237 carp_group_demote_adj(&sc->sc_if, 1, 1238 "> snd6errors"); 1239 sc->sc_sendad_success = 0; 1240 } else { 1241 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS(sc)) { 1242 if (++sc->sc_sendad_success >= 1243 CARP_SENDAD_MIN_SUCCESS(sc)) { 1244 carp_group_demote_adj(&sc->sc_if, -1, 1245 "< snd6errors"); 1246 sc->sc_sendad_errors = 0; 1247 } 1248 } else 1249 sc->sc_sendad_errors = 0; 1250 } 1251 } 1252 #endif /* INET6 */ 1253 1254 retry_later: 1255 sc->cur_vhe = NULL; 1256 if (advbase != 255 || advskew != 255) 1257 timeout_add_tv(&vhe->ad_tmo, &tv); 1258 if_put(ifp); 1259 } 1260 1261 /* 1262 * Broadcast a gratuitous ARP request containing 1263 * the virtual router MAC address for each IP address 1264 * associated with the virtual router. 1265 */ 1266 void 1267 carp_send_arp(struct carp_softc *sc) 1268 { 1269 struct ifaddr *ifa; 1270 in_addr_t in; 1271 1272 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 1273 1274 if (ifa->ifa_addr->sa_family != AF_INET) 1275 continue; 1276 1277 in = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 1278 arprequest(&sc->sc_if, &in, &in, sc->sc_ac.ac_enaddr); 1279 } 1280 } 1281 1282 #ifdef INET6 1283 void 1284 carp_send_na(struct carp_softc *sc) 1285 { 1286 struct ifaddr *ifa; 1287 struct in6_addr *in6; 1288 static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; 1289 1290 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 1291 1292 if (ifa->ifa_addr->sa_family != AF_INET6) 1293 continue; 1294 1295 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr; 1296 nd6_na_output(&sc->sc_if, &mcast, in6, 1297 ND_NA_FLAG_OVERRIDE | 1298 (ip6_forwarding ? ND_NA_FLAG_ROUTER : 0), 1, NULL); 1299 } 1300 } 1301 #endif /* INET6 */ 1302 1303 void 1304 carp_update_lsmask(struct carp_softc *sc) 1305 { 1306 struct carp_vhost_entry *vhe; 1307 int count; 1308 1309 if (sc->sc_balancing == CARP_BAL_NONE) 1310 return; 1311 1312 sc->sc_lsmask = 0; 1313 count = 0; 1314 1315 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 1316 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) { 1317 if (vhe->state == MASTER && count < sizeof(sc->sc_lsmask) * 8) 1318 sc->sc_lsmask |= 1 << count; 1319 count++; 1320 } 1321 sc->sc_lscount = count; 1322 CARP_LOG(LOG_DEBUG, sc, ("carp_update_lsmask: %x", sc->sc_lsmask)); 1323 } 1324 1325 int 1326 carp_iamatch(struct ifnet *ifp) 1327 { 1328 struct carp_softc *sc = ifp->if_softc; 1329 struct carp_vhost_entry *vhe; 1330 struct srp_ref sr; 1331 int match = 0; 1332 1333 vhe = SRPL_FIRST(&sr, &sc->carp_vhosts); 1334 if (vhe->state == MASTER) 1335 match = 1; 1336 SRPL_LEAVE(&sr); 1337 1338 return (match); 1339 } 1340 1341 int 1342 carp_ourether(struct ifnet *ifp, uint8_t *ena) 1343 { 1344 struct srpl *cif = &ifp->if_carp; 1345 struct carp_softc *sc; 1346 struct srp_ref sr; 1347 int match = 0; 1348 1349 KASSERT(ifp->if_type == IFT_ETHER); 1350 1351 SRPL_FOREACH(sc, &sr, cif, sc_list) { 1352 if ((sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) != 1353 (IFF_UP|IFF_RUNNING)) 1354 continue; 1355 if (carp_vhe_match(sc, ena)) { 1356 match = 1; 1357 break; 1358 } 1359 } 1360 SRPL_LEAVE(&sr); 1361 1362 return (match); 1363 } 1364 1365 int 1366 carp_vhe_match(struct carp_softc *sc, uint8_t *ena) 1367 { 1368 struct carp_vhost_entry *vhe; 1369 struct srp_ref sr; 1370 int match = 0; 1371 1372 vhe = SRPL_FIRST(&sr, &sc->carp_vhosts); 1373 match = (vhe->state == MASTER || sc->sc_balancing >= CARP_BAL_IP) && 1374 !memcmp(ena, sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN); 1375 SRPL_LEAVE(&sr); 1376 1377 return (match); 1378 } 1379 1380 struct mbuf * 1381 carp_input(struct ifnet *ifp0, struct mbuf *m) 1382 { 1383 struct ether_header *eh; 1384 struct srpl *cif; 1385 struct carp_softc *sc; 1386 struct srp_ref sr; 1387 1388 eh = mtod(m, struct ether_header *); 1389 cif = &ifp0->if_carp; 1390 1391 SRPL_FOREACH(sc, &sr, cif, sc_list) { 1392 if ((sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) != 1393 (IFF_UP|IFF_RUNNING)) 1394 continue; 1395 1396 if (carp_vhe_match(sc, eh->ether_dhost)) { 1397 /* 1398 * These packets look like layer 2 multicast but they 1399 * are unicast at layer 3. With help of the tag the 1400 * mbuf's M_MCAST flag can be removed by carp_lsdrop() 1401 * after we have passed layer 2. 1402 */ 1403 if (sc->sc_balancing == CARP_BAL_IP) { 1404 struct m_tag *mtag; 1405 mtag = m_tag_get(PACKET_TAG_CARP_BAL_IP, 0, 1406 M_NOWAIT); 1407 if (mtag == NULL) { 1408 m_freem(m); 1409 goto out; 1410 } 1411 m_tag_prepend(m, mtag); 1412 } 1413 break; 1414 } 1415 } 1416 1417 if (sc == NULL) { 1418 SRPL_LEAVE(&sr); 1419 1420 if (!ETHER_IS_MULTICAST(eh->ether_dhost)) 1421 return (m); 1422 1423 /* 1424 * XXX Should really check the list of multicast addresses 1425 * for each CARP interface _before_ copying. 1426 */ 1427 SRPL_FOREACH(sc, &sr, cif, sc_list) { 1428 struct mbuf *m0; 1429 1430 if (!(sc->sc_if.if_flags & IFF_UP)) 1431 continue; 1432 1433 m0 = m_dup_pkt(m, ETHER_ALIGN, M_DONTWAIT); 1434 if (m0 == NULL) 1435 continue; 1436 1437 if_vinput(&sc->sc_if, m0); 1438 } 1439 SRPL_LEAVE(&sr); 1440 1441 return (m); 1442 } 1443 1444 if_vinput(&sc->sc_if, m); 1445 out: 1446 SRPL_LEAVE(&sr); 1447 1448 return (NULL); 1449 } 1450 1451 int 1452 carp_lsdrop(struct ifnet *ifp, struct mbuf *m, sa_family_t af, u_int32_t *src, 1453 u_int32_t *dst, int drop) 1454 { 1455 struct carp_softc *sc; 1456 u_int32_t fold; 1457 struct m_tag *mtag; 1458 1459 if (ifp->if_type != IFT_CARP) 1460 return 0; 1461 sc = ifp->if_softc; 1462 if (sc->sc_balancing == CARP_BAL_NONE) 1463 return 0; 1464 1465 /* 1466 * Remove M_MCAST flag from mbuf of balancing ip traffic, since the fact 1467 * that it is layer 2 multicast does not implicate that it is also layer 1468 * 3 multicast. 1469 */ 1470 if (m->m_flags & M_MCAST && 1471 (mtag = m_tag_find(m, PACKET_TAG_CARP_BAL_IP, NULL))) { 1472 m_tag_delete(m, mtag); 1473 m->m_flags &= ~M_MCAST; 1474 } 1475 1476 /* 1477 * Return without making a drop decision. This allows to clear the 1478 * M_MCAST flag and do nothing else. 1479 */ 1480 if (!drop) 1481 return 0; 1482 1483 /* 1484 * Never drop carp advertisements. 1485 * XXX Bad idea to pass all broadcast / multicast traffic? 1486 */ 1487 if (m->m_flags & (M_BCAST|M_MCAST)) 1488 return 0; 1489 1490 fold = src[0] ^ dst[0]; 1491 #ifdef INET6 1492 if (af == AF_INET6) { 1493 int i; 1494 for (i = 1; i < 4; i++) 1495 fold ^= src[i] ^ dst[i]; 1496 } 1497 #endif 1498 if (sc->sc_lscount == 0) /* just to be safe */ 1499 return 1; 1500 1501 return ((1 << (ntohl(fold) % sc->sc_lscount)) & sc->sc_lsmask) == 0; 1502 } 1503 1504 void 1505 carp_timer_down(void *v) 1506 { 1507 NET_LOCK(); 1508 carp_master_down(v); 1509 NET_UNLOCK(); 1510 } 1511 1512 void 1513 carp_master_down(struct carp_vhost_entry *vhe) 1514 { 1515 struct carp_softc *sc = vhe->parent_sc; 1516 1517 NET_ASSERT_LOCKED(); 1518 1519 switch (vhe->state) { 1520 case INIT: 1521 printf("%s: master_down event in INIT state\n", 1522 sc->sc_if.if_xname); 1523 break; 1524 case MASTER: 1525 break; 1526 case BACKUP: 1527 carp_set_state(vhe, MASTER); 1528 carp_send_ad(vhe); 1529 if (sc->sc_balancing == CARP_BAL_NONE && vhe->vhe_leader) { 1530 carp_send_arp(sc); 1531 /* Schedule a delayed ARP to deal w/ some L3 switches */ 1532 sc->sc_delayed_arp = 2; 1533 #ifdef INET6 1534 carp_send_na(sc); 1535 #endif /* INET6 */ 1536 } 1537 carp_setrun(vhe, 0); 1538 carpstat_inc(carps_preempt); 1539 break; 1540 } 1541 } 1542 1543 void 1544 carp_setrun_all(struct carp_softc *sc, sa_family_t af) 1545 { 1546 struct carp_vhost_entry *vhe; 1547 1548 KERNEL_ASSERT_LOCKED(); /* touching carp_vhost */ 1549 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) { 1550 carp_setrun(vhe, af); 1551 } 1552 } 1553 1554 /* 1555 * When in backup state, af indicates whether to reset the master down timer 1556 * for v4 or v6. If it's set to zero, reset the ones which are already pending. 1557 */ 1558 void 1559 carp_setrun(struct carp_vhost_entry *vhe, sa_family_t af) 1560 { 1561 struct ifnet *ifp; 1562 struct timeval tv; 1563 struct carp_softc *sc = vhe->parent_sc; 1564 1565 if ((ifp = if_get(sc->sc_carpdevidx)) == NULL) { 1566 sc->sc_if.if_flags &= ~IFF_RUNNING; 1567 carp_set_state_all(sc, INIT); 1568 return; 1569 } 1570 1571 if (memcmp(((struct arpcom *)ifp)->ac_enaddr, 1572 sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN) == 0) 1573 sc->sc_realmac = 1; 1574 else 1575 sc->sc_realmac = 0; 1576 1577 if_put(ifp); 1578 1579 if (sc->sc_if.if_flags & IFF_UP && vhe->vhid > 0 && 1580 (sc->sc_naddrs || sc->sc_naddrs6) && !sc->sc_suppress) { 1581 sc->sc_if.if_flags |= IFF_RUNNING; 1582 } else { 1583 sc->sc_if.if_flags &= ~IFF_RUNNING; 1584 return; 1585 } 1586 1587 switch (vhe->state) { 1588 case INIT: 1589 carp_set_state(vhe, BACKUP); 1590 carp_setrun(vhe, 0); 1591 break; 1592 case BACKUP: 1593 timeout_del(&vhe->ad_tmo); 1594 tv.tv_sec = 3 * sc->sc_advbase; 1595 if (sc->sc_advbase == 0 && vhe->advskew == 0) 1596 tv.tv_usec = 3 * 1000000 / 256; 1597 else if (sc->sc_advbase == 0) 1598 tv.tv_usec = 3 * vhe->advskew * 1000000 / 256; 1599 else 1600 tv.tv_usec = vhe->advskew * 1000000 / 256; 1601 if (vhe->vhe_leader) 1602 sc->sc_delayed_arp = -1; 1603 switch (af) { 1604 case AF_INET: 1605 timeout_add_tv(&vhe->md_tmo, &tv); 1606 break; 1607 #ifdef INET6 1608 case AF_INET6: 1609 timeout_add_tv(&vhe->md6_tmo, &tv); 1610 break; 1611 #endif /* INET6 */ 1612 default: 1613 if (sc->sc_naddrs) 1614 timeout_add_tv(&vhe->md_tmo, &tv); 1615 if (sc->sc_naddrs6) 1616 timeout_add_tv(&vhe->md6_tmo, &tv); 1617 break; 1618 } 1619 break; 1620 case MASTER: 1621 tv.tv_sec = sc->sc_advbase; 1622 if (sc->sc_advbase == 0 && vhe->advskew == 0) 1623 tv.tv_usec = 1 * 1000000 / 256; 1624 else 1625 tv.tv_usec = vhe->advskew * 1000000 / 256; 1626 timeout_add_tv(&vhe->ad_tmo, &tv); 1627 break; 1628 } 1629 } 1630 1631 void 1632 carp_multicast_cleanup(struct carp_softc *sc) 1633 { 1634 struct ip_moptions *imo = &sc->sc_imo; 1635 #ifdef INET6 1636 struct ip6_moptions *im6o = &sc->sc_im6o; 1637 #endif 1638 u_int16_t n = imo->imo_num_memberships; 1639 1640 /* Clean up our own multicast memberships */ 1641 while (n-- > 0) { 1642 if (imo->imo_membership[n] != NULL) { 1643 in_delmulti(imo->imo_membership[n]); 1644 imo->imo_membership[n] = NULL; 1645 } 1646 } 1647 imo->imo_num_memberships = 0; 1648 imo->imo_ifidx = 0; 1649 1650 #ifdef INET6 1651 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 1652 struct in6_multi_mship *imm = 1653 LIST_FIRST(&im6o->im6o_memberships); 1654 1655 LIST_REMOVE(imm, i6mm_chain); 1656 in6_leavegroup(imm); 1657 } 1658 im6o->im6o_ifidx = 0; 1659 #endif 1660 1661 /* And any other multicast memberships */ 1662 carp_ether_purgemulti(sc); 1663 } 1664 1665 int 1666 carp_set_ifp(struct carp_softc *sc, struct ifnet *ifp0) 1667 { 1668 struct srpl *cif; 1669 struct carp_softc *vr, *last = NULL, *after = NULL; 1670 int myself = 0, error = 0; 1671 1672 KASSERT(ifp0->if_index != sc->sc_carpdevidx); 1673 KERNEL_ASSERT_LOCKED(); /* touching if_carp */ 1674 1675 if ((ifp0->if_flags & IFF_MULTICAST) == 0) 1676 return (EADDRNOTAVAIL); 1677 1678 if (ifp0->if_type != IFT_ETHER) 1679 return (EINVAL); 1680 1681 cif = &ifp0->if_carp; 1682 if (carp_check_dup_vhids(sc, cif, NULL)) 1683 return (EINVAL); 1684 1685 if ((error = ifpromisc(ifp0, 1))) 1686 return (error); 1687 1688 /* detach from old interface */ 1689 if (sc->sc_carpdevidx != 0) 1690 carpdetach(sc); 1691 1692 /* attach carp interface to physical interface */ 1693 if_detachhook_add(ifp0, &sc->sc_dtask); 1694 if_linkstatehook_add(ifp0, &sc->sc_ltask); 1695 1696 sc->sc_carpdevidx = ifp0->if_index; 1697 sc->sc_if.if_capabilities = ifp0->if_capabilities & 1698 IFCAP_CSUM_MASK; 1699 1700 SRPL_FOREACH_LOCKED(vr, cif, sc_list) { 1701 struct carp_vhost_entry *vrhead, *schead; 1702 last = vr; 1703 1704 if (vr == sc) 1705 myself = 1; 1706 1707 vrhead = SRPL_FIRST_LOCKED(&vr->carp_vhosts); 1708 schead = SRPL_FIRST_LOCKED(&sc->carp_vhosts); 1709 if (vrhead->vhid < schead->vhid) 1710 after = vr; 1711 } 1712 1713 if (!myself) { 1714 /* We're trying to keep things in order */ 1715 if (last == NULL) { 1716 SRPL_INSERT_HEAD_LOCKED(&carp_sc_rc, cif, 1717 sc, sc_list); 1718 } else if (after == NULL) { 1719 SRPL_INSERT_AFTER_LOCKED(&carp_sc_rc, last, 1720 sc, sc_list); 1721 } else { 1722 SRPL_INSERT_AFTER_LOCKED(&carp_sc_rc, after, 1723 sc, sc_list); 1724 } 1725 } 1726 if (sc->sc_naddrs || sc->sc_naddrs6) 1727 sc->sc_if.if_flags |= IFF_UP; 1728 carp_set_enaddr(sc); 1729 1730 carp_carpdev_state(sc); 1731 1732 return (0); 1733 } 1734 1735 void 1736 carp_set_vhe_enaddr(struct carp_vhost_entry *vhe) 1737 { 1738 struct carp_softc *sc = vhe->parent_sc; 1739 1740 if (vhe->vhid != 0 && sc->sc_carpdevidx != 0) { 1741 if (vhe->vhe_leader && sc->sc_balancing == CARP_BAL_IP) 1742 vhe->vhe_enaddr[0] = 1; 1743 else 1744 vhe->vhe_enaddr[0] = 0; 1745 vhe->vhe_enaddr[1] = 0; 1746 vhe->vhe_enaddr[2] = 0x5e; 1747 vhe->vhe_enaddr[3] = 0; 1748 vhe->vhe_enaddr[4] = 1; 1749 vhe->vhe_enaddr[5] = vhe->vhid; 1750 } else 1751 memset(vhe->vhe_enaddr, 0, ETHER_ADDR_LEN); 1752 } 1753 1754 void 1755 carp_set_enaddr(struct carp_softc *sc) 1756 { 1757 struct carp_vhost_entry *vhe; 1758 1759 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 1760 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) 1761 carp_set_vhe_enaddr(vhe); 1762 1763 vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts); 1764 1765 /* 1766 * Use the carp lladdr if the running one isn't manually set. 1767 * Only compare static parts of the lladdr. 1768 */ 1769 if ((memcmp(sc->sc_ac.ac_enaddr + 1, vhe->vhe_enaddr + 1, 1770 ETHER_ADDR_LEN - 2) == 0) || 1771 (!sc->sc_ac.ac_enaddr[0] && !sc->sc_ac.ac_enaddr[1] && 1772 !sc->sc_ac.ac_enaddr[2] && !sc->sc_ac.ac_enaddr[3] && 1773 !sc->sc_ac.ac_enaddr[4] && !sc->sc_ac.ac_enaddr[5])) 1774 bcopy(vhe->vhe_enaddr, sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN); 1775 1776 /* Make sure the enaddr has changed before further twiddling. */ 1777 if (memcmp(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN) != 0) { 1778 bcopy(sc->sc_ac.ac_enaddr, LLADDR(sc->sc_if.if_sadl), 1779 ETHER_ADDR_LEN); 1780 bcopy(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN); 1781 #ifdef INET6 1782 /* 1783 * (re)attach a link-local address which matches 1784 * our new MAC address. 1785 */ 1786 if (sc->sc_naddrs6) 1787 in6_ifattach_linklocal(&sc->sc_if, NULL); 1788 #endif 1789 carp_set_state_all(sc, INIT); 1790 carp_setrun_all(sc, 0); 1791 } 1792 } 1793 1794 void 1795 carp_addr_updated(void *v) 1796 { 1797 struct carp_softc *sc = (struct carp_softc *) v; 1798 struct ifaddr *ifa; 1799 int new_naddrs = 0, new_naddrs6 = 0; 1800 1801 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 1802 if (ifa->ifa_addr->sa_family == AF_INET) 1803 new_naddrs++; 1804 #ifdef INET6 1805 else if (ifa->ifa_addr->sa_family == AF_INET6) 1806 new_naddrs6++; 1807 #endif /* INET6 */ 1808 } 1809 1810 /* We received address changes from if_addrhooks callback */ 1811 if (new_naddrs != sc->sc_naddrs || new_naddrs6 != sc->sc_naddrs6) { 1812 1813 sc->sc_naddrs = new_naddrs; 1814 sc->sc_naddrs6 = new_naddrs6; 1815 1816 /* Re-establish multicast membership removed by in_control */ 1817 if (IN_MULTICAST(sc->sc_peer.s_addr)) { 1818 if (!in_hasmulti(&sc->sc_peer, &sc->sc_if)) { 1819 struct in_multi **imm = 1820 sc->sc_imo.imo_membership; 1821 u_int16_t maxmem = 1822 sc->sc_imo.imo_max_memberships; 1823 1824 memset(&sc->sc_imo, 0, sizeof(sc->sc_imo)); 1825 sc->sc_imo.imo_membership = imm; 1826 sc->sc_imo.imo_max_memberships = maxmem; 1827 1828 if (sc->sc_carpdevidx != 0 && 1829 sc->sc_naddrs > 0) 1830 carp_join_multicast(sc); 1831 } 1832 } 1833 1834 if (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) { 1835 sc->sc_if.if_flags &= ~IFF_UP; 1836 carp_set_state_all(sc, INIT); 1837 } else 1838 carp_hmac_prepare(sc); 1839 } 1840 1841 carp_setrun_all(sc, 0); 1842 } 1843 1844 int 1845 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin) 1846 { 1847 struct in_addr *in = &sin->sin_addr; 1848 int error; 1849 1850 KASSERT(sc->sc_carpdevidx != 0); 1851 1852 /* XXX is this necessary? */ 1853 if (in->s_addr == INADDR_ANY) { 1854 carp_setrun_all(sc, 0); 1855 return (0); 1856 } 1857 1858 if (sc->sc_naddrs == 0 && (error = carp_join_multicast(sc)) != 0) 1859 return (error); 1860 1861 carp_set_state_all(sc, INIT); 1862 1863 return (0); 1864 } 1865 1866 int 1867 carp_join_multicast(struct carp_softc *sc) 1868 { 1869 struct ip_moptions *imo = &sc->sc_imo; 1870 struct in_multi *imm; 1871 struct in_addr addr; 1872 1873 if (!IN_MULTICAST(sc->sc_peer.s_addr)) 1874 return (0); 1875 1876 addr.s_addr = sc->sc_peer.s_addr; 1877 if ((imm = in_addmulti(&addr, &sc->sc_if)) == NULL) 1878 return (ENOBUFS); 1879 1880 imo->imo_membership[0] = imm; 1881 imo->imo_num_memberships = 1; 1882 imo->imo_ifidx = sc->sc_if.if_index; 1883 imo->imo_ttl = CARP_DFLTTL; 1884 imo->imo_loop = 0; 1885 return (0); 1886 } 1887 1888 1889 #ifdef INET6 1890 int 1891 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) 1892 { 1893 int error; 1894 1895 KASSERT(sc->sc_carpdevidx != 0); 1896 1897 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 1898 carp_setrun_all(sc, 0); 1899 return (0); 1900 } 1901 1902 if (sc->sc_naddrs6 == 0 && (error = carp_join_multicast6(sc)) != 0) 1903 return (error); 1904 1905 carp_set_state_all(sc, INIT); 1906 1907 return (0); 1908 } 1909 1910 int 1911 carp_join_multicast6(struct carp_softc *sc) 1912 { 1913 struct in6_multi_mship *imm, *imm2; 1914 struct ip6_moptions *im6o = &sc->sc_im6o; 1915 struct sockaddr_in6 addr6; 1916 int error; 1917 1918 /* Join IPv6 CARP multicast group */ 1919 memset(&addr6, 0, sizeof(addr6)); 1920 addr6.sin6_family = AF_INET6; 1921 addr6.sin6_len = sizeof(addr6); 1922 addr6.sin6_addr.s6_addr16[0] = htons(0xff02); 1923 addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index); 1924 addr6.sin6_addr.s6_addr8[15] = 0x12; 1925 if ((imm = in6_joingroup(&sc->sc_if, 1926 &addr6.sin6_addr, &error)) == NULL) { 1927 return (error); 1928 } 1929 /* join solicited multicast address */ 1930 memset(&addr6.sin6_addr, 0, sizeof(addr6.sin6_addr)); 1931 addr6.sin6_addr.s6_addr16[0] = htons(0xff02); 1932 addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index); 1933 addr6.sin6_addr.s6_addr32[1] = 0; 1934 addr6.sin6_addr.s6_addr32[2] = htonl(1); 1935 addr6.sin6_addr.s6_addr32[3] = 0; 1936 addr6.sin6_addr.s6_addr8[12] = 0xff; 1937 if ((imm2 = in6_joingroup(&sc->sc_if, 1938 &addr6.sin6_addr, &error)) == NULL) { 1939 in6_leavegroup(imm); 1940 return (error); 1941 } 1942 1943 /* apply v6 multicast membership */ 1944 im6o->im6o_ifidx = sc->sc_if.if_index; 1945 if (imm) 1946 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, 1947 i6mm_chain); 1948 if (imm2) 1949 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm2, 1950 i6mm_chain); 1951 1952 return (0); 1953 } 1954 1955 #endif /* INET6 */ 1956 1957 int 1958 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr) 1959 { 1960 struct proc *p = curproc; /* XXX */ 1961 struct carp_softc *sc = ifp->if_softc; 1962 struct carp_vhost_entry *vhe; 1963 struct carpreq carpr; 1964 struct ifaddr *ifa = (struct ifaddr *)addr; 1965 struct ifreq *ifr = (struct ifreq *)addr; 1966 struct ifnet *ifp0 = NULL; 1967 int i, error = 0; 1968 1969 switch (cmd) { 1970 case SIOCSIFADDR: 1971 if (sc->sc_carpdevidx == 0) 1972 return (EINVAL); 1973 1974 switch (ifa->ifa_addr->sa_family) { 1975 case AF_INET: 1976 sc->sc_if.if_flags |= IFF_UP; 1977 error = carp_set_addr(sc, satosin(ifa->ifa_addr)); 1978 break; 1979 #ifdef INET6 1980 case AF_INET6: 1981 sc->sc_if.if_flags |= IFF_UP; 1982 error = carp_set_addr6(sc, satosin6(ifa->ifa_addr)); 1983 break; 1984 #endif /* INET6 */ 1985 default: 1986 error = EAFNOSUPPORT; 1987 break; 1988 } 1989 break; 1990 1991 case SIOCSIFFLAGS: 1992 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 1993 vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts); 1994 if (vhe->state != INIT && !(ifr->ifr_flags & IFF_UP)) { 1995 carp_del_all_timeouts(sc); 1996 1997 /* we need the interface up to bow out */ 1998 sc->sc_if.if_flags |= IFF_UP; 1999 sc->sc_bow_out = 1; 2000 carp_vhe_send_ad_all(sc); 2001 sc->sc_bow_out = 0; 2002 2003 sc->sc_if.if_flags &= ~IFF_UP; 2004 carp_set_state_all(sc, INIT); 2005 carp_setrun_all(sc, 0); 2006 } else if (vhe->state == INIT && (ifr->ifr_flags & IFF_UP)) { 2007 sc->sc_if.if_flags |= IFF_UP; 2008 carp_setrun_all(sc, 0); 2009 } 2010 break; 2011 2012 case SIOCSVH: 2013 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 2014 vhe = SRPL_FIRST_LOCKED(&sc->carp_vhosts); 2015 if ((error = suser(p)) != 0) 2016 break; 2017 if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr))) 2018 break; 2019 error = 1; 2020 if (carpr.carpr_carpdev[0] != '\0' && 2021 (ifp0 = if_unit(carpr.carpr_carpdev)) == NULL) 2022 return (EINVAL); 2023 if (carpr.carpr_peer.s_addr == 0) 2024 sc->sc_peer.s_addr = INADDR_CARP_GROUP; 2025 else 2026 sc->sc_peer.s_addr = carpr.carpr_peer.s_addr; 2027 if (ifp0 != NULL && ifp0->if_index != sc->sc_carpdevidx) { 2028 if ((error = carp_set_ifp(sc, ifp0))) { 2029 if_put(ifp0); 2030 return (error); 2031 } 2032 } 2033 if_put(ifp0); 2034 if (vhe->state != INIT && carpr.carpr_state != vhe->state) { 2035 switch (carpr.carpr_state) { 2036 case BACKUP: 2037 timeout_del(&vhe->ad_tmo); 2038 carp_set_state_all(sc, BACKUP); 2039 carp_setrun_all(sc, 0); 2040 break; 2041 case MASTER: 2042 KERNEL_ASSERT_LOCKED(); 2043 /* touching carp_vhosts */ 2044 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, 2045 vhost_entries) 2046 carp_master_down(vhe); 2047 break; 2048 default: 2049 break; 2050 } 2051 } 2052 if ((error = carp_vhids_ioctl(sc, &carpr))) 2053 return (error); 2054 if (carpr.carpr_advbase >= 0) { 2055 if (carpr.carpr_advbase > 255) { 2056 error = EINVAL; 2057 break; 2058 } 2059 sc->sc_advbase = carpr.carpr_advbase; 2060 error--; 2061 } 2062 if (memcmp(sc->sc_advskews, carpr.carpr_advskews, 2063 sizeof(sc->sc_advskews))) { 2064 i = 0; 2065 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 2066 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, 2067 vhost_entries) 2068 vhe->advskew = carpr.carpr_advskews[i++]; 2069 bcopy(carpr.carpr_advskews, sc->sc_advskews, 2070 sizeof(sc->sc_advskews)); 2071 } 2072 if (sc->sc_balancing != carpr.carpr_balancing) { 2073 if (carpr.carpr_balancing > CARP_BAL_MAXID) { 2074 error = EINVAL; 2075 break; 2076 } 2077 sc->sc_balancing = carpr.carpr_balancing; 2078 carp_set_enaddr(sc); 2079 carp_update_lsmask(sc); 2080 } 2081 bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key)); 2082 if (error > 0) 2083 error = EINVAL; 2084 else { 2085 error = 0; 2086 carp_hmac_prepare(sc); 2087 carp_setrun_all(sc, 0); 2088 } 2089 break; 2090 2091 case SIOCGVH: 2092 memset(&carpr, 0, sizeof(carpr)); 2093 if ((ifp0 = if_get(sc->sc_carpdevidx)) != NULL) 2094 strlcpy(carpr.carpr_carpdev, ifp0->if_xname, IFNAMSIZ); 2095 if_put(ifp0); 2096 i = 0; 2097 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 2098 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) { 2099 carpr.carpr_vhids[i] = vhe->vhid; 2100 carpr.carpr_advskews[i] = vhe->advskew; 2101 carpr.carpr_states[i] = vhe->state; 2102 i++; 2103 } 2104 carpr.carpr_advbase = sc->sc_advbase; 2105 carpr.carpr_balancing = sc->sc_balancing; 2106 if (suser(p) == 0) 2107 bcopy(sc->sc_key, carpr.carpr_key, 2108 sizeof(carpr.carpr_key)); 2109 carpr.carpr_peer.s_addr = sc->sc_peer.s_addr; 2110 error = copyout(&carpr, ifr->ifr_data, sizeof(carpr)); 2111 break; 2112 2113 case SIOCADDMULTI: 2114 error = carp_ether_addmulti(sc, ifr); 2115 break; 2116 2117 case SIOCDELMULTI: 2118 error = carp_ether_delmulti(sc, ifr); 2119 break; 2120 case SIOCAIFGROUP: 2121 case SIOCDIFGROUP: 2122 if (sc->sc_demote_cnt) 2123 carp_ifgroup_ioctl(ifp, cmd, addr); 2124 break; 2125 case SIOCSIFGATTR: 2126 carp_ifgattr_ioctl(ifp, cmd, addr); 2127 break; 2128 default: 2129 error = ENOTTY; 2130 } 2131 2132 if (memcmp(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN) != 0) 2133 carp_set_enaddr(sc); 2134 return (error); 2135 } 2136 2137 int 2138 carp_check_dup_vhids(struct carp_softc *sc, struct srpl *cif, 2139 struct carpreq *carpr) 2140 { 2141 struct carp_softc *vr; 2142 struct carp_vhost_entry *vhe, *vhe0; 2143 int i; 2144 2145 KERNEL_ASSERT_LOCKED(); /* touching if_carp + carp_vhosts */ 2146 2147 SRPL_FOREACH_LOCKED(vr, cif, sc_list) { 2148 if (vr == sc) 2149 continue; 2150 SRPL_FOREACH_LOCKED(vhe, &vr->carp_vhosts, vhost_entries) { 2151 if (carpr) { 2152 for (i = 0; carpr->carpr_vhids[i]; i++) { 2153 if (vhe->vhid == carpr->carpr_vhids[i]) 2154 return (EINVAL); 2155 } 2156 } 2157 SRPL_FOREACH_LOCKED(vhe0, &sc->carp_vhosts, 2158 vhost_entries) { 2159 if (vhe->vhid == vhe0->vhid) 2160 return (EINVAL); 2161 } 2162 } 2163 } 2164 return (0); 2165 } 2166 2167 int 2168 carp_vhids_ioctl(struct carp_softc *sc, struct carpreq *carpr) 2169 { 2170 int i, j; 2171 u_int8_t taken_vhids[256]; 2172 2173 if (carpr->carpr_vhids[0] == 0 || 2174 !memcmp(sc->sc_vhids, carpr->carpr_vhids, sizeof(sc->sc_vhids))) 2175 return (0); 2176 2177 memset(taken_vhids, 0, sizeof(taken_vhids)); 2178 for (i = 0; carpr->carpr_vhids[i]; i++) { 2179 struct ifnet *ifp; 2180 2181 if (taken_vhids[carpr->carpr_vhids[i]]) 2182 return (EINVAL); 2183 taken_vhids[carpr->carpr_vhids[i]] = 1; 2184 2185 if ((ifp = if_get(sc->sc_carpdevidx)) != NULL) { 2186 struct srpl *cif; 2187 cif = &ifp->if_carp; 2188 if (carp_check_dup_vhids(sc, cif, carpr)) { 2189 if_put(ifp); 2190 return (EINVAL); 2191 } 2192 } 2193 if_put(ifp); 2194 if (carpr->carpr_advskews[i] >= 255) 2195 return (EINVAL); 2196 } 2197 /* set sane balancing defaults */ 2198 if (i <= 1) 2199 carpr->carpr_balancing = CARP_BAL_NONE; 2200 else if (carpr->carpr_balancing == CARP_BAL_NONE && 2201 sc->sc_balancing == CARP_BAL_NONE) 2202 carpr->carpr_balancing = CARP_BAL_IP; 2203 2204 /* destroy all */ 2205 carp_del_all_timeouts(sc); 2206 carp_destroy_vhosts(sc); 2207 memset(sc->sc_vhids, 0, sizeof(sc->sc_vhids)); 2208 2209 /* sort vhosts list by vhid */ 2210 for (j = 1; j <= 255; j++) { 2211 for (i = 0; carpr->carpr_vhids[i]; i++) { 2212 if (carpr->carpr_vhids[i] != j) 2213 continue; 2214 if (carp_new_vhost(sc, carpr->carpr_vhids[i], 2215 carpr->carpr_advskews[i])) 2216 return (ENOMEM); 2217 sc->sc_vhids[i] = carpr->carpr_vhids[i]; 2218 sc->sc_advskews[i] = carpr->carpr_advskews[i]; 2219 } 2220 } 2221 carp_set_enaddr(sc); 2222 carp_set_state_all(sc, INIT); 2223 return (0); 2224 } 2225 2226 void 2227 carp_ifgroup_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr) 2228 { 2229 struct ifgroupreq *ifgr = (struct ifgroupreq *)addr; 2230 struct ifg_list *ifgl; 2231 int *dm, adj; 2232 2233 if (!strcmp(ifgr->ifgr_group, IFG_ALL)) 2234 return; 2235 adj = ((struct carp_softc *)ifp->if_softc)->sc_demote_cnt; 2236 if (cmd == SIOCDIFGROUP) 2237 adj = adj * -1; 2238 2239 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) 2240 if (!strcmp(ifgl->ifgl_group->ifg_group, ifgr->ifgr_group)) { 2241 dm = &ifgl->ifgl_group->ifg_carp_demoted; 2242 if (*dm + adj >= 0) 2243 *dm += adj; 2244 else 2245 *dm = 0; 2246 } 2247 } 2248 2249 void 2250 carp_ifgattr_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr) 2251 { 2252 struct ifgroupreq *ifgr = (struct ifgroupreq *)addr; 2253 struct carp_softc *sc = ifp->if_softc; 2254 2255 if (ifgr->ifgr_attrib.ifg_carp_demoted > 0 && (sc->sc_if.if_flags & 2256 (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING)) 2257 carp_vhe_send_ad_all(sc); 2258 } 2259 2260 void 2261 carp_start(struct ifnet *ifp) 2262 { 2263 struct carp_softc *sc = ifp->if_softc; 2264 struct ifnet *ifp0; 2265 struct mbuf *m; 2266 2267 if ((ifp0 = if_get(sc->sc_carpdevidx)) == NULL) { 2268 ifq_purge(&ifp->if_snd); 2269 return; 2270 } 2271 2272 while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) 2273 carp_transmit(sc, ifp0, m); 2274 if_put(ifp0); 2275 } 2276 2277 void 2278 carp_transmit(struct carp_softc *sc, struct ifnet *ifp0, struct mbuf *m) 2279 { 2280 struct ifnet *ifp = &sc->sc_if; 2281 2282 #if NBPFILTER > 0 2283 { 2284 caddr_t if_bpf = ifp->if_bpf; 2285 if (if_bpf) 2286 bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT); 2287 } 2288 #endif /* NBPFILTER > 0 */ 2289 2290 if (!ISSET(ifp0->if_flags, IFF_RUNNING)) { 2291 counters_inc(ifp->if_counters, ifc_oerrors); 2292 m_freem(m); 2293 return; 2294 } 2295 2296 /* 2297 * Do not leak the multicast address when sending 2298 * advertisements in 'ip' and 'ip-stealth' balacing 2299 * modes. 2300 */ 2301 if (sc->sc_balancing == CARP_BAL_IP || 2302 sc->sc_balancing == CARP_BAL_IPSTEALTH) { 2303 struct ether_header *eh = mtod(m, struct ether_header *); 2304 memcpy(eh->ether_shost, sc->sc_ac.ac_enaddr, 2305 sizeof(eh->ether_shost)); 2306 } 2307 2308 if (if_enqueue(ifp0, m)) 2309 counters_inc(ifp->if_counters, ifc_oerrors); 2310 } 2311 2312 int 2313 carp_enqueue(struct ifnet *ifp, struct mbuf *m) 2314 { 2315 struct carp_softc *sc = ifp->if_softc; 2316 struct ifnet *ifp0; 2317 2318 /* no ifq_is_priq, cos hfsc on carp doesn't make sense */ 2319 2320 /* 2321 * If the parent of this carp(4) got destroyed while 2322 * `m' was being processed, silently drop it. 2323 */ 2324 if ((ifp0 = if_get(sc->sc_carpdevidx)) == NULL) { 2325 m_freem(m); 2326 return (0); 2327 } 2328 2329 counters_pkt(ifp->if_counters, 2330 ifc_opackets, ifc_obytes, m->m_pkthdr.len); 2331 carp_transmit(sc, ifp0, m); 2332 if_put(ifp0); 2333 2334 return (0); 2335 } 2336 2337 int 2338 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, 2339 struct rtentry *rt) 2340 { 2341 struct carp_softc *sc = ((struct carp_softc *)ifp->if_softc); 2342 struct carp_vhost_entry *vhe; 2343 struct srp_ref sr; 2344 int ismaster; 2345 2346 if (sc->cur_vhe == NULL) { 2347 vhe = SRPL_FIRST(&sr, &sc->carp_vhosts); 2348 ismaster = (vhe->state == MASTER); 2349 SRPL_LEAVE(&sr); 2350 } else { 2351 ismaster = (sc->cur_vhe->state == MASTER); 2352 } 2353 2354 if ((sc->sc_balancing == CARP_BAL_NONE && !ismaster)) { 2355 m_freem(m); 2356 return (ENETUNREACH); 2357 } 2358 2359 return (ether_output(ifp, m, sa, rt)); 2360 } 2361 2362 void 2363 carp_set_state_all(struct carp_softc *sc, int state) 2364 { 2365 struct carp_vhost_entry *vhe; 2366 2367 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 2368 2369 SRPL_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) { 2370 if (vhe->state == state) 2371 continue; 2372 2373 carp_set_state(vhe, state); 2374 } 2375 } 2376 2377 void 2378 carp_set_state(struct carp_vhost_entry *vhe, int state) 2379 { 2380 struct carp_softc *sc = vhe->parent_sc; 2381 static const char *carp_states[] = { CARP_STATES }; 2382 int loglevel; 2383 struct carp_vhost_entry *vhe0; 2384 2385 KASSERT(vhe->state != state); 2386 2387 if (vhe->state == INIT || state == INIT) 2388 loglevel = LOG_WARNING; 2389 else 2390 loglevel = LOG_CRIT; 2391 2392 if (sc->sc_vhe_count > 1) 2393 CARP_LOG(loglevel, sc, 2394 ("state transition (vhid %d): %s -> %s", vhe->vhid, 2395 carp_states[vhe->state], carp_states[state])); 2396 else 2397 CARP_LOG(loglevel, sc, 2398 ("state transition: %s -> %s", 2399 carp_states[vhe->state], carp_states[state])); 2400 2401 vhe->state = state; 2402 carp_update_lsmask(sc); 2403 2404 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 2405 2406 sc->sc_if.if_link_state = LINK_STATE_INVALID; 2407 SRPL_FOREACH_LOCKED(vhe0, &sc->carp_vhosts, vhost_entries) { 2408 /* 2409 * Link must be up if at least one vhe is in state MASTER to 2410 * bring or keep route up. 2411 */ 2412 if (vhe0->state == MASTER) { 2413 sc->sc_if.if_link_state = LINK_STATE_UP; 2414 break; 2415 } else if (vhe0->state == BACKUP) { 2416 sc->sc_if.if_link_state = LINK_STATE_DOWN; 2417 } 2418 } 2419 if_link_state_change(&sc->sc_if); 2420 } 2421 2422 void 2423 carp_group_demote_adj(struct ifnet *ifp, int adj, char *reason) 2424 { 2425 struct ifg_list *ifgl; 2426 int *dm, need_ad; 2427 struct carp_softc *nil = NULL; 2428 2429 if (ifp->if_type == IFT_CARP) { 2430 dm = &((struct carp_softc *)ifp->if_softc)->sc_demote_cnt; 2431 if (*dm + adj >= 0) 2432 *dm += adj; 2433 else 2434 *dm = 0; 2435 } 2436 2437 need_ad = 0; 2438 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) { 2439 if (!strcmp(ifgl->ifgl_group->ifg_group, IFG_ALL)) 2440 continue; 2441 dm = &ifgl->ifgl_group->ifg_carp_demoted; 2442 2443 if (*dm + adj >= 0) 2444 *dm += adj; 2445 else 2446 *dm = 0; 2447 2448 if (adj > 0 && *dm == 1) 2449 need_ad = 1; 2450 CARP_LOG(LOG_ERR, nil, 2451 ("%s demoted group %s by %d to %d (%s)", 2452 ifp->if_xname, ifgl->ifgl_group->ifg_group, 2453 adj, *dm, reason)); 2454 } 2455 if (need_ad) 2456 carp_send_ad_all(); 2457 } 2458 2459 int 2460 carp_group_demote_count(struct carp_softc *sc) 2461 { 2462 struct ifg_list *ifgl; 2463 int count = 0; 2464 2465 TAILQ_FOREACH(ifgl, &sc->sc_if.if_groups, ifgl_next) 2466 count += ifgl->ifgl_group->ifg_carp_demoted; 2467 2468 if (count == 0 && sc->sc_demote_cnt) 2469 count = sc->sc_demote_cnt; 2470 2471 return (count > 255 ? 255 : count); 2472 } 2473 2474 void 2475 carp_carpdev_state(void *v) 2476 { 2477 struct carp_softc *sc = v; 2478 struct ifnet *ifp0; 2479 int suppressed = sc->sc_suppress; 2480 2481 if ((ifp0 = if_get(sc->sc_carpdevidx)) == NULL) 2482 return; 2483 2484 if (ifp0->if_link_state == LINK_STATE_DOWN || 2485 !(ifp0->if_flags & IFF_UP)) { 2486 sc->sc_if.if_flags &= ~IFF_RUNNING; 2487 carp_del_all_timeouts(sc); 2488 carp_set_state_all(sc, INIT); 2489 sc->sc_suppress = 1; 2490 carp_setrun_all(sc, 0); 2491 if (!suppressed) 2492 carp_group_demote_adj(&sc->sc_if, 1, "carpdev"); 2493 } else if (suppressed) { 2494 carp_set_state_all(sc, INIT); 2495 sc->sc_suppress = 0; 2496 carp_setrun_all(sc, 0); 2497 carp_group_demote_adj(&sc->sc_if, -1, "carpdev"); 2498 } 2499 2500 if_put(ifp0); 2501 } 2502 2503 int 2504 carp_ether_addmulti(struct carp_softc *sc, struct ifreq *ifr) 2505 { 2506 struct ifnet *ifp0; 2507 struct carp_mc_entry *mc; 2508 u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN]; 2509 int error; 2510 2511 ifp0 = if_get(sc->sc_carpdevidx); 2512 if (ifp0 == NULL) 2513 return (EINVAL); 2514 2515 error = ether_addmulti(ifr, (struct arpcom *)&sc->sc_ac); 2516 if (error != ENETRESET) { 2517 if_put(ifp0); 2518 return (error); 2519 } 2520 2521 /* 2522 * This is new multicast address. We have to tell parent 2523 * about it. Also, remember this multicast address so that 2524 * we can delete them on unconfigure. 2525 */ 2526 mc = malloc(sizeof(*mc), M_DEVBUF, M_NOWAIT); 2527 if (mc == NULL) { 2528 error = ENOMEM; 2529 goto alloc_failed; 2530 } 2531 2532 /* 2533 * As ether_addmulti() returns ENETRESET, following two 2534 * statement shouldn't fail. 2535 */ 2536 (void)ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi); 2537 ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, mc->mc_enm); 2538 memcpy(&mc->mc_addr, &ifr->ifr_addr, ifr->ifr_addr.sa_len); 2539 LIST_INSERT_HEAD(&sc->carp_mc_listhead, mc, mc_entries); 2540 2541 error = (*ifp0->if_ioctl)(ifp0, SIOCADDMULTI, (caddr_t)ifr); 2542 if (error != 0) 2543 goto ioctl_failed; 2544 2545 if_put(ifp0); 2546 2547 return (error); 2548 2549 ioctl_failed: 2550 LIST_REMOVE(mc, mc_entries); 2551 free(mc, M_DEVBUF, sizeof(*mc)); 2552 alloc_failed: 2553 (void)ether_delmulti(ifr, (struct arpcom *)&sc->sc_ac); 2554 if_put(ifp0); 2555 2556 return (error); 2557 } 2558 2559 int 2560 carp_ether_delmulti(struct carp_softc *sc, struct ifreq *ifr) 2561 { 2562 struct ifnet *ifp0; 2563 struct ether_multi *enm; 2564 struct carp_mc_entry *mc; 2565 u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN]; 2566 int error; 2567 2568 ifp0 = if_get(sc->sc_carpdevidx); 2569 if (ifp0 == NULL) 2570 return (EINVAL); 2571 2572 /* 2573 * Find a key to lookup carp_mc_entry. We have to do this 2574 * before calling ether_delmulti for obvious reason. 2575 */ 2576 if ((error = ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi)) != 0) 2577 goto rele; 2578 ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, enm); 2579 if (enm == NULL) { 2580 error = EINVAL; 2581 goto rele; 2582 } 2583 2584 LIST_FOREACH(mc, &sc->carp_mc_listhead, mc_entries) 2585 if (mc->mc_enm == enm) 2586 break; 2587 2588 /* We won't delete entries we didn't add */ 2589 if (mc == NULL) { 2590 error = EINVAL; 2591 goto rele; 2592 } 2593 2594 error = ether_delmulti(ifr, (struct arpcom *)&sc->sc_ac); 2595 if (error != ENETRESET) 2596 goto rele; 2597 2598 /* We no longer use this multicast address. Tell parent so. */ 2599 error = (*ifp0->if_ioctl)(ifp0, SIOCDELMULTI, (caddr_t)ifr); 2600 if (error == 0) { 2601 /* And forget about this address. */ 2602 LIST_REMOVE(mc, mc_entries); 2603 free(mc, M_DEVBUF, sizeof(*mc)); 2604 } else 2605 (void)ether_addmulti(ifr, (struct arpcom *)&sc->sc_ac); 2606 rele: 2607 if_put(ifp0); 2608 return (error); 2609 } 2610 2611 /* 2612 * Delete any multicast address we have asked to add from parent 2613 * interface. Called when the carp is being unconfigured. 2614 */ 2615 void 2616 carp_ether_purgemulti(struct carp_softc *sc) 2617 { 2618 struct ifnet *ifp0; /* Parent. */ 2619 struct carp_mc_entry *mc; 2620 union { 2621 struct ifreq ifreq; 2622 struct { 2623 char ifr_name[IFNAMSIZ]; 2624 struct sockaddr_storage ifr_ss; 2625 } ifreq_storage; 2626 } u; 2627 struct ifreq *ifr = &u.ifreq; 2628 2629 if ((ifp0 = if_get(sc->sc_carpdevidx)) == NULL) 2630 return; 2631 2632 memcpy(ifr->ifr_name, ifp0->if_xname, IFNAMSIZ); 2633 while ((mc = LIST_FIRST(&sc->carp_mc_listhead)) != NULL) { 2634 memcpy(&ifr->ifr_addr, &mc->mc_addr, mc->mc_addr.ss_len); 2635 (void)(*ifp0->if_ioctl)(ifp0, SIOCDELMULTI, (caddr_t)ifr); 2636 LIST_REMOVE(mc, mc_entries); 2637 free(mc, M_DEVBUF, sizeof(*mc)); 2638 } 2639 2640 if_put(ifp0); 2641 } 2642 2643 void 2644 carp_vh_ref(void *null, void *v) 2645 { 2646 struct carp_vhost_entry *vhe = v; 2647 2648 refcnt_take(&vhe->vhost_refcnt); 2649 } 2650 2651 void 2652 carp_vh_unref(void *null, void *v) 2653 { 2654 struct carp_vhost_entry *vhe = v; 2655 2656 if (refcnt_rele(&vhe->vhost_refcnt)) { 2657 carp_sc_unref(NULL, vhe->parent_sc); 2658 free(vhe, M_DEVBUF, sizeof(*vhe)); 2659 } 2660 } 2661 2662 void 2663 carp_sc_ref(void *null, void *s) 2664 { 2665 struct carp_softc *sc = s; 2666 2667 refcnt_take(&sc->sc_refcnt); 2668 } 2669 2670 void 2671 carp_sc_unref(void *null, void *s) 2672 { 2673 struct carp_softc *sc = s; 2674 2675 refcnt_rele_wake(&sc->sc_refcnt); 2676 } 2677