/*	$OpenBSD: ip_carp.c,v 1.175 2010/04/25 17:38:53 mpf Exp $	*/

/*
 * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
 * Copyright (c) 2003 Ryan McBride. All rights reserved.
 * Copyright (c) 2006-2008 Marco Pfatschbacher. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
28 */ 29 30 /* 31 * TODO: 32 * - iface reconfigure 33 * - support for hardware checksum calculations; 34 * 35 */ 36 37 #include "ether.h" 38 39 #include <sys/param.h> 40 #include <sys/proc.h> 41 #include <sys/systm.h> 42 #include <sys/mbuf.h> 43 #include <sys/socket.h> 44 #include <sys/socketvar.h> 45 #include <sys/ioctl.h> 46 #include <sys/errno.h> 47 #include <sys/device.h> 48 #include <sys/kernel.h> 49 #include <sys/sysctl.h> 50 #include <sys/syslog.h> 51 52 #include <machine/cpu.h> 53 54 #include <net/if.h> 55 #include <net/if_types.h> 56 #include <net/if_llc.h> 57 #include <net/route.h> 58 #include <net/netisr.h> 59 60 /* for arc4random() */ 61 #include <dev/rndvar.h> 62 63 #if NFDDI > 0 64 #include <net/if_fddi.h> 65 #endif 66 67 #include <crypto/sha1.h> 68 69 #ifdef INET 70 #include <netinet/in.h> 71 #include <netinet/in_systm.h> 72 #include <netinet/in_var.h> 73 #include <netinet/ip.h> 74 #include <netinet/ip_var.h> 75 #include <netinet/if_ether.h> 76 #include <netinet/ip_ipsp.h> 77 78 #include <net/if_enc.h> 79 #include <net/if_dl.h> 80 #endif 81 82 #ifdef INET6 83 #include <netinet/icmp6.h> 84 #include <netinet/ip6.h> 85 #include <netinet6/ip6_var.h> 86 #include <netinet6/nd6.h> 87 #include <netinet6/in6_ifattach.h> 88 #endif 89 90 #include "bpfilter.h" 91 #if NBPFILTER > 0 92 #include <net/bpf.h> 93 #endif 94 95 #include <netinet/ip_carp.h> 96 97 struct carp_mc_entry { 98 LIST_ENTRY(carp_mc_entry) mc_entries; 99 union { 100 struct ether_multi *mcu_enm; 101 } mc_u; 102 struct sockaddr_storage mc_addr; 103 }; 104 #define mc_enm mc_u.mcu_enm 105 106 enum { HMAC_ORIG=0, HMAC_NOV6LL=1, HMAC_MAX=2 }; 107 108 struct carp_vhost_entry { 109 LIST_ENTRY(carp_vhost_entry) vhost_entries; 110 struct carp_softc *parent_sc; 111 int vhe_leader; 112 int vhid; 113 int advskew; 114 enum { INIT = 0, BACKUP, MASTER } state; 115 struct timeout ad_tmo; /* advertisement timeout */ 116 struct timeout md_tmo; /* master down timeout */ 117 struct timeout md6_tmo; /* master down 
timeout */ 118 119 u_int64_t vhe_replay_cookie; 120 121 /* authentication */ 122 #define CARP_HMAC_PAD 64 123 unsigned char vhe_pad[CARP_HMAC_PAD]; 124 SHA1_CTX vhe_sha1[HMAC_MAX]; 125 126 u_int8_t vhe_enaddr[ETHER_ADDR_LEN]; 127 struct sockaddr_dl vhe_sdl; /* for IPv6 ndp balancing */ 128 }; 129 130 struct carp_softc { 131 struct arpcom sc_ac; 132 #define sc_if sc_ac.ac_if 133 #define sc_carpdev sc_ac.ac_if.if_carpdev 134 void *ah_cookie; 135 void *lh_cookie; 136 struct ip_moptions sc_imo; 137 #ifdef INET6 138 struct ip6_moptions sc_im6o; 139 #endif /* INET6 */ 140 TAILQ_ENTRY(carp_softc) sc_list; 141 142 int sc_suppress; 143 int sc_bow_out; 144 int sc_demote_cnt; 145 146 int sc_sendad_errors; 147 #define CARP_SENDAD_MAX_ERRORS(sc) (3 * (sc)->sc_vhe_count) 148 int sc_sendad_success; 149 #define CARP_SENDAD_MIN_SUCCESS(sc) (3 * (sc)->sc_vhe_count) 150 151 char sc_curlladdr[ETHER_ADDR_LEN]; 152 153 LIST_HEAD(__carp_vhosthead, carp_vhost_entry) carp_vhosts; 154 int sc_vhe_count; 155 u_int8_t sc_vhids[CARP_MAXNODES]; 156 u_int8_t sc_advskews[CARP_MAXNODES]; 157 u_int8_t sc_balancing; 158 159 int sc_naddrs; 160 int sc_naddrs6; 161 int sc_advbase; /* seconds */ 162 163 /* authentication */ 164 unsigned char sc_key[CARP_KEY_LEN]; 165 166 u_int32_t sc_hashkey[2]; 167 u_int32_t sc_lsmask; /* load sharing mask */ 168 int sc_lscount; /* # load sharing interfaces (max 32) */ 169 int sc_delayed_arp; /* delayed ARP request countdown */ 170 171 struct in_addr sc_peer; 172 173 LIST_HEAD(__carp_mchead, carp_mc_entry) carp_mc_listhead; 174 struct carp_vhost_entry *cur_vhe; /* current active vhe */ 175 }; 176 177 int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, LOG_CRIT }; /* XXX for now */ 178 struct carpstats carpstats; 179 180 struct carp_if { 181 TAILQ_HEAD(, carp_softc) vhif_vrs; 182 int vhif_nvrs; 183 184 struct ifnet *vhif_ifp; 185 }; 186 187 #define CARP_LOG(l, sc, s) \ 188 do { \ 189 if (carp_opts[CARPCTL_LOG] >= l) { \ 190 if (sc) \ 191 log(l, "%s: ", \ 192 (sc)->sc_if.if_xname); 
\ 193 else \ 194 log(l, "carp: "); \ 195 addlog s; \ 196 addlog("\n"); \ 197 } \ 198 } while (0) 199 200 void carp_hmac_prepare(struct carp_softc *); 201 void carp_hmac_prepare_ctx(struct carp_vhost_entry *, u_int8_t); 202 void carp_hmac_generate(struct carp_vhost_entry *, u_int32_t *, 203 unsigned char *, u_int8_t); 204 int carp_hmac_verify(struct carp_vhost_entry *, u_int32_t *, 205 unsigned char *); 206 void carp_setroute(struct carp_softc *, int); 207 void carp_proto_input_c(struct mbuf *, struct carp_header *, int, 208 sa_family_t); 209 void carpattach(int); 210 void carpdetach(struct carp_softc *); 211 int carp_prepare_ad(struct mbuf *, struct carp_vhost_entry *, 212 struct carp_header *); 213 void carp_send_ad_all(void); 214 void carp_vhe_send_ad_all(struct carp_softc *); 215 void carp_send_ad(void *); 216 void carp_send_arp(struct carp_softc *); 217 void carp_master_down(void *); 218 int carp_ioctl(struct ifnet *, u_long, caddr_t); 219 int carp_vhids_ioctl(struct carp_softc *, struct carpreq *); 220 int carp_check_dup_vhids(struct carp_softc *, struct carp_if *, 221 struct carpreq *); 222 void carp_ifgroup_ioctl(struct ifnet *, u_long, caddr_t); 223 void carp_ifgattr_ioctl(struct ifnet *, u_long, caddr_t); 224 void carp_start(struct ifnet *); 225 void carp_setrun_all(struct carp_softc *, sa_family_t); 226 void carp_setrun(struct carp_vhost_entry *, sa_family_t); 227 void carp_set_state_all(struct carp_softc *, int); 228 void carp_set_state(struct carp_vhost_entry *, int); 229 void carp_multicast_cleanup(struct carp_softc *); 230 int carp_set_ifp(struct carp_softc *, struct ifnet *); 231 void carp_set_enaddr(struct carp_softc *); 232 void carp_set_vhe_enaddr(struct carp_vhost_entry *); 233 void carp_addr_updated(void *); 234 u_int32_t carp_hash(struct carp_softc *, u_char *); 235 int carp_set_addr(struct carp_softc *, struct sockaddr_in *); 236 int carp_join_multicast(struct carp_softc *); 237 #ifdef INET6 238 void carp_send_na(struct carp_softc *); 239 int 
carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *); 240 int carp_join_multicast6(struct carp_softc *); 241 #endif 242 int carp_clone_create(struct if_clone *, int); 243 int carp_clone_destroy(struct ifnet *); 244 int carp_ether_addmulti(struct carp_softc *, struct ifreq *); 245 int carp_ether_delmulti(struct carp_softc *, struct ifreq *); 246 void carp_ether_purgemulti(struct carp_softc *); 247 int carp_group_demote_count(struct carp_softc *); 248 void carp_update_lsmask(struct carp_softc *); 249 int carp_new_vhost(struct carp_softc *, int, int); 250 void carp_destroy_vhosts(struct carp_softc *); 251 void carp_del_all_timeouts(struct carp_softc *); 252 253 struct if_clone carp_cloner = 254 IF_CLONE_INITIALIZER("carp", carp_clone_create, carp_clone_destroy); 255 256 #define carp_cksum(_m, _l) ((u_int16_t)in_cksum((_m), (_l))) 257 258 void 259 carp_hmac_prepare(struct carp_softc *sc) 260 { 261 struct carp_vhost_entry *vhe; 262 u_int8_t i; 263 264 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) { 265 for (i = 0; i < HMAC_MAX; i++) { 266 carp_hmac_prepare_ctx(vhe, i); 267 } 268 } 269 } 270 271 void 272 carp_hmac_prepare_ctx(struct carp_vhost_entry *vhe, u_int8_t ctx) 273 { 274 struct carp_softc *sc = vhe->parent_sc; 275 276 u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT; 277 u_int8_t vhid = vhe->vhid & 0xff; 278 SHA1_CTX sha1ctx; 279 u_int32_t kmd[5]; 280 struct ifaddr *ifa; 281 int i, found; 282 struct in_addr last, cur, in; 283 #ifdef INET6 284 struct in6_addr last6, cur6, in6; 285 #endif /* INET6 */ 286 287 /* compute ipad from key */ 288 bzero(vhe->vhe_pad, sizeof(vhe->vhe_pad)); 289 bcopy(sc->sc_key, vhe->vhe_pad, sizeof(sc->sc_key)); 290 for (i = 0; i < sizeof(vhe->vhe_pad); i++) 291 vhe->vhe_pad[i] ^= 0x36; 292 293 /* precompute first part of inner hash */ 294 SHA1Init(&vhe->vhe_sha1[ctx]); 295 SHA1Update(&vhe->vhe_sha1[ctx], vhe->vhe_pad, sizeof(vhe->vhe_pad)); 296 SHA1Update(&vhe->vhe_sha1[ctx], (void *)&version, sizeof(version)); 297 
SHA1Update(&vhe->vhe_sha1[ctx], (void *)&type, sizeof(type)); 298 299 /* generate a key for the arpbalance hash, before the vhid is hashed */ 300 if (vhe->vhe_leader) { 301 bcopy(&vhe->vhe_sha1[ctx], &sha1ctx, sizeof(sha1ctx)); 302 SHA1Final((unsigned char *)kmd, &sha1ctx); 303 sc->sc_hashkey[0] = kmd[0] ^ kmd[1]; 304 sc->sc_hashkey[1] = kmd[2] ^ kmd[3]; 305 } 306 307 /* the rest of the precomputation */ 308 if (vhe->vhe_leader && bcmp(sc->sc_ac.ac_enaddr, vhe->vhe_enaddr, 309 ETHER_ADDR_LEN) != 0) 310 SHA1Update(&vhe->vhe_sha1[ctx], sc->sc_ac.ac_enaddr, 311 ETHER_ADDR_LEN); 312 313 SHA1Update(&vhe->vhe_sha1[ctx], (void *)&vhid, sizeof(vhid)); 314 315 /* Hash the addresses from smallest to largest, not interface order */ 316 #ifdef INET 317 cur.s_addr = 0; 318 do { 319 found = 0; 320 last = cur; 321 cur.s_addr = 0xffffffff; 322 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 323 in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 324 if (ifa->ifa_addr->sa_family == AF_INET && 325 ntohl(in.s_addr) > ntohl(last.s_addr) && 326 ntohl(in.s_addr) < ntohl(cur.s_addr)) { 327 cur.s_addr = in.s_addr; 328 found++; 329 } 330 } 331 if (found) 332 SHA1Update(&vhe->vhe_sha1[ctx], 333 (void *)&cur, sizeof(cur)); 334 } while (found); 335 #endif /* INET */ 336 #ifdef INET6 337 memset(&cur6, 0x00, sizeof(cur6)); 338 do { 339 found = 0; 340 last6 = cur6; 341 memset(&cur6, 0xff, sizeof(cur6)); 342 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 343 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 344 if (IN6_IS_SCOPE_EMBED(&in6)) { 345 if (ctx == HMAC_NOV6LL) 346 continue; 347 in6.s6_addr16[1] = 0; 348 } 349 if (ifa->ifa_addr->sa_family == AF_INET6 && 350 memcmp(&in6, &last6, sizeof(in6)) > 0 && 351 memcmp(&in6, &cur6, sizeof(in6)) < 0) { 352 cur6 = in6; 353 found++; 354 } 355 } 356 if (found) 357 SHA1Update(&vhe->vhe_sha1[ctx], 358 (void *)&cur6, sizeof(cur6)); 359 } while (found); 360 #endif /* INET6 */ 361 362 /* convert ipad to opad */ 363 for (i = 0; i < sizeof(vhe->vhe_pad); 
i++) 364 vhe->vhe_pad[i] ^= 0x36 ^ 0x5c; 365 } 366 367 void 368 carp_hmac_generate(struct carp_vhost_entry *vhe, u_int32_t counter[2], 369 unsigned char md[20], u_int8_t ctx) 370 { 371 SHA1_CTX sha1ctx; 372 373 /* fetch first half of inner hash */ 374 bcopy(&vhe->vhe_sha1[ctx], &sha1ctx, sizeof(sha1ctx)); 375 376 SHA1Update(&sha1ctx, (void *)counter, sizeof(vhe->vhe_replay_cookie)); 377 SHA1Final(md, &sha1ctx); 378 379 /* outer hash */ 380 SHA1Init(&sha1ctx); 381 SHA1Update(&sha1ctx, vhe->vhe_pad, sizeof(vhe->vhe_pad)); 382 SHA1Update(&sha1ctx, md, 20); 383 SHA1Final(md, &sha1ctx); 384 } 385 386 int 387 carp_hmac_verify(struct carp_vhost_entry *vhe, u_int32_t counter[2], 388 unsigned char md[20]) 389 { 390 unsigned char md2[20]; 391 u_int8_t i; 392 393 for (i = 0; i < HMAC_MAX; i++) { 394 carp_hmac_generate(vhe, counter, md2, i); 395 if (!bcmp(md, md2, sizeof(md2))) 396 return (0); 397 } 398 return (1); 399 } 400 401 void 402 carp_setroute(struct carp_softc *sc, int cmd) 403 { 404 struct ifaddr *ifa; 405 int s; 406 407 /* XXX this mess needs fixing */ 408 409 s = splsoftnet(); 410 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 411 switch (ifa->ifa_addr->sa_family) { 412 case AF_INET: { 413 int error; 414 struct sockaddr sa; 415 struct rtentry *rt; 416 struct radix_node_head *rnh; 417 struct radix_node *rn; 418 struct rt_addrinfo info; 419 int hr_otherif, nr_ourif; 420 struct sockaddr_rtlabel sa_rl; 421 const char *label; 422 423 /* Remove the existing host route, if any */ 424 bzero(&info, sizeof(info)); 425 info.rti_info[RTAX_DST] = ifa->ifa_addr; 426 info.rti_flags = RTF_HOST; 427 error = rtrequest1(RTM_DELETE, &info, RTP_CONNECTED, 428 NULL, sc->sc_if.if_rdomain); 429 rt_missmsg(RTM_DELETE, &info, info.rti_flags, NULL, 430 error, sc->sc_if.if_rdomain); 431 432 /* Check for our address on another interface */ 433 /* XXX cries for proper API */ 434 rnh = rt_gettable(ifa->ifa_addr->sa_family, 0); 435 rn = rnh->rnh_matchaddr(ifa->ifa_addr, rnh); 436 rt = 
(struct rtentry *)rn; 437 hr_otherif = (rt && rt->rt_ifp != &sc->sc_if && 438 rt->rt_flags & (RTF_CLONING|RTF_CLONED)); 439 440 /* Check for a network route on our interface */ 441 bcopy(ifa->ifa_addr, &sa, sizeof(sa)); 442 satosin(&sa)->sin_addr.s_addr = satosin(ifa->ifa_netmask 443 )->sin_addr.s_addr & satosin(&sa)->sin_addr.s_addr; 444 rt = (struct rtentry *)rt_lookup(&sa, 445 ifa->ifa_netmask, sc->sc_if.if_rdomain); 446 nr_ourif = (rt && rt->rt_ifp == &sc->sc_if); 447 448 /* Restore the route label */ 449 bzero(&sa_rl, sizeof(sa_rl)); 450 if (rt && rt->rt_labelid) { 451 sa_rl.sr_len = sizeof(sa_rl); 452 sa_rl.sr_family = AF_UNSPEC; 453 label = rtlabel_id2name(rt->rt_labelid); 454 if (label != NULL) 455 strlcpy(sa_rl.sr_label, label, 456 sizeof(sa_rl.sr_label)); 457 } 458 459 switch (cmd) { 460 case RTM_ADD: 461 if (hr_otherif) { 462 ifa->ifa_rtrequest = NULL; 463 ifa->ifa_flags &= ~RTF_CLONING; 464 bzero(&info, sizeof(info)); 465 info.rti_info[RTAX_DST] = ifa->ifa_addr; 466 info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr; 467 info.rti_flags = RTF_UP | RTF_HOST; 468 error = rtrequest1(RTM_ADD, &info, 469 RTP_CONNECTED, NULL, 470 sc->sc_if.if_rdomain); 471 rt_missmsg(RTM_ADD, &info, 472 info.rti_flags, &sc->sc_if, 473 error, sc->sc_if.if_rdomain); 474 } 475 if (!hr_otherif || nr_ourif || !rt) { 476 if (nr_ourif && !(rt->rt_flags & 477 RTF_CLONING)) { 478 bzero(&info, sizeof(info)); 479 info.rti_info[RTAX_DST] = &sa; 480 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 481 error = rtrequest1(RTM_DELETE, 482 &info, RTP_CONNECTED, NULL, 483 sc->sc_if.if_rdomain); 484 rt_missmsg(RTM_DELETE, &info, info.rti_flags, NULL, 485 error, sc->sc_if.if_rdomain); 486 } 487 488 ifa->ifa_rtrequest = arp_rtrequest; 489 ifa->ifa_flags |= RTF_CLONING; 490 491 bzero(&info, sizeof(info)); 492 info.rti_info[RTAX_DST] = &sa; 493 info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr; 494 info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask; 495 info.rti_info[RTAX_LABEL] = 496 (struct sockaddr *)&sa_rl; 497 
error = rtrequest1(RTM_ADD, &info, 498 RTP_CONNECTED, NULL, 499 sc->sc_if.if_rdomain); 500 if (error == 0) 501 ifa->ifa_flags |= IFA_ROUTE; 502 rt_missmsg(RTM_ADD, &info, info.rti_flags, 503 &sc->sc_if, error, sc->sc_if.if_rdomain); 504 } 505 break; 506 case RTM_DELETE: 507 break; 508 default: 509 break; 510 } 511 break; 512 } 513 514 #ifdef INET6 515 case AF_INET6: 516 if (sc->sc_balancing >= CARP_BAL_IP) 517 continue; 518 if (cmd == RTM_ADD) 519 in6_ifaddloop(ifa); 520 else 521 in6_ifremloop(ifa); 522 break; 523 #endif /* INET6 */ 524 default: 525 break; 526 } 527 } 528 splx(s); 529 } 530 531 /* 532 * process input packet. 533 * we have rearranged checks order compared to the rfc, 534 * but it seems more efficient this way or not possible otherwise. 535 */ 536 void 537 carp_proto_input(struct mbuf *m, ...) 538 { 539 struct ip *ip = mtod(m, struct ip *); 540 struct ifnet *ifp = m->m_pkthdr.rcvif; 541 struct carp_softc *sc = NULL; 542 struct carp_header *ch; 543 int iplen, len, hlen, ismulti; 544 va_list ap; 545 546 va_start(ap, m); 547 hlen = va_arg(ap, int); 548 va_end(ap); 549 550 carpstats.carps_ipackets++; 551 552 if (!carp_opts[CARPCTL_ALLOW]) { 553 m_freem(m); 554 return; 555 } 556 557 ismulti = IN_MULTICAST(ip->ip_dst.s_addr); 558 559 /* check if received on a valid carp interface */ 560 if (!((ifp->if_type == IFT_CARP && ismulti) || 561 (ifp->if_type != IFT_CARP && !ismulti && ifp->if_carp != NULL))) { 562 carpstats.carps_badif++; 563 CARP_LOG(LOG_INFO, sc, ("packet received on non-carp interface: %s", 564 m->m_pkthdr.rcvif->if_xname)); 565 m_freem(m); 566 return; 567 } 568 569 /* verify that the IP TTL is 255. 
*/ 570 if (ip->ip_ttl != CARP_DFLTTL) { 571 carpstats.carps_badttl++; 572 CARP_LOG(LOG_NOTICE, sc, ("received ttl %d != %d on %s", ip->ip_ttl, 573 CARP_DFLTTL, m->m_pkthdr.rcvif->if_xname)); 574 m_freem(m); 575 return; 576 } 577 578 /* 579 * verify that the received packet length is 580 * equal to the CARP header 581 */ 582 iplen = ip->ip_hl << 2; 583 len = iplen + sizeof(*ch); 584 if (len > m->m_pkthdr.len) { 585 carpstats.carps_badlen++; 586 CARP_LOG(LOG_INFO, sc, ("packet too short %d on %s", m->m_pkthdr.len, 587 m->m_pkthdr.rcvif->if_xname)); 588 m_freem(m); 589 return; 590 } 591 592 if ((m = m_pullup2(m, len)) == NULL) { 593 carpstats.carps_hdrops++; 594 return; 595 } 596 ip = mtod(m, struct ip *); 597 ch = (void *)ip + iplen; 598 599 /* verify the CARP checksum */ 600 m->m_data += iplen; 601 if (carp_cksum(m, len - iplen)) { 602 carpstats.carps_badsum++; 603 CARP_LOG(LOG_INFO, sc, ("checksum failed on %s", 604 m->m_pkthdr.rcvif->if_xname)); 605 m_freem(m); 606 return; 607 } 608 m->m_data -= iplen; 609 610 carp_proto_input_c(m, ch, ismulti, AF_INET); 611 } 612 613 #ifdef INET6 614 int 615 carp6_proto_input(struct mbuf **mp, int *offp, int proto) 616 { 617 struct mbuf *m = *mp; 618 struct carp_softc *sc = NULL; 619 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 620 struct carp_header *ch; 621 u_int len; 622 623 carpstats.carps_ipackets6++; 624 625 if (!carp_opts[CARPCTL_ALLOW]) { 626 m_freem(m); 627 return (IPPROTO_DONE); 628 } 629 630 /* check if received on a valid carp interface */ 631 if (m->m_pkthdr.rcvif->if_type != IFT_CARP) { 632 carpstats.carps_badif++; 633 CARP_LOG(LOG_INFO, sc, ("packet received on non-carp interface: %s", 634 m->m_pkthdr.rcvif->if_xname)); 635 m_freem(m); 636 return (IPPROTO_DONE); 637 } 638 639 /* verify that the IP TTL is 255 */ 640 if (ip6->ip6_hlim != CARP_DFLTTL) { 641 carpstats.carps_badttl++; 642 CARP_LOG(LOG_NOTICE, sc, ("received ttl %d != %d on %s", ip6->ip6_hlim, 643 CARP_DFLTTL, m->m_pkthdr.rcvif->if_xname)); 644 
m_freem(m); 645 return (IPPROTO_DONE); 646 } 647 648 /* verify that we have a complete carp packet */ 649 len = m->m_len; 650 IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch)); 651 if (ch == NULL) { 652 carpstats.carps_badlen++; 653 CARP_LOG(LOG_INFO, sc, ("packet size %u too small", len)); 654 return (IPPROTO_DONE); 655 } 656 657 658 /* verify the CARP checksum */ 659 m->m_data += *offp; 660 if (carp_cksum(m, sizeof(*ch))) { 661 carpstats.carps_badsum++; 662 CARP_LOG(LOG_INFO, sc, ("checksum failed, on %s", 663 m->m_pkthdr.rcvif->if_xname)); 664 m_freem(m); 665 return (IPPROTO_DONE); 666 } 667 m->m_data -= *offp; 668 669 carp_proto_input_c(m, ch, 1, AF_INET6); 670 return (IPPROTO_DONE); 671 } 672 #endif /* INET6 */ 673 674 void 675 carp_proto_input_c(struct mbuf *m, struct carp_header *ch, int ismulti, 676 sa_family_t af) 677 { 678 struct ifnet *ifp = m->m_pkthdr.rcvif; 679 struct carp_softc *sc; 680 struct carp_vhost_entry *vhe; 681 struct timeval sc_tv, ch_tv; 682 struct carp_if *cif; 683 684 if (ifp->if_type == IFT_CARP) 685 cif = (struct carp_if *)ifp->if_carpdev->if_carp; 686 else 687 cif = (struct carp_if *)ifp->if_carp; 688 689 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) { 690 if (af == AF_INET && 691 ismulti != IN_MULTICAST(sc->sc_peer.s_addr)) 692 continue; 693 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) { 694 if (vhe->vhid == ch->carp_vhid) 695 goto found; 696 } 697 } 698 found: 699 700 if (!sc || (sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) != 701 (IFF_UP|IFF_RUNNING)) { 702 carpstats.carps_badvhid++; 703 m_freem(m); 704 return; 705 } 706 707 getmicrotime(&sc->sc_if.if_lastchange); 708 sc->sc_if.if_ipackets++; 709 sc->sc_if.if_ibytes += m->m_pkthdr.len; 710 711 /* verify the CARP version. 
*/ 712 if (ch->carp_version != CARP_VERSION) { 713 carpstats.carps_badver++; 714 sc->sc_if.if_ierrors++; 715 CARP_LOG(LOG_NOTICE, sc, ("invalid version %d != %d", 716 ch->carp_version, CARP_VERSION)); 717 m_freem(m); 718 return; 719 } 720 721 /* verify the hash */ 722 if (carp_hmac_verify(vhe, ch->carp_counter, ch->carp_md)) { 723 carpstats.carps_badauth++; 724 sc->sc_if.if_ierrors++; 725 CARP_LOG(LOG_INFO, sc, ("incorrect hash")); 726 m_freem(m); 727 return; 728 } 729 730 if (!bcmp(&vhe->vhe_replay_cookie, ch->carp_counter, 731 sizeof(ch->carp_counter))) { 732 /* Do not log duplicates from non simplex interfaces */ 733 if (sc->sc_carpdev->if_flags & IFF_SIMPLEX) { 734 carpstats.carps_badauth++; 735 sc->sc_if.if_ierrors++; 736 CARP_LOG(LOG_WARNING, sc, 737 ("replay or network loop detected")); 738 } 739 m_freem(m); 740 return; 741 } 742 743 sc_tv.tv_sec = sc->sc_advbase; 744 sc_tv.tv_usec = vhe->advskew * 1000000 / 256; 745 ch_tv.tv_sec = ch->carp_advbase; 746 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; 747 748 switch (vhe->state) { 749 case INIT: 750 break; 751 case MASTER: 752 /* 753 * If we receive an advertisement from a master who's going to 754 * be more frequent than us, and whose demote count is not higher 755 * than ours, go into BACKUP state. If his demote count is lower, 756 * also go into BACKUP. 757 */ 758 if (((timercmp(&sc_tv, &ch_tv, >) || 759 timercmp(&sc_tv, &ch_tv, ==)) && 760 (ch->carp_demote <= carp_group_demote_count(sc))) || 761 ch->carp_demote < carp_group_demote_count(sc)) { 762 timeout_del(&vhe->ad_tmo); 763 carp_set_state(vhe, BACKUP); 764 carp_setrun(vhe, 0); 765 if (vhe->vhe_leader) 766 carp_setroute(sc, RTM_DELETE); 767 } 768 break; 769 case BACKUP: 770 /* 771 * If we're pre-empting masters who advertise slower than us, 772 * and do not have a better demote count, treat them as down. 
773 * 774 */ 775 if (carp_opts[CARPCTL_PREEMPT] && 776 timercmp(&sc_tv, &ch_tv, <) && 777 ch->carp_demote >= carp_group_demote_count(sc)) { 778 carp_master_down(vhe); 779 break; 780 } 781 782 /* 783 * Take over masters advertising with a higher demote count, 784 * regardless of CARPCTL_PREEMPT. 785 */ 786 if (ch->carp_demote > carp_group_demote_count(sc)) { 787 carp_master_down(vhe); 788 break; 789 } 790 791 /* 792 * If the master is going to advertise at such a low frequency 793 * that he's guaranteed to time out, we'd might as well just 794 * treat him as timed out now. 795 */ 796 sc_tv.tv_sec = sc->sc_advbase * 3; 797 if (timercmp(&sc_tv, &ch_tv, <)) { 798 carp_master_down(vhe); 799 break; 800 } 801 802 /* 803 * Otherwise, we reset the counter and wait for the next 804 * advertisement. 805 */ 806 carp_setrun(vhe, af); 807 break; 808 } 809 810 m_freem(m); 811 return; 812 } 813 814 int 815 carp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, 816 size_t newlen) 817 { 818 /* All sysctl names at this level are terminal. */ 819 if (namelen != 1) 820 return (ENOTDIR); 821 822 switch (name[0]) { 823 case CARPCTL_STATS: 824 if (newp != NULL) 825 return (EPERM); 826 return (sysctl_struct(oldp, oldlenp, newp, newlen, 827 &carpstats, sizeof(carpstats))); 828 default: 829 if (name[0] <= 0 || name[0] >= CARPCTL_MAXID) 830 return (ENOPROTOOPT); 831 return sysctl_int(oldp, oldlenp, newp, newlen, 832 &carp_opts[name[0]]); 833 } 834 } 835 836 /* 837 * Interface side of the CARP implementation. 
838 */ 839 840 /* ARGSUSED */ 841 void 842 carpattach(int n) 843 { 844 struct ifg_group *ifg; 845 846 if ((ifg = if_creategroup("carp")) != NULL) 847 ifg->ifg_refcnt++; /* keep around even if empty */ 848 if_clone_attach(&carp_cloner); 849 } 850 851 int 852 carp_clone_create(ifc, unit) 853 struct if_clone *ifc; 854 int unit; 855 { 856 struct carp_softc *sc; 857 struct ifnet *ifp; 858 859 sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT|M_ZERO); 860 if (!sc) 861 return (ENOMEM); 862 863 LIST_INIT(&sc->carp_vhosts); 864 sc->sc_vhe_count = 0; 865 if (carp_new_vhost(sc, 0, 0)) { 866 free(sc, M_DEVBUF); 867 return (ENOMEM); 868 } 869 870 sc->sc_suppress = 0; 871 sc->sc_advbase = CARP_DFLTINTV; 872 sc->sc_naddrs = sc->sc_naddrs6 = 0; 873 #ifdef INET6 874 sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL; 875 #endif /* INET6 */ 876 sc->sc_imo.imo_membership = (struct in_multi **)malloc( 877 (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS, 878 M_WAITOK|M_ZERO); 879 sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS; 880 881 LIST_INIT(&sc->carp_mc_listhead); 882 ifp = &sc->sc_if; 883 ifp->if_softc = sc; 884 snprintf(ifp->if_xname, sizeof ifp->if_xname, "%s%d", ifc->ifc_name, 885 unit); 886 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 887 ifp->if_ioctl = carp_ioctl; 888 ifp->if_start = carp_start; 889 ifp->if_output = carp_output; 890 ifp->if_type = IFT_CARP; 891 ifp->if_addrlen = ETHER_ADDR_LEN; 892 ifp->if_hdrlen = ETHER_HDR_LEN; 893 ifp->if_mtu = ETHERMTU; 894 IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); 895 IFQ_SET_READY(&ifp->if_snd); 896 if_attach(ifp); 897 898 if_alloc_sadl(ifp); 899 LIST_INIT(&sc->sc_ac.ac_multiaddrs); 900 #if NBPFILTER > 0 901 bpfattach(&ifp->if_bpf, ifp, DLT_EN10MB, ETHER_HDR_LEN); 902 #endif 903 return (0); 904 } 905 906 int 907 carp_new_vhost(struct carp_softc *sc, int vhid, int advskew) 908 { 909 struct carp_vhost_entry *vhe, *vhe0; 910 911 vhe = malloc(sizeof(*vhe), M_DEVBUF, M_NOWAIT | M_ZERO); 912 if (vhe == NULL) 913 return 
(ENOMEM); 914 915 vhe->parent_sc = sc; 916 vhe->vhid = vhid; 917 vhe->advskew = advskew; 918 timeout_set(&vhe->ad_tmo, carp_send_ad, vhe); 919 timeout_set(&vhe->md_tmo, carp_master_down, vhe); 920 timeout_set(&vhe->md6_tmo, carp_master_down, vhe); 921 922 /* mark the first vhe as leader */ 923 if (LIST_EMPTY(&sc->carp_vhosts)) { 924 vhe->vhe_leader = 1; 925 LIST_INSERT_HEAD(&sc->carp_vhosts, vhe, vhost_entries); 926 sc->sc_vhe_count = 1; 927 return (0); 928 } 929 930 LIST_FOREACH(vhe0, &sc->carp_vhosts, vhost_entries) 931 if (LIST_NEXT(vhe0, vhost_entries) == NULL) 932 break; 933 LIST_INSERT_AFTER(vhe0, vhe, vhost_entries); 934 sc->sc_vhe_count++; 935 936 return (0); 937 } 938 939 int 940 carp_clone_destroy(struct ifnet *ifp) 941 { 942 struct carp_softc *sc = ifp->if_softc; 943 944 carpdetach(sc); 945 ether_ifdetach(ifp); 946 if_detach(ifp); 947 carp_destroy_vhosts(ifp->if_softc); 948 free(sc->sc_imo.imo_membership, M_IPMOPTS); 949 free(sc, M_DEVBUF); 950 951 return (0); 952 } 953 954 void 955 carp_del_all_timeouts(struct carp_softc *sc) 956 { 957 struct carp_vhost_entry *vhe; 958 959 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) { 960 timeout_del(&vhe->ad_tmo); 961 timeout_del(&vhe->md_tmo); 962 timeout_del(&vhe->md6_tmo); 963 } 964 } 965 966 void 967 carpdetach(struct carp_softc *sc) 968 { 969 struct carp_if *cif; 970 int s; 971 972 carp_del_all_timeouts(sc); 973 974 if (sc->sc_demote_cnt) 975 carp_group_demote_adj(&sc->sc_if, sc->sc_demote_cnt, "detach"); 976 sc->sc_suppress = 0; 977 sc->sc_sendad_errors = 0; 978 979 carp_set_state_all(sc, INIT); 980 sc->sc_if.if_flags &= ~IFF_UP; 981 carp_setrun_all(sc, 0); 982 carp_multicast_cleanup(sc); 983 984 s = splnet(); 985 if (sc->sc_carpdev != NULL) { 986 if (sc->lh_cookie != NULL) 987 hook_disestablish(sc->sc_carpdev->if_linkstatehooks, 988 sc->lh_cookie); 989 cif = (struct carp_if *)sc->sc_carpdev->if_carp; 990 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 991 if (!--cif->vhif_nvrs) { 992 ifpromisc(sc->sc_carpdev, 
0); 993 sc->sc_carpdev->if_carp = NULL; 994 free(cif, M_IFADDR); 995 } 996 } 997 sc->sc_carpdev = NULL; 998 splx(s); 999 } 1000 1001 /* Detach an interface from the carp. */ 1002 void 1003 carp_ifdetach(struct ifnet *ifp) 1004 { 1005 struct carp_softc *sc, *nextsc; 1006 struct carp_if *cif = (struct carp_if *)ifp->if_carp; 1007 1008 for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) { 1009 nextsc = TAILQ_NEXT(sc, sc_list); 1010 carpdetach(sc); 1011 } 1012 } 1013 1014 void 1015 carp_destroy_vhosts(struct carp_softc *sc) 1016 { 1017 /* XXX bow out? */ 1018 struct carp_vhost_entry *vhe, *nvhe; 1019 1020 for (vhe = LIST_FIRST(&sc->carp_vhosts); 1021 vhe != LIST_END(&sc->carp_vhosts); vhe = nvhe) { 1022 nvhe = LIST_NEXT(vhe, vhost_entries); 1023 free(vhe, M_DEVBUF); 1024 } 1025 LIST_INIT(&sc->carp_vhosts); 1026 sc->sc_vhe_count = 0; 1027 } 1028 1029 int 1030 carp_prepare_ad(struct mbuf *m, struct carp_vhost_entry *vhe, 1031 struct carp_header *ch) 1032 { 1033 if (!vhe->vhe_replay_cookie) { 1034 arc4random_buf(&vhe->vhe_replay_cookie, 1035 sizeof(vhe->vhe_replay_cookie)); 1036 } 1037 1038 bcopy(&vhe->vhe_replay_cookie, ch->carp_counter, 1039 sizeof(ch->carp_counter)); 1040 1041 /* 1042 * For the time being, do not include the IPv6 linklayer addresses 1043 * in the HMAC. 
1044 */ 1045 carp_hmac_generate(vhe, ch->carp_counter, ch->carp_md, HMAC_NOV6LL); 1046 1047 return (0); 1048 } 1049 1050 void 1051 carp_send_ad_all(void) 1052 { 1053 struct ifnet *ifp; 1054 struct carp_if *cif; 1055 struct carp_softc *vh; 1056 1057 TAILQ_FOREACH(ifp, &ifnet, if_list) { 1058 if (ifp->if_carp == NULL || ifp->if_type == IFT_CARP) 1059 continue; 1060 1061 cif = (struct carp_if *)ifp->if_carp; 1062 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1063 if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) == 1064 (IFF_UP|IFF_RUNNING)) { 1065 carp_vhe_send_ad_all(vh); 1066 } 1067 } 1068 } 1069 } 1070 1071 void 1072 carp_vhe_send_ad_all(struct carp_softc *sc) 1073 { 1074 struct carp_vhost_entry *vhe; 1075 1076 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) { 1077 if (vhe->state == MASTER) 1078 carp_send_ad(vhe); 1079 } 1080 } 1081 1082 void 1083 carp_send_ad(void *v) 1084 { 1085 struct carp_header ch; 1086 struct timeval tv; 1087 struct carp_vhost_entry *vhe = v; 1088 struct carp_softc *sc = vhe->parent_sc; 1089 struct carp_header *ch_ptr; 1090 1091 struct mbuf *m; 1092 int error, len, advbase, advskew, s; 1093 struct ifaddr *ifa; 1094 struct sockaddr sa; 1095 1096 if (sc->sc_carpdev == NULL) { 1097 sc->sc_if.if_oerrors++; 1098 return; 1099 } 1100 1101 s = splsoftnet(); 1102 1103 /* bow out if we've gone to backup (the carp interface is going down) */ 1104 if (sc->sc_bow_out) { 1105 advbase = 255; 1106 advskew = 255; 1107 } else { 1108 advbase = sc->sc_advbase; 1109 advskew = vhe->advskew; 1110 tv.tv_sec = advbase; 1111 tv.tv_usec = advskew * 1000000 / 256; 1112 } 1113 1114 ch.carp_version = CARP_VERSION; 1115 ch.carp_type = CARP_ADVERTISEMENT; 1116 ch.carp_vhid = vhe->vhid; 1117 ch.carp_demote = carp_group_demote_count(sc) & 0xff; 1118 ch.carp_advbase = advbase; 1119 ch.carp_advskew = advskew; 1120 ch.carp_authlen = 7; /* XXX DEFINE */ 1121 ch.carp_cksum = 0; 1122 1123 sc->cur_vhe = vhe; /* we need the vhe later on the output path */ 1124 1125 #ifdef INET 1126 
if (sc->sc_naddrs) { 1127 struct ip *ip; 1128 1129 MGETHDR(m, M_DONTWAIT, MT_HEADER); 1130 if (m == NULL) { 1131 sc->sc_if.if_oerrors++; 1132 carpstats.carps_onomem++; 1133 /* XXX maybe less ? */ 1134 goto retry_later; 1135 } 1136 len = sizeof(*ip) + sizeof(ch); 1137 m->m_pkthdr.len = len; 1138 m->m_pkthdr.rcvif = NULL; 1139 m->m_len = len; 1140 MH_ALIGN(m, m->m_len); 1141 ip = mtod(m, struct ip *); 1142 ip->ip_v = IPVERSION; 1143 ip->ip_hl = sizeof(*ip) >> 2; 1144 ip->ip_tos = IPTOS_LOWDELAY; 1145 ip->ip_len = htons(len); 1146 ip->ip_id = htons(ip_randomid()); 1147 ip->ip_off = htons(IP_DF); 1148 ip->ip_ttl = CARP_DFLTTL; 1149 ip->ip_p = IPPROTO_CARP; 1150 ip->ip_sum = 0; 1151 1152 bzero(&sa, sizeof(sa)); 1153 sa.sa_family = AF_INET; 1154 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev); 1155 if (ifa == NULL) 1156 ip->ip_src.s_addr = 0; 1157 else 1158 ip->ip_src.s_addr = 1159 ifatoia(ifa)->ia_addr.sin_addr.s_addr; 1160 ip->ip_dst.s_addr = sc->sc_peer.s_addr; 1161 if (IN_MULTICAST(ip->ip_dst.s_addr)) 1162 m->m_flags |= M_MCAST; 1163 1164 ch_ptr = (void *)ip + sizeof(*ip); 1165 bcopy(&ch, ch_ptr, sizeof(ch)); 1166 if (carp_prepare_ad(m, vhe, ch_ptr)) 1167 goto retry_later; 1168 1169 m->m_data += sizeof(*ip); 1170 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip)); 1171 m->m_data -= sizeof(*ip); 1172 1173 getmicrotime(&sc->sc_if.if_lastchange); 1174 sc->sc_if.if_opackets++; 1175 sc->sc_if.if_obytes += len; 1176 carpstats.carps_opackets++; 1177 1178 error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, 1179 NULL); 1180 if (error) { 1181 if (error == ENOBUFS) 1182 carpstats.carps_onomem++; 1183 else 1184 CARP_LOG(LOG_WARNING, sc, 1185 ("ip_output failed: %d", error)); 1186 sc->sc_if.if_oerrors++; 1187 if (sc->sc_sendad_errors < INT_MAX) 1188 sc->sc_sendad_errors++; 1189 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS(sc)) 1190 carp_group_demote_adj(&sc->sc_if, 1, 1191 "> snderrors"); 1192 sc->sc_sendad_success = 0; 1193 } else { 1194 if (sc->sc_sendad_errors >= 
CARP_SENDAD_MAX_ERRORS(sc)) { 1195 if (++sc->sc_sendad_success >= 1196 CARP_SENDAD_MIN_SUCCESS(sc)) { 1197 carp_group_demote_adj(&sc->sc_if, -1, 1198 "< snderrors"); 1199 sc->sc_sendad_errors = 0; 1200 } 1201 } else 1202 sc->sc_sendad_errors = 0; 1203 } 1204 if (vhe->vhe_leader) { 1205 if (sc->sc_delayed_arp > 0) 1206 sc->sc_delayed_arp--; 1207 if (sc->sc_delayed_arp == 0) { 1208 carp_send_arp(sc); 1209 sc->sc_delayed_arp = -1; 1210 } 1211 } 1212 } 1213 #endif /* INET */ 1214 #ifdef INET6 1215 if (sc->sc_naddrs6) { 1216 struct ip6_hdr *ip6; 1217 1218 MGETHDR(m, M_DONTWAIT, MT_HEADER); 1219 if (m == NULL) { 1220 sc->sc_if.if_oerrors++; 1221 carpstats.carps_onomem++; 1222 /* XXX maybe less ? */ 1223 goto retry_later; 1224 } 1225 len = sizeof(*ip6) + sizeof(ch); 1226 m->m_pkthdr.len = len; 1227 m->m_pkthdr.rcvif = NULL; 1228 m->m_len = len; 1229 MH_ALIGN(m, m->m_len); 1230 m->m_flags |= M_MCAST; 1231 ip6 = mtod(m, struct ip6_hdr *); 1232 bzero(ip6, sizeof(*ip6)); 1233 ip6->ip6_vfc |= IPV6_VERSION; 1234 ip6->ip6_hlim = CARP_DFLTTL; 1235 ip6->ip6_nxt = IPPROTO_CARP; 1236 1237 /* set the source address */ 1238 bzero(&sa, sizeof(sa)); 1239 sa.sa_family = AF_INET6; 1240 ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev); 1241 if (ifa == NULL) /* This should never happen with IPv6 */ 1242 bzero(&ip6->ip6_src, sizeof(struct in6_addr)); 1243 else 1244 bcopy(ifatoia6(ifa)->ia_addr.sin6_addr.s6_addr, 1245 &ip6->ip6_src, sizeof(struct in6_addr)); 1246 /* set the multicast destination */ 1247 1248 ip6->ip6_dst.s6_addr8[0] = 0xff; 1249 ip6->ip6_dst.s6_addr8[1] = 0x02; 1250 ip6->ip6_dst.s6_addr8[15] = 0x12; 1251 1252 ch_ptr = (void *)ip6 + sizeof(*ip6); 1253 bcopy(&ch, ch_ptr, sizeof(ch)); 1254 if (carp_prepare_ad(m, vhe, ch_ptr)) 1255 goto retry_later; 1256 1257 m->m_data += sizeof(*ip6); 1258 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6)); 1259 m->m_data -= sizeof(*ip6); 1260 1261 getmicrotime(&sc->sc_if.if_lastchange); 1262 sc->sc_if.if_opackets++; 1263 sc->sc_if.if_obytes += 
len; 1264 carpstats.carps_opackets6++; 1265 1266 error = ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL); 1267 if (error) { 1268 if (error == ENOBUFS) 1269 carpstats.carps_onomem++; 1270 else 1271 CARP_LOG(LOG_WARNING, sc, 1272 ("ip6_output failed: %d", error)); 1273 sc->sc_if.if_oerrors++; 1274 if (sc->sc_sendad_errors < INT_MAX) 1275 sc->sc_sendad_errors++; 1276 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS(sc)) 1277 carp_group_demote_adj(&sc->sc_if, 1, 1278 "> snd6errors"); 1279 sc->sc_sendad_success = 0; 1280 } else { 1281 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS(sc)) { 1282 if (++sc->sc_sendad_success >= 1283 CARP_SENDAD_MIN_SUCCESS(sc)) { 1284 carp_group_demote_adj(&sc->sc_if, -1, 1285 "< snd6errors"); 1286 sc->sc_sendad_errors = 0; 1287 } 1288 } else 1289 sc->sc_sendad_errors = 0; 1290 } 1291 } 1292 #endif /* INET6 */ 1293 1294 retry_later: 1295 sc->cur_vhe = NULL; 1296 splx(s); 1297 if (advbase != 255 || advskew != 255) 1298 timeout_add(&vhe->ad_tmo, tvtohz(&tv)); 1299 } 1300 1301 /* 1302 * Broadcast a gratuitous ARP request containing 1303 * the virtual router MAC address for each IP address 1304 * associated with the virtual router. 
1305 */ 1306 void 1307 carp_send_arp(struct carp_softc *sc) 1308 { 1309 struct ifaddr *ifa; 1310 in_addr_t in; 1311 int s = splsoftnet(); 1312 1313 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 1314 1315 if (ifa->ifa_addr->sa_family != AF_INET) 1316 continue; 1317 1318 in = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 1319 arprequest(sc->sc_carpdev, &in, &in, sc->sc_ac.ac_enaddr); 1320 DELAY(1000); /* XXX */ 1321 } 1322 splx(s); 1323 } 1324 1325 #ifdef INET6 1326 void 1327 carp_send_na(struct carp_softc *sc) 1328 { 1329 struct ifaddr *ifa; 1330 struct in6_addr *in6; 1331 static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; 1332 int s = splsoftnet(); 1333 1334 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 1335 1336 if (ifa->ifa_addr->sa_family != AF_INET6) 1337 continue; 1338 1339 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr; 1340 nd6_na_output(sc->sc_carpdev, &mcast, in6, 1341 ND_NA_FLAG_OVERRIDE, 1, NULL); 1342 DELAY(1000); /* XXX */ 1343 } 1344 splx(s); 1345 } 1346 #endif /* INET6 */ 1347 1348 /* 1349 * Based on bridge_hash() in if_bridge.c 1350 */ 1351 #define mix(a,b,c) \ 1352 do { \ 1353 a -= b; a -= c; a ^= (c >> 13); \ 1354 b -= c; b -= a; b ^= (a << 8); \ 1355 c -= a; c -= b; c ^= (b >> 13); \ 1356 a -= b; a -= c; a ^= (c >> 12); \ 1357 b -= c; b -= a; b ^= (a << 16); \ 1358 c -= a; c -= b; c ^= (b >> 5); \ 1359 a -= b; a -= c; a ^= (c >> 3); \ 1360 b -= c; b -= a; b ^= (a << 10); \ 1361 c -= a; c -= b; c ^= (b >> 15); \ 1362 } while (0) 1363 1364 u_int32_t 1365 carp_hash(struct carp_softc *sc, u_char *src) 1366 { 1367 u_int32_t a = 0x9e3779b9, b = sc->sc_hashkey[0], c = sc->sc_hashkey[1]; 1368 1369 c += sc->sc_key[3] << 24; 1370 c += sc->sc_key[2] << 16; 1371 c += sc->sc_key[1] << 8; 1372 c += sc->sc_key[0]; 1373 b += src[5] << 8; 1374 b += src[4]; 1375 a += src[3] << 24; 1376 a += src[2] << 16; 1377 a += src[1] << 8; 1378 a += src[0]; 1379 1380 mix(a, b, c); 1381 return (c); 1382 } 1383 1384 void 1385 carp_update_lsmask(struct carp_softc 
*sc) 1386 { 1387 struct carp_vhost_entry *vhe; 1388 int count; 1389 1390 if (!sc->sc_balancing) 1391 return; 1392 1393 sc->sc_lsmask = 0; 1394 count = 0; 1395 1396 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) { 1397 if (vhe->state == MASTER && count < sizeof(sc->sc_lsmask) * 8) 1398 sc->sc_lsmask |= 1 << count; 1399 count++; 1400 } 1401 sc->sc_lscount = count; 1402 CARP_LOG(LOG_DEBUG, sc, ("carp_update_lsmask: %x", sc->sc_lsmask)); 1403 } 1404 1405 int 1406 carp_iamatch(struct in_ifaddr *ia, u_char *src, u_int8_t **sha, 1407 u_int8_t **ether_shost) 1408 { 1409 struct carp_softc *sc = ia->ia_ifp->if_softc; 1410 struct carp_vhost_entry *vhe = LIST_FIRST(&sc->carp_vhosts); 1411 1412 if (sc->sc_balancing == CARP_BAL_ARP) { 1413 int lshash; 1414 /* 1415 * We use the source MAC address to decide which virtual host 1416 * should handle the request. If we're master of that virtual 1417 * host, then we respond, otherwise, just drop the arp packet 1418 * on the floor. 1419 */ 1420 1421 if (sc->sc_lscount == 0) /* just to be safe */ 1422 return (0); 1423 lshash = carp_hash(sc, src) % sc->sc_lscount; 1424 if ((1 << lshash) & sc->sc_lsmask) { 1425 int i = 0; 1426 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) { 1427 if (i++ == lshash) 1428 break; 1429 } 1430 if (vhe == NULL) 1431 return (0); 1432 *sha = vhe->vhe_enaddr; 1433 return (1); 1434 } 1435 } else if (sc->sc_balancing == CARP_BAL_IPSTEALTH || 1436 sc->sc_balancing == CARP_BAL_IP) { 1437 if (vhe->state == MASTER) { 1438 *ether_shost = ((struct arpcom *)sc->sc_carpdev)-> 1439 ac_enaddr; 1440 return (1); 1441 } 1442 } else { 1443 if (vhe->state == MASTER) 1444 return (1); 1445 } 1446 1447 return (0); 1448 } 1449 1450 #ifdef INET6 1451 int 1452 carp_iamatch6(struct ifnet *ifp, u_char *src, struct sockaddr_dl **sdl) 1453 { 1454 struct carp_softc *sc = ifp->if_softc; 1455 struct carp_vhost_entry *vhe = LIST_FIRST(&sc->carp_vhosts); 1456 1457 if (sc->sc_balancing == CARP_BAL_ARP) { 1458 int lshash; 1459 /* 1460 * We 
use the source MAC address to decide which virtual host 1461 * should handle the request. If we're master of that virtual 1462 * host, then we respond, otherwise, just drop the ndp packet 1463 * on the floor. 1464 */ 1465 1466 /* can happen if optional src lladdr is not provided */ 1467 if (src == NULL) 1468 return (0); 1469 if (sc->sc_lscount == 0) /* just to be safe */ 1470 return (0); 1471 lshash = carp_hash(sc, src) % sc->sc_lscount; 1472 if ((1 << lshash) & sc->sc_lsmask) { 1473 int i = 0; 1474 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) { 1475 if (i++ == lshash) 1476 break; 1477 } 1478 if (vhe == NULL) 1479 return (0); 1480 *sdl = &vhe->vhe_sdl; 1481 return (1); 1482 } 1483 } else { 1484 if (vhe->state == MASTER) 1485 return (1); 1486 } 1487 1488 return (0); 1489 } 1490 #endif /* INET6 */ 1491 1492 struct ifnet * 1493 carp_ourether(void *v, struct ether_header *eh, int src) 1494 { 1495 struct carp_if *cif = (struct carp_if *)v; 1496 struct carp_softc *vh; 1497 u_int8_t *ena; 1498 1499 if (src) 1500 ena = (u_int8_t *)&eh->ether_shost; 1501 else 1502 ena = (u_int8_t *)&eh->ether_dhost; 1503 1504 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1505 struct carp_vhost_entry *vhe; 1506 if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) != 1507 (IFF_UP|IFF_RUNNING)) 1508 continue; 1509 if (vh->sc_balancing == CARP_BAL_ARP) { 1510 LIST_FOREACH(vhe, &vh->carp_vhosts, vhost_entries) 1511 if (vhe->state == MASTER && 1512 !bcmp(ena, vhe->vhe_enaddr, ETHER_ADDR_LEN)) 1513 return (&vh->sc_if); 1514 } else { 1515 vhe = LIST_FIRST(&vh->carp_vhosts); 1516 if ((vhe->state == MASTER || 1517 vh->sc_balancing >= CARP_BAL_IP) && 1518 !bcmp(ena, vh->sc_ac.ac_enaddr, ETHER_ADDR_LEN)) 1519 return (&vh->sc_if); 1520 } 1521 } 1522 return (NULL); 1523 } 1524 1525 void 1526 carp_rewrite_lladdr(struct ifnet *ifp, u_int8_t *s_enaddr) 1527 { 1528 struct carp_softc *sc = ifp->if_softc; 1529 1530 if (sc->sc_balancing != CARP_BAL_IPSTEALTH && 1531 sc->sc_balancing != CARP_BAL_IP && 
sc->cur_vhe) { 1532 if (sc->cur_vhe->vhe_leader) 1533 bcopy((caddr_t)sc->sc_ac.ac_enaddr, 1534 (caddr_t)s_enaddr, ETHER_ADDR_LEN); 1535 else 1536 bcopy((caddr_t)sc->cur_vhe->vhe_enaddr, 1537 (caddr_t)s_enaddr, ETHER_ADDR_LEN); 1538 } 1539 } 1540 1541 int 1542 carp_our_mcastaddr(struct ifnet *ifp, u_int8_t *d_enaddr) 1543 { 1544 struct carp_softc *sc = ifp->if_softc; 1545 1546 if (sc->sc_balancing != CARP_BAL_IP) 1547 return (0); 1548 1549 return(!bcmp(sc->sc_ac.ac_enaddr, d_enaddr, ETHER_ADDR_LEN)); 1550 } 1551 1552 1553 int 1554 carp_input(struct mbuf *m, u_int8_t *shost, u_int8_t *dhost, u_int16_t etype) 1555 { 1556 struct ether_header eh; 1557 struct carp_if *cif = (struct carp_if *)m->m_pkthdr.rcvif->if_carp; 1558 struct ifnet *ifp; 1559 1560 bcopy(shost, &eh.ether_shost, sizeof(eh.ether_shost)); 1561 bcopy(dhost, &eh.ether_dhost, sizeof(eh.ether_dhost)); 1562 eh.ether_type = etype; 1563 1564 if ((ifp = carp_ourether(cif, &eh, 0))) 1565 ; 1566 else if (m->m_flags & (M_BCAST|M_MCAST)) { 1567 struct carp_softc *vh; 1568 struct mbuf *m0; 1569 1570 /* 1571 * XXX Should really check the list of multicast addresses 1572 * for each CARP interface _before_ copying. 
1573 */ 1574 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1575 m0 = m_copym2(m, 0, M_COPYALL, M_DONTWAIT); 1576 if (m0 == NULL) 1577 continue; 1578 m0->m_pkthdr.rcvif = &vh->sc_if; 1579 ether_input(&vh->sc_if, &eh, m0); 1580 } 1581 return (1); 1582 } 1583 1584 if (ifp == NULL) 1585 return (1); 1586 1587 m->m_pkthdr.rcvif = ifp; 1588 1589 #if NBPFILTER > 0 1590 if (ifp->if_bpf) 1591 bpf_mtap_hdr(ifp->if_bpf, (char *)&eh, ETHER_HDR_LEN, m, 1592 BPF_DIRECTION_IN); 1593 #endif 1594 ifp->if_ipackets++; 1595 ether_input(ifp, &eh, m); 1596 1597 return (0); 1598 } 1599 1600 int 1601 carp_lsdrop(struct mbuf *m, sa_family_t af, u_int32_t *src, u_int32_t *dst) 1602 { 1603 struct carp_softc *sc = m->m_pkthdr.rcvif->if_softc; 1604 int match; 1605 u_int32_t fold; 1606 1607 if (sc->sc_balancing < CARP_BAL_IP) 1608 return (0); 1609 /* 1610 * Never drop carp advertisements. 1611 * XXX Bad idea to pass all broadcast / multicast traffic? 1612 */ 1613 if (m->m_flags & (M_BCAST|M_MCAST)) 1614 return (0); 1615 1616 fold = src[0] ^ dst[0]; 1617 #ifdef INET6 1618 if (af == AF_INET6) { 1619 int i; 1620 for (i = 1; i < 4; i++) 1621 fold ^= src[i] ^ dst[i]; 1622 } 1623 #endif 1624 if (sc->sc_lscount == 0) /* just to be safe */ 1625 return (1); 1626 match = (1 << (ntohl(fold) % sc->sc_lscount)) & sc->sc_lsmask; 1627 1628 return (!match); 1629 } 1630 1631 void 1632 carp_master_down(void *v) 1633 { 1634 struct carp_vhost_entry *vhe = v; 1635 struct carp_softc *sc = vhe->parent_sc; 1636 1637 switch (vhe->state) { 1638 case INIT: 1639 printf("%s: master_down event in INIT state\n", 1640 sc->sc_if.if_xname); 1641 break; 1642 case MASTER: 1643 break; 1644 case BACKUP: 1645 carp_set_state(vhe, MASTER); 1646 carp_send_ad(vhe); 1647 if (sc->sc_balancing == CARP_BAL_NONE && vhe->vhe_leader) { 1648 carp_send_arp(sc); 1649 /* Schedule a delayed ARP to deal w/ some L3 switches */ 1650 sc->sc_delayed_arp = 2; 1651 #ifdef INET6 1652 carp_send_na(sc); 1653 #endif /* INET6 */ 1654 } 1655 carp_setrun(vhe, 0); 
1656 if (vhe->vhe_leader) 1657 carp_setroute(sc, RTM_ADD); 1658 carpstats.carps_preempt++; 1659 break; 1660 } 1661 } 1662 1663 void 1664 carp_setrun_all(struct carp_softc *sc, sa_family_t af) 1665 { 1666 struct carp_vhost_entry *vhe; 1667 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) { 1668 carp_setrun(vhe, af); 1669 } 1670 } 1671 1672 /* 1673 * When in backup state, af indicates whether to reset the master down timer 1674 * for v4 or v6. If it's set to zero, reset the ones which are already pending. 1675 */ 1676 void 1677 carp_setrun(struct carp_vhost_entry *vhe, sa_family_t af) 1678 { 1679 struct timeval tv; 1680 struct carp_softc *sc = vhe->parent_sc; 1681 1682 if (sc->sc_carpdev == NULL) { 1683 sc->sc_if.if_flags &= ~IFF_RUNNING; 1684 carp_set_state_all(sc, INIT); 1685 return; 1686 } 1687 1688 if (sc->sc_if.if_flags & IFF_UP && vhe->vhid > 0 && 1689 (sc->sc_naddrs || sc->sc_naddrs6) && !sc->sc_suppress) { 1690 sc->sc_if.if_flags |= IFF_RUNNING; 1691 } else { 1692 sc->sc_if.if_flags &= ~IFF_RUNNING; 1693 if (vhe->vhe_leader) 1694 carp_setroute(sc, RTM_DELETE); 1695 return; 1696 } 1697 1698 switch (vhe->state) { 1699 case INIT: 1700 carp_set_state(vhe, BACKUP); 1701 if (vhe->vhe_leader) 1702 carp_setroute(sc, RTM_DELETE); 1703 carp_setrun(vhe, 0); 1704 break; 1705 case BACKUP: 1706 timeout_del(&vhe->ad_tmo); 1707 tv.tv_sec = 3 * sc->sc_advbase; 1708 tv.tv_usec = vhe->advskew * 1000000 / 256; 1709 if (vhe->vhe_leader) 1710 sc->sc_delayed_arp = -1; 1711 switch (af) { 1712 #ifdef INET 1713 case AF_INET: 1714 timeout_add(&vhe->md_tmo, tvtohz(&tv)); 1715 break; 1716 #endif /* INET */ 1717 #ifdef INET6 1718 case AF_INET6: 1719 timeout_add(&vhe->md6_tmo, tvtohz(&tv)); 1720 break; 1721 #endif /* INET6 */ 1722 default: 1723 if (sc->sc_naddrs) 1724 timeout_add(&vhe->md_tmo, tvtohz(&tv)); 1725 if (sc->sc_naddrs6) 1726 timeout_add(&vhe->md6_tmo, tvtohz(&tv)); 1727 break; 1728 } 1729 break; 1730 case MASTER: 1731 tv.tv_sec = sc->sc_advbase; 1732 tv.tv_usec = vhe->advskew 
* 1000000 / 256; 1733 timeout_add(&vhe->ad_tmo, tvtohz(&tv)); 1734 break; 1735 } 1736 } 1737 1738 void 1739 carp_multicast_cleanup(struct carp_softc *sc) 1740 { 1741 struct ip_moptions *imo = &sc->sc_imo; 1742 #ifdef INET6 1743 struct ip6_moptions *im6o = &sc->sc_im6o; 1744 #endif 1745 u_int16_t n = imo->imo_num_memberships; 1746 1747 /* Clean up our own multicast memberships */ 1748 while (n-- > 0) { 1749 if (imo->imo_membership[n] != NULL) { 1750 in_delmulti(imo->imo_membership[n]); 1751 imo->imo_membership[n] = NULL; 1752 } 1753 } 1754 imo->imo_num_memberships = 0; 1755 imo->imo_multicast_ifp = NULL; 1756 1757 #ifdef INET6 1758 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 1759 struct in6_multi_mship *imm = 1760 LIST_FIRST(&im6o->im6o_memberships); 1761 1762 LIST_REMOVE(imm, i6mm_chain); 1763 in6_leavegroup(imm); 1764 } 1765 im6o->im6o_multicast_ifp = NULL; 1766 #endif 1767 1768 /* And any other multicast memberships */ 1769 carp_ether_purgemulti(sc); 1770 } 1771 1772 int 1773 carp_set_ifp(struct carp_softc *sc, struct ifnet *ifp) 1774 { 1775 struct carp_if *cif, *ncif = NULL; 1776 struct carp_softc *vr, *after = NULL; 1777 int myself = 0, error = 0; 1778 int s; 1779 1780 if (ifp == sc->sc_carpdev) 1781 return (0); 1782 1783 if (ifp != NULL) { 1784 if ((ifp->if_flags & IFF_MULTICAST) == 0) 1785 return (EADDRNOTAVAIL); 1786 1787 if (ifp->if_type == IFT_CARP) 1788 return (EINVAL); 1789 1790 if (ifp->if_carp == NULL) { 1791 ncif = malloc(sizeof(*cif), M_IFADDR, M_NOWAIT); 1792 if (ncif == NULL) 1793 return (ENOBUFS); 1794 if ((error = ifpromisc(ifp, 1))) { 1795 free(ncif, M_IFADDR); 1796 return (error); 1797 } 1798 1799 ncif->vhif_ifp = ifp; 1800 TAILQ_INIT(&ncif->vhif_vrs); 1801 } else { 1802 cif = (struct carp_if *)ifp->if_carp; 1803 if (carp_check_dup_vhids(sc, cif, NULL)) 1804 return (EINVAL); 1805 } 1806 1807 /* detach from old interface */ 1808 if (sc->sc_carpdev != NULL) 1809 carpdetach(sc); 1810 1811 /* join multicast groups */ 1812 if (sc->sc_naddrs < 0 
&& 1813 (error = carp_join_multicast(sc)) != 0) { 1814 if (ncif != NULL) 1815 free(ncif, M_IFADDR); 1816 return (error); 1817 } 1818 1819 #ifdef INET6 1820 if (sc->sc_naddrs6 < 0 && 1821 (error = carp_join_multicast6(sc)) != 0) { 1822 if (ncif != NULL) 1823 free(ncif, M_IFADDR); 1824 carp_multicast_cleanup(sc); 1825 return (error); 1826 } 1827 #endif 1828 1829 /* attach carp interface to physical interface */ 1830 if (ncif != NULL) 1831 ifp->if_carp = (caddr_t)ncif; 1832 sc->sc_carpdev = ifp; 1833 cif = (struct carp_if *)ifp->if_carp; 1834 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 1835 if (vr == sc) 1836 myself = 1; 1837 if (LIST_FIRST(&vr->carp_vhosts)->vhid < 1838 LIST_FIRST(&sc->carp_vhosts)->vhid) 1839 after = vr; 1840 } 1841 1842 if (!myself) { 1843 /* We're trying to keep things in order */ 1844 if (after == NULL) { 1845 TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list); 1846 } else { 1847 TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, 1848 sc, sc_list); 1849 } 1850 cif->vhif_nvrs++; 1851 } 1852 if (sc->sc_naddrs || sc->sc_naddrs6) 1853 sc->sc_if.if_flags |= IFF_UP; 1854 carp_set_enaddr(sc); 1855 s = splnet(); 1856 sc->lh_cookie = hook_establish(ifp->if_linkstatehooks, 1, 1857 carp_carpdev_state, ifp); 1858 carp_carpdev_state(ifp); 1859 splx(s); 1860 } else { 1861 carpdetach(sc); 1862 sc->sc_if.if_flags &= ~(IFF_UP|IFF_RUNNING); 1863 } 1864 return (0); 1865 } 1866 1867 void 1868 carp_set_vhe_enaddr(struct carp_vhost_entry *vhe) 1869 { 1870 struct carp_softc *sc = vhe->parent_sc; 1871 1872 if (vhe->vhid != 0 && sc->sc_carpdev) { 1873 if (vhe->vhe_leader && sc->sc_balancing == CARP_BAL_IP) 1874 vhe->vhe_enaddr[0] = 1; 1875 else 1876 vhe->vhe_enaddr[0] = 0; 1877 vhe->vhe_enaddr[1] = 0; 1878 vhe->vhe_enaddr[2] = 0x5e; 1879 vhe->vhe_enaddr[3] = 0; 1880 vhe->vhe_enaddr[4] = 1; 1881 vhe->vhe_enaddr[5] = vhe->vhid; 1882 1883 vhe->vhe_sdl.sdl_family = AF_LINK; 1884 vhe->vhe_sdl.sdl_alen = ETHER_ADDR_LEN; 1885 bcopy(vhe->vhe_enaddr, vhe->vhe_sdl.sdl_data, ETHER_ADDR_LEN); 
1886 } else 1887 bzero(vhe->vhe_enaddr, ETHER_ADDR_LEN); 1888 } 1889 1890 void 1891 carp_set_enaddr(struct carp_softc *sc) 1892 { 1893 struct carp_vhost_entry *vhe; 1894 1895 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) 1896 carp_set_vhe_enaddr(vhe); 1897 1898 vhe = LIST_FIRST(&sc->carp_vhosts); 1899 1900 /* 1901 * Use the carp lladdr if the running one isn't manually set. 1902 * Only compare static parts of the lladdr. 1903 */ 1904 if ((bcmp(sc->sc_ac.ac_enaddr + 1, vhe->vhe_enaddr + 1, 1905 ETHER_ADDR_LEN - 2) == 0) || 1906 (!sc->sc_ac.ac_enaddr[0] && !sc->sc_ac.ac_enaddr[1] && 1907 !sc->sc_ac.ac_enaddr[2] && !sc->sc_ac.ac_enaddr[3] && 1908 !sc->sc_ac.ac_enaddr[4] && !sc->sc_ac.ac_enaddr[5])) 1909 bcopy(vhe->vhe_enaddr, sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN); 1910 1911 /* Make sure the enaddr has changed before further twiddling. */ 1912 if (bcmp(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN) != 0) { 1913 bcopy(sc->sc_ac.ac_enaddr, LLADDR(sc->sc_if.if_sadl), 1914 ETHER_ADDR_LEN); 1915 bcopy(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN); 1916 #ifdef INET6 1917 /* 1918 * (re)attach a link-local address which matches 1919 * our new MAC address. 
1920 */ 1921 in6_ifattach_linklocal(&sc->sc_if, NULL); 1922 #endif 1923 carp_set_state_all(sc, INIT); 1924 carp_setrun_all(sc, 0); 1925 } 1926 } 1927 1928 void 1929 carp_addr_updated(void *v) 1930 { 1931 struct carp_softc *sc = (struct carp_softc *) v; 1932 struct ifaddr *ifa; 1933 int new_naddrs = 0, new_naddrs6 = 0; 1934 1935 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 1936 if (ifa->ifa_addr->sa_family == AF_INET) 1937 new_naddrs++; 1938 else if (ifa->ifa_addr->sa_family == AF_INET6 && 1939 !IN6_IS_ADDR_LINKLOCAL(&ifatoia6(ifa)->ia_addr.sin6_addr)) 1940 new_naddrs6++; 1941 } 1942 1943 /* Handle a callback after SIOCDIFADDR */ 1944 if (new_naddrs < sc->sc_naddrs || new_naddrs6 < sc->sc_naddrs6) { 1945 struct in_addr mc_addr; 1946 struct in_multi *inm; 1947 1948 sc->sc_naddrs = new_naddrs; 1949 sc->sc_naddrs6 = new_naddrs6; 1950 1951 /* Re-establish multicast membership removed by in_control */ 1952 if (IN_MULTICAST(sc->sc_peer.s_addr)) { 1953 mc_addr.s_addr = sc->sc_peer.s_addr; 1954 IN_LOOKUP_MULTI(mc_addr, &sc->sc_if, inm); 1955 if (inm == NULL) { 1956 struct in_multi **imm = 1957 sc->sc_imo.imo_membership; 1958 u_int16_t maxmem = 1959 sc->sc_imo.imo_max_memberships; 1960 1961 bzero(&sc->sc_imo, sizeof(sc->sc_imo)); 1962 sc->sc_imo.imo_membership = imm; 1963 sc->sc_imo.imo_max_memberships = maxmem; 1964 1965 if (sc->sc_carpdev != NULL && sc->sc_naddrs > 0) 1966 carp_join_multicast(sc); 1967 } 1968 } 1969 1970 if (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) { 1971 sc->sc_if.if_flags &= ~IFF_UP; 1972 carp_set_state_all(sc, INIT); 1973 } else 1974 carp_hmac_prepare(sc); 1975 } 1976 1977 carp_setrun_all(sc, 0); 1978 } 1979 1980 int 1981 carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin) 1982 { 1983 struct ifnet *ifp = sc->sc_carpdev; 1984 struct in_ifaddr *ia, *ia_if; 1985 int error = 0; 1986 1987 if (sin->sin_addr.s_addr == 0) { 1988 if (!(sc->sc_if.if_flags & IFF_UP)) 1989 carp_set_state_all(sc, INIT); 1990 if (sc->sc_naddrs) 1991 
sc->sc_if.if_flags |= IFF_UP; 1992 carp_setrun_all(sc, 0); 1993 return (0); 1994 } 1995 1996 /* we have to do this by hand to ensure we don't match on ourselves */ 1997 ia_if = NULL; 1998 for (ia = TAILQ_FIRST(&in_ifaddr); ia; 1999 ia = TAILQ_NEXT(ia, ia_list)) { 2000 2001 /* and, yeah, we need a multicast-capable iface too */ 2002 if (ia->ia_ifp != &sc->sc_if && 2003 ia->ia_ifp->if_type != IFT_CARP && 2004 (ia->ia_ifp->if_flags & IFF_MULTICAST) && 2005 ia->ia_ifp->if_rdomain == sc->sc_if.if_rdomain && 2006 (sin->sin_addr.s_addr & ia->ia_netmask) == 2007 ia->ia_net) { 2008 if (!ia_if) 2009 ia_if = ia; 2010 } 2011 } 2012 2013 if (ia_if) { 2014 ia = ia_if; 2015 if (ifp) { 2016 if (ifp != ia->ia_ifp) 2017 return (EADDRNOTAVAIL); 2018 } else { 2019 ifp = ia->ia_ifp; 2020 } 2021 } 2022 2023 if ((error = carp_set_ifp(sc, ifp))) 2024 return (error); 2025 2026 if (sc->sc_carpdev == NULL) 2027 return (EADDRNOTAVAIL); 2028 2029 if (sc->sc_naddrs == 0 && (error = carp_join_multicast(sc)) != 0) 2030 return (error); 2031 2032 sc->sc_naddrs++; 2033 if (sc->sc_carpdev != NULL) 2034 sc->sc_if.if_flags |= IFF_UP; 2035 2036 carp_set_state_all(sc, INIT); 2037 2038 /* 2039 * Hook if_addrhooks so that we get a callback after in_ifinit has run, 2040 * to correct any inappropriate routes that it inserted. 
2041 */ 2042 if (sc->ah_cookie == NULL) 2043 sc->ah_cookie = hook_establish(sc->sc_if.if_addrhooks, 0, 2044 carp_addr_updated, sc); 2045 2046 return (0); 2047 } 2048 2049 int 2050 carp_join_multicast(struct carp_softc *sc) 2051 { 2052 struct ip_moptions *imo = &sc->sc_imo; 2053 struct in_multi *imm; 2054 struct in_addr addr; 2055 2056 if (!IN_MULTICAST(sc->sc_peer.s_addr)) 2057 return (0); 2058 2059 addr.s_addr = sc->sc_peer.s_addr; 2060 if ((imm = in_addmulti(&addr, &sc->sc_if)) == NULL) 2061 return (ENOBUFS); 2062 2063 imo->imo_membership[0] = imm; 2064 imo->imo_num_memberships = 1; 2065 imo->imo_multicast_ifp = &sc->sc_if; 2066 imo->imo_multicast_ttl = CARP_DFLTTL; 2067 imo->imo_multicast_loop = 0; 2068 return (0); 2069 } 2070 2071 2072 #ifdef INET6 2073 int 2074 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) 2075 { 2076 struct ifnet *ifp = sc->sc_carpdev; 2077 struct in6_ifaddr *ia, *ia_if; 2078 int error = 0; 2079 2080 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 2081 if (!(sc->sc_if.if_flags & IFF_UP)) 2082 carp_set_state_all(sc, INIT); 2083 if (sc->sc_naddrs6) 2084 sc->sc_if.if_flags |= IFF_UP; 2085 carp_setrun_all(sc, 0); 2086 return (0); 2087 } 2088 2089 /* we have to do this by hand to ensure we don't match on ourselves */ 2090 ia_if = NULL; 2091 for (ia = in6_ifaddr; ia; ia = ia->ia_next) { 2092 int i; 2093 2094 for (i = 0; i < 4; i++) { 2095 if ((sin6->sin6_addr.s6_addr32[i] & 2096 ia->ia_prefixmask.sin6_addr.s6_addr32[i]) != 2097 (ia->ia_addr.sin6_addr.s6_addr32[i] & 2098 ia->ia_prefixmask.sin6_addr.s6_addr32[i])) 2099 break; 2100 } 2101 /* and, yeah, we need a multicast-capable iface too */ 2102 if (ia->ia_ifp != &sc->sc_if && 2103 ia->ia_ifp->if_type != IFT_CARP && 2104 (ia->ia_ifp->if_flags & IFF_MULTICAST) && 2105 (i == 4)) { 2106 if (!ia_if) 2107 ia_if = ia; 2108 } 2109 } 2110 2111 if (ia_if) { 2112 ia = ia_if; 2113 if (sc->sc_carpdev) { 2114 if (sc->sc_carpdev != ia->ia_ifp) 2115 return (EADDRNOTAVAIL); 2116 } else { 2117 
ifp = ia->ia_ifp; 2118 } 2119 } 2120 2121 if ((error = carp_set_ifp(sc, ifp))) 2122 return (error); 2123 2124 if (sc->sc_carpdev == NULL) 2125 return (EADDRNOTAVAIL); 2126 2127 if (sc->sc_naddrs6 == 0 && (error = carp_join_multicast6(sc)) != 0) 2128 return (error); 2129 2130 if (!IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) 2131 sc->sc_naddrs6++; 2132 if (sc->sc_carpdev != NULL && sc->sc_naddrs6) 2133 sc->sc_if.if_flags |= IFF_UP; 2134 carp_set_state_all(sc, INIT); 2135 carp_setrun_all(sc, 0); 2136 2137 return (0); 2138 } 2139 2140 int 2141 carp_join_multicast6(struct carp_softc *sc) 2142 { 2143 struct in6_multi_mship *imm, *imm2; 2144 struct ip6_moptions *im6o = &sc->sc_im6o; 2145 struct sockaddr_in6 addr6; 2146 int error; 2147 2148 /* Join IPv6 CARP multicast group */ 2149 bzero(&addr6, sizeof(addr6)); 2150 addr6.sin6_family = AF_INET6; 2151 addr6.sin6_len = sizeof(addr6); 2152 addr6.sin6_addr.s6_addr16[0] = htons(0xff02); 2153 addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index); 2154 addr6.sin6_addr.s6_addr8[15] = 0x12; 2155 if ((imm = in6_joingroup(&sc->sc_if, 2156 &addr6.sin6_addr, &error)) == NULL) { 2157 return (error); 2158 } 2159 /* join solicited multicast address */ 2160 bzero(&addr6.sin6_addr, sizeof(addr6.sin6_addr)); 2161 addr6.sin6_addr.s6_addr16[0] = htons(0xff02); 2162 addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index); 2163 addr6.sin6_addr.s6_addr32[1] = 0; 2164 addr6.sin6_addr.s6_addr32[2] = htonl(1); 2165 addr6.sin6_addr.s6_addr32[3] = 0; 2166 addr6.sin6_addr.s6_addr8[12] = 0xff; 2167 if ((imm2 = in6_joingroup(&sc->sc_if, 2168 &addr6.sin6_addr, &error)) == NULL) { 2169 in6_leavegroup(imm); 2170 return (error); 2171 } 2172 2173 /* apply v6 multicast membership */ 2174 im6o->im6o_multicast_ifp = &sc->sc_if; 2175 if (imm) 2176 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, 2177 i6mm_chain); 2178 if (imm2) 2179 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm2, 2180 i6mm_chain); 2181 2182 return (0); 2183 } 2184 2185 #endif /* INET6 */ 2186 2187 
int 2188 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr) 2189 { 2190 struct proc *p = curproc; /* XXX */ 2191 struct carp_softc *sc = ifp->if_softc; 2192 struct carp_vhost_entry *vhe; 2193 struct carpreq carpr; 2194 struct ifaddr *ifa = (struct ifaddr *)addr; 2195 struct ifreq *ifr = (struct ifreq *)addr; 2196 struct ifnet *cdev = NULL; 2197 int i, error = 0; 2198 2199 switch (cmd) { 2200 case SIOCSIFADDR: 2201 switch (ifa->ifa_addr->sa_family) { 2202 #ifdef INET 2203 case AF_INET: 2204 sc->sc_if.if_flags |= IFF_UP; 2205 /* 2206 * emulate arp_ifinit() without doing a gratious arp 2207 * request so that the routes are setup correctly. 2208 */ 2209 ifa->ifa_rtrequest = arp_rtrequest; 2210 ifa->ifa_flags |= RTF_CLONING; 2211 2212 error = carp_set_addr(sc, satosin(ifa->ifa_addr)); 2213 break; 2214 #endif /* INET */ 2215 #ifdef INET6 2216 case AF_INET6: 2217 sc->sc_if.if_flags |= IFF_UP; 2218 error = carp_set_addr6(sc, satosin6(ifa->ifa_addr)); 2219 break; 2220 #endif /* INET6 */ 2221 default: 2222 error = EAFNOSUPPORT; 2223 break; 2224 } 2225 break; 2226 2227 case SIOCSIFFLAGS: 2228 vhe = LIST_FIRST(&sc->carp_vhosts); 2229 if (vhe->state != INIT && !(ifr->ifr_flags & IFF_UP)) { 2230 carp_del_all_timeouts(sc); 2231 2232 /* we need the interface up to bow out */ 2233 sc->sc_if.if_flags |= IFF_UP; 2234 sc->sc_bow_out = 1; 2235 carp_vhe_send_ad_all(sc); 2236 sc->sc_bow_out = 0; 2237 2238 sc->sc_if.if_flags &= ~IFF_UP; 2239 carp_set_state_all(sc, INIT); 2240 carp_setrun_all(sc, 0); 2241 } else if (vhe->state == INIT && (ifr->ifr_flags & IFF_UP)) { 2242 sc->sc_if.if_flags |= IFF_UP; 2243 carp_setrun_all(sc, 0); 2244 } 2245 break; 2246 2247 case SIOCSVH: 2248 vhe = LIST_FIRST(&sc->carp_vhosts); 2249 if ((error = suser(p, p->p_acflag)) != 0) 2250 break; 2251 if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr))) 2252 break; 2253 error = 1; 2254 if (carpr.carpr_carpdev[0] != '\0' && 2255 (cdev = ifunit(carpr.carpr_carpdev)) == NULL) 2256 return (EINVAL); 2257 if 
(carpr.carpr_peer.s_addr == 0) 2258 sc->sc_peer.s_addr = INADDR_CARP_GROUP; 2259 else 2260 sc->sc_peer.s_addr = carpr.carpr_peer.s_addr; 2261 if ((error = carp_set_ifp(sc, cdev))) 2262 return (error); 2263 if (vhe->state != INIT && carpr.carpr_state != vhe->state) { 2264 switch (carpr.carpr_state) { 2265 case BACKUP: 2266 timeout_del(&vhe->ad_tmo); 2267 carp_set_state_all(sc, BACKUP); 2268 carp_setrun_all(sc, 0); 2269 carp_setroute(sc, RTM_DELETE); 2270 break; 2271 case MASTER: 2272 LIST_FOREACH(vhe, &sc->carp_vhosts, 2273 vhost_entries) 2274 carp_master_down(vhe); 2275 break; 2276 default: 2277 break; 2278 } 2279 } 2280 if ((error = carp_vhids_ioctl(sc, &carpr))) 2281 return (error); 2282 if (carpr.carpr_advbase > 0) { 2283 if (carpr.carpr_advbase > 255) { 2284 error = EINVAL; 2285 break; 2286 } 2287 sc->sc_advbase = carpr.carpr_advbase; 2288 error--; 2289 } 2290 if (bcmp(sc->sc_advskews, carpr.carpr_advskews, 2291 sizeof(sc->sc_advskews))) { 2292 i = 0; 2293 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) 2294 vhe->advskew = carpr.carpr_advskews[i++]; 2295 bcopy(carpr.carpr_advskews, sc->sc_advskews, 2296 sizeof(sc->sc_advskews)); 2297 } 2298 if (sc->sc_balancing != carpr.carpr_balancing) { 2299 if (carpr.carpr_balancing > CARP_BAL_MAXID) { 2300 error = EINVAL; 2301 break; 2302 } 2303 sc->sc_balancing = carpr.carpr_balancing; 2304 carp_set_enaddr(sc); 2305 carp_update_lsmask(sc); 2306 } 2307 bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key)); 2308 if (error > 0) 2309 error = EINVAL; 2310 else { 2311 error = 0; 2312 carp_setrun_all(sc, 0); 2313 } 2314 break; 2315 2316 case SIOCGVH: 2317 bzero(&carpr, sizeof(carpr)); 2318 if (sc->sc_carpdev != NULL) 2319 strlcpy(carpr.carpr_carpdev, sc->sc_carpdev->if_xname, 2320 IFNAMSIZ); 2321 i = 0; 2322 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) { 2323 carpr.carpr_vhids[i] = vhe->vhid; 2324 carpr.carpr_advskews[i] = vhe->advskew; 2325 carpr.carpr_states[i] = vhe->state; 2326 i++; 2327 } 2328 carpr.carpr_advbase = 
sc->sc_advbase; 2329 carpr.carpr_balancing = sc->sc_balancing; 2330 if (suser(p, p->p_acflag) == 0) 2331 bcopy(sc->sc_key, carpr.carpr_key, 2332 sizeof(carpr.carpr_key)); 2333 carpr.carpr_peer.s_addr = sc->sc_peer.s_addr; 2334 error = copyout(&carpr, ifr->ifr_data, sizeof(carpr)); 2335 break; 2336 2337 case SIOCADDMULTI: 2338 error = carp_ether_addmulti(sc, ifr); 2339 break; 2340 2341 case SIOCDELMULTI: 2342 error = carp_ether_delmulti(sc, ifr); 2343 break; 2344 case SIOCAIFGROUP: 2345 case SIOCDIFGROUP: 2346 if (sc->sc_demote_cnt) 2347 carp_ifgroup_ioctl(ifp, cmd, addr); 2348 break; 2349 case SIOCSIFGATTR: 2350 carp_ifgattr_ioctl(ifp, cmd, addr); 2351 break; 2352 default: 2353 error = ENOTTY; 2354 } 2355 2356 if (bcmp(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN) != 0) 2357 carp_set_enaddr(sc); 2358 carp_hmac_prepare(sc); 2359 return (error); 2360 } 2361 2362 int 2363 carp_check_dup_vhids(struct carp_softc *sc, struct carp_if *cif, 2364 struct carpreq *carpr) 2365 { 2366 struct carp_softc *vr; 2367 struct carp_vhost_entry *vhe, *vhe0; 2368 int i; 2369 2370 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 2371 if (vr == sc) 2372 continue; 2373 LIST_FOREACH(vhe, &vr->carp_vhosts, vhost_entries) { 2374 if (carpr) { 2375 for (i = 0; carpr->carpr_vhids[i]; i++) { 2376 if (vhe->vhid == carpr->carpr_vhids[i]) 2377 return (EINVAL); 2378 } 2379 } 2380 LIST_FOREACH(vhe0, &sc->carp_vhosts, vhost_entries) { 2381 if (vhe->vhid == vhe0->vhid) 2382 return (EINVAL); 2383 } 2384 } 2385 } 2386 return (0); 2387 } 2388 2389 int 2390 carp_vhids_ioctl(struct carp_softc *sc, struct carpreq *carpr) 2391 { 2392 int i, j; 2393 u_int8_t taken_vhids[256]; 2394 2395 if (carpr->carpr_vhids[0] == 0 || 2396 !bcmp(sc->sc_vhids, carpr->carpr_vhids, sizeof(sc->sc_vhids))) 2397 return (0); 2398 2399 bzero(taken_vhids, sizeof(taken_vhids)); 2400 for (i = 0; carpr->carpr_vhids[i]; i++) { 2401 if (taken_vhids[carpr->carpr_vhids[i]]) 2402 return (EINVAL); 2403 taken_vhids[carpr->carpr_vhids[i]] = 
1; 2404 2405 if (sc->sc_carpdev) { 2406 struct carp_if *cif; 2407 cif = (struct carp_if *)sc->sc_carpdev->if_carp; 2408 if (carp_check_dup_vhids(sc, cif, carpr)) 2409 return (EINVAL); 2410 } 2411 if (carpr->carpr_advskews[i] >= 255) 2412 return (EINVAL); 2413 } 2414 /* set sane balancing defaults */ 2415 if (i <= 1) 2416 carpr->carpr_balancing = CARP_BAL_NONE; 2417 else if (carpr->carpr_balancing == CARP_BAL_NONE && 2418 sc->sc_balancing == CARP_BAL_NONE) 2419 carpr->carpr_balancing = CARP_BAL_IP; 2420 2421 /* destroy all */ 2422 carp_del_all_timeouts(sc); 2423 carp_destroy_vhosts(sc); 2424 bzero(sc->sc_vhids, sizeof(sc->sc_vhids)); 2425 2426 /* sort vhosts list by vhid */ 2427 for (j = 1; j <= 255; j++) { 2428 for (i = 0; carpr->carpr_vhids[i]; i++) { 2429 if (carpr->carpr_vhids[i] != j) 2430 continue; 2431 if (carp_new_vhost(sc, carpr->carpr_vhids[i], 2432 carpr->carpr_advskews[i])) 2433 return (ENOMEM); 2434 sc->sc_vhids[i] = carpr->carpr_vhids[i]; 2435 sc->sc_advskews[i] = carpr->carpr_advskews[i]; 2436 } 2437 } 2438 carp_set_enaddr(sc); 2439 carp_set_state_all(sc, INIT); 2440 return (0); 2441 } 2442 2443 void 2444 carp_ifgroup_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr) 2445 { 2446 struct ifgroupreq *ifgr = (struct ifgroupreq *)addr; 2447 struct ifg_list *ifgl; 2448 int *dm, adj; 2449 2450 if (!strcmp(ifgr->ifgr_group, IFG_ALL)) 2451 return; 2452 adj = ((struct carp_softc *)ifp->if_softc)->sc_demote_cnt; 2453 if (cmd == SIOCDIFGROUP) 2454 adj = adj * -1; 2455 2456 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) 2457 if (!strcmp(ifgl->ifgl_group->ifg_group, ifgr->ifgr_group)) { 2458 dm = &ifgl->ifgl_group->ifg_carp_demoted; 2459 if (*dm + adj >= 0) 2460 *dm += adj; 2461 else 2462 *dm = 0; 2463 } 2464 } 2465 2466 void 2467 carp_ifgattr_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr) 2468 { 2469 struct ifgroupreq *ifgr = (struct ifgroupreq *)addr; 2470 struct carp_softc *sc = ifp->if_softc; 2471 2472 if (ifgr->ifgr_attrib.ifg_carp_demoted > 0 && 
(sc->sc_if.if_flags & 2473 (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING)) 2474 carp_vhe_send_ad_all(sc); 2475 } 2476 2477 /* 2478 * Start output on carp interface. This function should never be called. 2479 */ 2480 void 2481 carp_start(struct ifnet *ifp) 2482 { 2483 #ifdef DEBUG 2484 printf("%s: start called\n", ifp->if_xname); 2485 #endif 2486 } 2487 2488 int 2489 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, 2490 struct rtentry *rt) 2491 { 2492 struct carp_softc *sc = ((struct carp_softc *)ifp->if_softc); 2493 struct carp_vhost_entry *vhe; 2494 2495 vhe = sc->cur_vhe ? sc->cur_vhe : LIST_FIRST(&sc->carp_vhosts); 2496 2497 if (sc->sc_carpdev != NULL && 2498 (sc->sc_balancing || vhe->state == MASTER)) 2499 return (sc->sc_carpdev->if_output(ifp, m, sa, rt)); 2500 else { 2501 m_freem(m); 2502 return (ENETUNREACH); 2503 } 2504 } 2505 2506 void 2507 carp_set_state_all(struct carp_softc *sc, int state) 2508 { 2509 struct carp_vhost_entry *vhe; 2510 2511 LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) 2512 carp_set_state(vhe, state); 2513 } 2514 2515 void 2516 carp_set_state(struct carp_vhost_entry *vhe, int state) 2517 { 2518 struct carp_softc *sc = vhe->parent_sc; 2519 static const char *carp_states[] = { CARP_STATES }; 2520 int loglevel; 2521 2522 if (vhe->state == state) 2523 return; 2524 if (vhe->state == INIT || state == INIT) 2525 loglevel = LOG_WARNING; 2526 else 2527 loglevel = LOG_CRIT; 2528 2529 if (sc->sc_vhe_count > 1) 2530 CARP_LOG(loglevel, sc, 2531 ("state transition (vhid %d): %s -> %s", vhe->vhid, 2532 carp_states[vhe->state], carp_states[state])); 2533 else 2534 CARP_LOG(loglevel, sc, 2535 ("state transition: %s -> %s", 2536 carp_states[vhe->state], carp_states[state])); 2537 2538 vhe->state = state; 2539 carp_update_lsmask(sc); 2540 2541 /* only the master vhe creates link state messages */ 2542 if (!vhe->vhe_leader) 2543 return; 2544 2545 switch (state) { 2546 case BACKUP: 2547 sc->sc_if.if_link_state = LINK_STATE_DOWN; 2548 
break; 2549 case MASTER: 2550 sc->sc_if.if_link_state = LINK_STATE_UP; 2551 break; 2552 default: 2553 sc->sc_if.if_link_state = LINK_STATE_UNKNOWN; 2554 break; 2555 } 2556 if_link_state_change(&sc->sc_if); 2557 } 2558 2559 void 2560 carp_group_demote_adj(struct ifnet *ifp, int adj, char *reason) 2561 { 2562 struct ifg_list *ifgl; 2563 int *dm; 2564 struct carp_softc *nil = NULL; 2565 2566 if (ifp->if_type == IFT_CARP) { 2567 dm = &((struct carp_softc *)ifp->if_softc)->sc_demote_cnt; 2568 if (*dm + adj >= 0) 2569 *dm += adj; 2570 else 2571 *dm = 0; 2572 } 2573 2574 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) { 2575 if (!strcmp(ifgl->ifgl_group->ifg_group, IFG_ALL)) 2576 continue; 2577 dm = &ifgl->ifgl_group->ifg_carp_demoted; 2578 2579 if (*dm + adj >= 0) 2580 *dm += adj; 2581 else 2582 *dm = 0; 2583 2584 if (adj > 0 && *dm == 1) 2585 carp_send_ad_all(); 2586 CARP_LOG(LOG_NOTICE, nil, 2587 ("%s demoted group %s by %d to %d (%s)", 2588 ifp->if_xname, ifgl->ifgl_group->ifg_group, 2589 adj, *dm, reason)); 2590 } 2591 } 2592 2593 int 2594 carp_group_demote_count(struct carp_softc *sc) 2595 { 2596 struct ifg_list *ifgl; 2597 int count = 0; 2598 2599 TAILQ_FOREACH(ifgl, &sc->sc_if.if_groups, ifgl_next) 2600 count += ifgl->ifgl_group->ifg_carp_demoted; 2601 2602 if (count == 0 && sc->sc_demote_cnt) 2603 count = sc->sc_demote_cnt; 2604 2605 return (count > 255 ? 
255 : count); 2606 } 2607 2608 void 2609 carp_carpdev_state(void *v) 2610 { 2611 struct carp_if *cif; 2612 struct carp_softc *sc; 2613 struct ifnet *ifp = v; 2614 2615 if (ifp->if_type == IFT_CARP) 2616 return; 2617 2618 cif = (struct carp_if *)ifp->if_carp; 2619 2620 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) { 2621 int suppressed = sc->sc_suppress; 2622 2623 if (sc->sc_carpdev->if_link_state == LINK_STATE_DOWN || 2624 !(sc->sc_carpdev->if_flags & IFF_UP)) { 2625 sc->sc_if.if_flags &= ~IFF_RUNNING; 2626 carp_del_all_timeouts(sc); 2627 carp_set_state_all(sc, INIT); 2628 sc->sc_suppress = 1; 2629 carp_setrun_all(sc, 0); 2630 if (!suppressed) 2631 carp_group_demote_adj(&sc->sc_if, 1, "carpdev"); 2632 } else if (suppressed) { 2633 carp_set_state_all(sc, INIT); 2634 sc->sc_suppress = 0; 2635 carp_setrun_all(sc, 0); 2636 carp_group_demote_adj(&sc->sc_if, -1, "carpdev"); 2637 } 2638 } 2639 } 2640 2641 int 2642 carp_ether_addmulti(struct carp_softc *sc, struct ifreq *ifr) 2643 { 2644 struct ifnet *ifp; 2645 struct carp_mc_entry *mc; 2646 u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN]; 2647 int error; 2648 2649 ifp = sc->sc_carpdev; 2650 if (ifp == NULL) 2651 return (EINVAL); 2652 2653 error = ether_addmulti(ifr, (struct arpcom *)&sc->sc_ac); 2654 if (error != ENETRESET) 2655 return (error); 2656 2657 /* 2658 * This is new multicast address. We have to tell parent 2659 * about it. Also, remember this multicast address so that 2660 * we can delete them on unconfigure. 2661 */ 2662 mc = malloc(sizeof(struct carp_mc_entry), M_DEVBUF, M_NOWAIT); 2663 if (mc == NULL) { 2664 error = ENOMEM; 2665 goto alloc_failed; 2666 } 2667 2668 /* 2669 * As ether_addmulti() returns ENETRESET, following two 2670 * statement shouldn't fail. 
2671 */ 2672 (void)ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi); 2673 ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, mc->mc_enm); 2674 memcpy(&mc->mc_addr, &ifr->ifr_addr, ifr->ifr_addr.sa_len); 2675 LIST_INSERT_HEAD(&sc->carp_mc_listhead, mc, mc_entries); 2676 2677 error = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)ifr); 2678 if (error != 0) 2679 goto ioctl_failed; 2680 2681 return (error); 2682 2683 ioctl_failed: 2684 LIST_REMOVE(mc, mc_entries); 2685 free(mc, M_DEVBUF); 2686 alloc_failed: 2687 (void)ether_delmulti(ifr, (struct arpcom *)&sc->sc_ac); 2688 2689 return (error); 2690 } 2691 2692 int 2693 carp_ether_delmulti(struct carp_softc *sc, struct ifreq *ifr) 2694 { 2695 struct ifnet *ifp; 2696 struct ether_multi *enm; 2697 struct carp_mc_entry *mc; 2698 u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN]; 2699 int error; 2700 2701 ifp = sc->sc_carpdev; 2702 if (ifp == NULL) 2703 return (EINVAL); 2704 2705 /* 2706 * Find a key to lookup carp_mc_entry. We have to do this 2707 * before calling ether_delmulti for obvious reason. 2708 */ 2709 if ((error = ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi)) != 0) 2710 return (error); 2711 ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, enm); 2712 if (enm == NULL) 2713 return (EINVAL); 2714 2715 LIST_FOREACH(mc, &sc->carp_mc_listhead, mc_entries) 2716 if (mc->mc_enm == enm) 2717 break; 2718 2719 /* We won't delete entries we didn't add */ 2720 if (mc == NULL) 2721 return (EINVAL); 2722 2723 error = ether_delmulti(ifr, (struct arpcom *)&sc->sc_ac); 2724 if (error != ENETRESET) 2725 return (error); 2726 2727 /* We no longer use this multicast address. Tell parent so. */ 2728 error = (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)ifr); 2729 if (error == 0) { 2730 /* And forget about this address. 
*/ 2731 LIST_REMOVE(mc, mc_entries); 2732 free(mc, M_DEVBUF); 2733 } else 2734 (void)ether_addmulti(ifr, (struct arpcom *)&sc->sc_ac); 2735 return (error); 2736 } 2737 2738 /* 2739 * Delete any multicast address we have asked to add from parent 2740 * interface. Called when the carp is being unconfigured. 2741 */ 2742 void 2743 carp_ether_purgemulti(struct carp_softc *sc) 2744 { 2745 struct ifnet *ifp = sc->sc_carpdev; /* Parent. */ 2746 struct carp_mc_entry *mc; 2747 union { 2748 struct ifreq ifreq; 2749 struct { 2750 char ifr_name[IFNAMSIZ]; 2751 struct sockaddr_storage ifr_ss; 2752 } ifreq_storage; 2753 } u; 2754 struct ifreq *ifr = &u.ifreq; 2755 2756 if (ifp == NULL) 2757 return; 2758 2759 memcpy(ifr->ifr_name, ifp->if_xname, IFNAMSIZ); 2760 while ((mc = LIST_FIRST(&sc->carp_mc_listhead)) != NULL) { 2761 memcpy(&ifr->ifr_addr, &mc->mc_addr, mc->mc_addr.ss_len); 2762 (void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)ifr); 2763 LIST_REMOVE(mc, mc_entries); 2764 free(mc, M_DEVBUF); 2765 } 2766 } 2767