1 /* 2 * Copyright (c) 2002 Michael Shalayeff. All rights reserved. 3 * Copyright (c) 2003 Ryan McBride. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 18 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 22 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 23 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 24 * THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 /* 27 * $FreeBSD: src/sys/netinet/ip_carp.c,v 1.48 2007/02/02 09:39:09 glebius Exp $ 28 * $DragonFly: src/sys/netinet/ip_carp.c,v 1.10 2008/07/27 10:06:57 sephe Exp $ 29 */ 30 31 #include "opt_carp.h" 32 #include "opt_inet.h" 33 #include "opt_inet6.h" 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/kernel.h> 38 #include <sys/in_cksum.h> 39 #include <sys/limits.h> 40 #include <sys/malloc.h> 41 #include <sys/mbuf.h> 42 #include <sys/time.h> 43 #include <sys/proc.h> 44 #include <sys/priv.h> 45 #include <sys/sockio.h> 46 #include <sys/socket.h> 47 #include <sys/sysctl.h> 48 #include <sys/syslog.h> 49 50 #include <machine/stdarg.h> 51 #include <crypto/sha1.h> 52 53 #include <net/bpf.h> 54 #include <net/ethernet.h> 55 #include <net/if.h> 56 #include <net/if_dl.h> 57 #include <net/if_types.h> 58 #include <net/route.h> 59 #include <net/if_clone.h> 60 61 #ifdef INET 62 #include <netinet/in.h> 63 #include <netinet/in_var.h> 64 #include <netinet/in_systm.h> 65 #include <netinet/ip.h> 66 #include <netinet/ip_var.h> 67 #include <netinet/if_ether.h> 68 #endif 69 70 #ifdef INET6 71 #include <netinet/icmp6.h> 72 #include <netinet/ip6.h> 73 #include <netinet6/ip6_var.h> 74 #include <netinet6/scope6_var.h> 75 #include <netinet6/nd6.h> 76 #endif 77 78 #include <netinet/ip_carp.h> 79 80 #define CARP_IFNAME "carp" 81 #define CARP_IS_RUNNING(ifp) \ 82 (((ifp)->if_flags & (IFF_UP | IFF_RUNNING)) == (IFF_UP | IFF_RUNNING)) 83 84 struct carp_vhaddr { 85 uint32_t vha_flags; /* CARP_VHAF_ */ 86 const struct in_ifaddr *vha_ia; /* carp address */ 87 const struct in_ifaddr *vha_iaback; /* backing address */ 88 TAILQ_ENTRY(carp_vhaddr) vha_link; 89 }; 90 TAILQ_HEAD(carp_vhaddr_list, carp_vhaddr); 91 92 struct carp_softc { 93 struct ifnet sc_if; 94 struct ifnet *sc_carpdev; /* parent interface */ 95 struct carp_vhaddr_list sc_vha_list; /* virtual addr list */ 96 97 const struct in_ifaddr *sc_ia; /* primary iface address v4 */ 98 struct ip_moptions sc_imo; 99 100 #ifdef INET6 101 struct in6_ifaddr *sc_ia6; /* primary iface address v6 */ 102 struct ip6_moptions sc_im6o; 103 #endif /* INET6 */ 104 TAILQ_ENTRY(carp_softc) sc_list; 105 106 enum { INIT = 0, BACKUP, MASTER } 107 sc_state; 108 int sc_dead; 109 110 int sc_suppress; 111 112 int sc_sendad_errors; 113 #define CARP_SENDAD_MAX_ERRORS 3 114 int sc_sendad_success; 115 #define CARP_SENDAD_MIN_SUCCESS 3 116 117 int sc_vhid; 118 int sc_advskew; 119 int sc_naddrs; /* actually used IPv4 vha */ 120 int sc_naddrs6; 121 int sc_advbase; /* seconds */ 122 int sc_init_counter; 123 uint64_t sc_counter; 124 125 /* authentication */ 126 #define CARP_HMAC_PAD 64 127 unsigned char sc_key[CARP_KEY_LEN]; 128 unsigned char sc_pad[CARP_HMAC_PAD]; 129 SHA1_CTX sc_sha1; 130 131 struct callout sc_ad_tmo; /* advertisement timeout */ 132 struct callout sc_md_tmo; /* master down timeout */ 133 struct callout sc_md6_tmo; /* master down timeout */ 134 135 LIST_ENTRY(carp_softc) sc_next; /* Interface clue */ 136 }; 137 138 struct carp_if { 139 TAILQ_HEAD(, carp_softc) vhif_vrs; 140 }; 141 142 enum { CARP_COUNT_MASTER, CARP_COUNT_RUNNING }; 143 144 SYSCTL_DECL(_net_inet_carp); 145 146 static int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0 }; /* XXX for now */ 147 SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW, 148 &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets"); 149 SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW, 150 &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode"); 151 SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW, 152 &carp_opts[CARPCTL_LOG], 0, "log bad carp packets"); 153 SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW, 154 &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses"); 155 156 static int carp_suppress_preempt = 0; 157 SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD, 158 &carp_suppress_preempt, 0, "Preemption is suppressed"); 159 160 static struct carpstats carpstats; 161 SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW, 162 &carpstats, carpstats, 163 "CARP statistics (struct carpstats, netinet/ip_carp.h)"); 164 165 #define CARP_LOG(...) do { \ 166 if (carp_opts[CARPCTL_LOG] > 0) \ 167 log(LOG_INFO, __VA_ARGS__); \ 168 } while (0) 169 170 #define CARP_DEBUG(...) do { \ 171 if (carp_opts[CARPCTL_LOG] > 1) \ 172 log(LOG_DEBUG, __VA_ARGS__); \ 173 } while (0) 174 175 static void carp_hmac_prepare(struct carp_softc *); 176 static void carp_hmac_generate(struct carp_softc *, uint32_t *, 177 unsigned char *); 178 static int carp_hmac_verify(struct carp_softc *, uint32_t *, 179 unsigned char *); 180 static void carp_setroute(struct carp_softc *, int); 181 static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t); 182 static int carp_clone_create(struct if_clone *, int, caddr_t); 183 static int carp_clone_destroy(struct ifnet *); 184 static void carp_detach(struct carp_softc *, int); 185 static int carp_prepare_ad(struct mbuf *, struct carp_softc *, 186 struct carp_header *); 187 static void carp_send_ad_all(void); 188 static void carp_send_ad_timeout(void *); 189 static void carp_send_ad(struct carp_softc *); 190 static void carp_send_arp(struct carp_softc *); 191 static void carp_master_down_timeout(void *); 192 static void carp_master_down(struct carp_softc *); 193 static int carp_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *); 194 static int carp_looutput(struct ifnet *, struct mbuf *, struct sockaddr *, 195 struct rtentry *); 196 static void carp_start(struct ifnet *); 197 static void carp_setrun(struct carp_softc *, sa_family_t); 198 static void carp_set_state(struct carp_softc *, int); 199 200 static void carp_multicast_cleanup(struct carp_softc *); 201 static void carp_add_addr(struct carp_softc *, struct ifaddr *); 202 static void carp_del_addr(struct carp_softc *, struct ifaddr *); 203 static void carp_config_addr(struct carp_softc *, struct ifaddr *); 204 static void carp_link_addrs(struct carp_softc *, struct ifnet *, 205 struct ifaddr *); 206 static void carp_unlink_addrs(struct carp_softc *, struct ifnet *, 207 struct ifaddr *); 208 209 static int carp_get_vhaddr(struct carp_softc *, struct ifdrv *); 210 static int carp_config_vhaddr(struct carp_softc *, struct carp_vhaddr *); 211 static int carp_activate_vhaddr(struct carp_softc *, struct carp_vhaddr *, 212 struct ifnet *, const struct in_ifaddr *, int); 213 static void carp_deactivate_vhaddr(struct carp_softc *, 214 struct carp_vhaddr *); 215 216 static void carp_sc_state(struct carp_softc *); 217 #ifdef INET6 218 static void carp_send_na(struct carp_softc *); 219 static int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *); 220 static int carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *); 221 static void carp_multicast6_cleanup(struct carp_softc *); 222 #endif 223 static void carp_stop(struct carp_softc *, int); 224 static void carp_reset(struct carp_softc *, int); 225 226 static void carp_ifaddr(void *, struct ifnet *, enum ifaddr_event, 227 struct ifaddr *); 228 static void carp_ifdetach(void *, struct ifnet *); 229 230 static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces"); 231 232 static LIST_HEAD(, carp_softc) carpif_list; 233 234 static struct if_clone carp_cloner = 235 IF_CLONE_INITIALIZER(CARP_IFNAME, carp_clone_create, carp_clone_destroy, 236 0, IF_MAXUNIT); 237 238 static eventhandler_tag carp_ifdetach_event; 239 static eventhandler_tag carp_ifaddr_event; 240 241 static __inline void 242 carp_insert_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha_new) 243 { 244 struct carp_vhaddr *vha; 245 u_long new_addr, addr; 246 247 KKASSERT((vha_new->vha_flags & CARP_VHAF_ONLIST) == 0); 248 249 /* 250 * Virtual address list is sorted; smaller one first 251 */ 252 new_addr = ntohl(vha_new->vha_ia->ia_addr.sin_addr.s_addr); 253 254 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 255 addr = ntohl(vha->vha_ia->ia_addr.sin_addr.s_addr); 256 257 if (addr > new_addr) 258 break; 259 } 260 if (vha == NULL) 261 TAILQ_INSERT_TAIL(&sc->sc_vha_list, vha_new, vha_link); 262 else 263 TAILQ_INSERT_BEFORE(vha, vha_new, vha_link); 264 vha_new->vha_flags |= CARP_VHAF_ONLIST; 265 } 266 267 static __inline void 268 carp_remove_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha) 269 { 270 KKASSERT(vha->vha_flags & CARP_VHAF_ONLIST); 271 vha->vha_flags &= ~CARP_VHAF_ONLIST; 272 TAILQ_REMOVE(&sc->sc_vha_list, vha, vha_link); 273 } 274 275 static void 276 carp_hmac_prepare(struct carp_softc *sc) 277 { 278 uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT; 279 uint8_t vhid = sc->sc_vhid & 0xff; 280 int i; 281 #ifdef INET6 282 struct ifaddr_container *ifac; 283 struct in6_addr in6; 284 #endif 285 #ifdef INET 286 struct carp_vhaddr *vha; 287 #endif 288 289 /* XXX: possible race here */ 290 291 /* compute ipad from key */ 292 bzero(sc->sc_pad, sizeof(sc->sc_pad)); 293 bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key)); 294 for (i = 0; i < sizeof(sc->sc_pad); i++) 295 sc->sc_pad[i] ^= 0x36; 296 297 /* precompute first part of inner hash */ 298 SHA1Init(&sc->sc_sha1); 299 SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad)); 300 SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version)); 301 SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type)); 302 SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid)); 303 #ifdef INET 304 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 305 SHA1Update(&sc->sc_sha1, 306 (const uint8_t *)&vha->vha_ia->ia_addr.sin_addr, 307 sizeof(struct in_addr)); 308 } 309 #endif /* INET */ 310 #ifdef INET6 311 TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) { 312 struct ifaddr *ifa = ifac->ifa; 313 314 if (ifa->ifa_addr->sa_family == AF_INET6) { 315 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 316 in6_clearscope(&in6); 317 SHA1Update(&sc->sc_sha1, (void *)&in6, sizeof(in6)); 318 } 319 } 320 #endif /* INET6 */ 321 322 /* convert ipad to opad */ 323 for (i = 0; i < sizeof(sc->sc_pad); i++) 324 sc->sc_pad[i] ^= 0x36 ^ 0x5c; 325 } 326 327 static void 328 carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2], 329 unsigned char md[20]) 330 { 331 SHA1_CTX sha1ctx; 332 333 /* fetch first half of inner hash */ 334 bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx)); 335 336 SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter)); 337 SHA1Final(md, &sha1ctx); 338 339 /* outer hash */ 340 SHA1Init(&sha1ctx); 341 SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad)); 342 SHA1Update(&sha1ctx, md, 20); 343 SHA1Final(md, &sha1ctx); 344 } 345 346 static int 347 carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2], 348 unsigned char md[20]) 349 { 350 unsigned char md2[20]; 351 352 carp_hmac_generate(sc, counter, md2); 353 return (bcmp(md, md2, sizeof(md2))); 354 } 355 356 static void 357 carp_setroute(struct carp_softc *sc, int cmd) 358 { 359 #ifdef INET6 360 struct ifaddr_container *ifac; 361 362 crit_enter(); 363 TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) { 364 struct ifaddr *ifa = ifac->ifa; 365 366 if (ifa->ifa_addr->sa_family == AF_INET6) { 367 if (cmd == RTM_ADD) 368 in6_ifaddloop(ifa); 369 else 370 in6_ifremloop(ifa); 371 } 372 } 373 crit_exit(); 374 #endif /* INET6 */ 375 } 376 377 static int 378 carp_clone_create(struct if_clone *ifc, int unit, caddr_t param __unused) 379 { 380 struct carp_softc *sc; 381 struct ifnet *ifp; 382 383 sc = kmalloc(sizeof(*sc), M_CARP, M_WAITOK | M_ZERO); 384 ifp = &sc->sc_if; 385 386 sc->sc_suppress = 0; 387 sc->sc_advbase = CARP_DFLTINTV; 388 sc->sc_vhid = -1; /* required setting */ 389 sc->sc_advskew = 0; 390 sc->sc_init_counter = 1; 391 sc->sc_naddrs = 0; 392 sc->sc_naddrs6 = 0; 393 394 TAILQ_INIT(&sc->sc_vha_list); 395 396 #ifdef INET6 397 sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL; 398 #endif 399 400 callout_init(&sc->sc_ad_tmo); 401 callout_init(&sc->sc_md_tmo); 402 callout_init(&sc->sc_md6_tmo); 403 404 ifp->if_softc = sc; 405 if_initname(ifp, CARP_IFNAME, unit); 406 ifp->if_mtu = ETHERMTU; 407 ifp->if_flags = IFF_LOOPBACK; 408 ifp->if_ioctl = carp_ioctl; 409 ifp->if_output = carp_looutput; 410 ifp->if_start = carp_start; 411 ifp->if_type = IFT_CARP; 412 ifp->if_snd.ifq_maxlen = ifqmaxlen; 413 ifp->if_hdrlen = 0; 414 if_attach(ifp, NULL); 415 bpfattach(ifp, DLT_NULL, sizeof(u_int)); 416 417 crit_enter(); 418 LIST_INSERT_HEAD(&carpif_list, sc, sc_next); 419 crit_exit(); 420 421 return (0); 422 } 423 424 static int 425 carp_clone_destroy(struct ifnet *ifp) 426 { 427 struct carp_softc *sc = ifp->if_softc; 428 429 sc->sc_dead = 1; 430 carp_detach(sc, 1); 431 432 crit_enter(); 433 LIST_REMOVE(sc, sc_next); 434 crit_exit(); 435 bpfdetach(ifp); 436 if_detach(ifp); 437 438 KASSERT(sc->sc_naddrs == 0, ("certain inet address is still active\n")); 439 kfree(sc, M_CARP); 440 441 return 0; 442 } 443 444 static void 445 carp_detach(struct carp_softc *sc, int detach) 446 { 447 struct carp_if *cif; 448 449 carp_reset(sc, detach); 450 451 carp_multicast_cleanup(sc); 452 #ifdef INET6 453 carp_multicast6_cleanup(sc); 454 #endif 455 456 if (!sc->sc_dead && detach) { 457 struct carp_vhaddr *vha; 458 459 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) 460 carp_deactivate_vhaddr(sc, vha); 461 KKASSERT(sc->sc_naddrs == 0); 462 } 463 464 if (sc->sc_carpdev != NULL) { 465 cif = sc->sc_carpdev->if_carp; 466 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 467 if (TAILQ_EMPTY(&cif->vhif_vrs)) { 468 ifpromisc(sc->sc_carpdev, 0); 469 sc->sc_carpdev->if_carp = NULL; 470 kfree(cif, M_CARP); 471 } 472 sc->sc_carpdev = NULL; 473 sc->sc_ia = NULL; 474 } 475 } 476 477 /* Detach an interface from the carp. */ 478 static void 479 carp_ifdetach(void *arg __unused, struct ifnet *ifp) 480 { 481 struct carp_if *cif = ifp->if_carp; 482 struct carp_softc *sc; 483 484 while (ifp->if_carp && 485 (sc = TAILQ_FIRST(&cif->vhif_vrs)) != NULL) 486 carp_detach(sc, 1); 487 } 488 489 /* 490 * process input packet. 491 * we have rearranged checks order compared to the rfc, 492 * but it seems more efficient this way or not possible otherwise. 493 */ 494 int 495 carp_input(struct mbuf **mp, int *offp, int proto) 496 { 497 struct mbuf *m = *mp; 498 struct ip *ip = mtod(m, struct ip *); 499 struct carp_header *ch; 500 int len, iphlen; 501 502 iphlen = *offp; 503 *mp = NULL; 504 505 carpstats.carps_ipackets++; 506 507 if (!carp_opts[CARPCTL_ALLOW]) { 508 m_freem(m); 509 return(IPPROTO_DONE); 510 } 511 512 /* Check if received on a valid carp interface */ 513 if (m->m_pkthdr.rcvif->if_carp == NULL) { 514 carpstats.carps_badif++; 515 CARP_LOG("carp_input: packet received on non-carp " 516 "interface: %s\n", 517 m->m_pkthdr.rcvif->if_xname); 518 m_freem(m); 519 return(IPPROTO_DONE); 520 } 521 522 /* Verify that the IP TTL is CARP_DFLTTL. */ 523 if (ip->ip_ttl != CARP_DFLTTL) { 524 carpstats.carps_badttl++; 525 CARP_LOG("carp_input: received ttl %d != %d on %s\n", 526 ip->ip_ttl, CARP_DFLTTL, 527 m->m_pkthdr.rcvif->if_xname); 528 m_freem(m); 529 return(IPPROTO_DONE); 530 } 531 532 /* Minimal CARP packet size */ 533 len = iphlen + sizeof(*ch); 534 535 /* 536 * Verify that the received packet length is 537 * not less than the CARP header 538 */ 539 if (m->m_pkthdr.len < len) { 540 carpstats.carps_badlen++; 541 CARP_LOG("packet too short %d on %s\n", m->m_pkthdr.len, 542 m->m_pkthdr.rcvif->if_xname); 543 m_freem(m); 544 return(IPPROTO_DONE); 545 } 546 547 /* Make sure that CARP header is contiguous */ 548 if (len > m->m_len) { 549 m = m_pullup(m, len); 550 if (m == NULL) { 551 carpstats.carps_hdrops++; 552 CARP_LOG("carp_input: m_pullup failed\n"); 553 return(IPPROTO_DONE); 554 } 555 ip = mtod(m, struct ip *); 556 } 557 ch = (struct carp_header *)((uint8_t *)ip + iphlen); 558 559 /* Verify the CARP checksum */ 560 if (in_cksum_skip(m, len, iphlen)) { 561 carpstats.carps_badsum++; 562 CARP_LOG("carp_input: checksum failed on %s\n", 563 m->m_pkthdr.rcvif->if_xname); 564 m_freem(m); 565 return(IPPROTO_DONE); 566 } 567 carp_input_c(m, ch, AF_INET); 568 return(IPPROTO_DONE); 569 } 570 571 #ifdef INET6 572 int 573 carp6_input(struct mbuf **mp, int *offp, int proto) 574 { 575 struct mbuf *m = *mp; 576 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 577 struct carp_header *ch; 578 u_int len; 579 580 carpstats.carps_ipackets6++; 581 582 if (!carp_opts[CARPCTL_ALLOW]) { 583 m_freem(m); 584 return (IPPROTO_DONE); 585 } 586 587 /* check if received on a valid carp interface */ 588 if (m->m_pkthdr.rcvif->if_carp == NULL) { 589 carpstats.carps_badif++; 590 CARP_LOG("carp6_input: packet received on non-carp " 591 "interface: %s\n", 592 m->m_pkthdr.rcvif->if_xname); 593 m_freem(m); 594 return (IPPROTO_DONE); 595 } 596 597 /* verify that the IP TTL is 255 */ 598 if (ip6->ip6_hlim != CARP_DFLTTL) { 599 carpstats.carps_badttl++; 600 CARP_LOG("carp6_input: received ttl %d != 255 on %s\n", 601 ip6->ip6_hlim, 602 m->m_pkthdr.rcvif->if_xname); 603 m_freem(m); 604 return (IPPROTO_DONE); 605 } 606 607 /* verify that we have a complete carp packet */ 608 len = m->m_len; 609 IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch)); 610 if (ch == NULL) { 611 carpstats.carps_badlen++; 612 CARP_LOG("carp6_input: packet size %u too small\n", len); 613 return (IPPROTO_DONE); 614 } 615 616 /* verify the CARP checksum */ 617 if (in_cksum_range(m, 0, *offp, sizeof(*ch))) { 618 carpstats.carps_badsum++; 619 CARP_LOG("carp6_input: checksum failed, on %s\n", 620 m->m_pkthdr.rcvif->if_xname); 621 m_freem(m); 622 return (IPPROTO_DONE); 623 } 624 625 carp_input_c(m, ch, AF_INET6); 626 return (IPPROTO_DONE); 627 } 628 #endif /* INET6 */ 629 630 static void 631 carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af) 632 { 633 struct ifnet *ifp = m->m_pkthdr.rcvif; 634 struct ifnet *cifp; 635 struct carp_softc *sc; 636 uint64_t tmp_counter; 637 struct timeval sc_tv, ch_tv; 638 639 /* verify that the VHID is valid on the receiving interface */ 640 TAILQ_FOREACH(sc, &((struct carp_if *)ifp->if_carp)->vhif_vrs, sc_list) 641 if (sc->sc_vhid == ch->carp_vhid) 642 break; 643 644 if (!sc || !CARP_IS_RUNNING(&sc->sc_if)) { 645 carpstats.carps_badvhid++; 646 m_freem(m); 647 return; 648 } 649 cifp = &sc->sc_if; 650 651 getmicrotime(&cifp->if_lastchange); 652 cifp->if_ipackets++; 653 cifp->if_ibytes += m->m_pkthdr.len; 654 655 if (cifp->if_bpf) { 656 struct ip *ip = mtod(m, struct ip *); 657 658 /* BPF wants net byte order */ 659 ip->ip_len = htons(ip->ip_len + (ip->ip_hl << 2)); 660 ip->ip_off = htons(ip->ip_off); 661 bpf_mtap(cifp->if_bpf, m); 662 } 663 664 /* verify the CARP version. */ 665 if (ch->carp_version != CARP_VERSION) { 666 carpstats.carps_badver++; 667 cifp->if_ierrors++; 668 CARP_LOG("%s; invalid version %d\n", cifp->if_xname, 669 ch->carp_version); 670 m_freem(m); 671 return; 672 } 673 674 /* verify the hash */ 675 if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) { 676 carpstats.carps_badauth++; 677 cifp->if_ierrors++; 678 CARP_LOG("%s: incorrect hash\n", cifp->if_xname); 679 m_freem(m); 680 return; 681 } 682 683 tmp_counter = ntohl(ch->carp_counter[0]); 684 tmp_counter = tmp_counter<<32; 685 tmp_counter += ntohl(ch->carp_counter[1]); 686 687 /* XXX Replay protection goes here */ 688 689 sc->sc_init_counter = 0; 690 sc->sc_counter = tmp_counter; 691 692 sc_tv.tv_sec = sc->sc_advbase; 693 if (carp_suppress_preempt && sc->sc_advskew < 240) 694 sc_tv.tv_usec = 240 * 1000000 / 256; 695 else 696 sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256; 697 ch_tv.tv_sec = ch->carp_advbase; 698 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; 699 700 switch (sc->sc_state) { 701 case INIT: 702 break; 703 704 case MASTER: 705 /* 706 * If we receive an advertisement from a master who's going to 707 * be more frequent than us, go into BACKUP state. 708 */ 709 if (timevalcmp(&sc_tv, &ch_tv, >) || 710 timevalcmp(&sc_tv, &ch_tv, ==)) { 711 callout_stop(&sc->sc_ad_tmo); 712 CARP_DEBUG("%s: MASTER -> BACKUP " 713 "(more frequent advertisement received)\n", 714 cifp->if_xname); 715 carp_set_state(sc, BACKUP); 716 carp_setrun(sc, 0); 717 carp_setroute(sc, RTM_DELETE); 718 } 719 break; 720 721 case BACKUP: 722 /* 723 * If we're pre-empting masters who advertise slower than us, 724 * and this one claims to be slower, treat him as down. 725 */ 726 if (carp_opts[CARPCTL_PREEMPT] && 727 timevalcmp(&sc_tv, &ch_tv, <)) { 728 CARP_DEBUG("%s: BACKUP -> MASTER " 729 "(preempting a slower master)\n", cifp->if_xname); 730 carp_master_down(sc); 731 break; 732 } 733 734 /* 735 * If the master is going to advertise at such a low frequency 736 * that he's guaranteed to time out, we'd might as well just 737 * treat him as timed out now. 738 */ 739 sc_tv.tv_sec = sc->sc_advbase * 3; 740 if (timevalcmp(&sc_tv, &ch_tv, <)) { 741 CARP_DEBUG("%s: BACKUP -> MASTER (master timed out)\n", 742 cifp->if_xname); 743 carp_master_down(sc); 744 break; 745 } 746 747 /* 748 * Otherwise, we reset the counter and wait for the next 749 * advertisement. 750 */ 751 carp_setrun(sc, af); 752 break; 753 } 754 m_freem(m); 755 } 756 757 static int 758 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) 759 { 760 struct ifnet *cifp = &sc->sc_if; 761 struct m_tag *mtag; 762 763 if (sc->sc_init_counter) { 764 /* this could also be seconds since unix epoch */ 765 sc->sc_counter = karc4random(); 766 sc->sc_counter = sc->sc_counter << 32; 767 sc->sc_counter += karc4random(); 768 } else { 769 sc->sc_counter++; 770 } 771 772 ch->carp_counter[0] = htonl((sc->sc_counter >> 32) & 0xffffffff); 773 ch->carp_counter[1] = htonl(sc->sc_counter & 0xffffffff); 774 775 carp_hmac_generate(sc, ch->carp_counter, ch->carp_md); 776 777 /* Tag packet for carp_output */ 778 mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct ifnet *), MB_DONTWAIT); 779 if (mtag == NULL) { 780 m_freem(m); 781 cifp->if_oerrors++; 782 return ENOMEM; 783 } 784 bcopy(&cifp, (caddr_t)(mtag + 1), sizeof(struct ifnet *)); 785 m_tag_prepend(m, mtag); 786 787 return 0; 788 } 789 790 static void 791 carp_send_ad_all(void) 792 { 793 struct carp_softc *sc; 794 795 LIST_FOREACH(sc, &carpif_list, sc_next) { 796 if (sc->sc_carpdev == NULL) 797 continue; 798 799 if (CARP_IS_RUNNING(&sc->sc_if) && sc->sc_state == MASTER) 800 carp_send_ad(sc); 801 } 802 } 803 804 static void 805 carp_send_ad_timeout(void *xsc) 806 { 807 carp_send_ad(xsc); 808 } 809 810 static void 811 carp_send_ad(struct carp_softc *sc) 812 { 813 struct ifnet *cifp = &sc->sc_if; 814 struct carp_header ch; 815 struct timeval tv; 816 struct carp_header *ch_ptr; 817 struct mbuf *m; 818 int len, advbase, advskew; 819 820 if (!CARP_IS_RUNNING(cifp)) { 821 /* Bow out */ 822 advbase = 255; 823 advskew = 255; 824 } else { 825 advbase = sc->sc_advbase; 826 if (!carp_suppress_preempt || sc->sc_advskew > 240) 827 advskew = sc->sc_advskew; 828 else 829 advskew = 240; 830 tv.tv_sec = advbase; 831 tv.tv_usec = advskew * 1000000 / 256; 832 } 833 834 ch.carp_version = CARP_VERSION; 835 ch.carp_type = CARP_ADVERTISEMENT; 836 ch.carp_vhid = sc->sc_vhid; 837 ch.carp_advbase = advbase; 838 ch.carp_advskew = advskew; 839 ch.carp_authlen = 7; /* XXX DEFINE */ 840 ch.carp_pad1 = 0; /* must be zero */ 841 ch.carp_cksum = 0; 842 843 #ifdef INET 844 if (sc->sc_ia != NULL) { 845 struct ip *ip; 846 847 MGETHDR(m, MB_DONTWAIT, MT_HEADER); 848 if (m == NULL) { 849 cifp->if_oerrors++; 850 carpstats.carps_onomem++; 851 /* XXX maybe less ? */ 852 if (advbase != 255 || advskew != 255) 853 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv), 854 carp_send_ad_timeout, sc); 855 return; 856 } 857 len = sizeof(*ip) + sizeof(ch); 858 m->m_pkthdr.len = len; 859 m->m_pkthdr.rcvif = NULL; 860 m->m_len = len; 861 MH_ALIGN(m, m->m_len); 862 m->m_flags |= M_MCAST; 863 ip = mtod(m, struct ip *); 864 ip->ip_v = IPVERSION; 865 ip->ip_hl = sizeof(*ip) >> 2; 866 ip->ip_tos = IPTOS_LOWDELAY; 867 ip->ip_len = len; 868 ip->ip_id = ip_newid(); 869 ip->ip_off = IP_DF; 870 ip->ip_ttl = CARP_DFLTTL; 871 ip->ip_p = IPPROTO_CARP; 872 ip->ip_sum = 0; 873 ip->ip_src = sc->sc_ia->ia_addr.sin_addr; 874 ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP); 875 876 ch_ptr = (struct carp_header *)(&ip[1]); 877 bcopy(&ch, ch_ptr, sizeof(ch)); 878 if (carp_prepare_ad(m, sc, ch_ptr)) 879 return; 880 ch_ptr->carp_cksum = in_cksum_skip(m, len, sizeof(*ip)); 881 882 getmicrotime(&cifp->if_lastchange); 883 cifp->if_opackets++; 884 cifp->if_obytes += len; 885 carpstats.carps_opackets++; 886 887 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) { 888 cifp->if_oerrors++; 889 if (sc->sc_sendad_errors < INT_MAX) 890 sc->sc_sendad_errors++; 891 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 892 carp_suppress_preempt++; 893 if (carp_suppress_preempt == 1) { 894 carp_send_ad_all(); 895 } 896 } 897 sc->sc_sendad_success = 0; 898 } else { 899 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 900 if (++sc->sc_sendad_success >= 901 CARP_SENDAD_MIN_SUCCESS) { 902 carp_suppress_preempt--; 903 sc->sc_sendad_errors = 0; 904 } 905 } else { 906 sc->sc_sendad_errors = 0; 907 } 908 } 909 } 910 #endif /* INET */ 911 #ifdef INET6 912 if (sc->sc_ia6) { 913 struct ip6_hdr *ip6; 914 915 MGETHDR(m, MB_DONTWAIT, MT_HEADER); 916 if (m == NULL) { 917 cifp->if_oerrors++; 918 carpstats.carps_onomem++; 919 /* XXX maybe less ? */ 920 if (advbase != 255 || advskew != 255) 921 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv), 922 carp_send_ad_timeout, sc); 923 return; 924 } 925 len = sizeof(*ip6) + sizeof(ch); 926 m->m_pkthdr.len = len; 927 m->m_pkthdr.rcvif = NULL; 928 m->m_len = len; 929 MH_ALIGN(m, m->m_len); 930 m->m_flags |= M_MCAST; 931 ip6 = mtod(m, struct ip6_hdr *); 932 bzero(ip6, sizeof(*ip6)); 933 ip6->ip6_vfc |= IPV6_VERSION; 934 ip6->ip6_hlim = CARP_DFLTTL; 935 ip6->ip6_nxt = IPPROTO_CARP; 936 bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src, 937 sizeof(struct in6_addr)); 938 /* set the multicast destination */ 939 940 ip6->ip6_dst.s6_addr16[0] = htons(0xff02); 941 ip6->ip6_dst.s6_addr8[15] = 0x12; 942 if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) { 943 cifp->if_oerrors++; 944 m_freem(m); 945 CARP_LOG("%s: in6_setscope failed\n", __func__); 946 return; 947 } 948 949 ch_ptr = (struct carp_header *)(&ip6[1]); 950 bcopy(&ch, ch_ptr, sizeof(ch)); 951 if (carp_prepare_ad(m, sc, ch_ptr)) 952 return; 953 ch_ptr->carp_cksum = in_cksum_skip(m, len, sizeof(*ip6)); 954 955 getmicrotime(&cifp->if_lastchange); 956 cifp->if_opackets++; 957 cifp->if_obytes += len; 958 carpstats.carps_opackets6++; 959 960 if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) { 961 cifp->if_oerrors++; 962 if (sc->sc_sendad_errors < INT_MAX) 963 sc->sc_sendad_errors++; 964 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 965 carp_suppress_preempt++; 966 if (carp_suppress_preempt == 1) { 967 carp_send_ad_all(); 968 } 969 } 970 sc->sc_sendad_success = 0; 971 } else { 972 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 973 if (++sc->sc_sendad_success >= 974 CARP_SENDAD_MIN_SUCCESS) { 975 carp_suppress_preempt--; 976 sc->sc_sendad_errors = 0; 977 } 978 } else { 979 sc->sc_sendad_errors = 0; 980 } 981 } 982 } 983 #endif /* INET6 */ 984 985 if (advbase != 255 || advskew != 255) 986 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv), 987 carp_send_ad_timeout, sc); 988 } 989 990 /* 991 * Broadcast a gratuitous ARP request containing 992 * the virtual router MAC address for each IP address 993 * associated with the virtual router. 994 */ 995 static void 996 carp_send_arp(struct carp_softc *sc) 997 { 998 const struct carp_vhaddr *vha; 999 1000 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 1001 if (vha->vha_iaback == NULL) 1002 continue; 1003 1004 arp_iainit(sc->sc_carpdev, &vha->vha_ia->ia_addr.sin_addr, 1005 IF_LLADDR(&sc->sc_if)); 1006 } 1007 } 1008 1009 #ifdef INET6 1010 static void 1011 carp_send_na(struct carp_softc *sc) 1012 { 1013 struct ifaddr_container *ifac; 1014 struct in6_addr *in6; 1015 static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; 1016 1017 TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) { 1018 struct ifaddr *ifa = ifac->ifa; 1019 1020 if (ifa->ifa_addr->sa_family != AF_INET6) 1021 continue; 1022 1023 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr; 1024 nd6_na_output(sc->sc_carpdev, &mcast, in6, 1025 ND_NA_FLAG_OVERRIDE, 1, NULL); 1026 DELAY(1000); /* XXX */ 1027 } 1028 } 1029 #endif /* INET6 */ 1030 1031 static __inline const struct carp_vhaddr * 1032 carp_find_addr(const struct carp_softc *sc, const struct in_addr *addr) 1033 { 1034 struct carp_vhaddr *vha; 1035 1036 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 1037 if (vha->vha_iaback == NULL) 1038 continue; 1039 1040 if (vha->vha_ia->ia_addr.sin_addr.s_addr == addr->s_addr) 1041 return vha; 1042 } 1043 return NULL; 1044 } 1045 1046 static int 1047 carp_iamatch_balance(const struct carp_if *cif, const struct in_addr *itaddr, 1048 const struct in_addr *isaddr, uint8_t **enaddr) 1049 { 1050 const struct carp_softc *vh; 1051 int index, count = 0; 1052 1053 /* 1054 * XXX proof of concept implementation. 1055 * We use the source ip to decide which virtual host should 1056 * handle the request. If we're master of that virtual host, 1057 * then we respond, otherwise, just drop the arp packet on 1058 * the floor. 1059 */ 1060 1061 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1062 if (!CARP_IS_RUNNING(&vh->sc_if)) 1063 continue; 1064 1065 if (carp_find_addr(vh, itaddr) != NULL) 1066 count++; 1067 } 1068 if (count == 0) 1069 return 0; 1070 1071 /* this should be a hash, like pf_hash() */ 1072 index = ntohl(isaddr->s_addr) % count; 1073 count = 0; 1074 1075 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1076 if (!CARP_IS_RUNNING(&vh->sc_if)) 1077 continue; 1078 1079 if (carp_find_addr(vh, itaddr) == NULL) 1080 continue; 1081 1082 if (count == index) { 1083 if (vh->sc_state == MASTER) { 1084 *enaddr = IF_LLADDR(&vh->sc_if); 1085 return 1; 1086 } else { 1087 return 0; 1088 } 1089 } 1090 count++; 1091 } 1092 return 0; 1093 } 1094 1095 int 1096 carp_iamatch(const void *v, const struct in_addr *itaddr, 1097 const struct in_addr *isaddr, uint8_t **enaddr) 1098 { 1099 const struct carp_if *cif = v; 1100 const struct carp_softc *vh; 1101 1102 if (carp_opts[CARPCTL_ARPBALANCE]) 1103 return carp_iamatch_balance(cif, itaddr, isaddr, enaddr); 1104 1105 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1106 if (!CARP_IS_RUNNING(&vh->sc_if) || vh->sc_state != MASTER) 1107 continue; 1108 1109 if (carp_find_addr(vh, itaddr) != NULL) { 1110 *enaddr = IF_LLADDR(&vh->sc_if); 1111 return 1; 1112 } 1113 } 1114 return 0; 1115 } 1116 1117 #ifdef INET6 1118 struct ifaddr * 1119 carp_iamatch6(void *v, struct in6_addr *taddr) 1120 { 1121 struct carp_if *cif = v; 1122 struct carp_softc *vh; 1123 1124 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1125 struct ifaddr_container *ifac; 1126 1127 TAILQ_FOREACH(ifac, &vh->sc_if.if_addrheads[mycpuid], 1128 ifa_link) { 1129 struct ifaddr *ifa = ifac->ifa; 1130 1131 if (IN6_ARE_ADDR_EQUAL(taddr, 1132 &ifatoia6(ifa)->ia_addr.sin6_addr) && 1133 CARP_IS_RUNNING(&vh->sc_if) && 1134 vh->sc_state == MASTER) { 1135 return (ifa); 1136 } 1137 } 1138 } 1139 return (NULL); 1140 } 1141 1142 void * 1143 carp_macmatch6(void *v, struct mbuf *m, const struct in6_addr *taddr) 1144 { 1145 struct m_tag *mtag; 1146 struct carp_if *cif = v; 1147 struct carp_softc *sc; 1148 1149 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) { 1150 struct ifaddr_container *ifac; 1151 1152 TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], 1153 ifa_link) { 1154 struct ifaddr *ifa = ifac->ifa; 1155 1156 if (IN6_ARE_ADDR_EQUAL(taddr, 1157 &ifatoia6(ifa)->ia_addr.sin6_addr) && 1158 CARP_IS_RUNNING(&sc->sc_if)) { 1159 struct ifnet *ifp = &sc->sc_if; 1160 1161 mtag = m_tag_get(PACKET_TAG_CARP, 1162 sizeof(struct ifnet *), MB_DONTWAIT); 1163 if (mtag == NULL) { 1164 /* better a bit than nothing */ 1165 return (IF_LLADDR(ifp)); 1166 } 1167 bcopy(&ifp, (caddr_t)(mtag + 1), 1168 sizeof(struct ifnet *)); 1169 m_tag_prepend(m, mtag); 1170 1171 return (IF_LLADDR(ifp)); 1172 } 1173 } 1174 } 1175 return (NULL); 1176 } 1177 #endif 1178 1179 int 1180 carp_forus(const void *v, const void *dhost) 1181 { 1182 const struct carp_if *cif = v; 1183 const struct carp_softc *vh; 1184 const uint8_t *ena = dhost; 1185 1186 if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1) 1187 return 0; 1188 1189 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1190 const struct ifnet *cifp = &vh->sc_if; 1191 1192 if (CARP_IS_RUNNING(cifp) && vh->sc_state == MASTER && 1193 !bcmp(dhost, IF_LLADDR(cifp), ETHER_ADDR_LEN)) 1194 return 1; 1195 } 1196 return 0; 1197 } 1198 1199 static void 1200 carp_master_down_timeout(void *xsc) 1201 { 1202 struct carp_softc *sc = xsc; 1203 1204 CARP_DEBUG("%s: BACKUP -> MASTER (master timed out)\n", 1205 sc->sc_if.if_xname); 1206 carp_master_down(sc); 1207 } 1208 1209 static void 1210 carp_master_down(struct carp_softc *sc) 1211 { 1212 switch (sc->sc_state) { 1213 case INIT: 1214 kprintf("%s: master_down event in INIT state\n", 1215 sc->sc_if.if_xname); 1216 break; 1217 1218 case MASTER: 1219 break; 1220 1221 case BACKUP: 1222 carp_set_state(sc, MASTER); 1223 carp_send_ad(sc); 1224 carp_send_arp(sc); 1225 #ifdef INET6 1226 carp_send_na(sc); 1227 #endif /* INET6 */ 1228 carp_setrun(sc, 0); 1229 carp_setroute(sc, RTM_ADD); 1230 break; 1231 } 1232 } 1233 1234 /* 1235 * When in backup state, af indicates whether to reset the master down timer 1236 * for v4 or v6. If it's set to zero, reset the ones which are already pending. 1237 */ 1238 static void 1239 carp_setrun(struct carp_softc *sc, sa_family_t af) 1240 { 1241 struct ifnet *cifp = &sc->sc_if; 1242 struct timeval tv; 1243 1244 if (sc->sc_carpdev == NULL) { 1245 carp_set_state(sc, INIT); 1246 return; 1247 } 1248 1249 if ((cifp->if_flags & IFF_RUNNING) && sc->sc_vhid > 0 && 1250 (sc->sc_naddrs || sc->sc_naddrs6)) { 1251 /* Nothing */ 1252 } else { 1253 carp_setroute(sc, RTM_DELETE); 1254 return; 1255 } 1256 1257 switch (sc->sc_state) { 1258 case INIT: 1259 if (carp_opts[CARPCTL_PREEMPT] && !carp_suppress_preempt) { 1260 carp_send_ad(sc); 1261 carp_send_arp(sc); 1262 #ifdef INET6 1263 carp_send_na(sc); 1264 #endif /* INET6 */ 1265 CARP_DEBUG("%s: INIT -> MASTER (preempting)\n", 1266 cifp->if_xname); 1267 carp_set_state(sc, MASTER); 1268 carp_setroute(sc, RTM_ADD); 1269 } else { 1270 CARP_DEBUG("%s: INIT -> BACKUP\n", cifp->if_xname); 1271 carp_set_state(sc, BACKUP); 1272 carp_setroute(sc, RTM_DELETE); 1273 carp_setrun(sc, 0); 1274 } 1275 break; 1276 1277 case BACKUP: 1278 callout_stop(&sc->sc_ad_tmo); 1279 tv.tv_sec = 3 * sc->sc_advbase; 1280 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1281 switch (af) { 1282 #ifdef INET 1283 case AF_INET: 1284 callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv), 1285 carp_master_down_timeout, sc); 1286 break; 1287 #endif /* INET */ 1288 #ifdef INET6 1289 case AF_INET6: 1290 callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv), 1291 carp_master_down_timeout, sc); 1292 break; 1293 #endif /* INET6 */ 1294 default: 1295 if (sc->sc_naddrs) 1296 callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv), 1297 carp_master_down_timeout, sc); 1298 if (sc->sc_naddrs6) 1299 callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv), 1300 carp_master_down_timeout, sc); 1301 break; 1302 } 1303 break; 1304 1305 case MASTER: 1306 tv.tv_sec = sc->sc_advbase; 1307 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1308 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv), 1309 carp_send_ad_timeout, sc); 1310 break; 1311 } 1312 } 1313 1314 static void 1315 carp_multicast_cleanup(struct carp_softc *sc) 1316 { 1317 struct ip_moptions *imo = &sc->sc_imo; 1318 1319 if (imo->imo_num_memberships == 0) 1320 return; 1321 KKASSERT(imo->imo_num_memberships == 1); 1322 1323 in_delmulti(imo->imo_membership[0]); 1324 imo->imo_membership[0] = NULL; 1325 imo->imo_num_memberships = 0; 1326 imo->imo_multicast_ifp = NULL; 1327 } 1328 1329 #ifdef INET6 1330 static void 1331 carp_multicast6_cleanup(struct carp_softc *sc) 1332 { 1333 struct ip6_moptions *im6o = &sc->sc_im6o; 1334 1335 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 1336 struct in6_multi_mship *imm = 1337 LIST_FIRST(&im6o->im6o_memberships); 1338 1339 LIST_REMOVE(imm, i6mm_chain); 1340 in6_leavegroup(imm); 1341 } 1342 im6o->im6o_multicast_ifp = NULL; 1343 } 1344 #endif 1345 1346 static int 1347 carp_get_vhaddr(struct carp_softc *sc, struct ifdrv *ifd) 1348 { 1349 const struct carp_vhaddr *vha; 1350 struct ifcarpvhaddr *carpa, *carpa0; 1351 int count, len, error; 1352 1353 count = 0; 1354 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) 1355 ++count; 1356 1357 if (ifd->ifd_len == 0) { 1358 ifd->ifd_len = count * sizeof(*carpa); 1359 return 0; 1360 } else if (count == 0 || ifd->ifd_len < sizeof(*carpa)) { 1361 ifd->ifd_len = 0; 1362 return 0; 1363 } 1364 len = min(ifd->ifd_len, sizeof(*carpa) * count); 1365 KKASSERT(len >= sizeof(*carpa)); 1366 1367 carpa0 = carpa = kmalloc(len, M_TEMP, M_WAITOK | M_NULLOK | M_ZERO); 1368 if (carpa == NULL) 1369 return ENOMEM; 1370 1371 count = 0; 1372 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 1373 if (len < sizeof(*carpa)) 1374 break; 1375 1376 carpa->carpa_flags = vha->vha_flags; 1377 carpa->carpa_addr.sin_family = AF_INET; 1378 carpa->carpa_addr.sin_addr = vha->vha_ia->ia_addr.sin_addr; 1379 1380 carpa->carpa_baddr.sin_family = AF_INET; 1381 if (vha->vha_iaback == NULL) { 1382 carpa->carpa_baddr.sin_addr.s_addr = INADDR_ANY; 1383 } else { 1384 carpa->carpa_baddr.sin_addr = 1385 vha->vha_iaback->ia_addr.sin_addr; 1386 } 1387 1388 ++carpa; 1389 ++count; 1390 len -= sizeof(*carpa); 1391 } 1392 ifd->ifd_len = sizeof(*carpa) * count; 1393 KKASSERT(ifd->ifd_len > 0); 1394 1395 error = copyout(carpa0, ifd->ifd_data, ifd->ifd_len); 1396 kfree(carpa0, M_TEMP); 1397 return error; 1398 } 1399 1400 static int 1401 carp_config_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha) 1402 { 1403 struct ifnet *ifp; 1404 struct in_ifaddr *ia_if; 1405 struct in_ifaddr_container *iac; 1406 const struct sockaddr_in *sin; 1407 u_long iaddr; 1408 int own; 1409 1410 KKASSERT(vha->vha_ia != NULL); 1411 1412 sin = &vha->vha_ia->ia_addr; 1413 iaddr = ntohl(sin->sin_addr.s_addr); 1414 1415 ia_if = NULL; 1416 own = 0; 1417 TAILQ_FOREACH(iac, &in_ifaddrheads[mycpuid], ia_link) { 1418 struct in_ifaddr *ia = iac->ia; 1419 1420 if ((ia->ia_flags & IFA_ROUTE) == 0) 1421 continue; 1422 1423 if (ia->ia_ifp->if_type == IFT_CARP) 1424 continue; 1425 1426 /* and, yeah, we need a multicast-capable iface too */ 1427 if ((ia->ia_ifp->if_flags & IFF_MULTICAST) == 0) 1428 continue; 1429 1430 if ((iaddr & ia->ia_subnetmask) == ia->ia_subnet) { 1431 if (sin->sin_addr.s_addr == 1432 ia->ia_addr.sin_addr.s_addr) 1433 own = 1; 1434 if (ia_if == NULL) 1435 ia_if = ia; 1436 else if (sc->sc_carpdev != NULL && 1437 sc->sc_carpdev == ia->ia_ifp) 1438 ia_if = ia; 1439 } 1440 } 1441 1442 carp_deactivate_vhaddr(sc, vha); 1443 if (!ia_if) 1444 return ENOENT; 1445 1446 ifp = ia_if->ia_ifp; 1447 1448 /* XXX Don't allow parent iface to be changed */ 1449 if (sc->sc_carpdev != NULL && sc->sc_carpdev != ifp) 1450 return EEXIST; 1451 1452 return carp_activate_vhaddr(sc, vha, ifp, ia_if, own); 1453 } 1454 1455 static void 1456 carp_add_addr(struct carp_softc *sc, struct ifaddr *carp_ifa) 1457 { 1458 struct carp_vhaddr *vha_new; 1459 struct in_ifaddr *carp_ia; 1460 #ifdef INVARIANTS 1461 struct carp_vhaddr *vha; 1462 #endif 1463 1464 KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET); 1465 carp_ia = ifatoia(carp_ifa); 1466 1467 #ifdef INVARIANTS 1468 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) 1469 KKASSERT(vha->vha_ia != NULL && vha->vha_ia != carp_ia); 1470 #endif 1471 1472 vha_new = kmalloc(sizeof(*vha_new), M_CARP, M_WAITOK | M_ZERO); 1473 vha_new->vha_ia = carp_ia; 1474 carp_insert_vhaddr(sc, vha_new); 1475 1476 if (carp_config_vhaddr(sc, vha_new) != 0) { 1477 /* 1478 * If the above configuration fails, it may only mean 1479 * that the new address is problematic. However, the 1480 * carp(4) interface may already have several working 1481 * addresses. Since the expected behaviour of 1482 * SIOC[AS]IFADDR is to put the NIC into working state, 1483 * we try starting the state machine manually here with 1484 * the hope that the carp(4)'s previously working 1485 * addresses still could be brought up. 1486 */ 1487 carp_hmac_prepare(sc); 1488 carp_set_state(sc, INIT); 1489 carp_setrun(sc, 0); 1490 } 1491 } 1492 1493 static void 1494 carp_del_addr(struct carp_softc *sc, struct ifaddr *carp_ifa) 1495 { 1496 struct carp_vhaddr *vha; 1497 struct in_ifaddr *carp_ia; 1498 1499 KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET); 1500 carp_ia = ifatoia(carp_ifa); 1501 1502 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 1503 KKASSERT(vha->vha_ia != NULL); 1504 if (vha->vha_ia == carp_ia) 1505 break; 1506 } 1507 KASSERT(vha != NULL, ("no corresponding vhaddr %p\n", carp_ifa)); 1508 1509 /* 1510 * Remove the vhaddr from the list before deactivating 1511 * the vhaddr, so that the HMAC could be correctly 1512 * updated in carp_deactivate_vhaddr() 1513 */ 1514 carp_remove_vhaddr(sc, vha); 1515 1516 carp_deactivate_vhaddr(sc, vha); 1517 kfree(vha, M_CARP); 1518 } 1519 1520 static void 1521 carp_config_addr(struct carp_softc *sc, struct ifaddr *carp_ifa) 1522 { 1523 struct carp_vhaddr *vha; 1524 struct in_ifaddr *carp_ia; 1525 1526 KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET); 1527 carp_ia = ifatoia(carp_ifa); 1528 1529 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 1530 KKASSERT(vha->vha_ia != NULL); 1531 if (vha->vha_ia == carp_ia) 1532 break; 1533 } 1534 KASSERT(vha != NULL, ("no corresponding vhaddr %p\n", carp_ifa)); 1535 1536 /* Remove then reinsert, to keep the vhaddr list sorted */ 1537 carp_remove_vhaddr(sc, vha); 1538 carp_insert_vhaddr(sc, vha); 1539 1540 if (carp_config_vhaddr(sc, vha) != 0) { 1541 /* See the comment in carp_add_addr() */ 1542 carp_hmac_prepare(sc); 1543 carp_set_state(sc, INIT); 1544 carp_setrun(sc, 0); 1545 } 1546 } 1547 1548 #ifdef INET6 1549 static int 1550 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) 1551 { 1552 struct ifnet *ifp; 1553 struct carp_if *cif; 1554 struct in6_ifaddr *ia, *ia_if; 1555 struct ip6_moptions *im6o = &sc->sc_im6o; 1556 struct in6_multi_mship *imm; 1557 struct in6_addr in6; 1558 int own, error; 1559 1560 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 1561 carp_setrun(sc, 0); 1562 return (0); 1563 } 1564 1565 /* we have to do it by hands to check we won't match on us */ 1566 ia_if = NULL; own = 0; 1567 for (ia = in6_ifaddr; ia; ia = ia->ia_next) { 1568 int i; 1569 1570 for (i = 0; i < 4; i++) { 1571 if ((sin6->sin6_addr.s6_addr32[i] & 1572 ia->ia_prefixmask.sin6_addr.s6_addr32[i]) != 1573 (ia->ia_addr.sin6_addr.s6_addr32[i] & 1574 ia->ia_prefixmask.sin6_addr.s6_addr32[i])) 1575 break; 1576 } 1577 /* and, yeah, we need a multicast-capable iface too */ 1578 if (ia->ia_ifp != &sc->sc_if && 1579 (ia->ia_ifp->if_flags & IFF_MULTICAST) && 1580 (i == 4)) { 1581 if (!ia_if) 1582 ia_if = ia; 1583 if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, 1584 &ia->ia_addr.sin6_addr)) 1585 own++; 1586 } 1587 } 1588 1589 if (!ia_if) 1590 return (EADDRNOTAVAIL); 1591 ia = ia_if; 1592 ifp = ia->ia_ifp; 1593 1594 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 || 1595 (im6o->im6o_multicast_ifp && im6o->im6o_multicast_ifp != ifp)) 1596 return (EADDRNOTAVAIL); 1597 1598 if (!sc->sc_naddrs6) { 1599 im6o->im6o_multicast_ifp = ifp; 1600 1601 /* join CARP multicast address */ 1602 bzero(&in6, sizeof(in6)); 1603 in6.s6_addr16[0] = htons(0xff02); 1604 in6.s6_addr8[15] = 0x12; 1605 if (in6_setscope(&in6, ifp, NULL) != 0) 1606 goto cleanup; 1607 if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL) 1608 goto cleanup; 1609 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain); 1610 1611 /* join solicited multicast address */ 1612 bzero(&in6, sizeof(in6)); 1613 in6.s6_addr16[0] = htons(0xff02); 1614 in6.s6_addr32[1] = 0; 1615 in6.s6_addr32[2] = htonl(1); 1616 in6.s6_addr32[3] = sin6->sin6_addr.s6_addr32[3]; 1617 in6.s6_addr8[12] = 0xff; 1618 if (in6_setscope(&in6, ifp, NULL) != 0) 1619 goto cleanup; 1620 if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL) 1621 goto cleanup; 1622 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain); 1623 } 1624 1625 if (!ifp->if_carp) { 1626 cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO); 1627 1628 if ((error = ifpromisc(ifp, 1))) { 1629 kfree(cif, M_CARP); 1630 goto cleanup; 1631 } 1632 1633 TAILQ_INIT(&cif->vhif_vrs); 1634 ifp->if_carp = cif; 1635 } else { 1636 struct carp_softc *vr; 1637 1638 cif = ifp->if_carp; 1639 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 1640 if (vr != sc && vr->sc_vhid == sc->sc_vhid) { 1641 error = EINVAL; 1642 goto cleanup; 1643 } 1644 } 1645 } 1646 sc->sc_ia6 = ia; 1647 sc->sc_carpdev = ifp; 1648 1649 { /* XXX prevent endless loop if already in queue */ 1650 struct carp_softc *vr, *after = NULL; 1651 int myself = 0; 1652 cif = ifp->if_carp; 1653 1654 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 1655 if (vr == sc) 1656 myself = 1; 1657 if (vr->sc_vhid < sc->sc_vhid) 1658 after = vr; 1659 } 1660 1661 if (!myself) { 1662 /* We're trying to keep things in order */ 1663 if (after == NULL) 1664 TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list); 1665 else 1666 TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list); 1667 } 1668 } 1669 1670 sc->sc_naddrs6++; 1671 if (own) 1672 sc->sc_advskew = 0; 1673 carp_sc_state(sc); 1674 carp_setrun(sc, 0); 1675 1676 return (0); 1677 1678 cleanup: 1679 /* clean up multicast memberships */ 1680 if (!sc->sc_naddrs6) { 1681 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 1682 imm = LIST_FIRST(&im6o->im6o_memberships); 1683 LIST_REMOVE(imm, i6mm_chain); 1684 in6_leavegroup(imm); 1685 } 1686 } 1687 return (error); 1688 } 1689 1690 static int 1691 carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) 1692 { 1693 int error = 0; 1694 1695 if (!--sc->sc_naddrs6) { 1696 struct carp_if *cif = sc->sc_carpdev->if_carp; 1697 struct ip6_moptions *im6o = &sc->sc_im6o; 1698 1699 callout_stop(&sc->sc_ad_tmo); 1700 sc->sc_vhid = -1; 1701 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 1702 struct in6_multi_mship *imm = 1703 LIST_FIRST(&im6o->im6o_memberships); 1704 1705 LIST_REMOVE(imm, i6mm_chain); 1706 in6_leavegroup(imm); 1707 } 1708 im6o->im6o_multicast_ifp = NULL; 1709 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 1710 if (TAILQ_EMPTY(&cif->vhif_vrs)) { 1711 sc->sc_carpdev->if_carp = NULL; 1712 kfree(cif, M_IFADDR); 1713 } 1714 } 1715 return (error); 1716 } 1717 #endif /* INET6 */ 1718 1719 static int 1720 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr, struct ucred *cr) 1721 { 1722 struct carp_softc *sc = ifp->if_softc, *vr; 1723 struct carpreq carpr; 1724 struct ifaddr *ifa; 1725 struct ifreq *ifr; 1726 struct ifaliasreq *ifra; 1727 struct ifdrv *ifd; 1728 char devname[IFNAMSIZ]; 1729 int error = 0; 1730 1731 ifa = (struct ifaddr *)addr; 1732 ifra = (struct ifaliasreq *)addr; 1733 ifr = (struct ifreq *)addr; 1734 ifd = (struct ifdrv *)addr; 1735 1736 switch (cmd) { 1737 case SIOCSIFADDR: 1738 switch (ifa->ifa_addr->sa_family) { 1739 #ifdef INET 1740 case AF_INET: 1741 ifp->if_flags |= IFF_UP | IFF_RUNNING; 1742 break; 1743 #endif /* INET */ 1744 #ifdef INET6 1745 case AF_INET6: 1746 ifp->if_flags |= IFF_UP | IFF_RUNNING; 1747 error = carp_set_addr6(sc, satosin6(ifa->ifa_addr)); 1748 break; 1749 #endif /* INET6 */ 1750 default: 1751 error = EAFNOSUPPORT; 1752 break; 1753 } 1754 break; 1755 1756 case SIOCAIFADDR: 1757 switch (ifa->ifa_addr->sa_family) { 1758 #ifdef INET 1759 case AF_INET: 1760 panic("SIOCAIFADDR should never be seen\n"); 1761 #endif /* INET */ 1762 #ifdef INET6 1763 case AF_INET6: 1764 ifp->if_flags |= IFF_UP | IFF_RUNNING; 1765 error = carp_set_addr6(sc, satosin6(&ifra->ifra_addr)); 1766 break; 1767 #endif /* INET6 */ 1768 default: 1769 error = EAFNOSUPPORT; 1770 break; 1771 } 1772 break; 1773 1774 case SIOCDIFADDR: 1775 switch (ifa->ifa_addr->sa_family) { 1776 #ifdef INET 1777 case AF_INET: 1778 panic("SIOCDIFADDR should never be seen\n"); 1779 #endif /* INET */ 1780 #ifdef INET6 1781 case AF_INET6: 1782 error = carp_del_addr6(sc, satosin6(&ifra->ifra_addr)); 1783 break; 1784 #endif /* INET6 */ 1785 default: 1786 error = EAFNOSUPPORT; 1787 break; 1788 } 1789 break; 1790 1791 case SIOCSIFFLAGS: 1792 if (ifp->if_flags & IFF_UP) { 1793 if ((ifp->if_flags & IFF_RUNNING) == 0) { 1794 ifp->if_flags |= IFF_RUNNING; 1795 carp_set_state(sc, INIT); 1796 carp_setrun(sc, 0); 1797 } 1798 } else if (ifp->if_flags & IFF_RUNNING) { 1799 carp_stop(sc, 0); 1800 } 1801 break; 1802 1803 case SIOCSVH: 1804 error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY); 1805 if (error) 1806 break; 1807 error = copyin(ifr->ifr_data, &carpr, sizeof(carpr)); 1808 if (error) 1809 break; 1810 1811 error = 1; 1812 if ((ifp->if_flags & IFF_RUNNING) && 1813 sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) { 1814 switch (carpr.carpr_state) { 1815 case BACKUP: 1816 callout_stop(&sc->sc_ad_tmo); 1817 carp_set_state(sc, BACKUP); 1818 carp_setrun(sc, 0); 1819 carp_setroute(sc, RTM_DELETE); 1820 break; 1821 1822 case MASTER: 1823 carp_master_down(sc); 1824 break; 1825 1826 default: 1827 break; 1828 } 1829 } 1830 if (carpr.carpr_vhid > 0) { 1831 if (carpr.carpr_vhid > 255) { 1832 error = EINVAL; 1833 break; 1834 } 1835 if (sc->sc_carpdev) { 1836 struct carp_if *cif = sc->sc_carpdev->if_carp; 1837 1838 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 1839 if (vr != sc && 1840 vr->sc_vhid == carpr.carpr_vhid) 1841 return EEXIST; 1842 } 1843 } 1844 sc->sc_vhid = carpr.carpr_vhid; 1845 IF_LLADDR(ifp)[0] = 0; 1846 IF_LLADDR(ifp)[1] = 0; 1847 IF_LLADDR(ifp)[2] = 0x5e; 1848 IF_LLADDR(ifp)[3] = 0; 1849 IF_LLADDR(ifp)[4] = 1; 1850 IF_LLADDR(ifp)[5] = sc->sc_vhid; 1851 error--; 1852 } 1853 if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) { 1854 if (carpr.carpr_advskew >= 255) { 1855 error = EINVAL; 1856 break; 1857 } 1858 if (carpr.carpr_advbase > 255) { 1859 error = EINVAL; 1860 break; 1861 } 1862 sc->sc_advbase = carpr.carpr_advbase; 1863 sc->sc_advskew = carpr.carpr_advskew; 1864 error--; 1865 } 1866 bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key)); 1867 if (error > 0) { 1868 error = EINVAL; 1869 } else { 1870 error = 0; 1871 carp_setrun(sc, 0); 1872 } 1873 break; 1874 1875 case SIOCGVH: 1876 bzero(&carpr, sizeof(carpr)); 1877 carpr.carpr_state = sc->sc_state; 1878 carpr.carpr_vhid = sc->sc_vhid; 1879 carpr.carpr_advbase = sc->sc_advbase; 1880 carpr.carpr_advskew = sc->sc_advskew; 1881 error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY); 1882 if (error == 0) { 1883 bcopy(sc->sc_key, carpr.carpr_key, 1884 sizeof(carpr.carpr_key)); 1885 } 1886 1887 error = copyout(&carpr, ifr->ifr_data, sizeof(carpr)); 1888 break; 1889 1890 case SIOCGDRVSPEC: 1891 switch (ifd->ifd_cmd) { 1892 case CARPGDEVNAME: 1893 if (ifd->ifd_len != sizeof(devname)) 1894 error = EINVAL; 1895 break; 1896 1897 case CARPGVHADDR: 1898 break; 1899 1900 default: 1901 error = EINVAL; 1902 break; 1903 } 1904 if (error) 1905 break; 1906 1907 switch (ifd->ifd_cmd) { 1908 case CARPGVHADDR: 1909 error = carp_get_vhaddr(sc, ifd); 1910 break; 1911 1912 case CARPGDEVNAME: 1913 bzero(devname, sizeof(devname)); 1914 if (sc->sc_carpdev != NULL) { 1915 strlcpy(devname, sc->sc_carpdev->if_xname, 1916 sizeof(devname)); 1917 } 1918 error = copyout(devname, ifd->ifd_data, 1919 sizeof(devname)); 1920 break; 1921 } 1922 break; 1923 1924 default: 1925 error = EINVAL; 1926 break; 1927 } 1928 carp_hmac_prepare(sc); 1929 return error; 1930 } 1931 1932 /* 1933 * XXX: this is looutput. We should eventually use it from there. 1934 */ 1935 static int 1936 carp_looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 1937 struct rtentry *rt) 1938 { 1939 uint32_t af; 1940 1941 M_ASSERTPKTHDR(m); /* check if we have the packet header */ 1942 1943 if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 1944 m_freem(m); 1945 return (rt->rt_flags & RTF_BLACKHOLE ? 0 : 1946 rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); 1947 } 1948 1949 ifp->if_opackets++; 1950 ifp->if_obytes += m->m_pkthdr.len; 1951 1952 /* BPF writes need to be handled specially. */ 1953 if (dst->sa_family == AF_UNSPEC) { 1954 bcopy(dst->sa_data, &af, sizeof(af)); 1955 dst->sa_family = af; 1956 } 1957 1958 #if 1 /* XXX */ 1959 switch (dst->sa_family) { 1960 case AF_INET: 1961 case AF_INET6: 1962 case AF_IPX: 1963 case AF_APPLETALK: 1964 break; 1965 1966 default: 1967 m_freem(m); 1968 return (EAFNOSUPPORT); 1969 } 1970 #endif 1971 return (if_simloop(ifp, m, dst->sa_family, 0)); 1972 } 1973 1974 /* 1975 * Start output on carp interface. This function should never be called. 1976 */ 1977 static void 1978 carp_start(struct ifnet *ifp) 1979 { 1980 #ifdef DEBUG 1981 kprintf("%s: start called\n", ifp->if_xname); 1982 #endif 1983 } 1984 1985 int 1986 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, 1987 struct rtentry *rt) 1988 { 1989 struct m_tag *mtag; 1990 struct carp_softc *sc; 1991 struct ifnet *carp_ifp; 1992 struct ether_header *eh; 1993 1994 if (!sa) 1995 return (0); 1996 1997 switch (sa->sa_family) { 1998 #ifdef INET 1999 case AF_INET: 2000 break; 2001 #endif /* INET */ 2002 #ifdef INET6 2003 case AF_INET6: 2004 break; 2005 #endif /* INET6 */ 2006 default: 2007 return (0); 2008 } 2009 2010 mtag = m_tag_find(m, PACKET_TAG_CARP, NULL); 2011 if (mtag == NULL) 2012 return (0); 2013 2014 bcopy(mtag + 1, &carp_ifp, sizeof(struct ifnet *)); 2015 sc = carp_ifp->if_softc; 2016 2017 /* Set the source MAC address to Virtual Router MAC Address */ 2018 switch (ifp->if_type) { 2019 case IFT_ETHER: 2020 case IFT_L2VLAN: 2021 eh = mtod(m, struct ether_header *); 2022 eh->ether_shost[0] = 0; 2023 eh->ether_shost[1] = 0; 2024 eh->ether_shost[2] = 0x5e; 2025 eh->ether_shost[3] = 0; 2026 eh->ether_shost[4] = 1; 2027 eh->ether_shost[5] = sc->sc_vhid; 2028 break; 2029 2030 default: 2031 if_printf(ifp, "carp is not supported for this " 2032 "interface type\n"); 2033 return (EOPNOTSUPP); 2034 } 2035 return (0); 2036 } 2037 2038 static void 2039 carp_set_state(struct carp_softc *sc, int state) 2040 { 2041 struct ifnet *cifp = &sc->sc_if; 2042 2043 if (sc->sc_state == state) 2044 return; 2045 sc->sc_state = state; 2046 2047 switch (sc->sc_state) { 2048 case BACKUP: 2049 cifp->if_link_state = LINK_STATE_DOWN; 2050 break; 2051 2052 case MASTER: 2053 cifp->if_link_state = LINK_STATE_UP; 2054 break; 2055 2056 default: 2057 cifp->if_link_state = LINK_STATE_UNKNOWN; 2058 break; 2059 } 2060 rt_ifmsg(cifp); 2061 } 2062 2063 void 2064 carp_group_demote_adj(struct ifnet *ifp, int adj) 2065 { 2066 struct ifg_list *ifgl; 2067 int *dm; 2068 2069 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) { 2070 if (!strcmp(ifgl->ifgl_group->ifg_group, IFG_ALL)) 2071 continue; 2072 dm = &ifgl->ifgl_group->ifg_carp_demoted; 2073 2074 if (*dm + adj >= 0) 2075 *dm += adj; 2076 else 2077 *dm = 0; 2078 2079 if (adj > 0 && *dm == 1) 2080 carp_send_ad_all(); 2081 CARP_LOG("%s demoted group %s to %d", ifp->if_xname, 2082 ifgl->ifgl_group->ifg_group, *dm); 2083 } 2084 } 2085 2086 void 2087 carp_carpdev_state(void *v) 2088 { 2089 struct carp_if *cif = v; 2090 struct carp_softc *sc; 2091 2092 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) 2093 carp_sc_state(sc); 2094 } 2095 2096 static void 2097 carp_sc_state(struct carp_softc *sc) 2098 { 2099 if (!(sc->sc_carpdev->if_flags & IFF_UP)) { 2100 callout_stop(&sc->sc_ad_tmo); 2101 callout_stop(&sc->sc_md_tmo); 2102 callout_stop(&sc->sc_md6_tmo); 2103 carp_set_state(sc, INIT); 2104 carp_setrun(sc, 0); 2105 if (!sc->sc_suppress) { 2106 carp_suppress_preempt++; 2107 if (carp_suppress_preempt == 1) 2108 carp_send_ad_all(); 2109 } 2110 sc->sc_suppress = 1; 2111 } else { 2112 carp_set_state(sc, INIT); 2113 carp_setrun(sc, 0); 2114 if (sc->sc_suppress) 2115 carp_suppress_preempt--; 2116 sc->sc_suppress = 0; 2117 } 2118 } 2119 2120 static void 2121 carp_stop(struct carp_softc *sc, int detach) 2122 { 2123 sc->sc_if.if_flags &= ~IFF_RUNNING; 2124 2125 callout_stop(&sc->sc_ad_tmo); 2126 callout_stop(&sc->sc_md_tmo); 2127 callout_stop(&sc->sc_md6_tmo); 2128 2129 if (!detach && sc->sc_state == MASTER) 2130 carp_send_ad(sc); 2131 2132 if (sc->sc_suppress) 2133 carp_suppress_preempt--; 2134 sc->sc_suppress = 0; 2135 2136 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) 2137 carp_suppress_preempt--; 2138 sc->sc_sendad_errors = 0; 2139 sc->sc_sendad_success = 0; 2140 2141 carp_set_state(sc, INIT); 2142 carp_setrun(sc, 0); 2143 } 2144 2145 static void 2146 carp_reset(struct carp_softc *sc, int detach) 2147 { 2148 struct ifnet *cifp = &sc->sc_if; 2149 2150 carp_stop(sc, detach); 2151 if (!sc->sc_dead && (cifp->if_flags & IFF_UP)) 2152 cifp->if_flags |= IFF_RUNNING; 2153 } 2154 2155 static int 2156 carp_activate_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha, 2157 struct ifnet *ifp, const struct in_ifaddr *ia_if, int own) 2158 { 2159 struct ip_moptions *imo = &sc->sc_imo; 2160 struct carp_if *cif; 2161 struct carp_softc *vr, *after = NULL; 2162 int onlist, error; 2163 #ifdef INVARIANTS 2164 int assert_onlist; 2165 #endif 2166 2167 KKASSERT(vha->vha_ia != NULL); 2168 2169 KASSERT(ia_if != NULL, ("NULL backing address\n")); 2170 KASSERT(vha->vha_iaback == NULL, ("%p is already activated\n", vha)); 2171 KASSERT((vha->vha_flags & CARP_VHAF_OWNER) == 0, 2172 ("inactive vhaddr %p is the address owner\n", vha)); 2173 2174 KASSERT(sc->sc_carpdev == NULL || sc->sc_carpdev == ifp, 2175 ("%s is already on %s\n", sc->sc_if.if_xname, 2176 sc->sc_carpdev->if_xname)); 2177 2178 KASSERT(imo->imo_multicast_ifp == NULL || 2179 imo->imo_multicast_ifp == ifp, 2180 ("%s didn't leave mcast group on %s\n", 2181 sc->sc_if.if_xname, imo->imo_multicast_ifp->if_xname)); 2182 2183 if (imo->imo_num_memberships == 0) { 2184 struct in_addr addr; 2185 2186 addr.s_addr = htonl(INADDR_CARP_GROUP); 2187 if ((imo->imo_membership[0] = in_addmulti(&addr, ifp)) == NULL) 2188 return ENOBUFS; 2189 imo->imo_num_memberships++; 2190 imo->imo_multicast_ifp = ifp; 2191 imo->imo_multicast_ttl = CARP_DFLTTL; 2192 imo->imo_multicast_loop = 0; 2193 } 2194 2195 if (!ifp->if_carp) { 2196 KASSERT(sc->sc_carpdev == NULL, 2197 ("%s is already on %s\n", sc->sc_if.if_xname, 2198 sc->sc_carpdev->if_xname)); 2199 2200 cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO); 2201 2202 error = ifpromisc(ifp, 1); 2203 if (error) { 2204 kfree(cif, M_CARP); 2205 goto cleanup; 2206 } 2207 2208 TAILQ_INIT(&cif->vhif_vrs); 2209 ifp->if_carp = cif; 2210 } else { 2211 cif = ifp->if_carp; 2212 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 2213 if (vr != sc && vr->sc_vhid == sc->sc_vhid) { 2214 error = EINVAL; 2215 goto cleanup; 2216 } 2217 } 2218 } 2219 2220 #ifdef INVARIANTS 2221 if (sc->sc_carpdev != NULL) 2222 assert_onlist = 1; 2223 else 2224 assert_onlist = 0; 2225 #endif 2226 sc->sc_ia = ia_if; 2227 sc->sc_carpdev = ifp; 2228 2229 cif = ifp->if_carp; 2230 onlist = 0; 2231 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 2232 if (vr == sc) 2233 onlist = 1; 2234 if (vr->sc_vhid < sc->sc_vhid) 2235 after = vr; 2236 } 2237 2238 #ifdef INVARIANTS 2239 if (assert_onlist) { 2240 KASSERT(onlist, ("%s is not on %s carp list\n", 2241 sc->sc_if.if_xname, ifp->if_xname)); 2242 } else { 2243 KASSERT(!onlist, ("%s is already on %s carp list\n", 2244 sc->sc_if.if_xname, ifp->if_xname)); 2245 } 2246 #endif 2247 2248 if (!onlist) { 2249 /* We're trying to keep things in order */ 2250 if (after == NULL) 2251 TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list); 2252 else 2253 TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list); 2254 } 2255 2256 vha->vha_iaback = ia_if; 2257 sc->sc_naddrs++; 2258 2259 if (own) { 2260 vha->vha_flags |= CARP_VHAF_OWNER; 2261 2262 /* XXX save user configured advskew? */ 2263 sc->sc_advskew = 0; 2264 } 2265 2266 carp_hmac_prepare(sc); 2267 carp_set_state(sc, INIT); 2268 carp_setrun(sc, 0); 2269 return 0; 2270 cleanup: 2271 carp_multicast_cleanup(sc); 2272 return error; 2273 } 2274 2275 static void 2276 carp_deactivate_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha) 2277 { 2278 KKASSERT(vha->vha_ia != NULL); 2279 2280 carp_hmac_prepare(sc); 2281 2282 if (vha->vha_iaback == NULL) { 2283 KASSERT((vha->vha_flags & CARP_VHAF_OWNER) == 0, 2284 ("inactive vhaddr %p is the address owner\n", vha)); 2285 return; 2286 } 2287 2288 vha->vha_flags &= ~CARP_VHAF_OWNER; 2289 2290 KKASSERT(sc->sc_naddrs > 0); 2291 vha->vha_iaback = NULL; 2292 sc->sc_naddrs--; 2293 if (!sc->sc_naddrs) { 2294 if (sc->sc_naddrs6) { 2295 carp_multicast_cleanup(sc); 2296 sc->sc_ia = NULL; 2297 } else { 2298 carp_detach(sc, 0); 2299 } 2300 } 2301 } 2302 2303 static void 2304 carp_link_addrs(struct carp_softc *sc, struct ifnet *ifp, struct ifaddr *ifa_if) 2305 { 2306 struct carp_vhaddr *vha; 2307 struct in_ifaddr *ia_if; 2308 2309 KKASSERT(ifa_if->ifa_addr->sa_family == AF_INET); 2310 ia_if = ifatoia(ifa_if); 2311 2312 if ((ia_if->ia_flags & IFA_ROUTE) == 0) 2313 return; 2314 2315 /* 2316 * Test each inactive vhaddr against the newly added address. 2317 * If the newly added address could be the backing address, 2318 * then activate the matching vhaddr. 2319 */ 2320 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 2321 const struct in_ifaddr *ia; 2322 u_long iaddr; 2323 int own; 2324 2325 if (vha->vha_iaback != NULL) 2326 continue; 2327 2328 ia = vha->vha_ia; 2329 iaddr = ntohl(ia->ia_addr.sin_addr.s_addr); 2330 2331 if ((iaddr & ia_if->ia_subnetmask) != ia_if->ia_subnet) 2332 continue; 2333 2334 own = 0; 2335 if (ia->ia_addr.sin_addr.s_addr == 2336 ia_if->ia_addr.sin_addr.s_addr) 2337 own = 1; 2338 2339 carp_activate_vhaddr(sc, vha, ifp, ia_if, own); 2340 } 2341 } 2342 2343 static void 2344 carp_unlink_addrs(struct carp_softc *sc, struct ifnet *ifp, 2345 struct ifaddr *ifa_if) 2346 { 2347 struct carp_vhaddr *vha; 2348 struct in_ifaddr *ia_if; 2349 2350 KKASSERT(ifa_if->ifa_addr->sa_family == AF_INET); 2351 ia_if = ifatoia(ifa_if); 2352 2353 /* 2354 * Ad src address is deleted; set it to NULL. 2355 * Following loop will try pick up a new ad src address 2356 * if one of the vhaddr could retain its backing address. 2357 */ 2358 if (sc->sc_ia == ia_if) 2359 sc->sc_ia = NULL; 2360 2361 /* 2362 * Test each active vhaddr against the deleted address. 2363 * If the deleted address is vhaddr address's backing 2364 * address, then deactivate the vhaddr. 2365 */ 2366 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 2367 if (vha->vha_iaback == NULL) 2368 continue; 2369 2370 if (vha->vha_iaback == ia_if) 2371 carp_deactivate_vhaddr(sc, vha); 2372 else if (sc->sc_ia == NULL) 2373 sc->sc_ia = vha->vha_iaback; 2374 } 2375 } 2376 2377 static void 2378 carp_update_addrs(struct carp_softc *sc) 2379 { 2380 struct carp_vhaddr *vha; 2381 2382 KKASSERT(sc->sc_carpdev == NULL); 2383 2384 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) 2385 carp_config_vhaddr(sc, vha); 2386 } 2387 2388 static void 2389 carp_ifaddr(void *arg __unused, struct ifnet *ifp, 2390 enum ifaddr_event event, struct ifaddr *ifa) 2391 { 2392 struct carp_softc *sc; 2393 2394 if (ifa->ifa_addr->sa_family != AF_INET) 2395 return; 2396 2397 if (ifp->if_type == IFT_CARP) { 2398 /* 2399 * Address is changed on carp(4) interface 2400 */ 2401 switch (event) { 2402 case IFADDR_EVENT_ADD: 2403 carp_add_addr(ifp->if_softc, ifa); 2404 break; 2405 2406 case IFADDR_EVENT_CHANGE: 2407 carp_config_addr(ifp->if_softc, ifa); 2408 break; 2409 2410 case IFADDR_EVENT_DELETE: 2411 carp_del_addr(ifp->if_softc, ifa); 2412 break; 2413 } 2414 return; 2415 } 2416 2417 /* 2418 * Address is changed on non-carp(4) interface 2419 */ 2420 if ((ifp->if_flags & IFF_MULTICAST) == 0) 2421 return; 2422 2423 crit_enter(); 2424 LIST_FOREACH(sc, &carpif_list, sc_next) { 2425 if (sc->sc_carpdev != NULL && sc->sc_carpdev != ifp) { 2426 /* Not the parent iface; skip */ 2427 continue; 2428 } 2429 2430 switch (event) { 2431 case IFADDR_EVENT_ADD: 2432 carp_link_addrs(sc, ifp, ifa); 2433 break; 2434 2435 case IFADDR_EVENT_DELETE: 2436 if (sc->sc_carpdev != NULL) { 2437 carp_unlink_addrs(sc, ifp, ifa); 2438 if (sc->sc_carpdev == NULL) 2439 carp_update_addrs(sc); 2440 } else { 2441 /* 2442 * The carp(4) interface didn't have a 2443 * parent iface, so it is not possible 2444 * that it will contain any address to 2445 * be unlinked. 2446 */ 2447 } 2448 break; 2449 2450 case IFADDR_EVENT_CHANGE: 2451 if (sc->sc_carpdev == NULL) { 2452 /* 2453 * The carp(4) interface didn't have a 2454 * parent iface, so it is not possible 2455 * that it will contain any address to 2456 * be updated. 2457 */ 2458 carp_link_addrs(sc, ifp, ifa); 2459 } else { 2460 /* 2461 * First try breaking tie with the old 2462 * address. Then see whether we could 2463 * link certain vhaddr to the new address. 2464 * If that fails, i.e. carpdev is NULL, 2465 * we try a global update. 2466 * 2467 * NOTE: The above order is critical. 2468 */ 2469 carp_unlink_addrs(sc, ifp, ifa); 2470 carp_link_addrs(sc, ifp, ifa); 2471 if (sc->sc_carpdev == NULL) 2472 carp_update_addrs(sc); 2473 } 2474 break; 2475 } 2476 } 2477 crit_exit(); 2478 } 2479 2480 static int 2481 carp_modevent(module_t mod, int type, void *data) 2482 { 2483 switch (type) { 2484 case MOD_LOAD: 2485 LIST_INIT(&carpif_list); 2486 carp_ifdetach_event = 2487 EVENTHANDLER_REGISTER(ifnet_detach_event, carp_ifdetach, NULL, 2488 EVENTHANDLER_PRI_ANY); 2489 carp_ifaddr_event = 2490 EVENTHANDLER_REGISTER(ifaddr_event, carp_ifaddr, NULL, 2491 EVENTHANDLER_PRI_ANY); 2492 if_clone_attach(&carp_cloner); 2493 break; 2494 2495 case MOD_UNLOAD: 2496 EVENTHANDLER_DEREGISTER(ifnet_detach_event, 2497 carp_ifdetach_event); 2498 EVENTHANDLER_DEREGISTER(ifaddr_event, 2499 carp_ifaddr_event); 2500 if_clone_detach(&carp_cloner); 2501 break; 2502 2503 default: 2504 return (EINVAL); 2505 } 2506 return (0); 2507 } 2508 2509 static moduledata_t carp_mod = { 2510 "carp", 2511 carp_modevent, 2512 0 2513 }; 2514 DECLARE_MODULE(carp, carp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 2515