1 /* 2 * Copyright (c) 2002 Michael Shalayeff. All rights reserved. 3 * Copyright (c) 2003 Ryan McBride. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 18 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 22 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 23 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 24 * THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 /* 27 * $FreeBSD: src/sys/netinet/ip_carp.c,v 1.48 2007/02/02 09:39:09 glebius Exp $ 28 * $DragonFly: src/sys/netinet/ip_carp.c,v 1.10 2008/07/27 10:06:57 sephe Exp $ 29 */ 30 31 #include "opt_carp.h" 32 #include "opt_inet.h" 33 #include "opt_inet6.h" 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/kernel.h> 38 #include <sys/in_cksum.h> 39 #include <sys/limits.h> 40 #include <sys/malloc.h> 41 #include <sys/mbuf.h> 42 #include <sys/time.h> 43 #include <sys/proc.h> 44 #include <sys/priv.h> 45 #include <sys/sockio.h> 46 #include <sys/socket.h> 47 #include <sys/sysctl.h> 48 #include <sys/syslog.h> 49 50 #include <machine/stdarg.h> 51 #include <crypto/sha1.h> 52 53 #include <net/bpf.h> 54 #include <net/ethernet.h> 55 #include <net/if.h> 56 #include <net/if_dl.h> 57 #include <net/if_types.h> 58 #include <net/route.h> 59 #include <net/if_clone.h> 60 61 #ifdef INET 62 #include <netinet/in.h> 63 #include <netinet/in_var.h> 64 #include <netinet/in_systm.h> 65 #include <netinet/ip.h> 66 #include <netinet/ip_var.h> 67 #include <netinet/if_ether.h> 68 #endif 69 70 #ifdef INET6 71 #include <netinet/icmp6.h> 72 #include <netinet/ip6.h> 73 #include <netinet6/ip6_var.h> 74 #include <netinet6/scope6_var.h> 75 #include <netinet6/nd6.h> 76 #endif 77 78 #include <netinet/ip_carp.h> 79 80 #define CARP_IFNAME "carp" 81 #define CARP_IS_RUNNING(ifp) \ 82 (((ifp)->if_flags & (IFF_UP | IFF_RUNNING)) == (IFF_UP | IFF_RUNNING)) 83 84 struct carp_vhaddr { 85 uint32_t vha_flags; /* CARP_VHAF_ */ 86 const struct in_ifaddr *vha_ia; /* carp address */ 87 const struct in_ifaddr *vha_iaback; /* backing address */ 88 TAILQ_ENTRY(carp_vhaddr) vha_link; 89 }; 90 TAILQ_HEAD(carp_vhaddr_list, carp_vhaddr); 91 92 struct carp_softc { 93 struct ifnet sc_if; 94 struct ifnet *sc_carpdev; /* parent interface */ 95 struct carp_vhaddr_list sc_vha_list; /* virtual addr list */ 96 97 const struct in_ifaddr *sc_ia; /* primary iface address v4 */ 98 struct ip_moptions sc_imo; 99 100 #ifdef INET6 101 struct in6_ifaddr *sc_ia6; /* primary iface address v6 */ 102 struct ip6_moptions sc_im6o; 103 #endif /* INET6 */ 104 TAILQ_ENTRY(carp_softc) sc_list; 105 106 enum { INIT = 0, BACKUP, MASTER } 107 sc_state; 108 int sc_dead; 109 110 int sc_suppress; 111 112 int sc_sendad_errors; 113 #define CARP_SENDAD_MAX_ERRORS 3 114 int sc_sendad_success; 115 #define CARP_SENDAD_MIN_SUCCESS 3 116 117 int sc_vhid; 118 int sc_advskew; 119 int sc_naddrs; /* actually used IPv4 vha */ 120 int sc_naddrs6; 121 int sc_advbase; /* seconds */ 122 int sc_init_counter; 123 uint64_t sc_counter; 124 125 /* authentication */ 126 #define CARP_HMAC_PAD 64 127 unsigned char sc_key[CARP_KEY_LEN]; 128 unsigned char sc_pad[CARP_HMAC_PAD]; 129 SHA1_CTX sc_sha1; 130 131 struct callout sc_ad_tmo; /* advertisement timeout */ 132 struct callout sc_md_tmo; /* master down timeout */ 133 struct callout sc_md6_tmo; /* master down timeout */ 134 135 LIST_ENTRY(carp_softc) sc_next; /* Interface clue */ 136 }; 137 138 struct carp_if { 139 TAILQ_HEAD(, carp_softc) vhif_vrs; 140 }; 141 142 enum { CARP_COUNT_MASTER, CARP_COUNT_RUNNING }; 143 144 SYSCTL_DECL(_net_inet_carp); 145 146 static int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0 }; /* XXX for now */ 147 SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW, 148 &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets"); 149 SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW, 150 &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode"); 151 SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW, 152 &carp_opts[CARPCTL_LOG], 0, "log bad carp packets"); 153 SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW, 154 &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses"); 155 156 static int carp_suppress_preempt = 0; 157 SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD, 158 &carp_suppress_preempt, 0, "Preemption is suppressed"); 159 160 static struct carpstats carpstats; 161 SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW, 162 &carpstats, carpstats, 163 "CARP statistics (struct carpstats, netinet/ip_carp.h)"); 164 165 #define CARP_LOG(...) do { \ 166 if (carp_opts[CARPCTL_LOG] > 0) \ 167 log(LOG_INFO, __VA_ARGS__); \ 168 } while (0) 169 170 #define CARP_DEBUG(...) do { \ 171 if (carp_opts[CARPCTL_LOG] > 1) \ 172 log(LOG_DEBUG, __VA_ARGS__); \ 173 } while (0) 174 175 static void carp_hmac_prepare(struct carp_softc *); 176 static void carp_hmac_generate(struct carp_softc *, uint32_t *, 177 unsigned char *); 178 static int carp_hmac_verify(struct carp_softc *, uint32_t *, 179 unsigned char *); 180 static void carp_setroute(struct carp_softc *, int); 181 static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t); 182 static int carp_clone_create(struct if_clone *, int, caddr_t); 183 static void carp_clone_destroy(struct ifnet *); 184 static void carp_detach(struct carp_softc *, int); 185 static int carp_prepare_ad(struct mbuf *, struct carp_softc *, 186 struct carp_header *); 187 static void carp_send_ad_all(void); 188 static void carp_send_ad_timeout(void *); 189 static void carp_send_ad(struct carp_softc *); 190 static void carp_send_arp(struct carp_softc *); 191 static void carp_master_down_timeout(void *); 192 static void carp_master_down(struct carp_softc *); 193 static int carp_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *); 194 static int carp_looutput(struct ifnet *, struct mbuf *, struct sockaddr *, 195 struct rtentry *); 196 static void carp_start(struct ifnet *); 197 static void carp_setrun(struct carp_softc *, sa_family_t); 198 static void carp_set_state(struct carp_softc *, int); 199 200 static void carp_multicast_cleanup(struct carp_softc *); 201 static void carp_add_addr(struct carp_softc *, struct ifaddr *); 202 static void carp_del_addr(struct carp_softc *, struct ifaddr *); 203 static void carp_config_addr(struct carp_softc *, struct ifaddr *); 204 static void carp_link_addrs(struct carp_softc *, struct ifnet *, 205 struct ifaddr *); 206 static void carp_unlink_addrs(struct carp_softc *, struct ifnet *, 207 struct ifaddr *); 208 209 static int carp_get_vhaddr(struct carp_softc *, struct ifdrv *); 210 static int carp_config_vhaddr(struct carp_softc *, struct carp_vhaddr *); 211 static int carp_activate_vhaddr(struct carp_softc *, struct carp_vhaddr *, 212 struct ifnet *, const struct in_ifaddr *, int); 213 static void carp_deactivate_vhaddr(struct carp_softc *, 214 struct carp_vhaddr *); 215 216 static void carp_sc_state(struct carp_softc *); 217 #ifdef INET6 218 static void carp_send_na(struct carp_softc *); 219 static int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *); 220 static int carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *); 221 static void carp_multicast6_cleanup(struct carp_softc *); 222 #endif 223 static void carp_stop(struct carp_softc *, int); 224 static void carp_reset(struct carp_softc *, int); 225 226 static void carp_ifaddr(void *, struct ifnet *, enum ifaddr_event, 227 struct ifaddr *); 228 static void carp_ifdetach(void *, struct ifnet *); 229 230 static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces"); 231 232 static LIST_HEAD(, carp_softc) carpif_list; 233 234 static struct if_clone carp_cloner = 235 IF_CLONE_INITIALIZER(CARP_IFNAME, carp_clone_create, carp_clone_destroy, 236 0, IF_MAXUNIT); 237 238 static eventhandler_tag carp_ifdetach_event; 239 static eventhandler_tag carp_ifaddr_event; 240 241 static __inline void 242 carp_insert_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha_new) 243 { 244 struct carp_vhaddr *vha; 245 u_long new_addr, addr; 246 247 KKASSERT((vha_new->vha_flags & CARP_VHAF_ONLIST) == 0); 248 249 /* 250 * Virtual address list is sorted; smaller one first 251 */ 252 new_addr = ntohl(vha_new->vha_ia->ia_addr.sin_addr.s_addr); 253 254 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 255 addr = ntohl(vha->vha_ia->ia_addr.sin_addr.s_addr); 256 257 if (addr > new_addr) 258 break; 259 } 260 if (vha == NULL) 261 TAILQ_INSERT_TAIL(&sc->sc_vha_list, vha_new, vha_link); 262 else 263 TAILQ_INSERT_BEFORE(vha, vha_new, vha_link); 264 vha_new->vha_flags |= CARP_VHAF_ONLIST; 265 } 266 267 static __inline void 268 carp_remove_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha) 269 { 270 KKASSERT(vha->vha_flags & CARP_VHAF_ONLIST); 271 vha->vha_flags &= ~CARP_VHAF_ONLIST; 272 TAILQ_REMOVE(&sc->sc_vha_list, vha, vha_link); 273 } 274 275 static void 276 carp_hmac_prepare(struct carp_softc *sc) 277 { 278 uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT; 279 uint8_t vhid = sc->sc_vhid & 0xff; 280 int i; 281 #ifdef INET6 282 struct ifaddr_container *ifac; 283 struct in6_addr in6; 284 #endif 285 #ifdef INET 286 struct carp_vhaddr *vha; 287 #endif 288 289 /* XXX: possible race here */ 290 291 /* compute ipad from key */ 292 bzero(sc->sc_pad, sizeof(sc->sc_pad)); 293 bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key)); 294 for (i = 0; i < sizeof(sc->sc_pad); i++) 295 sc->sc_pad[i] ^= 0x36; 296 297 /* precompute first part of inner hash */ 298 SHA1Init(&sc->sc_sha1); 299 SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad)); 300 SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version)); 301 SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type)); 302 SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid)); 303 #ifdef INET 304 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 305 SHA1Update(&sc->sc_sha1, 306 (const uint8_t *)&vha->vha_ia->ia_addr.sin_addr, 307 sizeof(struct in_addr)); 308 } 309 #endif /* INET */ 310 #ifdef INET6 311 TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) { 312 struct ifaddr *ifa = ifac->ifa; 313 314 if (ifa->ifa_addr->sa_family == AF_INET6) { 315 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 316 in6_clearscope(&in6); 317 SHA1Update(&sc->sc_sha1, (void *)&in6, sizeof(in6)); 318 } 319 } 320 #endif /* INET6 */ 321 322 /* convert ipad to opad */ 323 for (i = 0; i < sizeof(sc->sc_pad); i++) 324 sc->sc_pad[i] ^= 0x36 ^ 0x5c; 325 } 326 327 static void 328 carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2], 329 unsigned char md[20]) 330 { 331 SHA1_CTX sha1ctx; 332 333 /* fetch first half of inner hash */ 334 bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx)); 335 336 SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter)); 337 SHA1Final(md, &sha1ctx); 338 339 /* outer hash */ 340 SHA1Init(&sha1ctx); 341 SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad)); 342 SHA1Update(&sha1ctx, md, 20); 343 SHA1Final(md, &sha1ctx); 344 } 345 346 static int 347 carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2], 348 unsigned char md[20]) 349 { 350 unsigned char md2[20]; 351 352 carp_hmac_generate(sc, counter, md2); 353 return (bcmp(md, md2, sizeof(md2))); 354 } 355 356 static void 357 carp_setroute(struct carp_softc *sc, int cmd) 358 { 359 #ifdef INET6 360 struct ifaddr_container *ifac; 361 362 crit_enter(); 363 TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) { 364 struct ifaddr *ifa = ifac->ifa; 365 366 if (ifa->ifa_addr->sa_family == AF_INET6) { 367 if (cmd == RTM_ADD) 368 in6_ifaddloop(ifa); 369 else 370 in6_ifremloop(ifa); 371 } 372 } 373 crit_exit(); 374 #endif /* INET6 */ 375 } 376 377 static int 378 carp_clone_create(struct if_clone *ifc, int unit, caddr_t param __unused) 379 { 380 struct carp_softc *sc; 381 struct ifnet *ifp; 382 383 sc = kmalloc(sizeof(*sc), M_CARP, M_WAITOK | M_ZERO); 384 ifp = &sc->sc_if; 385 386 sc->sc_suppress = 0; 387 sc->sc_advbase = CARP_DFLTINTV; 388 sc->sc_vhid = -1; /* required setting */ 389 sc->sc_advskew = 0; 390 sc->sc_init_counter = 1; 391 sc->sc_naddrs = 0; 392 sc->sc_naddrs6 = 0; 393 394 TAILQ_INIT(&sc->sc_vha_list); 395 396 #ifdef INET6 397 sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL; 398 #endif 399 400 callout_init(&sc->sc_ad_tmo); 401 callout_init(&sc->sc_md_tmo); 402 callout_init(&sc->sc_md6_tmo); 403 404 ifp->if_softc = sc; 405 if_initname(ifp, CARP_IFNAME, unit); 406 ifp->if_mtu = ETHERMTU; 407 ifp->if_flags = IFF_LOOPBACK; 408 ifp->if_ioctl = carp_ioctl; 409 ifp->if_output = carp_looutput; 410 ifp->if_start = carp_start; 411 ifp->if_type = IFT_CARP; 412 ifp->if_snd.ifq_maxlen = ifqmaxlen; 413 ifp->if_hdrlen = 0; 414 if_attach(ifp, NULL); 415 bpfattach(ifp, DLT_NULL, sizeof(u_int)); 416 417 crit_enter(); 418 LIST_INSERT_HEAD(&carpif_list, sc, sc_next); 419 crit_exit(); 420 421 return (0); 422 } 423 424 static void 425 carp_clone_destroy(struct ifnet *ifp) 426 { 427 struct carp_softc *sc = ifp->if_softc; 428 429 sc->sc_dead = 1; 430 carp_detach(sc, 1); 431 432 crit_enter(); 433 LIST_REMOVE(sc, sc_next); 434 crit_exit(); 435 bpfdetach(ifp); 436 if_detach(ifp); 437 438 KASSERT(sc->sc_naddrs == 0, ("certain inet address is still active\n")); 439 kfree(sc, M_CARP); 440 } 441 442 static void 443 carp_detach(struct carp_softc *sc, int detach) 444 { 445 struct carp_if *cif; 446 447 carp_reset(sc, detach); 448 449 carp_multicast_cleanup(sc); 450 #ifdef INET6 451 carp_multicast6_cleanup(sc); 452 #endif 453 454 if (!sc->sc_dead && detach) { 455 struct carp_vhaddr *vha; 456 457 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) 458 carp_deactivate_vhaddr(sc, vha); 459 KKASSERT(sc->sc_naddrs == 0); 460 } 461 462 if (sc->sc_carpdev != NULL) { 463 cif = sc->sc_carpdev->if_carp; 464 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 465 if (TAILQ_EMPTY(&cif->vhif_vrs)) { 466 ifpromisc(sc->sc_carpdev, 0); 467 sc->sc_carpdev->if_carp = NULL; 468 kfree(cif, M_CARP); 469 } 470 sc->sc_carpdev = NULL; 471 sc->sc_ia = NULL; 472 } 473 } 474 475 /* Detach an interface from the carp. */ 476 static void 477 carp_ifdetach(void *arg __unused, struct ifnet *ifp) 478 { 479 struct carp_if *cif = ifp->if_carp; 480 struct carp_softc *sc; 481 482 while (ifp->if_carp && 483 (sc = TAILQ_FIRST(&cif->vhif_vrs)) != NULL) 484 carp_detach(sc, 1); 485 } 486 487 /* 488 * process input packet. 489 * we have rearranged checks order compared to the rfc, 490 * but it seems more efficient this way or not possible otherwise. 491 */ 492 int 493 carp_input(struct mbuf **mp, int *offp, int proto) 494 { 495 struct mbuf *m = *mp; 496 struct ip *ip = mtod(m, struct ip *); 497 struct carp_header *ch; 498 int len, iphlen; 499 500 iphlen = *offp; 501 *mp = NULL; 502 503 carpstats.carps_ipackets++; 504 505 if (!carp_opts[CARPCTL_ALLOW]) { 506 m_freem(m); 507 return(IPPROTO_DONE); 508 } 509 510 /* Check if received on a valid carp interface */ 511 if (m->m_pkthdr.rcvif->if_carp == NULL) { 512 carpstats.carps_badif++; 513 CARP_LOG("carp_input: packet received on non-carp " 514 "interface: %s\n", 515 m->m_pkthdr.rcvif->if_xname); 516 m_freem(m); 517 return(IPPROTO_DONE); 518 } 519 520 /* Verify that the IP TTL is CARP_DFLTTL. */ 521 if (ip->ip_ttl != CARP_DFLTTL) { 522 carpstats.carps_badttl++; 523 CARP_LOG("carp_input: received ttl %d != %d on %s\n", 524 ip->ip_ttl, CARP_DFLTTL, 525 m->m_pkthdr.rcvif->if_xname); 526 m_freem(m); 527 return(IPPROTO_DONE); 528 } 529 530 /* Minimal CARP packet size */ 531 len = iphlen + sizeof(*ch); 532 533 /* 534 * Verify that the received packet length is 535 * not less than the CARP header 536 */ 537 if (m->m_pkthdr.len < len) { 538 carpstats.carps_badlen++; 539 CARP_LOG("packet too short %d on %s\n", m->m_pkthdr.len, 540 m->m_pkthdr.rcvif->if_xname); 541 m_freem(m); 542 return(IPPROTO_DONE); 543 } 544 545 /* Make sure that CARP header is contiguous */ 546 if (len > m->m_len) { 547 m = m_pullup(m, len); 548 if (m == NULL) { 549 carpstats.carps_hdrops++; 550 CARP_LOG("carp_input: m_pullup failed\n"); 551 return(IPPROTO_DONE); 552 } 553 ip = mtod(m, struct ip *); 554 } 555 ch = (struct carp_header *)((uint8_t *)ip + iphlen); 556 557 /* Verify the CARP checksum */ 558 if (in_cksum_skip(m, len, iphlen)) { 559 carpstats.carps_badsum++; 560 CARP_LOG("carp_input: checksum failed on %s\n", 561 m->m_pkthdr.rcvif->if_xname); 562 m_freem(m); 563 return(IPPROTO_DONE); 564 } 565 carp_input_c(m, ch, AF_INET); 566 return(IPPROTO_DONE); 567 } 568 569 #ifdef INET6 570 int 571 carp6_input(struct mbuf **mp, int *offp, int proto) 572 { 573 struct mbuf *m = *mp; 574 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 575 struct carp_header *ch; 576 u_int len; 577 578 carpstats.carps_ipackets6++; 579 580 if (!carp_opts[CARPCTL_ALLOW]) { 581 m_freem(m); 582 return (IPPROTO_DONE); 583 } 584 585 /* check if received on a valid carp interface */ 586 if (m->m_pkthdr.rcvif->if_carp == NULL) { 587 carpstats.carps_badif++; 588 CARP_LOG("carp6_input: packet received on non-carp " 589 "interface: %s\n", 590 m->m_pkthdr.rcvif->if_xname); 591 m_freem(m); 592 return (IPPROTO_DONE); 593 } 594 595 /* verify that the IP TTL is 255 */ 596 if (ip6->ip6_hlim != CARP_DFLTTL) { 597 carpstats.carps_badttl++; 598 CARP_LOG("carp6_input: received ttl %d != 255 on %s\n", 599 ip6->ip6_hlim, 600 m->m_pkthdr.rcvif->if_xname); 601 m_freem(m); 602 return (IPPROTO_DONE); 603 } 604 605 /* verify that we have a complete carp packet */ 606 len = m->m_len; 607 IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch)); 608 if (ch == NULL) { 609 carpstats.carps_badlen++; 610 CARP_LOG("carp6_input: packet size %u too small\n", len); 611 return (IPPROTO_DONE); 612 } 613 614 /* verify the CARP checksum */ 615 if (in_cksum_range(m, 0, *offp, sizeof(*ch))) { 616 carpstats.carps_badsum++; 617 CARP_LOG("carp6_input: checksum failed, on %s\n", 618 m->m_pkthdr.rcvif->if_xname); 619 m_freem(m); 620 return (IPPROTO_DONE); 621 } 622 623 carp_input_c(m, ch, AF_INET6); 624 return (IPPROTO_DONE); 625 } 626 #endif /* INET6 */ 627 628 static void 629 carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af) 630 { 631 struct ifnet *ifp = m->m_pkthdr.rcvif; 632 struct ifnet *cifp; 633 struct carp_softc *sc; 634 uint64_t tmp_counter; 635 struct timeval sc_tv, ch_tv; 636 637 /* verify that the VHID is valid on the receiving interface */ 638 TAILQ_FOREACH(sc, &((struct carp_if *)ifp->if_carp)->vhif_vrs, sc_list) 639 if (sc->sc_vhid == ch->carp_vhid) 640 break; 641 642 if (!sc || !CARP_IS_RUNNING(&sc->sc_if)) { 643 carpstats.carps_badvhid++; 644 m_freem(m); 645 return; 646 } 647 cifp = &sc->sc_if; 648 649 getmicrotime(&cifp->if_lastchange); 650 cifp->if_ipackets++; 651 cifp->if_ibytes += m->m_pkthdr.len; 652 653 if (cifp->if_bpf) { 654 struct ip *ip = mtod(m, struct ip *); 655 656 /* BPF wants net byte order */ 657 ip->ip_len = htons(ip->ip_len + (ip->ip_hl << 2)); 658 ip->ip_off = htons(ip->ip_off); 659 bpf_mtap(cifp->if_bpf, m); 660 } 661 662 /* verify the CARP version. */ 663 if (ch->carp_version != CARP_VERSION) { 664 carpstats.carps_badver++; 665 cifp->if_ierrors++; 666 CARP_LOG("%s; invalid version %d\n", cifp->if_xname, 667 ch->carp_version); 668 m_freem(m); 669 return; 670 } 671 672 /* verify the hash */ 673 if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) { 674 carpstats.carps_badauth++; 675 cifp->if_ierrors++; 676 CARP_LOG("%s: incorrect hash\n", cifp->if_xname); 677 m_freem(m); 678 return; 679 } 680 681 tmp_counter = ntohl(ch->carp_counter[0]); 682 tmp_counter = tmp_counter<<32; 683 tmp_counter += ntohl(ch->carp_counter[1]); 684 685 /* XXX Replay protection goes here */ 686 687 sc->sc_init_counter = 0; 688 sc->sc_counter = tmp_counter; 689 690 sc_tv.tv_sec = sc->sc_advbase; 691 if (carp_suppress_preempt && sc->sc_advskew < 240) 692 sc_tv.tv_usec = 240 * 1000000 / 256; 693 else 694 sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256; 695 ch_tv.tv_sec = ch->carp_advbase; 696 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; 697 698 switch (sc->sc_state) { 699 case INIT: 700 break; 701 702 case MASTER: 703 /* 704 * If we receive an advertisement from a master who's going to 705 * be more frequent than us, go into BACKUP state. 706 */ 707 if (timevalcmp(&sc_tv, &ch_tv, >) || 708 timevalcmp(&sc_tv, &ch_tv, ==)) { 709 callout_stop(&sc->sc_ad_tmo); 710 CARP_DEBUG("%s: MASTER -> BACKUP " 711 "(more frequent advertisement received)\n", 712 cifp->if_xname); 713 carp_set_state(sc, BACKUP); 714 carp_setrun(sc, 0); 715 carp_setroute(sc, RTM_DELETE); 716 } 717 break; 718 719 case BACKUP: 720 /* 721 * If we're pre-empting masters who advertise slower than us, 722 * and this one claims to be slower, treat him as down. 723 */ 724 if (carp_opts[CARPCTL_PREEMPT] && 725 timevalcmp(&sc_tv, &ch_tv, <)) { 726 CARP_DEBUG("%s: BACKUP -> MASTER " 727 "(preempting a slower master)\n", cifp->if_xname); 728 carp_master_down(sc); 729 break; 730 } 731 732 /* 733 * If the master is going to advertise at such a low frequency 734 * that he's guaranteed to time out, we'd might as well just 735 * treat him as timed out now. 736 */ 737 sc_tv.tv_sec = sc->sc_advbase * 3; 738 if (timevalcmp(&sc_tv, &ch_tv, <)) { 739 CARP_DEBUG("%s: BACKUP -> MASTER (master timed out)\n", 740 cifp->if_xname); 741 carp_master_down(sc); 742 break; 743 } 744 745 /* 746 * Otherwise, we reset the counter and wait for the next 747 * advertisement. 748 */ 749 carp_setrun(sc, af); 750 break; 751 } 752 m_freem(m); 753 } 754 755 static int 756 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) 757 { 758 struct ifnet *cifp = &sc->sc_if; 759 struct m_tag *mtag; 760 761 if (sc->sc_init_counter) { 762 /* this could also be seconds since unix epoch */ 763 sc->sc_counter = karc4random(); 764 sc->sc_counter = sc->sc_counter << 32; 765 sc->sc_counter += karc4random(); 766 } else { 767 sc->sc_counter++; 768 } 769 770 ch->carp_counter[0] = htonl((sc->sc_counter >> 32) & 0xffffffff); 771 ch->carp_counter[1] = htonl(sc->sc_counter & 0xffffffff); 772 773 carp_hmac_generate(sc, ch->carp_counter, ch->carp_md); 774 775 /* Tag packet for carp_output */ 776 mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct ifnet *), MB_DONTWAIT); 777 if (mtag == NULL) { 778 m_freem(m); 779 cifp->if_oerrors++; 780 return ENOMEM; 781 } 782 bcopy(&cifp, (caddr_t)(mtag + 1), sizeof(struct ifnet *)); 783 m_tag_prepend(m, mtag); 784 785 return 0; 786 } 787 788 static void 789 carp_send_ad_all(void) 790 { 791 struct carp_softc *sc; 792 793 LIST_FOREACH(sc, &carpif_list, sc_next) { 794 if (sc->sc_carpdev == NULL) 795 continue; 796 797 if (CARP_IS_RUNNING(&sc->sc_if) && sc->sc_state == MASTER) 798 carp_send_ad(sc); 799 } 800 } 801 802 static void 803 carp_send_ad_timeout(void *xsc) 804 { 805 carp_send_ad(xsc); 806 } 807 808 static void 809 carp_send_ad(struct carp_softc *sc) 810 { 811 struct ifnet *cifp = &sc->sc_if; 812 struct carp_header ch; 813 struct timeval tv; 814 struct carp_header *ch_ptr; 815 struct mbuf *m; 816 int len, advbase, advskew; 817 818 if (!CARP_IS_RUNNING(cifp)) { 819 /* Bow out */ 820 advbase = 255; 821 advskew = 255; 822 } else { 823 advbase = sc->sc_advbase; 824 if (!carp_suppress_preempt || sc->sc_advskew > 240) 825 advskew = sc->sc_advskew; 826 else 827 advskew = 240; 828 tv.tv_sec = advbase; 829 tv.tv_usec = advskew * 1000000 / 256; 830 } 831 832 ch.carp_version = CARP_VERSION; 833 ch.carp_type = CARP_ADVERTISEMENT; 834 ch.carp_vhid = sc->sc_vhid; 835 ch.carp_advbase = advbase; 836 ch.carp_advskew = advskew; 837 ch.carp_authlen = 7; /* XXX DEFINE */ 838 ch.carp_pad1 = 0; /* must be zero */ 839 ch.carp_cksum = 0; 840 841 #ifdef INET 842 if (sc->sc_ia != NULL) { 843 struct ip *ip; 844 845 MGETHDR(m, MB_DONTWAIT, MT_HEADER); 846 if (m == NULL) { 847 cifp->if_oerrors++; 848 carpstats.carps_onomem++; 849 /* XXX maybe less ? */ 850 if (advbase != 255 || advskew != 255) 851 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv), 852 carp_send_ad_timeout, sc); 853 return; 854 } 855 len = sizeof(*ip) + sizeof(ch); 856 m->m_pkthdr.len = len; 857 m->m_pkthdr.rcvif = NULL; 858 m->m_len = len; 859 MH_ALIGN(m, m->m_len); 860 m->m_flags |= M_MCAST; 861 ip = mtod(m, struct ip *); 862 ip->ip_v = IPVERSION; 863 ip->ip_hl = sizeof(*ip) >> 2; 864 ip->ip_tos = IPTOS_LOWDELAY; 865 ip->ip_len = len; 866 ip->ip_id = ip_newid(); 867 ip->ip_off = IP_DF; 868 ip->ip_ttl = CARP_DFLTTL; 869 ip->ip_p = IPPROTO_CARP; 870 ip->ip_sum = 0; 871 ip->ip_src = sc->sc_ia->ia_addr.sin_addr; 872 ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP); 873 874 ch_ptr = (struct carp_header *)(&ip[1]); 875 bcopy(&ch, ch_ptr, sizeof(ch)); 876 if (carp_prepare_ad(m, sc, ch_ptr)) 877 return; 878 ch_ptr->carp_cksum = in_cksum_skip(m, len, sizeof(*ip)); 879 880 getmicrotime(&cifp->if_lastchange); 881 cifp->if_opackets++; 882 cifp->if_obytes += len; 883 carpstats.carps_opackets++; 884 885 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) { 886 cifp->if_oerrors++; 887 if (sc->sc_sendad_errors < INT_MAX) 888 sc->sc_sendad_errors++; 889 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 890 carp_suppress_preempt++; 891 if (carp_suppress_preempt == 1) { 892 carp_send_ad_all(); 893 } 894 } 895 sc->sc_sendad_success = 0; 896 } else { 897 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 898 if (++sc->sc_sendad_success >= 899 CARP_SENDAD_MIN_SUCCESS) { 900 carp_suppress_preempt--; 901 sc->sc_sendad_errors = 0; 902 } 903 } else { 904 sc->sc_sendad_errors = 0; 905 } 906 } 907 } 908 #endif /* INET */ 909 #ifdef INET6 910 if (sc->sc_ia6) { 911 struct ip6_hdr *ip6; 912 913 MGETHDR(m, MB_DONTWAIT, MT_HEADER); 914 if (m == NULL) { 915 cifp->if_oerrors++; 916 carpstats.carps_onomem++; 917 /* XXX maybe less ? */ 918 if (advbase != 255 || advskew != 255) 919 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv), 920 carp_send_ad_timeout, sc); 921 return; 922 } 923 len = sizeof(*ip6) + sizeof(ch); 924 m->m_pkthdr.len = len; 925 m->m_pkthdr.rcvif = NULL; 926 m->m_len = len; 927 MH_ALIGN(m, m->m_len); 928 m->m_flags |= M_MCAST; 929 ip6 = mtod(m, struct ip6_hdr *); 930 bzero(ip6, sizeof(*ip6)); 931 ip6->ip6_vfc |= IPV6_VERSION; 932 ip6->ip6_hlim = CARP_DFLTTL; 933 ip6->ip6_nxt = IPPROTO_CARP; 934 bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src, 935 sizeof(struct in6_addr)); 936 /* set the multicast destination */ 937 938 ip6->ip6_dst.s6_addr16[0] = htons(0xff02); 939 ip6->ip6_dst.s6_addr8[15] = 0x12; 940 if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) { 941 cifp->if_oerrors++; 942 m_freem(m); 943 CARP_LOG("%s: in6_setscope failed\n", __func__); 944 return; 945 } 946 947 ch_ptr = (struct carp_header *)(&ip6[1]); 948 bcopy(&ch, ch_ptr, sizeof(ch)); 949 if (carp_prepare_ad(m, sc, ch_ptr)) 950 return; 951 ch_ptr->carp_cksum = in_cksum_skip(m, len, sizeof(*ip6)); 952 953 getmicrotime(&cifp->if_lastchange); 954 cifp->if_opackets++; 955 cifp->if_obytes += len; 956 carpstats.carps_opackets6++; 957 958 if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) { 959 cifp->if_oerrors++; 960 if (sc->sc_sendad_errors < INT_MAX) 961 sc->sc_sendad_errors++; 962 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 963 carp_suppress_preempt++; 964 if (carp_suppress_preempt == 1) { 965 carp_send_ad_all(); 966 } 967 } 968 sc->sc_sendad_success = 0; 969 } else { 970 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 971 if (++sc->sc_sendad_success >= 972 CARP_SENDAD_MIN_SUCCESS) { 973 carp_suppress_preempt--; 974 sc->sc_sendad_errors = 0; 975 } 976 } else { 977 sc->sc_sendad_errors = 0; 978 } 979 } 980 } 981 #endif /* INET6 */ 982 983 if (advbase != 255 || advskew != 255) 984 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv), 985 carp_send_ad_timeout, sc); 986 } 987 988 /* 989 * Broadcast a gratuitous ARP request containing 990 * the virtual router MAC address for each IP address 991 * associated with the virtual router. 992 */ 993 static void 994 carp_send_arp(struct carp_softc *sc) 995 { 996 const struct carp_vhaddr *vha; 997 998 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 999 if (vha->vha_iaback == NULL) 1000 continue; 1001 1002 arp_iainit(sc->sc_carpdev, &vha->vha_ia->ia_addr.sin_addr, 1003 IF_LLADDR(&sc->sc_if)); 1004 } 1005 } 1006 1007 #ifdef INET6 1008 static void 1009 carp_send_na(struct carp_softc *sc) 1010 { 1011 struct ifaddr_container *ifac; 1012 struct in6_addr *in6; 1013 static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; 1014 1015 TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) { 1016 struct ifaddr *ifa = ifac->ifa; 1017 1018 if (ifa->ifa_addr->sa_family != AF_INET6) 1019 continue; 1020 1021 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr; 1022 nd6_na_output(sc->sc_carpdev, &mcast, in6, 1023 ND_NA_FLAG_OVERRIDE, 1, NULL); 1024 DELAY(1000); /* XXX */ 1025 } 1026 } 1027 #endif /* INET6 */ 1028 1029 static __inline const struct carp_vhaddr * 1030 carp_find_addr(const struct carp_softc *sc, const struct in_addr *addr) 1031 { 1032 struct carp_vhaddr *vha; 1033 1034 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 1035 if (vha->vha_iaback == NULL) 1036 continue; 1037 1038 if (vha->vha_ia->ia_addr.sin_addr.s_addr == addr->s_addr) 1039 return vha; 1040 } 1041 return NULL; 1042 } 1043 1044 static int 1045 carp_iamatch_balance(const struct carp_if *cif, const struct in_addr *itaddr, 1046 const struct in_addr *isaddr, uint8_t **enaddr) 1047 { 1048 const struct carp_softc *vh; 1049 int index, count = 0; 1050 1051 /* 1052 * XXX proof of concept implementation. 1053 * We use the source ip to decide which virtual host should 1054 * handle the request. If we're master of that virtual host, 1055 * then we respond, otherwise, just drop the arp packet on 1056 * the floor. 1057 */ 1058 1059 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1060 if (!CARP_IS_RUNNING(&vh->sc_if)) 1061 continue; 1062 1063 if (carp_find_addr(vh, itaddr) != NULL) 1064 count++; 1065 } 1066 if (count == 0) 1067 return 0; 1068 1069 /* this should be a hash, like pf_hash() */ 1070 index = ntohl(isaddr->s_addr) % count; 1071 count = 0; 1072 1073 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1074 if (!CARP_IS_RUNNING(&vh->sc_if)) 1075 continue; 1076 1077 if (carp_find_addr(vh, itaddr) == NULL) 1078 continue; 1079 1080 if (count == index) { 1081 if (vh->sc_state == MASTER) { 1082 *enaddr = IF_LLADDR(&vh->sc_if); 1083 return 1; 1084 } else { 1085 return 0; 1086 } 1087 } 1088 count++; 1089 } 1090 return 0; 1091 } 1092 1093 int 1094 carp_iamatch(const void *v, const struct in_addr *itaddr, 1095 const struct in_addr *isaddr, uint8_t **enaddr) 1096 { 1097 const struct carp_if *cif = v; 1098 const struct carp_softc *vh; 1099 1100 if (carp_opts[CARPCTL_ARPBALANCE]) 1101 return carp_iamatch_balance(cif, itaddr, isaddr, enaddr); 1102 1103 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1104 if (!CARP_IS_RUNNING(&vh->sc_if) || vh->sc_state != MASTER) 1105 continue; 1106 1107 if (carp_find_addr(vh, itaddr) != NULL) { 1108 *enaddr = IF_LLADDR(&vh->sc_if); 1109 return 1; 1110 } 1111 } 1112 return 0; 1113 } 1114 1115 #ifdef INET6 1116 struct ifaddr * 1117 carp_iamatch6(void *v, struct in6_addr *taddr) 1118 { 1119 struct carp_if *cif = v; 1120 struct carp_softc *vh; 1121 1122 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1123 struct ifaddr_container *ifac; 1124 1125 TAILQ_FOREACH(ifac, &vh->sc_if.if_addrheads[mycpuid], 1126 ifa_link) { 1127 struct ifaddr *ifa = ifac->ifa; 1128 1129 if (IN6_ARE_ADDR_EQUAL(taddr, 1130 &ifatoia6(ifa)->ia_addr.sin6_addr) && 1131 CARP_IS_RUNNING(&vh->sc_if) && 1132 vh->sc_state == MASTER) { 1133 return (ifa); 1134 } 1135 } 1136 } 1137 return (NULL); 1138 } 1139 1140 void * 1141 carp_macmatch6(void *v, struct mbuf *m, const struct in6_addr *taddr) 1142 { 1143 struct m_tag *mtag; 1144 struct carp_if *cif = v; 1145 struct carp_softc *sc; 1146 1147 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) { 1148 struct ifaddr_container *ifac; 1149 1150 TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], 1151 ifa_link) { 1152 struct ifaddr *ifa = ifac->ifa; 1153 1154 if (IN6_ARE_ADDR_EQUAL(taddr, 1155 &ifatoia6(ifa)->ia_addr.sin6_addr) && 1156 CARP_IS_RUNNING(&sc->sc_if)) { 1157 struct ifnet *ifp = &sc->sc_if; 1158 1159 mtag = m_tag_get(PACKET_TAG_CARP, 1160 sizeof(struct ifnet *), MB_DONTWAIT); 1161 if (mtag == NULL) { 1162 /* better a bit than nothing */ 1163 return (IF_LLADDR(ifp)); 1164 } 1165 bcopy(&ifp, (caddr_t)(mtag + 1), 1166 sizeof(struct ifnet *)); 1167 m_tag_prepend(m, mtag); 1168 1169 return (IF_LLADDR(ifp)); 1170 } 1171 } 1172 } 1173 return (NULL); 1174 } 1175 #endif 1176 1177 int 1178 carp_forus(const void *v, const void *dhost) 1179 { 1180 const struct carp_if *cif = v; 1181 const struct carp_softc *vh; 1182 const uint8_t *ena = dhost; 1183 1184 if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1) 1185 return 0; 1186 1187 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1188 const struct ifnet *cifp = &vh->sc_if; 1189 1190 if (CARP_IS_RUNNING(cifp) && vh->sc_state == MASTER && 1191 !bcmp(dhost, IF_LLADDR(cifp), ETHER_ADDR_LEN)) 1192 return 1; 1193 } 1194 return 0; 1195 } 1196 1197 static void 1198 carp_master_down_timeout(void *xsc) 1199 { 1200 struct carp_softc *sc = xsc; 1201 1202 CARP_DEBUG("%s: BACKUP -> MASTER (master timed out)\n", 1203 sc->sc_if.if_xname); 1204 carp_master_down(sc); 1205 } 1206 1207 static void 1208 carp_master_down(struct carp_softc *sc) 1209 { 1210 switch (sc->sc_state) { 1211 case INIT: 1212 kprintf("%s: master_down event in INIT state\n", 1213 sc->sc_if.if_xname); 1214 break; 1215 1216 case MASTER: 1217 break; 1218 1219 case BACKUP: 1220 carp_set_state(sc, MASTER); 1221 carp_send_ad(sc); 1222 carp_send_arp(sc); 1223 #ifdef INET6 1224 carp_send_na(sc); 1225 #endif /* INET6 */ 1226 carp_setrun(sc, 0); 1227 carp_setroute(sc, RTM_ADD); 1228 break; 1229 } 1230 } 1231 1232 /* 1233 * When in backup state, af indicates whether to reset the master down timer 1234 * for v4 or v6. If it's set to zero, reset the ones which are already pending. 1235 */ 1236 static void 1237 carp_setrun(struct carp_softc *sc, sa_family_t af) 1238 { 1239 struct ifnet *cifp = &sc->sc_if; 1240 struct timeval tv; 1241 1242 if (sc->sc_carpdev == NULL) { 1243 carp_set_state(sc, INIT); 1244 return; 1245 } 1246 1247 if ((cifp->if_flags & IFF_RUNNING) && sc->sc_vhid > 0 && 1248 (sc->sc_naddrs || sc->sc_naddrs6)) { 1249 /* Nothing */ 1250 } else { 1251 carp_setroute(sc, RTM_DELETE); 1252 return; 1253 } 1254 1255 switch (sc->sc_state) { 1256 case INIT: 1257 if (carp_opts[CARPCTL_PREEMPT] && !carp_suppress_preempt) { 1258 carp_send_ad(sc); 1259 carp_send_arp(sc); 1260 #ifdef INET6 1261 carp_send_na(sc); 1262 #endif /* INET6 */ 1263 CARP_DEBUG("%s: INIT -> MASTER (preempting)\n", 1264 cifp->if_xname); 1265 carp_set_state(sc, MASTER); 1266 carp_setroute(sc, RTM_ADD); 1267 } else { 1268 CARP_DEBUG("%s: INIT -> BACKUP\n", cifp->if_xname); 1269 carp_set_state(sc, BACKUP); 1270 carp_setroute(sc, RTM_DELETE); 1271 carp_setrun(sc, 0); 1272 } 1273 break; 1274 1275 case BACKUP: 1276 callout_stop(&sc->sc_ad_tmo); 1277 tv.tv_sec = 3 * sc->sc_advbase; 1278 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1279 switch (af) { 1280 #ifdef INET 1281 case AF_INET: 1282 callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv), 1283 carp_master_down_timeout, sc); 1284 break; 1285 #endif /* INET */ 1286 #ifdef INET6 1287 case AF_INET6: 1288 callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv), 1289 carp_master_down_timeout, sc); 1290 break; 1291 #endif /* INET6 */ 1292 default: 1293 if (sc->sc_naddrs) 1294 callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv), 1295 carp_master_down_timeout, sc); 1296 if (sc->sc_naddrs6) 1297 callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv), 1298 carp_master_down_timeout, sc); 1299 break; 1300 } 1301 break; 1302 1303 case MASTER: 1304 tv.tv_sec = sc->sc_advbase; 1305 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1306 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv), 1307 carp_send_ad_timeout, sc); 1308 break; 1309 } 1310 } 1311 1312 static void 1313 carp_multicast_cleanup(struct carp_softc *sc) 1314 { 1315 struct ip_moptions *imo = &sc->sc_imo; 1316 1317 if (imo->imo_num_memberships == 0) 1318 return; 1319 KKASSERT(imo->imo_num_memberships == 1); 1320 1321 in_delmulti(imo->imo_membership[0]); 1322 imo->imo_membership[0] = NULL; 1323 imo->imo_num_memberships = 0; 1324 imo->imo_multicast_ifp = NULL; 1325 } 1326 1327 #ifdef INET6 1328 static void 1329 carp_multicast6_cleanup(struct carp_softc *sc) 1330 { 1331 struct ip6_moptions *im6o = &sc->sc_im6o; 1332 1333 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 1334 struct in6_multi_mship *imm = 1335 LIST_FIRST(&im6o->im6o_memberships); 1336 1337 LIST_REMOVE(imm, i6mm_chain); 1338 in6_leavegroup(imm); 1339 } 1340 im6o->im6o_multicast_ifp = NULL; 1341 } 1342 #endif 1343 1344 static int 1345 carp_get_vhaddr(struct carp_softc *sc, struct ifdrv *ifd) 1346 { 1347 const struct carp_vhaddr *vha; 1348 struct ifcarpvhaddr *carpa, *carpa0; 1349 int count, len, error; 1350 1351 count = 0; 1352 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) 1353 ++count; 1354 1355 if (ifd->ifd_len == 0) { 1356 ifd->ifd_len = count * sizeof(*carpa); 1357 return 0; 1358 } else if (count == 0 || ifd->ifd_len < sizeof(*carpa)) { 1359 ifd->ifd_len = 0; 1360 return 0; 1361 } 1362 len = min(ifd->ifd_len, sizeof(*carpa) * count); 1363 KKASSERT(len >= sizeof(*carpa)); 1364 1365 carpa0 = carpa = kmalloc(len, M_TEMP, M_WAITOK | M_NULLOK | M_ZERO); 1366 if (carpa == NULL) 1367 return ENOMEM; 1368 1369 count = 0; 1370 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 1371 if (len < sizeof(*carpa)) 1372 break; 1373 1374 carpa->carpa_flags = vha->vha_flags; 1375 carpa->carpa_addr.sin_family = AF_INET; 1376 carpa->carpa_addr.sin_addr = vha->vha_ia->ia_addr.sin_addr; 1377 1378 carpa->carpa_baddr.sin_family = AF_INET; 1379 if (vha->vha_iaback == NULL) { 1380 carpa->carpa_baddr.sin_addr.s_addr = INADDR_ANY; 1381 } else { 1382 carpa->carpa_baddr.sin_addr = 1383 vha->vha_iaback->ia_addr.sin_addr; 1384 } 1385 1386 ++carpa; 1387 ++count; 1388 len -= sizeof(*carpa); 1389 } 1390 ifd->ifd_len = sizeof(*carpa) * count; 1391 KKASSERT(ifd->ifd_len > 0); 1392 1393 error = copyout(carpa0, ifd->ifd_data, ifd->ifd_len); 1394 kfree(carpa0, M_TEMP); 1395 return error; 1396 } 1397 1398 static int 1399 carp_config_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha) 1400 { 1401 struct ifnet *ifp; 1402 struct in_ifaddr *ia_if; 1403 struct in_ifaddr_container *iac; 1404 const struct sockaddr_in *sin; 1405 u_long iaddr; 1406 int own; 1407 1408 KKASSERT(vha->vha_ia != NULL); 1409 1410 sin = &vha->vha_ia->ia_addr; 1411 iaddr = ntohl(sin->sin_addr.s_addr); 1412 1413 ia_if = NULL; 1414 own = 0; 1415 TAILQ_FOREACH(iac, &in_ifaddrheads[mycpuid], ia_link) { 1416 struct in_ifaddr *ia = iac->ia; 1417 1418 if ((ia->ia_flags & IFA_ROUTE) == 0) 1419 continue; 1420 1421 if (ia->ia_ifp->if_type == IFT_CARP) 1422 continue; 1423 1424 /* and, yeah, we need a multicast-capable iface too */ 1425 if ((ia->ia_ifp->if_flags & IFF_MULTICAST) == 0) 1426 continue; 1427 1428 if ((iaddr & ia->ia_subnetmask) == ia->ia_subnet) { 1429 if (sin->sin_addr.s_addr == 1430 ia->ia_addr.sin_addr.s_addr) 1431 own = 1; 1432 if (ia_if == NULL) 1433 ia_if = ia; 1434 else if (sc->sc_carpdev != NULL && 1435 sc->sc_carpdev == ia->ia_ifp) 1436 ia_if = ia; 1437 } 1438 } 1439 1440 carp_deactivate_vhaddr(sc, vha); 1441 if (!ia_if) 1442 return ENOENT; 1443 1444 ifp = ia_if->ia_ifp; 1445 1446 /* XXX Don't allow parent iface to be changed */ 1447 if (sc->sc_carpdev != NULL && sc->sc_carpdev != ifp) 1448 return EEXIST; 1449 1450 return carp_activate_vhaddr(sc, vha, ifp, ia_if, own); 1451 } 1452 1453 static void 1454 carp_add_addr(struct carp_softc *sc, struct ifaddr *carp_ifa) 1455 { 1456 struct carp_vhaddr *vha_new; 1457 struct in_ifaddr *carp_ia; 1458 #ifdef INVARIANTS 1459 struct carp_vhaddr *vha; 1460 #endif 1461 1462 KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET); 1463 carp_ia = ifatoia(carp_ifa); 1464 1465 #ifdef INVARIANTS 1466 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) 1467 KKASSERT(vha->vha_ia != NULL && vha->vha_ia != carp_ia); 1468 #endif 1469 1470 vha_new = kmalloc(sizeof(*vha_new), M_CARP, M_WAITOK | M_ZERO); 1471 vha_new->vha_ia = carp_ia; 1472 carp_insert_vhaddr(sc, vha_new); 1473 1474 if (carp_config_vhaddr(sc, vha_new) != 0) { 1475 /* 1476 * If the above configuration fails, it may only mean 1477 * that the new address is problematic. However, the 1478 * carp(4) interface may already have several working 1479 * addresses. Since the expected behaviour of 1480 * SIOC[AS]IFADDR is to put the NIC into working state, 1481 * we try starting the state machine manually here with 1482 * the hope that the carp(4)'s previously working 1483 * addresses still could be brought up. 1484 */ 1485 carp_hmac_prepare(sc); 1486 carp_set_state(sc, INIT); 1487 carp_setrun(sc, 0); 1488 } 1489 } 1490 1491 static void 1492 carp_del_addr(struct carp_softc *sc, struct ifaddr *carp_ifa) 1493 { 1494 struct carp_vhaddr *vha; 1495 struct in_ifaddr *carp_ia; 1496 1497 KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET); 1498 carp_ia = ifatoia(carp_ifa); 1499 1500 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 1501 KKASSERT(vha->vha_ia != NULL); 1502 if (vha->vha_ia == carp_ia) 1503 break; 1504 } 1505 KASSERT(vha != NULL, ("no corresponding vhaddr %p\n", carp_ifa)); 1506 1507 /* 1508 * Remove the vhaddr from the list before deactivating 1509 * the vhaddr, so that the HMAC could be correctly 1510 * updated in carp_deactivate_vhaddr() 1511 */ 1512 carp_remove_vhaddr(sc, vha); 1513 1514 carp_deactivate_vhaddr(sc, vha); 1515 kfree(vha, M_CARP); 1516 } 1517 1518 static void 1519 carp_config_addr(struct carp_softc *sc, struct ifaddr *carp_ifa) 1520 { 1521 struct carp_vhaddr *vha; 1522 struct in_ifaddr *carp_ia; 1523 1524 KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET); 1525 carp_ia = ifatoia(carp_ifa); 1526 1527 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 1528 KKASSERT(vha->vha_ia != NULL); 1529 if (vha->vha_ia == carp_ia) 1530 break; 1531 } 1532 KASSERT(vha != NULL, ("no corresponding vhaddr %p\n", carp_ifa)); 1533 1534 /* Remove then reinsert, to keep the vhaddr list sorted */ 1535 carp_remove_vhaddr(sc, vha); 1536 carp_insert_vhaddr(sc, vha); 1537 1538 if (carp_config_vhaddr(sc, vha) != 0) { 1539 /* See the comment in carp_add_addr() */ 1540 carp_hmac_prepare(sc); 1541 carp_set_state(sc, INIT); 1542 carp_setrun(sc, 0); 1543 } 1544 } 1545 1546 #ifdef INET6 1547 static int 1548 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) 1549 { 1550 struct ifnet *ifp; 1551 struct carp_if *cif; 1552 struct in6_ifaddr *ia, *ia_if; 1553 struct ip6_moptions *im6o = &sc->sc_im6o; 1554 struct in6_multi_mship *imm; 1555 struct in6_addr in6; 1556 int own, error; 1557 1558 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 1559 carp_setrun(sc, 0); 1560 return (0); 1561 } 1562 1563 /* we have to do it by hands to check we won't match on us */ 1564 ia_if = NULL; own = 0; 1565 for (ia = in6_ifaddr; ia; ia = ia->ia_next) { 1566 int i; 1567 1568 for (i = 0; i < 4; i++) { 1569 if ((sin6->sin6_addr.s6_addr32[i] & 1570 ia->ia_prefixmask.sin6_addr.s6_addr32[i]) != 1571 (ia->ia_addr.sin6_addr.s6_addr32[i] & 1572 ia->ia_prefixmask.sin6_addr.s6_addr32[i])) 1573 break; 1574 } 1575 /* and, yeah, we need a multicast-capable iface too */ 1576 if (ia->ia_ifp != &sc->sc_if && 1577 (ia->ia_ifp->if_flags & IFF_MULTICAST) && 1578 (i == 4)) { 1579 if (!ia_if) 1580 ia_if = ia; 1581 if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, 1582 &ia->ia_addr.sin6_addr)) 1583 own++; 1584 } 1585 } 1586 1587 if (!ia_if) 1588 return (EADDRNOTAVAIL); 1589 ia = ia_if; 1590 ifp = ia->ia_ifp; 1591 1592 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 || 1593 (im6o->im6o_multicast_ifp && im6o->im6o_multicast_ifp != ifp)) 1594 return (EADDRNOTAVAIL); 1595 1596 if (!sc->sc_naddrs6) { 1597 im6o->im6o_multicast_ifp = ifp; 1598 1599 /* join CARP multicast address */ 1600 bzero(&in6, sizeof(in6)); 1601 in6.s6_addr16[0] = htons(0xff02); 1602 in6.s6_addr8[15] = 0x12; 1603 if (in6_setscope(&in6, ifp, NULL) != 0) 1604 goto cleanup; 1605 if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL) 1606 goto cleanup; 1607 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain); 1608 1609 /* join solicited multicast address */ 1610 bzero(&in6, sizeof(in6)); 1611 in6.s6_addr16[0] = htons(0xff02); 1612 in6.s6_addr32[1] = 0; 1613 in6.s6_addr32[2] = htonl(1); 1614 in6.s6_addr32[3] = sin6->sin6_addr.s6_addr32[3]; 1615 in6.s6_addr8[12] = 0xff; 1616 if (in6_setscope(&in6, ifp, NULL) != 0) 1617 goto cleanup; 1618 if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL) 1619 goto cleanup; 1620 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain); 1621 } 1622 1623 if (!ifp->if_carp) { 1624 cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO); 1625 1626 if ((error = ifpromisc(ifp, 1))) { 1627 kfree(cif, M_CARP); 1628 goto cleanup; 1629 } 1630 1631 TAILQ_INIT(&cif->vhif_vrs); 1632 ifp->if_carp = cif; 1633 } else { 1634 struct carp_softc *vr; 1635 1636 cif = ifp->if_carp; 1637 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 1638 if (vr != sc && vr->sc_vhid == sc->sc_vhid) { 1639 error = EINVAL; 1640 goto cleanup; 1641 } 1642 } 1643 } 1644 sc->sc_ia6 = ia; 1645 sc->sc_carpdev = ifp; 1646 1647 { /* XXX prevent endless loop if already in queue */ 1648 struct carp_softc *vr, *after = NULL; 1649 int myself = 0; 1650 cif = ifp->if_carp; 1651 1652 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 1653 if (vr == sc) 1654 myself = 1; 1655 if (vr->sc_vhid < sc->sc_vhid) 1656 after = vr; 1657 } 1658 1659 if (!myself) { 1660 /* We're trying to keep things in order */ 1661 if (after == NULL) 1662 TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list); 1663 else 1664 TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list); 1665 } 1666 } 1667 1668 sc->sc_naddrs6++; 1669 if (own) 1670 sc->sc_advskew = 0; 1671 carp_sc_state(sc); 1672 carp_setrun(sc, 0); 1673 1674 return (0); 1675 1676 cleanup: 1677 /* clean up multicast memberships */ 1678 if (!sc->sc_naddrs6) { 1679 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 1680 imm = LIST_FIRST(&im6o->im6o_memberships); 1681 LIST_REMOVE(imm, i6mm_chain); 1682 in6_leavegroup(imm); 1683 } 1684 } 1685 return (error); 1686 } 1687 1688 static int 1689 carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) 1690 { 1691 int error = 0; 1692 1693 if (!--sc->sc_naddrs6) { 1694 struct carp_if *cif = sc->sc_carpdev->if_carp; 1695 struct ip6_moptions *im6o = &sc->sc_im6o; 1696 1697 callout_stop(&sc->sc_ad_tmo); 1698 sc->sc_vhid = -1; 1699 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 1700 struct in6_multi_mship *imm = 1701 LIST_FIRST(&im6o->im6o_memberships); 1702 1703 LIST_REMOVE(imm, i6mm_chain); 1704 in6_leavegroup(imm); 1705 } 1706 im6o->im6o_multicast_ifp = NULL; 1707 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 1708 if (TAILQ_EMPTY(&cif->vhif_vrs)) { 1709 sc->sc_carpdev->if_carp = NULL; 1710 kfree(cif, M_IFADDR); 1711 } 1712 } 1713 return (error); 1714 } 1715 #endif /* INET6 */ 1716 1717 static int 1718 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr, struct ucred *cr) 1719 { 1720 struct carp_softc *sc = ifp->if_softc, *vr; 1721 struct carpreq carpr; 1722 struct ifaddr *ifa; 1723 struct ifreq *ifr; 1724 struct ifaliasreq *ifra; 1725 struct ifdrv *ifd; 1726 char devname[IFNAMSIZ]; 1727 int error = 0; 1728 1729 ifa = (struct ifaddr *)addr; 1730 ifra = (struct ifaliasreq *)addr; 1731 ifr = (struct ifreq *)addr; 1732 ifd = (struct ifdrv *)addr; 1733 1734 switch (cmd) { 1735 case SIOCSIFADDR: 1736 switch (ifa->ifa_addr->sa_family) { 1737 #ifdef INET 1738 case AF_INET: 1739 ifp->if_flags |= IFF_UP | IFF_RUNNING; 1740 break; 1741 #endif /* INET */ 1742 #ifdef INET6 1743 case AF_INET6: 1744 ifp->if_flags |= IFF_UP | IFF_RUNNING; 1745 error = carp_set_addr6(sc, satosin6(ifa->ifa_addr)); 1746 break; 1747 #endif /* INET6 */ 1748 default: 1749 error = EAFNOSUPPORT; 1750 break; 1751 } 1752 break; 1753 1754 case SIOCAIFADDR: 1755 switch (ifa->ifa_addr->sa_family) { 1756 #ifdef INET 1757 case AF_INET: 1758 panic("SIOCAIFADDR should never be seen\n"); 1759 #endif /* INET */ 1760 #ifdef INET6 1761 case AF_INET6: 1762 ifp->if_flags |= IFF_UP | IFF_RUNNING; 1763 error = carp_set_addr6(sc, satosin6(&ifra->ifra_addr)); 1764 break; 1765 #endif /* INET6 */ 1766 default: 1767 error = EAFNOSUPPORT; 1768 break; 1769 } 1770 break; 1771 1772 case SIOCDIFADDR: 1773 switch (ifa->ifa_addr->sa_family) { 1774 #ifdef INET 1775 case AF_INET: 1776 panic("SIOCDIFADDR should never be seen\n"); 1777 #endif /* INET */ 1778 #ifdef INET6 1779 case AF_INET6: 1780 error = carp_del_addr6(sc, satosin6(&ifra->ifra_addr)); 1781 break; 1782 #endif /* INET6 */ 1783 default: 1784 error = EAFNOSUPPORT; 1785 break; 1786 } 1787 break; 1788 1789 case SIOCSIFFLAGS: 1790 if (ifp->if_flags & IFF_UP) { 1791 if ((ifp->if_flags & IFF_RUNNING) == 0) { 1792 ifp->if_flags |= IFF_RUNNING; 1793 carp_set_state(sc, INIT); 1794 carp_setrun(sc, 0); 1795 } 1796 } else if (ifp->if_flags & IFF_RUNNING) { 1797 carp_stop(sc, 0); 1798 } 1799 break; 1800 1801 case SIOCSVH: 1802 error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY); 1803 if (error) 1804 break; 1805 error = copyin(ifr->ifr_data, &carpr, sizeof(carpr)); 1806 if (error) 1807 break; 1808 1809 error = 1; 1810 if ((ifp->if_flags & IFF_RUNNING) && 1811 sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) { 1812 switch (carpr.carpr_state) { 1813 case BACKUP: 1814 callout_stop(&sc->sc_ad_tmo); 1815 carp_set_state(sc, BACKUP); 1816 carp_setrun(sc, 0); 1817 carp_setroute(sc, RTM_DELETE); 1818 break; 1819 1820 case MASTER: 1821 carp_master_down(sc); 1822 break; 1823 1824 default: 1825 break; 1826 } 1827 } 1828 if (carpr.carpr_vhid > 0) { 1829 if (carpr.carpr_vhid > 255) { 1830 error = EINVAL; 1831 break; 1832 } 1833 if (sc->sc_carpdev) { 1834 struct carp_if *cif = sc->sc_carpdev->if_carp; 1835 1836 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 1837 if (vr != sc && 1838 vr->sc_vhid == carpr.carpr_vhid) 1839 return EEXIST; 1840 } 1841 } 1842 sc->sc_vhid = carpr.carpr_vhid; 1843 IF_LLADDR(ifp)[0] = 0; 1844 IF_LLADDR(ifp)[1] = 0; 1845 IF_LLADDR(ifp)[2] = 0x5e; 1846 IF_LLADDR(ifp)[3] = 0; 1847 IF_LLADDR(ifp)[4] = 1; 1848 IF_LLADDR(ifp)[5] = sc->sc_vhid; 1849 error--; 1850 } 1851 if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) { 1852 if (carpr.carpr_advskew >= 255) { 1853 error = EINVAL; 1854 break; 1855 } 1856 if (carpr.carpr_advbase > 255) { 1857 error = EINVAL; 1858 break; 1859 } 1860 sc->sc_advbase = carpr.carpr_advbase; 1861 sc->sc_advskew = carpr.carpr_advskew; 1862 error--; 1863 } 1864 bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key)); 1865 if (error > 0) { 1866 error = EINVAL; 1867 } else { 1868 error = 0; 1869 carp_setrun(sc, 0); 1870 } 1871 break; 1872 1873 case SIOCGVH: 1874 bzero(&carpr, sizeof(carpr)); 1875 carpr.carpr_state = sc->sc_state; 1876 carpr.carpr_vhid = sc->sc_vhid; 1877 carpr.carpr_advbase = sc->sc_advbase; 1878 carpr.carpr_advskew = sc->sc_advskew; 1879 error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY); 1880 if (error == 0) { 1881 bcopy(sc->sc_key, carpr.carpr_key, 1882 sizeof(carpr.carpr_key)); 1883 } 1884 1885 error = copyout(&carpr, ifr->ifr_data, sizeof(carpr)); 1886 break; 1887 1888 case SIOCGDRVSPEC: 1889 switch (ifd->ifd_cmd) { 1890 case CARPGDEVNAME: 1891 if (ifd->ifd_len != sizeof(devname)) 1892 error = EINVAL; 1893 break; 1894 1895 case CARPGVHADDR: 1896 break; 1897 1898 default: 1899 error = EINVAL; 1900 break; 1901 } 1902 if (error) 1903 break; 1904 1905 switch (ifd->ifd_cmd) { 1906 case CARPGVHADDR: 1907 error = carp_get_vhaddr(sc, ifd); 1908 break; 1909 1910 case CARPGDEVNAME: 1911 bzero(devname, sizeof(devname)); 1912 if (sc->sc_carpdev != NULL) { 1913 strlcpy(devname, sc->sc_carpdev->if_xname, 1914 sizeof(devname)); 1915 } 1916 error = copyout(devname, ifd->ifd_data, 1917 sizeof(devname)); 1918 break; 1919 } 1920 break; 1921 1922 default: 1923 error = EINVAL; 1924 break; 1925 } 1926 carp_hmac_prepare(sc); 1927 return error; 1928 } 1929 1930 /* 1931 * XXX: this is looutput. We should eventually use it from there. 1932 */ 1933 static int 1934 carp_looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 1935 struct rtentry *rt) 1936 { 1937 uint32_t af; 1938 1939 M_ASSERTPKTHDR(m); /* check if we have the packet header */ 1940 1941 if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 1942 m_freem(m); 1943 return (rt->rt_flags & RTF_BLACKHOLE ? 0 : 1944 rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); 1945 } 1946 1947 ifp->if_opackets++; 1948 ifp->if_obytes += m->m_pkthdr.len; 1949 1950 /* BPF writes need to be handled specially. */ 1951 if (dst->sa_family == AF_UNSPEC) { 1952 bcopy(dst->sa_data, &af, sizeof(af)); 1953 dst->sa_family = af; 1954 } 1955 1956 #if 1 /* XXX */ 1957 switch (dst->sa_family) { 1958 case AF_INET: 1959 case AF_INET6: 1960 case AF_IPX: 1961 case AF_APPLETALK: 1962 break; 1963 1964 default: 1965 m_freem(m); 1966 return (EAFNOSUPPORT); 1967 } 1968 #endif 1969 return (if_simloop(ifp, m, dst->sa_family, 0)); 1970 } 1971 1972 /* 1973 * Start output on carp interface. This function should never be called. 1974 */ 1975 static void 1976 carp_start(struct ifnet *ifp) 1977 { 1978 #ifdef DEBUG 1979 kprintf("%s: start called\n", ifp->if_xname); 1980 #endif 1981 } 1982 1983 int 1984 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, 1985 struct rtentry *rt) 1986 { 1987 struct m_tag *mtag; 1988 struct carp_softc *sc; 1989 struct ifnet *carp_ifp; 1990 struct ether_header *eh; 1991 1992 if (!sa) 1993 return (0); 1994 1995 switch (sa->sa_family) { 1996 #ifdef INET 1997 case AF_INET: 1998 break; 1999 #endif /* INET */ 2000 #ifdef INET6 2001 case AF_INET6: 2002 break; 2003 #endif /* INET6 */ 2004 default: 2005 return (0); 2006 } 2007 2008 mtag = m_tag_find(m, PACKET_TAG_CARP, NULL); 2009 if (mtag == NULL) 2010 return (0); 2011 2012 bcopy(mtag + 1, &carp_ifp, sizeof(struct ifnet *)); 2013 sc = carp_ifp->if_softc; 2014 2015 /* Set the source MAC address to Virtual Router MAC Address */ 2016 switch (ifp->if_type) { 2017 case IFT_ETHER: 2018 case IFT_L2VLAN: 2019 eh = mtod(m, struct ether_header *); 2020 eh->ether_shost[0] = 0; 2021 eh->ether_shost[1] = 0; 2022 eh->ether_shost[2] = 0x5e; 2023 eh->ether_shost[3] = 0; 2024 eh->ether_shost[4] = 1; 2025 eh->ether_shost[5] = sc->sc_vhid; 2026 break; 2027 2028 default: 2029 if_printf(ifp, "carp is not supported for this " 2030 "interface type\n"); 2031 return (EOPNOTSUPP); 2032 } 2033 return (0); 2034 } 2035 2036 static void 2037 carp_set_state(struct carp_softc *sc, int state) 2038 { 2039 struct ifnet *cifp = &sc->sc_if; 2040 2041 if (sc->sc_state == state) 2042 return; 2043 sc->sc_state = state; 2044 2045 switch (sc->sc_state) { 2046 case BACKUP: 2047 cifp->if_link_state = LINK_STATE_DOWN; 2048 break; 2049 2050 case MASTER: 2051 cifp->if_link_state = LINK_STATE_UP; 2052 break; 2053 2054 default: 2055 cifp->if_link_state = LINK_STATE_UNKNOWN; 2056 break; 2057 } 2058 rt_ifmsg(cifp); 2059 } 2060 2061 void 2062 carp_group_demote_adj(struct ifnet *ifp, int adj) 2063 { 2064 struct ifg_list *ifgl; 2065 int *dm; 2066 2067 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) { 2068 if (!strcmp(ifgl->ifgl_group->ifg_group, IFG_ALL)) 2069 continue; 2070 dm = &ifgl->ifgl_group->ifg_carp_demoted; 2071 2072 if (*dm + adj >= 0) 2073 *dm += adj; 2074 else 2075 *dm = 0; 2076 2077 if (adj > 0 && *dm == 1) 2078 carp_send_ad_all(); 2079 CARP_LOG("%s demoted group %s to %d", ifp->if_xname, 2080 ifgl->ifgl_group->ifg_group, *dm); 2081 } 2082 } 2083 2084 void 2085 carp_carpdev_state(void *v) 2086 { 2087 struct carp_if *cif = v; 2088 struct carp_softc *sc; 2089 2090 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) 2091 carp_sc_state(sc); 2092 } 2093 2094 static void 2095 carp_sc_state(struct carp_softc *sc) 2096 { 2097 if (!(sc->sc_carpdev->if_flags & IFF_UP)) { 2098 callout_stop(&sc->sc_ad_tmo); 2099 callout_stop(&sc->sc_md_tmo); 2100 callout_stop(&sc->sc_md6_tmo); 2101 carp_set_state(sc, INIT); 2102 carp_setrun(sc, 0); 2103 if (!sc->sc_suppress) { 2104 carp_suppress_preempt++; 2105 if (carp_suppress_preempt == 1) 2106 carp_send_ad_all(); 2107 } 2108 sc->sc_suppress = 1; 2109 } else { 2110 carp_set_state(sc, INIT); 2111 carp_setrun(sc, 0); 2112 if (sc->sc_suppress) 2113 carp_suppress_preempt--; 2114 sc->sc_suppress = 0; 2115 } 2116 } 2117 2118 static void 2119 carp_stop(struct carp_softc *sc, int detach) 2120 { 2121 sc->sc_if.if_flags &= ~IFF_RUNNING; 2122 2123 callout_stop(&sc->sc_ad_tmo); 2124 callout_stop(&sc->sc_md_tmo); 2125 callout_stop(&sc->sc_md6_tmo); 2126 2127 if (!detach && sc->sc_state == MASTER) 2128 carp_send_ad(sc); 2129 2130 if (sc->sc_suppress) 2131 carp_suppress_preempt--; 2132 sc->sc_suppress = 0; 2133 2134 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) 2135 carp_suppress_preempt--; 2136 sc->sc_sendad_errors = 0; 2137 sc->sc_sendad_success = 0; 2138 2139 carp_set_state(sc, INIT); 2140 carp_setrun(sc, 0); 2141 } 2142 2143 static void 2144 carp_reset(struct carp_softc *sc, int detach) 2145 { 2146 struct ifnet *cifp = &sc->sc_if; 2147 2148 carp_stop(sc, detach); 2149 if (!sc->sc_dead && (cifp->if_flags & IFF_UP)) 2150 cifp->if_flags |= IFF_RUNNING; 2151 } 2152 2153 static int 2154 carp_activate_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha, 2155 struct ifnet *ifp, const struct in_ifaddr *ia_if, int own) 2156 { 2157 struct ip_moptions *imo = &sc->sc_imo; 2158 struct carp_if *cif; 2159 struct carp_softc *vr, *after = NULL; 2160 int onlist, error; 2161 #ifdef INVARIANTS 2162 int assert_onlist; 2163 #endif 2164 2165 KKASSERT(vha->vha_ia != NULL); 2166 2167 KASSERT(ia_if != NULL, ("NULL backing address\n")); 2168 KASSERT(vha->vha_iaback == NULL, ("%p is already activated\n", vha)); 2169 KASSERT((vha->vha_flags & CARP_VHAF_OWNER) == 0, 2170 ("inactive vhaddr %p is the address owner\n", vha)); 2171 2172 KASSERT(sc->sc_carpdev == NULL || sc->sc_carpdev == ifp, 2173 ("%s is already on %s\n", sc->sc_if.if_xname, 2174 sc->sc_carpdev->if_xname)); 2175 2176 KASSERT(imo->imo_multicast_ifp == NULL || 2177 imo->imo_multicast_ifp == ifp, 2178 ("%s didn't leave mcast group on %s\n", 2179 sc->sc_if.if_xname, imo->imo_multicast_ifp->if_xname)); 2180 2181 if (imo->imo_num_memberships == 0) { 2182 struct in_addr addr; 2183 2184 addr.s_addr = htonl(INADDR_CARP_GROUP); 2185 if ((imo->imo_membership[0] = in_addmulti(&addr, ifp)) == NULL) 2186 return ENOBUFS; 2187 imo->imo_num_memberships++; 2188 imo->imo_multicast_ifp = ifp; 2189 imo->imo_multicast_ttl = CARP_DFLTTL; 2190 imo->imo_multicast_loop = 0; 2191 } 2192 2193 if (!ifp->if_carp) { 2194 KASSERT(sc->sc_carpdev == NULL, 2195 ("%s is already on %s\n", sc->sc_if.if_xname, 2196 sc->sc_carpdev->if_xname)); 2197 2198 cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO); 2199 2200 error = ifpromisc(ifp, 1); 2201 if (error) { 2202 kfree(cif, M_CARP); 2203 goto cleanup; 2204 } 2205 2206 TAILQ_INIT(&cif->vhif_vrs); 2207 ifp->if_carp = cif; 2208 } else { 2209 cif = ifp->if_carp; 2210 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 2211 if (vr != sc && vr->sc_vhid == sc->sc_vhid) { 2212 error = EINVAL; 2213 goto cleanup; 2214 } 2215 } 2216 } 2217 2218 #ifdef INVARIANTS 2219 if (sc->sc_carpdev != NULL) 2220 assert_onlist = 1; 2221 else 2222 assert_onlist = 0; 2223 #endif 2224 sc->sc_ia = ia_if; 2225 sc->sc_carpdev = ifp; 2226 2227 cif = ifp->if_carp; 2228 onlist = 0; 2229 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 2230 if (vr == sc) 2231 onlist = 1; 2232 if (vr->sc_vhid < sc->sc_vhid) 2233 after = vr; 2234 } 2235 2236 #ifdef INVARIANTS 2237 if (assert_onlist) { 2238 KASSERT(onlist, ("%s is not on %s carp list\n", 2239 sc->sc_if.if_xname, ifp->if_xname)); 2240 } else { 2241 KASSERT(!onlist, ("%s is already on %s carp list\n", 2242 sc->sc_if.if_xname, ifp->if_xname)); 2243 } 2244 #endif 2245 2246 if (!onlist) { 2247 /* We're trying to keep things in order */ 2248 if (after == NULL) 2249 TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list); 2250 else 2251 TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list); 2252 } 2253 2254 vha->vha_iaback = ia_if; 2255 sc->sc_naddrs++; 2256 2257 if (own) { 2258 vha->vha_flags |= CARP_VHAF_OWNER; 2259 2260 /* XXX save user configured advskew? */ 2261 sc->sc_advskew = 0; 2262 } 2263 2264 carp_hmac_prepare(sc); 2265 carp_set_state(sc, INIT); 2266 carp_setrun(sc, 0); 2267 return 0; 2268 cleanup: 2269 carp_multicast_cleanup(sc); 2270 return error; 2271 } 2272 2273 static void 2274 carp_deactivate_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha) 2275 { 2276 KKASSERT(vha->vha_ia != NULL); 2277 2278 carp_hmac_prepare(sc); 2279 2280 if (vha->vha_iaback == NULL) { 2281 KASSERT((vha->vha_flags & CARP_VHAF_OWNER) == 0, 2282 ("inactive vhaddr %p is the address owner\n", vha)); 2283 return; 2284 } 2285 2286 vha->vha_flags &= ~CARP_VHAF_OWNER; 2287 2288 KKASSERT(sc->sc_naddrs > 0); 2289 vha->vha_iaback = NULL; 2290 sc->sc_naddrs--; 2291 if (!sc->sc_naddrs) { 2292 if (sc->sc_naddrs6) { 2293 carp_multicast_cleanup(sc); 2294 sc->sc_ia = NULL; 2295 } else { 2296 carp_detach(sc, 0); 2297 } 2298 } 2299 } 2300 2301 static void 2302 carp_link_addrs(struct carp_softc *sc, struct ifnet *ifp, struct ifaddr *ifa_if) 2303 { 2304 struct carp_vhaddr *vha; 2305 struct in_ifaddr *ia_if; 2306 2307 KKASSERT(ifa_if->ifa_addr->sa_family == AF_INET); 2308 ia_if = ifatoia(ifa_if); 2309 2310 if ((ia_if->ia_flags & IFA_ROUTE) == 0) 2311 return; 2312 2313 /* 2314 * Test each inactive vhaddr against the newly added address. 2315 * If the newly added address could be the backing address, 2316 * then activate the matching vhaddr. 2317 */ 2318 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 2319 const struct in_ifaddr *ia; 2320 u_long iaddr; 2321 int own; 2322 2323 if (vha->vha_iaback != NULL) 2324 continue; 2325 2326 ia = vha->vha_ia; 2327 iaddr = ntohl(ia->ia_addr.sin_addr.s_addr); 2328 2329 if ((iaddr & ia_if->ia_subnetmask) != ia_if->ia_subnet) 2330 continue; 2331 2332 own = 0; 2333 if (ia->ia_addr.sin_addr.s_addr == 2334 ia_if->ia_addr.sin_addr.s_addr) 2335 own = 1; 2336 2337 carp_activate_vhaddr(sc, vha, ifp, ia_if, own); 2338 } 2339 } 2340 2341 static void 2342 carp_unlink_addrs(struct carp_softc *sc, struct ifnet *ifp, 2343 struct ifaddr *ifa_if) 2344 { 2345 struct carp_vhaddr *vha; 2346 struct in_ifaddr *ia_if; 2347 2348 KKASSERT(ifa_if->ifa_addr->sa_family == AF_INET); 2349 ia_if = ifatoia(ifa_if); 2350 2351 /* 2352 * Ad src address is deleted; set it to NULL. 2353 * Following loop will try pick up a new ad src address 2354 * if one of the vhaddr could retain its backing address. 2355 */ 2356 if (sc->sc_ia == ia_if) 2357 sc->sc_ia = NULL; 2358 2359 /* 2360 * Test each active vhaddr against the deleted address. 2361 * If the deleted address is vhaddr address's backing 2362 * address, then deactivate the vhaddr. 2363 */ 2364 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 2365 if (vha->vha_iaback == NULL) 2366 continue; 2367 2368 if (vha->vha_iaback == ia_if) 2369 carp_deactivate_vhaddr(sc, vha); 2370 else if (sc->sc_ia == NULL) 2371 sc->sc_ia = vha->vha_iaback; 2372 } 2373 } 2374 2375 static void 2376 carp_update_addrs(struct carp_softc *sc) 2377 { 2378 struct carp_vhaddr *vha; 2379 2380 KKASSERT(sc->sc_carpdev == NULL); 2381 2382 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) 2383 carp_config_vhaddr(sc, vha); 2384 } 2385 2386 static void 2387 carp_ifaddr(void *arg __unused, struct ifnet *ifp, 2388 enum ifaddr_event event, struct ifaddr *ifa) 2389 { 2390 struct carp_softc *sc; 2391 2392 if (ifa->ifa_addr->sa_family != AF_INET) 2393 return; 2394 2395 if (ifp->if_type == IFT_CARP) { 2396 /* 2397 * Address is changed on carp(4) interface 2398 */ 2399 switch (event) { 2400 case IFADDR_EVENT_ADD: 2401 carp_add_addr(ifp->if_softc, ifa); 2402 break; 2403 2404 case IFADDR_EVENT_CHANGE: 2405 carp_config_addr(ifp->if_softc, ifa); 2406 break; 2407 2408 case IFADDR_EVENT_DELETE: 2409 carp_del_addr(ifp->if_softc, ifa); 2410 break; 2411 } 2412 return; 2413 } 2414 2415 /* 2416 * Address is changed on non-carp(4) interface 2417 */ 2418 if ((ifp->if_flags & IFF_MULTICAST) == 0) 2419 return; 2420 2421 crit_enter(); 2422 LIST_FOREACH(sc, &carpif_list, sc_next) { 2423 if (sc->sc_carpdev != NULL && sc->sc_carpdev != ifp) { 2424 /* Not the parent iface; skip */ 2425 continue; 2426 } 2427 2428 switch (event) { 2429 case IFADDR_EVENT_ADD: 2430 carp_link_addrs(sc, ifp, ifa); 2431 break; 2432 2433 case IFADDR_EVENT_DELETE: 2434 if (sc->sc_carpdev != NULL) { 2435 carp_unlink_addrs(sc, ifp, ifa); 2436 if (sc->sc_carpdev == NULL) 2437 carp_update_addrs(sc); 2438 } else { 2439 /* 2440 * The carp(4) interface didn't have a 2441 * parent iface, so it is not possible 2442 * that it will contain any address to 2443 * be unlinked. 2444 */ 2445 } 2446 break; 2447 2448 case IFADDR_EVENT_CHANGE: 2449 if (sc->sc_carpdev == NULL) { 2450 /* 2451 * The carp(4) interface didn't have a 2452 * parent iface, so it is not possible 2453 * that it will contain any address to 2454 * be updated. 2455 */ 2456 carp_link_addrs(sc, ifp, ifa); 2457 } else { 2458 /* 2459 * First try breaking tie with the old 2460 * address. Then see whether we could 2461 * link certain vhaddr to the new address. 2462 * If that fails, i.e. carpdev is NULL, 2463 * we try a global update. 2464 * 2465 * NOTE: The above order is critical. 2466 */ 2467 carp_unlink_addrs(sc, ifp, ifa); 2468 carp_link_addrs(sc, ifp, ifa); 2469 if (sc->sc_carpdev == NULL) 2470 carp_update_addrs(sc); 2471 } 2472 break; 2473 } 2474 } 2475 crit_exit(); 2476 } 2477 2478 static int 2479 carp_modevent(module_t mod, int type, void *data) 2480 { 2481 switch (type) { 2482 case MOD_LOAD: 2483 LIST_INIT(&carpif_list); 2484 carp_ifdetach_event = 2485 EVENTHANDLER_REGISTER(ifnet_detach_event, carp_ifdetach, NULL, 2486 EVENTHANDLER_PRI_ANY); 2487 carp_ifaddr_event = 2488 EVENTHANDLER_REGISTER(ifaddr_event, carp_ifaddr, NULL, 2489 EVENTHANDLER_PRI_ANY); 2490 if_clone_attach(&carp_cloner); 2491 break; 2492 2493 case MOD_UNLOAD: 2494 EVENTHANDLER_DEREGISTER(ifnet_detach_event, 2495 carp_ifdetach_event); 2496 EVENTHANDLER_DEREGISTER(ifaddr_event, 2497 carp_ifaddr_event); 2498 if_clone_detach(&carp_cloner); 2499 break; 2500 2501 default: 2502 return (EINVAL); 2503 } 2504 return (0); 2505 } 2506 2507 static moduledata_t carp_mod = { 2508 "carp", 2509 carp_modevent, 2510 0 2511 }; 2512 DECLARE_MODULE(carp, carp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 2513