1 /* 2 * Copyright (c) 2002 Michael Shalayeff. All rights reserved. 3 * Copyright (c) 2003 Ryan McBride. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 18 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 22 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 23 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 24 * THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 /* 27 * $FreeBSD: src/sys/netinet/ip_carp.c,v 1.48 2007/02/02 09:39:09 glebius Exp $ 28 */ 29 30 #include "opt_carp.h" 31 #include "opt_inet.h" 32 #include "opt_inet6.h" 33 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/kernel.h> 37 #include <sys/in_cksum.h> 38 #include <sys/limits.h> 39 #include <sys/malloc.h> 40 #include <sys/mbuf.h> 41 #include <sys/time.h> 42 #include <sys/proc.h> 43 #include <sys/priv.h> 44 #include <sys/sockio.h> 45 #include <sys/socket.h> 46 #include <sys/sysctl.h> 47 #include <sys/syslog.h> 48 49 #include <machine/stdarg.h> 50 #include <crypto/sha1.h> 51 52 #include <net/bpf.h> 53 #include <net/ethernet.h> 54 #include <net/if.h> 55 #include <net/if_dl.h> 56 #include <net/if_types.h> 57 #include <net/route.h> 58 #include <net/if_clone.h> 59 60 #ifdef INET 61 #include <netinet/in.h> 62 #include <netinet/in_var.h> 63 #include <netinet/in_systm.h> 64 #include <netinet/ip.h> 65 #include <netinet/ip_var.h> 66 #include <netinet/if_ether.h> 67 #endif 68 69 #ifdef INET6 70 #include <netinet/icmp6.h> 71 #include <netinet/ip6.h> 72 #include <netinet6/ip6_var.h> 73 #include <netinet6/scope6_var.h> 74 #include <netinet6/nd6.h> 75 #endif 76 77 #include <netinet/ip_carp.h> 78 79 #define CARP_IFNAME "carp" 80 #define CARP_IS_RUNNING(ifp) \ 81 (((ifp)->if_flags & (IFF_UP | IFF_RUNNING)) == (IFF_UP | IFF_RUNNING)) 82 83 struct carp_vhaddr { 84 uint32_t vha_flags; /* CARP_VHAF_ */ 85 const struct in_ifaddr *vha_ia; /* carp address */ 86 const struct in_ifaddr *vha_iaback; /* backing address */ 87 TAILQ_ENTRY(carp_vhaddr) vha_link; 88 }; 89 TAILQ_HEAD(carp_vhaddr_list, carp_vhaddr); 90 91 struct carp_softc { 92 struct ifnet sc_if; 93 struct ifnet *sc_carpdev; /* parent interface */ 94 struct carp_vhaddr_list sc_vha_list; /* virtual addr list */ 95 96 const struct in_ifaddr *sc_ia; /* primary iface address v4 */ 97 struct ip_moptions sc_imo; 98 99 #ifdef INET6 100 struct in6_ifaddr *sc_ia6; /* primary iface address v6 */ 101 struct ip6_moptions sc_im6o; 102 #endif /* INET6 */ 103 TAILQ_ENTRY(carp_softc) sc_list; 104 105 enum { INIT = 0, BACKUP, MASTER } 106 sc_state; 107 int sc_dead; 108 109 int sc_suppress; 110 111 int sc_sendad_errors; 112 #define CARP_SENDAD_MAX_ERRORS 3 113 int sc_sendad_success; 114 #define CARP_SENDAD_MIN_SUCCESS 3 115 116 int sc_vhid; 117 int sc_advskew; 118 int sc_naddrs; /* actually used IPv4 vha */ 119 int sc_naddrs6; 120 int sc_advbase; /* seconds */ 121 int sc_init_counter; 122 uint64_t sc_counter; 123 124 /* authentication */ 125 #define CARP_HMAC_PAD 64 126 unsigned char sc_key[CARP_KEY_LEN]; 127 unsigned char sc_pad[CARP_HMAC_PAD]; 128 SHA1_CTX sc_sha1; 129 130 struct callout sc_ad_tmo; /* advertisement timeout */ 131 struct callout sc_md_tmo; /* master down timeout */ 132 struct callout sc_md6_tmo; /* master down timeout */ 133 134 LIST_ENTRY(carp_softc) sc_next; /* Interface clue */ 135 }; 136 137 struct carp_if { 138 TAILQ_HEAD(, carp_softc) vhif_vrs; 139 }; 140 141 enum { CARP_COUNT_MASTER, CARP_COUNT_RUNNING }; 142 143 SYSCTL_DECL(_net_inet_carp); 144 145 static int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0 }; /* XXX for now */ 146 SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW, 147 &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets"); 148 SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW, 149 &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode"); 150 SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW, 151 &carp_opts[CARPCTL_LOG], 0, "log bad carp packets"); 152 SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW, 153 &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses"); 154 155 static int carp_suppress_preempt = 0; 156 SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD, 157 &carp_suppress_preempt, 0, "Preemption is suppressed"); 158 159 static struct carpstats carpstats; 160 SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW, 161 &carpstats, carpstats, 162 "CARP statistics (struct carpstats, netinet/ip_carp.h)"); 163 164 #define CARP_LOG(...) do { \ 165 if (carp_opts[CARPCTL_LOG] > 0) \ 166 log(LOG_INFO, __VA_ARGS__); \ 167 } while (0) 168 169 #define CARP_DEBUG(...) do { \ 170 if (carp_opts[CARPCTL_LOG] > 1) \ 171 log(LOG_DEBUG, __VA_ARGS__); \ 172 } while (0) 173 174 static void carp_hmac_prepare(struct carp_softc *); 175 static void carp_hmac_generate(struct carp_softc *, uint32_t *, 176 unsigned char *); 177 static int carp_hmac_verify(struct carp_softc *, uint32_t *, 178 unsigned char *); 179 static void carp_setroute(struct carp_softc *, int); 180 static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t); 181 static int carp_clone_create(struct if_clone *, int, caddr_t); 182 static int carp_clone_destroy(struct ifnet *); 183 static void carp_detach(struct carp_softc *, int); 184 static int carp_prepare_ad(struct mbuf *, struct carp_softc *, 185 struct carp_header *); 186 static void carp_send_ad_all(void); 187 static void carp_send_ad_timeout(void *); 188 static void carp_send_ad(struct carp_softc *); 189 static void carp_send_arp(struct carp_softc *); 190 static void carp_master_down_timeout(void *); 191 static void carp_master_down(struct carp_softc *); 192 static int carp_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *); 193 static int carp_looutput(struct ifnet *, struct mbuf *, struct sockaddr *, 194 struct rtentry *); 195 static void carp_start(struct ifnet *); 196 static void carp_setrun(struct carp_softc *, sa_family_t); 197 static void carp_set_state(struct carp_softc *, int); 198 199 static void carp_multicast_cleanup(struct carp_softc *); 200 static void carp_add_addr(struct carp_softc *, struct ifaddr *); 201 static void carp_del_addr(struct carp_softc *, struct ifaddr *); 202 static void carp_config_addr(struct carp_softc *, struct ifaddr *); 203 static void carp_link_addrs(struct carp_softc *, struct ifnet *, 204 struct ifaddr *); 205 static void carp_unlink_addrs(struct carp_softc *, struct ifnet *, 206 struct ifaddr *); 207 208 static int carp_get_vhaddr(struct carp_softc *, struct ifdrv *); 209 static int carp_config_vhaddr(struct carp_softc *, struct carp_vhaddr *); 210 static int carp_activate_vhaddr(struct carp_softc *, struct carp_vhaddr *, 211 struct ifnet *, const struct in_ifaddr *, int); 212 static void carp_deactivate_vhaddr(struct carp_softc *, 213 struct carp_vhaddr *); 214 215 static void carp_sc_state(struct carp_softc *); 216 #ifdef INET6 217 static void carp_send_na(struct carp_softc *); 218 static int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *); 219 static int carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *); 220 static void carp_multicast6_cleanup(struct carp_softc *); 221 #endif 222 static void carp_stop(struct carp_softc *, int); 223 static void carp_reset(struct carp_softc *, int); 224 225 static void carp_ifaddr(void *, struct ifnet *, enum ifaddr_event, 226 struct ifaddr *); 227 static void carp_ifdetach(void *, struct ifnet *); 228 229 static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces"); 230 231 static LIST_HEAD(, carp_softc) carpif_list; 232 233 static struct if_clone carp_cloner = 234 IF_CLONE_INITIALIZER(CARP_IFNAME, carp_clone_create, carp_clone_destroy, 235 0, IF_MAXUNIT); 236 237 static eventhandler_tag carp_ifdetach_event; 238 static eventhandler_tag carp_ifaddr_event; 239 240 static __inline void 241 carp_insert_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha_new) 242 { 243 struct carp_vhaddr *vha; 244 u_long new_addr, addr; 245 246 KKASSERT((vha_new->vha_flags & CARP_VHAF_ONLIST) == 0); 247 248 /* 249 * Virtual address list is sorted; smaller one first 250 */ 251 new_addr = ntohl(vha_new->vha_ia->ia_addr.sin_addr.s_addr); 252 253 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 254 addr = ntohl(vha->vha_ia->ia_addr.sin_addr.s_addr); 255 256 if (addr > new_addr) 257 break; 258 } 259 if (vha == NULL) 260 TAILQ_INSERT_TAIL(&sc->sc_vha_list, vha_new, vha_link); 261 else 262 TAILQ_INSERT_BEFORE(vha, vha_new, vha_link); 263 vha_new->vha_flags |= CARP_VHAF_ONLIST; 264 } 265 266 static __inline void 267 carp_remove_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha) 268 { 269 KKASSERT(vha->vha_flags & CARP_VHAF_ONLIST); 270 vha->vha_flags &= ~CARP_VHAF_ONLIST; 271 TAILQ_REMOVE(&sc->sc_vha_list, vha, vha_link); 272 } 273 274 static void 275 carp_hmac_prepare(struct carp_softc *sc) 276 { 277 uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT; 278 uint8_t vhid = sc->sc_vhid & 0xff; 279 int i; 280 #ifdef INET6 281 struct ifaddr_container *ifac; 282 struct in6_addr in6; 283 #endif 284 #ifdef INET 285 struct carp_vhaddr *vha; 286 #endif 287 288 /* XXX: possible race here */ 289 290 /* compute ipad from key */ 291 bzero(sc->sc_pad, sizeof(sc->sc_pad)); 292 bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key)); 293 for (i = 0; i < sizeof(sc->sc_pad); i++) 294 sc->sc_pad[i] ^= 0x36; 295 296 /* precompute first part of inner hash */ 297 SHA1Init(&sc->sc_sha1); 298 SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad)); 299 SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version)); 300 SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type)); 301 SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid)); 302 #ifdef INET 303 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 304 SHA1Update(&sc->sc_sha1, 305 (const uint8_t *)&vha->vha_ia->ia_addr.sin_addr, 306 sizeof(struct in_addr)); 307 } 308 #endif /* INET */ 309 #ifdef INET6 310 TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) { 311 struct ifaddr *ifa = ifac->ifa; 312 313 if (ifa->ifa_addr->sa_family == AF_INET6) { 314 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 315 in6_clearscope(&in6); 316 SHA1Update(&sc->sc_sha1, (void *)&in6, sizeof(in6)); 317 } 318 } 319 #endif /* INET6 */ 320 321 /* convert ipad to opad */ 322 for (i = 0; i < sizeof(sc->sc_pad); i++) 323 sc->sc_pad[i] ^= 0x36 ^ 0x5c; 324 } 325 326 static void 327 carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2], 328 unsigned char md[20]) 329 { 330 SHA1_CTX sha1ctx; 331 332 /* fetch first half of inner hash */ 333 bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx)); 334 335 SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter)); 336 SHA1Final(md, &sha1ctx); 337 338 /* outer hash */ 339 SHA1Init(&sha1ctx); 340 SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad)); 341 SHA1Update(&sha1ctx, md, 20); 342 SHA1Final(md, &sha1ctx); 343 } 344 345 static int 346 carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2], 347 unsigned char md[20]) 348 { 349 unsigned char md2[20]; 350 351 carp_hmac_generate(sc, counter, md2); 352 return (bcmp(md, md2, sizeof(md2))); 353 } 354 355 static void 356 carp_setroute(struct carp_softc *sc, int cmd) 357 { 358 #ifdef INET6 359 struct ifaddr_container *ifac; 360 361 crit_enter(); 362 TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) { 363 struct ifaddr *ifa = ifac->ifa; 364 365 if (ifa->ifa_addr->sa_family == AF_INET6) { 366 if (cmd == RTM_ADD) 367 in6_ifaddloop(ifa); 368 else 369 in6_ifremloop(ifa); 370 } 371 } 372 crit_exit(); 373 #endif /* INET6 */ 374 } 375 376 static int 377 carp_clone_create(struct if_clone *ifc, int unit, caddr_t param __unused) 378 { 379 struct carp_softc *sc; 380 struct ifnet *ifp; 381 382 sc = kmalloc(sizeof(*sc), M_CARP, M_WAITOK | M_ZERO); 383 ifp = &sc->sc_if; 384 385 sc->sc_suppress = 0; 386 sc->sc_advbase = CARP_DFLTINTV; 387 sc->sc_vhid = -1; /* required setting */ 388 sc->sc_advskew = 0; 389 sc->sc_init_counter = 1; 390 sc->sc_naddrs = 0; 391 sc->sc_naddrs6 = 0; 392 393 TAILQ_INIT(&sc->sc_vha_list); 394 395 #ifdef INET6 396 sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL; 397 #endif 398 399 callout_init(&sc->sc_ad_tmo); 400 callout_init(&sc->sc_md_tmo); 401 callout_init(&sc->sc_md6_tmo); 402 403 ifp->if_softc = sc; 404 if_initname(ifp, CARP_IFNAME, unit); 405 ifp->if_mtu = ETHERMTU; 406 ifp->if_flags = IFF_LOOPBACK; 407 ifp->if_ioctl = carp_ioctl; 408 ifp->if_output = carp_looutput; 409 ifp->if_start = carp_start; 410 ifp->if_type = IFT_CARP; 411 ifp->if_snd.ifq_maxlen = ifqmaxlen; 412 ifp->if_hdrlen = 0; 413 if_attach(ifp, NULL); 414 bpfattach(ifp, DLT_NULL, sizeof(u_int)); 415 416 crit_enter(); 417 LIST_INSERT_HEAD(&carpif_list, sc, sc_next); 418 crit_exit(); 419 420 return (0); 421 } 422 423 static int 424 carp_clone_destroy(struct ifnet *ifp) 425 { 426 struct carp_softc *sc = ifp->if_softc; 427 428 sc->sc_dead = 1; 429 carp_detach(sc, 1); 430 431 crit_enter(); 432 LIST_REMOVE(sc, sc_next); 433 crit_exit(); 434 bpfdetach(ifp); 435 if_detach(ifp); 436 437 KASSERT(sc->sc_naddrs == 0, ("certain inet address is still active\n")); 438 kfree(sc, M_CARP); 439 440 return 0; 441 } 442 443 static void 444 carp_detach(struct carp_softc *sc, int detach) 445 { 446 struct carp_if *cif; 447 448 carp_reset(sc, detach); 449 450 carp_multicast_cleanup(sc); 451 #ifdef INET6 452 carp_multicast6_cleanup(sc); 453 #endif 454 455 if (!sc->sc_dead && detach) { 456 struct carp_vhaddr *vha; 457 458 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) 459 carp_deactivate_vhaddr(sc, vha); 460 KKASSERT(sc->sc_naddrs == 0); 461 } 462 463 if (sc->sc_carpdev != NULL) { 464 cif = sc->sc_carpdev->if_carp; 465 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 466 if (TAILQ_EMPTY(&cif->vhif_vrs)) { 467 ifpromisc(sc->sc_carpdev, 0); 468 sc->sc_carpdev->if_carp = NULL; 469 kfree(cif, M_CARP); 470 } 471 sc->sc_carpdev = NULL; 472 sc->sc_ia = NULL; 473 } 474 } 475 476 /* Detach an interface from the carp. */ 477 static void 478 carp_ifdetach(void *arg __unused, struct ifnet *ifp) 479 { 480 struct carp_if *cif = ifp->if_carp; 481 struct carp_softc *sc; 482 483 while (ifp->if_carp && 484 (sc = TAILQ_FIRST(&cif->vhif_vrs)) != NULL) 485 carp_detach(sc, 1); 486 } 487 488 /* 489 * process input packet. 490 * we have rearranged checks order compared to the rfc, 491 * but it seems more efficient this way or not possible otherwise. 492 */ 493 int 494 carp_input(struct mbuf **mp, int *offp, int proto) 495 { 496 struct mbuf *m = *mp; 497 struct ip *ip = mtod(m, struct ip *); 498 struct carp_header *ch; 499 int len, iphlen; 500 501 iphlen = *offp; 502 *mp = NULL; 503 504 carpstats.carps_ipackets++; 505 506 if (!carp_opts[CARPCTL_ALLOW]) { 507 m_freem(m); 508 return(IPPROTO_DONE); 509 } 510 511 /* Check if received on a valid carp interface */ 512 if (m->m_pkthdr.rcvif->if_carp == NULL) { 513 carpstats.carps_badif++; 514 CARP_LOG("carp_input: packet received on non-carp " 515 "interface: %s\n", 516 m->m_pkthdr.rcvif->if_xname); 517 m_freem(m); 518 return(IPPROTO_DONE); 519 } 520 521 /* Verify that the IP TTL is CARP_DFLTTL. */ 522 if (ip->ip_ttl != CARP_DFLTTL) { 523 carpstats.carps_badttl++; 524 CARP_LOG("carp_input: received ttl %d != %d on %s\n", 525 ip->ip_ttl, CARP_DFLTTL, 526 m->m_pkthdr.rcvif->if_xname); 527 m_freem(m); 528 return(IPPROTO_DONE); 529 } 530 531 /* Minimal CARP packet size */ 532 len = iphlen + sizeof(*ch); 533 534 /* 535 * Verify that the received packet length is 536 * not less than the CARP header 537 */ 538 if (m->m_pkthdr.len < len) { 539 carpstats.carps_badlen++; 540 CARP_LOG("packet too short %d on %s\n", m->m_pkthdr.len, 541 m->m_pkthdr.rcvif->if_xname); 542 m_freem(m); 543 return(IPPROTO_DONE); 544 } 545 546 /* Make sure that CARP header is contiguous */ 547 if (len > m->m_len) { 548 m = m_pullup(m, len); 549 if (m == NULL) { 550 carpstats.carps_hdrops++; 551 CARP_LOG("carp_input: m_pullup failed\n"); 552 return(IPPROTO_DONE); 553 } 554 ip = mtod(m, struct ip *); 555 } 556 ch = (struct carp_header *)((uint8_t *)ip + iphlen); 557 558 /* Verify the CARP checksum */ 559 if (in_cksum_skip(m, len, iphlen)) { 560 carpstats.carps_badsum++; 561 CARP_LOG("carp_input: checksum failed on %s\n", 562 m->m_pkthdr.rcvif->if_xname); 563 m_freem(m); 564 return(IPPROTO_DONE); 565 } 566 carp_input_c(m, ch, AF_INET); 567 return(IPPROTO_DONE); 568 } 569 570 #ifdef INET6 571 int 572 carp6_input(struct mbuf **mp, int *offp, int proto) 573 { 574 struct mbuf *m = *mp; 575 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 576 struct carp_header *ch; 577 u_int len; 578 579 carpstats.carps_ipackets6++; 580 581 if (!carp_opts[CARPCTL_ALLOW]) { 582 m_freem(m); 583 return (IPPROTO_DONE); 584 } 585 586 /* check if received on a valid carp interface */ 587 if (m->m_pkthdr.rcvif->if_carp == NULL) { 588 carpstats.carps_badif++; 589 CARP_LOG("carp6_input: packet received on non-carp " 590 "interface: %s\n", 591 m->m_pkthdr.rcvif->if_xname); 592 m_freem(m); 593 return (IPPROTO_DONE); 594 } 595 596 /* verify that the IP TTL is 255 */ 597 if (ip6->ip6_hlim != CARP_DFLTTL) { 598 carpstats.carps_badttl++; 599 CARP_LOG("carp6_input: received ttl %d != 255 on %s\n", 600 ip6->ip6_hlim, 601 m->m_pkthdr.rcvif->if_xname); 602 m_freem(m); 603 return (IPPROTO_DONE); 604 } 605 606 /* verify that we have a complete carp packet */ 607 len = m->m_len; 608 IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch)); 609 if (ch == NULL) { 610 carpstats.carps_badlen++; 611 CARP_LOG("carp6_input: packet size %u too small\n", len); 612 return (IPPROTO_DONE); 613 } 614 615 /* verify the CARP checksum */ 616 if (in_cksum_range(m, 0, *offp, sizeof(*ch))) { 617 carpstats.carps_badsum++; 618 CARP_LOG("carp6_input: checksum failed, on %s\n", 619 m->m_pkthdr.rcvif->if_xname); 620 m_freem(m); 621 return (IPPROTO_DONE); 622 } 623 624 carp_input_c(m, ch, AF_INET6); 625 return (IPPROTO_DONE); 626 } 627 #endif /* INET6 */ 628 629 static void 630 carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af) 631 { 632 struct ifnet *ifp = m->m_pkthdr.rcvif; 633 struct ifnet *cifp; 634 struct carp_softc *sc; 635 uint64_t tmp_counter; 636 struct timeval sc_tv, ch_tv; 637 638 /* verify that the VHID is valid on the receiving interface */ 639 TAILQ_FOREACH(sc, &((struct carp_if *)ifp->if_carp)->vhif_vrs, sc_list) 640 if (sc->sc_vhid == ch->carp_vhid) 641 break; 642 643 if (!sc || !CARP_IS_RUNNING(&sc->sc_if)) { 644 carpstats.carps_badvhid++; 645 m_freem(m); 646 return; 647 } 648 cifp = &sc->sc_if; 649 650 getmicrotime(&cifp->if_lastchange); 651 cifp->if_ipackets++; 652 cifp->if_ibytes += m->m_pkthdr.len; 653 654 if (cifp->if_bpf) { 655 struct ip *ip = mtod(m, struct ip *); 656 657 /* BPF wants net byte order */ 658 ip->ip_len = htons(ip->ip_len + (ip->ip_hl << 2)); 659 ip->ip_off = htons(ip->ip_off); 660 bpf_mtap(cifp->if_bpf, m); 661 } 662 663 /* verify the CARP version. */ 664 if (ch->carp_version != CARP_VERSION) { 665 carpstats.carps_badver++; 666 cifp->if_ierrors++; 667 CARP_LOG("%s; invalid version %d\n", cifp->if_xname, 668 ch->carp_version); 669 m_freem(m); 670 return; 671 } 672 673 /* verify the hash */ 674 if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) { 675 carpstats.carps_badauth++; 676 cifp->if_ierrors++; 677 CARP_LOG("%s: incorrect hash\n", cifp->if_xname); 678 m_freem(m); 679 return; 680 } 681 682 tmp_counter = ntohl(ch->carp_counter[0]); 683 tmp_counter = tmp_counter<<32; 684 tmp_counter += ntohl(ch->carp_counter[1]); 685 686 /* XXX Replay protection goes here */ 687 688 sc->sc_init_counter = 0; 689 sc->sc_counter = tmp_counter; 690 691 sc_tv.tv_sec = sc->sc_advbase; 692 if (carp_suppress_preempt && sc->sc_advskew < 240) 693 sc_tv.tv_usec = 240 * 1000000 / 256; 694 else 695 sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256; 696 ch_tv.tv_sec = ch->carp_advbase; 697 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; 698 699 switch (sc->sc_state) { 700 case INIT: 701 break; 702 703 case MASTER: 704 /* 705 * If we receive an advertisement from a master who's going to 706 * be more frequent than us, go into BACKUP state. 707 */ 708 if (timevalcmp(&sc_tv, &ch_tv, >) || 709 timevalcmp(&sc_tv, &ch_tv, ==)) { 710 callout_stop(&sc->sc_ad_tmo); 711 CARP_DEBUG("%s: MASTER -> BACKUP " 712 "(more frequent advertisement received)\n", 713 cifp->if_xname); 714 carp_set_state(sc, BACKUP); 715 carp_setrun(sc, 0); 716 carp_setroute(sc, RTM_DELETE); 717 } 718 break; 719 720 case BACKUP: 721 /* 722 * If we're pre-empting masters who advertise slower than us, 723 * and this one claims to be slower, treat him as down. 724 */ 725 if (carp_opts[CARPCTL_PREEMPT] && 726 timevalcmp(&sc_tv, &ch_tv, <)) { 727 CARP_DEBUG("%s: BACKUP -> MASTER " 728 "(preempting a slower master)\n", cifp->if_xname); 729 carp_master_down(sc); 730 break; 731 } 732 733 /* 734 * If the master is going to advertise at such a low frequency 735 * that he's guaranteed to time out, we'd might as well just 736 * treat him as timed out now. 737 */ 738 sc_tv.tv_sec = sc->sc_advbase * 3; 739 if (timevalcmp(&sc_tv, &ch_tv, <)) { 740 CARP_DEBUG("%s: BACKUP -> MASTER (master timed out)\n", 741 cifp->if_xname); 742 carp_master_down(sc); 743 break; 744 } 745 746 /* 747 * Otherwise, we reset the counter and wait for the next 748 * advertisement. 749 */ 750 carp_setrun(sc, af); 751 break; 752 } 753 m_freem(m); 754 } 755 756 static int 757 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) 758 { 759 struct ifnet *cifp = &sc->sc_if; 760 struct m_tag *mtag; 761 762 if (sc->sc_init_counter) { 763 /* this could also be seconds since unix epoch */ 764 sc->sc_counter = karc4random(); 765 sc->sc_counter = sc->sc_counter << 32; 766 sc->sc_counter += karc4random(); 767 } else { 768 sc->sc_counter++; 769 } 770 771 ch->carp_counter[0] = htonl((sc->sc_counter >> 32) & 0xffffffff); 772 ch->carp_counter[1] = htonl(sc->sc_counter & 0xffffffff); 773 774 carp_hmac_generate(sc, ch->carp_counter, ch->carp_md); 775 776 /* Tag packet for carp_output */ 777 mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct ifnet *), MB_DONTWAIT); 778 if (mtag == NULL) { 779 m_freem(m); 780 cifp->if_oerrors++; 781 return ENOMEM; 782 } 783 bcopy(&cifp, (caddr_t)(mtag + 1), sizeof(struct ifnet *)); 784 m_tag_prepend(m, mtag); 785 786 return 0; 787 } 788 789 static void 790 carp_send_ad_all(void) 791 { 792 struct carp_softc *sc; 793 794 LIST_FOREACH(sc, &carpif_list, sc_next) { 795 if (sc->sc_carpdev == NULL) 796 continue; 797 798 if (CARP_IS_RUNNING(&sc->sc_if) && sc->sc_state == MASTER) 799 carp_send_ad(sc); 800 } 801 } 802 803 static void 804 carp_send_ad_timeout(void *xsc) 805 { 806 carp_send_ad(xsc); 807 } 808 809 static void 810 carp_send_ad(struct carp_softc *sc) 811 { 812 struct ifnet *cifp = &sc->sc_if; 813 struct carp_header ch; 814 struct timeval tv; 815 struct carp_header *ch_ptr; 816 struct mbuf *m; 817 int len, advbase, advskew; 818 819 if (!CARP_IS_RUNNING(cifp)) { 820 /* Bow out */ 821 advbase = 255; 822 advskew = 255; 823 } else { 824 advbase = sc->sc_advbase; 825 if (!carp_suppress_preempt || sc->sc_advskew > 240) 826 advskew = sc->sc_advskew; 827 else 828 advskew = 240; 829 tv.tv_sec = advbase; 830 tv.tv_usec = advskew * 1000000 / 256; 831 } 832 833 ch.carp_version = CARP_VERSION; 834 ch.carp_type = CARP_ADVERTISEMENT; 835 ch.carp_vhid = sc->sc_vhid; 836 ch.carp_advbase = advbase; 837 ch.carp_advskew = advskew; 838 ch.carp_authlen = 7; /* XXX DEFINE */ 839 ch.carp_pad1 = 0; /* must be zero */ 840 ch.carp_cksum = 0; 841 842 #ifdef INET 843 if (sc->sc_ia != NULL) { 844 struct ip *ip; 845 846 MGETHDR(m, MB_DONTWAIT, MT_HEADER); 847 if (m == NULL) { 848 cifp->if_oerrors++; 849 carpstats.carps_onomem++; 850 /* XXX maybe less ? */ 851 if (advbase != 255 || advskew != 255) 852 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv), 853 carp_send_ad_timeout, sc); 854 return; 855 } 856 len = sizeof(*ip) + sizeof(ch); 857 m->m_pkthdr.len = len; 858 m->m_pkthdr.rcvif = NULL; 859 m->m_len = len; 860 MH_ALIGN(m, m->m_len); 861 m->m_flags |= M_MCAST; 862 ip = mtod(m, struct ip *); 863 ip->ip_v = IPVERSION; 864 ip->ip_hl = sizeof(*ip) >> 2; 865 ip->ip_tos = IPTOS_LOWDELAY; 866 ip->ip_len = len; 867 ip->ip_id = ip_newid(); 868 ip->ip_off = IP_DF; 869 ip->ip_ttl = CARP_DFLTTL; 870 ip->ip_p = IPPROTO_CARP; 871 ip->ip_sum = 0; 872 ip->ip_src = sc->sc_ia->ia_addr.sin_addr; 873 ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP); 874 875 ch_ptr = (struct carp_header *)(&ip[1]); 876 bcopy(&ch, ch_ptr, sizeof(ch)); 877 if (carp_prepare_ad(m, sc, ch_ptr)) 878 return; 879 ch_ptr->carp_cksum = in_cksum_skip(m, len, sizeof(*ip)); 880 881 getmicrotime(&cifp->if_lastchange); 882 cifp->if_opackets++; 883 cifp->if_obytes += len; 884 carpstats.carps_opackets++; 885 886 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) { 887 cifp->if_oerrors++; 888 if (sc->sc_sendad_errors < INT_MAX) 889 sc->sc_sendad_errors++; 890 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 891 carp_suppress_preempt++; 892 if (carp_suppress_preempt == 1) { 893 carp_send_ad_all(); 894 } 895 } 896 sc->sc_sendad_success = 0; 897 } else { 898 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 899 if (++sc->sc_sendad_success >= 900 CARP_SENDAD_MIN_SUCCESS) { 901 carp_suppress_preempt--; 902 sc->sc_sendad_errors = 0; 903 } 904 } else { 905 sc->sc_sendad_errors = 0; 906 } 907 } 908 } 909 #endif /* INET */ 910 #ifdef INET6 911 if (sc->sc_ia6) { 912 struct ip6_hdr *ip6; 913 914 MGETHDR(m, MB_DONTWAIT, MT_HEADER); 915 if (m == NULL) { 916 cifp->if_oerrors++; 917 carpstats.carps_onomem++; 918 /* XXX maybe less ? */ 919 if (advbase != 255 || advskew != 255) 920 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv), 921 carp_send_ad_timeout, sc); 922 return; 923 } 924 len = sizeof(*ip6) + sizeof(ch); 925 m->m_pkthdr.len = len; 926 m->m_pkthdr.rcvif = NULL; 927 m->m_len = len; 928 MH_ALIGN(m, m->m_len); 929 m->m_flags |= M_MCAST; 930 ip6 = mtod(m, struct ip6_hdr *); 931 bzero(ip6, sizeof(*ip6)); 932 ip6->ip6_vfc |= IPV6_VERSION; 933 ip6->ip6_hlim = CARP_DFLTTL; 934 ip6->ip6_nxt = IPPROTO_CARP; 935 bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src, 936 sizeof(struct in6_addr)); 937 /* set the multicast destination */ 938 939 ip6->ip6_dst.s6_addr16[0] = htons(0xff02); 940 ip6->ip6_dst.s6_addr8[15] = 0x12; 941 if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) { 942 cifp->if_oerrors++; 943 m_freem(m); 944 CARP_LOG("%s: in6_setscope failed\n", __func__); 945 return; 946 } 947 948 ch_ptr = (struct carp_header *)(&ip6[1]); 949 bcopy(&ch, ch_ptr, sizeof(ch)); 950 if (carp_prepare_ad(m, sc, ch_ptr)) 951 return; 952 ch_ptr->carp_cksum = in_cksum_skip(m, len, sizeof(*ip6)); 953 954 getmicrotime(&cifp->if_lastchange); 955 cifp->if_opackets++; 956 cifp->if_obytes += len; 957 carpstats.carps_opackets6++; 958 959 if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) { 960 cifp->if_oerrors++; 961 if (sc->sc_sendad_errors < INT_MAX) 962 sc->sc_sendad_errors++; 963 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 964 carp_suppress_preempt++; 965 if (carp_suppress_preempt == 1) { 966 carp_send_ad_all(); 967 } 968 } 969 sc->sc_sendad_success = 0; 970 } else { 971 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 972 if (++sc->sc_sendad_success >= 973 CARP_SENDAD_MIN_SUCCESS) { 974 carp_suppress_preempt--; 975 sc->sc_sendad_errors = 0; 976 } 977 } else { 978 sc->sc_sendad_errors = 0; 979 } 980 } 981 } 982 #endif /* INET6 */ 983 984 if (advbase != 255 || advskew != 255) 985 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv), 986 carp_send_ad_timeout, sc); 987 } 988 989 /* 990 * Broadcast a gratuitous ARP request containing 991 * the virtual router MAC address for each IP address 992 * associated with the virtual router. 993 */ 994 static void 995 carp_send_arp(struct carp_softc *sc) 996 { 997 const struct carp_vhaddr *vha; 998 999 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 1000 if (vha->vha_iaback == NULL) 1001 continue; 1002 1003 arp_iainit(sc->sc_carpdev, &vha->vha_ia->ia_addr.sin_addr, 1004 IF_LLADDR(&sc->sc_if)); 1005 } 1006 } 1007 1008 #ifdef INET6 1009 static void 1010 carp_send_na(struct carp_softc *sc) 1011 { 1012 struct ifaddr_container *ifac; 1013 struct in6_addr *in6; 1014 static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; 1015 1016 TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) { 1017 struct ifaddr *ifa = ifac->ifa; 1018 1019 if (ifa->ifa_addr->sa_family != AF_INET6) 1020 continue; 1021 1022 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr; 1023 nd6_na_output(sc->sc_carpdev, &mcast, in6, 1024 ND_NA_FLAG_OVERRIDE, 1, NULL); 1025 DELAY(1000); /* XXX */ 1026 } 1027 } 1028 #endif /* INET6 */ 1029 1030 static __inline const struct carp_vhaddr * 1031 carp_find_addr(const struct carp_softc *sc, const struct in_addr *addr) 1032 { 1033 struct carp_vhaddr *vha; 1034 1035 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 1036 if (vha->vha_iaback == NULL) 1037 continue; 1038 1039 if (vha->vha_ia->ia_addr.sin_addr.s_addr == addr->s_addr) 1040 return vha; 1041 } 1042 return NULL; 1043 } 1044 1045 static int 1046 carp_iamatch_balance(const struct carp_if *cif, const struct in_addr *itaddr, 1047 const struct in_addr *isaddr, uint8_t **enaddr) 1048 { 1049 const struct carp_softc *vh; 1050 int index, count = 0; 1051 1052 /* 1053 * XXX proof of concept implementation. 1054 * We use the source ip to decide which virtual host should 1055 * handle the request. If we're master of that virtual host, 1056 * then we respond, otherwise, just drop the arp packet on 1057 * the floor. 1058 */ 1059 1060 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1061 if (!CARP_IS_RUNNING(&vh->sc_if)) 1062 continue; 1063 1064 if (carp_find_addr(vh, itaddr) != NULL) 1065 count++; 1066 } 1067 if (count == 0) 1068 return 0; 1069 1070 /* this should be a hash, like pf_hash() */ 1071 index = ntohl(isaddr->s_addr) % count; 1072 count = 0; 1073 1074 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1075 if (!CARP_IS_RUNNING(&vh->sc_if)) 1076 continue; 1077 1078 if (carp_find_addr(vh, itaddr) == NULL) 1079 continue; 1080 1081 if (count == index) { 1082 if (vh->sc_state == MASTER) { 1083 *enaddr = IF_LLADDR(&vh->sc_if); 1084 return 1; 1085 } else { 1086 return 0; 1087 } 1088 } 1089 count++; 1090 } 1091 return 0; 1092 } 1093 1094 int 1095 carp_iamatch(const void *v, const struct in_addr *itaddr, 1096 const struct in_addr *isaddr, uint8_t **enaddr) 1097 { 1098 const struct carp_if *cif = v; 1099 const struct carp_softc *vh; 1100 1101 if (carp_opts[CARPCTL_ARPBALANCE]) 1102 return carp_iamatch_balance(cif, itaddr, isaddr, enaddr); 1103 1104 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1105 if (!CARP_IS_RUNNING(&vh->sc_if) || vh->sc_state != MASTER) 1106 continue; 1107 1108 if (carp_find_addr(vh, itaddr) != NULL) { 1109 *enaddr = IF_LLADDR(&vh->sc_if); 1110 return 1; 1111 } 1112 } 1113 return 0; 1114 } 1115 1116 #ifdef INET6 1117 struct ifaddr * 1118 carp_iamatch6(void *v, struct in6_addr *taddr) 1119 { 1120 struct carp_if *cif = v; 1121 struct carp_softc *vh; 1122 1123 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1124 struct ifaddr_container *ifac; 1125 1126 TAILQ_FOREACH(ifac, &vh->sc_if.if_addrheads[mycpuid], 1127 ifa_link) { 1128 struct ifaddr *ifa = ifac->ifa; 1129 1130 if (IN6_ARE_ADDR_EQUAL(taddr, 1131 &ifatoia6(ifa)->ia_addr.sin6_addr) && 1132 CARP_IS_RUNNING(&vh->sc_if) && 1133 vh->sc_state == MASTER) { 1134 return (ifa); 1135 } 1136 } 1137 } 1138 return (NULL); 1139 } 1140 1141 void * 1142 carp_macmatch6(void *v, struct mbuf *m, const struct in6_addr *taddr) 1143 { 1144 struct m_tag *mtag; 1145 struct carp_if *cif = v; 1146 struct carp_softc *sc; 1147 1148 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) { 1149 struct ifaddr_container *ifac; 1150 1151 TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], 1152 ifa_link) { 1153 struct ifaddr *ifa = ifac->ifa; 1154 1155 if (IN6_ARE_ADDR_EQUAL(taddr, 1156 &ifatoia6(ifa)->ia_addr.sin6_addr) && 1157 CARP_IS_RUNNING(&sc->sc_if)) { 1158 struct ifnet *ifp = &sc->sc_if; 1159 1160 mtag = m_tag_get(PACKET_TAG_CARP, 1161 sizeof(struct ifnet *), MB_DONTWAIT); 1162 if (mtag == NULL) { 1163 /* better a bit than nothing */ 1164 return (IF_LLADDR(ifp)); 1165 } 1166 bcopy(&ifp, (caddr_t)(mtag + 1), 1167 sizeof(struct ifnet *)); 1168 m_tag_prepend(m, mtag); 1169 1170 return (IF_LLADDR(ifp)); 1171 } 1172 } 1173 } 1174 return (NULL); 1175 } 1176 #endif 1177 1178 int 1179 carp_forus(const void *v, const void *dhost) 1180 { 1181 const struct carp_if *cif = v; 1182 const struct carp_softc *vh; 1183 const uint8_t *ena = dhost; 1184 1185 if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1) 1186 return 0; 1187 1188 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1189 const struct ifnet *cifp = &vh->sc_if; 1190 1191 if (CARP_IS_RUNNING(cifp) && vh->sc_state == MASTER && 1192 !bcmp(dhost, IF_LLADDR(cifp), ETHER_ADDR_LEN)) 1193 return 1; 1194 } 1195 return 0; 1196 } 1197 1198 static void 1199 carp_master_down_timeout(void *xsc) 1200 { 1201 struct carp_softc *sc = xsc; 1202 1203 CARP_DEBUG("%s: BACKUP -> MASTER (master timed out)\n", 1204 sc->sc_if.if_xname); 1205 carp_master_down(sc); 1206 } 1207 1208 static void 1209 carp_master_down(struct carp_softc *sc) 1210 { 1211 switch (sc->sc_state) { 1212 case INIT: 1213 kprintf("%s: master_down event in INIT state\n", 1214 sc->sc_if.if_xname); 1215 break; 1216 1217 case MASTER: 1218 break; 1219 1220 case BACKUP: 1221 carp_set_state(sc, MASTER); 1222 carp_send_ad(sc); 1223 carp_send_arp(sc); 1224 #ifdef INET6 1225 carp_send_na(sc); 1226 #endif /* INET6 */ 1227 carp_setrun(sc, 0); 1228 carp_setroute(sc, RTM_ADD); 1229 break; 1230 } 1231 } 1232 1233 /* 1234 * When in backup state, af indicates whether to reset the master down timer 1235 * for v4 or v6. If it's set to zero, reset the ones which are already pending. 1236 */ 1237 static void 1238 carp_setrun(struct carp_softc *sc, sa_family_t af) 1239 { 1240 struct ifnet *cifp = &sc->sc_if; 1241 struct timeval tv; 1242 1243 if (sc->sc_carpdev == NULL) { 1244 carp_set_state(sc, INIT); 1245 return; 1246 } 1247 1248 if ((cifp->if_flags & IFF_RUNNING) && sc->sc_vhid > 0 && 1249 (sc->sc_naddrs || sc->sc_naddrs6)) { 1250 /* Nothing */ 1251 } else { 1252 carp_setroute(sc, RTM_DELETE); 1253 return; 1254 } 1255 1256 switch (sc->sc_state) { 1257 case INIT: 1258 if (carp_opts[CARPCTL_PREEMPT] && !carp_suppress_preempt) { 1259 carp_send_ad(sc); 1260 carp_send_arp(sc); 1261 #ifdef INET6 1262 carp_send_na(sc); 1263 #endif /* INET6 */ 1264 CARP_DEBUG("%s: INIT -> MASTER (preempting)\n", 1265 cifp->if_xname); 1266 carp_set_state(sc, MASTER); 1267 carp_setroute(sc, RTM_ADD); 1268 } else { 1269 CARP_DEBUG("%s: INIT -> BACKUP\n", cifp->if_xname); 1270 carp_set_state(sc, BACKUP); 1271 carp_setroute(sc, RTM_DELETE); 1272 carp_setrun(sc, 0); 1273 } 1274 break; 1275 1276 case BACKUP: 1277 callout_stop(&sc->sc_ad_tmo); 1278 tv.tv_sec = 3 * sc->sc_advbase; 1279 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1280 switch (af) { 1281 #ifdef INET 1282 case AF_INET: 1283 callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv), 1284 carp_master_down_timeout, sc); 1285 break; 1286 #endif /* INET */ 1287 #ifdef INET6 1288 case AF_INET6: 1289 callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv), 1290 carp_master_down_timeout, sc); 1291 break; 1292 #endif /* INET6 */ 1293 default: 1294 if (sc->sc_naddrs) 1295 callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv), 1296 carp_master_down_timeout, sc); 1297 if (sc->sc_naddrs6) 1298 callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv), 1299 carp_master_down_timeout, sc); 1300 break; 1301 } 1302 break; 1303 1304 case MASTER: 1305 tv.tv_sec = sc->sc_advbase; 1306 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1307 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv), 1308 carp_send_ad_timeout, sc); 1309 break; 1310 } 1311 } 1312 1313 static void 1314 carp_multicast_cleanup(struct carp_softc *sc) 1315 { 1316 struct ip_moptions *imo = &sc->sc_imo; 1317 1318 if (imo->imo_num_memberships == 0) 1319 return; 1320 KKASSERT(imo->imo_num_memberships == 1); 1321 1322 in_delmulti(imo->imo_membership[0]); 1323 imo->imo_membership[0] = NULL; 1324 imo->imo_num_memberships = 0; 1325 imo->imo_multicast_ifp = NULL; 1326 } 1327 1328 #ifdef INET6 1329 static void 1330 carp_multicast6_cleanup(struct carp_softc *sc) 1331 { 1332 struct ip6_moptions *im6o = &sc->sc_im6o; 1333 1334 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 1335 struct in6_multi_mship *imm = 1336 LIST_FIRST(&im6o->im6o_memberships); 1337 1338 LIST_REMOVE(imm, i6mm_chain); 1339 in6_leavegroup(imm); 1340 } 1341 im6o->im6o_multicast_ifp = NULL; 1342 } 1343 #endif 1344 1345 static int 1346 carp_get_vhaddr(struct carp_softc *sc, struct ifdrv *ifd) 1347 { 1348 const struct carp_vhaddr *vha; 1349 struct ifcarpvhaddr *carpa, *carpa0; 1350 int count, len, error; 1351 1352 count = 0; 1353 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) 1354 ++count; 1355 1356 if (ifd->ifd_len == 0) { 1357 ifd->ifd_len = count * sizeof(*carpa); 1358 return 0; 1359 } else if (count == 0 || ifd->ifd_len < sizeof(*carpa)) { 1360 ifd->ifd_len = 0; 1361 return 0; 1362 } 1363 len = min(ifd->ifd_len, sizeof(*carpa) * count); 1364 KKASSERT(len >= sizeof(*carpa)); 1365 1366 carpa0 = carpa = kmalloc(len, M_TEMP, M_WAITOK | M_NULLOK | M_ZERO); 1367 if (carpa == NULL) 1368 return ENOMEM; 1369 1370 count = 0; 1371 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 1372 if (len < sizeof(*carpa)) 1373 break; 1374 1375 carpa->carpa_flags = vha->vha_flags; 1376 carpa->carpa_addr.sin_family = AF_INET; 1377 carpa->carpa_addr.sin_addr = vha->vha_ia->ia_addr.sin_addr; 1378 1379 carpa->carpa_baddr.sin_family = AF_INET; 1380 if (vha->vha_iaback == NULL) { 1381 carpa->carpa_baddr.sin_addr.s_addr = INADDR_ANY; 1382 } else { 1383 carpa->carpa_baddr.sin_addr = 1384 vha->vha_iaback->ia_addr.sin_addr; 1385 } 1386 1387 ++carpa; 1388 ++count; 1389 len -= sizeof(*carpa); 1390 } 1391 ifd->ifd_len = sizeof(*carpa) * count; 1392 KKASSERT(ifd->ifd_len > 0); 1393 1394 error = copyout(carpa0, ifd->ifd_data, ifd->ifd_len); 1395 kfree(carpa0, M_TEMP); 1396 return error; 1397 } 1398 1399 static int 1400 carp_config_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha) 1401 { 1402 struct ifnet *ifp; 1403 struct in_ifaddr *ia_if; 1404 struct in_ifaddr_container *iac; 1405 const struct sockaddr_in *sin; 1406 u_long iaddr; 1407 int own; 1408 1409 KKASSERT(vha->vha_ia != NULL); 1410 1411 sin = &vha->vha_ia->ia_addr; 1412 iaddr = ntohl(sin->sin_addr.s_addr); 1413 1414 ia_if = NULL; 1415 own = 0; 1416 TAILQ_FOREACH(iac, &in_ifaddrheads[mycpuid], ia_link) { 1417 struct in_ifaddr *ia = iac->ia; 1418 1419 if ((ia->ia_flags & IFA_ROUTE) == 0) 1420 continue; 1421 1422 if (ia->ia_ifp->if_type == IFT_CARP) 1423 continue; 1424 1425 /* and, yeah, we need a multicast-capable iface too */ 1426 if ((ia->ia_ifp->if_flags & IFF_MULTICAST) == 0) 1427 continue; 1428 1429 if ((iaddr & ia->ia_subnetmask) == ia->ia_subnet) { 1430 if (sin->sin_addr.s_addr == 1431 ia->ia_addr.sin_addr.s_addr) 1432 own = 1; 1433 if (ia_if == NULL) 1434 ia_if = ia; 1435 else if (sc->sc_carpdev != NULL && 1436 sc->sc_carpdev == ia->ia_ifp) 1437 ia_if = ia; 1438 } 1439 } 1440 1441 carp_deactivate_vhaddr(sc, vha); 1442 if (!ia_if) 1443 return ENOENT; 1444 1445 ifp = ia_if->ia_ifp; 1446 1447 /* XXX Don't allow parent iface to be changed */ 1448 if (sc->sc_carpdev != NULL && sc->sc_carpdev != ifp) 1449 return EEXIST; 1450 1451 return carp_activate_vhaddr(sc, vha, ifp, ia_if, own); 1452 } 1453 1454 static void 1455 carp_add_addr(struct carp_softc *sc, struct ifaddr *carp_ifa) 1456 { 1457 struct carp_vhaddr *vha_new; 1458 struct in_ifaddr *carp_ia; 1459 #ifdef INVARIANTS 1460 struct carp_vhaddr *vha; 1461 #endif 1462 1463 KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET); 1464 carp_ia = ifatoia(carp_ifa); 1465 1466 #ifdef INVARIANTS 1467 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) 1468 KKASSERT(vha->vha_ia != NULL && vha->vha_ia != carp_ia); 1469 #endif 1470 1471 vha_new = kmalloc(sizeof(*vha_new), M_CARP, M_WAITOK | M_ZERO); 1472 vha_new->vha_ia = carp_ia; 1473 carp_insert_vhaddr(sc, vha_new); 1474 1475 if (carp_config_vhaddr(sc, vha_new) != 0) { 1476 /* 1477 * If the above configuration fails, it may only mean 1478 * that the new address is problematic. However, the 1479 * carp(4) interface may already have several working 1480 * addresses. Since the expected behaviour of 1481 * SIOC[AS]IFADDR is to put the NIC into working state, 1482 * we try starting the state machine manually here with 1483 * the hope that the carp(4)'s previously working 1484 * addresses still could be brought up. 1485 */ 1486 carp_hmac_prepare(sc); 1487 carp_set_state(sc, INIT); 1488 carp_setrun(sc, 0); 1489 } 1490 } 1491 1492 static void 1493 carp_del_addr(struct carp_softc *sc, struct ifaddr *carp_ifa) 1494 { 1495 struct carp_vhaddr *vha; 1496 struct in_ifaddr *carp_ia; 1497 1498 KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET); 1499 carp_ia = ifatoia(carp_ifa); 1500 1501 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 1502 KKASSERT(vha->vha_ia != NULL); 1503 if (vha->vha_ia == carp_ia) 1504 break; 1505 } 1506 KASSERT(vha != NULL, ("no corresponding vhaddr %p\n", carp_ifa)); 1507 1508 /* 1509 * Remove the vhaddr from the list before deactivating 1510 * the vhaddr, so that the HMAC could be correctly 1511 * updated in carp_deactivate_vhaddr() 1512 */ 1513 carp_remove_vhaddr(sc, vha); 1514 1515 carp_deactivate_vhaddr(sc, vha); 1516 kfree(vha, M_CARP); 1517 } 1518 1519 static void 1520 carp_config_addr(struct carp_softc *sc, struct ifaddr *carp_ifa) 1521 { 1522 struct carp_vhaddr *vha; 1523 struct in_ifaddr *carp_ia; 1524 1525 KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET); 1526 carp_ia = ifatoia(carp_ifa); 1527 1528 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 1529 KKASSERT(vha->vha_ia != NULL); 1530 if (vha->vha_ia == carp_ia) 1531 break; 1532 } 1533 KASSERT(vha != NULL, ("no corresponding vhaddr %p\n", carp_ifa)); 1534 1535 /* Remove then reinsert, to keep the vhaddr list sorted */ 1536 carp_remove_vhaddr(sc, vha); 1537 carp_insert_vhaddr(sc, vha); 1538 1539 if (carp_config_vhaddr(sc, vha) != 0) { 1540 /* See the comment in carp_add_addr() */ 1541 carp_hmac_prepare(sc); 1542 carp_set_state(sc, INIT); 1543 carp_setrun(sc, 0); 1544 } 1545 } 1546 1547 #ifdef INET6 1548 static int 1549 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) 1550 { 1551 struct ifnet *ifp; 1552 struct carp_if *cif; 1553 struct in6_ifaddr *ia, *ia_if; 1554 struct ip6_moptions *im6o = &sc->sc_im6o; 1555 struct in6_multi_mship *imm; 1556 struct in6_addr in6; 1557 int own, error; 1558 1559 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 1560 carp_setrun(sc, 0); 1561 return (0); 1562 } 1563 1564 /* we have to do it by hands to check we won't match on us */ 1565 ia_if = NULL; own = 0; 1566 for (ia = in6_ifaddr; ia; ia = ia->ia_next) { 1567 int i; 1568 1569 for (i = 0; i < 4; i++) { 1570 if ((sin6->sin6_addr.s6_addr32[i] & 1571 ia->ia_prefixmask.sin6_addr.s6_addr32[i]) != 1572 (ia->ia_addr.sin6_addr.s6_addr32[i] & 1573 ia->ia_prefixmask.sin6_addr.s6_addr32[i])) 1574 break; 1575 } 1576 /* and, yeah, we need a multicast-capable iface too */ 1577 if (ia->ia_ifp != &sc->sc_if && 1578 (ia->ia_ifp->if_flags & IFF_MULTICAST) && 1579 (i == 4)) { 1580 if (!ia_if) 1581 ia_if = ia; 1582 if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, 1583 &ia->ia_addr.sin6_addr)) 1584 own++; 1585 } 1586 } 1587 1588 if (!ia_if) 1589 return (EADDRNOTAVAIL); 1590 ia = ia_if; 1591 ifp = ia->ia_ifp; 1592 1593 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 || 1594 (im6o->im6o_multicast_ifp && im6o->im6o_multicast_ifp != ifp)) 1595 return (EADDRNOTAVAIL); 1596 1597 if (!sc->sc_naddrs6) { 1598 im6o->im6o_multicast_ifp = ifp; 1599 1600 /* join CARP multicast address */ 1601 bzero(&in6, sizeof(in6)); 1602 in6.s6_addr16[0] = htons(0xff02); 1603 in6.s6_addr8[15] = 0x12; 1604 if (in6_setscope(&in6, ifp, NULL) != 0) 1605 goto cleanup; 1606 if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL) 1607 goto cleanup; 1608 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain); 1609 1610 /* join solicited multicast address */ 1611 bzero(&in6, sizeof(in6)); 1612 in6.s6_addr16[0] = htons(0xff02); 1613 in6.s6_addr32[1] = 0; 1614 in6.s6_addr32[2] = htonl(1); 1615 in6.s6_addr32[3] = sin6->sin6_addr.s6_addr32[3]; 1616 in6.s6_addr8[12] = 0xff; 1617 if (in6_setscope(&in6, ifp, NULL) != 0) 1618 goto cleanup; 1619 if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL) 1620 goto cleanup; 1621 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain); 1622 } 1623 1624 if (!ifp->if_carp) { 1625 cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO); 1626 1627 if ((error = ifpromisc(ifp, 1))) { 1628 kfree(cif, M_CARP); 1629 goto cleanup; 1630 } 1631 1632 TAILQ_INIT(&cif->vhif_vrs); 1633 ifp->if_carp = cif; 1634 } else { 1635 struct carp_softc *vr; 1636 1637 cif = ifp->if_carp; 1638 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 1639 if (vr != sc && vr->sc_vhid == sc->sc_vhid) { 1640 error = EINVAL; 1641 goto cleanup; 1642 } 1643 } 1644 } 1645 sc->sc_ia6 = ia; 1646 sc->sc_carpdev = ifp; 1647 1648 { /* XXX prevent endless loop if already in queue */ 1649 struct carp_softc *vr, *after = NULL; 1650 int myself = 0; 1651 cif = ifp->if_carp; 1652 1653 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 1654 if (vr == sc) 1655 myself = 1; 1656 if (vr->sc_vhid < sc->sc_vhid) 1657 after = vr; 1658 } 1659 1660 if (!myself) { 1661 /* We're trying to keep things in order */ 1662 if (after == NULL) 1663 TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list); 1664 else 1665 TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list); 1666 } 1667 } 1668 1669 sc->sc_naddrs6++; 1670 if (own) 1671 sc->sc_advskew = 0; 1672 carp_sc_state(sc); 1673 carp_setrun(sc, 0); 1674 1675 return (0); 1676 1677 cleanup: 1678 /* clean up multicast memberships */ 1679 if (!sc->sc_naddrs6) { 1680 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 1681 imm = LIST_FIRST(&im6o->im6o_memberships); 1682 LIST_REMOVE(imm, i6mm_chain); 1683 in6_leavegroup(imm); 1684 } 1685 } 1686 return (error); 1687 } 1688 1689 static int 1690 carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) 1691 { 1692 int error = 0; 1693 1694 if (!--sc->sc_naddrs6) { 1695 struct carp_if *cif = sc->sc_carpdev->if_carp; 1696 struct ip6_moptions *im6o = &sc->sc_im6o; 1697 1698 callout_stop(&sc->sc_ad_tmo); 1699 sc->sc_vhid = -1; 1700 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 1701 struct in6_multi_mship *imm = 1702 LIST_FIRST(&im6o->im6o_memberships); 1703 1704 LIST_REMOVE(imm, i6mm_chain); 1705 in6_leavegroup(imm); 1706 } 1707 im6o->im6o_multicast_ifp = NULL; 1708 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 1709 if (TAILQ_EMPTY(&cif->vhif_vrs)) { 1710 sc->sc_carpdev->if_carp = NULL; 1711 kfree(cif, M_IFADDR); 1712 } 1713 } 1714 return (error); 1715 } 1716 #endif /* INET6 */ 1717 1718 static int 1719 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr, struct ucred *cr) 1720 { 1721 struct carp_softc *sc = ifp->if_softc, *vr; 1722 struct carpreq carpr; 1723 struct ifaddr *ifa; 1724 struct ifreq *ifr; 1725 struct ifaliasreq *ifra; 1726 struct ifdrv *ifd; 1727 char devname[IFNAMSIZ]; 1728 int error = 0; 1729 1730 ifa = (struct ifaddr *)addr; 1731 ifra = (struct ifaliasreq *)addr; 1732 ifr = (struct ifreq *)addr; 1733 ifd = (struct ifdrv *)addr; 1734 1735 switch (cmd) { 1736 case SIOCSIFADDR: 1737 switch (ifa->ifa_addr->sa_family) { 1738 #ifdef INET 1739 case AF_INET: 1740 ifp->if_flags |= IFF_UP | IFF_RUNNING; 1741 break; 1742 #endif /* INET */ 1743 #ifdef INET6 1744 case AF_INET6: 1745 ifp->if_flags |= IFF_UP | IFF_RUNNING; 1746 error = carp_set_addr6(sc, satosin6(ifa->ifa_addr)); 1747 break; 1748 #endif /* INET6 */ 1749 default: 1750 error = EAFNOSUPPORT; 1751 break; 1752 } 1753 break; 1754 1755 case SIOCAIFADDR: 1756 switch (ifa->ifa_addr->sa_family) { 1757 #ifdef INET 1758 case AF_INET: 1759 panic("SIOCAIFADDR should never be seen\n"); 1760 #endif /* INET */ 1761 #ifdef INET6 1762 case AF_INET6: 1763 ifp->if_flags |= IFF_UP | IFF_RUNNING; 1764 error = carp_set_addr6(sc, satosin6(&ifra->ifra_addr)); 1765 break; 1766 #endif /* INET6 */ 1767 default: 1768 error = EAFNOSUPPORT; 1769 break; 1770 } 1771 break; 1772 1773 case SIOCDIFADDR: 1774 switch (ifa->ifa_addr->sa_family) { 1775 #ifdef INET 1776 case AF_INET: 1777 panic("SIOCDIFADDR should never be seen\n"); 1778 #endif /* INET */ 1779 #ifdef INET6 1780 case AF_INET6: 1781 error = carp_del_addr6(sc, satosin6(&ifra->ifra_addr)); 1782 break; 1783 #endif /* INET6 */ 1784 default: 1785 error = EAFNOSUPPORT; 1786 break; 1787 } 1788 break; 1789 1790 case SIOCSIFFLAGS: 1791 if (ifp->if_flags & IFF_UP) { 1792 if ((ifp->if_flags & IFF_RUNNING) == 0) { 1793 ifp->if_flags |= IFF_RUNNING; 1794 carp_set_state(sc, INIT); 1795 carp_setrun(sc, 0); 1796 } 1797 } else if (ifp->if_flags & IFF_RUNNING) { 1798 carp_stop(sc, 0); 1799 } 1800 break; 1801 1802 case SIOCSVH: 1803 error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY); 1804 if (error) 1805 break; 1806 error = copyin(ifr->ifr_data, &carpr, sizeof(carpr)); 1807 if (error) 1808 break; 1809 1810 error = 1; 1811 if ((ifp->if_flags & IFF_RUNNING) && 1812 sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) { 1813 switch (carpr.carpr_state) { 1814 case BACKUP: 1815 callout_stop(&sc->sc_ad_tmo); 1816 carp_set_state(sc, BACKUP); 1817 carp_setrun(sc, 0); 1818 carp_setroute(sc, RTM_DELETE); 1819 break; 1820 1821 case MASTER: 1822 carp_master_down(sc); 1823 break; 1824 1825 default: 1826 break; 1827 } 1828 } 1829 if (carpr.carpr_vhid > 0) { 1830 if (carpr.carpr_vhid > 255) { 1831 error = EINVAL; 1832 break; 1833 } 1834 if (sc->sc_carpdev) { 1835 struct carp_if *cif = sc->sc_carpdev->if_carp; 1836 1837 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 1838 if (vr != sc && 1839 vr->sc_vhid == carpr.carpr_vhid) 1840 return EEXIST; 1841 } 1842 } 1843 sc->sc_vhid = carpr.carpr_vhid; 1844 IF_LLADDR(ifp)[0] = 0; 1845 IF_LLADDR(ifp)[1] = 0; 1846 IF_LLADDR(ifp)[2] = 0x5e; 1847 IF_LLADDR(ifp)[3] = 0; 1848 IF_LLADDR(ifp)[4] = 1; 1849 IF_LLADDR(ifp)[5] = sc->sc_vhid; 1850 error--; 1851 } 1852 if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) { 1853 if (carpr.carpr_advskew >= 255) { 1854 error = EINVAL; 1855 break; 1856 } 1857 if (carpr.carpr_advbase > 255) { 1858 error = EINVAL; 1859 break; 1860 } 1861 sc->sc_advbase = carpr.carpr_advbase; 1862 sc->sc_advskew = carpr.carpr_advskew; 1863 error--; 1864 } 1865 bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key)); 1866 if (error > 0) { 1867 error = EINVAL; 1868 } else { 1869 error = 0; 1870 carp_setrun(sc, 0); 1871 } 1872 break; 1873 1874 case SIOCGVH: 1875 bzero(&carpr, sizeof(carpr)); 1876 carpr.carpr_state = sc->sc_state; 1877 carpr.carpr_vhid = sc->sc_vhid; 1878 carpr.carpr_advbase = sc->sc_advbase; 1879 carpr.carpr_advskew = sc->sc_advskew; 1880 error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY); 1881 if (error == 0) { 1882 bcopy(sc->sc_key, carpr.carpr_key, 1883 sizeof(carpr.carpr_key)); 1884 } 1885 1886 error = copyout(&carpr, ifr->ifr_data, sizeof(carpr)); 1887 break; 1888 1889 case SIOCGDRVSPEC: 1890 switch (ifd->ifd_cmd) { 1891 case CARPGDEVNAME: 1892 if (ifd->ifd_len != sizeof(devname)) 1893 error = EINVAL; 1894 break; 1895 1896 case CARPGVHADDR: 1897 break; 1898 1899 default: 1900 error = EINVAL; 1901 break; 1902 } 1903 if (error) 1904 break; 1905 1906 switch (ifd->ifd_cmd) { 1907 case CARPGVHADDR: 1908 error = carp_get_vhaddr(sc, ifd); 1909 break; 1910 1911 case CARPGDEVNAME: 1912 bzero(devname, sizeof(devname)); 1913 if (sc->sc_carpdev != NULL) { 1914 strlcpy(devname, sc->sc_carpdev->if_xname, 1915 sizeof(devname)); 1916 } 1917 error = copyout(devname, ifd->ifd_data, 1918 sizeof(devname)); 1919 break; 1920 } 1921 break; 1922 1923 default: 1924 error = EINVAL; 1925 break; 1926 } 1927 carp_hmac_prepare(sc); 1928 return error; 1929 } 1930 1931 /* 1932 * XXX: this is looutput. We should eventually use it from there. 1933 */ 1934 static int 1935 carp_looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 1936 struct rtentry *rt) 1937 { 1938 uint32_t af; 1939 1940 M_ASSERTPKTHDR(m); /* check if we have the packet header */ 1941 1942 if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 1943 m_freem(m); 1944 return (rt->rt_flags & RTF_BLACKHOLE ? 0 : 1945 rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); 1946 } 1947 1948 ifp->if_opackets++; 1949 ifp->if_obytes += m->m_pkthdr.len; 1950 1951 /* BPF writes need to be handled specially. */ 1952 if (dst->sa_family == AF_UNSPEC) { 1953 bcopy(dst->sa_data, &af, sizeof(af)); 1954 dst->sa_family = af; 1955 } 1956 1957 #if 1 /* XXX */ 1958 switch (dst->sa_family) { 1959 case AF_INET: 1960 case AF_INET6: 1961 case AF_IPX: 1962 break; 1963 1964 default: 1965 m_freem(m); 1966 return (EAFNOSUPPORT); 1967 } 1968 #endif 1969 return (if_simloop(ifp, m, dst->sa_family, 0)); 1970 } 1971 1972 /* 1973 * Start output on carp interface. This function should never be called. 1974 */ 1975 static void 1976 carp_start(struct ifnet *ifp) 1977 { 1978 #ifdef DEBUG 1979 kprintf("%s: start called\n", ifp->if_xname); 1980 #endif 1981 } 1982 1983 int 1984 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, 1985 struct rtentry *rt) 1986 { 1987 struct m_tag *mtag; 1988 struct carp_softc *sc; 1989 struct ifnet *carp_ifp; 1990 struct ether_header *eh; 1991 1992 if (!sa) 1993 return (0); 1994 1995 switch (sa->sa_family) { 1996 #ifdef INET 1997 case AF_INET: 1998 break; 1999 #endif /* INET */ 2000 #ifdef INET6 2001 case AF_INET6: 2002 break; 2003 #endif /* INET6 */ 2004 default: 2005 return (0); 2006 } 2007 2008 mtag = m_tag_find(m, PACKET_TAG_CARP, NULL); 2009 if (mtag == NULL) 2010 return (0); 2011 2012 bcopy(mtag + 1, &carp_ifp, sizeof(struct ifnet *)); 2013 sc = carp_ifp->if_softc; 2014 2015 /* Set the source MAC address to Virtual Router MAC Address */ 2016 switch (ifp->if_type) { 2017 case IFT_ETHER: 2018 case IFT_L2VLAN: 2019 eh = mtod(m, struct ether_header *); 2020 eh->ether_shost[0] = 0; 2021 eh->ether_shost[1] = 0; 2022 eh->ether_shost[2] = 0x5e; 2023 eh->ether_shost[3] = 0; 2024 eh->ether_shost[4] = 1; 2025 eh->ether_shost[5] = sc->sc_vhid; 2026 break; 2027 2028 default: 2029 if_printf(ifp, "carp is not supported for this " 2030 "interface type\n"); 2031 return (EOPNOTSUPP); 2032 } 2033 return (0); 2034 } 2035 2036 static void 2037 carp_set_state(struct carp_softc *sc, int state) 2038 { 2039 struct ifnet *cifp = &sc->sc_if; 2040 2041 if (sc->sc_state == state) 2042 return; 2043 sc->sc_state = state; 2044 2045 switch (sc->sc_state) { 2046 case BACKUP: 2047 cifp->if_link_state = LINK_STATE_DOWN; 2048 break; 2049 2050 case MASTER: 2051 cifp->if_link_state = LINK_STATE_UP; 2052 break; 2053 2054 default: 2055 cifp->if_link_state = LINK_STATE_UNKNOWN; 2056 break; 2057 } 2058 rt_ifmsg(cifp); 2059 } 2060 2061 void 2062 carp_group_demote_adj(struct ifnet *ifp, int adj) 2063 { 2064 struct ifg_list *ifgl; 2065 int *dm; 2066 2067 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) { 2068 if (!strcmp(ifgl->ifgl_group->ifg_group, IFG_ALL)) 2069 continue; 2070 dm = &ifgl->ifgl_group->ifg_carp_demoted; 2071 2072 if (*dm + adj >= 0) 2073 *dm += adj; 2074 else 2075 *dm = 0; 2076 2077 if (adj > 0 && *dm == 1) 2078 carp_send_ad_all(); 2079 CARP_LOG("%s demoted group %s to %d", ifp->if_xname, 2080 ifgl->ifgl_group->ifg_group, *dm); 2081 } 2082 } 2083 2084 void 2085 carp_carpdev_state(void *v) 2086 { 2087 struct carp_if *cif = v; 2088 struct carp_softc *sc; 2089 2090 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) 2091 carp_sc_state(sc); 2092 } 2093 2094 static void 2095 carp_sc_state(struct carp_softc *sc) 2096 { 2097 if (!(sc->sc_carpdev->if_flags & IFF_UP)) { 2098 callout_stop(&sc->sc_ad_tmo); 2099 callout_stop(&sc->sc_md_tmo); 2100 callout_stop(&sc->sc_md6_tmo); 2101 carp_set_state(sc, INIT); 2102 carp_setrun(sc, 0); 2103 if (!sc->sc_suppress) { 2104 carp_suppress_preempt++; 2105 if (carp_suppress_preempt == 1) 2106 carp_send_ad_all(); 2107 } 2108 sc->sc_suppress = 1; 2109 } else { 2110 carp_set_state(sc, INIT); 2111 carp_setrun(sc, 0); 2112 if (sc->sc_suppress) 2113 carp_suppress_preempt--; 2114 sc->sc_suppress = 0; 2115 } 2116 } 2117 2118 static void 2119 carp_stop(struct carp_softc *sc, int detach) 2120 { 2121 sc->sc_if.if_flags &= ~IFF_RUNNING; 2122 2123 callout_stop(&sc->sc_ad_tmo); 2124 callout_stop(&sc->sc_md_tmo); 2125 callout_stop(&sc->sc_md6_tmo); 2126 2127 if (!detach && sc->sc_state == MASTER) 2128 carp_send_ad(sc); 2129 2130 if (sc->sc_suppress) 2131 carp_suppress_preempt--; 2132 sc->sc_suppress = 0; 2133 2134 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) 2135 carp_suppress_preempt--; 2136 sc->sc_sendad_errors = 0; 2137 sc->sc_sendad_success = 0; 2138 2139 carp_set_state(sc, INIT); 2140 carp_setrun(sc, 0); 2141 } 2142 2143 static void 2144 carp_reset(struct carp_softc *sc, int detach) 2145 { 2146 struct ifnet *cifp = &sc->sc_if; 2147 2148 carp_stop(sc, detach); 2149 if (!sc->sc_dead && (cifp->if_flags & IFF_UP)) 2150 cifp->if_flags |= IFF_RUNNING; 2151 } 2152 2153 static int 2154 carp_activate_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha, 2155 struct ifnet *ifp, const struct in_ifaddr *ia_if, int own) 2156 { 2157 struct ip_moptions *imo = &sc->sc_imo; 2158 struct carp_if *cif; 2159 struct carp_softc *vr, *after = NULL; 2160 int onlist, error; 2161 #ifdef INVARIANTS 2162 int assert_onlist; 2163 #endif 2164 2165 KKASSERT(vha->vha_ia != NULL); 2166 2167 KASSERT(ia_if != NULL, ("NULL backing address\n")); 2168 KASSERT(vha->vha_iaback == NULL, ("%p is already activated\n", vha)); 2169 KASSERT((vha->vha_flags & CARP_VHAF_OWNER) == 0, 2170 ("inactive vhaddr %p is the address owner\n", vha)); 2171 2172 KASSERT(sc->sc_carpdev == NULL || sc->sc_carpdev == ifp, 2173 ("%s is already on %s\n", sc->sc_if.if_xname, 2174 sc->sc_carpdev->if_xname)); 2175 2176 KASSERT(imo->imo_multicast_ifp == NULL || 2177 imo->imo_multicast_ifp == ifp, 2178 ("%s didn't leave mcast group on %s\n", 2179 sc->sc_if.if_xname, imo->imo_multicast_ifp->if_xname)); 2180 2181 if (imo->imo_num_memberships == 0) { 2182 struct in_addr addr; 2183 2184 addr.s_addr = htonl(INADDR_CARP_GROUP); 2185 if ((imo->imo_membership[0] = in_addmulti(&addr, ifp)) == NULL) 2186 return ENOBUFS; 2187 imo->imo_num_memberships++; 2188 imo->imo_multicast_ifp = ifp; 2189 imo->imo_multicast_ttl = CARP_DFLTTL; 2190 imo->imo_multicast_loop = 0; 2191 } 2192 2193 if (!ifp->if_carp) { 2194 KASSERT(sc->sc_carpdev == NULL, 2195 ("%s is already on %s\n", sc->sc_if.if_xname, 2196 sc->sc_carpdev->if_xname)); 2197 2198 cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO); 2199 2200 error = ifpromisc(ifp, 1); 2201 if (error) { 2202 kfree(cif, M_CARP); 2203 goto cleanup; 2204 } 2205 2206 TAILQ_INIT(&cif->vhif_vrs); 2207 ifp->if_carp = cif; 2208 } else { 2209 cif = ifp->if_carp; 2210 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 2211 if (vr != sc && vr->sc_vhid == sc->sc_vhid) { 2212 error = EINVAL; 2213 goto cleanup; 2214 } 2215 } 2216 } 2217 2218 #ifdef INVARIANTS 2219 if (sc->sc_carpdev != NULL) 2220 assert_onlist = 1; 2221 else 2222 assert_onlist = 0; 2223 #endif 2224 sc->sc_ia = ia_if; 2225 sc->sc_carpdev = ifp; 2226 2227 cif = ifp->if_carp; 2228 onlist = 0; 2229 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 2230 if (vr == sc) 2231 onlist = 1; 2232 if (vr->sc_vhid < sc->sc_vhid) 2233 after = vr; 2234 } 2235 2236 #ifdef INVARIANTS 2237 if (assert_onlist) { 2238 KASSERT(onlist, ("%s is not on %s carp list\n", 2239 sc->sc_if.if_xname, ifp->if_xname)); 2240 } else { 2241 KASSERT(!onlist, ("%s is already on %s carp list\n", 2242 sc->sc_if.if_xname, ifp->if_xname)); 2243 } 2244 #endif 2245 2246 if (!onlist) { 2247 /* We're trying to keep things in order */ 2248 if (after == NULL) 2249 TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list); 2250 else 2251 TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list); 2252 } 2253 2254 vha->vha_iaback = ia_if; 2255 sc->sc_naddrs++; 2256 2257 if (own) { 2258 vha->vha_flags |= CARP_VHAF_OWNER; 2259 2260 /* XXX save user configured advskew? */ 2261 sc->sc_advskew = 0; 2262 } 2263 2264 carp_hmac_prepare(sc); 2265 carp_set_state(sc, INIT); 2266 carp_setrun(sc, 0); 2267 return 0; 2268 cleanup: 2269 carp_multicast_cleanup(sc); 2270 return error; 2271 } 2272 2273 static void 2274 carp_deactivate_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha) 2275 { 2276 KKASSERT(vha->vha_ia != NULL); 2277 2278 carp_hmac_prepare(sc); 2279 2280 if (vha->vha_iaback == NULL) { 2281 KASSERT((vha->vha_flags & CARP_VHAF_OWNER) == 0, 2282 ("inactive vhaddr %p is the address owner\n", vha)); 2283 return; 2284 } 2285 2286 vha->vha_flags &= ~CARP_VHAF_OWNER; 2287 2288 KKASSERT(sc->sc_naddrs > 0); 2289 vha->vha_iaback = NULL; 2290 sc->sc_naddrs--; 2291 if (!sc->sc_naddrs) { 2292 if (sc->sc_naddrs6) { 2293 carp_multicast_cleanup(sc); 2294 sc->sc_ia = NULL; 2295 } else { 2296 carp_detach(sc, 0); 2297 } 2298 } 2299 } 2300 2301 static void 2302 carp_link_addrs(struct carp_softc *sc, struct ifnet *ifp, struct ifaddr *ifa_if) 2303 { 2304 struct carp_vhaddr *vha; 2305 struct in_ifaddr *ia_if; 2306 2307 KKASSERT(ifa_if->ifa_addr->sa_family == AF_INET); 2308 ia_if = ifatoia(ifa_if); 2309 2310 if ((ia_if->ia_flags & IFA_ROUTE) == 0) 2311 return; 2312 2313 /* 2314 * Test each inactive vhaddr against the newly added address. 2315 * If the newly added address could be the backing address, 2316 * then activate the matching vhaddr. 2317 */ 2318 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 2319 const struct in_ifaddr *ia; 2320 u_long iaddr; 2321 int own; 2322 2323 if (vha->vha_iaback != NULL) 2324 continue; 2325 2326 ia = vha->vha_ia; 2327 iaddr = ntohl(ia->ia_addr.sin_addr.s_addr); 2328 2329 if ((iaddr & ia_if->ia_subnetmask) != ia_if->ia_subnet) 2330 continue; 2331 2332 own = 0; 2333 if (ia->ia_addr.sin_addr.s_addr == 2334 ia_if->ia_addr.sin_addr.s_addr) 2335 own = 1; 2336 2337 carp_activate_vhaddr(sc, vha, ifp, ia_if, own); 2338 } 2339 } 2340 2341 static void 2342 carp_unlink_addrs(struct carp_softc *sc, struct ifnet *ifp, 2343 struct ifaddr *ifa_if) 2344 { 2345 struct carp_vhaddr *vha; 2346 struct in_ifaddr *ia_if; 2347 2348 KKASSERT(ifa_if->ifa_addr->sa_family == AF_INET); 2349 ia_if = ifatoia(ifa_if); 2350 2351 /* 2352 * Ad src address is deleted; set it to NULL. 2353 * Following loop will try pick up a new ad src address 2354 * if one of the vhaddr could retain its backing address. 2355 */ 2356 if (sc->sc_ia == ia_if) 2357 sc->sc_ia = NULL; 2358 2359 /* 2360 * Test each active vhaddr against the deleted address. 2361 * If the deleted address is vhaddr address's backing 2362 * address, then deactivate the vhaddr. 2363 */ 2364 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 2365 if (vha->vha_iaback == NULL) 2366 continue; 2367 2368 if (vha->vha_iaback == ia_if) 2369 carp_deactivate_vhaddr(sc, vha); 2370 else if (sc->sc_ia == NULL) 2371 sc->sc_ia = vha->vha_iaback; 2372 } 2373 } 2374 2375 static void 2376 carp_update_addrs(struct carp_softc *sc) 2377 { 2378 struct carp_vhaddr *vha; 2379 2380 KKASSERT(sc->sc_carpdev == NULL); 2381 2382 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) 2383 carp_config_vhaddr(sc, vha); 2384 } 2385 2386 static void 2387 carp_ifaddr(void *arg __unused, struct ifnet *ifp, 2388 enum ifaddr_event event, struct ifaddr *ifa) 2389 { 2390 struct carp_softc *sc; 2391 2392 if (ifa->ifa_addr->sa_family != AF_INET) 2393 return; 2394 2395 if (ifp->if_type == IFT_CARP) { 2396 /* 2397 * Address is changed on carp(4) interface 2398 */ 2399 switch (event) { 2400 case IFADDR_EVENT_ADD: 2401 carp_add_addr(ifp->if_softc, ifa); 2402 break; 2403 2404 case IFADDR_EVENT_CHANGE: 2405 carp_config_addr(ifp->if_softc, ifa); 2406 break; 2407 2408 case IFADDR_EVENT_DELETE: 2409 carp_del_addr(ifp->if_softc, ifa); 2410 break; 2411 } 2412 return; 2413 } 2414 2415 /* 2416 * Address is changed on non-carp(4) interface 2417 */ 2418 if ((ifp->if_flags & IFF_MULTICAST) == 0) 2419 return; 2420 2421 crit_enter(); 2422 LIST_FOREACH(sc, &carpif_list, sc_next) { 2423 if (sc->sc_carpdev != NULL && sc->sc_carpdev != ifp) { 2424 /* Not the parent iface; skip */ 2425 continue; 2426 } 2427 2428 switch (event) { 2429 case IFADDR_EVENT_ADD: 2430 carp_link_addrs(sc, ifp, ifa); 2431 break; 2432 2433 case IFADDR_EVENT_DELETE: 2434 if (sc->sc_carpdev != NULL) { 2435 carp_unlink_addrs(sc, ifp, ifa); 2436 if (sc->sc_carpdev == NULL) 2437 carp_update_addrs(sc); 2438 } else { 2439 /* 2440 * The carp(4) interface didn't have a 2441 * parent iface, so it is not possible 2442 * that it will contain any address to 2443 * be unlinked. 2444 */ 2445 } 2446 break; 2447 2448 case IFADDR_EVENT_CHANGE: 2449 if (sc->sc_carpdev == NULL) { 2450 /* 2451 * The carp(4) interface didn't have a 2452 * parent iface, so it is not possible 2453 * that it will contain any address to 2454 * be updated. 2455 */ 2456 carp_link_addrs(sc, ifp, ifa); 2457 } else { 2458 /* 2459 * First try breaking tie with the old 2460 * address. Then see whether we could 2461 * link certain vhaddr to the new address. 2462 * If that fails, i.e. carpdev is NULL, 2463 * we try a global update. 2464 * 2465 * NOTE: The above order is critical. 2466 */ 2467 carp_unlink_addrs(sc, ifp, ifa); 2468 carp_link_addrs(sc, ifp, ifa); 2469 if (sc->sc_carpdev == NULL) 2470 carp_update_addrs(sc); 2471 } 2472 break; 2473 } 2474 } 2475 crit_exit(); 2476 } 2477 2478 static int 2479 carp_modevent(module_t mod, int type, void *data) 2480 { 2481 switch (type) { 2482 case MOD_LOAD: 2483 LIST_INIT(&carpif_list); 2484 carp_ifdetach_event = 2485 EVENTHANDLER_REGISTER(ifnet_detach_event, carp_ifdetach, NULL, 2486 EVENTHANDLER_PRI_ANY); 2487 carp_ifaddr_event = 2488 EVENTHANDLER_REGISTER(ifaddr_event, carp_ifaddr, NULL, 2489 EVENTHANDLER_PRI_ANY); 2490 if_clone_attach(&carp_cloner); 2491 break; 2492 2493 case MOD_UNLOAD: 2494 EVENTHANDLER_DEREGISTER(ifnet_detach_event, 2495 carp_ifdetach_event); 2496 EVENTHANDLER_DEREGISTER(ifaddr_event, 2497 carp_ifaddr_event); 2498 if_clone_detach(&carp_cloner); 2499 break; 2500 2501 default: 2502 return (EINVAL); 2503 } 2504 return (0); 2505 } 2506 2507 static moduledata_t carp_mod = { 2508 "carp", 2509 carp_modevent, 2510 0 2511 }; 2512 DECLARE_MODULE(carp, carp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 2513