1 /* 2 * Copyright (c) 2002 Michael Shalayeff. All rights reserved. 3 * Copyright (c) 2003 Ryan McBride. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 18 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 22 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 23 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 24 * THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 /* 27 * $FreeBSD: src/sys/netinet/ip_carp.c,v 1.48 2007/02/02 09:39:09 glebius Exp $ 28 * $DragonFly: src/sys/netinet/ip_carp.c,v 1.10 2008/07/27 10:06:57 sephe Exp $ 29 */ 30 31 #include "opt_carp.h" 32 #include "opt_inet.h" 33 #include "opt_inet6.h" 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/kernel.h> 38 #include <sys/in_cksum.h> 39 #include <sys/limits.h> 40 #include <sys/malloc.h> 41 #include <sys/mbuf.h> 42 #include <sys/time.h> 43 #include <sys/proc.h> 44 #include <sys/sockio.h> 45 #include <sys/socket.h> 46 #include <sys/sysctl.h> 47 #include <sys/syslog.h> 48 49 #include <machine/stdarg.h> 50 #include <crypto/sha1.h> 51 52 #include <net/bpf.h> 53 #include <net/ethernet.h> 54 #include <net/if.h> 55 #include <net/if_dl.h> 56 #include <net/if_types.h> 57 #include <net/route.h> 58 #include <net/if_clone.h> 59 60 #ifdef INET 61 #include <netinet/in.h> 62 #include <netinet/in_var.h> 63 #include <netinet/in_systm.h> 64 #include <netinet/ip.h> 65 #include <netinet/ip_var.h> 66 #include <netinet/if_ether.h> 67 #endif 68 69 #ifdef INET6 70 #include <netinet/icmp6.h> 71 #include <netinet/ip6.h> 72 #include <netinet6/ip6_var.h> 73 #include <netinet6/scope6_var.h> 74 #include <netinet6/nd6.h> 75 #endif 76 77 #include <netinet/ip_carp.h> 78 79 #define CARP_IFNAME "carp" 80 #define CARP_IS_RUNNING(ifp) \ 81 (((ifp)->if_flags & (IFF_UP | IFF_RUNNING)) == (IFF_UP | IFF_RUNNING)) 82 83 struct carp_vhaddr { 84 uint32_t vha_flags; /* CARP_VHAF_ */ 85 const struct in_ifaddr *vha_ia; /* carp address */ 86 const struct in_ifaddr *vha_iaback; /* backing address */ 87 TAILQ_ENTRY(carp_vhaddr) vha_link; 88 }; 89 TAILQ_HEAD(carp_vhaddr_list, carp_vhaddr); 90 91 struct carp_softc { 92 struct ifnet sc_if; 93 struct ifnet *sc_carpdev; /* parent interface */ 94 struct carp_vhaddr_list sc_vha_list; /* virtual addr list */ 95 96 const struct in_ifaddr *sc_ia; /* primary iface address v4 */ 97 struct ip_moptions sc_imo; 98 99 #ifdef INET6 100 struct in6_ifaddr *sc_ia6; /* primary iface address v6 */ 101 struct ip6_moptions sc_im6o; 102 #endif /* INET6 */ 103 TAILQ_ENTRY(carp_softc) sc_list; 104 105 enum { INIT = 0, BACKUP, MASTER } 106 sc_state; 107 int sc_dead; 108 109 int sc_suppress; 110 111 int sc_sendad_errors; 112 #define CARP_SENDAD_MAX_ERRORS 3 113 int sc_sendad_success; 114 #define CARP_SENDAD_MIN_SUCCESS 3 115 116 int sc_vhid; 117 int sc_advskew; 118 int sc_naddrs; /* actually used IPv4 vha */ 119 int sc_naddrs6; 120 int sc_advbase; /* seconds */ 121 int sc_init_counter; 122 uint64_t sc_counter; 123 124 /* authentication */ 125 #define CARP_HMAC_PAD 64 126 unsigned char sc_key[CARP_KEY_LEN]; 127 unsigned char sc_pad[CARP_HMAC_PAD]; 128 SHA1_CTX sc_sha1; 129 130 struct callout sc_ad_tmo; /* advertisement timeout */ 131 struct callout sc_md_tmo; /* master down timeout */ 132 struct callout sc_md6_tmo; /* master down timeout */ 133 134 LIST_ENTRY(carp_softc) sc_next; /* Interface clue */ 135 }; 136 137 struct carp_if { 138 TAILQ_HEAD(, carp_softc) vhif_vrs; 139 }; 140 141 enum { CARP_COUNT_MASTER, CARP_COUNT_RUNNING }; 142 143 SYSCTL_DECL(_net_inet_carp); 144 145 static int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0 }; /* XXX for now */ 146 SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW, 147 &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets"); 148 SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW, 149 &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode"); 150 SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW, 151 &carp_opts[CARPCTL_LOG], 0, "log bad carp packets"); 152 SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW, 153 &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses"); 154 155 static int carp_suppress_preempt = 0; 156 SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD, 157 &carp_suppress_preempt, 0, "Preemption is suppressed"); 158 159 static struct carpstats carpstats; 160 SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW, 161 &carpstats, carpstats, 162 "CARP statistics (struct carpstats, netinet/ip_carp.h)"); 163 164 #define CARP_LOG(...) do { \ 165 if (carp_opts[CARPCTL_LOG] > 0) \ 166 log(LOG_INFO, __VA_ARGS__); \ 167 } while (0) 168 169 #define CARP_DEBUG(...) do { \ 170 if (carp_opts[CARPCTL_LOG] > 1) \ 171 log(LOG_DEBUG, __VA_ARGS__); \ 172 } while (0) 173 174 static void carp_hmac_prepare(struct carp_softc *); 175 static void carp_hmac_generate(struct carp_softc *, uint32_t *, 176 unsigned char *); 177 static int carp_hmac_verify(struct carp_softc *, uint32_t *, 178 unsigned char *); 179 static void carp_setroute(struct carp_softc *, int); 180 static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t); 181 static int carp_clone_create(struct if_clone *, int); 182 static void carp_clone_destroy(struct ifnet *); 183 static void carp_detach(struct carp_softc *, int); 184 static int carp_prepare_ad(struct mbuf *, struct carp_softc *, 185 struct carp_header *); 186 static void carp_send_ad_all(void); 187 static void carp_send_ad_timeout(void *); 188 static void carp_send_ad(struct carp_softc *); 189 static void carp_send_arp(struct carp_softc *); 190 static void carp_master_down_timeout(void *); 191 static void carp_master_down(struct carp_softc *); 192 static int carp_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *); 193 static int carp_looutput(struct ifnet *, struct mbuf *, struct sockaddr *, 194 struct rtentry *); 195 static void carp_start(struct ifnet *); 196 static void carp_setrun(struct carp_softc *, sa_family_t); 197 static void carp_set_state(struct carp_softc *, int); 198 199 static void carp_multicast_cleanup(struct carp_softc *); 200 static void carp_add_addr(struct carp_softc *, struct ifaddr *); 201 static void carp_del_addr(struct carp_softc *, struct ifaddr *); 202 static void carp_config_addr(struct carp_softc *, struct ifaddr *); 203 static void carp_link_addrs(struct carp_softc *, struct ifnet *, 204 struct ifaddr *); 205 static void carp_unlink_addrs(struct carp_softc *, struct ifnet *, 206 struct ifaddr *); 207 208 static int carp_get_vhaddr(struct carp_softc *, struct ifdrv *); 209 static int carp_config_vhaddr(struct carp_softc *, struct carp_vhaddr *); 210 static int carp_activate_vhaddr(struct carp_softc *, struct carp_vhaddr *, 211 struct ifnet *, const struct in_ifaddr *, int); 212 static void carp_deactivate_vhaddr(struct carp_softc *, 213 struct carp_vhaddr *); 214 215 static void carp_sc_state(struct carp_softc *); 216 #ifdef INET6 217 static void carp_send_na(struct carp_softc *); 218 static int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *); 219 static int carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *); 220 static void carp_multicast6_cleanup(struct carp_softc *); 221 #endif 222 static void carp_stop(struct carp_softc *, int); 223 static void carp_reset(struct carp_softc *, int); 224 225 static void carp_ifaddr(void *, struct ifnet *, enum ifaddr_event, 226 struct ifaddr *); 227 static void carp_ifdetach(void *, struct ifnet *); 228 229 static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces"); 230 231 static LIST_HEAD(, carp_softc) carpif_list; 232 233 static struct if_clone carp_cloner = 234 IF_CLONE_INITIALIZER(CARP_IFNAME, carp_clone_create, carp_clone_destroy, 235 0, IF_MAXUNIT); 236 237 static eventhandler_tag carp_ifdetach_event; 238 static eventhandler_tag carp_ifaddr_event; 239 240 static __inline void 241 ifa_set_prflags(struct ifaddr *ifa, uint16_t prflags) 242 { 243 int cpu; 244 245 for (cpu = 0; cpu < ncpus; ++cpu) 246 ifa->ifa_containers[cpu].ifa_prflags |= prflags; 247 } 248 249 static __inline void 250 carp_insert_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha_new) 251 { 252 struct carp_vhaddr *vha; 253 u_long new_addr, addr; 254 255 KKASSERT((vha_new->vha_flags & CARP_VHAF_ONLIST) == 0); 256 257 /* 258 * Virtual address list is sorted; smaller one first 259 */ 260 new_addr = ntohl(vha_new->vha_ia->ia_addr.sin_addr.s_addr); 261 262 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 263 addr = ntohl(vha->vha_ia->ia_addr.sin_addr.s_addr); 264 265 if (addr > new_addr) 266 break; 267 } 268 if (vha == NULL) 269 TAILQ_INSERT_TAIL(&sc->sc_vha_list, vha_new, vha_link); 270 else 271 TAILQ_INSERT_BEFORE(vha, vha_new, vha_link); 272 vha_new->vha_flags |= CARP_VHAF_ONLIST; 273 } 274 275 static __inline void 276 carp_remove_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha) 277 { 278 KKASSERT(vha->vha_flags & CARP_VHAF_ONLIST); 279 vha->vha_flags &= ~CARP_VHAF_ONLIST; 280 TAILQ_REMOVE(&sc->sc_vha_list, vha, vha_link); 281 } 282 283 static void 284 carp_hmac_prepare(struct carp_softc *sc) 285 { 286 uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT; 287 uint8_t vhid = sc->sc_vhid & 0xff; 288 struct ifaddr_container *ifac; 289 int i; 290 #ifdef INET6 291 struct in6_addr in6; 292 #endif 293 #ifdef INET 294 struct carp_vhaddr *vha; 295 #endif 296 297 /* XXX: possible race here */ 298 299 /* compute ipad from key */ 300 bzero(sc->sc_pad, sizeof(sc->sc_pad)); 301 bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key)); 302 for (i = 0; i < sizeof(sc->sc_pad); i++) 303 sc->sc_pad[i] ^= 0x36; 304 305 /* precompute first part of inner hash */ 306 SHA1Init(&sc->sc_sha1); 307 SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad)); 308 SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version)); 309 SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type)); 310 SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid)); 311 #ifdef INET 312 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 313 SHA1Update(&sc->sc_sha1, 314 (const uint8_t *)&vha->vha_ia->ia_addr.sin_addr, 315 sizeof(struct in_addr)); 316 } 317 #endif /* INET */ 318 #ifdef INET6 319 TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) { 320 struct ifaddr *ifa = ifac->ifa; 321 322 if (ifa->ifa_addr->sa_family == AF_INET6) { 323 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 324 in6_clearscope(&in6); 325 SHA1Update(&sc->sc_sha1, (void *)&in6, sizeof(in6)); 326 } 327 } 328 #endif /* INET6 */ 329 330 /* convert ipad to opad */ 331 for (i = 0; i < sizeof(sc->sc_pad); i++) 332 sc->sc_pad[i] ^= 0x36 ^ 0x5c; 333 } 334 335 static void 336 carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2], 337 unsigned char md[20]) 338 { 339 SHA1_CTX sha1ctx; 340 341 /* fetch first half of inner hash */ 342 bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx)); 343 344 SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter)); 345 SHA1Final(md, &sha1ctx); 346 347 /* outer hash */ 348 SHA1Init(&sha1ctx); 349 SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad)); 350 SHA1Update(&sha1ctx, md, 20); 351 SHA1Final(md, &sha1ctx); 352 } 353 354 static int 355 carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2], 356 unsigned char md[20]) 357 { 358 unsigned char md2[20]; 359 360 carp_hmac_generate(sc, counter, md2); 361 return (bcmp(md, md2, sizeof(md2))); 362 } 363 364 static void 365 carp_setroute(struct carp_softc *sc, int cmd) 366 { 367 #ifdef INET6 368 struct ifaddr_container *ifac; 369 370 crit_enter(); 371 TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) { 372 struct ifaddr *ifa = ifac->ifa; 373 374 if (ifa->ifa_addr->sa_family == AF_INET6) { 375 if (cmd == RTM_ADD) 376 in6_ifaddloop(ifa); 377 else 378 in6_ifremloop(ifa); 379 } 380 } 381 crit_exit(); 382 #endif /* INET6 */ 383 } 384 385 static int 386 carp_clone_create(struct if_clone *ifc, int unit) 387 { 388 struct carp_softc *sc; 389 struct ifnet *ifp; 390 391 sc = kmalloc(sizeof(*sc), M_CARP, M_WAITOK | M_ZERO); 392 ifp = &sc->sc_if; 393 394 sc->sc_suppress = 0; 395 sc->sc_advbase = CARP_DFLTINTV; 396 sc->sc_vhid = -1; /* required setting */ 397 sc->sc_advskew = 0; 398 sc->sc_init_counter = 1; 399 sc->sc_naddrs = 0; 400 sc->sc_naddrs6 = 0; 401 402 TAILQ_INIT(&sc->sc_vha_list); 403 404 #ifdef INET6 405 sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL; 406 #endif 407 408 callout_init(&sc->sc_ad_tmo); 409 callout_init(&sc->sc_md_tmo); 410 callout_init(&sc->sc_md6_tmo); 411 412 ifp->if_softc = sc; 413 if_initname(ifp, CARP_IFNAME, unit); 414 ifp->if_mtu = ETHERMTU; 415 ifp->if_flags = IFF_LOOPBACK; 416 ifp->if_ioctl = carp_ioctl; 417 ifp->if_output = carp_looutput; 418 ifp->if_start = carp_start; 419 ifp->if_type = IFT_CARP; 420 ifp->if_snd.ifq_maxlen = ifqmaxlen; 421 ifp->if_hdrlen = 0; 422 if_attach(ifp, NULL); 423 bpfattach(ifp, DLT_NULL, sizeof(u_int)); 424 425 crit_enter(); 426 LIST_INSERT_HEAD(&carpif_list, sc, sc_next); 427 crit_exit(); 428 429 return (0); 430 } 431 432 static void 433 carp_clone_destroy(struct ifnet *ifp) 434 { 435 struct carp_softc *sc = ifp->if_softc; 436 437 sc->sc_dead = 1; 438 carp_detach(sc, 1); 439 440 crit_enter(); 441 LIST_REMOVE(sc, sc_next); 442 crit_exit(); 443 bpfdetach(ifp); 444 if_detach(ifp); 445 446 KASSERT(sc->sc_naddrs == 0, ("certain inet address is still active\n")); 447 kfree(sc, M_CARP); 448 } 449 450 static void 451 carp_detach(struct carp_softc *sc, int detach) 452 { 453 struct carp_if *cif; 454 455 carp_reset(sc, detach); 456 457 carp_multicast_cleanup(sc); 458 #ifdef INET6 459 carp_multicast6_cleanup(sc); 460 #endif 461 462 if (!sc->sc_dead && detach) { 463 struct carp_vhaddr *vha; 464 465 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) 466 carp_deactivate_vhaddr(sc, vha); 467 KKASSERT(sc->sc_naddrs == 0); 468 } 469 470 if (sc->sc_carpdev != NULL) { 471 cif = sc->sc_carpdev->if_carp; 472 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 473 if (TAILQ_EMPTY(&cif->vhif_vrs)) { 474 ifpromisc(sc->sc_carpdev, 0); 475 sc->sc_carpdev->if_carp = NULL; 476 kfree(cif, M_CARP); 477 } 478 sc->sc_carpdev = NULL; 479 sc->sc_ia = NULL; 480 } 481 } 482 483 /* Detach an interface from the carp. */ 484 static void 485 carp_ifdetach(void *arg __unused, struct ifnet *ifp) 486 { 487 struct carp_if *cif = ifp->if_carp; 488 struct carp_softc *sc; 489 490 while (ifp->if_carp && 491 (sc = TAILQ_FIRST(&cif->vhif_vrs)) != NULL) 492 carp_detach(sc, 1); 493 } 494 495 /* 496 * process input packet. 497 * we have rearranged checks order compared to the rfc, 498 * but it seems more efficient this way or not possible otherwise. 499 */ 500 void 501 carp_input(struct mbuf *m, ...) 502 { 503 struct ip *ip = mtod(m, struct ip *); 504 struct carp_header *ch; 505 int len, iphlen; 506 __va_list ap; 507 508 __va_start(ap, m); 509 iphlen = __va_arg(ap, int); 510 __va_end(ap); 511 512 carpstats.carps_ipackets++; 513 514 if (!carp_opts[CARPCTL_ALLOW]) { 515 m_freem(m); 516 return; 517 } 518 519 /* Check if received on a valid carp interface */ 520 if (m->m_pkthdr.rcvif->if_carp == NULL) { 521 carpstats.carps_badif++; 522 CARP_LOG("carp_input: packet received on non-carp " 523 "interface: %s\n", 524 m->m_pkthdr.rcvif->if_xname); 525 m_freem(m); 526 return; 527 } 528 529 /* Verify that the IP TTL is CARP_DFLTTL. */ 530 if (ip->ip_ttl != CARP_DFLTTL) { 531 carpstats.carps_badttl++; 532 CARP_LOG("carp_input: received ttl %d != %d on %s\n", 533 ip->ip_ttl, CARP_DFLTTL, 534 m->m_pkthdr.rcvif->if_xname); 535 m_freem(m); 536 return; 537 } 538 539 /* Minimal CARP packet size */ 540 len = iphlen + sizeof(*ch); 541 542 /* 543 * Verify that the received packet length is 544 * not less than the CARP header 545 */ 546 if (m->m_pkthdr.len < len) { 547 carpstats.carps_badlen++; 548 CARP_LOG("packet too short %d on %s\n", m->m_pkthdr.len, 549 m->m_pkthdr.rcvif->if_xname); 550 m_freem(m); 551 return; 552 } 553 554 /* Make sure that CARP header is contiguous */ 555 if (len > m->m_len) { 556 m = m_pullup(m, len); 557 if (m == NULL) { 558 carpstats.carps_hdrops++; 559 CARP_LOG("carp_input: m_pullup failed\n"); 560 return; 561 } 562 ip = mtod(m, struct ip *); 563 } 564 ch = (struct carp_header *)((uint8_t *)ip + iphlen); 565 566 /* Verify the CARP checksum */ 567 if (in_cksum_skip(m, len, iphlen)) { 568 carpstats.carps_badsum++; 569 CARP_LOG("carp_input: checksum failed on %s\n", 570 m->m_pkthdr.rcvif->if_xname); 571 m_freem(m); 572 return; 573 } 574 carp_input_c(m, ch, AF_INET); 575 } 576 577 #ifdef INET6 578 int 579 carp6_input(struct mbuf **mp, int *offp, int proto) 580 { 581 struct mbuf *m = *mp; 582 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 583 struct carp_header *ch; 584 u_int len; 585 586 carpstats.carps_ipackets6++; 587 588 if (!carp_opts[CARPCTL_ALLOW]) { 589 m_freem(m); 590 return (IPPROTO_DONE); 591 } 592 593 /* check if received on a valid carp interface */ 594 if (m->m_pkthdr.rcvif->if_carp == NULL) { 595 carpstats.carps_badif++; 596 CARP_LOG("carp6_input: packet received on non-carp " 597 "interface: %s\n", 598 m->m_pkthdr.rcvif->if_xname); 599 m_freem(m); 600 return (IPPROTO_DONE); 601 } 602 603 /* verify that the IP TTL is 255 */ 604 if (ip6->ip6_hlim != CARP_DFLTTL) { 605 carpstats.carps_badttl++; 606 CARP_LOG("carp6_input: received ttl %d != 255 on %s\n", 607 ip6->ip6_hlim, 608 m->m_pkthdr.rcvif->if_xname); 609 m_freem(m); 610 return (IPPROTO_DONE); 611 } 612 613 /* verify that we have a complete carp packet */ 614 len = m->m_len; 615 IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch)); 616 if (ch == NULL) { 617 carpstats.carps_badlen++; 618 CARP_LOG("carp6_input: packet size %u too small\n", len); 619 return (IPPROTO_DONE); 620 } 621 622 /* verify the CARP checksum */ 623 if (in_cksum_range(m, 0, *offp, sizeof(*ch))) { 624 carpstats.carps_badsum++; 625 CARP_LOG("carp6_input: checksum failed, on %s\n", 626 m->m_pkthdr.rcvif->if_xname); 627 m_freem(m); 628 return (IPPROTO_DONE); 629 } 630 631 carp_input_c(m, ch, AF_INET6); 632 return (IPPROTO_DONE); 633 } 634 #endif /* INET6 */ 635 636 static void 637 carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af) 638 { 639 struct ifnet *ifp = m->m_pkthdr.rcvif; 640 struct ifnet *cifp; 641 struct carp_softc *sc; 642 uint64_t tmp_counter; 643 struct timeval sc_tv, ch_tv; 644 645 /* verify that the VHID is valid on the receiving interface */ 646 TAILQ_FOREACH(sc, &((struct carp_if *)ifp->if_carp)->vhif_vrs, sc_list) 647 if (sc->sc_vhid == ch->carp_vhid) 648 break; 649 650 if (!sc || !CARP_IS_RUNNING(&sc->sc_if)) { 651 carpstats.carps_badvhid++; 652 m_freem(m); 653 return; 654 } 655 cifp = &sc->sc_if; 656 657 getmicrotime(&cifp->if_lastchange); 658 cifp->if_ipackets++; 659 cifp->if_ibytes += m->m_pkthdr.len; 660 661 if (cifp->if_bpf) { 662 struct ip *ip = mtod(m, struct ip *); 663 664 /* BPF wants net byte order */ 665 ip->ip_len = htons(ip->ip_len + (ip->ip_hl << 2)); 666 ip->ip_off = htons(ip->ip_off); 667 bpf_mtap(cifp->if_bpf, m); 668 } 669 670 /* verify the CARP version. */ 671 if (ch->carp_version != CARP_VERSION) { 672 carpstats.carps_badver++; 673 cifp->if_ierrors++; 674 CARP_LOG("%s; invalid version %d\n", cifp->if_xname, 675 ch->carp_version); 676 m_freem(m); 677 return; 678 } 679 680 /* verify the hash */ 681 if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) { 682 carpstats.carps_badauth++; 683 cifp->if_ierrors++; 684 CARP_LOG("%s: incorrect hash\n", cifp->if_xname); 685 m_freem(m); 686 return; 687 } 688 689 tmp_counter = ntohl(ch->carp_counter[0]); 690 tmp_counter = tmp_counter<<32; 691 tmp_counter += ntohl(ch->carp_counter[1]); 692 693 /* XXX Replay protection goes here */ 694 695 sc->sc_init_counter = 0; 696 sc->sc_counter = tmp_counter; 697 698 sc_tv.tv_sec = sc->sc_advbase; 699 if (carp_suppress_preempt && sc->sc_advskew < 240) 700 sc_tv.tv_usec = 240 * 1000000 / 256; 701 else 702 sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256; 703 ch_tv.tv_sec = ch->carp_advbase; 704 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; 705 706 switch (sc->sc_state) { 707 case INIT: 708 break; 709 710 case MASTER: 711 /* 712 * If we receive an advertisement from a master who's going to 713 * be more frequent than us, go into BACKUP state. 714 */ 715 if (timevalcmp(&sc_tv, &ch_tv, >) || 716 timevalcmp(&sc_tv, &ch_tv, ==)) { 717 callout_stop(&sc->sc_ad_tmo); 718 CARP_DEBUG("%s: MASTER -> BACKUP " 719 "(more frequent advertisement received)\n", 720 cifp->if_xname); 721 carp_set_state(sc, BACKUP); 722 carp_setrun(sc, 0); 723 carp_setroute(sc, RTM_DELETE); 724 } 725 break; 726 727 case BACKUP: 728 /* 729 * If we're pre-empting masters who advertise slower than us, 730 * and this one claims to be slower, treat him as down. 731 */ 732 if (carp_opts[CARPCTL_PREEMPT] && 733 timevalcmp(&sc_tv, &ch_tv, <)) { 734 CARP_DEBUG("%s: BACKUP -> MASTER " 735 "(preempting a slower master)\n", cifp->if_xname); 736 carp_master_down(sc); 737 break; 738 } 739 740 /* 741 * If the master is going to advertise at such a low frequency 742 * that he's guaranteed to time out, we'd might as well just 743 * treat him as timed out now. 744 */ 745 sc_tv.tv_sec = sc->sc_advbase * 3; 746 if (timevalcmp(&sc_tv, &ch_tv, <)) { 747 CARP_DEBUG("%s: BACKUP -> MASTER (master timed out)\n", 748 cifp->if_xname); 749 carp_master_down(sc); 750 break; 751 } 752 753 /* 754 * Otherwise, we reset the counter and wait for the next 755 * advertisement. 756 */ 757 carp_setrun(sc, af); 758 break; 759 } 760 m_freem(m); 761 } 762 763 static int 764 carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) 765 { 766 struct ifnet *cifp = &sc->sc_if; 767 struct m_tag *mtag; 768 769 if (sc->sc_init_counter) { 770 /* this could also be seconds since unix epoch */ 771 sc->sc_counter = karc4random(); 772 sc->sc_counter = sc->sc_counter << 32; 773 sc->sc_counter += karc4random(); 774 } else { 775 sc->sc_counter++; 776 } 777 778 ch->carp_counter[0] = htonl((sc->sc_counter >> 32) & 0xffffffff); 779 ch->carp_counter[1] = htonl(sc->sc_counter & 0xffffffff); 780 781 carp_hmac_generate(sc, ch->carp_counter, ch->carp_md); 782 783 /* Tag packet for carp_output */ 784 mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct ifnet *), MB_DONTWAIT); 785 if (mtag == NULL) { 786 m_freem(m); 787 cifp->if_oerrors++; 788 return ENOMEM; 789 } 790 bcopy(&cifp, (caddr_t)(mtag + 1), sizeof(struct ifnet *)); 791 m_tag_prepend(m, mtag); 792 793 return 0; 794 } 795 796 static void 797 carp_send_ad_all(void) 798 { 799 struct carp_softc *sc; 800 801 LIST_FOREACH(sc, &carpif_list, sc_next) { 802 if (sc->sc_carpdev == NULL) 803 continue; 804 805 if (CARP_IS_RUNNING(&sc->sc_if) && sc->sc_state == MASTER) 806 carp_send_ad(sc); 807 } 808 } 809 810 static void 811 carp_send_ad_timeout(void *xsc) 812 { 813 carp_send_ad(xsc); 814 } 815 816 static void 817 carp_send_ad(struct carp_softc *sc) 818 { 819 struct ifnet *cifp = &sc->sc_if; 820 struct carp_header ch; 821 struct timeval tv; 822 struct carp_header *ch_ptr; 823 struct mbuf *m; 824 int len, advbase, advskew; 825 826 if (!CARP_IS_RUNNING(cifp)) { 827 /* Bow out */ 828 advbase = 255; 829 advskew = 255; 830 } else { 831 advbase = sc->sc_advbase; 832 if (!carp_suppress_preempt || sc->sc_advskew > 240) 833 advskew = sc->sc_advskew; 834 else 835 advskew = 240; 836 tv.tv_sec = advbase; 837 tv.tv_usec = advskew * 1000000 / 256; 838 } 839 840 ch.carp_version = CARP_VERSION; 841 ch.carp_type = CARP_ADVERTISEMENT; 842 ch.carp_vhid = sc->sc_vhid; 843 ch.carp_advbase = advbase; 844 ch.carp_advskew = advskew; 845 ch.carp_authlen = 7; /* XXX DEFINE */ 846 ch.carp_pad1 = 0; /* must be zero */ 847 ch.carp_cksum = 0; 848 849 #ifdef INET 850 if (sc->sc_ia != NULL) { 851 struct ip *ip; 852 853 MGETHDR(m, MB_DONTWAIT, MT_HEADER); 854 if (m == NULL) { 855 cifp->if_oerrors++; 856 carpstats.carps_onomem++; 857 /* XXX maybe less ? */ 858 if (advbase != 255 || advskew != 255) 859 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv), 860 carp_send_ad_timeout, sc); 861 return; 862 } 863 len = sizeof(*ip) + sizeof(ch); 864 m->m_pkthdr.len = len; 865 m->m_pkthdr.rcvif = NULL; 866 m->m_len = len; 867 MH_ALIGN(m, m->m_len); 868 m->m_flags |= M_MCAST; 869 ip = mtod(m, struct ip *); 870 ip->ip_v = IPVERSION; 871 ip->ip_hl = sizeof(*ip) >> 2; 872 ip->ip_tos = IPTOS_LOWDELAY; 873 ip->ip_len = len; 874 ip->ip_id = ip_newid(); 875 ip->ip_off = IP_DF; 876 ip->ip_ttl = CARP_DFLTTL; 877 ip->ip_p = IPPROTO_CARP; 878 ip->ip_sum = 0; 879 ip->ip_src = sc->sc_ia->ia_addr.sin_addr; 880 ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP); 881 882 ch_ptr = (struct carp_header *)(&ip[1]); 883 bcopy(&ch, ch_ptr, sizeof(ch)); 884 if (carp_prepare_ad(m, sc, ch_ptr)) 885 return; 886 ch_ptr->carp_cksum = in_cksum_skip(m, len, sizeof(*ip)); 887 888 getmicrotime(&cifp->if_lastchange); 889 cifp->if_opackets++; 890 cifp->if_obytes += len; 891 carpstats.carps_opackets++; 892 893 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) { 894 cifp->if_oerrors++; 895 if (sc->sc_sendad_errors < INT_MAX) 896 sc->sc_sendad_errors++; 897 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 898 carp_suppress_preempt++; 899 if (carp_suppress_preempt == 1) { 900 carp_send_ad_all(); 901 } 902 } 903 sc->sc_sendad_success = 0; 904 } else { 905 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 906 if (++sc->sc_sendad_success >= 907 CARP_SENDAD_MIN_SUCCESS) { 908 carp_suppress_preempt--; 909 sc->sc_sendad_errors = 0; 910 } 911 } else { 912 sc->sc_sendad_errors = 0; 913 } 914 } 915 } 916 #endif /* INET */ 917 #ifdef INET6 918 if (sc->sc_ia6) { 919 struct ip6_hdr *ip6; 920 921 MGETHDR(m, MB_DONTWAIT, MT_HEADER); 922 if (m == NULL) { 923 cifp->if_oerrors++; 924 carpstats.carps_onomem++; 925 /* XXX maybe less ? */ 926 if (advbase != 255 || advskew != 255) 927 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv), 928 carp_send_ad_timeout, sc); 929 return; 930 } 931 len = sizeof(*ip6) + sizeof(ch); 932 m->m_pkthdr.len = len; 933 m->m_pkthdr.rcvif = NULL; 934 m->m_len = len; 935 MH_ALIGN(m, m->m_len); 936 m->m_flags |= M_MCAST; 937 ip6 = mtod(m, struct ip6_hdr *); 938 bzero(ip6, sizeof(*ip6)); 939 ip6->ip6_vfc |= IPV6_VERSION; 940 ip6->ip6_hlim = CARP_DFLTTL; 941 ip6->ip6_nxt = IPPROTO_CARP; 942 bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src, 943 sizeof(struct in6_addr)); 944 /* set the multicast destination */ 945 946 ip6->ip6_dst.s6_addr16[0] = htons(0xff02); 947 ip6->ip6_dst.s6_addr8[15] = 0x12; 948 if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) { 949 cifp->if_oerrors++; 950 m_freem(m); 951 CARP_LOG("%s: in6_setscope failed\n", __func__); 952 return; 953 } 954 955 ch_ptr = (struct carp_header *)(&ip6[1]); 956 bcopy(&ch, ch_ptr, sizeof(ch)); 957 if (carp_prepare_ad(m, sc, ch_ptr)) 958 return; 959 ch_ptr->carp_cksum = in_cksum_skip(m, len, sizeof(*ip6)); 960 961 getmicrotime(&cifp->if_lastchange); 962 cifp->if_opackets++; 963 cifp->if_obytes += len; 964 carpstats.carps_opackets6++; 965 966 if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) { 967 cifp->if_oerrors++; 968 if (sc->sc_sendad_errors < INT_MAX) 969 sc->sc_sendad_errors++; 970 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 971 carp_suppress_preempt++; 972 if (carp_suppress_preempt == 1) { 973 carp_send_ad_all(); 974 } 975 } 976 sc->sc_sendad_success = 0; 977 } else { 978 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 979 if (++sc->sc_sendad_success >= 980 CARP_SENDAD_MIN_SUCCESS) { 981 carp_suppress_preempt--; 982 sc->sc_sendad_errors = 0; 983 } 984 } else { 985 sc->sc_sendad_errors = 0; 986 } 987 } 988 } 989 #endif /* INET6 */ 990 991 if (advbase != 255 || advskew != 255) 992 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv), 993 carp_send_ad_timeout, sc); 994 } 995 996 /* 997 * Broadcast a gratuitous ARP request containing 998 * the virtual router MAC address for each IP address 999 * associated with the virtual router. 1000 */ 1001 static void 1002 carp_send_arp(struct carp_softc *sc) 1003 { 1004 const struct carp_vhaddr *vha; 1005 1006 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 1007 if (vha->vha_iaback == NULL) 1008 continue; 1009 1010 arp_iainit(sc->sc_carpdev, &vha->vha_ia->ia_addr.sin_addr, 1011 IF_LLADDR(&sc->sc_if)); 1012 } 1013 } 1014 1015 #ifdef INET6 1016 static void 1017 carp_send_na(struct carp_softc *sc) 1018 { 1019 struct ifaddr_container *ifac; 1020 struct in6_addr *in6; 1021 static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; 1022 1023 TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) { 1024 struct ifaddr *ifa = ifac->ifa; 1025 1026 if (ifa->ifa_addr->sa_family != AF_INET6) 1027 continue; 1028 1029 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr; 1030 nd6_na_output(sc->sc_carpdev, &mcast, in6, 1031 ND_NA_FLAG_OVERRIDE, 1, NULL); 1032 DELAY(1000); /* XXX */ 1033 } 1034 } 1035 #endif /* INET6 */ 1036 1037 static __inline const struct carp_vhaddr * 1038 carp_find_addr(const struct carp_softc *sc, const struct in_addr *addr) 1039 { 1040 struct carp_vhaddr *vha; 1041 1042 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 1043 if (vha->vha_iaback == NULL) 1044 continue; 1045 1046 if (vha->vha_ia->ia_addr.sin_addr.s_addr == addr->s_addr) 1047 return vha; 1048 } 1049 return NULL; 1050 } 1051 1052 static int 1053 carp_iamatch_balance(const struct carp_if *cif, const struct in_addr *itaddr, 1054 const struct in_addr *isaddr, uint8_t **enaddr) 1055 { 1056 const struct carp_softc *vh; 1057 int index, count = 0; 1058 1059 /* 1060 * XXX proof of concept implementation. 1061 * We use the source ip to decide which virtual host should 1062 * handle the request. If we're master of that virtual host, 1063 * then we respond, otherwise, just drop the arp packet on 1064 * the floor. 1065 */ 1066 1067 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1068 if (!CARP_IS_RUNNING(&vh->sc_if)) 1069 continue; 1070 1071 if (carp_find_addr(vh, itaddr) != NULL) 1072 count++; 1073 } 1074 if (count == 0) 1075 return 0; 1076 1077 /* this should be a hash, like pf_hash() */ 1078 index = ntohl(isaddr->s_addr) % count; 1079 count = 0; 1080 1081 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1082 if (!CARP_IS_RUNNING(&vh->sc_if)) 1083 continue; 1084 1085 if (carp_find_addr(vh, itaddr) == NULL) 1086 continue; 1087 1088 if (count == index) { 1089 if (vh->sc_state == MASTER) { 1090 *enaddr = IF_LLADDR(&vh->sc_if); 1091 return 1; 1092 } else { 1093 return 0; 1094 } 1095 } 1096 count++; 1097 } 1098 return 0; 1099 } 1100 1101 int 1102 carp_iamatch(const void *v, const struct in_addr *itaddr, 1103 const struct in_addr *isaddr, uint8_t **enaddr) 1104 { 1105 const struct carp_if *cif = v; 1106 const struct carp_softc *vh; 1107 1108 if (carp_opts[CARPCTL_ARPBALANCE]) 1109 return carp_iamatch_balance(cif, itaddr, isaddr, enaddr); 1110 1111 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1112 if (!CARP_IS_RUNNING(&vh->sc_if) || vh->sc_state != MASTER) 1113 continue; 1114 1115 if (carp_find_addr(vh, itaddr) != NULL) { 1116 *enaddr = IF_LLADDR(&vh->sc_if); 1117 return 1; 1118 } 1119 } 1120 return 0; 1121 } 1122 1123 #ifdef INET6 1124 struct ifaddr * 1125 carp_iamatch6(void *v, struct in6_addr *taddr) 1126 { 1127 struct carp_if *cif = v; 1128 struct carp_softc *vh; 1129 1130 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1131 struct ifaddr_container *ifac; 1132 1133 TAILQ_FOREACH(ifac, &vh->sc_if.if_addrheads[mycpuid], 1134 ifa_link) { 1135 struct ifaddr *ifa = ifac->ifa; 1136 1137 if (IN6_ARE_ADDR_EQUAL(taddr, 1138 &ifatoia6(ifa)->ia_addr.sin6_addr) && 1139 CARP_IS_RUNNING(&vh->sc_if) && 1140 vh->sc_state == MASTER) { 1141 return (ifa); 1142 } 1143 } 1144 } 1145 return (NULL); 1146 } 1147 1148 void * 1149 carp_macmatch6(void *v, struct mbuf *m, const struct in6_addr *taddr) 1150 { 1151 struct m_tag *mtag; 1152 struct carp_if *cif = v; 1153 struct carp_softc *sc; 1154 1155 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) { 1156 struct ifaddr_container *ifac; 1157 1158 TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], 1159 ifa_link) { 1160 struct ifaddr *ifa = ifac->ifa; 1161 1162 if (IN6_ARE_ADDR_EQUAL(taddr, 1163 &ifatoia6(ifa)->ia_addr.sin6_addr) && 1164 CARP_IS_RUNNING(&sc->sc_if)) { 1165 struct ifnet *ifp = &sc->sc_if; 1166 1167 mtag = m_tag_get(PACKET_TAG_CARP, 1168 sizeof(struct ifnet *), MB_DONTWAIT); 1169 if (mtag == NULL) { 1170 /* better a bit than nothing */ 1171 return (IF_LLADDR(ifp)); 1172 } 1173 bcopy(&ifp, (caddr_t)(mtag + 1), 1174 sizeof(struct ifnet *)); 1175 m_tag_prepend(m, mtag); 1176 1177 return (IF_LLADDR(ifp)); 1178 } 1179 } 1180 } 1181 return (NULL); 1182 } 1183 #endif 1184 1185 int 1186 carp_forus(const void *v, const void *dhost) 1187 { 1188 const struct carp_if *cif = v; 1189 const struct carp_softc *vh; 1190 const uint8_t *ena = dhost; 1191 1192 if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1) 1193 return 0; 1194 1195 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1196 const struct ifnet *cifp = &vh->sc_if; 1197 1198 if (CARP_IS_RUNNING(cifp) && vh->sc_state == MASTER && 1199 !bcmp(dhost, IF_LLADDR(cifp), ETHER_ADDR_LEN)) 1200 return 1; 1201 } 1202 return 0; 1203 } 1204 1205 static void 1206 carp_master_down_timeout(void *xsc) 1207 { 1208 struct carp_softc *sc = xsc; 1209 1210 CARP_DEBUG("%s: BACKUP -> MASTER (master timed out)\n", 1211 sc->sc_if.if_xname); 1212 carp_master_down(sc); 1213 } 1214 1215 static void 1216 carp_master_down(struct carp_softc *sc) 1217 { 1218 switch (sc->sc_state) { 1219 case INIT: 1220 kprintf("%s: master_down event in INIT state\n", 1221 sc->sc_if.if_xname); 1222 break; 1223 1224 case MASTER: 1225 break; 1226 1227 case BACKUP: 1228 carp_set_state(sc, MASTER); 1229 carp_send_ad(sc); 1230 carp_send_arp(sc); 1231 #ifdef INET6 1232 carp_send_na(sc); 1233 #endif /* INET6 */ 1234 carp_setrun(sc, 0); 1235 carp_setroute(sc, RTM_ADD); 1236 break; 1237 } 1238 } 1239 1240 /* 1241 * When in backup state, af indicates whether to reset the master down timer 1242 * for v4 or v6. If it's set to zero, reset the ones which are already pending. 1243 */ 1244 static void 1245 carp_setrun(struct carp_softc *sc, sa_family_t af) 1246 { 1247 struct ifnet *cifp = &sc->sc_if; 1248 struct timeval tv; 1249 1250 if (sc->sc_carpdev == NULL) { 1251 carp_set_state(sc, INIT); 1252 return; 1253 } 1254 1255 if ((cifp->if_flags & IFF_RUNNING) && sc->sc_vhid > 0 && 1256 (sc->sc_naddrs || sc->sc_naddrs6)) { 1257 /* Nothing */ 1258 } else { 1259 carp_setroute(sc, RTM_DELETE); 1260 return; 1261 } 1262 1263 switch (sc->sc_state) { 1264 case INIT: 1265 if (carp_opts[CARPCTL_PREEMPT] && !carp_suppress_preempt) { 1266 carp_send_ad(sc); 1267 carp_send_arp(sc); 1268 #ifdef INET6 1269 carp_send_na(sc); 1270 #endif /* INET6 */ 1271 CARP_DEBUG("%s: INIT -> MASTER (preempting)\n", 1272 cifp->if_xname); 1273 carp_set_state(sc, MASTER); 1274 carp_setroute(sc, RTM_ADD); 1275 } else { 1276 CARP_DEBUG("%s: INIT -> BACKUP\n", cifp->if_xname); 1277 carp_set_state(sc, BACKUP); 1278 carp_setroute(sc, RTM_DELETE); 1279 carp_setrun(sc, 0); 1280 } 1281 break; 1282 1283 case BACKUP: 1284 callout_stop(&sc->sc_ad_tmo); 1285 tv.tv_sec = 3 * sc->sc_advbase; 1286 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1287 switch (af) { 1288 #ifdef INET 1289 case AF_INET: 1290 callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv), 1291 carp_master_down_timeout, sc); 1292 break; 1293 #endif /* INET */ 1294 #ifdef INET6 1295 case AF_INET6: 1296 callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv), 1297 carp_master_down_timeout, sc); 1298 break; 1299 #endif /* INET6 */ 1300 default: 1301 if (sc->sc_naddrs) 1302 callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv), 1303 carp_master_down_timeout, sc); 1304 if (sc->sc_naddrs6) 1305 callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv), 1306 carp_master_down_timeout, sc); 1307 break; 1308 } 1309 break; 1310 1311 case MASTER: 1312 tv.tv_sec = sc->sc_advbase; 1313 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1314 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv), 1315 carp_send_ad_timeout, sc); 1316 break; 1317 } 1318 } 1319 1320 static void 1321 carp_multicast_cleanup(struct carp_softc *sc) 1322 { 1323 struct ip_moptions *imo = &sc->sc_imo; 1324 1325 if (imo->imo_num_memberships == 0) 1326 return; 1327 KKASSERT(imo->imo_num_memberships == 1); 1328 1329 in_delmulti(imo->imo_membership[0]); 1330 imo->imo_membership[0] = NULL; 1331 imo->imo_num_memberships = 0; 1332 imo->imo_multicast_ifp = NULL; 1333 } 1334 1335 #ifdef INET6 1336 static void 1337 carp_multicast6_cleanup(struct carp_softc *sc) 1338 { 1339 struct ip6_moptions *im6o = &sc->sc_im6o; 1340 1341 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 1342 struct in6_multi_mship *imm = 1343 LIST_FIRST(&im6o->im6o_memberships); 1344 1345 LIST_REMOVE(imm, i6mm_chain); 1346 in6_leavegroup(imm); 1347 } 1348 im6o->im6o_multicast_ifp = NULL; 1349 } 1350 #endif 1351 1352 static int 1353 carp_get_vhaddr(struct carp_softc *sc, struct ifdrv *ifd) 1354 { 1355 const struct carp_vhaddr *vha; 1356 struct ifcarpvhaddr *carpa, *carpa0; 1357 int count, len, error; 1358 1359 count = 0; 1360 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) 1361 ++count; 1362 1363 if (ifd->ifd_len == 0) { 1364 ifd->ifd_len = count * sizeof(*carpa); 1365 return 0; 1366 } else if (count == 0 || ifd->ifd_len < sizeof(*carpa)) { 1367 ifd->ifd_len = 0; 1368 return 0; 1369 } 1370 len = min(ifd->ifd_len, sizeof(*carpa) * count); 1371 KKASSERT(len >= sizeof(*carpa)); 1372 1373 carpa0 = carpa = kmalloc(len, M_TEMP, M_WAITOK | M_NULLOK | M_ZERO); 1374 if (carpa == NULL) 1375 return ENOMEM; 1376 1377 count = 0; 1378 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 1379 if (len < sizeof(*carpa)) 1380 break; 1381 1382 carpa->carpa_flags = vha->vha_flags; 1383 carpa->carpa_addr.sin_family = AF_INET; 1384 carpa->carpa_addr.sin_addr = vha->vha_ia->ia_addr.sin_addr; 1385 1386 carpa->carpa_baddr.sin_family = AF_INET; 1387 if (vha->vha_iaback == NULL) { 1388 carpa->carpa_baddr.sin_addr.s_addr = INADDR_ANY; 1389 } else { 1390 carpa->carpa_baddr.sin_addr = 1391 vha->vha_iaback->ia_addr.sin_addr; 1392 } 1393 1394 ++carpa; 1395 ++count; 1396 len -= sizeof(*carpa); 1397 } 1398 ifd->ifd_len = sizeof(*carpa) * count; 1399 KKASSERT(ifd->ifd_len > 0); 1400 1401 error = copyout(carpa0, ifd->ifd_data, ifd->ifd_len); 1402 kfree(carpa0, M_TEMP); 1403 return error; 1404 } 1405 1406 static int 1407 carp_config_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha) 1408 { 1409 struct ifnet *ifp; 1410 struct in_ifaddr *ia_if; 1411 struct in_ifaddr_container *iac; 1412 const struct sockaddr_in *sin; 1413 u_long iaddr; 1414 int own; 1415 1416 KKASSERT(vha->vha_ia != NULL); 1417 1418 sin = &vha->vha_ia->ia_addr; 1419 iaddr = ntohl(sin->sin_addr.s_addr); 1420 1421 ia_if = NULL; 1422 own = 0; 1423 TAILQ_FOREACH(iac, &in_ifaddrheads[mycpuid], ia_link) { 1424 struct in_ifaddr *ia = iac->ia; 1425 1426 if ((ia->ia_flags & IFA_ROUTE) == 0) 1427 continue; 1428 1429 if (ia->ia_ifp->if_type == IFT_CARP) 1430 continue; 1431 1432 /* and, yeah, we need a multicast-capable iface too */ 1433 if ((ia->ia_ifp->if_flags & IFF_MULTICAST) == 0) 1434 continue; 1435 1436 if ((iaddr & ia->ia_subnetmask) == ia->ia_subnet) { 1437 if (sin->sin_addr.s_addr == 1438 ia->ia_addr.sin_addr.s_addr) 1439 own = 1; 1440 if (ia_if == NULL) 1441 ia_if = ia; 1442 else if (sc->sc_carpdev != NULL && 1443 sc->sc_carpdev == ia->ia_ifp) 1444 ia_if = ia; 1445 } 1446 } 1447 1448 carp_deactivate_vhaddr(sc, vha); 1449 if (!ia_if) 1450 return ENOENT; 1451 1452 ifp = ia_if->ia_ifp; 1453 1454 /* XXX Don't allow parent iface to be changed */ 1455 if (sc->sc_carpdev != NULL && sc->sc_carpdev != ifp) 1456 return EEXIST; 1457 1458 return carp_activate_vhaddr(sc, vha, ifp, ia_if, own); 1459 } 1460 1461 static void 1462 carp_add_addr(struct carp_softc *sc, struct ifaddr *carp_ifa) 1463 { 1464 struct carp_vhaddr *vha_new; 1465 struct in_ifaddr *carp_ia; 1466 #ifdef INVARIANTS 1467 struct carp_vhaddr *vha; 1468 #endif 1469 1470 KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET); 1471 carp_ia = ifatoia(carp_ifa); 1472 1473 #ifdef INVARIANTS 1474 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) 1475 KKASSERT(vha->vha_ia != NULL && vha->vha_ia != carp_ia); 1476 #endif 1477 1478 vha_new = kmalloc(sizeof(*vha_new), M_CARP, M_WAITOK | M_ZERO); 1479 vha_new->vha_ia = carp_ia; 1480 carp_insert_vhaddr(sc, vha_new); 1481 1482 if (carp_config_vhaddr(sc, vha_new) != 0) { 1483 /* 1484 * If the above configuration fails, it may only mean 1485 * that the new address is problematic. However, the 1486 * carp(4) interface may already have several working 1487 * addresses. Since the expected behaviour of 1488 * SIOC[AS]IFADDR is to put the NIC into working state, 1489 * we try starting the state machine manually here with 1490 * the hope that the carp(4)'s previously working 1491 * addresses still could be brought up. 1492 */ 1493 carp_hmac_prepare(sc); 1494 carp_set_state(sc, INIT); 1495 carp_setrun(sc, 0); 1496 } 1497 } 1498 1499 static void 1500 carp_del_addr(struct carp_softc *sc, struct ifaddr *carp_ifa) 1501 { 1502 struct carp_vhaddr *vha; 1503 struct in_ifaddr *carp_ia; 1504 1505 KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET); 1506 carp_ia = ifatoia(carp_ifa); 1507 1508 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 1509 KKASSERT(vha->vha_ia != NULL); 1510 if (vha->vha_ia == carp_ia) 1511 break; 1512 } 1513 KASSERT(vha != NULL, ("no corresponding vhaddr %p\n", carp_ifa)); 1514 1515 /* 1516 * Remove the vhaddr from the list before deactivating 1517 * the vhaddr, so that the HMAC could be correctly 1518 * updated in carp_deactivate_vhaddr() 1519 */ 1520 carp_remove_vhaddr(sc, vha); 1521 1522 carp_deactivate_vhaddr(sc, vha); 1523 kfree(vha, M_CARP); 1524 } 1525 1526 static void 1527 carp_config_addr(struct carp_softc *sc, struct ifaddr *carp_ifa) 1528 { 1529 struct carp_vhaddr *vha; 1530 struct in_ifaddr *carp_ia; 1531 1532 KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET); 1533 carp_ia = ifatoia(carp_ifa); 1534 1535 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 1536 KKASSERT(vha->vha_ia != NULL); 1537 if (vha->vha_ia == carp_ia) 1538 break; 1539 } 1540 KASSERT(vha != NULL, ("no corresponding vhaddr %p\n", carp_ifa)); 1541 1542 /* Remove then reinsert, to keep the vhaddr list sorted */ 1543 carp_remove_vhaddr(sc, vha); 1544 carp_insert_vhaddr(sc, vha); 1545 1546 if (carp_config_vhaddr(sc, vha) != 0) { 1547 /* See the comment in carp_add_addr() */ 1548 carp_hmac_prepare(sc); 1549 carp_set_state(sc, INIT); 1550 carp_setrun(sc, 0); 1551 } 1552 } 1553 1554 #ifdef INET6 1555 static int 1556 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) 1557 { 1558 struct ifnet *ifp; 1559 struct carp_if *cif; 1560 struct in6_ifaddr *ia, *ia_if; 1561 struct ip6_moptions *im6o = &sc->sc_im6o; 1562 struct in6_multi_mship *imm; 1563 struct in6_addr in6; 1564 int own, error; 1565 1566 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 1567 carp_setrun(sc, 0); 1568 return (0); 1569 } 1570 1571 /* we have to do it by hands to check we won't match on us */ 1572 ia_if = NULL; own = 0; 1573 for (ia = in6_ifaddr; ia; ia = ia->ia_next) { 1574 int i; 1575 1576 for (i = 0; i < 4; i++) { 1577 if ((sin6->sin6_addr.s6_addr32[i] & 1578 ia->ia_prefixmask.sin6_addr.s6_addr32[i]) != 1579 (ia->ia_addr.sin6_addr.s6_addr32[i] & 1580 ia->ia_prefixmask.sin6_addr.s6_addr32[i])) 1581 break; 1582 } 1583 /* and, yeah, we need a multicast-capable iface too */ 1584 if (ia->ia_ifp != &sc->sc_if && 1585 (ia->ia_ifp->if_flags & IFF_MULTICAST) && 1586 (i == 4)) { 1587 if (!ia_if) 1588 ia_if = ia; 1589 if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, 1590 &ia->ia_addr.sin6_addr)) 1591 own++; 1592 } 1593 } 1594 1595 if (!ia_if) 1596 return (EADDRNOTAVAIL); 1597 ia = ia_if; 1598 ifp = ia->ia_ifp; 1599 1600 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 || 1601 (im6o->im6o_multicast_ifp && im6o->im6o_multicast_ifp != ifp)) 1602 return (EADDRNOTAVAIL); 1603 1604 if (!sc->sc_naddrs6) { 1605 im6o->im6o_multicast_ifp = ifp; 1606 1607 /* join CARP multicast address */ 1608 bzero(&in6, sizeof(in6)); 1609 in6.s6_addr16[0] = htons(0xff02); 1610 in6.s6_addr8[15] = 0x12; 1611 if (in6_setscope(&in6, ifp, NULL) != 0) 1612 goto cleanup; 1613 if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL) 1614 goto cleanup; 1615 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain); 1616 1617 /* join solicited multicast address */ 1618 bzero(&in6, sizeof(in6)); 1619 in6.s6_addr16[0] = htons(0xff02); 1620 in6.s6_addr32[1] = 0; 1621 in6.s6_addr32[2] = htonl(1); 1622 in6.s6_addr32[3] = sin6->sin6_addr.s6_addr32[3]; 1623 in6.s6_addr8[12] = 0xff; 1624 if (in6_setscope(&in6, ifp, NULL) != 0) 1625 goto cleanup; 1626 if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL) 1627 goto cleanup; 1628 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain); 1629 } 1630 1631 if (!ifp->if_carp) { 1632 cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO); 1633 1634 if ((error = ifpromisc(ifp, 1))) { 1635 kfree(cif, M_CARP); 1636 goto cleanup; 1637 } 1638 1639 TAILQ_INIT(&cif->vhif_vrs); 1640 ifp->if_carp = cif; 1641 } else { 1642 struct carp_softc *vr; 1643 1644 cif = ifp->if_carp; 1645 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 1646 if (vr != sc && vr->sc_vhid == sc->sc_vhid) { 1647 error = EINVAL; 1648 goto cleanup; 1649 } 1650 } 1651 } 1652 sc->sc_ia6 = ia; 1653 sc->sc_carpdev = ifp; 1654 1655 { /* XXX prevent endless loop if already in queue */ 1656 struct carp_softc *vr, *after = NULL; 1657 int myself = 0; 1658 cif = ifp->if_carp; 1659 1660 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 1661 if (vr == sc) 1662 myself = 1; 1663 if (vr->sc_vhid < sc->sc_vhid) 1664 after = vr; 1665 } 1666 1667 if (!myself) { 1668 /* We're trying to keep things in order */ 1669 if (after == NULL) 1670 TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list); 1671 else 1672 TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list); 1673 } 1674 } 1675 1676 sc->sc_naddrs6++; 1677 if (own) 1678 sc->sc_advskew = 0; 1679 carp_sc_state(sc); 1680 carp_setrun(sc, 0); 1681 1682 return (0); 1683 1684 cleanup: 1685 /* clean up multicast memberships */ 1686 if (!sc->sc_naddrs6) { 1687 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 1688 imm = LIST_FIRST(&im6o->im6o_memberships); 1689 LIST_REMOVE(imm, i6mm_chain); 1690 in6_leavegroup(imm); 1691 } 1692 } 1693 return (error); 1694 } 1695 1696 static int 1697 carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) 1698 { 1699 int error = 0; 1700 1701 if (!--sc->sc_naddrs6) { 1702 struct carp_if *cif = sc->sc_carpdev->if_carp; 1703 struct ip6_moptions *im6o = &sc->sc_im6o; 1704 1705 callout_stop(&sc->sc_ad_tmo); 1706 sc->sc_vhid = -1; 1707 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 1708 struct in6_multi_mship *imm = 1709 LIST_FIRST(&im6o->im6o_memberships); 1710 1711 LIST_REMOVE(imm, i6mm_chain); 1712 in6_leavegroup(imm); 1713 } 1714 im6o->im6o_multicast_ifp = NULL; 1715 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 1716 if (TAILQ_EMPTY(&cif->vhif_vrs)) { 1717 sc->sc_carpdev->if_carp = NULL; 1718 kfree(cif, M_IFADDR); 1719 } 1720 } 1721 return (error); 1722 } 1723 #endif /* INET6 */ 1724 1725 static int 1726 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr, struct ucred *cr) 1727 { 1728 struct carp_softc *sc = ifp->if_softc, *vr; 1729 struct carpreq carpr; 1730 struct ifaddr *ifa; 1731 struct ifreq *ifr; 1732 struct ifaliasreq *ifra; 1733 struct ifdrv *ifd; 1734 char devname[IFNAMSIZ]; 1735 int error = 0; 1736 1737 ifa = (struct ifaddr *)addr; 1738 ifra = (struct ifaliasreq *)addr; 1739 ifr = (struct ifreq *)addr; 1740 ifd = (struct ifdrv *)addr; 1741 1742 switch (cmd) { 1743 case SIOCSIFADDR: 1744 switch (ifa->ifa_addr->sa_family) { 1745 #ifdef INET 1746 case AF_INET: 1747 ifp->if_flags |= IFF_UP | IFF_RUNNING; 1748 ifa_set_prflags(ifa, IA_PRF_RTEXISTOK); 1749 break; 1750 #endif /* INET */ 1751 #ifdef INET6 1752 case AF_INET6: 1753 ifp->if_flags |= IFF_UP | IFF_RUNNING; 1754 error = carp_set_addr6(sc, satosin6(ifa->ifa_addr)); 1755 break; 1756 #endif /* INET6 */ 1757 default: 1758 error = EAFNOSUPPORT; 1759 break; 1760 } 1761 break; 1762 1763 case SIOCAIFADDR: 1764 switch (ifa->ifa_addr->sa_family) { 1765 #ifdef INET 1766 case AF_INET: 1767 panic("SIOCAIFADDR should never be seen\n"); 1768 #endif /* INET */ 1769 #ifdef INET6 1770 case AF_INET6: 1771 ifp->if_flags |= IFF_UP | IFF_RUNNING; 1772 error = carp_set_addr6(sc, satosin6(&ifra->ifra_addr)); 1773 break; 1774 #endif /* INET6 */ 1775 default: 1776 error = EAFNOSUPPORT; 1777 break; 1778 } 1779 break; 1780 1781 case SIOCDIFADDR: 1782 switch (ifa->ifa_addr->sa_family) { 1783 #ifdef INET 1784 case AF_INET: 1785 panic("SIOCDIFADDR should never be seen\n"); 1786 #endif /* INET */ 1787 #ifdef INET6 1788 case AF_INET6: 1789 error = carp_del_addr6(sc, satosin6(&ifra->ifra_addr)); 1790 break; 1791 #endif /* INET6 */ 1792 default: 1793 error = EAFNOSUPPORT; 1794 break; 1795 } 1796 break; 1797 1798 case SIOCSIFFLAGS: 1799 if (ifp->if_flags & IFF_UP) { 1800 if ((ifp->if_flags & IFF_RUNNING) == 0) { 1801 ifp->if_flags |= IFF_RUNNING; 1802 carp_set_state(sc, INIT); 1803 carp_setrun(sc, 0); 1804 } 1805 } else if (ifp->if_flags & IFF_RUNNING) { 1806 carp_stop(sc, 0); 1807 } 1808 break; 1809 1810 case SIOCSVH: 1811 error = suser_cred(cr, NULL_CRED_OKAY); 1812 if (error) 1813 break; 1814 error = copyin(ifr->ifr_data, &carpr, sizeof(carpr)); 1815 if (error) 1816 break; 1817 1818 error = 1; 1819 if ((ifp->if_flags & IFF_RUNNING) && 1820 sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) { 1821 switch (carpr.carpr_state) { 1822 case BACKUP: 1823 callout_stop(&sc->sc_ad_tmo); 1824 carp_set_state(sc, BACKUP); 1825 carp_setrun(sc, 0); 1826 carp_setroute(sc, RTM_DELETE); 1827 break; 1828 1829 case MASTER: 1830 carp_master_down(sc); 1831 break; 1832 1833 default: 1834 break; 1835 } 1836 } 1837 if (carpr.carpr_vhid > 0) { 1838 if (carpr.carpr_vhid > 255) { 1839 error = EINVAL; 1840 break; 1841 } 1842 if (sc->sc_carpdev) { 1843 struct carp_if *cif = sc->sc_carpdev->if_carp; 1844 1845 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 1846 if (vr != sc && 1847 vr->sc_vhid == carpr.carpr_vhid) 1848 return EEXIST; 1849 } 1850 } 1851 sc->sc_vhid = carpr.carpr_vhid; 1852 IF_LLADDR(ifp)[0] = 0; 1853 IF_LLADDR(ifp)[1] = 0; 1854 IF_LLADDR(ifp)[2] = 0x5e; 1855 IF_LLADDR(ifp)[3] = 0; 1856 IF_LLADDR(ifp)[4] = 1; 1857 IF_LLADDR(ifp)[5] = sc->sc_vhid; 1858 error--; 1859 } 1860 if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) { 1861 if (carpr.carpr_advskew >= 255) { 1862 error = EINVAL; 1863 break; 1864 } 1865 if (carpr.carpr_advbase > 255) { 1866 error = EINVAL; 1867 break; 1868 } 1869 sc->sc_advbase = carpr.carpr_advbase; 1870 sc->sc_advskew = carpr.carpr_advskew; 1871 error--; 1872 } 1873 bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key)); 1874 if (error > 0) { 1875 error = EINVAL; 1876 } else { 1877 error = 0; 1878 carp_setrun(sc, 0); 1879 } 1880 break; 1881 1882 case SIOCGVH: 1883 bzero(&carpr, sizeof(carpr)); 1884 carpr.carpr_state = sc->sc_state; 1885 carpr.carpr_vhid = sc->sc_vhid; 1886 carpr.carpr_advbase = sc->sc_advbase; 1887 carpr.carpr_advskew = sc->sc_advskew; 1888 1889 error = suser_cred(cr, NULL_CRED_OKAY); 1890 if (error == 0) { 1891 bcopy(sc->sc_key, carpr.carpr_key, 1892 sizeof(carpr.carpr_key)); 1893 } 1894 1895 error = copyout(&carpr, ifr->ifr_data, sizeof(carpr)); 1896 break; 1897 1898 case SIOCGDRVSPEC: 1899 switch (ifd->ifd_cmd) { 1900 case CARPGDEVNAME: 1901 if (ifd->ifd_len != sizeof(devname)) 1902 error = EINVAL; 1903 break; 1904 1905 case CARPGVHADDR: 1906 break; 1907 1908 default: 1909 error = EINVAL; 1910 break; 1911 } 1912 if (error) 1913 break; 1914 1915 switch (ifd->ifd_cmd) { 1916 case CARPGVHADDR: 1917 error = carp_get_vhaddr(sc, ifd); 1918 break; 1919 1920 case CARPGDEVNAME: 1921 bzero(devname, sizeof(devname)); 1922 if (sc->sc_carpdev != NULL) { 1923 strlcpy(devname, sc->sc_carpdev->if_xname, 1924 sizeof(devname)); 1925 } 1926 error = copyout(devname, ifd->ifd_data, 1927 sizeof(devname)); 1928 break; 1929 } 1930 break; 1931 1932 default: 1933 error = EINVAL; 1934 break; 1935 } 1936 carp_hmac_prepare(sc); 1937 return error; 1938 } 1939 1940 /* 1941 * XXX: this is looutput. We should eventually use it from there. 1942 */ 1943 static int 1944 carp_looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 1945 struct rtentry *rt) 1946 { 1947 uint32_t af; 1948 1949 M_ASSERTPKTHDR(m); /* check if we have the packet header */ 1950 1951 if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 1952 m_freem(m); 1953 return (rt->rt_flags & RTF_BLACKHOLE ? 0 : 1954 rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); 1955 } 1956 1957 ifp->if_opackets++; 1958 ifp->if_obytes += m->m_pkthdr.len; 1959 1960 /* BPF writes need to be handled specially. */ 1961 if (dst->sa_family == AF_UNSPEC) { 1962 bcopy(dst->sa_data, &af, sizeof(af)); 1963 dst->sa_family = af; 1964 } 1965 1966 #if 1 /* XXX */ 1967 switch (dst->sa_family) { 1968 case AF_INET: 1969 case AF_INET6: 1970 case AF_IPX: 1971 case AF_APPLETALK: 1972 break; 1973 1974 default: 1975 m_freem(m); 1976 return (EAFNOSUPPORT); 1977 } 1978 #endif 1979 return (if_simloop(ifp, m, dst->sa_family, 0)); 1980 } 1981 1982 /* 1983 * Start output on carp interface. This function should never be called. 1984 */ 1985 static void 1986 carp_start(struct ifnet *ifp) 1987 { 1988 #ifdef DEBUG 1989 kprintf("%s: start called\n", ifp->if_xname); 1990 #endif 1991 } 1992 1993 int 1994 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, 1995 struct rtentry *rt) 1996 { 1997 struct m_tag *mtag; 1998 struct carp_softc *sc; 1999 struct ifnet *carp_ifp; 2000 struct ether_header *eh; 2001 2002 if (!sa) 2003 return (0); 2004 2005 switch (sa->sa_family) { 2006 #ifdef INET 2007 case AF_INET: 2008 break; 2009 #endif /* INET */ 2010 #ifdef INET6 2011 case AF_INET6: 2012 break; 2013 #endif /* INET6 */ 2014 default: 2015 return (0); 2016 } 2017 2018 mtag = m_tag_find(m, PACKET_TAG_CARP, NULL); 2019 if (mtag == NULL) 2020 return (0); 2021 2022 bcopy(mtag + 1, &carp_ifp, sizeof(struct ifnet *)); 2023 sc = carp_ifp->if_softc; 2024 2025 /* Set the source MAC address to Virtual Router MAC Address */ 2026 switch (ifp->if_type) { 2027 case IFT_ETHER: 2028 case IFT_L2VLAN: 2029 eh = mtod(m, struct ether_header *); 2030 eh->ether_shost[0] = 0; 2031 eh->ether_shost[1] = 0; 2032 eh->ether_shost[2] = 0x5e; 2033 eh->ether_shost[3] = 0; 2034 eh->ether_shost[4] = 1; 2035 eh->ether_shost[5] = sc->sc_vhid; 2036 break; 2037 2038 default: 2039 if_printf(ifp, "carp is not supported for this " 2040 "interface type\n"); 2041 return (EOPNOTSUPP); 2042 } 2043 return (0); 2044 } 2045 2046 static void 2047 carp_set_state(struct carp_softc *sc, int state) 2048 { 2049 struct ifnet *cifp = &sc->sc_if; 2050 2051 if (sc->sc_state == state) 2052 return; 2053 sc->sc_state = state; 2054 2055 switch (sc->sc_state) { 2056 case BACKUP: 2057 cifp->if_link_state = LINK_STATE_DOWN; 2058 break; 2059 2060 case MASTER: 2061 cifp->if_link_state = LINK_STATE_UP; 2062 break; 2063 2064 default: 2065 cifp->if_link_state = LINK_STATE_UNKNOWN; 2066 break; 2067 } 2068 rt_ifmsg(cifp); 2069 } 2070 2071 void 2072 carp_carpdev_state(void *v) 2073 { 2074 struct carp_if *cif = v; 2075 struct carp_softc *sc; 2076 2077 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) 2078 carp_sc_state(sc); 2079 } 2080 2081 static void 2082 carp_sc_state(struct carp_softc *sc) 2083 { 2084 if (!(sc->sc_carpdev->if_flags & IFF_UP)) { 2085 callout_stop(&sc->sc_ad_tmo); 2086 callout_stop(&sc->sc_md_tmo); 2087 callout_stop(&sc->sc_md6_tmo); 2088 carp_set_state(sc, INIT); 2089 carp_setrun(sc, 0); 2090 if (!sc->sc_suppress) { 2091 carp_suppress_preempt++; 2092 if (carp_suppress_preempt == 1) 2093 carp_send_ad_all(); 2094 } 2095 sc->sc_suppress = 1; 2096 } else { 2097 carp_set_state(sc, INIT); 2098 carp_setrun(sc, 0); 2099 if (sc->sc_suppress) 2100 carp_suppress_preempt--; 2101 sc->sc_suppress = 0; 2102 } 2103 } 2104 2105 static void 2106 carp_stop(struct carp_softc *sc, int detach) 2107 { 2108 sc->sc_if.if_flags &= ~IFF_RUNNING; 2109 2110 callout_stop(&sc->sc_ad_tmo); 2111 callout_stop(&sc->sc_md_tmo); 2112 callout_stop(&sc->sc_md6_tmo); 2113 2114 if (!detach && sc->sc_state == MASTER) 2115 carp_send_ad(sc); 2116 2117 if (sc->sc_suppress) 2118 carp_suppress_preempt--; 2119 sc->sc_suppress = 0; 2120 2121 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) 2122 carp_suppress_preempt--; 2123 sc->sc_sendad_errors = 0; 2124 sc->sc_sendad_success = 0; 2125 2126 carp_set_state(sc, INIT); 2127 carp_setrun(sc, 0); 2128 } 2129 2130 static void 2131 carp_reset(struct carp_softc *sc, int detach) 2132 { 2133 struct ifnet *cifp = &sc->sc_if; 2134 2135 carp_stop(sc, detach); 2136 if (!sc->sc_dead && (cifp->if_flags & IFF_UP)) 2137 cifp->if_flags |= IFF_RUNNING; 2138 } 2139 2140 static int 2141 carp_activate_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha, 2142 struct ifnet *ifp, const struct in_ifaddr *ia_if, int own) 2143 { 2144 struct ip_moptions *imo = &sc->sc_imo; 2145 struct carp_if *cif; 2146 struct carp_softc *vr, *after = NULL; 2147 int onlist, error; 2148 #ifdef INVARIANTS 2149 int assert_onlist; 2150 #endif 2151 2152 KKASSERT(vha->vha_ia != NULL); 2153 2154 KASSERT(ia_if != NULL, ("NULL backing address\n")); 2155 KASSERT(vha->vha_iaback == NULL, ("%p is already activated\n", vha)); 2156 KASSERT((vha->vha_flags & CARP_VHAF_OWNER) == 0, 2157 ("inactive vhaddr %p is the address owner\n", vha)); 2158 2159 KASSERT(sc->sc_carpdev == NULL || sc->sc_carpdev == ifp, 2160 ("%s is already on %s\n", sc->sc_if.if_xname, 2161 sc->sc_carpdev->if_xname)); 2162 2163 KASSERT(imo->imo_multicast_ifp == NULL || 2164 imo->imo_multicast_ifp == ifp, 2165 ("%s didn't leave mcast group on %s\n", 2166 sc->sc_if.if_xname, imo->imo_multicast_ifp->if_xname)); 2167 2168 if (imo->imo_num_memberships == 0) { 2169 struct in_addr addr; 2170 2171 addr.s_addr = htonl(INADDR_CARP_GROUP); 2172 if ((imo->imo_membership[0] = in_addmulti(&addr, ifp)) == NULL) 2173 return ENOBUFS; 2174 imo->imo_num_memberships++; 2175 imo->imo_multicast_ifp = ifp; 2176 imo->imo_multicast_ttl = CARP_DFLTTL; 2177 imo->imo_multicast_loop = 0; 2178 } 2179 2180 if (!ifp->if_carp) { 2181 KASSERT(sc->sc_carpdev == NULL, 2182 ("%s is already on %s\n", sc->sc_if.if_xname, 2183 sc->sc_carpdev->if_xname)); 2184 2185 cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO); 2186 2187 error = ifpromisc(ifp, 1); 2188 if (error) { 2189 kfree(cif, M_CARP); 2190 goto cleanup; 2191 } 2192 2193 TAILQ_INIT(&cif->vhif_vrs); 2194 ifp->if_carp = cif; 2195 } else { 2196 cif = ifp->if_carp; 2197 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 2198 if (vr != sc && vr->sc_vhid == sc->sc_vhid) { 2199 error = EINVAL; 2200 goto cleanup; 2201 } 2202 } 2203 } 2204 2205 #ifdef INVARIANTS 2206 if (sc->sc_carpdev != NULL) 2207 assert_onlist = 1; 2208 else 2209 assert_onlist = 0; 2210 #endif 2211 sc->sc_ia = ia_if; 2212 sc->sc_carpdev = ifp; 2213 2214 cif = ifp->if_carp; 2215 onlist = 0; 2216 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 2217 if (vr == sc) 2218 onlist = 1; 2219 if (vr->sc_vhid < sc->sc_vhid) 2220 after = vr; 2221 } 2222 2223 #ifdef INVARIANTS 2224 if (assert_onlist) { 2225 KASSERT(onlist, ("%s is not on %s carp list\n", 2226 sc->sc_if.if_xname, ifp->if_xname)); 2227 } else { 2228 KASSERT(!onlist, ("%s is already on %s carp list\n", 2229 sc->sc_if.if_xname, ifp->if_xname)); 2230 } 2231 #endif 2232 2233 if (!onlist) { 2234 /* We're trying to keep things in order */ 2235 if (after == NULL) 2236 TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list); 2237 else 2238 TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list); 2239 } 2240 2241 vha->vha_iaback = ia_if; 2242 sc->sc_naddrs++; 2243 2244 if (own) { 2245 vha->vha_flags |= CARP_VHAF_OWNER; 2246 2247 /* XXX save user configured advskew? */ 2248 sc->sc_advskew = 0; 2249 } 2250 2251 carp_hmac_prepare(sc); 2252 carp_set_state(sc, INIT); 2253 carp_setrun(sc, 0); 2254 return 0; 2255 cleanup: 2256 carp_multicast_cleanup(sc); 2257 return error; 2258 } 2259 2260 static void 2261 carp_deactivate_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha) 2262 { 2263 KKASSERT(vha->vha_ia != NULL); 2264 2265 carp_hmac_prepare(sc); 2266 2267 if (vha->vha_iaback == NULL) { 2268 KASSERT((vha->vha_flags & CARP_VHAF_OWNER) == 0, 2269 ("inactive vhaddr %p is the address owner\n", vha)); 2270 return; 2271 } 2272 2273 vha->vha_flags &= ~CARP_VHAF_OWNER; 2274 2275 KKASSERT(sc->sc_naddrs > 0); 2276 vha->vha_iaback = NULL; 2277 sc->sc_naddrs--; 2278 if (!sc->sc_naddrs) { 2279 if (sc->sc_naddrs6) { 2280 carp_multicast_cleanup(sc); 2281 sc->sc_ia = NULL; 2282 } else { 2283 carp_detach(sc, 0); 2284 } 2285 } 2286 } 2287 2288 static void 2289 carp_link_addrs(struct carp_softc *sc, struct ifnet *ifp, struct ifaddr *ifa_if) 2290 { 2291 struct carp_vhaddr *vha; 2292 struct in_ifaddr *ia_if; 2293 2294 KKASSERT(ifa_if->ifa_addr->sa_family == AF_INET); 2295 ia_if = ifatoia(ifa_if); 2296 2297 if ((ia_if->ia_flags & IFA_ROUTE) == 0) 2298 return; 2299 2300 /* 2301 * Test each inactive vhaddr against the newly added address. 2302 * If the newly added address could be the backing address, 2303 * then activate the matching vhaddr. 2304 */ 2305 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 2306 const struct in_ifaddr *ia; 2307 u_long iaddr; 2308 int own; 2309 2310 if (vha->vha_iaback != NULL) 2311 continue; 2312 2313 ia = vha->vha_ia; 2314 iaddr = ntohl(ia->ia_addr.sin_addr.s_addr); 2315 2316 if ((iaddr & ia_if->ia_subnetmask) != ia_if->ia_subnet) 2317 continue; 2318 2319 own = 0; 2320 if (ia->ia_addr.sin_addr.s_addr == 2321 ia_if->ia_addr.sin_addr.s_addr) 2322 own = 1; 2323 2324 carp_activate_vhaddr(sc, vha, ifp, ia_if, own); 2325 } 2326 } 2327 2328 static void 2329 carp_unlink_addrs(struct carp_softc *sc, struct ifnet *ifp, 2330 struct ifaddr *ifa_if) 2331 { 2332 struct carp_vhaddr *vha; 2333 struct in_ifaddr *ia_if; 2334 2335 KKASSERT(ifa_if->ifa_addr->sa_family == AF_INET); 2336 ia_if = ifatoia(ifa_if); 2337 2338 /* 2339 * Ad src address is deleted; set it to NULL. 2340 * Following loop will try pick up a new ad src address 2341 * if one of the vhaddr could retain its backing address. 2342 */ 2343 if (sc->sc_ia == ia_if) 2344 sc->sc_ia = NULL; 2345 2346 /* 2347 * Test each active vhaddr against the deleted address. 2348 * If the deleted address is vhaddr address's backing 2349 * address, then deactivate the vhaddr. 2350 */ 2351 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 2352 if (vha->vha_iaback == NULL) 2353 continue; 2354 2355 if (vha->vha_iaback == ia_if) 2356 carp_deactivate_vhaddr(sc, vha); 2357 else if (sc->sc_ia == NULL) 2358 sc->sc_ia = vha->vha_iaback; 2359 } 2360 } 2361 2362 static void 2363 carp_update_addrs(struct carp_softc *sc) 2364 { 2365 struct carp_vhaddr *vha; 2366 2367 KKASSERT(sc->sc_carpdev == NULL); 2368 2369 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) 2370 carp_config_vhaddr(sc, vha); 2371 } 2372 2373 static void 2374 carp_ifaddr(void *arg __unused, struct ifnet *ifp, 2375 enum ifaddr_event event, struct ifaddr *ifa) 2376 { 2377 struct carp_softc *sc; 2378 2379 if (ifa->ifa_addr->sa_family != AF_INET) 2380 return; 2381 2382 if (ifp->if_type == IFT_CARP) { 2383 /* 2384 * Address is changed on carp(4) interface 2385 */ 2386 switch (event) { 2387 case IFADDR_EVENT_ADD: 2388 carp_add_addr(ifp->if_softc, ifa); 2389 break; 2390 2391 case IFADDR_EVENT_CHANGE: 2392 carp_config_addr(ifp->if_softc, ifa); 2393 break; 2394 2395 case IFADDR_EVENT_DELETE: 2396 carp_del_addr(ifp->if_softc, ifa); 2397 break; 2398 } 2399 return; 2400 } 2401 2402 /* 2403 * Address is changed on non-carp(4) interface 2404 */ 2405 if ((ifp->if_flags & IFF_MULTICAST) == 0) 2406 return; 2407 2408 crit_enter(); 2409 LIST_FOREACH(sc, &carpif_list, sc_next) { 2410 if (sc->sc_carpdev != NULL && sc->sc_carpdev != ifp) { 2411 /* Not the parent iface; skip */ 2412 continue; 2413 } 2414 2415 switch (event) { 2416 case IFADDR_EVENT_ADD: 2417 carp_link_addrs(sc, ifp, ifa); 2418 break; 2419 2420 case IFADDR_EVENT_DELETE: 2421 if (sc->sc_carpdev != NULL) { 2422 carp_unlink_addrs(sc, ifp, ifa); 2423 if (sc->sc_carpdev == NULL) 2424 carp_update_addrs(sc); 2425 } else { 2426 /* 2427 * The carp(4) interface didn't have a 2428 * parent iface, so it is not possible 2429 * that it will contain any address to 2430 * be unlinked. 2431 */ 2432 } 2433 break; 2434 2435 case IFADDR_EVENT_CHANGE: 2436 if (sc->sc_carpdev == NULL) { 2437 /* 2438 * The carp(4) interface didn't have a 2439 * parent iface, so it is not possible 2440 * that it will contain any address to 2441 * be updated. 2442 */ 2443 carp_link_addrs(sc, ifp, ifa); 2444 } else { 2445 /* 2446 * First try breaking tie with the old 2447 * address. Then see whether we could 2448 * link certain vhaddr to the new address. 2449 * If that fails, i.e. carpdev is NULL, 2450 * we try a global update. 2451 * 2452 * NOTE: The above order is critical. 2453 */ 2454 carp_unlink_addrs(sc, ifp, ifa); 2455 carp_link_addrs(sc, ifp, ifa); 2456 if (sc->sc_carpdev == NULL) 2457 carp_update_addrs(sc); 2458 } 2459 break; 2460 } 2461 } 2462 crit_exit(); 2463 } 2464 2465 static int 2466 carp_modevent(module_t mod, int type, void *data) 2467 { 2468 switch (type) { 2469 case MOD_LOAD: 2470 LIST_INIT(&carpif_list); 2471 carp_ifdetach_event = 2472 EVENTHANDLER_REGISTER(ifnet_detach_event, carp_ifdetach, NULL, 2473 EVENTHANDLER_PRI_ANY); 2474 carp_ifaddr_event = 2475 EVENTHANDLER_REGISTER(ifaddr_event, carp_ifaddr, NULL, 2476 EVENTHANDLER_PRI_ANY); 2477 if_clone_attach(&carp_cloner); 2478 break; 2479 2480 case MOD_UNLOAD: 2481 EVENTHANDLER_DEREGISTER(ifnet_detach_event, 2482 carp_ifdetach_event); 2483 EVENTHANDLER_DEREGISTER(ifaddr_event, 2484 carp_ifaddr_event); 2485 if_clone_detach(&carp_cloner); 2486 break; 2487 2488 default: 2489 return (EINVAL); 2490 } 2491 return (0); 2492 } 2493 2494 static moduledata_t carp_mod = { 2495 "carp", 2496 carp_modevent, 2497 0 2498 }; 2499 DECLARE_MODULE(carp, carp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 2500