1 /* 2 * Copyright (c) 2002 Michael Shalayeff. All rights reserved. 3 * Copyright (c) 2003 Ryan McBride. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 18 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 22 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 23 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 24 * THE POSSIBILITY OF SUCH DAMAGE. 
25 */ 26 /* 27 * $FreeBSD: src/sys/netinet/ip_carp.c,v 1.48 2007/02/02 09:39:09 glebius Exp $ 28 */ 29 30 #include "opt_carp.h" 31 #include "opt_inet.h" 32 #include "opt_inet6.h" 33 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/kernel.h> 37 #include <sys/in_cksum.h> 38 #include <sys/limits.h> 39 #include <sys/malloc.h> 40 #include <sys/mbuf.h> 41 #include <sys/msgport2.h> 42 #include <sys/time.h> 43 #include <sys/proc.h> 44 #include <sys/priv.h> 45 #include <sys/sockio.h> 46 #include <sys/socket.h> 47 #include <sys/sysctl.h> 48 #include <sys/syslog.h> 49 #include <sys/thread.h> 50 51 #include <machine/stdarg.h> 52 #include <crypto/sha1.h> 53 54 #include <net/bpf.h> 55 #include <net/ethernet.h> 56 #include <net/if.h> 57 #include <net/if_dl.h> 58 #include <net/if_types.h> 59 #include <net/route.h> 60 #include <net/if_clone.h> 61 #include <net/if_var.h> 62 #include <net/ifq_var.h> 63 #include <net/netmsg2.h> 64 65 #ifdef INET 66 #include <netinet/in.h> 67 #include <netinet/in_var.h> 68 #include <netinet/in_systm.h> 69 #include <netinet/ip.h> 70 #include <netinet/ip_var.h> 71 #include <netinet/if_ether.h> 72 #endif 73 74 #ifdef INET6 75 #include <netinet/icmp6.h> 76 #include <netinet/ip6.h> 77 #include <netinet6/ip6_var.h> 78 #include <netinet6/scope6_var.h> 79 #include <netinet6/nd6.h> 80 #endif 81 82 #include <netinet/ip_carp.h> 83 84 /* 85 * Note about carp's MP safe approach: 86 * 87 * Brief: carp_softc (softc), carp_softc_container (scc) 88 * 89 * - All configuration operation, e.g. 
ioctl, add/delete inet addresses 90 * is serialized by netisr0; not by carp's serializer 91 * 92 * - Backing interface's if_carp and carp_softc's relationship: 93 * 94 * +---------+ 95 * if_carp -->| carp_if | 96 * +---------+ 97 * | 98 * | 99 * V +---------+ 100 * +-----+ | | 101 * | scc |-->| softc | 102 * +-----+ | | 103 * | +---------+ 104 * | 105 * V +---------+ 106 * +-----+ | | 107 * | scc |-->| softc | 108 * +-----+ | | 109 * +---------+ 110 * 111 * - if_carp creation, modification and deletion all happen in netisr0, 112 * as stated previously. Since if_carp is accessed by multiple netisrs, 113 * the modification to if_carp is conducted in the following way: 114 * 115 * Adding carp_softc: 116 * 117 * 1) Duplicate the old carp_if to new carp_if (ncif), and insert the 118 * to-be-added carp_softc to the new carp_if (ncif): 119 * 120 * if_carp ncif 121 * | | 122 * V V 123 * +---------+ +---------+ 124 * | carp_if | | carp_if | 125 * +---------+ +---------+ 126 * | | 127 * | | 128 * V +-------+ V 129 * +-----+ | | +-----+ 130 * | scc |---->| softc |<----| scc | 131 * +-----+ | | +-----+ 132 * | +-------+ | 133 * | | 134 * V +-------+ V 135 * +-----+ | | +-----+ 136 * | scc |---->| softc |<----| scc | 137 * +-----+ | | +-----+ 138 * +-------+ | 139 * | 140 * +-------+ V 141 * | | +-----+ 142 * | softc |<----| scc | 143 * | | +-----+ 144 * +-------+ 145 * 146 * 2) Switch save if_carp into ocif and switch if_carp to ncif: 147 * 148 * ocif if_carp 149 * | | 150 * V V 151 * +---------+ +---------+ 152 * | carp_if | | carp_if | 153 * +---------+ +---------+ 154 * | | 155 * | | 156 * V +-------+ V 157 * +-----+ | | +-----+ 158 * | scc |---->| softc |<----| scc | 159 * +-----+ | | +-----+ 160 * | +-------+ | 161 * | | 162 * V +-------+ V 163 * +-----+ | | +-----+ 164 * | scc |---->| softc |<----| scc | 165 * +-----+ | | +-----+ 166 * +-------+ | 167 * | 168 * +-------+ V 169 * | | +-----+ 170 * | softc |<----| scc | 171 * | | +-----+ 172 * +-------+ 173 * 174 * 3) Run 
netmsg_service_sync(), which will make sure that 175 * ocif is no longer accessed (all network operations 176 * are happened only in network threads). 177 * 4) Free ocif -- only carp_if and scc are freed. 178 * 179 * 180 * Removing carp_softc: 181 * 182 * 1) Duplicate the old carp_if to new carp_if (ncif); the to-be-deleted 183 * carp_softc will not be duplicated. 184 * 185 * if_carp ncif 186 * | | 187 * V V 188 * +---------+ +---------+ 189 * | carp_if | | carp_if | 190 * +---------+ +---------+ 191 * | | 192 * | | 193 * V +-------+ V 194 * +-----+ | | +-----+ 195 * | scc |---->| softc |<----| scc | 196 * +-----+ | | +-----+ 197 * | +-------+ | 198 * | | 199 * V +-------+ | 200 * +-----+ | | | 201 * | scc |---->| softc | | 202 * +-----+ | | | 203 * | +-------+ | 204 * | | 205 * V +-------+ V 206 * +-----+ | | +-----+ 207 * | scc |---->| softc |<----| scc | 208 * +-----+ | | +-----+ 209 * +-------+ 210 * 211 * 2) Switch save if_carp into ocif and switch if_carp to ncif: 212 * 213 * ocif if_carp 214 * | | 215 * V V 216 * +---------+ +---------+ 217 * | carp_if | | carp_if | 218 * +---------+ +---------+ 219 * | | 220 * | | 221 * V +-------+ V 222 * +-----+ | | +-----+ 223 * | scc |---->| softc |<----| scc | 224 * +-----+ | | +-----+ 225 * | +-------+ | 226 * | | 227 * V +-------+ | 228 * +-----+ | | | 229 * | scc |---->| softc | | 230 * +-----+ | | | 231 * | +-------+ | 232 * | | 233 * V +-------+ V 234 * +-----+ | | +-----+ 235 * | scc |---->| softc |<----| scc | 236 * +-----+ | | +-----+ 237 * +-------+ 238 * 239 * 3) Run netmsg_service_sync(), which will make sure that 240 * ocif is no longer accessed (all network operations 241 * are happened only in network threads). 242 * 4) Free ocif -- only carp_if and scc are freed. 243 * 244 * - if_carp accessing: 245 * The accessing code should cache the if_carp in a local temporary 246 * variable and accessing the temporary variable along the code path 247 * instead of accessing if_carp later on. 
*/

/* Interface name prefix passed to if_initname() and to the cloner. */
#define CARP_IFNAME	"carp"

/* Non-zero only when ifp has both IFF_UP and IFF_RUNNING set. */
#define CARP_IS_RUNNING(ifp)	\
	(((ifp)->if_flags & (IFF_UP | IFF_RUNNING)) == (IFF_UP | IFF_RUNNING))

struct carp_softc;

/*
 * One IPv4 virtual address of a carp interface.  Entries are kept on
 * carp_softc.sc_vha_list, sorted by address (see carp_insert_vhaddr()).
 */
struct carp_vhaddr {
	uint32_t		vha_flags;	/* CARP_VHAF_ */
	struct in_ifaddr	*vha_ia;	/* carp address */
	struct in_ifaddr	*vha_iaback;	/* backing address */
	TAILQ_ENTRY(carp_vhaddr) vha_link;
};
TAILQ_HEAD(carp_vhaddr_list, carp_vhaddr);

/*
 * Message used to run carp operations in netisr0.  Which of the nc_*
 * fields are meaningful depends on the dispatch function the message
 * was initialized with.
 */
struct netmsg_carp {
	struct netmsg_base	base;
	struct ifnet		*nc_carpdev;
	struct carp_softc	*nc_softc;
	void			*nc_data;
	size_t			nc_datalen;
};

/*
 * Per-interface state of one carp interface; allocated zeroed by
 * carp_clone_create() and freed by carp_clone_destroy().
 */
struct carp_softc {
	struct arpcom		 arpcom;
	struct ifnet		*sc_carpdev;	/* parent interface */
	struct carp_vhaddr_list	 sc_vha_list;	/* virtual addr list */

	const struct in_ifaddr	*sc_ia;		/* primary iface address v4 */
	struct ip_moptions	 sc_imo;

#ifdef INET6
	struct in6_ifaddr	*sc_ia6;	/* primary iface address v6 */
	struct ip6_moptions	 sc_im6o;
#endif /* INET6 */

	enum { INIT = 0, BACKUP, MASTER }
				 sc_state;
	/* Set to TRUE by carp_clone_destroy_dispatch() before teardown */
	boolean_t		 sc_dead;

	int			 sc_suppress;

	int			 sc_sendad_errors;
#define CARP_SENDAD_MAX_ERRORS	3
	int			 sc_sendad_success;
#define CARP_SENDAD_MIN_SUCCESS	3

	int			 sc_vhid;	/* -1 after creation */
	int			 sc_advskew;
	int			 sc_naddrs;	/* actually used IPv4 vha */
	int			 sc_naddrs6;
	int			 sc_advbase;	/* seconds */
	/*
	 * While non-zero, carp_prepare_ad() picks a random sc_counter
	 * instead of incrementing it; cleared by carp_proto_input_c()
	 * once an authenticated advertisement has been received.
	 */
	int			 sc_init_counter;
	uint64_t		 sc_counter;

	/* authentication */
#define CARP_HMAC_PAD	64
	unsigned char		 sc_key[CARP_KEY_LEN];
	/* HMAC pad; left in "opad" form by carp_hmac_prepare() */
	unsigned char		 sc_pad[CARP_HMAC_PAD];
	/* Precomputed first part of the inner hash */
	SHA1_CTX		 sc_sha1;

	struct callout		 sc_ad_tmo;	/* advertisement timeout */
	struct netmsg_carp	 sc_ad_msg;	/* adv timeout netmsg */
	struct callout		 sc_md_tmo;	/* ip4 master down timeout */
	struct callout		 sc_md6_tmo;	/* ip6 master down timeout */
	struct netmsg_carp	 sc_md_msg;	/* master down timeout netmsg */

	LIST_ENTRY(carp_softc)	 sc_next;	/* Interface clue */
};
/* Shorthand for the ifnet embedded in a carp_softc's arpcom. */
#define sc_if	arpcom.ac_if

/*
 * List node referencing one carp_softc.  A carp_if is a list of these
 * nodes; carp_if_insert()/carp_if_remove() build a fresh list of nodes
 * rather than modifying an existing one, and carp_if_free() releases
 * only the nodes and the list head, never the softc they point to.
 */
struct carp_softc_container {
	TAILQ_ENTRY(carp_softc_container) scc_link;
	struct carp_softc	*scc_softc;
};
TAILQ_HEAD(carp_if, carp_softc_container);

SYSCTL_DECL(_net_inet_carp);

/* Tunables indexed by CARPCTL_*; exported below as net.inet.carp.* */
static int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0 }; /* XXX for now */
SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW,
    &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets");
SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW,
    &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode");
SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW,
    &carp_opts[CARPCTL_LOG], 0, "log bad carp packets");
SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW,
    &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses");

/*
 * Read-only from userland.  While non-zero, carp_proto_input_c() uses
 * an advskew of 240 for the local side when the configured advskew is
 * below 240.
 */
static int carp_suppress_preempt = 0;
SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD,
    &carp_suppress_preempt, 0, "Preemption is suppressed");

static struct carpstats carpstats;
SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW,
    &carpstats, carpstats,
    "CARP statistics (struct carpstats, netinet/ip_carp.h)");

/* Log at LOG_INFO when the "log" tunable is greater than 0. */
#define CARP_LOG(...)	do {				\
	if (carp_opts[CARPCTL_LOG] > 0)			\
		log(LOG_INFO, __VA_ARGS__);		\
} while (0)

/* Log at LOG_DEBUG when the "log" tunable is greater than 1. */
#define CARP_DEBUG(...)	do {				\
	if (carp_opts[CARPCTL_LOG] > 1)			\
		log(LOG_DEBUG, __VA_ARGS__);		\
} while (0)

/* Held around insertions into and removals from carpif_list */
static struct lwkt_token carp_listtok = LWKT_TOKEN_INITIALIZER(carp_list_token);

static void	carp_hmac_prepare(struct carp_softc *);
static void	carp_hmac_generate(struct carp_softc *, uint32_t *,
		    unsigned char *);
static int	carp_hmac_verify(struct carp_softc *, uint32_t *,
		    unsigned char *);
static void	carp_setroute(struct carp_softc *, int);
static void	carp_proto_input_c(struct carp_softc *, struct mbuf *,
		    struct carp_header *, sa_family_t);
static int 	carp_clone_create(struct if_clone *, int, caddr_t);
static int 	carp_clone_destroy(struct ifnet *);
static void	carp_detach(struct carp_softc *, boolean_t, boolean_t);
static void	carp_prepare_ad(struct carp_softc *, struct carp_header *);
static void	carp_send_ad_all(void);
static void	carp_send_ad_timeout(void *);
static void	carp_send_ad(struct carp_softc *);
static void	carp_send_arp(struct carp_softc *);
static void	carp_master_down_timeout(void *);
static void	carp_master_down(struct carp_softc *);
static void	carp_setrun(struct carp_softc *, sa_family_t);
static void	carp_set_state(struct carp_softc *, int);
static struct ifnet *carp_forus(struct carp_if *, const uint8_t *);

static void	carp_init(void *);
static int	carp_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
static int	carp_output(struct ifnet *, struct mbuf *, struct sockaddr *,
		    struct rtentry *);
static void	carp_start(struct ifnet *, struct ifaltq_subque *);

static void	carp_multicast_cleanup(struct carp_softc *);
static void	carp_add_addr(struct carp_softc *, struct ifaddr *);
static void	carp_del_addr(struct carp_softc *, struct ifaddr *);
static void	carp_config_addr(struct carp_softc *, struct ifaddr *);
static void	carp_link_addrs(struct carp_softc *, struct ifnet *,
		    struct ifaddr *);
static void carp_unlink_addrs(struct carp_softc *, struct ifnet *, 395 struct ifaddr *); 396 static void carp_update_addrs(struct carp_softc *, struct ifaddr *); 397 398 static int carp_config_vhaddr(struct carp_softc *, struct carp_vhaddr *, 399 struct in_ifaddr *); 400 static int carp_activate_vhaddr(struct carp_softc *, struct carp_vhaddr *, 401 struct ifnet *, struct in_ifaddr *, int); 402 static void carp_deactivate_vhaddr(struct carp_softc *, 403 struct carp_vhaddr *, boolean_t); 404 static int carp_addroute_vhaddr(struct carp_softc *, struct carp_vhaddr *); 405 static void carp_delroute_vhaddr(struct carp_softc *, struct carp_vhaddr *, 406 boolean_t); 407 408 #ifdef foo 409 static void carp_sc_state(struct carp_softc *); 410 #endif 411 #ifdef INET6 412 static void carp_send_na(struct carp_softc *); 413 #ifdef notyet 414 static int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *); 415 static int carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *); 416 #endif 417 static void carp_multicast6_cleanup(struct carp_softc *); 418 #endif 419 static void carp_stop(struct carp_softc *, boolean_t); 420 static void carp_suspend(struct carp_softc *, boolean_t); 421 static void carp_ioctl_stop(struct carp_softc *); 422 static int carp_ioctl_setvh(struct carp_softc *, void *, struct ucred *); 423 static int carp_ioctl_getvh(struct carp_softc *, void *, struct ucred *); 424 static int carp_ioctl_getdevname(struct carp_softc *, struct ifdrv *); 425 static int carp_ioctl_getvhaddr(struct carp_softc *, struct ifdrv *); 426 427 static struct carp_if *carp_if_remove(struct carp_if *, struct carp_softc *); 428 static struct carp_if *carp_if_insert(struct carp_if *, struct carp_softc *); 429 static void carp_if_free(struct carp_if *); 430 431 static void carp_ifaddr(void *, struct ifnet *, enum ifaddr_event, 432 struct ifaddr *); 433 static void carp_ifdetach(void *, struct ifnet *); 434 435 static void carp_ifdetach_dispatch(netmsg_t); 436 static void 
carp_clone_destroy_dispatch(netmsg_t); 437 static void carp_init_dispatch(netmsg_t); 438 static void carp_ioctl_stop_dispatch(netmsg_t); 439 static void carp_ioctl_setvh_dispatch(netmsg_t); 440 static void carp_ioctl_getvh_dispatch(netmsg_t); 441 static void carp_ioctl_getdevname_dispatch(netmsg_t); 442 static void carp_ioctl_getvhaddr_dispatch(netmsg_t); 443 static void carp_send_ad_timeout_dispatch(netmsg_t); 444 static void carp_master_down_timeout_dispatch(netmsg_t); 445 446 static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces"); 447 448 static LIST_HEAD(, carp_softc) carpif_list; 449 450 static struct if_clone carp_cloner = 451 IF_CLONE_INITIALIZER(CARP_IFNAME, carp_clone_create, carp_clone_destroy, 452 0, IF_MAXUNIT); 453 454 static uint8_t carp_etheraddr[ETHER_ADDR_LEN] = { 0, 0, 0x5e, 0, 1, 0 }; 455 456 static eventhandler_tag carp_ifdetach_event; 457 static eventhandler_tag carp_ifaddr_event; 458 459 static __inline void 460 carp_insert_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha_new) 461 { 462 struct carp_vhaddr *vha; 463 u_long new_addr, addr; 464 465 KKASSERT((vha_new->vha_flags & CARP_VHAF_ONLIST) == 0); 466 467 /* 468 * Virtual address list is sorted; smaller one first 469 */ 470 new_addr = ntohl(vha_new->vha_ia->ia_addr.sin_addr.s_addr); 471 472 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 473 addr = ntohl(vha->vha_ia->ia_addr.sin_addr.s_addr); 474 475 if (addr > new_addr) 476 break; 477 } 478 if (vha == NULL) 479 TAILQ_INSERT_TAIL(&sc->sc_vha_list, vha_new, vha_link); 480 else 481 TAILQ_INSERT_BEFORE(vha, vha_new, vha_link); 482 vha_new->vha_flags |= CARP_VHAF_ONLIST; 483 } 484 485 static __inline void 486 carp_remove_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha) 487 { 488 KKASSERT(vha->vha_flags & CARP_VHAF_ONLIST); 489 vha->vha_flags &= ~CARP_VHAF_ONLIST; 490 TAILQ_REMOVE(&sc->sc_vha_list, vha, vha_link); 491 } 492 493 static void 494 carp_hmac_prepare(struct carp_softc *sc) 495 { 496 uint8_t version = CARP_VERSION, type = 
CARP_ADVERTISEMENT; 497 uint8_t vhid = sc->sc_vhid & 0xff; 498 int i; 499 #ifdef INET6 500 struct ifaddr_container *ifac; 501 struct in6_addr in6; 502 #endif 503 #ifdef INET 504 struct carp_vhaddr *vha; 505 #endif 506 507 /* XXX: possible race here */ 508 509 /* compute ipad from key */ 510 bzero(sc->sc_pad, sizeof(sc->sc_pad)); 511 bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key)); 512 for (i = 0; i < sizeof(sc->sc_pad); i++) 513 sc->sc_pad[i] ^= 0x36; 514 515 /* precompute first part of inner hash */ 516 SHA1Init(&sc->sc_sha1); 517 SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad)); 518 SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version)); 519 SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type)); 520 SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid)); 521 #ifdef INET 522 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 523 SHA1Update(&sc->sc_sha1, 524 (const uint8_t *)&vha->vha_ia->ia_addr.sin_addr, 525 sizeof(struct in_addr)); 526 } 527 #endif /* INET */ 528 #ifdef INET6 529 TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) { 530 struct ifaddr *ifa = ifac->ifa; 531 532 if (ifa->ifa_addr->sa_family == AF_INET6) { 533 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 534 in6_clearscope(&in6); 535 SHA1Update(&sc->sc_sha1, (void *)&in6, sizeof(in6)); 536 } 537 } 538 #endif /* INET6 */ 539 540 /* convert ipad to opad */ 541 for (i = 0; i < sizeof(sc->sc_pad); i++) 542 sc->sc_pad[i] ^= 0x36 ^ 0x5c; 543 } 544 545 static void 546 carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2], 547 unsigned char md[20]) 548 { 549 SHA1_CTX sha1ctx; 550 551 /* fetch first half of inner hash */ 552 bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx)); 553 554 SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter)); 555 SHA1Final(md, &sha1ctx); 556 557 /* outer hash */ 558 SHA1Init(&sha1ctx); 559 SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad)); 560 SHA1Update(&sha1ctx, md, 20); 561 SHA1Final(md, &sha1ctx); 562 } 563 564 static int 565 
carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2], 566 unsigned char md[20]) 567 { 568 unsigned char md2[20]; 569 570 carp_hmac_generate(sc, counter, md2); 571 return (bcmp(md, md2, sizeof(md2))); 572 } 573 574 static void 575 carp_setroute(struct carp_softc *sc, int cmd) 576 { 577 #ifdef INET6 578 struct ifaddr_container *ifac; 579 #endif 580 struct carp_vhaddr *vha; 581 582 KKASSERT(cmd == RTM_DELETE || cmd == RTM_ADD); 583 584 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 585 if (vha->vha_iaback == NULL) 586 continue; 587 if (cmd == RTM_DELETE) 588 carp_delroute_vhaddr(sc, vha, FALSE); 589 else 590 carp_addroute_vhaddr(sc, vha); 591 } 592 593 #ifdef INET6 594 TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) { 595 struct ifaddr *ifa = ifac->ifa; 596 597 if (ifa->ifa_addr->sa_family == AF_INET6) { 598 if (cmd == RTM_ADD) 599 in6_ifaddloop(ifa); 600 else 601 in6_ifremloop(ifa); 602 } 603 } 604 #endif /* INET6 */ 605 } 606 607 static int 608 carp_clone_create(struct if_clone *ifc, int unit, caddr_t param __unused) 609 { 610 struct carp_softc *sc; 611 struct ifnet *ifp; 612 613 sc = kmalloc(sizeof(*sc), M_CARP, M_WAITOK | M_ZERO); 614 ifp = &sc->sc_if; 615 616 sc->sc_suppress = 0; 617 sc->sc_advbase = CARP_DFLTINTV; 618 sc->sc_vhid = -1; /* required setting */ 619 sc->sc_advskew = 0; 620 sc->sc_init_counter = 1; 621 sc->sc_naddrs = 0; 622 sc->sc_naddrs6 = 0; 623 624 TAILQ_INIT(&sc->sc_vha_list); 625 626 #ifdef INET6 627 sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL; 628 #endif 629 630 callout_init_mp(&sc->sc_ad_tmo); 631 netmsg_init(&sc->sc_ad_msg.base, NULL, &netisr_adone_rport, 632 MSGF_DROPABLE | MSGF_PRIORITY, carp_send_ad_timeout_dispatch); 633 sc->sc_ad_msg.nc_softc = sc; 634 635 callout_init_mp(&sc->sc_md_tmo); 636 callout_init_mp(&sc->sc_md6_tmo); 637 netmsg_init(&sc->sc_md_msg.base, NULL, &netisr_adone_rport, 638 MSGF_DROPABLE | MSGF_PRIORITY, carp_master_down_timeout_dispatch); 639 sc->sc_md_msg.nc_softc = sc; 640 641 
if_initname(ifp, CARP_IFNAME, unit); 642 ifp->if_softc = sc; 643 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 644 ifp->if_init = carp_init; 645 ifp->if_ioctl = carp_ioctl; 646 ifp->if_start = carp_start; 647 ifq_set_maxlen(&ifp->if_snd, ifqmaxlen); 648 ifq_set_ready(&ifp->if_snd); 649 650 ether_ifattach(ifp, carp_etheraddr, NULL); 651 652 ifp->if_type = IFT_CARP; 653 ifp->if_output = carp_output; 654 655 lwkt_gettoken(&carp_listtok); 656 LIST_INSERT_HEAD(&carpif_list, sc, sc_next); 657 lwkt_reltoken(&carp_listtok); 658 659 return (0); 660 } 661 662 static void 663 carp_clone_destroy_dispatch(netmsg_t msg) 664 { 665 struct netmsg_carp *cmsg = (struct netmsg_carp *)msg; 666 struct carp_softc *sc = cmsg->nc_softc; 667 668 sc->sc_dead = TRUE; 669 carp_detach(sc, TRUE, FALSE); 670 671 callout_stop_sync(&sc->sc_ad_tmo); 672 callout_stop_sync(&sc->sc_md_tmo); 673 callout_stop_sync(&sc->sc_md6_tmo); 674 675 crit_enter(); 676 lwkt_dropmsg(&sc->sc_ad_msg.base.lmsg); 677 lwkt_dropmsg(&sc->sc_md_msg.base.lmsg); 678 crit_exit(); 679 680 lwkt_replymsg(&cmsg->base.lmsg, 0); 681 } 682 683 static int 684 carp_clone_destroy(struct ifnet *ifp) 685 { 686 struct carp_softc *sc = ifp->if_softc; 687 struct netmsg_carp cmsg; 688 689 bzero(&cmsg, sizeof(cmsg)); 690 netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0, 691 carp_clone_destroy_dispatch); 692 cmsg.nc_softc = sc; 693 694 lwkt_domsg(netisr_portfn(0), &cmsg.base.lmsg, 0); 695 696 lwkt_gettoken(&carp_listtok); 697 LIST_REMOVE(sc, sc_next); 698 lwkt_reltoken(&carp_listtok); 699 700 bpfdetach(ifp); 701 if_detach(ifp); 702 703 KASSERT(sc->sc_naddrs == 0, ("certain inet address is still active")); 704 kfree(sc, M_CARP); 705 706 return 0; 707 } 708 709 static struct carp_if * 710 carp_if_remove(struct carp_if *ocif, struct carp_softc *sc) 711 { 712 struct carp_softc_container *oscc, *scc; 713 struct carp_if *cif; 714 int count = 0; 715 #ifdef INVARIANTS 716 int found = 0; 717 #endif 718 719 TAILQ_FOREACH(oscc, ocif, 
scc_link) { 720 ++count; 721 #ifdef INVARIANTS 722 if (oscc->scc_softc == sc) 723 found = 1; 724 #endif 725 } 726 KASSERT(found, ("%s carp_softc is not on carp_if", __func__)); 727 728 if (count == 1) { 729 /* Last one is going to be unlinked */ 730 return NULL; 731 } 732 733 cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO); 734 TAILQ_INIT(cif); 735 736 TAILQ_FOREACH(oscc, ocif, scc_link) { 737 if (oscc->scc_softc == sc) 738 continue; 739 740 scc = kmalloc(sizeof(*scc), M_CARP, M_WAITOK | M_ZERO); 741 scc->scc_softc = oscc->scc_softc; 742 TAILQ_INSERT_TAIL(cif, scc, scc_link); 743 } 744 745 return cif; 746 } 747 748 static struct carp_if * 749 carp_if_insert(struct carp_if *ocif, struct carp_softc *sc) 750 { 751 struct carp_softc_container *oscc; 752 int onlist; 753 754 onlist = 0; 755 if (ocif != NULL) { 756 TAILQ_FOREACH(oscc, ocif, scc_link) { 757 if (oscc->scc_softc == sc) 758 onlist = 1; 759 } 760 } 761 762 #ifdef INVARIANTS 763 if (sc->sc_carpdev != NULL) { 764 KASSERT(onlist, ("%s is not on %s carp list", 765 sc->sc_if.if_xname, sc->sc_carpdev->if_xname)); 766 } else { 767 KASSERT(!onlist, ("%s is already on carp list", 768 sc->sc_if.if_xname)); 769 } 770 #endif 771 772 if (!onlist) { 773 struct carp_if *cif; 774 struct carp_softc_container *new_scc, *scc; 775 int inserted = 0; 776 777 cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO); 778 TAILQ_INIT(cif); 779 780 new_scc = kmalloc(sizeof(*new_scc), M_CARP, M_WAITOK | M_ZERO); 781 new_scc->scc_softc = sc; 782 783 if (ocif != NULL) { 784 TAILQ_FOREACH(oscc, ocif, scc_link) { 785 if (!inserted && 786 oscc->scc_softc->sc_vhid > sc->sc_vhid) { 787 TAILQ_INSERT_TAIL(cif, new_scc, 788 scc_link); 789 inserted = 1; 790 } 791 792 scc = kmalloc(sizeof(*scc), M_CARP, 793 M_WAITOK | M_ZERO); 794 scc->scc_softc = oscc->scc_softc; 795 TAILQ_INSERT_TAIL(cif, scc, scc_link); 796 } 797 } 798 if (!inserted) 799 TAILQ_INSERT_TAIL(cif, new_scc, scc_link); 800 801 return cif; 802 } else { 803 return ocif; 804 } 805 } 
806 807 static void 808 carp_if_free(struct carp_if *cif) 809 { 810 struct carp_softc_container *scc; 811 812 while ((scc = TAILQ_FIRST(cif)) != NULL) { 813 TAILQ_REMOVE(cif, scc, scc_link); 814 kfree(scc, M_CARP); 815 } 816 kfree(cif, M_CARP); 817 } 818 819 static void 820 carp_detach(struct carp_softc *sc, boolean_t detach, boolean_t del_iaback) 821 { 822 carp_suspend(sc, detach); 823 824 carp_multicast_cleanup(sc); 825 #ifdef INET6 826 carp_multicast6_cleanup(sc); 827 #endif 828 829 if (!sc->sc_dead && detach) { 830 struct carp_vhaddr *vha; 831 832 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) 833 carp_deactivate_vhaddr(sc, vha, del_iaback); 834 KKASSERT(sc->sc_naddrs == 0); 835 } 836 837 if (sc->sc_carpdev != NULL) { 838 struct ifnet *ifp = sc->sc_carpdev; 839 struct carp_if *ocif = ifp->if_carp; 840 841 ifp->if_carp = carp_if_remove(ocif, sc); 842 KASSERT(ifp->if_carp != ocif, 843 ("%s carp_if_remove failed", __func__)); 844 845 sc->sc_carpdev = NULL; 846 sc->sc_ia = NULL; 847 848 /* 849 * Make sure that all protocol threads see the 850 * sc_carpdev and if_carp changes 851 */ 852 netmsg_service_sync(); 853 854 if (ifp->if_carp == NULL) { 855 /* 856 * No more carp interfaces using 857 * ifp as the backing interface, 858 * move it out of promiscous mode. 859 */ 860 ifpromisc(ifp, 0); 861 } 862 863 /* 864 * The old carp list could be safely free now, 865 * since no one can access it. 866 */ 867 carp_if_free(ocif); 868 } 869 } 870 871 static void 872 carp_ifdetach_dispatch(netmsg_t msg) 873 { 874 struct netmsg_carp *cmsg = (struct netmsg_carp *)msg; 875 struct ifnet *ifp = cmsg->nc_carpdev; 876 877 while (ifp->if_carp) { 878 struct carp_softc_container *scc; 879 880 scc = TAILQ_FIRST((struct carp_if *)(ifp->if_carp)); 881 carp_detach(scc->scc_softc, TRUE, TRUE); 882 } 883 lwkt_replymsg(&cmsg->base.lmsg, 0); 884 } 885 886 /* Detach an interface from the carp. 
*/ 887 static void 888 carp_ifdetach(void *arg __unused, struct ifnet *ifp) 889 { 890 struct netmsg_carp cmsg; 891 892 ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp); 893 894 bzero(&cmsg, sizeof(cmsg)); 895 netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0, 896 carp_ifdetach_dispatch); 897 cmsg.nc_carpdev = ifp; 898 899 lwkt_domsg(netisr_portfn(0), &cmsg.base.lmsg, 0); 900 } 901 902 /* 903 * process input packet. 904 * we have rearranged checks order compared to the rfc, 905 * but it seems more efficient this way or not possible otherwise. 906 */ 907 int 908 carp_proto_input(struct mbuf **mp, int *offp, int proto) 909 { 910 struct mbuf *m = *mp; 911 struct ip *ip = mtod(m, struct ip *); 912 struct ifnet *ifp = m->m_pkthdr.rcvif; 913 struct carp_header *ch; 914 struct carp_softc *sc; 915 int len, iphlen; 916 917 iphlen = *offp; 918 *mp = NULL; 919 920 carpstats.carps_ipackets++; 921 922 if (!carp_opts[CARPCTL_ALLOW]) { 923 m_freem(m); 924 goto back; 925 } 926 927 /* Check if received on a valid carp interface */ 928 if (ifp->if_type != IFT_CARP) { 929 carpstats.carps_badif++; 930 CARP_LOG("carp_proto_input: packet received on non-carp " 931 "interface: %s\n", ifp->if_xname); 932 m_freem(m); 933 goto back; 934 } 935 936 if (!CARP_IS_RUNNING(ifp)) { 937 carpstats.carps_badif++; 938 CARP_LOG("carp_proto_input: packet received on stopped carp " 939 "interface: %s\n", ifp->if_xname); 940 m_freem(m); 941 goto back; 942 } 943 944 sc = ifp->if_softc; 945 if (sc->sc_carpdev == NULL) { 946 carpstats.carps_badif++; 947 CARP_LOG("carp_proto_input: packet received on defunc carp " 948 "interface: %s\n", ifp->if_xname); 949 m_freem(m); 950 goto back; 951 } 952 953 if (!IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { 954 carpstats.carps_badif++; 955 CARP_LOG("carp_proto_input: non-mcast packet on " 956 "interface: %s\n", ifp->if_xname); 957 m_freem(m); 958 goto back; 959 } 960 961 /* Verify that the IP TTL is CARP_DFLTTL. 
*/ 962 if (ip->ip_ttl != CARP_DFLTTL) { 963 carpstats.carps_badttl++; 964 CARP_LOG("carp_proto_input: received ttl %d != %d on %s\n", 965 ip->ip_ttl, CARP_DFLTTL, ifp->if_xname); 966 m_freem(m); 967 goto back; 968 } 969 970 /* Minimal CARP packet size */ 971 len = iphlen + sizeof(*ch); 972 973 /* 974 * Verify that the received packet length is 975 * not less than the CARP header 976 */ 977 if (m->m_pkthdr.len < len) { 978 carpstats.carps_badlen++; 979 CARP_LOG("packet too short %d on %s\n", m->m_pkthdr.len, 980 ifp->if_xname); 981 m_freem(m); 982 goto back; 983 } 984 985 /* Make sure that CARP header is contiguous */ 986 if (len > m->m_len) { 987 m = m_pullup(m, len); 988 if (m == NULL) { 989 carpstats.carps_hdrops++; 990 CARP_LOG("carp_proto_input: m_pullup failed\n"); 991 goto back; 992 } 993 ip = mtod(m, struct ip *); 994 } 995 ch = (struct carp_header *)((uint8_t *)ip + iphlen); 996 997 /* Verify the CARP checksum */ 998 if (in_cksum_skip(m, len, iphlen)) { 999 carpstats.carps_badsum++; 1000 CARP_LOG("carp_proto_input: checksum failed on %s\n", 1001 ifp->if_xname); 1002 m_freem(m); 1003 goto back; 1004 } 1005 carp_proto_input_c(sc, m, ch, AF_INET); 1006 back: 1007 return(IPPROTO_DONE); 1008 } 1009 1010 #ifdef INET6 1011 int 1012 carp6_proto_input(struct mbuf **mp, int *offp, int proto) 1013 { 1014 struct mbuf *m = *mp; 1015 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 1016 struct ifnet *ifp = m->m_pkthdr.rcvif; 1017 struct carp_header *ch; 1018 struct carp_softc *sc; 1019 u_int len; 1020 1021 carpstats.carps_ipackets6++; 1022 1023 if (!carp_opts[CARPCTL_ALLOW]) { 1024 m_freem(m); 1025 goto back; 1026 } 1027 1028 /* check if received on a valid carp interface */ 1029 if (ifp->if_type != IFT_CARP) { 1030 carpstats.carps_badif++; 1031 CARP_LOG("carp6_proto_input: packet received on non-carp " 1032 "interface: %s\n", ifp->if_xname); 1033 m_freem(m); 1034 goto back; 1035 } 1036 1037 if (!CARP_IS_RUNNING(ifp)) { 1038 carpstats.carps_badif++; 1039 
CARP_LOG("carp_proto_input: packet received on stopped carp " 1040 "interface: %s\n", ifp->if_xname); 1041 m_freem(m); 1042 goto back; 1043 } 1044 1045 sc = ifp->if_softc; 1046 if (sc->sc_carpdev == NULL) { 1047 carpstats.carps_badif++; 1048 CARP_LOG("carp6_proto_input: packet received on defunc-carp " 1049 "interface: %s\n", ifp->if_xname); 1050 m_freem(m); 1051 goto back; 1052 } 1053 1054 /* verify that the IP TTL is 255 */ 1055 if (ip6->ip6_hlim != CARP_DFLTTL) { 1056 carpstats.carps_badttl++; 1057 CARP_LOG("carp6_proto_input: received ttl %d != 255 on %s\n", 1058 ip6->ip6_hlim, ifp->if_xname); 1059 m_freem(m); 1060 goto back; 1061 } 1062 1063 /* verify that we have a complete carp packet */ 1064 len = m->m_len; 1065 IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch)); 1066 if (ch == NULL) { 1067 carpstats.carps_badlen++; 1068 CARP_LOG("carp6_proto_input: packet size %u too small\n", len); 1069 goto back; 1070 } 1071 1072 /* verify the CARP checksum */ 1073 if (in_cksum_range(m, 0, *offp, sizeof(*ch))) { 1074 carpstats.carps_badsum++; 1075 CARP_LOG("carp6_proto_input: checksum failed, on %s\n", 1076 ifp->if_xname); 1077 m_freem(m); 1078 goto back; 1079 } 1080 1081 carp_proto_input_c(sc, m, ch, AF_INET6); 1082 back: 1083 return (IPPROTO_DONE); 1084 } 1085 #endif /* INET6 */ 1086 1087 static void 1088 carp_proto_input_c(struct carp_softc *sc, struct mbuf *m, 1089 struct carp_header *ch, sa_family_t af) 1090 { 1091 struct ifnet *cifp; 1092 uint64_t tmp_counter; 1093 struct timeval sc_tv, ch_tv; 1094 1095 if (sc->sc_vhid != ch->carp_vhid) { 1096 /* 1097 * CARP uses multicast, however, multicast packets 1098 * are tapped to all CARP interfaces on the physical 1099 * interface receiving the CARP packets, so we don't 1100 * update any stats here. 1101 */ 1102 m_freem(m); 1103 return; 1104 } 1105 cifp = &sc->sc_if; 1106 1107 /* verify the CARP version. 
*/ 1108 if (ch->carp_version != CARP_VERSION) { 1109 carpstats.carps_badver++; 1110 CARP_LOG("%s; invalid version %d\n", cifp->if_xname, 1111 ch->carp_version); 1112 m_freem(m); 1113 return; 1114 } 1115 1116 /* verify the hash */ 1117 if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) { 1118 carpstats.carps_badauth++; 1119 CARP_LOG("%s: incorrect hash\n", cifp->if_xname); 1120 m_freem(m); 1121 return; 1122 } 1123 1124 tmp_counter = ntohl(ch->carp_counter[0]); 1125 tmp_counter = tmp_counter<<32; 1126 tmp_counter += ntohl(ch->carp_counter[1]); 1127 1128 /* XXX Replay protection goes here */ 1129 1130 sc->sc_init_counter = 0; 1131 sc->sc_counter = tmp_counter; 1132 1133 sc_tv.tv_sec = sc->sc_advbase; 1134 if (carp_suppress_preempt && sc->sc_advskew < 240) 1135 sc_tv.tv_usec = 240 * 1000000 / 256; 1136 else 1137 sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1138 ch_tv.tv_sec = ch->carp_advbase; 1139 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; 1140 1141 switch (sc->sc_state) { 1142 case INIT: 1143 break; 1144 1145 case MASTER: 1146 /* 1147 * If we receive an advertisement from a master who's going to 1148 * be more frequent than us, go into BACKUP state. 1149 */ 1150 if (timevalcmp(&sc_tv, &ch_tv, >) || 1151 timevalcmp(&sc_tv, &ch_tv, ==)) { 1152 callout_stop(&sc->sc_ad_tmo); 1153 CARP_DEBUG("%s: MASTER -> BACKUP " 1154 "(more frequent advertisement received)\n", 1155 cifp->if_xname); 1156 carp_set_state(sc, BACKUP); 1157 carp_setrun(sc, 0); 1158 carp_setroute(sc, RTM_DELETE); 1159 } 1160 break; 1161 1162 case BACKUP: 1163 /* 1164 * If we're pre-empting masters who advertise slower than us, 1165 * and this one claims to be slower, treat him as down. 
1166 */ 1167 if (carp_opts[CARPCTL_PREEMPT] && 1168 timevalcmp(&sc_tv, &ch_tv, <)) { 1169 CARP_DEBUG("%s: BACKUP -> MASTER " 1170 "(preempting a slower master)\n", cifp->if_xname); 1171 carp_master_down(sc); 1172 break; 1173 } 1174 1175 /* 1176 * If the master is going to advertise at such a low frequency 1177 * that he's guaranteed to time out, we'd might as well just 1178 * treat him as timed out now. 1179 */ 1180 sc_tv.tv_sec = sc->sc_advbase * 3; 1181 if (timevalcmp(&sc_tv, &ch_tv, <)) { 1182 CARP_DEBUG("%s: BACKUP -> MASTER (master timed out)\n", 1183 cifp->if_xname); 1184 carp_master_down(sc); 1185 break; 1186 } 1187 1188 /* 1189 * Otherwise, we reset the counter and wait for the next 1190 * advertisement. 1191 */ 1192 carp_setrun(sc, af); 1193 break; 1194 } 1195 m_freem(m); 1196 } 1197 1198 struct mbuf * 1199 carp_input(void *v, struct mbuf *m) 1200 { 1201 struct carp_if *cif = v; 1202 struct ether_header *eh; 1203 struct carp_softc_container *scc; 1204 struct ifnet *ifp; 1205 1206 eh = mtod(m, struct ether_header *); 1207 1208 ifp = carp_forus(cif, eh->ether_dhost); 1209 if (ifp != NULL) { 1210 ether_reinput_oncpu(ifp, m, REINPUT_RUNBPF); 1211 return NULL; 1212 } 1213 1214 if ((m->m_flags & (M_BCAST | M_MCAST)) == 0) 1215 return m; 1216 1217 /* 1218 * XXX Should really check the list of multicast addresses 1219 * for each CARP interface _before_ copying. 
1220 */ 1221 TAILQ_FOREACH(scc, cif, scc_link) { 1222 struct carp_softc *sc = scc->scc_softc; 1223 struct mbuf *m0; 1224 1225 if ((sc->sc_if.if_flags & IFF_UP) == 0) 1226 continue; 1227 1228 m0 = m_dup(m, MB_DONTWAIT); 1229 if (m0 == NULL) 1230 continue; 1231 1232 ether_reinput_oncpu(&sc->sc_if, m0, REINPUT_RUNBPF); 1233 } 1234 return m; 1235 } 1236 1237 static void 1238 carp_prepare_ad(struct carp_softc *sc, struct carp_header *ch) 1239 { 1240 if (sc->sc_init_counter) { 1241 /* this could also be seconds since unix epoch */ 1242 sc->sc_counter = karc4random(); 1243 sc->sc_counter = sc->sc_counter << 32; 1244 sc->sc_counter += karc4random(); 1245 } else { 1246 sc->sc_counter++; 1247 } 1248 1249 ch->carp_counter[0] = htonl((sc->sc_counter >> 32) & 0xffffffff); 1250 ch->carp_counter[1] = htonl(sc->sc_counter & 0xffffffff); 1251 1252 carp_hmac_generate(sc, ch->carp_counter, ch->carp_md); 1253 } 1254 1255 static void 1256 carp_send_ad_all(void) 1257 { 1258 struct carp_softc *sc; 1259 1260 LIST_FOREACH(sc, &carpif_list, sc_next) { 1261 if (sc->sc_carpdev == NULL) 1262 continue; 1263 1264 if (CARP_IS_RUNNING(&sc->sc_if) && sc->sc_state == MASTER) 1265 carp_send_ad(sc); 1266 } 1267 } 1268 1269 static void 1270 carp_send_ad_timeout(void *xsc) 1271 { 1272 struct carp_softc *sc = xsc; 1273 struct netmsg_carp *cmsg = &sc->sc_ad_msg; 1274 1275 KASSERT(mycpuid == 0, ("%s not on cpu0 but on cpu%d", 1276 __func__, mycpuid)); 1277 1278 crit_enter(); 1279 if (cmsg->base.lmsg.ms_flags & MSGF_DONE) 1280 lwkt_sendmsg(netisr_portfn(0), &cmsg->base.lmsg); 1281 crit_exit(); 1282 } 1283 1284 static void 1285 carp_send_ad_timeout_dispatch(netmsg_t msg) 1286 { 1287 struct netmsg_carp *cmsg = (struct netmsg_carp *)msg; 1288 struct carp_softc *sc = cmsg->nc_softc; 1289 1290 /* Reply ASAP */ 1291 crit_enter(); 1292 lwkt_replymsg(&cmsg->base.lmsg, 0); 1293 crit_exit(); 1294 1295 carp_send_ad(sc); 1296 } 1297 1298 static void 1299 carp_send_ad(struct carp_softc *sc) 1300 { 1301 struct ifnet 
*cifp = &sc->sc_if; 1302 struct carp_header ch; 1303 struct timeval tv; 1304 struct carp_header *ch_ptr; 1305 struct mbuf *m; 1306 int len, advbase, advskew; 1307 1308 if (!CARP_IS_RUNNING(cifp)) { 1309 /* Bow out */ 1310 advbase = 255; 1311 advskew = 255; 1312 } else { 1313 advbase = sc->sc_advbase; 1314 if (!carp_suppress_preempt || sc->sc_advskew > 240) 1315 advskew = sc->sc_advskew; 1316 else 1317 advskew = 240; 1318 tv.tv_sec = advbase; 1319 tv.tv_usec = advskew * 1000000 / 256; 1320 } 1321 1322 ch.carp_version = CARP_VERSION; 1323 ch.carp_type = CARP_ADVERTISEMENT; 1324 ch.carp_vhid = sc->sc_vhid; 1325 ch.carp_advbase = advbase; 1326 ch.carp_advskew = advskew; 1327 ch.carp_authlen = 7; /* XXX DEFINE */ 1328 ch.carp_pad1 = 0; /* must be zero */ 1329 ch.carp_cksum = 0; 1330 1331 #ifdef INET 1332 if (sc->sc_ia != NULL) { 1333 struct ip *ip; 1334 1335 MGETHDR(m, MB_DONTWAIT, MT_HEADER); 1336 if (m == NULL) { 1337 IFNET_STAT_INC(cifp, oerrors, 1); 1338 carpstats.carps_onomem++; 1339 /* XXX maybe less ? 
*/ 1340 if (advbase != 255 || advskew != 255) 1341 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv), 1342 carp_send_ad_timeout, sc); 1343 return; 1344 } 1345 len = sizeof(*ip) + sizeof(ch); 1346 m->m_pkthdr.len = len; 1347 m->m_pkthdr.rcvif = NULL; 1348 m->m_len = len; 1349 MH_ALIGN(m, m->m_len); 1350 m->m_flags |= M_MCAST; 1351 ip = mtod(m, struct ip *); 1352 ip->ip_v = IPVERSION; 1353 ip->ip_hl = sizeof(*ip) >> 2; 1354 ip->ip_tos = IPTOS_LOWDELAY; 1355 ip->ip_len = len; 1356 ip->ip_id = ip_newid(); 1357 ip->ip_off = IP_DF; 1358 ip->ip_ttl = CARP_DFLTTL; 1359 ip->ip_p = IPPROTO_CARP; 1360 ip->ip_sum = 0; 1361 ip->ip_src = sc->sc_ia->ia_addr.sin_addr; 1362 ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP); 1363 1364 ch_ptr = (struct carp_header *)(&ip[1]); 1365 bcopy(&ch, ch_ptr, sizeof(ch)); 1366 carp_prepare_ad(sc, ch_ptr); 1367 ch_ptr->carp_cksum = in_cksum_skip(m, len, sizeof(*ip)); 1368 1369 getmicrotime(&cifp->if_lastchange); 1370 IFNET_STAT_INC(cifp, opackets, 1); 1371 IFNET_STAT_INC(cifp, obytes, len); 1372 carpstats.carps_opackets++; 1373 1374 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) { 1375 IFNET_STAT_INC(cifp, oerrors, 1); 1376 if (sc->sc_sendad_errors < INT_MAX) 1377 sc->sc_sendad_errors++; 1378 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 1379 carp_suppress_preempt++; 1380 if (carp_suppress_preempt == 1) { 1381 carp_send_ad_all(); 1382 } 1383 } 1384 sc->sc_sendad_success = 0; 1385 } else { 1386 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 1387 if (++sc->sc_sendad_success >= 1388 CARP_SENDAD_MIN_SUCCESS) { 1389 carp_suppress_preempt--; 1390 sc->sc_sendad_errors = 0; 1391 } 1392 } else { 1393 sc->sc_sendad_errors = 0; 1394 } 1395 } 1396 } 1397 #endif /* INET */ 1398 #ifdef INET6 1399 if (sc->sc_ia6) { 1400 struct ip6_hdr *ip6; 1401 1402 MGETHDR(m, MB_DONTWAIT, MT_HEADER); 1403 if (m == NULL) { 1404 IFNET_STAT_INC(cifp, oerrors, 1); 1405 carpstats.carps_onomem++; 1406 /* XXX maybe less ? 
*/ 1407 if (advbase != 255 || advskew != 255) 1408 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv), 1409 carp_send_ad_timeout, sc); 1410 return; 1411 } 1412 len = sizeof(*ip6) + sizeof(ch); 1413 m->m_pkthdr.len = len; 1414 m->m_pkthdr.rcvif = NULL; 1415 m->m_len = len; 1416 MH_ALIGN(m, m->m_len); 1417 m->m_flags |= M_MCAST; 1418 ip6 = mtod(m, struct ip6_hdr *); 1419 bzero(ip6, sizeof(*ip6)); 1420 ip6->ip6_vfc |= IPV6_VERSION; 1421 ip6->ip6_hlim = CARP_DFLTTL; 1422 ip6->ip6_nxt = IPPROTO_CARP; 1423 bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src, 1424 sizeof(struct in6_addr)); 1425 /* set the multicast destination */ 1426 1427 ip6->ip6_dst.s6_addr16[0] = htons(0xff02); 1428 ip6->ip6_dst.s6_addr8[15] = 0x12; 1429 if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) { 1430 IFNET_STAT_INC(cifp, oerrors, 1); 1431 m_freem(m); 1432 CARP_LOG("%s: in6_setscope failed\n", __func__); 1433 return; 1434 } 1435 1436 ch_ptr = (struct carp_header *)(&ip6[1]); 1437 bcopy(&ch, ch_ptr, sizeof(ch)); 1438 carp_prepare_ad(sc, ch_ptr); 1439 ch_ptr->carp_cksum = in_cksum_skip(m, len, sizeof(*ip6)); 1440 1441 getmicrotime(&cifp->if_lastchange); 1442 IFNET_STAT_INC(cifp, opackets, 1); 1443 IFNET_STAT_INC(cifp, obytes, len); 1444 carpstats.carps_opackets6++; 1445 1446 if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) { 1447 IFNET_STAT_INC(cifp, oerrors, 1); 1448 if (sc->sc_sendad_errors < INT_MAX) 1449 sc->sc_sendad_errors++; 1450 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { 1451 carp_suppress_preempt++; 1452 if (carp_suppress_preempt == 1) { 1453 carp_send_ad_all(); 1454 } 1455 } 1456 sc->sc_sendad_success = 0; 1457 } else { 1458 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { 1459 if (++sc->sc_sendad_success >= 1460 CARP_SENDAD_MIN_SUCCESS) { 1461 carp_suppress_preempt--; 1462 sc->sc_sendad_errors = 0; 1463 } 1464 } else { 1465 sc->sc_sendad_errors = 0; 1466 } 1467 } 1468 } 1469 #endif /* INET6 */ 1470 1471 if (advbase != 255 || advskew != 255) 1472 
callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv), 1473 carp_send_ad_timeout, sc); 1474 } 1475 1476 /* 1477 * Broadcast a gratuitous ARP request containing 1478 * the virtual router MAC address for each IP address 1479 * associated with the virtual router. 1480 */ 1481 static void 1482 carp_send_arp(struct carp_softc *sc) 1483 { 1484 const struct carp_vhaddr *vha; 1485 1486 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 1487 if (vha->vha_iaback == NULL) 1488 continue; 1489 arp_gratuitous(&sc->sc_if, &vha->vha_ia->ia_ifa); 1490 } 1491 } 1492 1493 #ifdef INET6 1494 static void 1495 carp_send_na(struct carp_softc *sc) 1496 { 1497 struct ifaddr_container *ifac; 1498 struct in6_addr *in6; 1499 static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; 1500 1501 TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) { 1502 struct ifaddr *ifa = ifac->ifa; 1503 1504 if (ifa->ifa_addr->sa_family != AF_INET6) 1505 continue; 1506 1507 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr; 1508 nd6_na_output(sc->sc_carpdev, &mcast, in6, 1509 ND_NA_FLAG_OVERRIDE, 1, NULL); 1510 DELAY(1000); /* XXX */ 1511 } 1512 } 1513 #endif /* INET6 */ 1514 1515 static __inline const struct carp_vhaddr * 1516 carp_find_addr(const struct carp_softc *sc, const struct in_addr *addr) 1517 { 1518 struct carp_vhaddr *vha; 1519 1520 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 1521 if (vha->vha_iaback == NULL) 1522 continue; 1523 1524 if (vha->vha_ia->ia_addr.sin_addr.s_addr == addr->s_addr) 1525 return vha; 1526 } 1527 return NULL; 1528 } 1529 1530 #ifdef notyet 1531 static int 1532 carp_iamatch_balance(const struct carp_if *cif, const struct in_addr *itaddr, 1533 const struct in_addr *isaddr, uint8_t **enaddr) 1534 { 1535 const struct carp_softc *vh; 1536 int index, count = 0; 1537 1538 /* 1539 * XXX proof of concept implementation. 1540 * We use the source ip to decide which virtual host should 1541 * handle the request. 
If we're master of that virtual host, 1542 * then we respond, otherwise, just drop the arp packet on 1543 * the floor. 1544 */ 1545 1546 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1547 if (!CARP_IS_RUNNING(&vh->sc_if)) 1548 continue; 1549 1550 if (carp_find_addr(vh, itaddr) != NULL) 1551 count++; 1552 } 1553 if (count == 0) 1554 return 0; 1555 1556 /* this should be a hash, like pf_hash() */ 1557 index = ntohl(isaddr->s_addr) % count; 1558 count = 0; 1559 1560 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1561 if (!CARP_IS_RUNNING(&vh->sc_if)) 1562 continue; 1563 1564 if (carp_find_addr(vh, itaddr) == NULL) 1565 continue; 1566 1567 if (count == index) { 1568 if (vh->sc_state == MASTER) { 1569 *enaddr = IF_LLADDR(&vh->sc_if); 1570 return 1; 1571 } else { 1572 return 0; 1573 } 1574 } 1575 count++; 1576 } 1577 return 0; 1578 } 1579 #endif 1580 1581 int 1582 carp_iamatch(const struct in_ifaddr *ia) 1583 { 1584 const struct carp_softc *sc = ia->ia_ifp->if_softc; 1585 1586 KASSERT(&curthread->td_msgport == netisr_portfn(0), 1587 ("not in netisr0")); 1588 1589 #ifdef notyet 1590 if (carp_opts[CARPCTL_ARPBALANCE]) 1591 return carp_iamatch_balance(cif, itaddr, isaddr, enaddr); 1592 #endif 1593 1594 if (!CARP_IS_RUNNING(&sc->sc_if) || sc->sc_state != MASTER) 1595 return 0; 1596 1597 return 1; 1598 } 1599 1600 #ifdef INET6 1601 struct ifaddr * 1602 carp_iamatch6(void *v, struct in6_addr *taddr) 1603 { 1604 #ifdef foo 1605 struct carp_if *cif = v; 1606 struct carp_softc *vh; 1607 1608 TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { 1609 struct ifaddr_container *ifac; 1610 1611 TAILQ_FOREACH(ifac, &vh->sc_if.if_addrheads[mycpuid], 1612 ifa_link) { 1613 struct ifaddr *ifa = ifac->ifa; 1614 1615 if (IN6_ARE_ADDR_EQUAL(taddr, 1616 &ifatoia6(ifa)->ia_addr.sin6_addr) && 1617 CARP_IS_RUNNING(&vh->sc_if) && 1618 vh->sc_state == MASTER) { 1619 return (ifa); 1620 } 1621 } 1622 } 1623 #endif 1624 return (NULL); 1625 } 1626 1627 void * 1628 carp_macmatch6(void *v, struct mbuf *m, const 
struct in6_addr *taddr) 1629 { 1630 #ifdef foo 1631 struct m_tag *mtag; 1632 struct carp_if *cif = v; 1633 struct carp_softc *sc; 1634 1635 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) { 1636 struct ifaddr_container *ifac; 1637 1638 TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], 1639 ifa_link) { 1640 struct ifaddr *ifa = ifac->ifa; 1641 1642 if (IN6_ARE_ADDR_EQUAL(taddr, 1643 &ifatoia6(ifa)->ia_addr.sin6_addr) && 1644 CARP_IS_RUNNING(&sc->sc_if)) { 1645 struct ifnet *ifp = &sc->sc_if; 1646 1647 mtag = m_tag_get(PACKET_TAG_CARP, 1648 sizeof(struct ifnet *), MB_DONTWAIT); 1649 if (mtag == NULL) { 1650 /* better a bit than nothing */ 1651 return (IF_LLADDR(ifp)); 1652 } 1653 bcopy(&ifp, (caddr_t)(mtag + 1), 1654 sizeof(struct ifnet *)); 1655 m_tag_prepend(m, mtag); 1656 1657 return (IF_LLADDR(ifp)); 1658 } 1659 } 1660 } 1661 #endif 1662 return (NULL); 1663 } 1664 #endif 1665 1666 static struct ifnet * 1667 carp_forus(struct carp_if *cif, const uint8_t *dhost) 1668 { 1669 struct carp_softc_container *scc; 1670 1671 if (memcmp(dhost, carp_etheraddr, ETHER_ADDR_LEN - 1) != 0) 1672 return NULL; 1673 1674 TAILQ_FOREACH(scc, cif, scc_link) { 1675 struct carp_softc *sc = scc->scc_softc; 1676 struct ifnet *ifp = &sc->sc_if; 1677 1678 if (CARP_IS_RUNNING(ifp) && sc->sc_state == MASTER && 1679 !bcmp(dhost, IF_LLADDR(ifp), ETHER_ADDR_LEN)) 1680 return ifp; 1681 } 1682 return NULL; 1683 } 1684 1685 static void 1686 carp_master_down_timeout(void *xsc) 1687 { 1688 struct carp_softc *sc = xsc; 1689 struct netmsg_carp *cmsg = &sc->sc_md_msg; 1690 1691 KASSERT(mycpuid == 0, ("%s not on cpu0 but on cpu%d", 1692 __func__, mycpuid)); 1693 1694 crit_enter(); 1695 if (cmsg->base.lmsg.ms_flags & MSGF_DONE) 1696 lwkt_sendmsg(netisr_portfn(0), &cmsg->base.lmsg); 1697 crit_exit(); 1698 } 1699 1700 static void 1701 carp_master_down_timeout_dispatch(netmsg_t msg) 1702 { 1703 struct netmsg_carp *cmsg = (struct netmsg_carp *)msg; 1704 struct carp_softc *sc = cmsg->nc_softc; 1705 1706 /* Reply 
ASAP */ 1707 crit_enter(); 1708 lwkt_replymsg(&cmsg->base.lmsg, 0); 1709 crit_exit(); 1710 1711 CARP_DEBUG("%s: BACKUP -> MASTER (master timed out)\n", 1712 sc->sc_if.if_xname); 1713 carp_master_down(sc); 1714 } 1715 1716 static void 1717 carp_master_down(struct carp_softc *sc) 1718 { 1719 switch (sc->sc_state) { 1720 case INIT: 1721 kprintf("%s: master_down event in INIT state\n", 1722 sc->sc_if.if_xname); 1723 break; 1724 1725 case MASTER: 1726 break; 1727 1728 case BACKUP: 1729 carp_set_state(sc, MASTER); 1730 carp_send_ad(sc); 1731 carp_send_arp(sc); 1732 #ifdef INET6 1733 carp_send_na(sc); 1734 #endif /* INET6 */ 1735 carp_setrun(sc, 0); 1736 carp_setroute(sc, RTM_ADD); 1737 break; 1738 } 1739 } 1740 1741 /* 1742 * When in backup state, af indicates whether to reset the master down timer 1743 * for v4 or v6. If it's set to zero, reset the ones which are already pending. 1744 */ 1745 static void 1746 carp_setrun(struct carp_softc *sc, sa_family_t af) 1747 { 1748 struct ifnet *cifp = &sc->sc_if; 1749 struct timeval tv; 1750 1751 if (sc->sc_carpdev == NULL) { 1752 carp_set_state(sc, INIT); 1753 return; 1754 } 1755 1756 if ((cifp->if_flags & IFF_RUNNING) && sc->sc_vhid > 0 && 1757 (sc->sc_naddrs || sc->sc_naddrs6)) { 1758 /* Nothing */ 1759 } else { 1760 carp_setroute(sc, RTM_DELETE); 1761 return; 1762 } 1763 1764 switch (sc->sc_state) { 1765 case INIT: 1766 if (carp_opts[CARPCTL_PREEMPT] && !carp_suppress_preempt) { 1767 carp_send_ad(sc); 1768 carp_send_arp(sc); 1769 #ifdef INET6 1770 carp_send_na(sc); 1771 #endif /* INET6 */ 1772 CARP_DEBUG("%s: INIT -> MASTER (preempting)\n", 1773 cifp->if_xname); 1774 carp_set_state(sc, MASTER); 1775 carp_setroute(sc, RTM_ADD); 1776 } else { 1777 CARP_DEBUG("%s: INIT -> BACKUP\n", cifp->if_xname); 1778 carp_set_state(sc, BACKUP); 1779 carp_setroute(sc, RTM_DELETE); 1780 carp_setrun(sc, 0); 1781 } 1782 break; 1783 1784 case BACKUP: 1785 callout_stop(&sc->sc_ad_tmo); 1786 tv.tv_sec = 3 * sc->sc_advbase; 1787 tv.tv_usec = 
sc->sc_advskew * 1000000 / 256; 1788 switch (af) { 1789 #ifdef INET 1790 case AF_INET: 1791 callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv), 1792 carp_master_down_timeout, sc); 1793 break; 1794 #endif /* INET */ 1795 #ifdef INET6 1796 case AF_INET6: 1797 callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv), 1798 carp_master_down_timeout, sc); 1799 break; 1800 #endif /* INET6 */ 1801 default: 1802 if (sc->sc_naddrs) 1803 callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv), 1804 carp_master_down_timeout, sc); 1805 if (sc->sc_naddrs6) 1806 callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv), 1807 carp_master_down_timeout, sc); 1808 break; 1809 } 1810 break; 1811 1812 case MASTER: 1813 tv.tv_sec = sc->sc_advbase; 1814 tv.tv_usec = sc->sc_advskew * 1000000 / 256; 1815 callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv), 1816 carp_send_ad_timeout, sc); 1817 break; 1818 } 1819 } 1820 1821 static void 1822 carp_multicast_cleanup(struct carp_softc *sc) 1823 { 1824 struct ip_moptions *imo = &sc->sc_imo; 1825 1826 if (imo->imo_num_memberships == 0) 1827 return; 1828 KKASSERT(imo->imo_num_memberships == 1); 1829 1830 in_delmulti(imo->imo_membership[0]); 1831 imo->imo_membership[0] = NULL; 1832 imo->imo_num_memberships = 0; 1833 imo->imo_multicast_ifp = NULL; 1834 } 1835 1836 #ifdef INET6 1837 static void 1838 carp_multicast6_cleanup(struct carp_softc *sc) 1839 { 1840 struct ip6_moptions *im6o = &sc->sc_im6o; 1841 1842 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 1843 struct in6_multi_mship *imm = 1844 LIST_FIRST(&im6o->im6o_memberships); 1845 1846 LIST_REMOVE(imm, i6mm_chain); 1847 in6_leavegroup(imm); 1848 } 1849 im6o->im6o_multicast_ifp = NULL; 1850 } 1851 #endif 1852 1853 static void 1854 carp_ioctl_getvhaddr_dispatch(netmsg_t msg) 1855 { 1856 struct netmsg_carp *cmsg = (struct netmsg_carp *)msg; 1857 struct carp_softc *sc = cmsg->nc_softc; 1858 const struct carp_vhaddr *vha; 1859 struct ifcarpvhaddr *carpa, *carpa0; 1860 int count, len, error = 0; 1861 1862 count = 0; 1863 
TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) 1864 ++count; 1865 1866 if (cmsg->nc_datalen == 0) { 1867 cmsg->nc_datalen = count * sizeof(*carpa); 1868 goto back; 1869 } else if (count == 0 || cmsg->nc_datalen < sizeof(*carpa)) { 1870 cmsg->nc_datalen = 0; 1871 goto back; 1872 } 1873 len = min(cmsg->nc_datalen, sizeof(*carpa) * count); 1874 KKASSERT(len >= sizeof(*carpa)); 1875 1876 carpa0 = carpa = kmalloc(len, M_TEMP, M_WAITOK | M_NULLOK | M_ZERO); 1877 if (carpa == NULL) { 1878 error = ENOMEM; 1879 goto back; 1880 } 1881 1882 count = 0; 1883 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 1884 if (len < sizeof(*carpa)) 1885 break; 1886 1887 carpa->carpa_flags = vha->vha_flags; 1888 carpa->carpa_addr.sin_family = AF_INET; 1889 carpa->carpa_addr.sin_addr = vha->vha_ia->ia_addr.sin_addr; 1890 1891 carpa->carpa_baddr.sin_family = AF_INET; 1892 if (vha->vha_iaback == NULL) { 1893 carpa->carpa_baddr.sin_addr.s_addr = INADDR_ANY; 1894 } else { 1895 carpa->carpa_baddr.sin_addr = 1896 vha->vha_iaback->ia_addr.sin_addr; 1897 } 1898 1899 ++carpa; 1900 ++count; 1901 len -= sizeof(*carpa); 1902 } 1903 cmsg->nc_datalen = sizeof(*carpa) * count; 1904 KKASSERT(cmsg->nc_datalen > 0); 1905 1906 cmsg->nc_data = carpa0; 1907 1908 back: 1909 lwkt_replymsg(&cmsg->base.lmsg, error); 1910 } 1911 1912 static int 1913 carp_ioctl_getvhaddr(struct carp_softc *sc, struct ifdrv *ifd) 1914 { 1915 struct ifnet *ifp = &sc->arpcom.ac_if; 1916 struct netmsg_carp cmsg; 1917 int error; 1918 1919 ASSERT_IFNET_SERIALIZED_ALL(ifp); 1920 ifnet_deserialize_all(ifp); 1921 1922 bzero(&cmsg, sizeof(cmsg)); 1923 netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0, 1924 carp_ioctl_getvhaddr_dispatch); 1925 cmsg.nc_softc = sc; 1926 cmsg.nc_datalen = ifd->ifd_len; 1927 1928 error = lwkt_domsg(netisr_portfn(0), &cmsg.base.lmsg, 0); 1929 1930 if (!error) { 1931 if (cmsg.nc_data != NULL) { 1932 error = copyout(cmsg.nc_data, ifd->ifd_data, 1933 cmsg.nc_datalen); 1934 kfree(cmsg.nc_data, M_TEMP); 1935 } 1936 
ifd->ifd_len = cmsg.nc_datalen; 1937 } else { 1938 KASSERT(cmsg.nc_data == NULL, 1939 ("%s temp vhaddr is alloc upon error", __func__)); 1940 } 1941 1942 ifnet_serialize_all(ifp); 1943 return error; 1944 } 1945 1946 static int 1947 carp_config_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha, 1948 struct in_ifaddr *ia_del) 1949 { 1950 struct ifnet *ifp; 1951 struct in_ifaddr *ia_if; 1952 struct in_ifaddr_container *iac; 1953 const struct sockaddr_in *sin; 1954 u_long iaddr; 1955 int own; 1956 1957 KKASSERT(vha->vha_ia != NULL); 1958 1959 sin = &vha->vha_ia->ia_addr; 1960 iaddr = ntohl(sin->sin_addr.s_addr); 1961 1962 ia_if = NULL; 1963 own = 0; 1964 TAILQ_FOREACH(iac, &in_ifaddrheads[mycpuid], ia_link) { 1965 struct in_ifaddr *ia = iac->ia; 1966 1967 if (ia == ia_del) 1968 continue; 1969 1970 if (ia->ia_ifp->if_type == IFT_CARP) 1971 continue; 1972 1973 if ((ia->ia_ifp->if_flags & IFF_UP) == 0) 1974 continue; 1975 1976 /* and, yeah, we need a multicast-capable iface too */ 1977 if ((ia->ia_ifp->if_flags & IFF_MULTICAST) == 0) 1978 continue; 1979 1980 if ((iaddr & ia->ia_subnetmask) == ia->ia_subnet) { 1981 if (sin->sin_addr.s_addr == 1982 ia->ia_addr.sin_addr.s_addr) 1983 own = 1; 1984 if (ia_if == NULL) 1985 ia_if = ia; 1986 else if (sc->sc_carpdev != NULL && 1987 sc->sc_carpdev == ia->ia_ifp) 1988 ia_if = ia; 1989 } 1990 } 1991 1992 carp_deactivate_vhaddr(sc, vha, FALSE); 1993 if (!ia_if) 1994 return ENOENT; 1995 1996 ifp = ia_if->ia_ifp; 1997 1998 /* XXX Don't allow parent iface to be changed */ 1999 if (sc->sc_carpdev != NULL && sc->sc_carpdev != ifp) 2000 return EEXIST; 2001 2002 return carp_activate_vhaddr(sc, vha, ifp, ia_if, own); 2003 } 2004 2005 static void 2006 carp_add_addr(struct carp_softc *sc, struct ifaddr *carp_ifa) 2007 { 2008 struct carp_vhaddr *vha_new; 2009 struct in_ifaddr *carp_ia; 2010 #ifdef INVARIANTS 2011 struct carp_vhaddr *vha; 2012 #endif 2013 2014 KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET); 2015 carp_ia = 
ifatoia(carp_ifa); 2016 2017 #ifdef INVARIANTS 2018 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) 2019 KKASSERT(vha->vha_ia != NULL && vha->vha_ia != carp_ia); 2020 #endif 2021 2022 vha_new = kmalloc(sizeof(*vha_new), M_CARP, M_WAITOK | M_ZERO); 2023 vha_new->vha_ia = carp_ia; 2024 carp_insert_vhaddr(sc, vha_new); 2025 2026 if (carp_config_vhaddr(sc, vha_new, NULL) != 0) { 2027 /* 2028 * If the above configuration fails, it may only mean 2029 * that the new address is problematic. However, the 2030 * carp(4) interface may already have several working 2031 * addresses. Since the expected behaviour of 2032 * SIOC[AS]IFADDR is to put the NIC into working state, 2033 * we try starting the state machine manually here with 2034 * the hope that the carp(4)'s previously working 2035 * addresses still could be brought up. 2036 */ 2037 carp_hmac_prepare(sc); 2038 carp_set_state(sc, INIT); 2039 carp_setrun(sc, 0); 2040 } 2041 } 2042 2043 static void 2044 carp_del_addr(struct carp_softc *sc, struct ifaddr *carp_ifa) 2045 { 2046 struct carp_vhaddr *vha; 2047 struct in_ifaddr *carp_ia; 2048 2049 KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET); 2050 carp_ia = ifatoia(carp_ifa); 2051 2052 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 2053 KKASSERT(vha->vha_ia != NULL); 2054 if (vha->vha_ia == carp_ia) 2055 break; 2056 } 2057 KASSERT(vha != NULL, ("no corresponding vhaddr %p", carp_ifa)); 2058 2059 /* 2060 * Remove the vhaddr from the list before deactivating 2061 * the vhaddr, so that the HMAC could be correctly 2062 * updated in carp_deactivate_vhaddr() 2063 */ 2064 carp_remove_vhaddr(sc, vha); 2065 2066 carp_deactivate_vhaddr(sc, vha, FALSE); 2067 kfree(vha, M_CARP); 2068 } 2069 2070 static void 2071 carp_config_addr(struct carp_softc *sc, struct ifaddr *carp_ifa) 2072 { 2073 struct carp_vhaddr *vha; 2074 struct in_ifaddr *carp_ia; 2075 2076 KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET); 2077 carp_ia = ifatoia(carp_ifa); 2078 2079 TAILQ_FOREACH(vha, &sc->sc_vha_list, 
vha_link) { 2080 KKASSERT(vha->vha_ia != NULL); 2081 if (vha->vha_ia == carp_ia) 2082 break; 2083 } 2084 KASSERT(vha != NULL, ("no corresponding vhaddr %p", carp_ifa)); 2085 2086 /* Remove then reinsert, to keep the vhaddr list sorted */ 2087 carp_remove_vhaddr(sc, vha); 2088 carp_insert_vhaddr(sc, vha); 2089 2090 if (carp_config_vhaddr(sc, vha, NULL) != 0) { 2091 /* See the comment in carp_add_addr() */ 2092 carp_hmac_prepare(sc); 2093 carp_set_state(sc, INIT); 2094 carp_setrun(sc, 0); 2095 } 2096 } 2097 2098 #ifdef notyet 2099 2100 #ifdef INET6 2101 static int 2102 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) 2103 { 2104 struct ifnet *ifp; 2105 struct carp_if *cif; 2106 struct in6_ifaddr *ia, *ia_if; 2107 struct ip6_moptions *im6o = &sc->sc_im6o; 2108 struct in6_multi_mship *imm; 2109 struct in6_addr in6; 2110 int own, error; 2111 2112 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 2113 carp_setrun(sc, 0); 2114 return (0); 2115 } 2116 2117 /* we have to do it by hands to check we won't match on us */ 2118 ia_if = NULL; own = 0; 2119 for (ia = in6_ifaddr; ia; ia = ia->ia_next) { 2120 int i; 2121 2122 for (i = 0; i < 4; i++) { 2123 if ((sin6->sin6_addr.s6_addr32[i] & 2124 ia->ia_prefixmask.sin6_addr.s6_addr32[i]) != 2125 (ia->ia_addr.sin6_addr.s6_addr32[i] & 2126 ia->ia_prefixmask.sin6_addr.s6_addr32[i])) 2127 break; 2128 } 2129 /* and, yeah, we need a multicast-capable iface too */ 2130 if (ia->ia_ifp != &sc->sc_if && 2131 (ia->ia_ifp->if_flags & IFF_MULTICAST) && 2132 (i == 4)) { 2133 if (!ia_if) 2134 ia_if = ia; 2135 if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, 2136 &ia->ia_addr.sin6_addr)) 2137 own++; 2138 } 2139 } 2140 2141 if (!ia_if) 2142 return (EADDRNOTAVAIL); 2143 ia = ia_if; 2144 ifp = ia->ia_ifp; 2145 2146 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 || 2147 (im6o->im6o_multicast_ifp && im6o->im6o_multicast_ifp != ifp)) 2148 return (EADDRNOTAVAIL); 2149 2150 if (!sc->sc_naddrs6) { 2151 im6o->im6o_multicast_ifp = ifp; 2152 
2153 /* join CARP multicast address */ 2154 bzero(&in6, sizeof(in6)); 2155 in6.s6_addr16[0] = htons(0xff02); 2156 in6.s6_addr8[15] = 0x12; 2157 if (in6_setscope(&in6, ifp, NULL) != 0) 2158 goto cleanup; 2159 if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL) 2160 goto cleanup; 2161 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain); 2162 2163 /* join solicited multicast address */ 2164 bzero(&in6, sizeof(in6)); 2165 in6.s6_addr16[0] = htons(0xff02); 2166 in6.s6_addr32[1] = 0; 2167 in6.s6_addr32[2] = htonl(1); 2168 in6.s6_addr32[3] = sin6->sin6_addr.s6_addr32[3]; 2169 in6.s6_addr8[12] = 0xff; 2170 if (in6_setscope(&in6, ifp, NULL) != 0) 2171 goto cleanup; 2172 if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL) 2173 goto cleanup; 2174 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain); 2175 } 2176 2177 #ifdef foo 2178 if (!ifp->if_carp) { 2179 cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO); 2180 2181 if ((error = ifpromisc(ifp, 1))) { 2182 kfree(cif, M_CARP); 2183 goto cleanup; 2184 } 2185 2186 TAILQ_INIT(&cif->vhif_vrs); 2187 ifp->if_carp = cif; 2188 } else { 2189 struct carp_softc *vr; 2190 2191 cif = ifp->if_carp; 2192 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 2193 if (vr != sc && vr->sc_vhid == sc->sc_vhid) { 2194 error = EINVAL; 2195 goto cleanup; 2196 } 2197 } 2198 } 2199 #endif 2200 sc->sc_ia6 = ia; 2201 sc->sc_carpdev = ifp; 2202 2203 #ifdef foo 2204 { /* XXX prevent endless loop if already in queue */ 2205 struct carp_softc *vr, *after = NULL; 2206 int myself = 0; 2207 cif = ifp->if_carp; 2208 2209 TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) { 2210 if (vr == sc) 2211 myself = 1; 2212 if (vr->sc_vhid < sc->sc_vhid) 2213 after = vr; 2214 } 2215 2216 if (!myself) { 2217 /* We're trying to keep things in order */ 2218 if (after == NULL) 2219 TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list); 2220 else 2221 TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list); 2222 } 2223 } 2224 #endif 2225 2226 sc->sc_naddrs6++; 2227 if (own) 2228 
sc->sc_advskew = 0; 2229 carp_sc_state(sc); 2230 carp_setrun(sc, 0); 2231 2232 return (0); 2233 2234 cleanup: 2235 /* clean up multicast memberships */ 2236 if (!sc->sc_naddrs6) { 2237 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 2238 imm = LIST_FIRST(&im6o->im6o_memberships); 2239 LIST_REMOVE(imm, i6mm_chain); 2240 in6_leavegroup(imm); 2241 } 2242 } 2243 return (error); 2244 } 2245 2246 static int 2247 carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) 2248 { 2249 int error = 0; 2250 2251 if (!--sc->sc_naddrs6) { 2252 struct carp_if *cif = sc->sc_carpdev->if_carp; 2253 struct ip6_moptions *im6o = &sc->sc_im6o; 2254 2255 callout_stop(&sc->sc_ad_tmo); 2256 sc->sc_vhid = -1; 2257 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 2258 struct in6_multi_mship *imm = 2259 LIST_FIRST(&im6o->im6o_memberships); 2260 2261 LIST_REMOVE(imm, i6mm_chain); 2262 in6_leavegroup(imm); 2263 } 2264 im6o->im6o_multicast_ifp = NULL; 2265 #ifdef foo 2266 TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list); 2267 if (TAILQ_EMPTY(&cif->vhif_vrs)) { 2268 sc->sc_carpdev->if_carp = NULL; 2269 kfree(cif, M_IFADDR); 2270 } 2271 #endif 2272 } 2273 return (error); 2274 } 2275 #endif /* INET6 */ 2276 2277 #endif 2278 2279 static int 2280 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr, struct ucred *cr) 2281 { 2282 struct carp_softc *sc = ifp->if_softc; 2283 struct ifreq *ifr = (struct ifreq *)addr; 2284 struct ifdrv *ifd = (struct ifdrv *)addr; 2285 int error = 0; 2286 2287 ASSERT_IFNET_SERIALIZED_ALL(ifp); 2288 2289 switch (cmd) { 2290 case SIOCSIFFLAGS: 2291 if (ifp->if_flags & IFF_UP) { 2292 if ((ifp->if_flags & IFF_RUNNING) == 0) 2293 carp_init(sc); 2294 } else if (ifp->if_flags & IFF_RUNNING) { 2295 carp_ioctl_stop(sc); 2296 } 2297 break; 2298 2299 case SIOCSVH: 2300 error = carp_ioctl_setvh(sc, ifr->ifr_data, cr); 2301 break; 2302 2303 case SIOCGVH: 2304 error = carp_ioctl_getvh(sc, ifr->ifr_data, cr); 2305 break; 2306 2307 case SIOCGDRVSPEC: 2308 switch (ifd->ifd_cmd) { 2309 case 
CARPGDEVNAME: 2310 error = carp_ioctl_getdevname(sc, ifd); 2311 break; 2312 2313 case CARPGVHADDR: 2314 error = carp_ioctl_getvhaddr(sc, ifd); 2315 break; 2316 2317 default: 2318 error = EINVAL; 2319 break; 2320 } 2321 break; 2322 2323 default: 2324 error = ether_ioctl(ifp, cmd, addr); 2325 break; 2326 } 2327 2328 return error; 2329 } 2330 2331 static void 2332 carp_ioctl_stop_dispatch(netmsg_t msg) 2333 { 2334 struct netmsg_carp *cmsg = (struct netmsg_carp *)msg; 2335 struct carp_softc *sc = cmsg->nc_softc; 2336 2337 carp_stop(sc, FALSE); 2338 lwkt_replymsg(&cmsg->base.lmsg, 0); 2339 } 2340 2341 static void 2342 carp_ioctl_stop(struct carp_softc *sc) 2343 { 2344 struct ifnet *ifp = &sc->arpcom.ac_if; 2345 struct netmsg_carp cmsg; 2346 2347 ASSERT_IFNET_SERIALIZED_ALL(ifp); 2348 2349 ifnet_deserialize_all(ifp); 2350 2351 bzero(&cmsg, sizeof(cmsg)); 2352 netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0, 2353 carp_ioctl_stop_dispatch); 2354 cmsg.nc_softc = sc; 2355 2356 lwkt_domsg(netisr_portfn(0), &cmsg.base.lmsg, 0); 2357 2358 ifnet_serialize_all(ifp); 2359 } 2360 2361 static void 2362 carp_ioctl_setvh_dispatch(netmsg_t msg) 2363 { 2364 struct netmsg_carp *cmsg = (struct netmsg_carp *)msg; 2365 struct carp_softc *sc = cmsg->nc_softc; 2366 struct ifnet *ifp = &sc->arpcom.ac_if; 2367 const struct carpreq *carpr = cmsg->nc_data; 2368 int error; 2369 2370 error = 1; 2371 if ((ifp->if_flags & IFF_RUNNING) && 2372 sc->sc_state != INIT && carpr->carpr_state != sc->sc_state) { 2373 switch (carpr->carpr_state) { 2374 case BACKUP: 2375 callout_stop(&sc->sc_ad_tmo); 2376 carp_set_state(sc, BACKUP); 2377 carp_setrun(sc, 0); 2378 carp_setroute(sc, RTM_DELETE); 2379 break; 2380 2381 case MASTER: 2382 carp_master_down(sc); 2383 break; 2384 2385 default: 2386 break; 2387 } 2388 } 2389 if (carpr->carpr_vhid > 0) { 2390 if (carpr->carpr_vhid > 255) { 2391 error = EINVAL; 2392 goto back; 2393 } 2394 if (sc->sc_carpdev) { 2395 struct carp_if *cif = sc->sc_carpdev->if_carp; 2396 
struct carp_softc_container *scc; 2397 2398 TAILQ_FOREACH(scc, cif, scc_link) { 2399 struct carp_softc *vr = scc->scc_softc; 2400 2401 if (vr != sc && 2402 vr->sc_vhid == carpr->carpr_vhid) { 2403 error = EEXIST; 2404 goto back; 2405 } 2406 } 2407 } 2408 sc->sc_vhid = carpr->carpr_vhid; 2409 2410 IF_LLADDR(ifp)[5] = sc->sc_vhid; 2411 bcopy(IF_LLADDR(ifp), sc->arpcom.ac_enaddr, 2412 ETHER_ADDR_LEN); 2413 2414 error--; 2415 } 2416 if (carpr->carpr_advbase > 0 || carpr->carpr_advskew > 0) { 2417 if (carpr->carpr_advskew >= 255) { 2418 error = EINVAL; 2419 goto back; 2420 } 2421 if (carpr->carpr_advbase > 255) { 2422 error = EINVAL; 2423 goto back; 2424 } 2425 sc->sc_advbase = carpr->carpr_advbase; 2426 sc->sc_advskew = carpr->carpr_advskew; 2427 error--; 2428 } 2429 bcopy(carpr->carpr_key, sc->sc_key, sizeof(sc->sc_key)); 2430 if (error > 0) { 2431 error = EINVAL; 2432 } else { 2433 error = 0; 2434 carp_setrun(sc, 0); 2435 } 2436 back: 2437 carp_hmac_prepare(sc); 2438 2439 lwkt_replymsg(&cmsg->base.lmsg, error); 2440 } 2441 2442 static int 2443 carp_ioctl_setvh(struct carp_softc *sc, void *udata, struct ucred *cr) 2444 { 2445 struct ifnet *ifp = &sc->arpcom.ac_if; 2446 struct netmsg_carp cmsg; 2447 struct carpreq carpr; 2448 int error; 2449 2450 ASSERT_IFNET_SERIALIZED_ALL(ifp); 2451 ifnet_deserialize_all(ifp); 2452 2453 error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY); 2454 if (error) 2455 goto back; 2456 2457 error = copyin(udata, &carpr, sizeof(carpr)); 2458 if (error) 2459 goto back; 2460 2461 bzero(&cmsg, sizeof(cmsg)); 2462 netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0, 2463 carp_ioctl_setvh_dispatch); 2464 cmsg.nc_softc = sc; 2465 cmsg.nc_data = &carpr; 2466 2467 error = lwkt_domsg(netisr_portfn(0), &cmsg.base.lmsg, 0); 2468 2469 back: 2470 ifnet_serialize_all(ifp); 2471 return error; 2472 } 2473 2474 static void 2475 carp_ioctl_getvh_dispatch(netmsg_t msg) 2476 { 2477 struct netmsg_carp *cmsg = (struct netmsg_carp *)msg; 2478 struct carp_softc 
*sc = cmsg->nc_softc; 2479 struct carpreq *carpr = cmsg->nc_data; 2480 2481 carpr->carpr_state = sc->sc_state; 2482 carpr->carpr_vhid = sc->sc_vhid; 2483 carpr->carpr_advbase = sc->sc_advbase; 2484 carpr->carpr_advskew = sc->sc_advskew; 2485 bcopy(sc->sc_key, carpr->carpr_key, sizeof(carpr->carpr_key)); 2486 2487 lwkt_replymsg(&cmsg->base.lmsg, 0); 2488 } 2489 2490 static int 2491 carp_ioctl_getvh(struct carp_softc *sc, void *udata, struct ucred *cr) 2492 { 2493 struct ifnet *ifp = &sc->arpcom.ac_if; 2494 struct netmsg_carp cmsg; 2495 struct carpreq carpr; 2496 int error; 2497 2498 ASSERT_IFNET_SERIALIZED_ALL(ifp); 2499 ifnet_deserialize_all(ifp); 2500 2501 bzero(&cmsg, sizeof(cmsg)); 2502 netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0, 2503 carp_ioctl_getvh_dispatch); 2504 cmsg.nc_softc = sc; 2505 cmsg.nc_data = &carpr; 2506 2507 lwkt_domsg(netisr_portfn(0), &cmsg.base.lmsg, 0); 2508 2509 error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY); 2510 if (error) 2511 bzero(carpr.carpr_key, sizeof(carpr.carpr_key)); 2512 2513 error = copyout(&carpr, udata, sizeof(carpr)); 2514 2515 ifnet_serialize_all(ifp); 2516 return error; 2517 } 2518 2519 static void 2520 carp_ioctl_getdevname_dispatch(netmsg_t msg) 2521 { 2522 struct netmsg_carp *cmsg = (struct netmsg_carp *)msg; 2523 struct carp_softc *sc = cmsg->nc_softc; 2524 char *devname = cmsg->nc_data; 2525 2526 bzero(devname, IFNAMSIZ); 2527 if (sc->sc_carpdev != NULL) 2528 strlcpy(devname, sc->sc_carpdev->if_xname, IFNAMSIZ); 2529 2530 lwkt_replymsg(&cmsg->base.lmsg, 0); 2531 } 2532 2533 static int 2534 carp_ioctl_getdevname(struct carp_softc *sc, struct ifdrv *ifd) 2535 { 2536 struct ifnet *ifp = &sc->arpcom.ac_if; 2537 struct netmsg_carp cmsg; 2538 char devname[IFNAMSIZ]; 2539 int error; 2540 2541 ASSERT_IFNET_SERIALIZED_ALL(ifp); 2542 2543 if (ifd->ifd_len != sizeof(devname)) 2544 return EINVAL; 2545 2546 ifnet_deserialize_all(ifp); 2547 2548 bzero(&cmsg, sizeof(cmsg)); 2549 netmsg_init(&cmsg.base, NULL, 
&curthread->td_msgport, 0, 2550 carp_ioctl_getdevname_dispatch); 2551 cmsg.nc_softc = sc; 2552 cmsg.nc_data = devname; 2553 2554 lwkt_domsg(netisr_portfn(0), &cmsg.base.lmsg, 0); 2555 2556 error = copyout(devname, ifd->ifd_data, sizeof(devname)); 2557 2558 ifnet_serialize_all(ifp); 2559 return error; 2560 } 2561 2562 static void 2563 carp_init_dispatch(netmsg_t msg) 2564 { 2565 struct netmsg_carp *cmsg = (struct netmsg_carp *)msg; 2566 struct carp_softc *sc = cmsg->nc_softc; 2567 2568 sc->sc_if.if_flags |= IFF_RUNNING; 2569 carp_hmac_prepare(sc); 2570 carp_set_state(sc, INIT); 2571 carp_setrun(sc, 0); 2572 2573 lwkt_replymsg(&cmsg->base.lmsg, 0); 2574 } 2575 2576 static void 2577 carp_init(void *xsc) 2578 { 2579 struct carp_softc *sc = xsc; 2580 struct ifnet *ifp = &sc->arpcom.ac_if; 2581 struct netmsg_carp cmsg; 2582 2583 ASSERT_IFNET_SERIALIZED_ALL(ifp); 2584 2585 ifnet_deserialize_all(ifp); 2586 2587 bzero(&cmsg, sizeof(cmsg)); 2588 netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0, 2589 carp_init_dispatch); 2590 cmsg.nc_softc = sc; 2591 2592 lwkt_domsg(netisr_portfn(0), &cmsg.base.lmsg, 0); 2593 2594 ifnet_serialize_all(ifp); 2595 } 2596 2597 static int 2598 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 2599 struct rtentry *rt) 2600 { 2601 struct carp_softc *sc = ifp->if_softc; 2602 struct ifnet *carpdev; 2603 int error = 0; 2604 2605 carpdev = sc->sc_carpdev; 2606 if (carpdev != NULL) { 2607 /* 2608 * NOTE: 2609 * CARP's ifp is passed to backing device's 2610 * if_output method. 2611 */ 2612 carpdev->if_output(ifp, m, dst, rt); 2613 } else { 2614 m_freem(m); 2615 error = ENETUNREACH; 2616 } 2617 return error; 2618 } 2619 2620 /* 2621 * Start output on carp interface. This function should never be called. 
2622 */ 2623 static void 2624 carp_start(struct ifnet *ifp, struct ifaltq_subque *ifsq __unused) 2625 { 2626 panic("%s: start called", ifp->if_xname); 2627 } 2628 2629 static void 2630 carp_set_state(struct carp_softc *sc, int state) 2631 { 2632 struct ifnet *cifp = &sc->sc_if; 2633 2634 if (sc->sc_state == state) 2635 return; 2636 sc->sc_state = state; 2637 2638 switch (sc->sc_state) { 2639 case BACKUP: 2640 cifp->if_link_state = LINK_STATE_DOWN; 2641 break; 2642 2643 case MASTER: 2644 cifp->if_link_state = LINK_STATE_UP; 2645 break; 2646 2647 default: 2648 cifp->if_link_state = LINK_STATE_UNKNOWN; 2649 break; 2650 } 2651 rt_ifmsg(cifp); 2652 } 2653 2654 void 2655 carp_group_demote_adj(struct ifnet *ifp, int adj) 2656 { 2657 struct ifg_list *ifgl; 2658 int *dm; 2659 2660 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) { 2661 if (!strcmp(ifgl->ifgl_group->ifg_group, IFG_ALL)) 2662 continue; 2663 dm = &ifgl->ifgl_group->ifg_carp_demoted; 2664 2665 if (*dm + adj >= 0) 2666 *dm += adj; 2667 else 2668 *dm = 0; 2669 2670 if (adj > 0 && *dm == 1) 2671 carp_send_ad_all(); 2672 CARP_LOG("%s demoted group %s to %d", ifp->if_xname, 2673 ifgl->ifgl_group->ifg_group, *dm); 2674 } 2675 } 2676 2677 #ifdef foo 2678 void 2679 carp_carpdev_state(void *v) 2680 { 2681 struct carp_if *cif = v; 2682 struct carp_softc *sc; 2683 2684 TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) 2685 carp_sc_state(sc); 2686 } 2687 2688 static void 2689 carp_sc_state(struct carp_softc *sc) 2690 { 2691 if (!(sc->sc_carpdev->if_flags & IFF_UP)) { 2692 callout_stop(&sc->sc_ad_tmo); 2693 callout_stop(&sc->sc_md_tmo); 2694 callout_stop(&sc->sc_md6_tmo); 2695 carp_set_state(sc, INIT); 2696 carp_setrun(sc, 0); 2697 if (!sc->sc_suppress) { 2698 carp_suppress_preempt++; 2699 if (carp_suppress_preempt == 1) 2700 carp_send_ad_all(); 2701 } 2702 sc->sc_suppress = 1; 2703 } else { 2704 carp_set_state(sc, INIT); 2705 carp_setrun(sc, 0); 2706 if (sc->sc_suppress) 2707 carp_suppress_preempt--; 2708 sc->sc_suppress = 0; 2709 
} 2710 } 2711 #endif 2712 2713 static void 2714 carp_stop(struct carp_softc *sc, boolean_t detach) 2715 { 2716 sc->sc_if.if_flags &= ~IFF_RUNNING; 2717 2718 callout_stop(&sc->sc_ad_tmo); 2719 callout_stop(&sc->sc_md_tmo); 2720 callout_stop(&sc->sc_md6_tmo); 2721 2722 if (!detach && sc->sc_state == MASTER) 2723 carp_send_ad(sc); 2724 2725 if (sc->sc_suppress) 2726 carp_suppress_preempt--; 2727 sc->sc_suppress = 0; 2728 2729 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) 2730 carp_suppress_preempt--; 2731 sc->sc_sendad_errors = 0; 2732 sc->sc_sendad_success = 0; 2733 2734 carp_set_state(sc, INIT); 2735 carp_setrun(sc, 0); 2736 } 2737 2738 static void 2739 carp_suspend(struct carp_softc *sc, boolean_t detach) 2740 { 2741 struct ifnet *cifp = &sc->sc_if; 2742 2743 carp_stop(sc, detach); 2744 2745 /* Retain the running state, if we are not dead yet */ 2746 if (!sc->sc_dead && (cifp->if_flags & IFF_UP)) 2747 cifp->if_flags |= IFF_RUNNING; 2748 } 2749 2750 static int 2751 carp_activate_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha, 2752 struct ifnet *ifp, struct in_ifaddr *ia_if, int own) 2753 { 2754 struct ip_moptions *imo = &sc->sc_imo; 2755 struct carp_if *ocif = ifp->if_carp; 2756 int error; 2757 2758 KKASSERT(vha->vha_ia != NULL); 2759 2760 KASSERT(ia_if != NULL, ("NULL backing address")); 2761 KASSERT(vha->vha_iaback == NULL, ("%p is already activated", vha)); 2762 KASSERT((vha->vha_flags & CARP_VHAF_OWNER) == 0, 2763 ("inactive vhaddr %p is the address owner", vha)); 2764 2765 KASSERT(sc->sc_carpdev == NULL || sc->sc_carpdev == ifp, 2766 ("%s is already on %s", sc->sc_if.if_xname, 2767 sc->sc_carpdev->if_xname)); 2768 2769 if (ocif == NULL) { 2770 KASSERT(sc->sc_carpdev == NULL, 2771 ("%s is already on %s", sc->sc_if.if_xname, 2772 sc->sc_carpdev->if_xname)); 2773 2774 error = ifpromisc(ifp, 1); 2775 if (error) 2776 return error; 2777 } else { 2778 struct carp_softc_container *scc; 2779 2780 TAILQ_FOREACH(scc, ocif, scc_link) { 2781 struct carp_softc 
*vr = scc->scc_softc; 2782 2783 if (vr != sc && vr->sc_vhid == sc->sc_vhid) 2784 return EINVAL; 2785 } 2786 } 2787 2788 ifp->if_carp = carp_if_insert(ocif, sc); 2789 KASSERT(ifp->if_carp != NULL, ("%s carp_if_insert failed", __func__)); 2790 2791 sc->sc_ia = ia_if; 2792 sc->sc_carpdev = ifp; 2793 2794 /* 2795 * Make sure that all protocol threads see the sc_carpdev and 2796 * if_carp changes 2797 */ 2798 netmsg_service_sync(); 2799 2800 if (ocif != NULL && ifp->if_carp != ocif) { 2801 /* 2802 * The old carp list could be safely free now, 2803 * since no one can access it. 2804 */ 2805 carp_if_free(ocif); 2806 } 2807 2808 vha->vha_iaback = ia_if; 2809 sc->sc_naddrs++; 2810 2811 if (own) { 2812 vha->vha_flags |= CARP_VHAF_OWNER; 2813 2814 /* XXX save user configured advskew? */ 2815 sc->sc_advskew = 0; 2816 } 2817 2818 carp_addroute_vhaddr(sc, vha); 2819 2820 /* 2821 * Join the multicast group only after the backing interface 2822 * has been hooked with the CARP interface. 2823 */ 2824 KASSERT(imo->imo_multicast_ifp == NULL || 2825 imo->imo_multicast_ifp == &sc->sc_if, 2826 ("%s didn't leave mcast group on %s", 2827 sc->sc_if.if_xname, imo->imo_multicast_ifp->if_xname)); 2828 2829 if (imo->imo_num_memberships == 0) { 2830 struct in_addr addr; 2831 2832 addr.s_addr = htonl(INADDR_CARP_GROUP); 2833 imo->imo_membership[0] = in_addmulti(&addr, &sc->sc_if); 2834 if (imo->imo_membership[0] == NULL) { 2835 carp_deactivate_vhaddr(sc, vha, FALSE); 2836 return ENOBUFS; 2837 } 2838 2839 imo->imo_num_memberships++; 2840 imo->imo_multicast_ifp = &sc->sc_if; 2841 imo->imo_multicast_ttl = CARP_DFLTTL; 2842 imo->imo_multicast_loop = 0; 2843 } 2844 2845 carp_hmac_prepare(sc); 2846 carp_set_state(sc, INIT); 2847 carp_setrun(sc, 0); 2848 return 0; 2849 } 2850 2851 static void 2852 carp_deactivate_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha, 2853 boolean_t del_iaback) 2854 { 2855 KKASSERT(vha->vha_ia != NULL); 2856 2857 carp_hmac_prepare(sc); 2858 2859 if (vha->vha_iaback == 
NULL) { 2860 KASSERT((vha->vha_flags & CARP_VHAF_OWNER) == 0, 2861 ("inactive vhaddr %p is the address owner", vha)); 2862 return; 2863 } 2864 2865 vha->vha_flags &= ~CARP_VHAF_OWNER; 2866 carp_delroute_vhaddr(sc, vha, del_iaback); 2867 2868 KKASSERT(sc->sc_naddrs > 0); 2869 vha->vha_iaback = NULL; 2870 sc->sc_naddrs--; 2871 if (!sc->sc_naddrs) { 2872 if (sc->sc_naddrs6) { 2873 carp_multicast_cleanup(sc); 2874 sc->sc_ia = NULL; 2875 } else { 2876 carp_detach(sc, FALSE, del_iaback); 2877 } 2878 } 2879 } 2880 2881 static void 2882 carp_link_addrs(struct carp_softc *sc, struct ifnet *ifp, struct ifaddr *ifa_if) 2883 { 2884 struct carp_vhaddr *vha; 2885 struct in_ifaddr *ia_if; 2886 2887 KKASSERT(ifa_if->ifa_addr->sa_family == AF_INET); 2888 ia_if = ifatoia(ifa_if); 2889 2890 /* 2891 * Test each inactive vhaddr against the newly added address. 2892 * If the newly added address could be the backing address, 2893 * then activate the matching vhaddr. 2894 */ 2895 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 2896 const struct in_ifaddr *ia; 2897 u_long iaddr; 2898 int own; 2899 2900 if (vha->vha_iaback != NULL) 2901 continue; 2902 2903 ia = vha->vha_ia; 2904 iaddr = ntohl(ia->ia_addr.sin_addr.s_addr); 2905 2906 if ((iaddr & ia_if->ia_subnetmask) != ia_if->ia_subnet) 2907 continue; 2908 2909 own = 0; 2910 if (ia->ia_addr.sin_addr.s_addr == 2911 ia_if->ia_addr.sin_addr.s_addr) 2912 own = 1; 2913 2914 carp_activate_vhaddr(sc, vha, ifp, ia_if, own); 2915 } 2916 } 2917 2918 static void 2919 carp_unlink_addrs(struct carp_softc *sc, struct ifnet *ifp, 2920 struct ifaddr *ifa_if) 2921 { 2922 struct carp_vhaddr *vha; 2923 struct in_ifaddr *ia_if; 2924 2925 KKASSERT(ifa_if->ifa_addr->sa_family == AF_INET); 2926 ia_if = ifatoia(ifa_if); 2927 2928 /* 2929 * Ad src address is deleted; set it to NULL. 2930 * Following loop will try pick up a new ad src address 2931 * if one of the vhaddr could retain its backing address. 
2932 */ 2933 if (sc->sc_ia == ia_if) 2934 sc->sc_ia = NULL; 2935 2936 /* 2937 * Test each active vhaddr against the deleted address. 2938 * If the deleted address is vhaddr address's backing 2939 * address, then deactivate the vhaddr. 2940 */ 2941 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) { 2942 if (vha->vha_iaback == NULL) 2943 continue; 2944 2945 if (vha->vha_iaback == ia_if) 2946 carp_deactivate_vhaddr(sc, vha, TRUE); 2947 else if (sc->sc_ia == NULL) 2948 sc->sc_ia = vha->vha_iaback; 2949 } 2950 } 2951 2952 static void 2953 carp_update_addrs(struct carp_softc *sc, struct ifaddr *ifa_del) 2954 { 2955 struct carp_vhaddr *vha; 2956 2957 KKASSERT(sc->sc_carpdev == NULL); 2958 2959 TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) 2960 carp_config_vhaddr(sc, vha, ifatoia(ifa_del)); 2961 } 2962 2963 static void 2964 carp_ifaddr(void *arg __unused, struct ifnet *ifp, 2965 enum ifaddr_event event, struct ifaddr *ifa) 2966 { 2967 struct carp_softc *sc; 2968 2969 if (ifa->ifa_addr->sa_family != AF_INET) 2970 return; 2971 2972 KASSERT(&curthread->td_msgport == netisr_portfn(0), 2973 ("not in netisr0")); 2974 2975 if (ifp->if_type == IFT_CARP) { 2976 /* 2977 * Address is changed on carp(4) interface 2978 */ 2979 switch (event) { 2980 case IFADDR_EVENT_ADD: 2981 carp_add_addr(ifp->if_softc, ifa); 2982 break; 2983 2984 case IFADDR_EVENT_CHANGE: 2985 carp_config_addr(ifp->if_softc, ifa); 2986 break; 2987 2988 case IFADDR_EVENT_DELETE: 2989 carp_del_addr(ifp->if_softc, ifa); 2990 break; 2991 } 2992 return; 2993 } 2994 2995 /* 2996 * Address is changed on non-carp(4) interface 2997 */ 2998 if ((ifp->if_flags & IFF_MULTICAST) == 0) 2999 return; 3000 3001 LIST_FOREACH(sc, &carpif_list, sc_next) { 3002 if (sc->sc_carpdev != NULL && sc->sc_carpdev != ifp) { 3003 /* Not the parent iface; skip */ 3004 continue; 3005 } 3006 3007 switch (event) { 3008 case IFADDR_EVENT_ADD: 3009 carp_link_addrs(sc, ifp, ifa); 3010 break; 3011 3012 case IFADDR_EVENT_DELETE: 3013 if (sc->sc_carpdev != 
NULL) { 3014 carp_unlink_addrs(sc, ifp, ifa); 3015 if (sc->sc_carpdev == NULL) { 3016 /* 3017 * We no longer have the parent 3018 * interface, however, certain 3019 * virtual addresses, which are 3020 * not used because they can't 3021 * match the previous parent 3022 * interface's addresses, may now 3023 * match different interface's 3024 * addresses. 3025 */ 3026 carp_update_addrs(sc, ifa); 3027 } 3028 } else { 3029 /* 3030 * The carp(4) interface didn't have a 3031 * parent iface, so it is not possible 3032 * that it will contain any address to 3033 * be unlinked. 3034 */ 3035 } 3036 break; 3037 3038 case IFADDR_EVENT_CHANGE: 3039 if (sc->sc_carpdev == NULL) { 3040 /* 3041 * The carp(4) interface didn't have a 3042 * parent iface, so it is not possible 3043 * that it will contain any address to 3044 * be updated. 3045 */ 3046 carp_link_addrs(sc, ifp, ifa); 3047 } else { 3048 /* 3049 * First try breaking tie with the old 3050 * address. Then see whether we could 3051 * link certain vhaddr to the new address. 3052 * If that fails, i.e. carpdev is NULL, 3053 * we try a global update. 3054 * 3055 * NOTE: The above order is critical. 3056 */ 3057 carp_unlink_addrs(sc, ifp, ifa); 3058 carp_link_addrs(sc, ifp, ifa); 3059 if (sc->sc_carpdev == NULL) { 3060 /* 3061 * See the comment in the above 3062 * IFADDR_EVENT_DELETE block. 
3063 */ 3064 carp_update_addrs(sc, NULL); 3065 } 3066 } 3067 break; 3068 } 3069 } 3070 } 3071 3072 void 3073 carp_proto_ctlinput(netmsg_t msg) 3074 { 3075 int cmd = msg->ctlinput.nm_cmd; 3076 struct sockaddr *sa = msg->ctlinput.nm_arg; 3077 struct in_ifaddr_container *iac; 3078 3079 TAILQ_FOREACH(iac, &in_ifaddrheads[mycpuid], ia_link) { 3080 struct in_ifaddr *ia = iac->ia; 3081 struct ifnet *ifp = ia->ia_ifp; 3082 3083 if (ifp->if_type == IFT_CARP) 3084 continue; 3085 3086 if (ia->ia_ifa.ifa_addr == sa) { 3087 if (cmd == PRC_IFDOWN) { 3088 carp_ifaddr(NULL, ifp, IFADDR_EVENT_DELETE, 3089 &ia->ia_ifa); 3090 } else if (cmd == PRC_IFUP) { 3091 carp_ifaddr(NULL, ifp, IFADDR_EVENT_ADD, 3092 &ia->ia_ifa); 3093 } 3094 break; 3095 } 3096 } 3097 3098 lwkt_replymsg(&msg->lmsg, 0); 3099 } 3100 3101 struct ifnet * 3102 carp_parent(struct ifnet *cifp) 3103 { 3104 struct carp_softc *sc; 3105 3106 KKASSERT(cifp->if_type == IFT_CARP); 3107 sc = cifp->if_softc; 3108 3109 return sc->sc_carpdev; 3110 } 3111 3112 #define rtinitflags(x) \ 3113 (((x)->ia_ifp->if_flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) \ 3114 ? 
RTF_HOST : 0) 3115 3116 static int 3117 carp_addroute_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha) 3118 { 3119 struct in_ifaddr *ia, *iaback; 3120 int error; 3121 3122 if (sc->sc_state != MASTER) 3123 return 0; 3124 3125 ia = vha->vha_ia; 3126 KKASSERT(ia != NULL); 3127 3128 iaback = vha->vha_iaback; 3129 KKASSERT(iaback != NULL); 3130 3131 rtinit(&iaback->ia_ifa, RTM_DELETE, rtinitflags(iaback)); 3132 in_ifadown(&iaback->ia_ifa, 1); 3133 iaback->ia_flags &= ~IFA_ROUTE; 3134 3135 error = rtinit(&ia->ia_ifa, RTM_ADD, rtinitflags(ia) | RTF_UP); 3136 if (!error) 3137 ia->ia_flags |= IFA_ROUTE; 3138 return error; 3139 } 3140 3141 static void 3142 carp_delroute_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha, 3143 boolean_t del_iaback) 3144 { 3145 struct in_ifaddr *ia, *iaback; 3146 3147 ia = vha->vha_ia; 3148 KKASSERT(ia != NULL); 3149 3150 iaback = vha->vha_iaback; 3151 KKASSERT(iaback != NULL); 3152 3153 rtinit(&ia->ia_ifa, RTM_DELETE, rtinitflags(ia)); 3154 in_ifadown(&ia->ia_ifa, 1); 3155 ia->ia_flags &= ~IFA_ROUTE; 3156 3157 if (!del_iaback && (iaback->ia_ifp->if_flags & IFF_UP)) { 3158 int error; 3159 3160 error = rtinit(&iaback->ia_ifa, RTM_ADD, 3161 rtinitflags(iaback) | RTF_UP); 3162 if (!error) 3163 iaback->ia_flags |= IFA_ROUTE; 3164 } 3165 } 3166 3167 static int 3168 carp_modevent(module_t mod, int type, void *data) 3169 { 3170 switch (type) { 3171 case MOD_LOAD: 3172 LIST_INIT(&carpif_list); 3173 carp_ifdetach_event = 3174 EVENTHANDLER_REGISTER(ifnet_detach_event, carp_ifdetach, NULL, 3175 EVENTHANDLER_PRI_ANY); 3176 carp_ifaddr_event = 3177 EVENTHANDLER_REGISTER(ifaddr_event, carp_ifaddr, NULL, 3178 EVENTHANDLER_PRI_FIRST); 3179 if_clone_attach(&carp_cloner); 3180 break; 3181 3182 case MOD_UNLOAD: 3183 EVENTHANDLER_DEREGISTER(ifnet_detach_event, 3184 carp_ifdetach_event); 3185 EVENTHANDLER_DEREGISTER(ifaddr_event, 3186 carp_ifaddr_event); 3187 if_clone_detach(&carp_cloner); 3188 break; 3189 3190 default: 3191 return (EINVAL); 3192 } 3193 
return (0); 3194 } 3195 3196 static moduledata_t carp_mod = { 3197 "carp", 3198 carp_modevent, 3199 0 3200 }; 3201 DECLARE_MODULE(carp, carp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 3202