1 /* $OpenBSD: if_vxlan.c,v 1.80 2020/07/28 09:52:32 mvs Exp $ */ 2 3 /* 4 * Copyright (c) 2013 Reyk Floeter <reyk@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #include "bpfilter.h" 20 #include "vxlan.h" 21 #include "vlan.h" 22 #include "pf.h" 23 #include "bridge.h" 24 25 #include <sys/param.h> 26 #include <sys/systm.h> 27 #include <sys/mbuf.h> 28 #include <sys/socket.h> 29 #include <sys/sockio.h> 30 #include <sys/ioctl.h> 31 32 #include <net/if.h> 33 #include <net/if_var.h> 34 #include <net/if_media.h> 35 #include <net/route.h> 36 37 #if NBPFILTER > 0 38 #include <net/bpf.h> 39 #endif 40 41 #include <netinet/in.h> 42 #include <netinet/in_var.h> 43 #include <netinet/if_ether.h> 44 #include <netinet/ip.h> 45 #include <netinet/ip_var.h> 46 #include <netinet/udp.h> 47 #include <netinet/udp_var.h> 48 #include <netinet/in_pcb.h> 49 50 #if NPF > 0 51 #include <net/pfvar.h> 52 #endif 53 54 #if NBRIDGE > 0 55 #include <net/if_bridge.h> 56 #endif 57 58 #include <net/if_vxlan.h> 59 60 struct vxlan_softc { 61 struct arpcom sc_ac; 62 struct ifmedia sc_media; 63 64 struct ip_moptions sc_imo; 65 struct task sc_atask; 66 struct task sc_ltask; 67 struct task sc_dtask; 68 69 struct sockaddr_storage sc_src; 70 struct sockaddr_storage sc_dst; 71 in_port_t sc_dstport; 72 u_int sc_rdomain; 73 int64_t sc_vnetid; 74 uint16_t sc_df; 75 u_int8_t sc_ttl; 76 int sc_txhprio; 77 78 struct task sc_sendtask; 79 80 LIST_ENTRY(vxlan_softc) sc_entry; 81 }; 82 83 void vxlanattach(int); 84 int vxlanioctl(struct ifnet *, u_long, caddr_t); 85 void vxlanstart(struct ifnet *); 86 int vxlan_clone_create(struct if_clone *, int); 87 int vxlan_clone_destroy(struct ifnet *); 88 void vxlan_multicast_cleanup(struct ifnet *); 89 int vxlan_multicast_join(struct ifnet *, struct sockaddr *, 90 struct sockaddr *); 91 int vxlan_media_change(struct ifnet *); 92 void vxlan_media_status(struct ifnet *, struct ifmediareq *); 93 int vxlan_config(struct ifnet *, struct sockaddr *, struct sockaddr *); 94 int vxlan_output(struct ifnet *, struct mbuf *); 95 void vxlan_addr_change(void *); 96 void vxlan_if_change(void *); 97 void vxlan_link_change(void *); 98 void vxlan_send_dispatch(void *); 99 100 int vxlan_sockaddr_cmp(struct sockaddr *, struct sockaddr *); 101 uint16_t vxlan_sockaddr_port(struct sockaddr *); 102 103 struct if_clone vxlan_cloner = 104 IF_CLONE_INITIALIZER("vxlan", vxlan_clone_create, vxlan_clone_destroy); 105 106 int vxlan_enable = 0; 107 u_long vxlan_tagmask; 108 109 #define VXLAN_TAGHASHSIZE 32 110 #define VXLAN_TAGHASH(tag) ((unsigned int)tag & vxlan_tagmask) 111 LIST_HEAD(vxlan_taghash, vxlan_softc) *vxlan_tagh, vxlan_any; 112 113 void 114 vxlanattach(int count) 115 { 116 /* Regular vxlan interfaces with a VNI */ 117 if ((vxlan_tagh = hashinit(VXLAN_TAGHASHSIZE, M_DEVBUF, M_NOWAIT, 118 &vxlan_tagmask)) == NULL) 119 panic("vxlanattach: hashinit"); 120 121 /* multipoint-to-multipoint interfaces that accept any VNI */ 122 LIST_INIT(&vxlan_any); 123 124 if_clone_attach(&vxlan_cloner); 125 } 126 127 int 128 vxlan_clone_create(struct if_clone *ifc, int unit) 129 { 130 struct ifnet *ifp; 131 struct vxlan_softc *sc; 132 133 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO); 134 sc->sc_imo.imo_membership = mallocarray(IP_MIN_MEMBERSHIPS, 135 sizeof(struct in_multi *), M_IPMOPTS, M_WAITOK|M_ZERO); 136 sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS; 137 sc->sc_dstport = htons(VXLAN_PORT); 138 sc->sc_vnetid = VXLAN_VNI_UNSET; 139 sc->sc_txhprio = IFQ_TOS2PRIO(IPTOS_PREC_ROUTINE); /* 0 */ 140 sc->sc_df = htons(0); 141 task_set(&sc->sc_atask, vxlan_addr_change, sc); 142 task_set(&sc->sc_ltask, vxlan_link_change, sc); 143 task_set(&sc->sc_dtask, vxlan_if_change, sc); 144 task_set(&sc->sc_sendtask, vxlan_send_dispatch, sc); 145 146 ifp = &sc->sc_ac.ac_if; 147 snprintf(ifp->if_xname, sizeof ifp->if_xname, "vxlan%d", unit); 148 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 149 ether_fakeaddr(ifp); 150 151 ifp->if_softc = sc; 152 ifp->if_ioctl = vxlanioctl; 153 ifp->if_start = vxlanstart; 154 ifq_set_maxlen(&ifp->if_snd, IFQ_MAXLEN); 155 156 ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN; 157 ifp->if_capabilities = IFCAP_VLAN_MTU; 158 ifp->if_xflags = IFXF_CLONED; 159 160 ifmedia_init(&sc->sc_media, 0, vxlan_media_change, 161 vxlan_media_status); 162 ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL); 163 ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO); 164 165 if_counters_alloc(ifp); 166 if_attach(ifp); 167 ether_ifattach(ifp); 168 169 #if 0 170 /* 171 * Instead of using a decreased MTU of 1450 bytes, prefer 172 * to use the default Ethernet-size MTU of 1500 bytes and to 173 * increase the MTU of the outer transport interfaces to 174 * at least 1550 bytes. The following is disabled by default. 175 */ 176 ifp->if_mtu = ETHERMTU - sizeof(struct ether_header); 177 ifp->if_mtu -= sizeof(struct vxlanudphdr) + sizeof(struct ipovly); 178 #endif 179 180 LIST_INSERT_HEAD(&vxlan_tagh[VXLAN_TAGHASH(0)], sc, sc_entry); 181 vxlan_enable++; 182 183 return (0); 184 } 185 186 int 187 vxlan_clone_destroy(struct ifnet *ifp) 188 { 189 struct vxlan_softc *sc = ifp->if_softc; 190 191 NET_LOCK(); 192 vxlan_multicast_cleanup(ifp); 193 NET_UNLOCK(); 194 195 vxlan_enable--; 196 LIST_REMOVE(sc, sc_entry); 197 198 ifmedia_delete_instance(&sc->sc_media, IFM_INST_ANY); 199 ether_ifdetach(ifp); 200 if_detach(ifp); 201 202 if (!task_del(net_tq(ifp->if_index), &sc->sc_sendtask)) 203 taskq_barrier(net_tq(ifp->if_index)); 204 205 free(sc->sc_imo.imo_membership, M_IPMOPTS, 206 sc->sc_imo.imo_max_memberships * sizeof(struct in_multi *)); 207 free(sc, M_DEVBUF, sizeof(*sc)); 208 209 return (0); 210 } 211 212 void 213 vxlan_multicast_cleanup(struct ifnet *ifp) 214 { 215 struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc; 216 struct ip_moptions *imo = &sc->sc_imo; 217 struct ifnet *mifp; 218 219 mifp = if_get(imo->imo_ifidx); 220 if (mifp != NULL) { 221 if_addrhook_del(mifp, &sc->sc_atask); 222 if_linkstatehook_del(mifp, &sc->sc_ltask); 223 if_detachhook_del(mifp, &sc->sc_dtask); 224 225 if_put(mifp); 226 } 227 228 if (imo->imo_num_memberships > 0) { 229 in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); 230 imo->imo_ifidx = 0; 231 } 232 } 233 234 int 235 vxlan_multicast_join(struct ifnet *ifp, struct sockaddr *src, 236 struct sockaddr *dst) 237 { 238 struct vxlan_softc *sc = ifp->if_softc; 239 struct ip_moptions *imo = &sc->sc_imo; 240 struct sockaddr_in *src4, *dst4; 241 #ifdef INET6 242 struct sockaddr_in6 *dst6; 243 #endif /* INET6 */ 244 struct ifaddr *ifa; 245 struct ifnet *mifp; 246 247 switch (dst->sa_family) { 248 case AF_INET: 249 dst4 = satosin(dst); 250 if (!IN_MULTICAST(dst4->sin_addr.s_addr)) 251 return (0); 252 break; 253 #ifdef INET6 254 case AF_INET6: 255 dst6 = satosin6(dst); 256 if (!IN6_IS_ADDR_MULTICAST(&dst6->sin6_addr)) 257 return (0); 258 259 /* Multicast mode is currently not supported for IPv6 */ 260 return (EAFNOSUPPORT); 261 #endif /* INET6 */ 262 default: 263 return (EAFNOSUPPORT); 264 } 265 266 src4 = satosin(src); 267 dst4 = satosin(dst); 268 269 if (src4->sin_addr.s_addr == INADDR_ANY || 270 IN_MULTICAST(src4->sin_addr.s_addr)) 271 return (EINVAL); 272 if ((ifa = ifa_ifwithaddr(src, sc->sc_rdomain)) == NULL || 273 (mifp = ifa->ifa_ifp) == NULL || 274 (mifp->if_flags & IFF_MULTICAST) == 0) 275 return (EADDRNOTAVAIL); 276 277 if ((imo->imo_membership[0] = 278 in_addmulti(&dst4->sin_addr, mifp)) == NULL) 279 return (ENOBUFS); 280 281 imo->imo_num_memberships++; 282 imo->imo_ifidx = mifp->if_index; 283 if (sc->sc_ttl > 0) 284 imo->imo_ttl = sc->sc_ttl; 285 else 286 imo->imo_ttl = IP_DEFAULT_MULTICAST_TTL; 287 imo->imo_loop = 0; 288 289 /* 290 * Use interface hooks to track any changes on the interface 291 * that is used to send out the tunnel traffic as multicast. 292 */ 293 if_addrhook_add(mifp, &sc->sc_atask); 294 if_linkstatehook_add(mifp, &sc->sc_ltask); 295 if_detachhook_add(mifp, &sc->sc_dtask); 296 297 return (0); 298 } 299 300 void 301 vxlanstart(struct ifnet *ifp) 302 { 303 struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc; 304 305 task_add(net_tq(ifp->if_index), &sc->sc_sendtask); 306 } 307 308 void 309 vxlan_send_dispatch(void *xsc) 310 { 311 struct vxlan_softc *sc = xsc; 312 struct ifnet *ifp = &sc->sc_ac.ac_if; 313 struct mbuf *m; 314 struct mbuf_list ml; 315 316 ml_init(&ml); 317 for (;;) { 318 m = ifq_dequeue(&ifp->if_snd); 319 if (m == NULL) 320 break; 321 322 #if NBPFILTER > 0 323 if (ifp->if_bpf) 324 bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT); 325 #endif 326 327 ml_enqueue(&ml, m); 328 } 329 330 if (ml_empty(&ml)) 331 return; 332 333 NET_LOCK(); 334 while ((m = ml_dequeue(&ml)) != NULL) { 335 vxlan_output(ifp, m); 336 } 337 NET_UNLOCK(); 338 } 339 340 341 int 342 vxlan_config(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst) 343 { 344 struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc; 345 int reset = 0, error, af; 346 socklen_t slen; 347 in_port_t port; 348 struct vxlan_taghash *tagh; 349 350 if (src != NULL && dst != NULL) { 351 if ((af = src->sa_family) != dst->sa_family) 352 return (EAFNOSUPPORT); 353 } else { 354 /* Reset current configuration */ 355 af = sc->sc_src.ss_family; 356 src = sstosa(&sc->sc_src); 357 dst = sstosa(&sc->sc_dst); 358 reset = 1; 359 } 360 361 switch (af) { 362 case AF_INET: 363 slen = sizeof(struct sockaddr_in); 364 break; 365 #ifdef INET6 366 case AF_INET6: 367 slen = sizeof(struct sockaddr_in6); 368 break; 369 #endif /* INET6 */ 370 default: 371 return (EAFNOSUPPORT); 372 } 373 374 if (src->sa_len != slen || dst->sa_len != slen) 375 return (EINVAL); 376 377 vxlan_multicast_cleanup(ifp); 378 379 /* returns without error if multicast is not configured */ 380 if ((error = vxlan_multicast_join(ifp, src, dst)) != 0) 381 return (error); 382 383 if ((port = vxlan_sockaddr_port(dst)) != 0) 384 sc->sc_dstport = port; 385 386 if (!reset) { 387 bzero(&sc->sc_src, sizeof(sc->sc_src)); 388 bzero(&sc->sc_dst, sizeof(sc->sc_dst)); 389 memcpy(&sc->sc_src, src, src->sa_len); 390 memcpy(&sc->sc_dst, dst, dst->sa_len); 391 } 392 393 if (sc->sc_vnetid == VXLAN_VNI_ANY) { 394 /* 395 * If the interface accepts any VNI, put it into a separate 396 * list that is not part of the main hash. 397 */ 398 tagh = &vxlan_any; 399 } else 400 tagh = &vxlan_tagh[VXLAN_TAGHASH(sc->sc_vnetid)]; 401 402 LIST_REMOVE(sc, sc_entry); 403 LIST_INSERT_HEAD(tagh, sc, sc_entry); 404 405 return (0); 406 } 407 408 int 409 vxlanioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 410 { 411 struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc; 412 struct ifreq *ifr = (struct ifreq *)data; 413 struct if_laddrreq *lifr = (struct if_laddrreq *)data; 414 int error = 0; 415 416 switch (cmd) { 417 case SIOCSIFADDR: 418 ifp->if_flags |= IFF_UP; 419 /* FALLTHROUGH */ 420 421 case SIOCSIFFLAGS: 422 if (ifp->if_flags & IFF_UP) { 423 ifp->if_flags |= IFF_RUNNING; 424 } else { 425 ifp->if_flags &= ~IFF_RUNNING; 426 } 427 break; 428 429 case SIOCADDMULTI: 430 case SIOCDELMULTI: 431 break; 432 433 case SIOCGIFMEDIA: 434 case SIOCSIFMEDIA: 435 error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd); 436 break; 437 438 case SIOCSLIFPHYADDR: 439 error = vxlan_config(ifp, 440 sstosa(&lifr->addr), 441 sstosa(&lifr->dstaddr)); 442 break; 443 444 case SIOCDIFPHYADDR: 445 vxlan_multicast_cleanup(ifp); 446 bzero(&sc->sc_src, sizeof(sc->sc_src)); 447 bzero(&sc->sc_dst, sizeof(sc->sc_dst)); 448 sc->sc_dstport = htons(VXLAN_PORT); 449 break; 450 451 case SIOCGLIFPHYADDR: 452 if (sc->sc_dst.ss_family == AF_UNSPEC) { 453 error = EADDRNOTAVAIL; 454 break; 455 } 456 bzero(&lifr->addr, sizeof(lifr->addr)); 457 bzero(&lifr->dstaddr, sizeof(lifr->dstaddr)); 458 memcpy(&lifr->addr, &sc->sc_src, sc->sc_src.ss_len); 459 memcpy(&lifr->dstaddr, &sc->sc_dst, sc->sc_dst.ss_len); 460 break; 461 462 case SIOCSLIFPHYRTABLE: 463 if (ifr->ifr_rdomainid < 0 || 464 ifr->ifr_rdomainid > RT_TABLEID_MAX || 465 !rtable_exists(ifr->ifr_rdomainid)) { 466 error = EINVAL; 467 break; 468 } 469 sc->sc_rdomain = ifr->ifr_rdomainid; 470 (void)vxlan_config(ifp, NULL, NULL); 471 break; 472 473 case SIOCGLIFPHYRTABLE: 474 ifr->ifr_rdomainid = sc->sc_rdomain; 475 break; 476 477 case SIOCSLIFPHYTTL: 478 if (ifr->ifr_ttl < 0 || ifr->ifr_ttl > 0xff) { 479 error = EINVAL; 480 break; 481 } 482 if (sc->sc_ttl == (u_int8_t)ifr->ifr_ttl) 483 break; 484 sc->sc_ttl = (u_int8_t)(ifr->ifr_ttl); 485 (void)vxlan_config(ifp, NULL, NULL); 486 break; 487 488 case SIOCGLIFPHYTTL: 489 ifr->ifr_ttl = (int)sc->sc_ttl; 490 break; 491 492 case SIOCSLIFPHYDF: 493 /* commit */ 494 sc->sc_df = ifr->ifr_df ? htons(IP_DF) : htons(0); 495 break; 496 case SIOCGLIFPHYDF: 497 ifr->ifr_df = sc->sc_df ? 1 : 0; 498 break; 499 500 case SIOCSTXHPRIO: 501 if (ifr->ifr_hdrprio == IF_HDRPRIO_PACKET) 502 ; /* fall through */ 503 else if (ifr->ifr_hdrprio < IF_HDRPRIO_MIN || 504 ifr->ifr_hdrprio > IF_HDRPRIO_MAX) { 505 error = EINVAL; 506 break; 507 } 508 509 sc->sc_txhprio = ifr->ifr_hdrprio; 510 break; 511 case SIOCGTXHPRIO: 512 ifr->ifr_hdrprio = sc->sc_txhprio; 513 break; 514 515 case SIOCSVNETID: 516 if (sc->sc_vnetid == ifr->ifr_vnetid) 517 break; 518 519 if ((ifr->ifr_vnetid != VXLAN_VNI_ANY) && 520 (ifr->ifr_vnetid > VXLAN_VNI_MAX || 521 ifr->ifr_vnetid < VXLAN_VNI_MIN)) { 522 error = EINVAL; 523 break; 524 } 525 526 sc->sc_vnetid = (int)ifr->ifr_vnetid; 527 (void)vxlan_config(ifp, NULL, NULL); 528 break; 529 530 case SIOCGVNETID: 531 if ((sc->sc_vnetid != VXLAN_VNI_ANY) && 532 (sc->sc_vnetid > VXLAN_VNI_MAX || 533 sc->sc_vnetid < VXLAN_VNI_MIN)) { 534 error = EADDRNOTAVAIL; 535 break; 536 } 537 538 ifr->ifr_vnetid = sc->sc_vnetid; 539 break; 540 541 case SIOCDVNETID: 542 sc->sc_vnetid = VXLAN_VNI_UNSET; 543 (void)vxlan_config(ifp, NULL, NULL); 544 break; 545 546 default: 547 error = ether_ioctl(ifp, &sc->sc_ac, cmd, data); 548 break; 549 } 550 551 return (error); 552 } 553 554 int 555 vxlan_media_change(struct ifnet *ifp) 556 { 557 return (0); 558 } 559 560 void 561 vxlan_media_status(struct ifnet *ifp, struct ifmediareq *imr) 562 { 563 imr->ifm_status = IFM_AVALID | IFM_ACTIVE; 564 } 565 566 int 567 vxlan_sockaddr_cmp(struct sockaddr *srcsa, struct sockaddr *dstsa) 568 { 569 struct sockaddr_in *src4, *dst4; 570 #ifdef INET6 571 struct sockaddr_in6 *src6, *dst6; 572 #endif /* INET6 */ 573 574 if (srcsa->sa_family != dstsa->sa_family) 575 return (1); 576 577 switch (dstsa->sa_family) { 578 case AF_INET: 579 src4 = satosin(srcsa); 580 dst4 = satosin(dstsa); 581 if (src4->sin_addr.s_addr == dst4->sin_addr.s_addr) 582 return (0); 583 break; 584 #ifdef INET6 585 case AF_INET6: 586 src6 = satosin6(srcsa); 587 dst6 = satosin6(dstsa); 588 if (IN6_ARE_ADDR_EQUAL(&src6->sin6_addr, &dst6->sin6_addr) && 589 src6->sin6_scope_id == dst6->sin6_scope_id) 590 return (0); 591 break; 592 #endif /* INET6 */ 593 } 594 595 return (1); 596 } 597 598 uint16_t 599 vxlan_sockaddr_port(struct sockaddr *sa) 600 { 601 struct sockaddr_in *sin4; 602 #ifdef INET6 603 struct sockaddr_in6 *sin6; 604 #endif /* INET6 */ 605 606 switch (sa->sa_family) { 607 case AF_INET: 608 sin4 = satosin(sa); 609 return (sin4->sin_port); 610 #ifdef INET6 611 case AF_INET6: 612 sin6 = satosin6(sa); 613 return (sin6->sin6_port); 614 #endif /* INET6 */ 615 default: 616 break; 617 } 618 619 return (0); 620 } 621 622 int 623 vxlan_lookup(struct mbuf *m, struct udphdr *uh, int iphlen, 624 struct sockaddr *srcsa, struct sockaddr *dstsa) 625 { 626 struct vxlan_softc *sc = NULL, *sc_cand = NULL; 627 struct vxlan_header v; 628 int vni; 629 struct ifnet *ifp; 630 int skip; 631 #if NBRIDGE > 0 632 struct bridge_tunneltag *brtag; 633 #endif 634 struct mbuf *n; 635 int off; 636 637 /* XXX Should verify the UDP port first before copying the packet */ 638 skip = iphlen + sizeof(*uh); 639 if (m->m_pkthdr.len - skip < sizeof(v)) 640 return (0); 641 m_copydata(m, skip, sizeof(v), (caddr_t)&v); 642 skip += sizeof(v); 643 644 if (v.vxlan_flags & htonl(VXLAN_RESERVED1) || 645 v.vxlan_id & htonl(VXLAN_RESERVED2)) 646 return (0); 647 648 vni = ntohl(v.vxlan_id) >> VXLAN_VNI_S; 649 if ((v.vxlan_flags & htonl(VXLAN_FLAGS_VNI)) == 0) { 650 if (vni != 0) 651 return (0); 652 653 vni = VXLAN_VNI_UNSET; 654 } 655 656 NET_ASSERT_LOCKED(); 657 /* First search for a vxlan(4) interface with the packet's VNI */ 658 LIST_FOREACH(sc, &vxlan_tagh[VXLAN_TAGHASH(vni)], sc_entry) { 659 if ((uh->uh_dport == sc->sc_dstport) && 660 vni == sc->sc_vnetid && 661 sc->sc_rdomain == rtable_l2(m->m_pkthdr.ph_rtableid)) { 662 sc_cand = sc; 663 if (vxlan_sockaddr_cmp(srcsa, sstosa(&sc->sc_dst)) == 0) 664 goto found; 665 } 666 } 667 668 /* 669 * Now loop through all the vxlan(4) interfaces that are configured 670 * to accept any VNI and operating in multipoint-to-multipoint mode 671 * that is used in combination with bridge(4) or switch(4). 672 * If a vxlan(4) interface has been found for the packet's VNI, this 673 * code is not reached as the other interface is more specific. 674 */ 675 LIST_FOREACH(sc, &vxlan_any, sc_entry) { 676 if ((uh->uh_dport == sc->sc_dstport) && 677 (sc->sc_rdomain == rtable_l2(m->m_pkthdr.ph_rtableid))) { 678 sc_cand = sc; 679 goto found; 680 } 681 } 682 683 if (sc_cand) { 684 sc = sc_cand; 685 goto found; 686 } 687 688 /* not found */ 689 return (0); 690 691 found: 692 if (m->m_pkthdr.len < skip + sizeof(struct ether_header)) { 693 m_freem(m); 694 return (EINVAL); 695 } 696 697 m_adj(m, skip); 698 ifp = &sc->sc_ac.ac_if; 699 700 #if NBRIDGE > 0 701 /* Store the tunnel src/dst IP and vni for the bridge or switch */ 702 if ((ifp->if_bridgeidx != 0 || ifp->if_switchport != NULL) && 703 srcsa->sa_family != AF_UNSPEC && 704 ((brtag = bridge_tunneltag(m)) != NULL)) { 705 memcpy(&brtag->brtag_peer.sa, srcsa, srcsa->sa_len); 706 memcpy(&brtag->brtag_local.sa, dstsa, dstsa->sa_len); 707 brtag->brtag_id = vni; 708 } 709 #endif 710 711 m->m_flags &= ~(M_BCAST|M_MCAST); 712 713 #if NPF > 0 714 pf_pkt_addr_changed(m); 715 #endif 716 if ((m->m_len < sizeof(struct ether_header)) && 717 (m = m_pullup(m, sizeof(struct ether_header))) == NULL) 718 return (ENOBUFS); 719 720 n = m_getptr(m, sizeof(struct ether_header), &off); 721 if (n == NULL) { 722 m_freem(m); 723 return (EINVAL); 724 } 725 if (!ALIGNED_POINTER(mtod(n, caddr_t) + off, uint32_t)) { 726 n = m_dup_pkt(m, ETHER_ALIGN, M_NOWAIT); 727 /* Dispose of the original mbuf chain */ 728 m_freem(m); 729 if (n == NULL) 730 return (ENOBUFS); 731 m = n; 732 } 733 734 if_vinput(ifp, m); 735 736 /* success */ 737 return (1); 738 } 739 740 struct mbuf * 741 vxlan_encap4(struct ifnet *ifp, struct mbuf *m, 742 struct sockaddr *src, struct sockaddr *dst) 743 { 744 struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc; 745 struct ip *ip; 746 747 /* 748 * Remove multicast and broadcast flags or encapsulated packet 749 * ends up as multicast or broadcast packet. 750 */ 751 m->m_flags &= ~(M_BCAST|M_MCAST); 752 753 M_PREPEND(m, sizeof(*ip), M_DONTWAIT); 754 if (m == NULL) 755 return (NULL); 756 757 ip = mtod(m, struct ip *); 758 ip->ip_v = IPVERSION; 759 ip->ip_hl = sizeof(struct ip) >> 2; 760 ip->ip_id = htons(ip_randomid()); 761 ip->ip_off = sc->sc_df; 762 ip->ip_p = IPPROTO_UDP; 763 ip->ip_tos = IFQ_PRIO2TOS(sc->sc_txhprio == IF_HDRPRIO_PACKET ? 764 m->m_pkthdr.pf.prio : sc->sc_txhprio); 765 ip->ip_len = htons(m->m_pkthdr.len); 766 767 ip->ip_src = satosin(src)->sin_addr; 768 ip->ip_dst = satosin(dst)->sin_addr; 769 770 if (sc->sc_ttl > 0) 771 ip->ip_ttl = sc->sc_ttl; 772 else 773 ip->ip_ttl = IPDEFTTL; 774 775 return (m); 776 } 777 778 #ifdef INET6 779 struct mbuf * 780 vxlan_encap6(struct ifnet *ifp, struct mbuf *m, 781 struct sockaddr *src, struct sockaddr *dst) 782 { 783 struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc; 784 struct ip6_hdr *ip6; 785 struct in6_addr *in6a; 786 uint32_t flow; 787 788 /* 789 * Remove multicast and broadcast flags or encapsulated packet 790 * ends up as multicast or broadcast packet. 791 */ 792 m->m_flags &= ~(M_BCAST|M_MCAST); 793 794 M_PREPEND(m, sizeof(struct ip6_hdr), M_DONTWAIT); 795 if (m == NULL) 796 return (NULL); 797 798 flow = (uint32_t)IFQ_PRIO2TOS(sc->sc_txhprio == IF_HDRPRIO_PACKET ? 799 m->m_pkthdr.pf.prio : sc->sc_txhprio) << 20; 800 801 ip6 = mtod(m, struct ip6_hdr *); 802 ip6->ip6_flow = htonl(flow); 803 ip6->ip6_vfc &= ~IPV6_VERSION_MASK; 804 ip6->ip6_vfc |= IPV6_VERSION; 805 ip6->ip6_nxt = IPPROTO_UDP; 806 ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr)); 807 if (in6_embedscope(&ip6->ip6_src, satosin6(src), NULL) != 0) 808 goto drop; 809 if (in6_embedscope(&ip6->ip6_dst, satosin6(dst), NULL) != 0) 810 goto drop; 811 812 if (sc->sc_ttl > 0) 813 ip6->ip6_hlim = sc->sc_ttl; 814 else 815 ip6->ip6_hlim = ip6_defhlim; 816 817 if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr)) { 818 if (in6_selectsrc(&in6a, satosin6(dst), NULL, 819 sc->sc_rdomain) != 0) 820 goto drop; 821 822 ip6->ip6_src = *in6a; 823 } 824 825 if (sc->sc_df) 826 SET(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT); 827 828 /* 829 * The UDP checksum of VXLAN packets should be set to zero, 830 * but the IPv6 UDP checksum is not optional. There is an RFC 6539 831 * to relax the IPv6 UDP checksum requirement for tunnels, but it 832 * is currently not supported by most implementations. 833 */ 834 m->m_pkthdr.csum_flags |= M_UDP_CSUM_OUT; 835 836 return (m); 837 838 drop: 839 m_freem(m); 840 return (NULL); 841 } 842 #endif /* INET6 */ 843 844 int 845 vxlan_output(struct ifnet *ifp, struct mbuf *m) 846 { 847 struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc; 848 struct vxlanudphdr *vu; 849 struct sockaddr *src, *dst; 850 #if NBRIDGE > 0 851 struct bridge_tunneltag *brtag; 852 #endif 853 int error, af; 854 uint32_t tag; 855 struct mbuf *m0; 856 857 /* VXLAN header, needs new mbuf because of alignment issues */ 858 MGET(m0, M_DONTWAIT, m->m_type); 859 if (m0 == NULL) { 860 ifp->if_oerrors++; 861 return (ENOBUFS); 862 } 863 M_MOVE_PKTHDR(m0, m); 864 m0->m_next = m; 865 m = m0; 866 m_align(m, sizeof(*vu)); 867 m->m_len = sizeof(*vu); 868 m->m_pkthdr.len += sizeof(*vu); 869 870 src = sstosa(&sc->sc_src); 871 dst = sstosa(&sc->sc_dst); 872 af = src->sa_family; 873 874 vu = mtod(m, struct vxlanudphdr *); 875 vu->vu_u.uh_sport = sc->sc_dstport; 876 vu->vu_u.uh_dport = sc->sc_dstport; 877 vu->vu_u.uh_ulen = htons(m->m_pkthdr.len); 878 vu->vu_u.uh_sum = 0; 879 tag = sc->sc_vnetid; 880 881 #if NBRIDGE > 0 882 if ((brtag = bridge_tunnel(m)) != NULL) { 883 dst = &brtag->brtag_peer.sa; 884 885 /* If accepting any VNI, source ip address is from brtag */ 886 if (sc->sc_vnetid == VXLAN_VNI_ANY) { 887 src = &brtag->brtag_local.sa; 888 tag = (uint32_t)brtag->brtag_id; 889 af = src->sa_family; 890 } 891 892 if (dst->sa_family != af) { 893 ifp->if_oerrors++; 894 m_freem(m); 895 return (EINVAL); 896 } 897 } else 898 #endif 899 if (sc->sc_vnetid == VXLAN_VNI_ANY) { 900 /* 901 * If accepting any VNI, build the vxlan header only by 902 * bridge_tunneltag or drop packet if the tag does not exist. 903 */ 904 ifp->if_oerrors++; 905 m_freem(m); 906 return (ENETUNREACH); 907 } 908 909 if (sc->sc_vnetid != VXLAN_VNI_UNSET) { 910 vu->vu_v.vxlan_flags = htonl(VXLAN_FLAGS_VNI); 911 vu->vu_v.vxlan_id = htonl(tag << VXLAN_VNI_S); 912 } else { 913 vu->vu_v.vxlan_flags = htonl(0); 914 vu->vu_v.vxlan_id = htonl(0); 915 } 916 917 switch (af) { 918 case AF_INET: 919 m = vxlan_encap4(ifp, m, src, dst); 920 break; 921 #ifdef INET6 922 case AF_INET6: 923 m = vxlan_encap6(ifp, m, src, dst); 924 break; 925 #endif /* INET6 */ 926 default: 927 m_freem(m); 928 m = NULL; 929 } 930 931 if (m == NULL) { 932 ifp->if_oerrors++; 933 return (ENOBUFS); 934 } 935 936 #if NBRIDGE > 0 937 if (brtag != NULL) 938 bridge_tunneluntag(m); 939 #endif 940 941 m->m_pkthdr.ph_rtableid = sc->sc_rdomain; 942 943 #if NPF > 0 944 pf_pkt_addr_changed(m); 945 #endif 946 947 switch (af) { 948 case AF_INET: 949 error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, 950 &sc->sc_imo, NULL, 0); 951 break; 952 #ifdef INET6 953 case AF_INET6: 954 error = ip6_output(m, 0, NULL, IPV6_MINMTU, 0, NULL); 955 break; 956 #endif /* INET6 */ 957 default: 958 m_freem(m); 959 error = EAFNOSUPPORT; 960 } 961 962 if (error) 963 ifp->if_oerrors++; 964 965 return (error); 966 } 967 968 void 969 vxlan_addr_change(void *arg) 970 { 971 struct vxlan_softc *sc = arg; 972 struct ifnet *ifp = &sc->sc_ac.ac_if; 973 int error; 974 975 /* 976 * Reset the configuration after resume or any possible address 977 * configuration changes. 978 */ 979 if ((error = vxlan_config(ifp, NULL, NULL))) { 980 /* 981 * The source address of the tunnel can temporarily disappear, 982 * after a link state change when running the DHCP client, 983 * so keep it configured. 984 */ 985 } 986 } 987 988 void 989 vxlan_if_change(void *arg) 990 { 991 struct vxlan_softc *sc = arg; 992 struct ifnet *ifp = &sc->sc_ac.ac_if; 993 994 /* 995 * Reset the configuration after the parent interface disappeared. 996 */ 997 vxlan_multicast_cleanup(ifp); 998 memset(&sc->sc_src, 0, sizeof(sc->sc_src)); 999 memset(&sc->sc_dst, 0, sizeof(sc->sc_dst)); 1000 sc->sc_dstport = htons(VXLAN_PORT); 1001 } 1002 1003 void 1004 vxlan_link_change(void *arg) 1005 { 1006 struct vxlan_softc *sc = arg; 1007 struct ifnet *ifp = &sc->sc_ac.ac_if; 1008 1009 /* 1010 * The machine might have lost its multicast associations after 1011 * link state changes. This fixes a problem with VMware after 1012 * suspend/resume of the host or guest. 1013 */ 1014 (void)vxlan_config(ifp, NULL, NULL); 1015 } 1016