1 /* $OpenBSD: if_vxlan.c,v 1.99 2023/12/23 10:52:54 bluhm Exp $ */ 2 3 /* 4 * Copyright (c) 2021 David Gwynne <dlg@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #include "bpfilter.h" 20 #include "pf.h" 21 22 #include <sys/param.h> 23 #include <sys/systm.h> 24 #include <sys/kernel.h> 25 #include <sys/mbuf.h> 26 #include <sys/socket.h> 27 #include <sys/ioctl.h> 28 #include <sys/timeout.h> 29 #include <sys/pool.h> 30 #include <sys/tree.h> 31 #include <sys/refcnt.h> 32 #include <sys/smr.h> 33 34 #include <sys/socketvar.h> 35 36 #include <net/if.h> 37 #include <net/if_var.h> 38 #include <net/if_dl.h> 39 #include <net/if_media.h> 40 #include <net/if_types.h> 41 #include <net/route.h> 42 #include <net/rtable.h> 43 44 #include <netinet/in.h> 45 #include <netinet/in_var.h> 46 #include <netinet/if_ether.h> 47 #include <netinet/ip.h> 48 #include <netinet/udp.h> 49 #include <netinet/in_pcb.h> 50 #include <netinet/ip_var.h> 51 52 #ifdef INET6 53 #include <netinet/ip6.h> 54 #include <netinet6/ip6_var.h> 55 #include <netinet6/in6_var.h> 56 #endif 57 58 /* for bridge stuff */ 59 #include <net/if_bridge.h> 60 #include <net/if_etherbridge.h> 61 62 #if NBPFILTER > 0 63 #include <net/bpf.h> 64 #endif 65 66 /* 67 * The protocol. 
68 */ 69 70 #define VXLAN_PORT 4789 71 72 struct vxlan_header { 73 uint32_t vxlan_flags; 74 #define VXLAN_F_I (1U << 27) 75 uint32_t vxlan_id; 76 #define VXLAN_VNI_SHIFT 8 77 #define VXLAN_VNI_MASK (0xffffffU << VXLAN_VNI_SHIFT) 78 }; 79 80 #define VXLAN_VNI_MAX 0x00ffffffU 81 #define VXLAN_VNI_MIN 0x00000000U 82 83 /* 84 * The driver. 85 */ 86 87 union vxlan_addr { 88 struct in_addr in4; 89 struct in6_addr in6; 90 }; 91 92 struct vxlan_softc; 93 94 struct vxlan_peer { 95 RBT_ENTRY(vxlan_peer) p_entry; 96 97 struct vxlan_header p_header; 98 union vxlan_addr p_addr; 99 100 struct vxlan_softc *p_sc; 101 }; 102 103 RBT_HEAD(vxlan_peers, vxlan_peer); 104 105 struct vxlan_tep { 106 TAILQ_ENTRY(vxlan_tep) vt_entry; 107 108 sa_family_t vt_af; 109 unsigned int vt_rdomain; 110 union vxlan_addr vt_addr; 111 #define vt_addr4 vt_addr.in4 112 #define vt_addr6 vt_addr.in6 113 in_port_t vt_port; 114 115 struct socket *vt_so; 116 117 struct mutex vt_mtx; 118 struct vxlan_peers vt_peers; 119 }; 120 121 TAILQ_HEAD(vxlan_teps, vxlan_tep); 122 123 enum vxlan_tunnel_mode { 124 VXLAN_TMODE_UNSET, 125 VXLAN_TMODE_P2P, /* unicast destination, no learning */ 126 VXLAN_TMODE_LEARNING, /* multicast destination, learning */ 127 VXLAN_TMODE_ENDPOINT, /* unset destination, no learning */ 128 }; 129 130 struct vxlan_softc { 131 struct arpcom sc_ac; 132 struct etherbridge sc_eb; 133 134 unsigned int sc_rdomain; 135 sa_family_t sc_af; 136 union vxlan_addr sc_src; 137 union vxlan_addr sc_dst; 138 in_port_t sc_port; 139 struct vxlan_header sc_header; 140 unsigned int sc_if_index0; 141 142 struct task sc_dtask; 143 void *sc_inmulti; 144 145 enum vxlan_tunnel_mode sc_mode; 146 struct vxlan_peer *sc_ucast_peer; 147 struct vxlan_peer *sc_mcast_peer; 148 struct refcnt sc_refs; 149 150 uint16_t sc_df; 151 int sc_ttl; 152 int sc_txhprio; 153 int sc_rxhprio; 154 155 struct task sc_send_task; 156 }; 157 158 void vxlanattach(int); 159 160 static int vxlan_clone_create(struct if_clone *, int); 161 static int 
vxlan_clone_destroy(struct ifnet *); 162 163 static int vxlan_output(struct ifnet *, struct mbuf *, 164 struct sockaddr *, struct rtentry *); 165 static int vxlan_enqueue(struct ifnet *, struct mbuf *); 166 static void vxlan_start(struct ifqueue *); 167 static void vxlan_send(void *); 168 169 static int vxlan_ioctl(struct ifnet *, u_long, caddr_t); 170 static int vxlan_up(struct vxlan_softc *); 171 static int vxlan_down(struct vxlan_softc *); 172 static int vxlan_addmulti(struct vxlan_softc *, struct ifnet *); 173 static void vxlan_delmulti(struct vxlan_softc *); 174 175 static struct mbuf * 176 vxlan_input(void *, struct mbuf *, 177 struct ip *, struct ip6_hdr *, void *, int); 178 179 static int vxlan_set_rdomain(struct vxlan_softc *, const struct ifreq *); 180 static int vxlan_get_rdomain(struct vxlan_softc *, struct ifreq *); 181 static int vxlan_set_tunnel(struct vxlan_softc *, 182 const struct if_laddrreq *); 183 static int vxlan_get_tunnel(struct vxlan_softc *, struct if_laddrreq *); 184 static int vxlan_del_tunnel(struct vxlan_softc *); 185 static int vxlan_set_vnetid(struct vxlan_softc *, const struct ifreq *); 186 static int vxlan_get_vnetid(struct vxlan_softc *, struct ifreq *); 187 static int vxlan_del_vnetid(struct vxlan_softc *); 188 static int vxlan_set_parent(struct vxlan_softc *, 189 const struct if_parent *); 190 static int vxlan_get_parent(struct vxlan_softc *, struct if_parent *); 191 static int vxlan_del_parent(struct vxlan_softc *); 192 193 static int vxlan_add_addr(struct vxlan_softc *, const struct ifbareq *); 194 static int vxlan_del_addr(struct vxlan_softc *, const struct ifbareq *); 195 196 static void vxlan_detach_hook(void *); 197 198 static struct if_clone vxlan_cloner = 199 IF_CLONE_INITIALIZER("vxlan", vxlan_clone_create, vxlan_clone_destroy); 200 201 static int vxlan_eb_port_eq(void *, void *, void *); 202 static void *vxlan_eb_port_take(void *, void *); 203 static void vxlan_eb_port_rele(void *, void *); 204 static size_t 
vxlan_eb_port_ifname(void *, char *, size_t, void *); 205 static void vxlan_eb_port_sa(void *, struct sockaddr_storage *, void *); 206 207 static const struct etherbridge_ops vxlan_etherbridge_ops = { 208 vxlan_eb_port_eq, 209 vxlan_eb_port_take, 210 vxlan_eb_port_rele, 211 vxlan_eb_port_ifname, 212 vxlan_eb_port_sa, 213 }; 214 215 static struct rwlock vxlan_lock = RWLOCK_INITIALIZER("vteps"); 216 static struct vxlan_teps vxlan_teps = TAILQ_HEAD_INITIALIZER(vxlan_teps); 217 static struct pool vxlan_endpoint_pool; 218 219 static inline int vxlan_peer_cmp(const struct vxlan_peer *, 220 const struct vxlan_peer *); 221 222 RBT_PROTOTYPE(vxlan_peers, vxlan_peer, p_entry, vxlan_peer_cmp); 223 224 void 225 vxlanattach(int count) 226 { 227 if_clone_attach(&vxlan_cloner); 228 } 229 230 static int 231 vxlan_clone_create(struct if_clone *ifc, int unit) 232 { 233 struct vxlan_softc *sc; 234 struct ifnet *ifp; 235 int error; 236 237 if (vxlan_endpoint_pool.pr_size == 0) { 238 pool_init(&vxlan_endpoint_pool, sizeof(union vxlan_addr), 239 0, IPL_SOFTNET, 0, "vxlanep", NULL); 240 } 241 242 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO|M_CANFAIL); 243 if (sc == NULL) 244 return (ENOMEM); 245 246 ifp = &sc->sc_ac.ac_if; 247 248 snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d", 249 ifc->ifc_name, unit); 250 251 error = etherbridge_init(&sc->sc_eb, ifp->if_xname, 252 &vxlan_etherbridge_ops, sc); 253 if (error == -1) { 254 free(sc, M_DEVBUF, sizeof(*sc)); 255 return (error); 256 } 257 258 sc->sc_af = AF_UNSPEC; 259 sc->sc_txhprio = 0; 260 sc->sc_rxhprio = IF_HDRPRIO_OUTER; 261 sc->sc_df = 0; 262 sc->sc_ttl = IP_DEFAULT_MULTICAST_TTL; 263 264 task_set(&sc->sc_dtask, vxlan_detach_hook, sc); 265 refcnt_init(&sc->sc_refs); 266 task_set(&sc->sc_send_task, vxlan_send, sc); 267 268 ifp->if_softc = sc; 269 ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN; 270 ifp->if_ioctl = vxlan_ioctl; 271 ifp->if_output = vxlan_output; 272 ifp->if_enqueue = vxlan_enqueue; 273 ifp->if_qstart = vxlan_start; 
274 ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX; 275 ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE; 276 ether_fakeaddr(ifp); 277 278 if_counters_alloc(ifp); 279 if_attach(ifp); 280 ether_ifattach(ifp); 281 282 return (0); 283 } 284 285 static int 286 vxlan_clone_destroy(struct ifnet *ifp) 287 { 288 struct vxlan_softc *sc = ifp->if_softc; 289 290 NET_LOCK(); 291 if (ISSET(ifp->if_flags, IFF_RUNNING)) 292 vxlan_down(sc); 293 NET_UNLOCK(); 294 295 ether_ifdetach(ifp); 296 if_detach(ifp); 297 298 etherbridge_destroy(&sc->sc_eb); 299 300 refcnt_finalize(&sc->sc_refs, "vxlanfini"); 301 302 free(sc, M_DEVBUF, sizeof(*sc)); 303 304 return (0); 305 } 306 307 static struct vxlan_softc * 308 vxlan_take(struct vxlan_softc *sc) 309 { 310 refcnt_take(&sc->sc_refs); 311 return (sc); 312 } 313 314 static void 315 vxlan_rele(struct vxlan_softc *sc) 316 { 317 refcnt_rele_wake(&sc->sc_refs); 318 } 319 320 static struct mbuf * 321 vxlan_encap(struct vxlan_softc *sc, struct mbuf *m, 322 struct mbuf *(ip_encap)(struct vxlan_softc *sc, struct mbuf *, 323 const union vxlan_addr *, uint8_t)) 324 { 325 struct ifnet *ifp = &sc->sc_ac.ac_if; 326 struct m_tag *mtag; 327 struct mbuf *m0; 328 union vxlan_addr gateway; 329 const union vxlan_addr *endpoint; 330 struct vxlan_header *vh; 331 struct udphdr *uh; 332 int prio; 333 uint8_t tos; 334 335 if (sc->sc_mode == VXLAN_TMODE_UNSET) 336 goto drop; 337 338 if (sc->sc_mode == VXLAN_TMODE_P2P) 339 endpoint = &sc->sc_dst; 340 else { /* VXLAN_TMODE_LEARNING || VXLAN_TMODE_ENDPOINT */ 341 struct ether_header *eh = mtod(m, struct ether_header *); 342 343 smr_read_enter(); 344 endpoint = etherbridge_resolve_ea(&sc->sc_eb, 345 (struct ether_addr *)eh->ether_dhost); 346 if (endpoint != NULL) { 347 gateway = *endpoint; 348 endpoint = &gateway; 349 } 350 smr_read_leave(); 351 352 if (endpoint == NULL) { 353 if (sc->sc_mode == VXLAN_TMODE_ENDPOINT) 354 goto drop; 355 356 /* "flood" to unknown destinations */ 357 endpoint = &sc->sc_dst; 358 } 359 
} 360 361 /* force prepend mbuf because of payload alignment */ 362 m0 = m_get(M_DONTWAIT, m->m_type); 363 if (m0 == NULL) 364 goto drop; 365 366 m_align(m0, 0); 367 m0->m_len = 0; 368 369 M_MOVE_PKTHDR(m0, m); 370 m0->m_next = m; 371 372 m = m_prepend(m0, sizeof(*vh), M_DONTWAIT); 373 if (m == NULL) 374 return (NULL); 375 376 vh = mtod(m, struct vxlan_header *); 377 *vh = sc->sc_header; 378 379 m = m_prepend(m, sizeof(*uh), M_DONTWAIT); 380 if (m == NULL) 381 return (NULL); 382 383 uh = mtod(m, struct udphdr *); 384 uh->uh_sport = sc->sc_port; /* XXX */ 385 uh->uh_dport = sc->sc_port; 386 htobem16(&uh->uh_ulen, m->m_pkthdr.len); 387 uh->uh_sum = htons(0); 388 389 SET(m->m_pkthdr.csum_flags, M_UDP_CSUM_OUT); 390 391 mtag = m_tag_get(PACKET_TAG_GRE, sizeof(ifp->if_index), M_NOWAIT); 392 if (mtag == NULL) 393 goto drop; 394 395 *(int *)(mtag + 1) = ifp->if_index; 396 m_tag_prepend(m, mtag); 397 398 prio = sc->sc_txhprio; 399 if (prio == IF_HDRPRIO_PACKET) 400 prio = m->m_pkthdr.pf.prio; 401 tos = IFQ_PRIO2TOS(prio); 402 403 CLR(m->m_flags, M_BCAST|M_MCAST); 404 m->m_pkthdr.ph_rtableid = sc->sc_rdomain; 405 406 #if NPF > 0 407 pf_pkt_addr_changed(m); 408 #endif 409 410 return ((*ip_encap)(sc, m, endpoint, tos)); 411 drop: 412 m_freem(m); 413 return (NULL); 414 } 415 416 static struct mbuf * 417 vxlan_encap_ipv4(struct vxlan_softc *sc, struct mbuf *m, 418 const union vxlan_addr *endpoint, uint8_t tos) 419 { 420 struct ip *ip; 421 422 m = m_prepend(m, sizeof(*ip), M_DONTWAIT); 423 if (m == NULL) 424 return (NULL); 425 426 ip = mtod(m, struct ip *); 427 ip->ip_v = IPVERSION; 428 ip->ip_hl = sizeof(*ip) >> 2; 429 ip->ip_off = sc->sc_df; 430 ip->ip_tos = tos; 431 ip->ip_len = htons(m->m_pkthdr.len); 432 ip->ip_ttl = sc->sc_ttl; 433 ip->ip_p = IPPROTO_UDP; 434 ip->ip_src = sc->sc_src.in4; 435 ip->ip_dst = endpoint->in4; 436 437 return (m); 438 } 439 440 #ifdef INET6 441 static struct mbuf * 442 vxlan_encap_ipv6(struct vxlan_softc *sc, struct mbuf *m, 443 const union 
vxlan_addr *endpoint, uint8_t tos) 444 { 445 struct ip6_hdr *ip6; 446 int len = m->m_pkthdr.len; 447 448 m = m_prepend(m, sizeof(*ip6), M_DONTWAIT); 449 if (m == NULL) 450 return (NULL); 451 452 ip6 = mtod(m, struct ip6_hdr *); 453 ip6->ip6_flow = ISSET(m->m_pkthdr.csum_flags, M_FLOWID) ? 454 htonl(m->m_pkthdr.ph_flowid) : 0; 455 ip6->ip6_vfc |= IPV6_VERSION; 456 ip6->ip6_flow |= htonl((uint32_t)tos << 20); 457 ip6->ip6_plen = htons(len); 458 ip6->ip6_nxt = IPPROTO_UDP; 459 ip6->ip6_hlim = sc->sc_ttl; 460 ip6->ip6_src = sc->sc_src.in6; 461 ip6->ip6_dst = endpoint->in6; 462 463 if (sc->sc_df) 464 SET(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT); 465 466 return (m); 467 } 468 #endif /* INET6 */ 469 470 static int 471 vxlan_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 472 struct rtentry *rt) 473 { 474 struct m_tag *mtag; 475 476 mtag = NULL; 477 while ((mtag = m_tag_find(m, PACKET_TAG_GRE, mtag)) != NULL) { 478 if (*(int *)(mtag + 1) == ifp->if_index) { 479 m_freem(m); 480 return (EIO); 481 } 482 } 483 484 return (ether_output(ifp, m, dst, rt)); 485 } 486 487 static int 488 vxlan_enqueue(struct ifnet *ifp, struct mbuf *m) 489 { 490 struct vxlan_softc *sc = ifp->if_softc; 491 struct ifqueue *ifq = &ifp->if_snd; 492 493 if (ifq_enqueue(ifq, m) != 0) 494 return (ENOBUFS); 495 496 task_add(ifq->ifq_softnet, &sc->sc_send_task); 497 498 return (0); 499 } 500 501 static void 502 vxlan_start(struct ifqueue *ifq) 503 { 504 struct ifnet *ifp = ifq->ifq_if; 505 struct vxlan_softc *sc = ifp->if_softc; 506 507 task_add(ifq->ifq_softnet, &sc->sc_send_task); 508 } 509 510 static uint64_t 511 vxlan_send_ipv4(struct vxlan_softc *sc, struct mbuf_list *ml) 512 { 513 struct ip_moptions imo; 514 struct mbuf *m; 515 uint64_t oerrors = 0; 516 517 imo.imo_ifidx = sc->sc_if_index0; 518 imo.imo_ttl = sc->sc_ttl; 519 imo.imo_loop = 0; 520 521 NET_LOCK(); 522 while ((m = ml_dequeue(ml)) != NULL) { 523 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &imo, NULL, 0) != 0) 524 oerrors++; 525 
	}
	NET_UNLOCK();

	return (oerrors);
}

#ifdef INET6
/*
 * Hand a list of encapsulated packets to ip6_output(), with multicast
 * options carrying the parent interface index and hop limit from the
 * softc.  Returns the number of packets that failed to send.
 */
static uint64_t
vxlan_send_ipv6(struct vxlan_softc *sc, struct mbuf_list *ml)
{
	struct ip6_moptions im6o;
	struct mbuf *m;
	uint64_t oerrors = 0;

	im6o.im6o_ifidx = sc->sc_if_index0;
	im6o.im6o_hlim = sc->sc_ttl;
	im6o.im6o_loop = 0;

	NET_LOCK();
	while ((m = ml_dequeue(ml)) != NULL) {
		if (ip6_output(m, NULL, NULL, 0, &im6o, NULL) != 0)
			oerrors++;
	}
	NET_UNLOCK();

	return (oerrors);
}
#endif /* INET6 */

/*
 * Transmit task (scheduled on the ifq softnet taskq by vxlan_enqueue()
 * and vxlan_start()): drain the interface send queue, encapsulate each
 * frame for the configured address family, and pass the whole batch to
 * the matching IP output routine.  Failed sends are counted as oerrors.
 */
static void
vxlan_send(void *arg)
{
	struct vxlan_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	/* per-address-family encapsulation and output hooks, picked below */
	struct mbuf *(*ip_encap)(struct vxlan_softc *, struct mbuf *,
	    const union vxlan_addr *, uint8_t);
	uint64_t (*ip_send)(struct vxlan_softc *, struct mbuf_list *);
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct mbuf *m;
	uint64_t oerrors;

	if (!ISSET(ifp->if_flags, IFF_RUNNING))
		return;

	switch (sc->sc_af) {
	case AF_INET:
		ip_encap = vxlan_encap_ipv4;
		ip_send = vxlan_send_ipv4;
		break;
#ifdef INET6
	case AF_INET6:
		ip_encap = vxlan_encap_ipv6;
		ip_send = vxlan_send_ipv6;
		break;
#endif
	default:
		unhandled_af(sc->sc_af);
		/* NOTREACHED */
	}

	while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) {
#if NBPFILTER > 0
		caddr_t if_bpf = READ_ONCE(ifp->if_bpf);
		if (if_bpf != NULL)
			bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT);
#endif
		/* vxlan_encap() frees the mbuf itself on failure */
		m = vxlan_encap(sc, m, ip_encap);
		if (m == NULL)
			continue;

		ml_enqueue(&ml, m);
	}

	oerrors = (*ip_send)(sc, &ml);

	counters_add(ifp->if_counters, ifc_oerrors, oerrors);
}

/*
 * UDP socket upcall for a tunnel endpoint: strip the outer UDP/VXLAN
 * headers, look up the peer that matches the outer source address and
 * VNI, and inject the inner Ethernet frame into the matching vxlan
 * interface.
 */
static struct mbuf *
vxlan_input(void *arg, struct mbuf *m, struct ip *ip, struct ip6_hdr *ip6,
    void *uhp, int hlen)
{
	struct vxlan_tep *vt = arg;
	union vxlan_addr addr;
	struct vxlan_peer key, *p;
	struct udphdr *uh;
struct vxlan_header *vh; 612 struct ether_header *eh; 613 int vhlen = hlen + sizeof(*vh); 614 struct mbuf *n; 615 int off; 616 in_port_t port; 617 struct vxlan_softc *sc = NULL; 618 struct ifnet *ifp; 619 int rxhprio; 620 uint8_t tos; 621 622 if (m->m_pkthdr.len < vhlen) 623 goto drop; 624 625 uh = uhp; 626 port = uh->uh_sport; 627 628 if (ip != NULL) { 629 memset(&addr, 0, sizeof(addr)); 630 addr.in4 = ip->ip_src; 631 tos = ip->ip_tos; 632 } 633 #ifdef INET6 634 else { 635 addr.in6 = ip6->ip6_src; 636 tos = bemtoh32(&ip6->ip6_flow) >> 20; 637 } 638 #endif 639 640 if (m->m_len < vhlen) { 641 m = m_pullup(m, vhlen); 642 if (m == NULL) 643 return (NULL); 644 } 645 646 /* can't use ip/ip6/uh after this */ 647 648 vh = (struct vxlan_header *)(mtod(m, caddr_t) + hlen); 649 650 memset(&key, 0, sizeof(key)); 651 key.p_addr = addr; 652 key.p_header.vxlan_flags = vh->vxlan_flags & htonl(VXLAN_F_I); 653 key.p_header.vxlan_id = vh->vxlan_id & htonl(VXLAN_VNI_MASK); 654 655 mtx_enter(&vt->vt_mtx); 656 p = RBT_FIND(vxlan_peers, &vt->vt_peers, &key); 657 if (p == NULL) { 658 memset(&key.p_addr, 0, sizeof(key.p_addr)); 659 p = RBT_FIND(vxlan_peers, &vt->vt_peers, &key); 660 } 661 if (p != NULL) 662 sc = vxlan_take(p->p_sc); 663 mtx_leave(&vt->vt_mtx); 664 665 if (sc == NULL) 666 goto drop; 667 668 ifp = &sc->sc_ac.ac_if; 669 if (ISSET(ifp->if_flags, IFF_LINK0) && port != sc->sc_port) 670 goto rele_drop; 671 672 m_adj(m, vhlen); 673 674 if (m->m_pkthdr.len < sizeof(*eh)) 675 goto rele_drop; 676 677 if (m->m_len < sizeof(*eh)) { 678 m = m_pullup(m, sizeof(*eh)); 679 if (m == NULL) 680 goto rele; 681 } 682 683 n = m_getptr(m, sizeof(*eh), &off); 684 if (n == NULL) 685 goto rele_drop; 686 687 if (!ALIGNED_POINTER(mtod(n, caddr_t) + off, uint32_t)) { 688 n = m_dup_pkt(m, ETHER_ALIGN, M_NOWAIT); 689 m_freem(m); 690 if (n == NULL) 691 goto rele; 692 m = n; 693 } 694 695 if (sc->sc_mode == VXLAN_TMODE_LEARNING) { 696 eh = mtod(m, struct ether_header *); 697 etherbridge_map_ea(&sc->sc_eb, 
&addr, 698 (struct ether_addr *)eh->ether_shost); 699 } 700 701 rxhprio = sc->sc_rxhprio; 702 switch (rxhprio) { 703 case IF_HDRPRIO_PACKET: 704 /* nop */ 705 break; 706 case IF_HDRPRIO_OUTER: 707 m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(tos); 708 break; 709 default: 710 m->m_pkthdr.pf.prio = rxhprio; 711 break; \ 712 } \ 713 714 if_vinput(ifp, m); 715 rele: 716 vxlan_rele(sc); 717 return (NULL); 718 719 rele_drop: 720 vxlan_rele(sc); 721 drop: 722 m_freem(m); 723 return (NULL); 724 } 725 726 static int 727 vxlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 728 { 729 struct vxlan_softc *sc = ifp->if_softc; 730 struct ifreq *ifr = (struct ifreq *)data; 731 struct ifbrparam *bparam = (struct ifbrparam *)data; 732 int error = 0; 733 734 switch (cmd) { 735 case SIOCSIFADDR: 736 break; 737 case SIOCSIFFLAGS: 738 if (ISSET(ifp->if_flags, IFF_UP)) { 739 if (!ISSET(ifp->if_flags, IFF_RUNNING)) 740 error = vxlan_up(sc); 741 else 742 error = 0; 743 } else { 744 if (ISSET(ifp->if_flags, IFF_RUNNING)) 745 error = vxlan_down(sc); 746 } 747 break; 748 749 case SIOCSLIFPHYRTABLE: 750 error = vxlan_set_rdomain(sc, ifr); 751 break; 752 case SIOCGLIFPHYRTABLE: 753 error = vxlan_get_rdomain(sc, ifr); 754 break; 755 756 case SIOCSLIFPHYADDR: 757 error = vxlan_set_tunnel(sc, (const struct if_laddrreq *)data); 758 break; 759 case SIOCGLIFPHYADDR: 760 error = vxlan_get_tunnel(sc, (struct if_laddrreq *)data); 761 break; 762 case SIOCDIFPHYADDR: 763 error = vxlan_del_tunnel(sc); 764 break; 765 766 case SIOCSVNETID: 767 error = vxlan_set_vnetid(sc, ifr); 768 break; 769 case SIOCGVNETID: 770 error = vxlan_get_vnetid(sc, ifr); 771 break; 772 case SIOCDVNETID: 773 error = vxlan_del_vnetid(sc); 774 break; 775 776 case SIOCSIFPARENT: 777 error = vxlan_set_parent(sc, (struct if_parent *)data); 778 break; 779 case SIOCGIFPARENT: 780 error = vxlan_get_parent(sc, (struct if_parent *)data); 781 break; 782 case SIOCDIFPARENT: 783 error = vxlan_del_parent(sc); 784 break; 785 786 case SIOCSTXHPRIO: 787 
error = if_txhprio_l2_check(ifr->ifr_hdrprio); 788 if (error != 0) 789 break; 790 791 sc->sc_txhprio = ifr->ifr_hdrprio; 792 break; 793 case SIOCGTXHPRIO: 794 ifr->ifr_hdrprio = sc->sc_txhprio; 795 break; 796 797 case SIOCSRXHPRIO: 798 error = if_rxhprio_l2_check(ifr->ifr_hdrprio); 799 if (error != 0) 800 break; 801 802 sc->sc_rxhprio = ifr->ifr_hdrprio; 803 break; 804 case SIOCGRXHPRIO: 805 ifr->ifr_hdrprio = sc->sc_rxhprio; 806 break; 807 808 case SIOCSLIFPHYDF: 809 /* commit */ 810 sc->sc_df = ifr->ifr_df ? htons(IP_DF) : htons(0); 811 break; 812 case SIOCGLIFPHYDF: 813 ifr->ifr_df = sc->sc_df ? 1 : 0; 814 break; 815 816 case SIOCSLIFPHYTTL: 817 if (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff) { 818 error = EINVAL; 819 break; 820 } 821 822 /* commit */ 823 sc->sc_ttl = (uint8_t)ifr->ifr_ttl; 824 break; 825 case SIOCGLIFPHYTTL: 826 ifr->ifr_ttl = (int)sc->sc_ttl; 827 break; 828 829 case SIOCBRDGSCACHE: 830 error = etherbridge_set_max(&sc->sc_eb, bparam); 831 break; 832 case SIOCBRDGGCACHE: 833 error = etherbridge_get_max(&sc->sc_eb, bparam); 834 break; 835 case SIOCBRDGSTO: 836 error = etherbridge_set_tmo(&sc->sc_eb, bparam); 837 break; 838 case SIOCBRDGGTO: 839 error = etherbridge_get_tmo(&sc->sc_eb, bparam); 840 break; 841 842 case SIOCBRDGRTS: 843 error = etherbridge_rtfind(&sc->sc_eb, 844 (struct ifbaconf *)data); 845 break; 846 case SIOCBRDGFLUSH: 847 etherbridge_flush(&sc->sc_eb, 848 ((struct ifbreq *)data)->ifbr_ifsflags); 849 break; 850 case SIOCBRDGSADDR: 851 error = vxlan_add_addr(sc, (struct ifbareq *)data); 852 break; 853 case SIOCBRDGDADDR: 854 error = vxlan_del_addr(sc, (struct ifbareq *)data); 855 break; 856 857 case SIOCADDMULTI: 858 case SIOCDELMULTI: 859 /* no hardware to program */ 860 break; 861 862 default: 863 error = ether_ioctl(ifp, &sc->sc_ac, cmd, data); 864 break; 865 } 866 867 if (error == ENETRESET) { 868 /* no hardware to program */ 869 error = 0; 870 } 871 872 return (error); 873 } 874 875 static struct vxlan_tep * 876 
vxlan_tep_get(struct vxlan_softc *sc, const union vxlan_addr *addr) 877 { 878 struct vxlan_tep *vt; 879 880 TAILQ_FOREACH(vt, &vxlan_teps, vt_entry) { 881 if (sc->sc_af == vt->vt_af && 882 sc->sc_rdomain == vt->vt_rdomain && 883 memcmp(addr, &vt->vt_addr, sizeof(*addr)) == 0 && 884 sc->sc_port == vt->vt_port) 885 return (vt); 886 } 887 888 return (NULL); 889 } 890 891 static int 892 vxlan_tep_add_addr(struct vxlan_softc *sc, const union vxlan_addr *addr, 893 struct vxlan_peer *p) 894 { 895 struct mbuf m; 896 struct vxlan_tep *vt; 897 struct socket *so; 898 struct sockaddr_in *sin; 899 #ifdef INET6 900 struct sockaddr_in6 *sin6; 901 #endif 902 int error; 903 904 vt = vxlan_tep_get(sc, addr); 905 if (vt != NULL) { 906 struct vxlan_peer *op; 907 908 mtx_enter(&vt->vt_mtx); 909 op = RBT_INSERT(vxlan_peers, &vt->vt_peers, p); 910 mtx_leave(&vt->vt_mtx); 911 912 if (op != NULL) 913 return (EADDRINUSE); 914 915 return (0); 916 } 917 918 vt = malloc(sizeof(*vt), M_DEVBUF, M_NOWAIT|M_ZERO); 919 if (vt == NULL) 920 return (ENOMEM); 921 922 vt->vt_af = sc->sc_af; 923 vt->vt_rdomain = sc->sc_rdomain; 924 vt->vt_addr = *addr; 925 vt->vt_port = sc->sc_port; 926 927 mtx_init(&vt->vt_mtx, IPL_SOFTNET); 928 RBT_INIT(vxlan_peers, &vt->vt_peers); 929 RBT_INSERT(vxlan_peers, &vt->vt_peers, p); 930 931 error = socreate(vt->vt_af, &so, SOCK_DGRAM, IPPROTO_UDP); 932 if (error != 0) 933 goto free; 934 935 solock(so); 936 sotoinpcb(so)->inp_upcall = vxlan_input; 937 sotoinpcb(so)->inp_upcall_arg = vt; 938 sounlock(so); 939 940 m_inithdr(&m); 941 m.m_len = sizeof(vt->vt_rdomain); 942 *mtod(&m, unsigned int *) = vt->vt_rdomain; 943 error = sosetopt(so, SOL_SOCKET, SO_RTABLE, &m); 944 if (error != 0) 945 goto close; 946 947 m_inithdr(&m); 948 switch (vt->vt_af) { 949 case AF_INET: 950 sin = mtod(&m, struct sockaddr_in *); 951 memset(sin, 0, sizeof(*sin)); 952 sin->sin_len = sizeof(*sin); 953 sin->sin_family = AF_INET; 954 sin->sin_addr = addr->in4; 955 sin->sin_port = vt->vt_port; 956 957 
m.m_len = sizeof(*sin); 958 break; 959 960 #ifdef INET6 961 case AF_INET6: 962 sin6 = mtod(&m, struct sockaddr_in6 *); 963 sin6->sin6_len = sizeof(*sin6); 964 sin6->sin6_family = AF_INET6; 965 in6_recoverscope(sin6, &addr->in6); 966 sin6->sin6_port = sc->sc_port; 967 968 m.m_len = sizeof(*sin6); 969 break; 970 #endif 971 default: 972 unhandled_af(vt->vt_af); 973 } 974 975 solock(so); 976 error = sobind(so, &m, curproc); 977 sounlock(so); 978 if (error != 0) 979 goto close; 980 981 rw_assert_wrlock(&vxlan_lock); 982 TAILQ_INSERT_TAIL(&vxlan_teps, vt, vt_entry); 983 984 vt->vt_so = so; 985 986 return (0); 987 988 close: 989 soclose(so, MSG_DONTWAIT); 990 free: 991 free(vt, M_DEVBUF, sizeof(*vt)); 992 return (error); 993 } 994 995 static void 996 vxlan_tep_del_addr(struct vxlan_softc *sc, const union vxlan_addr *addr, 997 struct vxlan_peer *p) 998 { 999 struct vxlan_tep *vt; 1000 int empty; 1001 1002 vt = vxlan_tep_get(sc, addr); 1003 if (vt == NULL) 1004 panic("unable to find vxlan_tep for peer %p (sc %p)", p, sc); 1005 1006 mtx_enter(&vt->vt_mtx); 1007 RBT_REMOVE(vxlan_peers, &vt->vt_peers, p); 1008 empty = RBT_EMPTY(vxlan_peers, &vt->vt_peers); 1009 mtx_leave(&vt->vt_mtx); 1010 1011 if (!empty) 1012 return; 1013 1014 rw_assert_wrlock(&vxlan_lock); 1015 TAILQ_REMOVE(&vxlan_teps, vt, vt_entry); 1016 1017 soclose(vt->vt_so, MSG_DONTWAIT); 1018 free(vt, M_DEVBUF, sizeof(*vt)); 1019 } 1020 1021 static int 1022 vxlan_tep_up(struct vxlan_softc *sc) 1023 { 1024 struct vxlan_peer *up, *mp; 1025 int error; 1026 1027 up = malloc(sizeof(*up), M_DEVBUF, M_NOWAIT|M_ZERO); 1028 if (up == NULL) 1029 return (ENOMEM); 1030 1031 if (sc->sc_mode == VXLAN_TMODE_P2P) 1032 up->p_addr = sc->sc_dst; 1033 up->p_header = sc->sc_header; 1034 up->p_sc = vxlan_take(sc); 1035 1036 error = vxlan_tep_add_addr(sc, &sc->sc_src, up); 1037 if (error != 0) 1038 goto freeup; 1039 1040 sc->sc_ucast_peer = up; 1041 1042 if (sc->sc_mode != VXLAN_TMODE_LEARNING) 1043 return (0); 1044 1045 mp = 
malloc(sizeof(*mp), M_DEVBUF, M_NOWAIT|M_ZERO); 1046 if (mp == NULL) { 1047 error = ENOMEM; 1048 goto delup; 1049 } 1050 1051 /* addr is multicast, leave it as 0s */ 1052 mp->p_header = sc->sc_header; 1053 mp->p_sc = vxlan_take(sc); 1054 1055 /* destination address is a multicast group we want to join */ 1056 error = vxlan_tep_add_addr(sc, &sc->sc_dst, up); 1057 if (error != 0) 1058 goto freemp; 1059 1060 sc->sc_mcast_peer = mp; 1061 1062 return (0); 1063 1064 freemp: 1065 vxlan_rele(mp->p_sc); 1066 free(mp, M_DEVBUF, sizeof(*mp)); 1067 delup: 1068 vxlan_tep_del_addr(sc, &sc->sc_src, up); 1069 freeup: 1070 vxlan_rele(up->p_sc); 1071 free(up, M_DEVBUF, sizeof(*up)); 1072 return (error); 1073 } 1074 1075 static void 1076 vxlan_tep_down(struct vxlan_softc *sc) 1077 { 1078 struct vxlan_peer *up = sc->sc_ucast_peer; 1079 1080 if (sc->sc_mode == VXLAN_TMODE_LEARNING) { 1081 struct vxlan_peer *mp = sc->sc_mcast_peer; 1082 vxlan_tep_del_addr(sc, &sc->sc_dst, mp); 1083 vxlan_rele(mp->p_sc); 1084 free(mp, M_DEVBUF, sizeof(*mp)); 1085 } 1086 1087 vxlan_tep_del_addr(sc, &sc->sc_src, up); 1088 vxlan_rele(up->p_sc); 1089 free(up, M_DEVBUF, sizeof(*up)); 1090 } 1091 1092 static int 1093 vxlan_up(struct vxlan_softc *sc) 1094 { 1095 struct ifnet *ifp = &sc->sc_ac.ac_if; 1096 struct ifnet *ifp0 = NULL; 1097 int error; 1098 1099 KASSERT(!ISSET(ifp->if_flags, IFF_RUNNING)); 1100 NET_ASSERT_LOCKED(); 1101 1102 if (sc->sc_af == AF_UNSPEC) 1103 return (EDESTADDRREQ); 1104 KASSERT(sc->sc_mode != VXLAN_TMODE_UNSET); 1105 1106 NET_UNLOCK(); 1107 1108 error = rw_enter(&vxlan_lock, RW_WRITE|RW_INTR); 1109 if (error != 0) 1110 goto netlock; 1111 1112 NET_LOCK(); 1113 if (ISSET(ifp->if_flags, IFF_RUNNING)) { 1114 /* something else beat us */ 1115 rw_exit(&vxlan_lock); 1116 return (0); 1117 } 1118 NET_UNLOCK(); 1119 1120 if (sc->sc_mode != VXLAN_TMODE_P2P) { 1121 error = etherbridge_up(&sc->sc_eb); 1122 if (error != 0) 1123 goto unlock; 1124 } 1125 1126 if (sc->sc_mode == VXLAN_TMODE_LEARNING) { 
1127 ifp0 = if_get(sc->sc_if_index0); 1128 if (ifp0 == NULL) { 1129 error = ENXIO; 1130 goto down; 1131 } 1132 1133 /* check again if multicast will work on top of the parent */ 1134 if (!ISSET(ifp0->if_flags, IFF_MULTICAST)) { 1135 error = EPROTONOSUPPORT; 1136 goto put; 1137 } 1138 1139 error = vxlan_addmulti(sc, ifp0); 1140 if (error != 0) 1141 goto put; 1142 1143 /* Register callback if parent wants to unregister */ 1144 if_detachhook_add(ifp0, &sc->sc_dtask); 1145 } else { 1146 if (sc->sc_if_index0 != 0) { 1147 error = EPROTONOSUPPORT; 1148 goto down; 1149 } 1150 } 1151 1152 error = vxlan_tep_up(sc); 1153 if (error != 0) 1154 goto del; 1155 1156 if_put(ifp0); 1157 1158 NET_LOCK(); 1159 SET(ifp->if_flags, IFF_RUNNING); 1160 rw_exit(&vxlan_lock); 1161 1162 return (0); 1163 1164 del: 1165 if (sc->sc_mode == VXLAN_TMODE_LEARNING) { 1166 if (ifp0 != NULL) 1167 if_detachhook_del(ifp0, &sc->sc_dtask); 1168 vxlan_delmulti(sc); 1169 } 1170 put: 1171 if_put(ifp0); 1172 down: 1173 if (sc->sc_mode != VXLAN_TMODE_P2P) 1174 etherbridge_down(&sc->sc_eb); 1175 unlock: 1176 rw_exit(&vxlan_lock); 1177 netlock: 1178 NET_LOCK(); 1179 1180 return (error); 1181 } 1182 1183 static int 1184 vxlan_down(struct vxlan_softc *sc) 1185 { 1186 struct ifnet *ifp = &sc->sc_ac.ac_if; 1187 struct ifnet *ifp0; 1188 int error; 1189 1190 KASSERT(ISSET(ifp->if_flags, IFF_RUNNING)); 1191 NET_UNLOCK(); 1192 1193 error = rw_enter(&vxlan_lock, RW_WRITE|RW_INTR); 1194 if (error != 0) { 1195 NET_LOCK(); 1196 return (error); 1197 } 1198 1199 NET_LOCK(); 1200 if (!ISSET(ifp->if_flags, IFF_RUNNING)) { 1201 /* something else beat us */ 1202 rw_exit(&vxlan_lock); 1203 return (0); 1204 } 1205 NET_UNLOCK(); 1206 1207 vxlan_tep_down(sc); 1208 1209 if (sc->sc_mode == VXLAN_TMODE_LEARNING) { 1210 vxlan_delmulti(sc); 1211 ifp0 = if_get(sc->sc_if_index0); 1212 if (ifp0 != NULL) { 1213 if_detachhook_del(ifp0, &sc->sc_dtask); 1214 } 1215 if_put(ifp0); 1216 } 1217 1218 if (sc->sc_mode != VXLAN_TMODE_P2P) 1219 
etherbridge_down(&sc->sc_eb); 1220 1221 taskq_del_barrier(ifp->if_snd.ifq_softnet, &sc->sc_send_task); 1222 NET_LOCK(); 1223 CLR(ifp->if_flags, IFF_RUNNING); 1224 rw_exit(&vxlan_lock); 1225 1226 return (0); 1227 } 1228 1229 static int 1230 vxlan_addmulti(struct vxlan_softc *sc, struct ifnet *ifp0) 1231 { 1232 int error = 0; 1233 1234 NET_LOCK(); 1235 1236 switch (sc->sc_af) { 1237 case AF_INET: 1238 sc->sc_inmulti = in_addmulti(&sc->sc_dst.in4, ifp0); 1239 if (sc->sc_inmulti == NULL) 1240 error = EADDRNOTAVAIL; 1241 break; 1242 #ifdef INET6 1243 case AF_INET6: 1244 sc->sc_inmulti = in6_addmulti(&sc->sc_dst.in6, ifp0, &error); 1245 break; 1246 #endif 1247 default: 1248 unhandled_af(sc->sc_af); 1249 } 1250 1251 NET_UNLOCK(); 1252 1253 return (error); 1254 } 1255 1256 static void 1257 vxlan_delmulti(struct vxlan_softc *sc) 1258 { 1259 NET_LOCK(); 1260 1261 switch (sc->sc_af) { 1262 case AF_INET: 1263 in_delmulti(sc->sc_inmulti); 1264 break; 1265 #ifdef INET6 1266 case AF_INET6: 1267 in6_delmulti(sc->sc_inmulti); 1268 break; 1269 #endif 1270 default: 1271 unhandled_af(sc->sc_af); 1272 } 1273 1274 sc->sc_inmulti = NULL; /* keep it tidy */ 1275 1276 NET_UNLOCK(); 1277 } 1278 1279 static int 1280 vxlan_set_rdomain(struct vxlan_softc *sc, const struct ifreq *ifr) 1281 { 1282 struct ifnet *ifp = &sc->sc_ac.ac_if; 1283 1284 if (ifr->ifr_rdomainid < 0 || 1285 ifr->ifr_rdomainid > RT_TABLEID_MAX) 1286 return (EINVAL); 1287 if (!rtable_exists(ifr->ifr_rdomainid)) 1288 return (EADDRNOTAVAIL); 1289 1290 if (sc->sc_rdomain == ifr->ifr_rdomainid) 1291 return (0); 1292 1293 if (ISSET(ifp->if_flags, IFF_RUNNING)) 1294 return (EBUSY); 1295 1296 /* commit */ 1297 sc->sc_rdomain = ifr->ifr_rdomainid; 1298 etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL); 1299 1300 return (0); 1301 } 1302 1303 static int 1304 vxlan_get_rdomain(struct vxlan_softc *sc, struct ifreq *ifr) 1305 { 1306 ifr->ifr_rdomainid = sc->sc_rdomain; 1307 1308 return (0); 1309 } 1310 1311 static int 1312 
/*
 * SIOCSLIFPHYADDR: configure the tunnel source (and optionally
 * destination) addresses and UDP port.  The destination determines
 * the tunnel mode: absent -> ENDPOINT, multicast -> LEARNING,
 * unicast -> P2P.  Fails with EBUSY while the interface is running.
 */
static int
vxlan_set_tunnel(struct vxlan_softc *sc, const struct if_laddrreq *req)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct sockaddr *src = (struct sockaddr *)&req->addr;
	struct sockaddr *dst = (struct sockaddr *)&req->dstaddr;
	struct sockaddr_in *src4, *dst4;
#ifdef INET6
	struct sockaddr_in6 *src6, *dst6;
	int error;
#endif
	union vxlan_addr saddr, daddr;
	unsigned int mode = VXLAN_TMODE_ENDPOINT;
	in_port_t port = htons(VXLAN_PORT);	/* IANA default, 4789 */

	memset(&saddr, 0, sizeof(saddr));
	memset(&daddr, 0, sizeof(daddr));

	/* validate */
	switch (src->sa_family) {
	case AF_INET:
		src4 = (struct sockaddr_in *)src;
		/* source must be a specific unicast address */
		if (in_nullhost(src4->sin_addr) ||
		    IN_MULTICAST(src4->sin_addr.s_addr))
			return (EINVAL);

		/* a non-zero source port overrides the VXLAN default */
		if (src4->sin_port != htons(0))
			port = src4->sin_port;

		if (dst->sa_family != AF_UNSPEC) {
			/* destination family must match the source */
			if (dst->sa_family != AF_INET)
				return (EINVAL);

			dst4 = (struct sockaddr_in *)dst;
			if (in_nullhost(dst4->sin_addr))
				return (EINVAL);

			/* only the source may carry a port */
			if (dst4->sin_port != htons(0))
				return (EINVAL);

			/* all good */
			mode = IN_MULTICAST(dst4->sin_addr.s_addr) ?
			    VXLAN_TMODE_LEARNING : VXLAN_TMODE_P2P;
			daddr.in4 = dst4->sin_addr;
		}

		saddr.in4 = src4->sin_addr;
		break;

#ifdef INET6
	case AF_INET6:
		src6 = (struct sockaddr_in6 *)src;
		if (IN6_IS_ADDR_UNSPECIFIED(&src6->sin6_addr) ||
		    IN6_IS_ADDR_MULTICAST(&src6->sin6_addr))
			return (EINVAL);

		if (src6->sin6_port != htons(0))
			port = src6->sin6_port;

		if (dst->sa_family != AF_UNSPEC) {
			if (dst->sa_family != AF_INET6)
				return (EINVAL);

			dst6 = (struct sockaddr_in6 *)dst;
			if (IN6_IS_ADDR_UNSPECIFIED(&dst6->sin6_addr))
				return (EINVAL);

			/* both ends must live in the same scope */
			if (src6->sin6_scope_id != dst6->sin6_scope_id)
				return (EINVAL);

			if (dst6->sin6_port != htons(0))
				return (EINVAL);

			/* all good */
			mode = IN6_IS_ADDR_MULTICAST(&dst6->sin6_addr) ?
			    VXLAN_TMODE_LEARNING : VXLAN_TMODE_P2P;
			error = in6_embedscope(&daddr.in6, dst6, NULL, NULL);
			if (error != 0)
				return (error);
		}

		error = in6_embedscope(&saddr.in6, src6, NULL, NULL);
		if (error != 0)
			return (error);

		break;
#endif
	default:
		return (EAFNOSUPPORT);
	}

	/* no change? nothing to do and no EBUSY check needed */
	if (memcmp(&sc->sc_src, &saddr, sizeof(sc->sc_src)) == 0 &&
	    memcmp(&sc->sc_dst, &daddr, sizeof(sc->sc_dst)) == 0 &&
	    sc->sc_port == port)
		return (0);

	if (ISSET(ifp->if_flags, IFF_RUNNING))
		return (EBUSY);

	/* commit */
	sc->sc_af = src->sa_family;
	sc->sc_src = saddr;
	sc->sc_dst = daddr;
	sc->sc_port = port;
	sc->sc_mode = mode;
	etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);

	return (0);
}

/*
 * SIOCGLIFPHYADDR: report the configured tunnel addresses and port.
 * In ENDPOINT mode the destination is reported as an AF_UNSPEC
 * sockaddr of minimal length.
 */
static int
vxlan_get_tunnel(struct vxlan_softc *sc, struct if_laddrreq *req)
{
	struct sockaddr *dstaddr = (struct sockaddr *)&req->dstaddr;
	struct sockaddr_in *sin;
#ifdef INET6
	struct sockaddr_in6 *sin6;
#endif

	if (sc->sc_af == AF_UNSPEC)
		return (EADDRNOTAVAIL);
	KASSERT(sc->sc_mode != VXLAN_TMODE_UNSET);

	memset(&req->addr, 0, sizeof(req->addr));
	memset(&req->dstaddr, 0, sizeof(req->dstaddr));

	/* default to endpoint */
	dstaddr->sa_len = 2;	/* sa_len + sa_family only */
	dstaddr->sa_family = AF_UNSPEC;

	switch (sc->sc_af) {
	case AF_INET:
		sin = (struct sockaddr_in *)&req->addr;
		sin->sin_len = sizeof(*sin);
		sin->sin_family = AF_INET;
		sin->sin_addr = sc->sc_src.in4;
		sin->sin_port = sc->sc_port;

		if (sc->sc_mode == VXLAN_TMODE_ENDPOINT)
			break;

		sin = (struct sockaddr_in *)&req->dstaddr;
		sin->sin_len = sizeof(*sin);
		sin->sin_family = AF_INET;
		sin->sin_addr = sc->sc_dst.in4;
		break;

#ifdef INET6
	case AF_INET6:
		sin6 = (struct sockaddr_in6 *)&req->addr;
		sin6->sin6_len = sizeof(*sin6);
		sin6->sin6_family = AF_INET6;
		/* undo the embedded scope before handing to userland */
		in6_recoverscope(sin6, &sc->sc_src.in6);
		sin6->sin6_port = sc->sc_port;

		if (sc->sc_mode == VXLAN_TMODE_ENDPOINT)
			break;

		sin6 = (struct sockaddr_in6 *)&req->dstaddr;
		sin6->sin6_len = sizeof(*sin6);
		sin6->sin6_family = AF_INET6;
		in6_recoverscope(sin6, &sc->sc_dst.in6);
		break;
#endif
	default:
		unhandled_af(sc->sc_af);
	}

	return (0);
}

/*
 * SIOCDIFPHYADDR: clear the tunnel configuration.  EBUSY while
 * running; flushes learned bridge entries on success.
 */
static int
vxlan_del_tunnel(struct vxlan_softc *sc)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;

	if (sc->sc_af == AF_UNSPEC)
		return (0);

	if (ISSET(ifp->if_flags, IFF_RUNNING))
		return (EBUSY);

	/* commit */
	sc->sc_af = AF_UNSPEC;
	memset(&sc->sc_src, 0, sizeof(sc->sc_src));
	memset(&sc->sc_dst, 0, sizeof(sc->sc_dst));
	sc->sc_port = htons(0);
	sc->sc_mode = VXLAN_TMODE_UNSET;
	etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);

	return (0);
}

/*
 * SIOCSVNETID: set the 24-bit VXLAN Network Identifier.  The VNI is
 * stored pre-shifted in network byte order in sc_header so it can be
 * copied straight into outgoing headers; the I flag (valid VNI) is
 * kept in the same byte order.
 */
static int
vxlan_set_vnetid(struct vxlan_softc *sc, const struct ifreq *ifr)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	uint32_t vni;

	if (ifr->ifr_vnetid < VXLAN_VNI_MIN ||
	    ifr->ifr_vnetid > VXLAN_VNI_MAX)
		return (EINVAL);

	vni = htonl(ifr->ifr_vnetid << VXLAN_VNI_SHIFT);
	if (ISSET(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I)) &&
	    sc->sc_header.vxlan_id == vni)
		return (0);

	if (ISSET(ifp->if_flags, IFF_RUNNING))
		return (EBUSY);

	/* commit */
	SET(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I));
	sc->sc_header.vxlan_id = vni;
	etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);

	return (0);
}

/*
 * SIOCGVNETID: report the VNI, converting back from the stored
 * shifted network-byte-order representation.
 */
static int
vxlan_get_vnetid(struct vxlan_softc *sc, struct ifreq *ifr)
{
	uint32_t vni;

	if (!ISSET(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I)))
		return (EADDRNOTAVAIL);

	vni = ntohl(sc->sc_header.vxlan_id);
	vni &= VXLAN_VNI_MASK;
	vni >>= VXLAN_VNI_SHIFT;

	ifr->ifr_vnetid = vni;

	return (0);
}

/*
 * SIOCDVNETID: clear the VNI and its valid (I) flag.  EBUSY while
 * running.
 */
static int
vxlan_del_vnetid(struct vxlan_softc *sc)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;

	if (!ISSET(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I)))
		return (0);

	if (ISSET(ifp->if_flags, IFF_RUNNING))
		return (EBUSY);

	/* commit */
	CLR(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I));
	sc->sc_header.vxlan_id = htonl(0 << VXLAN_VNI_SHIFT);
	etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);

	return (0);
}

/*
 * SIOCSIFPARENT: attach to a parent interface (needed for multicast
 * tunnel modes).  The parent must exist and be multicast capable;
 * only its if_index is stored, so the parent may later go away.
 */
static int
vxlan_set_parent(struct vxlan_softc *sc, const struct if_parent *p)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct ifnet *ifp0;
	int error = 0;

	ifp0 = if_unit(p->ifp_parent);
	if (ifp0 == NULL)
		return (ENXIO);

	if (!ISSET(ifp0->if_flags, IFF_MULTICAST)) {
		error = ENXIO;
		goto put;
	}

	if (sc->sc_if_index0 == ifp0->if_index)
		goto put;

	if (ISSET(ifp->if_flags, IFF_RUNNING)) {
		error = EBUSY;
		goto put;
	}

	/* disable LRO on the parent; we can't decap coalesced frames */
	ifsetlro(ifp0, 0);

	/* commit */
	sc->sc_if_index0 = ifp0->if_index;
	etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);

put:
	if_put(ifp0);
	return (error);
}

/*
 * SIOCGIFPARENT: report the parent interface name, if the stored
 * if_index still resolves to a live interface.
 */
static int
vxlan_get_parent(struct vxlan_softc *sc, struct if_parent *p)
{
	struct ifnet *ifp0;
	int error = 0;

	ifp0 = if_get(sc->sc_if_index0);
	if (ifp0 == NULL)
		error = EADDRNOTAVAIL;
	else
		strlcpy(p->ifp_parent, ifp0->if_xname, sizeof(p->ifp_parent));
	if_put(ifp0);

	return (error);
}

/*
 * SIOCDIFPARENT: detach from the parent interface.  EBUSY while
 * running.
 */
static int
vxlan_del_parent(struct vxlan_softc *sc)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;

	if (sc->sc_if_index0 == 0)
		return (0);

	if (ISSET(ifp->if_flags, IFF_RUNNING))
		return (EBUSY);

	/* commit */
	sc->sc_if_index0 = 0;
	etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);

	return (0);
}

/*
 * SIOCBRDGSADDR: install a static or dynamic bridge entry mapping an
 * Ethernet address to a tunnel endpoint address.  Only meaningful in
 * LEARNING/ENDPOINT modes; the endpoint family must match sc_af.
 */
static int
vxlan_add_addr(struct vxlan_softc *sc, const struct ifbareq *ifba)
{
	struct sockaddr_in *sin;
#ifdef INET6
	struct sockaddr_in6 *sin6;
	struct sockaddr_in6 src6 = {
		.sin6_len = sizeof(src6),
		.sin6_family = AF_UNSPEC,
	};
	int error;
#endif
	union vxlan_addr endpoint;
	unsigned int type;

	switch (sc->sc_mode) {
	case VXLAN_TMODE_UNSET:
		return (ENOPROTOOPT);
	case VXLAN_TMODE_P2P:
		return (EPROTONOSUPPORT);
	default:
		break;
	}

	/* ignore ifba_ifsname */

	if (ISSET(ifba->ifba_flags, ~IFBAF_TYPEMASK))
		return (EINVAL);
	switch (ifba->ifba_flags & IFBAF_TYPEMASK) {
	case IFBAF_DYNAMIC:
		type = EBE_DYNAMIC;
		break;
	case IFBAF_STATIC:
		type = EBE_STATIC;
		break;
	default:
		return (EINVAL);
	}

	memset(&endpoint, 0, sizeof(endpoint));

	if (ifba->ifba_dstsa.ss_family != sc->sc_af)
		return (EAFNOSUPPORT);
	switch (ifba->ifba_dstsa.ss_family) {
	case AF_INET:
		sin = (struct sockaddr_in *)&ifba->ifba_dstsa;
		/* endpoint must be a specific unicast host, no port */
		if (in_nullhost(sin->sin_addr) ||
		    IN_MULTICAST(sin->sin_addr.s_addr))
			return (EADDRNOTAVAIL);

		if (sin->sin_port != htons(0))
			return (EADDRNOTAVAIL);

		endpoint.in4 = sin->sin_addr;
		break;

#ifdef INET6
	case AF_INET6:
		sin6 = (struct sockaddr_in6 *)&ifba->ifba_dstsa;
		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
		    IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
			return (EADDRNOTAVAIL);

		/* endpoint must share the tunnel source's scope */
		in6_recoverscope(&src6, &sc->sc_src.in6);
		if (src6.sin6_scope_id != sin6->sin6_scope_id)
			return (EADDRNOTAVAIL);

		if (sin6->sin6_port != htons(0))
			return (EADDRNOTAVAIL);

		error = in6_embedscope(&endpoint.in6, sin6, NULL, NULL);
		if (error != 0)
			return (error);

		break;
#endif
	default: /* AF_UNSPEC */
		return (EADDRNOTAVAIL);
	}

	return (etherbridge_add_addr(&sc->sc_eb, &endpoint,
	    &ifba->ifba_dst, type));
}

/* SIOCBRDGDADDR: remove a bridge entry by Ethernet address. */
static int
vxlan_del_addr(struct vxlan_softc *sc, const struct ifbareq *ifba)
{
	return (etherbridge_del_addr(&sc->sc_eb, &ifba->ifba_dst));
}

/*
 * Detach hook run when the parent interface goes away: bring the
 * interface down and forget the stale parent if_index.
 */
void
vxlan_detach_hook(void *arg)
{
	struct vxlan_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ac.ac_if;

	if (ISSET(ifp->if_flags, IFF_RUNNING)) {
		vxlan_down(sc);
		CLR(ifp->if_flags, IFF_UP);
	}

	sc->sc_if_index0 = 0;
}

/*
 * etherbridge port comparison: endpoints are equal when all 128 bits
 * match (IPv4 addresses occupy the first 32 bits of the union, the
 * rest is zeroed at initialisation).
 */
static int
vxlan_eb_port_eq(void *arg, void *a, void *b)
{
	const union vxlan_addr *va = a, *vb = b;
	size_t i;

	for (i = 0; i < nitems(va->in6.s6_addr32); i++) {
		if (va->in6.s6_addr32[i] != vb->in6.s6_addr32[i])
			return (0);
	}

	return (1);
}

/*
 * etherbridge port reference: copy the endpoint address into a
 * pool-allocated union so the bridge can hold it independently.
 * Returns NULL if the pool is exhausted.
 */
static void *
vxlan_eb_port_take(void *arg, void *port)
{
	union vxlan_addr *endpoint;

	endpoint = pool_get(&vxlan_endpoint_pool, PR_NOWAIT);
	if (endpoint == NULL)
		return (NULL);

	*endpoint = *(union vxlan_addr *)port;

	return (endpoint);
}

/* etherbridge port release: return the copy to the pool. */
static void
vxlan_eb_port_rele(void *arg, void *port)
{
	union vxlan_addr *endpoint = port;

	pool_put(&vxlan_endpoint_pool, endpoint);
}

/* etherbridge: report this interface's name for a port. */
static size_t
vxlan_eb_port_ifname(void *arg, char *dst, size_t len, void *port)
{
	struct vxlan_softc *sc = arg;

	return (strlcpy(dst, sc->sc_ac.ac_if.if_xname, len));
}

/*
 * etherbridge: render an endpoint as a sockaddr for userland
 * (e.g. brconfig address listings).
 */
static void
vxlan_eb_port_sa(void *arg, struct sockaddr_storage *ss, void *port)
{
	struct vxlan_softc *sc = arg;
	union vxlan_addr *endpoint = port;

	switch (sc->sc_af) {
	case AF_INET: {
		struct sockaddr_in *sin = (struct sockaddr_in *)ss;

		sin->sin_len = sizeof(*sin);
		sin->sin_family = AF_INET;
		sin->sin_addr = endpoint->in4;
		break;
	}
#ifdef INET6
	case AF_INET6: {
		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ss;

		sin6->sin6_len = sizeof(*sin6);
		sin6->sin6_family = AF_INET6;
		in6_recoverscope(sin6, &endpoint->in6);
		break;
	}
#endif /* INET6 */
	default:
		unhandled_af(sc->sc_af);
	}
}

/*
 * Total order over peers for the vt_peers red-black tree: compare the
 * (network byte order) VXLAN id, then flags, then the endpoint
 * address word by word.
 */
static inline int
vxlan_peer_cmp(const struct vxlan_peer *ap, const struct vxlan_peer *bp)
{
	size_t i;

	if (ap->p_header.vxlan_id > bp->p_header.vxlan_id)
		return (1);
	if (ap->p_header.vxlan_id < bp->p_header.vxlan_id)
		return (-1);
	if (ap->p_header.vxlan_flags > bp->p_header.vxlan_flags)
		return (1);
	if (ap->p_header.vxlan_flags < bp->p_header.vxlan_flags)
		return (-1);

	for (i = 0; i < nitems(ap->p_addr.in6.s6_addr32); i++) {
		if (ap->p_addr.in6.s6_addr32[i] >
		    bp->p_addr.in6.s6_addr32[i])
			return (1);
		if (ap->p_addr.in6.s6_addr32[i] <
		    bp->p_addr.in6.s6_addr32[i])
			return (-1);
	}

	return (0);
}

RBT_GENERATE(vxlan_peers, vxlan_peer, p_entry, vxlan_peer_cmp);