1 /* $OpenBSD: if_vxlan.c,v 1.94 2023/10/27 20:56:48 jan Exp $ */ 2 3 /* 4 * Copyright (c) 2021 David Gwynne <dlg@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 #include "bpfilter.h" 20 #include "pf.h" 21 22 #include <sys/param.h> 23 #include <sys/systm.h> 24 #include <sys/kernel.h> 25 #include <sys/mbuf.h> 26 #include <sys/socket.h> 27 #include <sys/ioctl.h> 28 #include <sys/timeout.h> 29 #include <sys/pool.h> 30 #include <sys/tree.h> 31 #include <sys/refcnt.h> 32 #include <sys/smr.h> 33 34 #include <sys/socketvar.h> 35 36 #include <net/if.h> 37 #include <net/if_var.h> 38 #include <net/if_dl.h> 39 #include <net/if_media.h> 40 #include <net/if_types.h> 41 #include <net/route.h> 42 #include <net/rtable.h> 43 44 #include <netinet/in.h> 45 #include <netinet/in_var.h> 46 #include <netinet/if_ether.h> 47 #include <netinet/ip.h> 48 #include <netinet/udp.h> 49 #include <netinet/in_pcb.h> 50 #include <netinet/ip_var.h> 51 52 #ifdef INET6 53 #include <netinet/ip6.h> 54 #include <netinet6/ip6_var.h> 55 #include <netinet6/in6_var.h> 56 #endif 57 58 /* for bridge stuff */ 59 #include <net/if_bridge.h> 60 #include <net/if_etherbridge.h> 61 62 #if NBPFILTER > 0 63 #include <net/bpf.h> 64 #endif 65 66 /* 67 * The protocol. 
68 */ 69 70 #define VXLANMTU 1492 71 #define VXLAN_PORT 4789 72 73 struct vxlan_header { 74 uint32_t vxlan_flags; 75 #define VXLAN_F_I (1U << 27) 76 uint32_t vxlan_id; 77 #define VXLAN_VNI_SHIFT 8 78 #define VXLAN_VNI_MASK (0xffffffU << VXLAN_VNI_SHIFT) 79 }; 80 81 #define VXLAN_VNI_MAX 0x00ffffffU 82 #define VXLAN_VNI_MIN 0x00000000U 83 84 /* 85 * The driver. 86 */ 87 88 union vxlan_addr { 89 struct in_addr in4; 90 struct in6_addr in6; 91 }; 92 93 struct vxlan_softc; 94 95 struct vxlan_peer { 96 RBT_ENTRY(vxlan_peer) p_entry; 97 98 struct vxlan_header p_header; 99 union vxlan_addr p_addr; 100 101 struct vxlan_softc *p_sc; 102 }; 103 104 RBT_HEAD(vxlan_peers, vxlan_peer); 105 106 struct vxlan_tep { 107 TAILQ_ENTRY(vxlan_tep) vt_entry; 108 109 sa_family_t vt_af; 110 unsigned int vt_rdomain; 111 union vxlan_addr vt_addr; 112 #define vt_addr4 vt_addr.in4 113 #define vt_addr6 vt_addr.in6 114 in_port_t vt_port; 115 116 struct socket *vt_so; 117 118 struct mutex vt_mtx; 119 struct vxlan_peers vt_peers; 120 }; 121 122 TAILQ_HEAD(vxlan_teps, vxlan_tep); 123 124 enum vxlan_tunnel_mode { 125 VXLAN_TMODE_UNSET, 126 VXLAN_TMODE_P2P, /* unicast destination, no learning */ 127 VXLAN_TMODE_LEARNING, /* multicast destination, learning */ 128 VXLAN_TMODE_ENDPOINT, /* unset destination, no learning */ 129 }; 130 131 struct vxlan_softc { 132 struct arpcom sc_ac; 133 struct etherbridge sc_eb; 134 135 unsigned int sc_rdomain; 136 sa_family_t sc_af; 137 union vxlan_addr sc_src; 138 union vxlan_addr sc_dst; 139 in_port_t sc_port; 140 struct vxlan_header sc_header; 141 unsigned int sc_if_index0; 142 143 struct task sc_dtask; 144 void *sc_inmulti; 145 146 enum vxlan_tunnel_mode sc_mode; 147 struct vxlan_peer *sc_ucast_peer; 148 struct vxlan_peer *sc_mcast_peer; 149 struct refcnt sc_refs; 150 151 uint16_t sc_df; 152 int sc_ttl; 153 int sc_txhprio; 154 int sc_rxhprio; 155 156 struct task sc_send_task; 157 }; 158 159 void vxlanattach(int); 160 161 static int vxlan_clone_create(struct if_clone 
*, int); 162 static int vxlan_clone_destroy(struct ifnet *); 163 164 static int vxlan_output(struct ifnet *, struct mbuf *, 165 struct sockaddr *, struct rtentry *); 166 static int vxlan_enqueue(struct ifnet *, struct mbuf *); 167 static void vxlan_start(struct ifqueue *); 168 static void vxlan_send(void *); 169 170 static int vxlan_ioctl(struct ifnet *, u_long, caddr_t); 171 static int vxlan_up(struct vxlan_softc *); 172 static int vxlan_down(struct vxlan_softc *); 173 static int vxlan_addmulti(struct vxlan_softc *, struct ifnet *); 174 static void vxlan_delmulti(struct vxlan_softc *); 175 176 static struct mbuf * 177 vxlan_input(void *, struct mbuf *, 178 struct ip *, struct ip6_hdr *, void *, int); 179 180 static int vxlan_set_rdomain(struct vxlan_softc *, const struct ifreq *); 181 static int vxlan_get_rdomain(struct vxlan_softc *, struct ifreq *); 182 static int vxlan_set_tunnel(struct vxlan_softc *, 183 const struct if_laddrreq *); 184 static int vxlan_get_tunnel(struct vxlan_softc *, struct if_laddrreq *); 185 static int vxlan_del_tunnel(struct vxlan_softc *); 186 static int vxlan_set_vnetid(struct vxlan_softc *, const struct ifreq *); 187 static int vxlan_get_vnetid(struct vxlan_softc *, struct ifreq *); 188 static int vxlan_del_vnetid(struct vxlan_softc *); 189 static int vxlan_set_parent(struct vxlan_softc *, 190 const struct if_parent *); 191 static int vxlan_get_parent(struct vxlan_softc *, struct if_parent *); 192 static int vxlan_del_parent(struct vxlan_softc *); 193 194 static int vxlan_add_addr(struct vxlan_softc *, const struct ifbareq *); 195 static int vxlan_del_addr(struct vxlan_softc *, const struct ifbareq *); 196 197 static void vxlan_detach_hook(void *); 198 199 static struct if_clone vxlan_cloner = 200 IF_CLONE_INITIALIZER("vxlan", vxlan_clone_create, vxlan_clone_destroy); 201 202 static int vxlan_eb_port_eq(void *, void *, void *); 203 static void *vxlan_eb_port_take(void *, void *); 204 static void vxlan_eb_port_rele(void *, void *); 205 
static size_t vxlan_eb_port_ifname(void *, char *, size_t, void *); 206 static void vxlan_eb_port_sa(void *, struct sockaddr_storage *, void *); 207 208 static const struct etherbridge_ops vxlan_etherbridge_ops = { 209 vxlan_eb_port_eq, 210 vxlan_eb_port_take, 211 vxlan_eb_port_rele, 212 vxlan_eb_port_ifname, 213 vxlan_eb_port_sa, 214 }; 215 216 static struct rwlock vxlan_lock = RWLOCK_INITIALIZER("vteps"); 217 static struct vxlan_teps vxlan_teps = TAILQ_HEAD_INITIALIZER(vxlan_teps); 218 static struct pool vxlan_endpoint_pool; 219 220 static inline int vxlan_peer_cmp(const struct vxlan_peer *, 221 const struct vxlan_peer *); 222 223 RBT_PROTOTYPE(vxlan_peers, vxlan_peer, p_entry, vxlan_peer_cmp); 224 225 void 226 vxlanattach(int count) 227 { 228 if_clone_attach(&vxlan_cloner); 229 } 230 231 static int 232 vxlan_clone_create(struct if_clone *ifc, int unit) 233 { 234 struct vxlan_softc *sc; 235 struct ifnet *ifp; 236 int error; 237 238 if (vxlan_endpoint_pool.pr_size == 0) { 239 pool_init(&vxlan_endpoint_pool, sizeof(union vxlan_addr), 240 0, IPL_SOFTNET, 0, "vxlanep", NULL); 241 } 242 243 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO|M_CANFAIL); 244 if (sc == NULL) 245 return (ENOMEM); 246 247 ifp = &sc->sc_ac.ac_if; 248 249 snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d", 250 ifc->ifc_name, unit); 251 252 error = etherbridge_init(&sc->sc_eb, ifp->if_xname, 253 &vxlan_etherbridge_ops, sc); 254 if (error == -1) { 255 free(sc, M_DEVBUF, sizeof(*sc)); 256 return (error); 257 } 258 259 sc->sc_af = AF_UNSPEC; 260 sc->sc_txhprio = 0; 261 sc->sc_rxhprio = IF_HDRPRIO_OUTER; 262 sc->sc_df = 0; 263 sc->sc_ttl = IP_DEFAULT_MULTICAST_TTL; 264 265 task_set(&sc->sc_dtask, vxlan_detach_hook, sc); 266 refcnt_init(&sc->sc_refs); 267 task_set(&sc->sc_send_task, vxlan_send, sc); 268 269 ifp->if_softc = sc; 270 ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN; 271 ifp->if_ioctl = vxlan_ioctl; 272 ifp->if_output = vxlan_output; 273 ifp->if_enqueue = vxlan_enqueue; 274 ifp->if_qstart = 
vxlan_start;
	ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX;
	ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE;
	ether_fakeaddr(ifp);

	if_counters_alloc(ifp);
	if_attach(ifp);
	ether_ifattach(ifp);

	return (0);
}

/*
 * Tear down a vxlan(4) interface: bring it down if running, detach it
 * from the network stack, and wait for all references to drain before
 * freeing the softc.
 */
static int
vxlan_clone_destroy(struct ifnet *ifp)
{
	struct vxlan_softc *sc = ifp->if_softc;

	NET_LOCK();
	if (ISSET(ifp->if_flags, IFF_RUNNING))
		vxlan_down(sc);
	NET_UNLOCK();

	ether_ifdetach(ifp);
	if_detach(ifp);

	etherbridge_destroy(&sc->sc_eb);

	/* sleep until every vxlan_take() has been matched by vxlan_rele() */
	refcnt_finalize(&sc->sc_refs, "vxlanfini");

	free(sc, M_DEVBUF, sizeof(*sc));

	return (0);
}

/* Take a reference on the softc; paired with vxlan_rele(). */
static struct vxlan_softc *
vxlan_take(struct vxlan_softc *sc)
{
	refcnt_take(&sc->sc_refs);
	return (sc);
}

/* Release a softc reference, waking refcnt_finalize() if it was the last. */
static void
vxlan_rele(struct vxlan_softc *sc)
{
	refcnt_rele_wake(&sc->sc_refs);
}

/*
 * Encapsulate an Ethernet frame in UDP+VXLAN headers, then hand it to
 * the af-specific ip_encap callback (vxlan_encap_ipv4/6) to prepend the
 * outer IP header.  Returns NULL on drop (the mbuf is consumed).
 */
static struct mbuf *
vxlan_encap(struct vxlan_softc *sc, struct mbuf *m,
    struct mbuf *(ip_encap)(struct vxlan_softc *sc, struct mbuf *,
    const union vxlan_addr *, uint8_t))
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct m_tag *mtag;
	struct mbuf *m0;
	union vxlan_addr gateway;
	const union vxlan_addr *endpoint;
	struct vxlan_header *vh;
	struct udphdr *uh;
	int prio;
	uint8_t tos;

	if (sc->sc_mode == VXLAN_TMODE_UNSET)
		goto drop;

	if (sc->sc_mode == VXLAN_TMODE_P2P)
		endpoint = &sc->sc_dst;
	else { /* VXLAN_TMODE_LEARNING || VXLAN_TMODE_ENDPOINT */
		struct ether_header *eh = mtod(m, struct ether_header *);

		/*
		 * Look up the learned endpoint for the destination MAC.
		 * Copy it out under the SMR read section since the entry
		 * may be freed once we leave.
		 */
		smr_read_enter();
		endpoint = etherbridge_resolve_ea(&sc->sc_eb,
		    (struct ether_addr *)eh->ether_dhost);
		if (endpoint != NULL) {
			gateway = *endpoint;
			endpoint = &gateway;
		}
		smr_read_leave();

		if (endpoint == NULL) {
			if (sc->sc_mode == VXLAN_TMODE_ENDPOINT)
				goto drop;

			/* "flood" to unknown destinations */
			endpoint =
&sc->sc_dst;
		}
	}

	/* force prepend mbuf because of payload alignment */
	m0 = m_get(M_DONTWAIT, m->m_type);
	if (m0 == NULL)
		goto drop;

	m_align(m0, 0);
	m0->m_len = 0;

	M_MOVE_PKTHDR(m0, m);
	m0->m_next = m;

	m = m_prepend(m0, sizeof(*vh), M_DONTWAIT);
	if (m == NULL)
		return (NULL);

	vh = mtod(m, struct vxlan_header *);
	*vh = sc->sc_header;

	m = m_prepend(m, sizeof(*uh), M_DONTWAIT);
	if (m == NULL)
		return (NULL);

	uh = mtod(m, struct udphdr *);
	uh->uh_sport = sc->sc_port; /* XXX */
	uh->uh_dport = sc->sc_port;
	htobem16(&uh->uh_ulen, m->m_pkthdr.len);
	uh->uh_sum = htons(0);

	/* let the stack fill in the UDP checksum on output */
	SET(m->m_pkthdr.csum_flags, M_UDP_CSUM_OUT);

	/* tag with our ifindex so vxlan_output() can detect self-loops */
	mtag = m_tag_get(PACKET_TAG_GRE, sizeof(ifp->if_index), M_NOWAIT);
	if (mtag == NULL)
		goto drop;

	*(int *)(mtag + 1) = ifp->if_index;
	m_tag_prepend(m, mtag);

	prio = sc->sc_txhprio;
	if (prio == IF_HDRPRIO_PACKET)
		prio = m->m_pkthdr.pf.prio;
	tos = IFQ_PRIO2TOS(prio);

	CLR(m->m_flags, M_BCAST|M_MCAST);
	m->m_pkthdr.ph_rtableid = sc->sc_rdomain;

#if NPF > 0
	pf_pkt_addr_changed(m);
#endif

	return ((*ip_encap)(sc, m, endpoint, tos));
drop:
	m_freem(m);
	return (NULL);
}

/* Prepend the outer IPv4 header for a VXLAN packet. */
static struct mbuf *
vxlan_encap_ipv4(struct vxlan_softc *sc, struct mbuf *m,
    const union vxlan_addr *endpoint, uint8_t tos)
{
	struct ip *ip;

	m = m_prepend(m, sizeof(*ip), M_DONTWAIT);
	if (m == NULL)
		return (NULL);

	ip = mtod(m, struct ip *);
	ip->ip_v = IPVERSION;
	ip->ip_hl = sizeof(*ip) >> 2;
	ip->ip_off = sc->sc_df;	/* sc_df is 0 or htons(IP_DF) */
	ip->ip_tos = tos;
	ip->ip_len = htons(m->m_pkthdr.len);
	ip->ip_ttl = sc->sc_ttl;
	ip->ip_p = IPPROTO_UDP;
	ip->ip_src = sc->sc_src.in4;
	ip->ip_dst = endpoint->in4;

	return (m);
}

#ifdef INET6
/* Prepend the outer IPv6 header for a VXLAN packet. */
static struct mbuf *
vxlan_encap_ipv6(struct vxlan_softc *sc, struct mbuf *m,
    const union vxlan_addr *endpoint, uint8_t tos)
{
	struct ip6_hdr *ip6;
	int len = m->m_pkthdr.len;

	m = m_prepend(m, sizeof(*ip6), M_DONTWAIT);
	if (m == NULL)
		return (NULL);

	ip6 = mtod(m, struct ip6_hdr *);
	/* carry the flow id if the stack computed one */
	ip6->ip6_flow = ISSET(m->m_pkthdr.csum_flags, M_FLOWID) ?
	    htonl(m->m_pkthdr.ph_flowid) : 0;
	ip6->ip6_vfc |= IPV6_VERSION;
	ip6->ip6_flow |= htonl((uint32_t)tos << 20);
	ip6->ip6_plen = htons(len);
	ip6->ip6_nxt = IPPROTO_UDP;
	ip6->ip6_hlim = sc->sc_ttl;
	ip6->ip6_src = sc->sc_src.in6;
	ip6->ip6_dst = endpoint->in6;

	if (sc->sc_df)
		SET(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT);

	return (m);
}
#endif /* INET6 */

/*
 * if_output hook: refuse to transmit a packet that is already carrying
 * our own encap tag, i.e. a tunnel-over-itself loop.
 */
static int
vxlan_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
    struct rtentry *rt)
{
	struct m_tag *mtag;

	mtag = NULL;
	while ((mtag = m_tag_find(m, PACKET_TAG_GRE, mtag)) != NULL) {
		if (*(int *)(mtag + 1) == ifp->if_index) {
			m_freem(m);
			return (EIO);
		}
	}

	return (ether_output(ifp, m, dst, rt));
}

/* Queue a packet and kick the send task on the softnet taskq. */
static int
vxlan_enqueue(struct ifnet *ifp, struct mbuf *m)
{
	struct vxlan_softc *sc = ifp->if_softc;
	struct ifqueue *ifq = &ifp->if_snd;

	if (ifq_enqueue(ifq, m) != 0)
		return (ENOBUFS);

	task_add(ifq->ifq_softnet, &sc->sc_send_task);

	return (0);
}

/* qstart hook: transmission happens in vxlan_send() task context. */
static void
vxlan_start(struct ifqueue *ifq)
{
	struct ifnet *ifp = ifq->ifq_if;
	struct vxlan_softc *sc = ifp->if_softc;

	task_add(ifq->ifq_softnet, &sc->sc_send_task);
}

/*
 * Push a list of encapsulated packets into ip_output().
 * Returns the number of packets that failed to send.
 */
static uint64_t
vxlan_send_ipv4(struct vxlan_softc *sc, struct mbuf_list *ml)
{
	struct ip_moptions imo;
	struct mbuf *m;
	uint64_t oerrors = 0;

	imo.imo_ifidx = sc->sc_if_index0;
	imo.imo_ttl = sc->sc_ttl;
	imo.imo_loop = 0;

	NET_LOCK();
	while ((m = ml_dequeue(ml)) != NULL) {
		if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &imo, NULL, 0) != 0)
			oerrors++;
	}
	NET_UNLOCK();

	return (oerrors);
}

#ifdef INET6
/*
 * IPv6 counterpart of vxlan_send_ipv4(): push encapsulated packets
 * into ip6_output() and count failures.
 */
static uint64_t
vxlan_send_ipv6(struct vxlan_softc *sc, struct mbuf_list *ml)
{
	struct ip6_moptions im6o;
	struct mbuf *m;
	uint64_t oerrors = 0;

	im6o.im6o_ifidx = sc->sc_if_index0;
	im6o.im6o_hlim = sc->sc_ttl;
	im6o.im6o_loop = 0;

	NET_LOCK();
	while ((m = ml_dequeue(ml)) != NULL) {
		if (ip6_output(m, NULL, NULL, 0, &im6o, NULL) != 0)
			oerrors++;
	}
	NET_UNLOCK();

	return (oerrors);
}
#endif /* INET6 */

/*
 * Send task: drain the interface send queue, encapsulate each frame
 * and transmit the batch with the af-specific output routine.
 */
static void
vxlan_send(void *arg)
{
	struct vxlan_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct mbuf *(*ip_encap)(struct vxlan_softc *, struct mbuf *,
	    const union vxlan_addr *, uint8_t);
	uint64_t (*ip_send)(struct vxlan_softc *, struct mbuf_list *);
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct mbuf *m;
	uint64_t oerrors;

	if (!ISSET(ifp->if_flags, IFF_RUNNING))
		return;

	switch (sc->sc_af) {
	case AF_INET:
		ip_encap = vxlan_encap_ipv4;
		ip_send = vxlan_send_ipv4;
		break;
#ifdef INET6
	case AF_INET6:
		ip_encap = vxlan_encap_ipv6;
		ip_send = vxlan_send_ipv6;
		break;
#endif
	default:
		unhandled_af(sc->sc_af);
		/* NOTREACHED */
	}

	while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) {
#if NBPFILTER > 0
		caddr_t if_bpf = READ_ONCE(ifp->if_bpf);
		if (if_bpf != NULL)
			bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT);
#endif
		m = vxlan_encap(sc, m, ip_encap);
		if (m == NULL)
			continue;

		ml_enqueue(&ml, m);
	}

	oerrors = (*ip_send)(sc, &ml);

	counters_add(ifp->if_counters, ifc_oerrors, oerrors);
}

/*
 * UDP socket upcall: decapsulate a received VXLAN packet, find the
 * owning interface via the tep's peer tree, and inject the inner
 * Ethernet frame into the stack.  Always returns NULL (mbuf consumed).
 */
static struct mbuf *
vxlan_input(void *arg, struct mbuf *m, struct ip *ip, struct ip6_hdr *ip6,
    void *uhp, int hlen)
{
	struct vxlan_tep *vt = arg;
	union vxlan_addr addr;
	struct vxlan_peer key, *p;
struct udphdr *uh; 612 struct vxlan_header *vh; 613 struct ether_header *eh; 614 int vhlen = hlen + sizeof(*vh); 615 struct mbuf *n; 616 int off; 617 in_port_t port; 618 struct vxlan_softc *sc = NULL; 619 struct ifnet *ifp; 620 int rxhprio; 621 uint8_t tos; 622 623 if (m->m_pkthdr.len < vhlen) 624 goto drop; 625 626 uh = uhp; 627 port = uh->uh_sport; 628 629 if (ip != NULL) { 630 memset(&addr, 0, sizeof(addr)); 631 addr.in4 = ip->ip_src; 632 tos = ip->ip_tos; 633 } 634 #ifdef INET6 635 else { 636 addr.in6 = ip6->ip6_src; 637 tos = bemtoh32(&ip6->ip6_flow) >> 20; 638 } 639 #endif 640 641 if (m->m_len < vhlen) { 642 m = m_pullup(m, vhlen); 643 if (m == NULL) 644 return (NULL); 645 } 646 647 /* can't use ip/ip6/uh after this */ 648 649 vh = (struct vxlan_header *)(mtod(m, caddr_t) + hlen); 650 651 memset(&key, 0, sizeof(key)); 652 key.p_addr = addr; 653 key.p_header.vxlan_flags = vh->vxlan_flags & htonl(VXLAN_F_I); 654 key.p_header.vxlan_id = vh->vxlan_id & htonl(VXLAN_VNI_MASK); 655 656 mtx_enter(&vt->vt_mtx); 657 p = RBT_FIND(vxlan_peers, &vt->vt_peers, &key); 658 if (p == NULL) { 659 memset(&key.p_addr, 0, sizeof(key.p_addr)); 660 p = RBT_FIND(vxlan_peers, &vt->vt_peers, &key); 661 } 662 if (p != NULL) 663 sc = vxlan_take(p->p_sc); 664 mtx_leave(&vt->vt_mtx); 665 666 if (sc == NULL) 667 goto drop; 668 669 ifp = &sc->sc_ac.ac_if; 670 if (ISSET(ifp->if_flags, IFF_LINK0) && port != sc->sc_port) 671 goto rele_drop; 672 673 m_adj(m, vhlen); 674 675 if (m->m_pkthdr.len < sizeof(*eh)) 676 goto rele_drop; 677 678 if (m->m_len < sizeof(*eh)) { 679 m = m_pullup(m, sizeof(*eh)); 680 if (m == NULL) 681 goto rele; 682 } 683 684 n = m_getptr(m, sizeof(*eh), &off); 685 if (n == NULL) 686 goto rele_drop; 687 688 if (!ALIGNED_POINTER(mtod(n, caddr_t) + off, uint32_t)) { 689 n = m_dup_pkt(m, ETHER_ALIGN, M_NOWAIT); 690 m_freem(m); 691 if (n == NULL) 692 goto rele; 693 m = n; 694 } 695 696 if (sc->sc_mode == VXLAN_TMODE_LEARNING) { 697 eh = mtod(m, struct ether_header *); 698 
etherbridge_map_ea(&sc->sc_eb, &addr, 699 (struct ether_addr *)eh->ether_shost); 700 } 701 702 rxhprio = sc->sc_rxhprio; 703 switch (rxhprio) { 704 case IF_HDRPRIO_PACKET: 705 /* nop */ 706 break; 707 case IF_HDRPRIO_OUTER: 708 m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(tos); 709 break; 710 default: 711 m->m_pkthdr.pf.prio = rxhprio; 712 break; \ 713 } \ 714 715 if_vinput(ifp, m); 716 rele: 717 vxlan_rele(sc); 718 return (NULL); 719 720 rele_drop: 721 vxlan_rele(sc); 722 drop: 723 m_freem(m); 724 return (NULL); 725 } 726 727 static int 728 vxlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 729 { 730 struct vxlan_softc *sc = ifp->if_softc; 731 struct ifreq *ifr = (struct ifreq *)data; 732 struct ifbrparam *bparam = (struct ifbrparam *)data; 733 int error = 0; 734 735 switch (cmd) { 736 case SIOCSIFADDR: 737 break; 738 case SIOCSIFFLAGS: 739 if (ISSET(ifp->if_flags, IFF_UP)) { 740 if (!ISSET(ifp->if_flags, IFF_RUNNING)) 741 error = vxlan_up(sc); 742 else 743 error = 0; 744 } else { 745 if (ISSET(ifp->if_flags, IFF_RUNNING)) 746 error = vxlan_down(sc); 747 } 748 break; 749 750 case SIOCSLIFPHYRTABLE: 751 error = vxlan_set_rdomain(sc, ifr); 752 break; 753 case SIOCGLIFPHYRTABLE: 754 error = vxlan_get_rdomain(sc, ifr); 755 break; 756 757 case SIOCSLIFPHYADDR: 758 error = vxlan_set_tunnel(sc, (const struct if_laddrreq *)data); 759 break; 760 case SIOCGLIFPHYADDR: 761 error = vxlan_get_tunnel(sc, (struct if_laddrreq *)data); 762 break; 763 case SIOCDIFPHYADDR: 764 error = vxlan_del_tunnel(sc); 765 break; 766 767 case SIOCSVNETID: 768 error = vxlan_set_vnetid(sc, ifr); 769 break; 770 case SIOCGVNETID: 771 error = vxlan_get_vnetid(sc, ifr); 772 break; 773 case SIOCDVNETID: 774 error = vxlan_del_vnetid(sc); 775 break; 776 777 case SIOCSIFPARENT: 778 error = vxlan_set_parent(sc, (struct if_parent *)data); 779 break; 780 case SIOCGIFPARENT: 781 error = vxlan_get_parent(sc, (struct if_parent *)data); 782 break; 783 case SIOCDIFPARENT: 784 error = vxlan_del_parent(sc); 785 break; 

	case SIOCSTXHPRIO:
		error = if_txhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_rxhprio;
		break;

	case SIOCSLIFPHYDF:
		/* commit; store in wire format for the outer IPv4 header */
		sc->sc_df = ifr->ifr_df ? htons(IP_DF) : htons(0);
		break;
	case SIOCGLIFPHYDF:
		ifr->ifr_df = sc->sc_df ? 1 : 0;
		break;

	case SIOCSLIFPHYTTL:
		if (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff) {
			error = EINVAL;
			break;
		}

		/* commit */
		sc->sc_ttl = (uint8_t)ifr->ifr_ttl;
		break;
	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = (int)sc->sc_ttl;
		break;

	/* bridge-compatible cache tuning, forwarded to etherbridge */
	case SIOCBRDGSCACHE:
		error = etherbridge_set_max(&sc->sc_eb, bparam);
		break;
	case SIOCBRDGGCACHE:
		error = etherbridge_get_max(&sc->sc_eb, bparam);
		break;
	case SIOCBRDGSTO:
		error = etherbridge_set_tmo(&sc->sc_eb, bparam);
		break;
	case SIOCBRDGGTO:
		error = etherbridge_get_tmo(&sc->sc_eb, bparam);
		break;

	case SIOCBRDGRTS:
		error = etherbridge_rtfind(&sc->sc_eb,
		    (struct ifbaconf *)data);
		break;
	case SIOCBRDGFLUSH:
		etherbridge_flush(&sc->sc_eb,
		    ((struct ifbreq *)data)->ifbr_ifsflags);
		break;
	case SIOCBRDGSADDR:
		error = vxlan_add_addr(sc, (struct ifbareq *)data);
		break;
	case SIOCBRDGDADDR:
		error = vxlan_del_addr(sc, (struct ifbareq *)data);
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		/* no hardware to program */
		break;

	default:
		error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
		break;
	}

	if (error == ENETRESET) {
		/* no hardware to program */
		error = 0;
	}

	return (error);
}

static struct
vxlan_tep *
vxlan_tep_get(struct vxlan_softc *sc, const union vxlan_addr *addr)
{
	struct vxlan_tep *vt;

	/*
	 * Find an existing tunnel endpoint that matches the softc's
	 * af/rdomain/port and the given local address.  Caller holds
	 * vxlan_lock (the list is only walked/modified under it).
	 */
	TAILQ_FOREACH(vt, &vxlan_teps, vt_entry) {
		if (sc->sc_af == vt->vt_af &&
		    sc->sc_rdomain == vt->vt_rdomain &&
		    memcmp(addr, &vt->vt_addr, sizeof(*addr)) == 0 &&
		    sc->sc_port == vt->vt_port)
			return (vt);
	}

	return (NULL);
}

/*
 * Register peer p on the tep bound to addr, creating the tep (and its
 * bound UDP socket) if this is the first user of that address tuple.
 */
static int
vxlan_tep_add_addr(struct vxlan_softc *sc, const union vxlan_addr *addr,
    struct vxlan_peer *p)
{
	struct mbuf m;
	struct vxlan_tep *vt;
	struct socket *so;
	struct sockaddr_in *sin;
#ifdef INET6
	struct sockaddr_in6 *sin6;
#endif
	int error;

	vt = vxlan_tep_get(sc, addr);
	if (vt != NULL) {
		struct vxlan_peer *op;

		mtx_enter(&vt->vt_mtx);
		op = RBT_INSERT(vxlan_peers, &vt->vt_peers, p);
		mtx_leave(&vt->vt_mtx);

		/* RBT_INSERT returns the clashing entry, if any */
		if (op != NULL)
			return (EADDRINUSE);

		return (0);
	}

	vt = malloc(sizeof(*vt), M_DEVBUF, M_NOWAIT|M_ZERO);
	if (vt == NULL)
		return (ENOMEM);

	vt->vt_af = sc->sc_af;
	vt->vt_rdomain = sc->sc_rdomain;
	vt->vt_addr = *addr;
	vt->vt_port = sc->sc_port;

	mtx_init(&vt->vt_mtx, IPL_SOFTNET);
	RBT_INIT(vxlan_peers, &vt->vt_peers);
	RBT_INSERT(vxlan_peers, &vt->vt_peers, p);

	error = socreate(vt->vt_af, &so, SOCK_DGRAM, IPPROTO_UDP);
	if (error != 0)
		goto free;

	/* deliver received datagrams straight to vxlan_input() */
	solock(so);
	sotoinpcb(so)->inp_upcall = vxlan_input;
	sotoinpcb(so)->inp_upcall_arg = vt;
	sounlock(so);

	/* put the socket in the right routing table */
	m_inithdr(&m);
	m.m_len = sizeof(vt->vt_rdomain);
	*mtod(&m, unsigned int *) = vt->vt_rdomain;
	error = sosetopt(so, SOL_SOCKET, SO_RTABLE, &m);
	if (error != 0)
		goto close;

	/* bind to the local tunnel address and port */
	m_inithdr(&m);
	switch (vt->vt_af) {
	case AF_INET:
		sin = mtod(&m, struct sockaddr_in *);
		memset(sin, 0, sizeof(*sin));
		sin->sin_len = sizeof(*sin);
		sin->sin_family = AF_INET;
		sin->sin_addr = addr->in4;
		sin->sin_port =
vt->vt_port; 957 958 m.m_len = sizeof(*sin); 959 break; 960 961 #ifdef INET6 962 case AF_INET6: 963 sin6 = mtod(&m, struct sockaddr_in6 *); 964 sin6->sin6_len = sizeof(*sin6); 965 sin6->sin6_family = AF_INET6; 966 in6_recoverscope(sin6, &addr->in6); 967 sin6->sin6_port = sc->sc_port; 968 969 m.m_len = sizeof(*sin6); 970 break; 971 #endif 972 default: 973 unhandled_af(vt->vt_af); 974 } 975 976 solock(so); 977 error = sobind(so, &m, curproc); 978 sounlock(so); 979 if (error != 0) 980 goto close; 981 982 rw_assert_wrlock(&vxlan_lock); 983 TAILQ_INSERT_TAIL(&vxlan_teps, vt, vt_entry); 984 985 vt->vt_so = so; 986 987 return (0); 988 989 close: 990 soclose(so, MSG_DONTWAIT); 991 free: 992 free(vt, M_DEVBUF, sizeof(*vt)); 993 return (error); 994 } 995 996 static void 997 vxlan_tep_del_addr(struct vxlan_softc *sc, const union vxlan_addr *addr, 998 struct vxlan_peer *p) 999 { 1000 struct vxlan_tep *vt; 1001 int empty; 1002 1003 vt = vxlan_tep_get(sc, addr); 1004 if (vt == NULL) 1005 panic("unable to find vxlan_tep for peer %p (sc %p)", p, sc); 1006 1007 mtx_enter(&vt->vt_mtx); 1008 RBT_REMOVE(vxlan_peers, &vt->vt_peers, p); 1009 empty = RBT_EMPTY(vxlan_peers, &vt->vt_peers); 1010 mtx_leave(&vt->vt_mtx); 1011 1012 if (!empty) 1013 return; 1014 1015 rw_assert_wrlock(&vxlan_lock); 1016 TAILQ_REMOVE(&vxlan_teps, vt, vt_entry); 1017 1018 soclose(vt->vt_so, MSG_DONTWAIT); 1019 free(vt, M_DEVBUF, sizeof(*vt)); 1020 } 1021 1022 static int 1023 vxlan_tep_up(struct vxlan_softc *sc) 1024 { 1025 struct vxlan_peer *up, *mp; 1026 int error; 1027 1028 up = malloc(sizeof(*up), M_DEVBUF, M_NOWAIT|M_ZERO); 1029 if (up == NULL) 1030 return (ENOMEM); 1031 1032 if (sc->sc_mode == VXLAN_TMODE_P2P) 1033 up->p_addr = sc->sc_dst; 1034 up->p_header = sc->sc_header; 1035 up->p_sc = vxlan_take(sc); 1036 1037 error = vxlan_tep_add_addr(sc, &sc->sc_src, up); 1038 if (error != 0) 1039 goto freeup; 1040 1041 sc->sc_ucast_peer = up; 1042 1043 if (sc->sc_mode != VXLAN_TMODE_LEARNING) 1044 return (0); 1045 
1046 mp = malloc(sizeof(*mp), M_DEVBUF, M_NOWAIT|M_ZERO); 1047 if (mp == NULL) { 1048 error = ENOMEM; 1049 goto delup; 1050 } 1051 1052 /* addr is multicast, leave it as 0s */ 1053 mp->p_header = sc->sc_header; 1054 mp->p_sc = vxlan_take(sc); 1055 1056 /* destination address is a multicast group we want to join */ 1057 error = vxlan_tep_add_addr(sc, &sc->sc_dst, up); 1058 if (error != 0) 1059 goto freemp; 1060 1061 sc->sc_mcast_peer = mp; 1062 1063 return (0); 1064 1065 freemp: 1066 vxlan_rele(mp->p_sc); 1067 free(mp, M_DEVBUF, sizeof(*mp)); 1068 delup: 1069 vxlan_tep_del_addr(sc, &sc->sc_src, up); 1070 freeup: 1071 vxlan_rele(up->p_sc); 1072 free(up, M_DEVBUF, sizeof(*up)); 1073 return (error); 1074 } 1075 1076 static void 1077 vxlan_tep_down(struct vxlan_softc *sc) 1078 { 1079 struct vxlan_peer *up = sc->sc_ucast_peer; 1080 1081 if (sc->sc_mode == VXLAN_TMODE_LEARNING) { 1082 struct vxlan_peer *mp = sc->sc_mcast_peer; 1083 vxlan_tep_del_addr(sc, &sc->sc_dst, mp); 1084 vxlan_rele(mp->p_sc); 1085 free(mp, M_DEVBUF, sizeof(*mp)); 1086 } 1087 1088 vxlan_tep_del_addr(sc, &sc->sc_src, up); 1089 vxlan_rele(up->p_sc); 1090 free(up, M_DEVBUF, sizeof(*up)); 1091 } 1092 1093 static int 1094 vxlan_up(struct vxlan_softc *sc) 1095 { 1096 struct ifnet *ifp = &sc->sc_ac.ac_if; 1097 struct ifnet *ifp0 = NULL; 1098 int error; 1099 1100 KASSERT(!ISSET(ifp->if_flags, IFF_RUNNING)); 1101 NET_ASSERT_LOCKED(); 1102 1103 if (sc->sc_af == AF_UNSPEC) 1104 return (EDESTADDRREQ); 1105 KASSERT(sc->sc_mode != VXLAN_TMODE_UNSET); 1106 1107 NET_UNLOCK(); 1108 1109 error = rw_enter(&vxlan_lock, RW_WRITE|RW_INTR); 1110 if (error != 0) 1111 goto netlock; 1112 1113 NET_LOCK(); 1114 if (ISSET(ifp->if_flags, IFF_RUNNING)) { 1115 /* something else beat us */ 1116 rw_exit(&vxlan_lock); 1117 return (0); 1118 } 1119 NET_UNLOCK(); 1120 1121 if (sc->sc_mode != VXLAN_TMODE_P2P) { 1122 error = etherbridge_up(&sc->sc_eb); 1123 if (error != 0) 1124 goto unlock; 1125 } 1126 1127 if (sc->sc_mode == 
VXLAN_TMODE_LEARNING) {
		ifp0 = if_get(sc->sc_if_index0);
		if (ifp0 == NULL) {
			error = ENXIO;
			goto down;
		}

		/* check again if multicast will work on top of the parent */
		if (!ISSET(ifp0->if_flags, IFF_MULTICAST)) {
			error = EPROTONOSUPPORT;
			goto put;
		}

		error = vxlan_addmulti(sc, ifp0);
		if (error != 0)
			goto put;

		/* Register callback if parent wants to unregister */
		if_detachhook_add(ifp0, &sc->sc_dtask);
	} else {
		/* a parent only makes sense in learning mode */
		if (sc->sc_if_index0 != 0) {
			error = EPROTONOSUPPORT;
			goto down;
		}
	}

	error = vxlan_tep_up(sc);
	if (error != 0)
		goto del;

	if_put(ifp0);

	NET_LOCK();
	SET(ifp->if_flags, IFF_RUNNING);
	rw_exit(&vxlan_lock);

	return (0);

del:
	if (sc->sc_mode == VXLAN_TMODE_LEARNING) {
		if (ifp0 != NULL)
			if_detachhook_del(ifp0, &sc->sc_dtask);
		vxlan_delmulti(sc);
	}
put:
	if_put(ifp0);
down:
	if (sc->sc_mode != VXLAN_TMODE_P2P)
		etherbridge_down(&sc->sc_eb);
unlock:
	rw_exit(&vxlan_lock);
netlock:
	NET_LOCK();

	return (error);
}

/*
 * Bring the interface down, unwinding everything vxlan_up() set up.
 * Called and returns with NET_LOCK held.
 */
static int
vxlan_down(struct vxlan_softc *sc)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct ifnet *ifp0;
	int error;

	KASSERT(ISSET(ifp->if_flags, IFF_RUNNING));
	NET_UNLOCK();

	error = rw_enter(&vxlan_lock, RW_WRITE|RW_INTR);
	if (error != 0) {
		NET_LOCK();
		return (error);
	}

	NET_LOCK();
	if (!ISSET(ifp->if_flags, IFF_RUNNING)) {
		/* something else beat us */
		rw_exit(&vxlan_lock);
		return (0);
	}
	NET_UNLOCK();

	vxlan_tep_down(sc);

	if (sc->sc_mode == VXLAN_TMODE_LEARNING) {
		vxlan_delmulti(sc);
		ifp0 = if_get(sc->sc_if_index0);
		if (ifp0 != NULL) {
			if_detachhook_del(ifp0, &sc->sc_dtask);
		}
		if_put(ifp0);
	}

	if (sc->sc_mode !=
VXLAN_TMODE_P2P)
		etherbridge_down(&sc->sc_eb);

	/* make sure a queued send task is finished before clearing RUNNING */
	taskq_del_barrier(ifp->if_snd.ifq_softnet, &sc->sc_send_task);
	NET_LOCK();
	CLR(ifp->if_flags, IFF_RUNNING);
	rw_exit(&vxlan_lock);

	return (0);
}

/* Join the multicast destination group on the parent interface. */
static int
vxlan_addmulti(struct vxlan_softc *sc, struct ifnet *ifp0)
{
	int error = 0;

	NET_LOCK();

	switch (sc->sc_af) {
	case AF_INET:
		sc->sc_inmulti = in_addmulti(&sc->sc_dst.in4, ifp0);
		if (sc->sc_inmulti == NULL)
			error = EADDRNOTAVAIL;
		break;
#ifdef INET6
	case AF_INET6:
		sc->sc_inmulti = in6_addmulti(&sc->sc_dst.in6, ifp0, &error);
		break;
#endif
	default:
		unhandled_af(sc->sc_af);
	}

	NET_UNLOCK();

	return (error);
}

/* Leave the multicast group joined by vxlan_addmulti(). */
static void
vxlan_delmulti(struct vxlan_softc *sc)
{
	NET_LOCK();

	switch (sc->sc_af) {
	case AF_INET:
		in_delmulti(sc->sc_inmulti);
		break;
#ifdef INET6
	case AF_INET6:
		in6_delmulti(sc->sc_inmulti);
		break;
#endif
	default:
		unhandled_af(sc->sc_af);
	}

	sc->sc_inmulti = NULL; /* keep it tidy */

	NET_UNLOCK();
}

/* Set the routing domain the tunnel traffic runs in (SIOCSLIFPHYRTABLE). */
static int
vxlan_set_rdomain(struct vxlan_softc *sc, const struct ifreq *ifr)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;

	if (ifr->ifr_rdomainid < 0 ||
	    ifr->ifr_rdomainid > RT_TABLEID_MAX)
		return (EINVAL);
	if (!rtable_exists(ifr->ifr_rdomainid))
		return (EADDRNOTAVAIL);

	if (sc->sc_rdomain == ifr->ifr_rdomainid)
		return (0);

	/* tunnel config can only change while the interface is down */
	if (ISSET(ifp->if_flags, IFF_RUNNING))
		return (EBUSY);

	/* commit */
	sc->sc_rdomain = ifr->ifr_rdomainid;
	etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);

	return (0);
}

static int
vxlan_get_rdomain(struct vxlan_softc *sc, struct ifreq *ifr)
{
	ifr->ifr_rdomainid = sc->sc_rdomain;

	return (0);
}

static int
vxlan_set_tunnel(struct vxlan_softc *sc, const struct if_laddrreq *req)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct sockaddr *src = (struct sockaddr *)&req->addr;
	struct sockaddr *dst = (struct sockaddr *)&req->dstaddr;
	struct sockaddr_in *src4, *dst4;
#ifdef INET6
	struct sockaddr_in6 *src6, *dst6;
	int error;
#endif
	union vxlan_addr saddr, daddr;
	unsigned int mode = VXLAN_TMODE_ENDPOINT;
	in_port_t port = htons(VXLAN_PORT);

	/*
	 * Configure the tunnel addresses (SIOCSLIFPHYADDR).  The tunnel
	 * mode is derived from the destination: none -> endpoint,
	 * multicast -> learning, unicast -> p2p.
	 */
	memset(&saddr, 0, sizeof(saddr));
	memset(&daddr, 0, sizeof(daddr));

	/* validate */
	switch (src->sa_family) {
	case AF_INET:
		src4 = (struct sockaddr_in *)src;
		if (in_nullhost(src4->sin_addr) ||
		    IN_MULTICAST(src4->sin_addr.s_addr))
			return (EINVAL);

		/* a source port of 0 means "use the default VXLAN port" */
		if (src4->sin_port != htons(0))
			port = src4->sin_port;

		if (dst->sa_family != AF_UNSPEC) {
			if (dst->sa_family != AF_INET)
				return (EINVAL);

			dst4 = (struct sockaddr_in *)dst;
			if (in_nullhost(dst4->sin_addr))
				return (EINVAL);

			/* all good */
			mode = IN_MULTICAST(dst4->sin_addr.s_addr) ?
			    VXLAN_TMODE_LEARNING : VXLAN_TMODE_P2P;
			daddr.in4 = dst4->sin_addr;
		}

		saddr.in4 = src4->sin_addr;
		break;

#ifdef INET6
	case AF_INET6:
		src6 = (struct sockaddr_in6 *)src;
		if (IN6_IS_ADDR_UNSPECIFIED(&src6->sin6_addr) ||
		    IN6_IS_ADDR_MULTICAST(&src6->sin6_addr))
			return (EINVAL);

		if (src6->sin6_port != htons(0))
			port = src6->sin6_port;

		if (dst->sa_family != AF_UNSPEC) {
			if (dst->sa_family != AF_INET6)
				return (EINVAL);

			dst6 = (struct sockaddr_in6 *)dst;
			if (IN6_IS_ADDR_UNSPECIFIED(&dst6->sin6_addr))
				return (EINVAL);

			/* both ends must live in the same scope */
			if (src6->sin6_scope_id != dst6->sin6_scope_id)
				return (EINVAL);

			/* all good */
			mode = IN6_IS_ADDR_MULTICAST(&dst6->sin6_addr) ?
			    VXLAN_TMODE_LEARNING : VXLAN_TMODE_P2P;
			error = in6_embedscope(&daddr.in6, dst6, NULL);
			if (error != 0)
				return (error);
		}

		error = in6_embedscope(&saddr.in6, src6, NULL);
		if (error != 0)
			return (error);

		break;
#endif
	default:
		return (EAFNOSUPPORT);
	}

	/* nothing to do if the configuration is unchanged */
	if (memcmp(&sc->sc_src, &saddr, sizeof(sc->sc_src)) == 0 &&
	    memcmp(&sc->sc_dst, &daddr, sizeof(sc->sc_dst)) == 0 &&
	    sc->sc_port == port)
		return (0);

	if (ISSET(ifp->if_flags, IFF_RUNNING))
		return (EBUSY);

	/* commit */
	sc->sc_af = src->sa_family;
	sc->sc_src = saddr;
	sc->sc_dst = daddr;
	sc->sc_port = port;
	sc->sc_mode = mode;
	etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);

	return (0);
}

/* Report the configured tunnel addresses (SIOCGLIFPHYADDR). */
static int
vxlan_get_tunnel(struct vxlan_softc *sc, struct if_laddrreq *req)
{
	struct sockaddr *dstaddr = (struct sockaddr *)&req->dstaddr;
	struct sockaddr_in *sin;
#ifdef INET6
	struct sockaddr_in6 *sin6;
#endif

	if (sc->sc_af == AF_UNSPEC)
		return (EADDRNOTAVAIL);
	KASSERT(sc->sc_mode != VXLAN_TMODE_UNSET);

	memset(&req->addr, 0, sizeof(req->addr));
	memset(&req->dstaddr, 0, sizeof(req->dstaddr));

	/* default to endpoint */
	dstaddr->sa_len = 2;
	dstaddr->sa_family = AF_UNSPEC;

	switch (sc->sc_af) {
	case AF_INET:
		sin = (struct sockaddr_in *)&req->addr;
		sin->sin_len = sizeof(*sin);
		sin->sin_family = AF_INET;
		sin->sin_addr = sc->sc_src.in4;
		sin->sin_port = sc->sc_port;

		if (sc->sc_mode == VXLAN_TMODE_ENDPOINT)
			break;

		sin = (struct sockaddr_in *)&req->dstaddr;
		sin->sin_len = sizeof(*sin);
		sin->sin_family = AF_INET;
		sin->sin_addr = sc->sc_dst.in4;
		break;

#ifdef INET6
	case AF_INET6:
		sin6 = (struct sockaddr_in6 *)&req->addr;
		sin6->sin6_len = sizeof(*sin6);
		sin6->sin6_family = AF_INET6;
		in6_recoverscope(sin6, &sc->sc_src.in6);
		sin6->sin6_port = sc->sc_port;

		if (sc->sc_mode == VXLAN_TMODE_ENDPOINT)
			break;

		sin6 = (struct sockaddr_in6 *)&req->dstaddr;
		sin6->sin6_len = sizeof(*sin6);
		sin6->sin6_family = AF_INET6;
		in6_recoverscope(sin6, &sc->sc_dst.in6);
		break;
#endif
	default:
		unhandled_af(sc->sc_af);
	}

	return (0);
}

/* Clear the tunnel configuration (SIOCDIFPHYADDR). */
static int
vxlan_del_tunnel(struct vxlan_softc *sc)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;

	if (sc->sc_af == AF_UNSPEC)
		return (0);

	if (ISSET(ifp->if_flags, IFF_RUNNING))
		return (EBUSY);

	/* commit */
	sc->sc_af = AF_UNSPEC;
	memset(&sc->sc_src, 0, sizeof(sc->sc_src));
	memset(&sc->sc_dst, 0, sizeof(sc->sc_dst));
	sc->sc_port = htons(0);
	sc->sc_mode = VXLAN_TMODE_UNSET;
	etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);

	return (0);
}

/* Set the VXLAN Network Identifier (SIOCSVNETID). */
static int
vxlan_set_vnetid(struct vxlan_softc *sc, const struct ifreq *ifr)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	uint32_t vni;

	if (ifr->ifr_vnetid < VXLAN_VNI_MIN ||
	    ifr->ifr_vnetid > VXLAN_VNI_MAX)
		return (EINVAL);

	/* store the VNI pre-shifted in network byte order */
	vni = htonl(ifr->ifr_vnetid << VXLAN_VNI_SHIFT);
	if (ISSET(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I)) &&
	    sc->sc_header.vxlan_id == vni)
		return (0);

	if (ISSET(ifp->if_flags, IFF_RUNNING))
		return (EBUSY);

	/* commit */
	SET(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I));
	sc->sc_header.vxlan_id = vni;
	etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);

	return (0);
}

/* Report the configured VNI (SIOCGVNETID). */
static int
vxlan_get_vnetid(struct vxlan_softc *sc, struct ifreq *ifr)
{
	uint32_t vni;

	if (!ISSET(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I)))
		return (EADDRNOTAVAIL);

	vni = ntohl(sc->sc_header.vxlan_id);
	vni &= VXLAN_VNI_MASK;
	vni >>= VXLAN_VNI_SHIFT;
1537 ifr->ifr_vnetid = vni; 1538 1539 return (0); 1540 } 1541 1542 static int 1543 vxlan_del_vnetid(struct vxlan_softc *sc) 1544 { 1545 struct ifnet *ifp = &sc->sc_ac.ac_if; 1546 1547 if (!ISSET(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I))) 1548 return (0); 1549 1550 if (ISSET(ifp->if_flags, IFF_RUNNING)) 1551 return (EBUSY); 1552 1553 /* commit */ 1554 CLR(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I)); 1555 sc->sc_header.vxlan_id = htonl(0 << VXLAN_VNI_SHIFT); 1556 etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL); 1557 1558 return (0); 1559 } 1560 1561 static int 1562 vxlan_set_parent(struct vxlan_softc *sc, const struct if_parent *p) 1563 { 1564 struct ifnet *ifp = &sc->sc_ac.ac_if; 1565 struct ifnet *ifp0; 1566 int error = 0; 1567 1568 ifp0 = if_unit(p->ifp_parent); 1569 if (ifp0 == NULL) 1570 return (ENXIO); 1571 1572 if (!ISSET(ifp0->if_flags, IFF_MULTICAST)) { 1573 error = ENXIO; 1574 goto put; 1575 } 1576 1577 if (sc->sc_if_index0 == ifp0->if_index) 1578 goto put; 1579 1580 if (ISSET(ifp->if_flags, IFF_RUNNING)) { 1581 error = EBUSY; 1582 goto put; 1583 } 1584 1585 ifsetlro(ifp0, 0); 1586 1587 /* commit */ 1588 sc->sc_if_index0 = ifp0->if_index; 1589 etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL); 1590 1591 put: 1592 if_put(ifp0); 1593 return (error); 1594 } 1595 1596 static int 1597 vxlan_get_parent(struct vxlan_softc *sc, struct if_parent *p) 1598 { 1599 struct ifnet *ifp0; 1600 int error = 0; 1601 1602 ifp0 = if_get(sc->sc_if_index0); 1603 if (ifp0 == NULL) 1604 error = EADDRNOTAVAIL; 1605 else 1606 strlcpy(p->ifp_parent, ifp0->if_xname, sizeof(p->ifp_parent)); 1607 if_put(ifp0); 1608 1609 return (error); 1610 } 1611 1612 static int 1613 vxlan_del_parent(struct vxlan_softc *sc) 1614 { 1615 struct ifnet *ifp = &sc->sc_ac.ac_if; 1616 1617 if (sc->sc_if_index0 == 0) 1618 return (0); 1619 1620 if (ISSET(ifp->if_flags, IFF_RUNNING)) 1621 return (EBUSY); 1622 1623 /* commit */ 1624 sc->sc_if_index0 = 0; 1625 etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL); 1626 1627 
return (0); 1628 } 1629 1630 static int 1631 vxlan_add_addr(struct vxlan_softc *sc, const struct ifbareq *ifba) 1632 { 1633 struct sockaddr_in *sin; 1634 #ifdef INET6 1635 struct sockaddr_in6 *sin6; 1636 struct sockaddr_in6 src6 = { 1637 .sin6_len = sizeof(src6), 1638 .sin6_family = AF_UNSPEC, 1639 }; 1640 int error; 1641 #endif 1642 union vxlan_addr endpoint; 1643 unsigned int type; 1644 1645 switch (sc->sc_mode) { 1646 case VXLAN_TMODE_UNSET: 1647 return (ENOPROTOOPT); 1648 case VXLAN_TMODE_P2P: 1649 return (EPROTONOSUPPORT); 1650 default: 1651 break; 1652 } 1653 1654 /* ignore ifba_ifsname */ 1655 1656 if (ISSET(ifba->ifba_flags, ~IFBAF_TYPEMASK)) 1657 return (EINVAL); 1658 switch (ifba->ifba_flags & IFBAF_TYPEMASK) { 1659 case IFBAF_DYNAMIC: 1660 type = EBE_DYNAMIC; 1661 break; 1662 case IFBAF_STATIC: 1663 type = EBE_STATIC; 1664 break; 1665 default: 1666 return (EINVAL); 1667 } 1668 1669 memset(&endpoint, 0, sizeof(endpoint)); 1670 1671 if (ifba->ifba_dstsa.ss_family != sc->sc_af) 1672 return (EAFNOSUPPORT); 1673 switch (ifba->ifba_dstsa.ss_family) { 1674 case AF_INET: 1675 sin = (struct sockaddr_in *)&ifba->ifba_dstsa; 1676 if (in_nullhost(sin->sin_addr) || 1677 IN_MULTICAST(sin->sin_addr.s_addr)) 1678 return (EADDRNOTAVAIL); 1679 1680 if (sin->sin_port != htons(0)) 1681 return (EADDRNOTAVAIL); 1682 1683 endpoint.in4 = sin->sin_addr; 1684 break; 1685 1686 #ifdef INET6 1687 case AF_INET6: 1688 sin6 = (struct sockaddr_in6 *)&ifba->ifba_dstsa; 1689 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) || 1690 IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) 1691 return (EADDRNOTAVAIL); 1692 1693 in6_recoverscope(&src6, &sc->sc_src.in6); 1694 if (src6.sin6_scope_id != sin6->sin6_scope_id) 1695 return (EADDRNOTAVAIL); 1696 1697 if (sin6->sin6_port != htons(0)) 1698 return (EADDRNOTAVAIL); 1699 1700 error = in6_embedscope(&endpoint.in6, sin6, NULL); 1701 if (error != 0) 1702 return (error); 1703 1704 break; 1705 #endif 1706 default: /* AF_UNSPEC */ 1707 return (EADDRNOTAVAIL); 
1708 } 1709 1710 return (etherbridge_add_addr(&sc->sc_eb, &endpoint, 1711 &ifba->ifba_dst, type)); 1712 } 1713 1714 static int 1715 vxlan_del_addr(struct vxlan_softc *sc, const struct ifbareq *ifba) 1716 { 1717 return (etherbridge_del_addr(&sc->sc_eb, &ifba->ifba_dst)); 1718 } 1719 1720 void 1721 vxlan_detach_hook(void *arg) 1722 { 1723 struct vxlan_softc *sc = arg; 1724 struct ifnet *ifp = &sc->sc_ac.ac_if; 1725 1726 if (ISSET(ifp->if_flags, IFF_RUNNING)) { 1727 vxlan_down(sc); 1728 CLR(ifp->if_flags, IFF_UP); 1729 } 1730 1731 sc->sc_if_index0 = 0; 1732 } 1733 1734 static int 1735 vxlan_eb_port_eq(void *arg, void *a, void *b) 1736 { 1737 const union vxlan_addr *va = a, *vb = b; 1738 size_t i; 1739 1740 for (i = 0; i < nitems(va->in6.s6_addr32); i++) { 1741 if (va->in6.s6_addr32[i] != vb->in6.s6_addr32[i]) 1742 return (0); 1743 } 1744 1745 return (1); 1746 } 1747 1748 static void * 1749 vxlan_eb_port_take(void *arg, void *port) 1750 { 1751 union vxlan_addr *endpoint; 1752 1753 endpoint = pool_get(&vxlan_endpoint_pool, PR_NOWAIT); 1754 if (endpoint == NULL) 1755 return (NULL); 1756 1757 *endpoint = *(union vxlan_addr *)port; 1758 1759 return (endpoint); 1760 } 1761 1762 static void 1763 vxlan_eb_port_rele(void *arg, void *port) 1764 { 1765 union vxlan_addr *endpoint = port; 1766 1767 pool_put(&vxlan_endpoint_pool, endpoint); 1768 } 1769 1770 static size_t 1771 vxlan_eb_port_ifname(void *arg, char *dst, size_t len, void *port) 1772 { 1773 struct vxlan_softc *sc = arg; 1774 1775 return (strlcpy(dst, sc->sc_ac.ac_if.if_xname, len)); 1776 } 1777 1778 static void 1779 vxlan_eb_port_sa(void *arg, struct sockaddr_storage *ss, void *port) 1780 { 1781 struct vxlan_softc *sc = arg; 1782 union vxlan_addr *endpoint = port; 1783 1784 switch (sc->sc_af) { 1785 case AF_INET: { 1786 struct sockaddr_in *sin = (struct sockaddr_in *)ss; 1787 1788 sin->sin_len = sizeof(*sin); 1789 sin->sin_family = AF_INET; 1790 sin->sin_addr = endpoint->in4; 1791 break; 1792 } 1793 #ifdef INET6 1794 
case AF_INET6: { 1795 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ss; 1796 1797 sin6->sin6_len = sizeof(*sin6); 1798 sin6->sin6_family = AF_INET6; 1799 in6_recoverscope(sin6, &endpoint->in6); 1800 break; 1801 } 1802 #endif /* INET6 */ 1803 default: 1804 unhandled_af(sc->sc_af); 1805 } 1806 } 1807 1808 static inline int 1809 vxlan_peer_cmp(const struct vxlan_peer *ap, const struct vxlan_peer *bp) 1810 { 1811 size_t i; 1812 1813 if (ap->p_header.vxlan_id > bp->p_header.vxlan_id) 1814 return (1); 1815 if (ap->p_header.vxlan_id < bp->p_header.vxlan_id) 1816 return (-1); 1817 if (ap->p_header.vxlan_flags > bp->p_header.vxlan_flags) 1818 return (1); 1819 if (ap->p_header.vxlan_flags < bp->p_header.vxlan_flags) 1820 return (-1); 1821 1822 for (i = 0; i < nitems(ap->p_addr.in6.s6_addr32); i++) { 1823 if (ap->p_addr.in6.s6_addr32[i] > 1824 bp->p_addr.in6.s6_addr32[i]) 1825 return (1); 1826 if (ap->p_addr.in6.s6_addr32[i] < 1827 bp->p_addr.in6.s6_addr32[i]) 1828 return (-1); 1829 } 1830 1831 return (0); 1832 } 1833 1834 RBT_GENERATE(vxlan_peers, vxlan_peer, p_entry, vxlan_peer_cmp); 1835