1 /* $OpenBSD: if_vxlan.c,v 1.100 2024/10/31 11:41:31 mvs Exp $ */
2
3 /*
4 * Copyright (c) 2021 David Gwynne <dlg@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19 #include "bpfilter.h"
20 #include "pf.h"
21
22 #include <sys/param.h>
23 #include <sys/systm.h>
24 #include <sys/kernel.h>
25 #include <sys/mbuf.h>
26 #include <sys/socket.h>
27 #include <sys/ioctl.h>
28 #include <sys/timeout.h>
29 #include <sys/pool.h>
30 #include <sys/tree.h>
31 #include <sys/refcnt.h>
32 #include <sys/smr.h>
33
34 #include <sys/socketvar.h>
35
36 #include <net/if.h>
37 #include <net/if_var.h>
38 #include <net/if_dl.h>
39 #include <net/if_media.h>
40 #include <net/if_types.h>
41 #include <net/route.h>
42 #include <net/rtable.h>
43
44 #include <netinet/in.h>
45 #include <netinet/in_var.h>
46 #include <netinet/if_ether.h>
47 #include <netinet/ip.h>
48 #include <netinet/udp.h>
49 #include <netinet/in_pcb.h>
50 #include <netinet/ip_var.h>
51
52 #ifdef INET6
53 #include <netinet/ip6.h>
54 #include <netinet6/ip6_var.h>
55 #include <netinet6/in6_var.h>
56 #endif
57
58 /* for bridge stuff */
59 #include <net/if_bridge.h>
60 #include <net/if_etherbridge.h>
61
62 #if NBPFILTER > 0
63 #include <net/bpf.h>
64 #endif
65
66 /*
67 * The protocol.
68 */
69
/* IANA-assigned UDP port for VXLAN (RFC 7348) */
#define VXLAN_PORT		4789

/*
 * VXLAN header (RFC 7348), carried after the UDP header and before the
 * inner Ethernet frame.  Both words are in network byte order.
 */
struct vxlan_header {
	uint32_t		vxlan_flags;
#define VXLAN_F_I		(1U << 27)	/* "VNI valid" flag */
	uint32_t		vxlan_id;
#define VXLAN_VNI_SHIFT		8		/* VNI sits in the top 24 bits */
#define VXLAN_VNI_MASK		(0xffffffU << VXLAN_VNI_SHIFT)
};

/* the VNI is a 24-bit value */
#define VXLAN_VNI_MAX		0x00ffffffU
#define VXLAN_VNI_MIN		0x00000000U
82
83 /*
84 * The driver.
85 */
86
/* a tunnel endpoint address, either IPv4 or IPv6 */
union vxlan_addr {
	struct in_addr		in4;
	struct in6_addr		in6;
};

struct vxlan_softc;

/*
 * A peer attached to a tunnel endpoint.  Incoming packets are matched
 * against p_addr (outer source address; all-zeros acts as a wildcard
 * for the multicast/learning case) and p_header (expected I flag and
 * VNI) to find the owning interface.
 */
struct vxlan_peer {
	RBT_ENTRY(vxlan_peer)	 p_entry;

	struct vxlan_header	 p_header;
	union vxlan_addr	 p_addr;

	struct vxlan_softc	*p_sc;		/* owning interface */
};

RBT_HEAD(vxlan_peers, vxlan_peer);

/*
 * A local tunnel endpoint: one bound UDP socket, shared by every vxlan
 * interface using the same af/rdomain/address/port tuple.
 */
struct vxlan_tep {
	TAILQ_ENTRY(vxlan_tep)	 vt_entry;

	sa_family_t		 vt_af;
	unsigned int		 vt_rdomain;
	union vxlan_addr	 vt_addr;
#define vt_addr4 vt_addr.in4
#define vt_addr6 vt_addr.in6
	in_port_t		 vt_port;

	struct socket		*vt_so;		/* bound UDP socket */

	struct mutex		 vt_mtx;	/* protects vt_peers */
	struct vxlan_peers	 vt_peers;
};

TAILQ_HEAD(vxlan_teps, vxlan_tep);

/* how outgoing frames are mapped to a destination endpoint */
enum vxlan_tunnel_mode {
	VXLAN_TMODE_UNSET,
	VXLAN_TMODE_P2P,	 /* unicast destination, no learning */
	VXLAN_TMODE_LEARNING,	 /* multicast destination, learning */
	VXLAN_TMODE_ENDPOINT,	 /* unset destination, no learning */
};
129
/*
 * Per-interface (vxlanN) state.
 */
struct vxlan_softc {
	struct arpcom		 sc_ac;		/* ethernet interface glue */
	struct etherbridge	 sc_eb;		/* learned MAC -> endpoint map */

	unsigned int		 sc_rdomain;	/* rdomain the tunnel runs in */
	sa_family_t		 sc_af;		/* AF_INET/AF_INET6/AF_UNSPEC */
	union vxlan_addr	 sc_src;	/* local tunnel address */
	union vxlan_addr	 sc_dst;	/* remote/group address, or 0s */
	in_port_t		 sc_port;	/* UDP port, network byte order */
	struct vxlan_header	 sc_header;	/* prebuilt header carrying VNI */
	unsigned int		 sc_if_index0;	/* parent ifindex (learning) */

	struct task		 sc_dtask;	/* parent detach hook */
	void			*sc_inmulti;	/* multicast membership handle */

	enum vxlan_tunnel_mode	 sc_mode;
	struct vxlan_peer	*sc_ucast_peer;
	struct vxlan_peer	*sc_mcast_peer;	/* learning mode only */
	struct refcnt		 sc_refs;

	uint16_t		 sc_df;		/* htons(IP_DF) or htons(0) */
	int			 sc_ttl;
	int			 sc_txhprio;
	int			 sc_rxhprio;

	struct task		 sc_send_task;	/* drains if_snd on softnet */
};
157
void	vxlanattach(int);

/* interface cloning */
static int	vxlan_clone_create(struct if_clone *, int);
static int	vxlan_clone_destroy(struct ifnet *);

/* transmit path */
static int	vxlan_output(struct ifnet *, struct mbuf *,
		    struct sockaddr *, struct rtentry *);
static int	vxlan_enqueue(struct ifnet *, struct mbuf *);
static void	vxlan_start(struct ifqueue *);
static void	vxlan_send(void *);

/* ioctl handling */
static int	vxlan_ioctl(struct ifnet *, u_long, caddr_t);
static int	vxlan_up(struct vxlan_softc *);
static int	vxlan_down(struct vxlan_softc *);
static int	vxlan_addmulti(struct vxlan_softc *, struct ifnet *);
static void	vxlan_delmulti(struct vxlan_softc *);

/* receive path: installed as the UDP socket upcall */
static struct mbuf *
		vxlan_input(void *, struct mbuf *,
		    struct ip *, struct ip6_hdr *, void *, int);

static int	vxlan_set_rdomain(struct vxlan_softc *, const struct ifreq *);
static int	vxlan_get_rdomain(struct vxlan_softc *, struct ifreq *);
static int	vxlan_set_tunnel(struct vxlan_softc *,
		    const struct if_laddrreq *);
static int	vxlan_get_tunnel(struct vxlan_softc *, struct if_laddrreq *);
static int	vxlan_del_tunnel(struct vxlan_softc *);
static int	vxlan_set_vnetid(struct vxlan_softc *, const struct ifreq *);
static int	vxlan_get_vnetid(struct vxlan_softc *, struct ifreq *);
static int	vxlan_del_vnetid(struct vxlan_softc *);
static int	vxlan_set_parent(struct vxlan_softc *,
		    const struct if_parent *);
static int	vxlan_get_parent(struct vxlan_softc *, struct if_parent *);
static int	vxlan_del_parent(struct vxlan_softc *);

static int	vxlan_add_addr(struct vxlan_softc *, const struct ifbareq *);
static int	vxlan_del_addr(struct vxlan_softc *, const struct ifbareq *);

static void	vxlan_detach_hook(void *);

static struct if_clone vxlan_cloner =
    IF_CLONE_INITIALIZER("vxlan", vxlan_clone_create, vxlan_clone_destroy);

/* etherbridge callbacks; "ports" are pool-allocated union vxlan_addrs */
static int	 vxlan_eb_port_eq(void *, void *, void *);
static void	*vxlan_eb_port_take(void *, void *);
static void	 vxlan_eb_port_rele(void *, void *);
static size_t	 vxlan_eb_port_ifname(void *, char *, size_t, void *);
static void	 vxlan_eb_port_sa(void *, struct sockaddr_storage *, void *);

static const struct etherbridge_ops vxlan_etherbridge_ops = {
	vxlan_eb_port_eq,
	vxlan_eb_port_take,
	vxlan_eb_port_rele,
	vxlan_eb_port_ifname,
	vxlan_eb_port_sa,
};

/* global list of tunnel endpoints, writes serialised by vxlan_lock */
static struct rwlock vxlan_lock = RWLOCK_INITIALIZER("vteps");
static struct vxlan_teps vxlan_teps = TAILQ_HEAD_INITIALIZER(vxlan_teps);
static struct pool vxlan_endpoint_pool;

static inline int vxlan_peer_cmp(const struct vxlan_peer *,
	    const struct vxlan_peer *);

RBT_PROTOTYPE(vxlan_peers, vxlan_peer, p_entry, vxlan_peer_cmp);
223
/*
 * Pseudo-device attach hook: register the cloner so vxlan(4)
 * interfaces can be created with ifconfig.  "count" is unused.
 */
void
vxlanattach(int count)
{
	if_clone_attach(&vxlan_cloner);
}
229
230 static int
vxlan_clone_create(struct if_clone * ifc,int unit)231 vxlan_clone_create(struct if_clone *ifc, int unit)
232 {
233 struct vxlan_softc *sc;
234 struct ifnet *ifp;
235 int error;
236
237 if (vxlan_endpoint_pool.pr_size == 0) {
238 pool_init(&vxlan_endpoint_pool, sizeof(union vxlan_addr),
239 0, IPL_SOFTNET, 0, "vxlanep", NULL);
240 }
241
242 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO|M_CANFAIL);
243 if (sc == NULL)
244 return (ENOMEM);
245
246 ifp = &sc->sc_ac.ac_if;
247
248 snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
249 ifc->ifc_name, unit);
250
251 error = etherbridge_init(&sc->sc_eb, ifp->if_xname,
252 &vxlan_etherbridge_ops, sc);
253 if (error == -1) {
254 free(sc, M_DEVBUF, sizeof(*sc));
255 return (error);
256 }
257
258 sc->sc_af = AF_UNSPEC;
259 sc->sc_txhprio = 0;
260 sc->sc_rxhprio = IF_HDRPRIO_OUTER;
261 sc->sc_df = 0;
262 sc->sc_ttl = IP_DEFAULT_MULTICAST_TTL;
263
264 task_set(&sc->sc_dtask, vxlan_detach_hook, sc);
265 refcnt_init(&sc->sc_refs);
266 task_set(&sc->sc_send_task, vxlan_send, sc);
267
268 ifp->if_softc = sc;
269 ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
270 ifp->if_ioctl = vxlan_ioctl;
271 ifp->if_output = vxlan_output;
272 ifp->if_enqueue = vxlan_enqueue;
273 ifp->if_qstart = vxlan_start;
274 ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX;
275 ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE;
276 ether_fakeaddr(ifp);
277
278 if_counters_alloc(ifp);
279 if_attach(ifp);
280 ether_ifattach(ifp);
281
282 return (0);
283 }
284
/*
 * Destroy a vxlanN interface.  Brings the interface down if it is
 * running, detaches it from the network stack, then waits for all
 * outstanding references (packets in flight hold one) before freeing.
 */
static int
vxlan_clone_destroy(struct ifnet *ifp)
{
	struct vxlan_softc *sc = ifp->if_softc;

	NET_LOCK();
	if (ISSET(ifp->if_flags, IFF_RUNNING))
		vxlan_down(sc);
	NET_UNLOCK();

	ether_ifdetach(ifp);
	if_detach(ifp);

	etherbridge_destroy(&sc->sc_eb);

	/* sleep until every vxlan_take() has been matched by vxlan_rele() */
	refcnt_finalize(&sc->sc_refs, "vxlanfini");

	free(sc, M_DEVBUF, sizeof(*sc));

	return (0);
}
306
/*
 * Take a reference on the softc; paired with vxlan_rele().  Returns
 * sc for call-site convenience.
 */
static struct vxlan_softc *
vxlan_take(struct vxlan_softc *sc)
{
	refcnt_take(&sc->sc_refs);
	return (sc);
}
313
/*
 * Release a softc reference taken with vxlan_take(), waking a
 * refcnt_finalize() sleeper if this was the last one.
 */
static void
vxlan_rele(struct vxlan_softc *sc)
{
	refcnt_rele_wake(&sc->sc_refs);
}
319
/*
 * Encapsulate an outgoing Ethernet frame: pick the destination
 * endpoint, prepend the VXLAN and UDP headers, then hand the packet
 * to the af-specific ip_encap callback for the IP header.  Consumes m;
 * returns the encapsulated mbuf or NULL.
 */
static struct mbuf *
vxlan_encap(struct vxlan_softc *sc, struct mbuf *m,
    struct mbuf *(ip_encap)(struct vxlan_softc *sc, struct mbuf *,
    const union vxlan_addr *, uint8_t))
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct m_tag *mtag;
	struct mbuf *m0;
	union vxlan_addr gateway;
	const union vxlan_addr *endpoint;
	struct vxlan_header *vh;
	struct udphdr *uh;
	int prio;
	uint8_t tos;

	if (sc->sc_mode == VXLAN_TMODE_UNSET)
		goto drop;

	if (sc->sc_mode == VXLAN_TMODE_P2P)
		endpoint = &sc->sc_dst;
	else { /* VXLAN_TMODE_LEARNING || VXLAN_TMODE_ENDPOINT */
		struct ether_header *eh = mtod(m, struct ether_header *);

		/*
		 * The resolved endpoint is only stable inside the SMR
		 * read section, so copy it out before leaving.
		 */
		smr_read_enter();
		endpoint = etherbridge_resolve_ea(&sc->sc_eb,
		    (struct ether_addr *)eh->ether_dhost);
		if (endpoint != NULL) {
			gateway = *endpoint;
			endpoint = &gateway;
		}
		smr_read_leave();

		if (endpoint == NULL) {
			if (sc->sc_mode == VXLAN_TMODE_ENDPOINT)
				goto drop;

			/* "flood" to unknown destinations */
			endpoint = &sc->sc_dst;
		}
	}

	/* force prepend mbuf because of payload alignment */
	m0 = m_get(M_DONTWAIT, m->m_type);
	if (m0 == NULL)
		goto drop;

	m_align(m0, 0);
	m0->m_len = 0;

	M_MOVE_PKTHDR(m0, m);
	m0->m_next = m;

	/* m_prepend frees the chain itself on failure */
	m = m_prepend(m0, sizeof(*vh), M_DONTWAIT);
	if (m == NULL)
		return (NULL);

	vh = mtod(m, struct vxlan_header *);
	*vh = sc->sc_header;

	m = m_prepend(m, sizeof(*uh), M_DONTWAIT);
	if (m == NULL)
		return (NULL);

	uh = mtod(m, struct udphdr *);
	uh->uh_sport = sc->sc_port; /* XXX */
	uh->uh_dport = sc->sc_port;
	htobem16(&uh->uh_ulen, m->m_pkthdr.len);
	uh->uh_sum = htons(0);

	/* let the stack compute the UDP checksum on output */
	SET(m->m_pkthdr.csum_flags, M_UDP_CSUM_OUT);

	/* tag with our ifindex so vxlan_output() can detect tunnel loops */
	mtag = m_tag_get(PACKET_TAG_GRE, sizeof(ifp->if_index), M_NOWAIT);
	if (mtag == NULL)
		goto drop;

	*(int *)(mtag + 1) = ifp->if_index;
	m_tag_prepend(m, mtag);

	prio = sc->sc_txhprio;
	if (prio == IF_HDRPRIO_PACKET)
		prio = m->m_pkthdr.pf.prio;
	tos = IFQ_PRIO2TOS(prio);

	CLR(m->m_flags, M_BCAST|M_MCAST);
	m->m_pkthdr.ph_rtableid = sc->sc_rdomain;

#if NPF > 0
	pf_pkt_addr_changed(m);
#endif

	return ((*ip_encap)(sc, m, endpoint, tos));
drop:
	m_freem(m);
	return (NULL);
}
415
416 static struct mbuf *
vxlan_encap_ipv4(struct vxlan_softc * sc,struct mbuf * m,const union vxlan_addr * endpoint,uint8_t tos)417 vxlan_encap_ipv4(struct vxlan_softc *sc, struct mbuf *m,
418 const union vxlan_addr *endpoint, uint8_t tos)
419 {
420 struct ip *ip;
421
422 m = m_prepend(m, sizeof(*ip), M_DONTWAIT);
423 if (m == NULL)
424 return (NULL);
425
426 ip = mtod(m, struct ip *);
427 ip->ip_v = IPVERSION;
428 ip->ip_hl = sizeof(*ip) >> 2;
429 ip->ip_off = sc->sc_df;
430 ip->ip_tos = tos;
431 ip->ip_len = htons(m->m_pkthdr.len);
432 ip->ip_ttl = sc->sc_ttl;
433 ip->ip_p = IPPROTO_UDP;
434 ip->ip_src = sc->sc_src.in4;
435 ip->ip_dst = endpoint->in4;
436
437 return (m);
438 }
439
#ifdef INET6
/*
 * Prepend an IPv6 header addressed to "endpoint" in front of the
 * UDP/VXLAN encapsulation.  The payload length is captured before the
 * prepend so it excludes the IPv6 header itself, as ip6_plen requires.
 * Returns the updated mbuf, or NULL if the prepend failed.
 */
static struct mbuf *
vxlan_encap_ipv6(struct vxlan_softc *sc, struct mbuf *m,
    const union vxlan_addr *endpoint, uint8_t tos)
{
	struct ip6_hdr *hdr;
	int plen = m->m_pkthdr.len;

	m = m_prepend(m, sizeof(*hdr), M_DONTWAIT);
	if (m == NULL)
		return (NULL);

	hdr = mtod(m, struct ip6_hdr *);
	/* ip6_flow must be written before ip6_vfc, which aliases it */
	if (ISSET(m->m_pkthdr.csum_flags, M_FLOWID))
		hdr->ip6_flow = htonl(m->m_pkthdr.ph_flowid);
	else
		hdr->ip6_flow = 0;
	hdr->ip6_vfc |= IPV6_VERSION;
	hdr->ip6_flow |= htonl((uint32_t)tos << 20);
	hdr->ip6_plen = htons(plen);
	hdr->ip6_nxt = IPPROTO_UDP;
	hdr->ip6_hlim = sc->sc_ttl;
	hdr->ip6_src = sc->sc_src.in6;
	hdr->ip6_dst = endpoint->in6;

	/* no IPv6 DF bit; ask the stack not to fragment instead */
	if (sc->sc_df)
		SET(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT);

	return (m);
}
#endif /* INET6 */
469
470 static int
vxlan_output(struct ifnet * ifp,struct mbuf * m,struct sockaddr * dst,struct rtentry * rt)471 vxlan_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
472 struct rtentry *rt)
473 {
474 struct m_tag *mtag;
475
476 mtag = NULL;
477 while ((mtag = m_tag_find(m, PACKET_TAG_GRE, mtag)) != NULL) {
478 if (*(int *)(mtag + 1) == ifp->if_index) {
479 m_freem(m);
480 return (EIO);
481 }
482 }
483
484 return (ether_output(ifp, m, dst, rt));
485 }
486
487 static int
vxlan_enqueue(struct ifnet * ifp,struct mbuf * m)488 vxlan_enqueue(struct ifnet *ifp, struct mbuf *m)
489 {
490 struct vxlan_softc *sc = ifp->if_softc;
491 struct ifqueue *ifq = &ifp->if_snd;
492
493 if (ifq_enqueue(ifq, m) != 0)
494 return (ENOBUFS);
495
496 task_add(ifq->ifq_softnet, &sc->sc_send_task);
497
498 return (0);
499 }
500
501 static void
vxlan_start(struct ifqueue * ifq)502 vxlan_start(struct ifqueue *ifq)
503 {
504 struct ifnet *ifp = ifq->ifq_if;
505 struct vxlan_softc *sc = ifp->if_softc;
506
507 task_add(ifq->ifq_softnet, &sc->sc_send_task);
508 }
509
510 static uint64_t
vxlan_send_ipv4(struct vxlan_softc * sc,struct mbuf_list * ml)511 vxlan_send_ipv4(struct vxlan_softc *sc, struct mbuf_list *ml)
512 {
513 struct ip_moptions imo;
514 struct mbuf *m;
515 uint64_t oerrors = 0;
516
517 imo.imo_ifidx = sc->sc_if_index0;
518 imo.imo_ttl = sc->sc_ttl;
519 imo.imo_loop = 0;
520
521 NET_LOCK();
522 while ((m = ml_dequeue(ml)) != NULL) {
523 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &imo, NULL, 0) != 0)
524 oerrors++;
525 }
526 NET_UNLOCK();
527
528 return (oerrors);
529 }
530
#ifdef INET6
/*
 * Push a list of IPv6-encapsulated packets into ip6_output().
 * Returns the number of packets that failed, for the oerrors counter.
 */
static uint64_t
vxlan_send_ipv6(struct vxlan_softc *sc, struct mbuf_list *ml)
{
	struct ip6_moptions im6o;
	struct mbuf *m;
	uint64_t errs = 0;

	/* multicast options: send via the parent, don't loop back */
	im6o.im6o_ifidx = sc->sc_if_index0;
	im6o.im6o_hlim = sc->sc_ttl;
	im6o.im6o_loop = 0;

	NET_LOCK();
	for (;;) {
		m = ml_dequeue(ml);
		if (m == NULL)
			break;

		if (ip6_output(m, NULL, NULL, 0, &im6o, NULL) != 0)
			errs++;
	}
	NET_UNLOCK();

	return (errs);
}
#endif /* INET6 */
553
/*
 * Send task, run on the softnet taskq: drain if_snd, encapsulate each
 * frame, and hand the batch to the af-specific IP output routine.
 */
static void
vxlan_send(void *arg)
{
	struct vxlan_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct mbuf *(*ip_encap)(struct vxlan_softc *, struct mbuf *,
	    const union vxlan_addr *, uint8_t);
	uint64_t (*ip_send)(struct vxlan_softc *, struct mbuf_list *);
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct mbuf *m;
	uint64_t oerrors;

	if (!ISSET(ifp->if_flags, IFF_RUNNING))
		return;

	/* pick the encap/output pair for the configured tunnel af */
	switch (sc->sc_af) {
	case AF_INET:
		ip_encap = vxlan_encap_ipv4;
		ip_send = vxlan_send_ipv4;
		break;
#ifdef INET6
	case AF_INET6:
		ip_encap = vxlan_encap_ipv6;
		ip_send = vxlan_send_ipv6;
		break;
#endif
	default:
		unhandled_af(sc->sc_af);
		/* NOTREACHED */
	}

	while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) {
#if NBPFILTER > 0
		/* tap sees the inner ethernet frame, pre-encapsulation */
		caddr_t if_bpf = READ_ONCE(ifp->if_bpf);
		if (if_bpf != NULL)
			bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT);
#endif
		m = vxlan_encap(sc, m, ip_encap);
		if (m == NULL)
			continue;

		ml_enqueue(&ml, m);
	}

	oerrors = (*ip_send)(sc, &ml);

	counters_add(ifp->if_counters, ifc_oerrors, oerrors);
}
602
/*
 * UDP socket upcall for a tunnel endpoint.  Match the packet's outer
 * source address and VNI against the endpoint's peers, strip the
 * encapsulation, and inject the inner frame into the matching vxlan
 * interface.  Always consumes m and returns NULL.
 *
 * "uhp" points at the UDP header; "hlen" is the offset of the VXLAN
 * header (IP + UDP).  Exactly one of ip/ip6 is non-NULL.
 */
static struct mbuf *
vxlan_input(void *arg, struct mbuf *m, struct ip *ip, struct ip6_hdr *ip6,
    void *uhp, int hlen)
{
	struct vxlan_tep *vt = arg;
	union vxlan_addr addr;
	struct vxlan_peer key, *p;
	struct udphdr *uh;
	struct vxlan_header *vh;
	struct ether_header *eh;
	int vhlen = hlen + sizeof(*vh);
	struct mbuf *n;
	int off;
	in_port_t port;
	struct vxlan_softc *sc = NULL;
	struct ifnet *ifp;
	int rxhprio;
	uint8_t tos;

	if (m->m_pkthdr.len < vhlen)
		goto drop;

	uh = uhp;
	port = uh->uh_sport;

	/* copy out the outer source address and TOS */
	if (ip != NULL) {
		/* zero the union so the v4 address compares cleanly */
		memset(&addr, 0, sizeof(addr));
		addr.in4 = ip->ip_src;
		tos = ip->ip_tos;
	}
#ifdef INET6
	else {
		addr.in6 = ip6->ip6_src;
		tos = bemtoh32(&ip6->ip6_flow) >> 20;
	}
#endif

	if (m->m_len < vhlen) {
		m = m_pullup(m, vhlen);
		if (m == NULL)
			return (NULL);
	}

	/* can't use ip/ip6/uh after this */

	vh = (struct vxlan_header *)(mtod(m, caddr_t) + hlen);

	/* look up by (source address, I flag, VNI)... */
	memset(&key, 0, sizeof(key));
	key.p_addr = addr;
	key.p_header.vxlan_flags = vh->vxlan_flags & htonl(VXLAN_F_I);
	key.p_header.vxlan_id = vh->vxlan_id & htonl(VXLAN_VNI_MASK);

	mtx_enter(&vt->vt_mtx);
	p = RBT_FIND(vxlan_peers, &vt->vt_peers, &key);
	if (p == NULL) {
		/* ...falling back to the wildcard (all-zeros) address */
		memset(&key.p_addr, 0, sizeof(key.p_addr));
		p = RBT_FIND(vxlan_peers, &vt->vt_peers, &key);
	}
	if (p != NULL)
		sc = vxlan_take(p->p_sc);
	mtx_leave(&vt->vt_mtx);

	if (sc == NULL)
		goto drop;

	/* link0 means the peer must also use the configured source port */
	ifp = &sc->sc_ac.ac_if;
	if (ISSET(ifp->if_flags, IFF_LINK0) && port != sc->sc_port)
		goto rele_drop;

	/* strip the outer IP/UDP/VXLAN headers */
	m_adj(m, vhlen);

	if (m->m_pkthdr.len < sizeof(*eh))
		goto rele_drop;

	if (m->m_len < sizeof(*eh)) {
		m = m_pullup(m, sizeof(*eh));
		if (m == NULL)
			goto rele;
	}

	n = m_getptr(m, sizeof(*eh), &off);
	if (n == NULL)
		goto rele_drop;

	/* realign the payload if the inner IP header would be misaligned */
	if (!ALIGNED_POINTER(mtod(n, caddr_t) + off, uint32_t)) {
		n = m_dup_pkt(m, ETHER_ALIGN, M_NOWAIT);
		m_freem(m);
		if (n == NULL)
			goto rele;
		m = n;
	}

	/* learn the sender's MAC -> endpoint mapping */
	if (sc->sc_mode == VXLAN_TMODE_LEARNING) {
		eh = mtod(m, struct ether_header *);
		etherbridge_map_ea(&sc->sc_eb, &addr,
		    (struct ether_addr *)eh->ether_shost);
	}

	rxhprio = sc->sc_rxhprio;
	switch (rxhprio) {
	case IF_HDRPRIO_PACKET:
		/* nop */
		break;
	case IF_HDRPRIO_OUTER:
		m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(tos);
		break;
	default:
		m->m_pkthdr.pf.prio = rxhprio;
		break;
	}

	if_vinput(ifp, m);
rele:
	vxlan_rele(sc);
	return (NULL);

rele_drop:
	vxlan_rele(sc);
drop:
	m_freem(m);
	return (NULL);
}
725
/*
 * ioctl handler: tunnel configuration (addresses, rdomain, VNI,
 * parent, priorities, df, ttl), bridge cache management, and
 * interface up/down, falling through to ether_ioctl() for the rest.
 */
static int
vxlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct vxlan_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifbrparam *bparam = (struct ifbrparam *)data;
	int error = 0;

	switch (cmd) {
	case SIOCSIFADDR:
		break;
	case SIOCSIFFLAGS:
		/* reconcile IFF_UP (admin) with IFF_RUNNING (operational) */
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = vxlan_up(sc);
			else
				error = 0;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = vxlan_down(sc);
		}
		break;

	case SIOCSLIFPHYRTABLE:
		error = vxlan_set_rdomain(sc, ifr);
		break;
	case SIOCGLIFPHYRTABLE:
		error = vxlan_get_rdomain(sc, ifr);
		break;

	case SIOCSLIFPHYADDR:
		error = vxlan_set_tunnel(sc, (const struct if_laddrreq *)data);
		break;
	case SIOCGLIFPHYADDR:
		error = vxlan_get_tunnel(sc, (struct if_laddrreq *)data);
		break;
	case SIOCDIFPHYADDR:
		error = vxlan_del_tunnel(sc);
		break;

	case SIOCSVNETID:
		error = vxlan_set_vnetid(sc, ifr);
		break;
	case SIOCGVNETID:
		error = vxlan_get_vnetid(sc, ifr);
		break;
	case SIOCDVNETID:
		error = vxlan_del_vnetid(sc);
		break;

	case SIOCSIFPARENT:
		error = vxlan_set_parent(sc, (struct if_parent *)data);
		break;
	case SIOCGIFPARENT:
		error = vxlan_get_parent(sc, (struct if_parent *)data);
		break;
	case SIOCDIFPARENT:
		error = vxlan_del_parent(sc);
		break;

	case SIOCSTXHPRIO:
		error = if_txhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_rxhprio;
		break;

	case SIOCSLIFPHYDF:
		/* commit: stored pre-swapped for direct use in ip_off */
		sc->sc_df = ifr->ifr_df ? htons(IP_DF) : htons(0);
		break;
	case SIOCGLIFPHYDF:
		ifr->ifr_df = sc->sc_df ? 1 : 0;
		break;

	case SIOCSLIFPHYTTL:
		if (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff) {
			error = EINVAL;
			break;
		}

		/* commit */
		sc->sc_ttl = (uint8_t)ifr->ifr_ttl;
		break;
	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = (int)sc->sc_ttl;
		break;

	/* bridge(4)-compatible management of the learning cache */
	case SIOCBRDGSCACHE:
		error = etherbridge_set_max(&sc->sc_eb, bparam);
		break;
	case SIOCBRDGGCACHE:
		error = etherbridge_get_max(&sc->sc_eb, bparam);
		break;
	case SIOCBRDGSTO:
		error = etherbridge_set_tmo(&sc->sc_eb, bparam);
		break;
	case SIOCBRDGGTO:
		error = etherbridge_get_tmo(&sc->sc_eb, bparam);
		break;

	case SIOCBRDGRTS:
		error = etherbridge_rtfind(&sc->sc_eb,
		    (struct ifbaconf *)data);
		break;
	case SIOCBRDGFLUSH:
		etherbridge_flush(&sc->sc_eb,
		    ((struct ifbreq *)data)->ifbr_ifsflags);
		break;
	case SIOCBRDGSADDR:
		error = vxlan_add_addr(sc, (struct ifbareq *)data);
		break;
	case SIOCBRDGDADDR:
		error = vxlan_del_addr(sc, (struct ifbareq *)data);
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		/* no hardware to program */
		break;

	default:
		error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
		break;
	}

	if (error == ENETRESET) {
		/* no hardware to program */
		error = 0;
	}

	return (error);
}
874
875 static struct vxlan_tep *
vxlan_tep_get(struct vxlan_softc * sc,const union vxlan_addr * addr)876 vxlan_tep_get(struct vxlan_softc *sc, const union vxlan_addr *addr)
877 {
878 struct vxlan_tep *vt;
879
880 TAILQ_FOREACH(vt, &vxlan_teps, vt_entry) {
881 if (sc->sc_af == vt->vt_af &&
882 sc->sc_rdomain == vt->vt_rdomain &&
883 memcmp(addr, &vt->vt_addr, sizeof(*addr)) == 0 &&
884 sc->sc_port == vt->vt_port)
885 return (vt);
886 }
887
888 return (NULL);
889 }
890
/*
 * Register peer p on the tunnel endpoint for "addr", creating the
 * endpoint (and its bound UDP socket) if it doesn't exist yet.
 * Caller holds vxlan_lock.  Returns 0, EADDRINUSE if an identical
 * peer is already registered, or an errno from socket setup.
 */
static int
vxlan_tep_add_addr(struct vxlan_softc *sc, const union vxlan_addr *addr,
    struct vxlan_peer *p)
{
	struct mbuf m;
	struct vxlan_tep *vt;
	struct socket *so;
	struct sockaddr_in *sin;
#ifdef INET6
	struct sockaddr_in6 *sin6;
#endif
	int error;

	/* reuse an existing endpoint if one matches */
	vt = vxlan_tep_get(sc, addr);
	if (vt != NULL) {
		struct vxlan_peer *op;

		mtx_enter(&vt->vt_mtx);
		op = RBT_INSERT(vxlan_peers, &vt->vt_peers, p);
		mtx_leave(&vt->vt_mtx);

		/* RBT_INSERT returns the colliding node, if any */
		if (op != NULL)
			return (EADDRINUSE);

		return (0);
	}

	vt = malloc(sizeof(*vt), M_DEVBUF, M_NOWAIT|M_ZERO);
	if (vt == NULL)
		return (ENOMEM);

	vt->vt_af = sc->sc_af;
	vt->vt_rdomain = sc->sc_rdomain;
	vt->vt_addr = *addr;
	vt->vt_port = sc->sc_port;

	mtx_init(&vt->vt_mtx, IPL_SOFTNET);
	RBT_INIT(vxlan_peers, &vt->vt_peers);
	RBT_INSERT(vxlan_peers, &vt->vt_peers, p);

	error = socreate(vt->vt_af, &so, SOCK_DGRAM, IPPROTO_UDP);
	if (error != 0)
		goto free;

	/* steer received packets straight into vxlan_input() */
	solock(so);
	sotoinpcb(so)->inp_upcall = vxlan_input;
	sotoinpcb(so)->inp_upcall_arg = vt;
	sounlock(so);

	/* move the socket into the tunnel's rdomain (on-stack mbuf idiom) */
	m_inithdr(&m);
	m.m_len = sizeof(vt->vt_rdomain);
	*mtod(&m, unsigned int *) = vt->vt_rdomain;
	error = sosetopt(so, SOL_SOCKET, SO_RTABLE, &m);
	if (error != 0)
		goto close;

	/* bind to the local tunnel address and port */
	m_inithdr(&m);
	switch (vt->vt_af) {
	case AF_INET:
		sin = mtod(&m, struct sockaddr_in *);
		memset(sin, 0, sizeof(*sin));
		sin->sin_len = sizeof(*sin);
		sin->sin_family = AF_INET;
		sin->sin_addr = addr->in4;
		sin->sin_port = vt->vt_port;

		m.m_len = sizeof(*sin);
		break;

#ifdef INET6
	case AF_INET6:
		sin6 = mtod(&m, struct sockaddr_in6 *);
		sin6->sin6_len = sizeof(*sin6);
		sin6->sin6_family = AF_INET6;
		in6_recoverscope(sin6, &addr->in6);
		sin6->sin6_port = sc->sc_port;

		m.m_len = sizeof(*sin6);
		break;
#endif
	default:
		unhandled_af(vt->vt_af);
	}

	solock(so);
	error = sobind(so, &m, curproc);
	sounlock(so);
	if (error != 0)
		goto close;

	rw_assert_wrlock(&vxlan_lock);
	TAILQ_INSERT_TAIL(&vxlan_teps, vt, vt_entry);

	vt->vt_so = so;

	return (0);

close:
	soclose(so, MSG_DONTWAIT);
free:
	free(vt, M_DEVBUF, sizeof(*vt));
	return (error);
}
994
995 static void
vxlan_tep_del_addr(struct vxlan_softc * sc,const union vxlan_addr * addr,struct vxlan_peer * p)996 vxlan_tep_del_addr(struct vxlan_softc *sc, const union vxlan_addr *addr,
997 struct vxlan_peer *p)
998 {
999 struct vxlan_tep *vt;
1000 int empty;
1001
1002 vt = vxlan_tep_get(sc, addr);
1003 if (vt == NULL)
1004 panic("unable to find vxlan_tep for peer %p (sc %p)", p, sc);
1005
1006 mtx_enter(&vt->vt_mtx);
1007 RBT_REMOVE(vxlan_peers, &vt->vt_peers, p);
1008 empty = RBT_EMPTY(vxlan_peers, &vt->vt_peers);
1009 mtx_leave(&vt->vt_mtx);
1010
1011 if (!empty)
1012 return;
1013
1014 rw_assert_wrlock(&vxlan_lock);
1015 TAILQ_REMOVE(&vxlan_teps, vt, vt_entry);
1016
1017 soclose(vt->vt_so, MSG_DONTWAIT);
1018 free(vt, M_DEVBUF, sizeof(*vt));
1019 }
1020
1021 static int
vxlan_tep_up(struct vxlan_softc * sc)1022 vxlan_tep_up(struct vxlan_softc *sc)
1023 {
1024 struct vxlan_peer *up, *mp;
1025 int error;
1026
1027 up = malloc(sizeof(*up), M_DEVBUF, M_NOWAIT|M_ZERO);
1028 if (up == NULL)
1029 return (ENOMEM);
1030
1031 if (sc->sc_mode == VXLAN_TMODE_P2P)
1032 up->p_addr = sc->sc_dst;
1033 up->p_header = sc->sc_header;
1034 up->p_sc = vxlan_take(sc);
1035
1036 error = vxlan_tep_add_addr(sc, &sc->sc_src, up);
1037 if (error != 0)
1038 goto freeup;
1039
1040 sc->sc_ucast_peer = up;
1041
1042 if (sc->sc_mode != VXLAN_TMODE_LEARNING)
1043 return (0);
1044
1045 mp = malloc(sizeof(*mp), M_DEVBUF, M_NOWAIT|M_ZERO);
1046 if (mp == NULL) {
1047 error = ENOMEM;
1048 goto delup;
1049 }
1050
1051 /* addr is multicast, leave it as 0s */
1052 mp->p_header = sc->sc_header;
1053 mp->p_sc = vxlan_take(sc);
1054
1055 /* destination address is a multicast group we want to join */
1056 error = vxlan_tep_add_addr(sc, &sc->sc_dst, up);
1057 if (error != 0)
1058 goto freemp;
1059
1060 sc->sc_mcast_peer = mp;
1061
1062 return (0);
1063
1064 freemp:
1065 vxlan_rele(mp->p_sc);
1066 free(mp, M_DEVBUF, sizeof(*mp));
1067 delup:
1068 vxlan_tep_del_addr(sc, &sc->sc_src, up);
1069 freeup:
1070 vxlan_rele(up->p_sc);
1071 free(up, M_DEVBUF, sizeof(*up));
1072 return (error);
1073 }
1074
1075 static void
vxlan_tep_down(struct vxlan_softc * sc)1076 vxlan_tep_down(struct vxlan_softc *sc)
1077 {
1078 struct vxlan_peer *up = sc->sc_ucast_peer;
1079
1080 if (sc->sc_mode == VXLAN_TMODE_LEARNING) {
1081 struct vxlan_peer *mp = sc->sc_mcast_peer;
1082 vxlan_tep_del_addr(sc, &sc->sc_dst, mp);
1083 vxlan_rele(mp->p_sc);
1084 free(mp, M_DEVBUF, sizeof(*mp));
1085 }
1086
1087 vxlan_tep_del_addr(sc, &sc->sc_src, up);
1088 vxlan_rele(up->p_sc);
1089 free(up, M_DEVBUF, sizeof(*up));
1090 }
1091
/*
 * Bring the interface up: take vxlan_lock (dropping the net lock
 * around the sleep), start the etherbridge, join the multicast group
 * on the parent in learning mode, and register the tunnel peers.
 * Called with the net lock held; returns with it held.
 */
static int
vxlan_up(struct vxlan_softc *sc)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct ifnet *ifp0 = NULL;
	int error;

	KASSERT(!ISSET(ifp->if_flags, IFF_RUNNING));
	NET_ASSERT_LOCKED();

	if (sc->sc_af == AF_UNSPEC)
		return (EDESTADDRREQ);
	KASSERT(sc->sc_mode != VXLAN_TMODE_UNSET);

	/* drop the net lock: rw_enter may sleep */
	NET_UNLOCK();

	error = rw_enter(&vxlan_lock, RW_WRITE|RW_INTR);
	if (error != 0)
		goto netlock;

	/* recheck under the net lock: we may have raced another thread */
	NET_LOCK();
	if (ISSET(ifp->if_flags, IFF_RUNNING)) {
		/* something else beat us */
		rw_exit(&vxlan_lock);
		return (0);
	}
	NET_UNLOCK();

	if (sc->sc_mode != VXLAN_TMODE_P2P) {
		error = etherbridge_up(&sc->sc_eb);
		if (error != 0)
			goto unlock;
	}

	if (sc->sc_mode == VXLAN_TMODE_LEARNING) {
		ifp0 = if_get(sc->sc_if_index0);
		if (ifp0 == NULL) {
			error = ENXIO;
			goto down;
		}

		/* check again if multicast will work on top of the parent */
		if (!ISSET(ifp0->if_flags, IFF_MULTICAST)) {
			error = EPROTONOSUPPORT;
			goto put;
		}

		error = vxlan_addmulti(sc, ifp0);
		if (error != 0)
			goto put;

		/* Register callback if parent wants to unregister */
		if_detachhook_add(ifp0, &sc->sc_dtask);
	} else {
		/* a parent only makes sense in learning mode */
		if (sc->sc_if_index0 != 0) {
			error = EPROTONOSUPPORT;
			goto down;
		}
	}

	error = vxlan_tep_up(sc);
	if (error != 0)
		goto del;

	if_put(ifp0);

	NET_LOCK();
	SET(ifp->if_flags, IFF_RUNNING);
	rw_exit(&vxlan_lock);

	return (0);

	/* unwind in reverse order of setup */
del:
	if (sc->sc_mode == VXLAN_TMODE_LEARNING) {
		if (ifp0 != NULL)
			if_detachhook_del(ifp0, &sc->sc_dtask);
		vxlan_delmulti(sc);
	}
put:
	if_put(ifp0);
down:
	if (sc->sc_mode != VXLAN_TMODE_P2P)
		etherbridge_down(&sc->sc_eb);
unlock:
	rw_exit(&vxlan_lock);
netlock:
	NET_LOCK();

	return (error);
}
1182
/*
 * Bring the interface down: deregister the tunnel peers, leave the
 * multicast group, stop the etherbridge, and wait for a send task in
 * flight to finish.  Called with the net lock held; returns with it
 * held.
 */
static int
vxlan_down(struct vxlan_softc *sc)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct ifnet *ifp0;
	int error;

	KASSERT(ISSET(ifp->if_flags, IFF_RUNNING));
	/* drop the net lock: rw_enter may sleep */
	NET_UNLOCK();

	error = rw_enter(&vxlan_lock, RW_WRITE|RW_INTR);
	if (error != 0) {
		NET_LOCK();
		return (error);
	}

	/* recheck under the net lock: we may have raced another thread */
	NET_LOCK();
	if (!ISSET(ifp->if_flags, IFF_RUNNING)) {
		/* something else beat us */
		rw_exit(&vxlan_lock);
		return (0);
	}
	NET_UNLOCK();

	vxlan_tep_down(sc);

	if (sc->sc_mode == VXLAN_TMODE_LEARNING) {
		vxlan_delmulti(sc);
		ifp0 = if_get(sc->sc_if_index0);
		if (ifp0 != NULL) {
			if_detachhook_del(ifp0, &sc->sc_dtask);
		}
		if_put(ifp0);
	}

	if (sc->sc_mode != VXLAN_TMODE_P2P)
		etherbridge_down(&sc->sc_eb);

	/* wait for a running vxlan_send() before clearing IFF_RUNNING */
	taskq_del_barrier(ifp->if_snd.ifq_softnet, &sc->sc_send_task);
	NET_LOCK();
	CLR(ifp->if_flags, IFF_RUNNING);
	rw_exit(&vxlan_lock);

	return (0);
}
1228
1229 static int
vxlan_addmulti(struct vxlan_softc * sc,struct ifnet * ifp0)1230 vxlan_addmulti(struct vxlan_softc *sc, struct ifnet *ifp0)
1231 {
1232 int error = 0;
1233
1234 NET_LOCK();
1235
1236 switch (sc->sc_af) {
1237 case AF_INET:
1238 sc->sc_inmulti = in_addmulti(&sc->sc_dst.in4, ifp0);
1239 if (sc->sc_inmulti == NULL)
1240 error = EADDRNOTAVAIL;
1241 break;
1242 #ifdef INET6
1243 case AF_INET6:
1244 sc->sc_inmulti = in6_addmulti(&sc->sc_dst.in6, ifp0, &error);
1245 break;
1246 #endif
1247 default:
1248 unhandled_af(sc->sc_af);
1249 }
1250
1251 NET_UNLOCK();
1252
1253 return (error);
1254 }
1255
1256 static void
vxlan_delmulti(struct vxlan_softc * sc)1257 vxlan_delmulti(struct vxlan_softc *sc)
1258 {
1259 NET_LOCK();
1260
1261 switch (sc->sc_af) {
1262 case AF_INET:
1263 in_delmulti(sc->sc_inmulti);
1264 break;
1265 #ifdef INET6
1266 case AF_INET6:
1267 in6_delmulti(sc->sc_inmulti);
1268 break;
1269 #endif
1270 default:
1271 unhandled_af(sc->sc_af);
1272 }
1273
1274 sc->sc_inmulti = NULL; /* keep it tidy */
1275
1276 NET_UNLOCK();
1277 }
1278
1279 static int
vxlan_set_rdomain(struct vxlan_softc * sc,const struct ifreq * ifr)1280 vxlan_set_rdomain(struct vxlan_softc *sc, const struct ifreq *ifr)
1281 {
1282 struct ifnet *ifp = &sc->sc_ac.ac_if;
1283
1284 if (ifr->ifr_rdomainid < 0 ||
1285 ifr->ifr_rdomainid > RT_TABLEID_MAX)
1286 return (EINVAL);
1287 if (!rtable_exists(ifr->ifr_rdomainid))
1288 return (EADDRNOTAVAIL);
1289
1290 if (sc->sc_rdomain == ifr->ifr_rdomainid)
1291 return (0);
1292
1293 if (ISSET(ifp->if_flags, IFF_RUNNING))
1294 return (EBUSY);
1295
1296 /* commit */
1297 sc->sc_rdomain = ifr->ifr_rdomainid;
1298 etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
1299
1300 return (0);
1301 }
1302
1303 static int
vxlan_get_rdomain(struct vxlan_softc * sc,struct ifreq * ifr)1304 vxlan_get_rdomain(struct vxlan_softc *sc, struct ifreq *ifr)
1305 {
1306 ifr->ifr_rdomainid = sc->sc_rdomain;
1307
1308 return (0);
1309 }
1310
/*
 * Configure the tunnel source and (optionally) destination addresses.
 *
 * The destination selects the transport mode: AF_UNSPEC leaves the
 * interface a bare endpoint, a multicast destination selects learning
 * mode, and a unicast destination selects point-to-point.  A non-zero
 * port in the source address overrides the default VXLAN port; the
 * destination port must be zero.  Re-setting the current configuration
 * is a no-op; otherwise the change is refused with EBUSY while the
 * interface is running.
 */
static int
vxlan_set_tunnel(struct vxlan_softc *sc, const struct if_laddrreq *req)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct sockaddr *src = (struct sockaddr *)&req->addr;
	struct sockaddr *dst = (struct sockaddr *)&req->dstaddr;
	struct sockaddr_in *src4, *dst4;
#ifdef INET6
	struct sockaddr_in6 *src6, *dst6;
	int error;
#endif
	union vxlan_addr saddr, daddr;
	unsigned int mode = VXLAN_TMODE_ENDPOINT;
	in_port_t port = htons(VXLAN_PORT);

	memset(&saddr, 0, sizeof(saddr));
	memset(&daddr, 0, sizeof(daddr));

	/* validate */
	switch (src->sa_family) {
	case AF_INET:
		src4 = (struct sockaddr_in *)src;
		/* the local address must be a specific unicast address */
		if (in_nullhost(src4->sin_addr) ||
		    IN_MULTICAST(src4->sin_addr.s_addr))
			return (EINVAL);

		if (src4->sin_port != htons(0))
			port = src4->sin_port;

		if (dst->sa_family != AF_UNSPEC) {
			/* both ends must use the same address family */
			if (dst->sa_family != AF_INET)
				return (EINVAL);

			dst4 = (struct sockaddr_in *)dst;
			if (in_nullhost(dst4->sin_addr))
				return (EINVAL);

			/* the port is only configurable via the source */
			if (dst4->sin_port != htons(0))
				return (EINVAL);

			/* all good */
			mode = IN_MULTICAST(dst4->sin_addr.s_addr) ?
			    VXLAN_TMODE_LEARNING : VXLAN_TMODE_P2P;
			daddr.in4 = dst4->sin_addr;
		}

		saddr.in4 = src4->sin_addr;
		break;

#ifdef INET6
	case AF_INET6:
		src6 = (struct sockaddr_in6 *)src;
		if (IN6_IS_ADDR_UNSPECIFIED(&src6->sin6_addr) ||
		    IN6_IS_ADDR_MULTICAST(&src6->sin6_addr))
			return (EINVAL);

		if (src6->sin6_port != htons(0))
			port = src6->sin6_port;

		if (dst->sa_family != AF_UNSPEC) {
			if (dst->sa_family != AF_INET6)
				return (EINVAL);

			dst6 = (struct sockaddr_in6 *)dst;
			if (IN6_IS_ADDR_UNSPECIFIED(&dst6->sin6_addr))
				return (EINVAL);

			/* both ends must live in the same scope */
			if (src6->sin6_scope_id != dst6->sin6_scope_id)
				return (EINVAL);

			if (dst6->sin6_port != htons(0))
				return (EINVAL);

			/* all good */
			mode = IN6_IS_ADDR_MULTICAST(&dst6->sin6_addr) ?
			    VXLAN_TMODE_LEARNING : VXLAN_TMODE_P2P;
			error = in6_embedscope(&daddr.in6, dst6, NULL, NULL);
			if (error != 0)
				return (error);
		}

		error = in6_embedscope(&saddr.in6, src6, NULL, NULL);
		if (error != 0)
			return (error);

		break;
#endif
	default:
		return (EAFNOSUPPORT);
	}

	/* setting the same tunnel again is a no-op */
	if (memcmp(&sc->sc_src, &saddr, sizeof(sc->sc_src)) == 0 &&
	    memcmp(&sc->sc_dst, &daddr, sizeof(sc->sc_dst)) == 0 &&
	    sc->sc_port == port)
		return (0);

	if (ISSET(ifp->if_flags, IFF_RUNNING))
		return (EBUSY);

	/* commit */
	sc->sc_af = src->sa_family;
	sc->sc_src = saddr;
	sc->sc_dst = daddr;
	sc->sc_port = port;
	sc->sc_mode = mode;
	etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);

	return (0);
}
1420
1421 static int
vxlan_get_tunnel(struct vxlan_softc * sc,struct if_laddrreq * req)1422 vxlan_get_tunnel(struct vxlan_softc *sc, struct if_laddrreq *req)
1423 {
1424 struct sockaddr *dstaddr = (struct sockaddr *)&req->dstaddr;
1425 struct sockaddr_in *sin;
1426 #ifdef INET6
1427 struct sockaddr_in6 *sin6;
1428 #endif
1429
1430 if (sc->sc_af == AF_UNSPEC)
1431 return (EADDRNOTAVAIL);
1432 KASSERT(sc->sc_mode != VXLAN_TMODE_UNSET);
1433
1434 memset(&req->addr, 0, sizeof(req->addr));
1435 memset(&req->dstaddr, 0, sizeof(req->dstaddr));
1436
1437 /* default to endpoint */
1438 dstaddr->sa_len = 2;
1439 dstaddr->sa_family = AF_UNSPEC;
1440
1441 switch (sc->sc_af) {
1442 case AF_INET:
1443 sin = (struct sockaddr_in *)&req->addr;
1444 sin->sin_len = sizeof(*sin);
1445 sin->sin_family = AF_INET;
1446 sin->sin_addr = sc->sc_src.in4;
1447 sin->sin_port = sc->sc_port;
1448
1449 if (sc->sc_mode == VXLAN_TMODE_ENDPOINT)
1450 break;
1451
1452 sin = (struct sockaddr_in *)&req->dstaddr;
1453 sin->sin_len = sizeof(*sin);
1454 sin->sin_family = AF_INET;
1455 sin->sin_addr = sc->sc_dst.in4;
1456 break;
1457
1458 #ifdef INET6
1459 case AF_INET6:
1460 sin6 = (struct sockaddr_in6 *)&req->addr;
1461 sin6->sin6_len = sizeof(*sin6);
1462 sin6->sin6_family = AF_INET6;
1463 in6_recoverscope(sin6, &sc->sc_src.in6);
1464 sin6->sin6_port = sc->sc_port;
1465
1466 if (sc->sc_mode == VXLAN_TMODE_ENDPOINT)
1467 break;
1468
1469 sin6 = (struct sockaddr_in6 *)&req->dstaddr;
1470 sin6->sin6_len = sizeof(*sin6);
1471 sin6->sin6_family = AF_INET6;
1472 in6_recoverscope(sin6, &sc->sc_dst.in6);
1473 break;
1474 #endif
1475 default:
1476 unhandled_af(sc->sc_af);
1477 }
1478
1479 return (0);
1480 }
1481
1482 static int
vxlan_del_tunnel(struct vxlan_softc * sc)1483 vxlan_del_tunnel(struct vxlan_softc *sc)
1484 {
1485 struct ifnet *ifp = &sc->sc_ac.ac_if;
1486
1487 if (sc->sc_af == AF_UNSPEC)
1488 return (0);
1489
1490 if (ISSET(ifp->if_flags, IFF_RUNNING))
1491 return (EBUSY);
1492
1493 /* commit */
1494 sc->sc_af = AF_UNSPEC;
1495 memset(&sc->sc_src, 0, sizeof(sc->sc_src));
1496 memset(&sc->sc_dst, 0, sizeof(sc->sc_dst));
1497 sc->sc_port = htons(0);
1498 sc->sc_mode = VXLAN_TMODE_UNSET;
1499 etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
1500
1501 return (0);
1502 }
1503
1504 static int
vxlan_set_vnetid(struct vxlan_softc * sc,const struct ifreq * ifr)1505 vxlan_set_vnetid(struct vxlan_softc *sc, const struct ifreq *ifr)
1506 {
1507 struct ifnet *ifp = &sc->sc_ac.ac_if;
1508 uint32_t vni;
1509
1510 if (ifr->ifr_vnetid < VXLAN_VNI_MIN ||
1511 ifr->ifr_vnetid > VXLAN_VNI_MAX)
1512 return (EINVAL);
1513
1514 vni = htonl(ifr->ifr_vnetid << VXLAN_VNI_SHIFT);
1515 if (ISSET(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I)) &&
1516 sc->sc_header.vxlan_id == vni)
1517 return (0);
1518
1519 if (ISSET(ifp->if_flags, IFF_RUNNING))
1520 return (EBUSY);
1521
1522 /* commit */
1523 SET(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I));
1524 sc->sc_header.vxlan_id = vni;
1525 etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
1526
1527 return (0);
1528 }
1529
1530 static int
vxlan_get_vnetid(struct vxlan_softc * sc,struct ifreq * ifr)1531 vxlan_get_vnetid(struct vxlan_softc *sc, struct ifreq *ifr)
1532 {
1533 uint32_t vni;
1534
1535 if (!ISSET(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I)))
1536 return (EADDRNOTAVAIL);
1537
1538 vni = ntohl(sc->sc_header.vxlan_id);
1539 vni &= VXLAN_VNI_MASK;
1540 vni >>= VXLAN_VNI_SHIFT;
1541
1542 ifr->ifr_vnetid = vni;
1543
1544 return (0);
1545 }
1546
1547 static int
vxlan_del_vnetid(struct vxlan_softc * sc)1548 vxlan_del_vnetid(struct vxlan_softc *sc)
1549 {
1550 struct ifnet *ifp = &sc->sc_ac.ac_if;
1551
1552 if (!ISSET(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I)))
1553 return (0);
1554
1555 if (ISSET(ifp->if_flags, IFF_RUNNING))
1556 return (EBUSY);
1557
1558 /* commit */
1559 CLR(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I));
1560 sc->sc_header.vxlan_id = htonl(0 << VXLAN_VNI_SHIFT);
1561 etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
1562
1563 return (0);
1564 }
1565
/*
 * Configure the parent interface used in learning (multicast) mode.
 * The parent must be multicast capable.  Only the interface index is
 * stored here; the detach hook and multicast join happen when the
 * interface is brought up (see vxlan_down for the teardown side).
 * Re-setting the same parent is a no-op; the change is refused with
 * EBUSY while the interface is running.
 */
static int
vxlan_set_parent(struct vxlan_softc *sc, const struct if_parent *p)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct ifnet *ifp0;
	int error = 0;

	ifp0 = if_unit(p->ifp_parent);
	if (ifp0 == NULL)
		return (ENXIO);

	if (!ISSET(ifp0->if_flags, IFF_MULTICAST)) {
		error = ENXIO;
		goto put;
	}

	/* same parent again: nothing to do */
	if (sc->sc_if_index0 == ifp0->if_index)
		goto put;

	if (ISSET(ifp->if_flags, IFF_RUNNING)) {
		error = EBUSY;
		goto put;
	}

	/* turn LRO off on the parent before we start using it */
	ifsetlro(ifp0, 0);

	/* commit */
	sc->sc_if_index0 = ifp0->if_index;
	etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);

put:
	if_put(ifp0);	/* drop the reference taken by if_unit() */
	return (error);
}
1600
1601 static int
vxlan_get_parent(struct vxlan_softc * sc,struct if_parent * p)1602 vxlan_get_parent(struct vxlan_softc *sc, struct if_parent *p)
1603 {
1604 struct ifnet *ifp0;
1605 int error = 0;
1606
1607 ifp0 = if_get(sc->sc_if_index0);
1608 if (ifp0 == NULL)
1609 error = EADDRNOTAVAIL;
1610 else
1611 strlcpy(p->ifp_parent, ifp0->if_xname, sizeof(p->ifp_parent));
1612 if_put(ifp0);
1613
1614 return (error);
1615 }
1616
1617 static int
vxlan_del_parent(struct vxlan_softc * sc)1618 vxlan_del_parent(struct vxlan_softc *sc)
1619 {
1620 struct ifnet *ifp = &sc->sc_ac.ac_if;
1621
1622 if (sc->sc_if_index0 == 0)
1623 return (0);
1624
1625 if (ISSET(ifp->if_flags, IFF_RUNNING))
1626 return (EBUSY);
1627
1628 /* commit */
1629 sc->sc_if_index0 = 0;
1630 etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
1631
1632 return (0);
1633 }
1634
/*
 * Add a static or dynamic MAC -> tunnel-endpoint entry to the
 * etherbridge table (SIOCBRDGSADDR).  Only valid for endpoint and
 * learning modes: an unconfigured tunnel returns ENOPROTOOPT and
 * p2p mode (which has a single fixed peer) EPROTONOSUPPORT.  The
 * endpoint address must match the tunnel's address family, be a
 * specific unicast address, carry no port, and (for IPv6) be in the
 * tunnel source's scope.
 */
static int
vxlan_add_addr(struct vxlan_softc *sc, const struct ifbareq *ifba)
{
	struct sockaddr_in *sin;
#ifdef INET6
	struct sockaddr_in6 *sin6;
	/* used to recover the tunnel source's scope id for comparison */
	struct sockaddr_in6 src6 = {
		.sin6_len = sizeof(src6),
		.sin6_family = AF_UNSPEC,
	};
	int error;
#endif
	union vxlan_addr endpoint;
	unsigned int type;

	switch (sc->sc_mode) {
	case VXLAN_TMODE_UNSET:
		return (ENOPROTOOPT);
	case VXLAN_TMODE_P2P:
		return (EPROTONOSUPPORT);
	default:
		break;
	}

	/* ignore ifba_ifsname */

	/* only the entry-type bits are meaningful in ifba_flags */
	if (ISSET(ifba->ifba_flags, ~IFBAF_TYPEMASK))
		return (EINVAL);
	switch (ifba->ifba_flags & IFBAF_TYPEMASK) {
	case IFBAF_DYNAMIC:
		type = EBE_DYNAMIC;
		break;
	case IFBAF_STATIC:
		type = EBE_STATIC;
		break;
	default:
		return (EINVAL);
	}

	memset(&endpoint, 0, sizeof(endpoint));

	if (ifba->ifba_dstsa.ss_family != sc->sc_af)
		return (EAFNOSUPPORT);
	switch (ifba->ifba_dstsa.ss_family) {
	case AF_INET:
		sin = (struct sockaddr_in *)&ifba->ifba_dstsa;
		/* the endpoint must be a specific unicast address */
		if (in_nullhost(sin->sin_addr) ||
		    IN_MULTICAST(sin->sin_addr.s_addr))
			return (EADDRNOTAVAIL);

		if (sin->sin_port != htons(0))
			return (EADDRNOTAVAIL);

		endpoint.in4 = sin->sin_addr;
		break;

#ifdef INET6
	case AF_INET6:
		sin6 = (struct sockaddr_in6 *)&ifba->ifba_dstsa;
		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
		    IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
			return (EADDRNOTAVAIL);

		/* the endpoint must be in the tunnel source's scope */
		in6_recoverscope(&src6, &sc->sc_src.in6);
		if (src6.sin6_scope_id != sin6->sin6_scope_id)
			return (EADDRNOTAVAIL);

		if (sin6->sin6_port != htons(0))
			return (EADDRNOTAVAIL);

		error = in6_embedscope(&endpoint.in6, sin6, NULL, NULL);
		if (error != 0)
			return (error);

		break;
#endif
	default: /* AF_UNSPEC */
		return (EADDRNOTAVAIL);
	}

	return (etherbridge_add_addr(&sc->sc_eb, &endpoint,
	    &ifba->ifba_dst, type));
}
1718
1719 static int
vxlan_del_addr(struct vxlan_softc * sc,const struct ifbareq * ifba)1720 vxlan_del_addr(struct vxlan_softc *sc, const struct ifbareq *ifba)
1721 {
1722 return (etherbridge_del_addr(&sc->sc_eb, &ifba->ifba_dst));
1723 }
1724
/*
 * Detach hook registered on the parent interface (sc_dtask): when
 * the parent goes away, force the vxlan interface down and forget
 * the parent's index.
 * NOTE(review): presumably runs in the context that detaches the
 * parent, with the net lock held as vxlan_down() expects — confirm
 * against the if_detachhook callers.
 */
void
vxlan_detach_hook(void *arg)
{
	struct vxlan_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ac.ac_if;

	if (ISSET(ifp->if_flags, IFF_RUNNING)) {
		vxlan_down(sc);
		CLR(ifp->if_flags, IFF_UP);
	}

	/* the parent is disappearing; drop our reference to its index */
	sc->sc_if_index0 = 0;
}
1738
1739 static int
vxlan_eb_port_eq(void * arg,void * a,void * b)1740 vxlan_eb_port_eq(void *arg, void *a, void *b)
1741 {
1742 const union vxlan_addr *va = a, *vb = b;
1743 size_t i;
1744
1745 for (i = 0; i < nitems(va->in6.s6_addr32); i++) {
1746 if (va->in6.s6_addr32[i] != vb->in6.s6_addr32[i])
1747 return (0);
1748 }
1749
1750 return (1);
1751 }
1752
1753 static void *
vxlan_eb_port_take(void * arg,void * port)1754 vxlan_eb_port_take(void *arg, void *port)
1755 {
1756 union vxlan_addr *endpoint;
1757
1758 endpoint = pool_get(&vxlan_endpoint_pool, PR_NOWAIT);
1759 if (endpoint == NULL)
1760 return (NULL);
1761
1762 *endpoint = *(union vxlan_addr *)port;
1763
1764 return (endpoint);
1765 }
1766
1767 static void
vxlan_eb_port_rele(void * arg,void * port)1768 vxlan_eb_port_rele(void *arg, void *port)
1769 {
1770 union vxlan_addr *endpoint = port;
1771
1772 pool_put(&vxlan_endpoint_pool, endpoint);
1773 }
1774
1775 static size_t
vxlan_eb_port_ifname(void * arg,char * dst,size_t len,void * port)1776 vxlan_eb_port_ifname(void *arg, char *dst, size_t len, void *port)
1777 {
1778 struct vxlan_softc *sc = arg;
1779
1780 return (strlcpy(dst, sc->sc_ac.ac_if.if_xname, len));
1781 }
1782
1783 static void
vxlan_eb_port_sa(void * arg,struct sockaddr_storage * ss,void * port)1784 vxlan_eb_port_sa(void *arg, struct sockaddr_storage *ss, void *port)
1785 {
1786 struct vxlan_softc *sc = arg;
1787 union vxlan_addr *endpoint = port;
1788
1789 switch (sc->sc_af) {
1790 case AF_INET: {
1791 struct sockaddr_in *sin = (struct sockaddr_in *)ss;
1792
1793 sin->sin_len = sizeof(*sin);
1794 sin->sin_family = AF_INET;
1795 sin->sin_addr = endpoint->in4;
1796 break;
1797 }
1798 #ifdef INET6
1799 case AF_INET6: {
1800 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ss;
1801
1802 sin6->sin6_len = sizeof(*sin6);
1803 sin6->sin6_family = AF_INET6;
1804 in6_recoverscope(sin6, &endpoint->in6);
1805 break;
1806 }
1807 #endif /* INET6 */
1808 default:
1809 unhandled_af(sc->sc_af);
1810 }
1811 }
1812
1813 static inline int
vxlan_peer_cmp(const struct vxlan_peer * ap,const struct vxlan_peer * bp)1814 vxlan_peer_cmp(const struct vxlan_peer *ap, const struct vxlan_peer *bp)
1815 {
1816 size_t i;
1817
1818 if (ap->p_header.vxlan_id > bp->p_header.vxlan_id)
1819 return (1);
1820 if (ap->p_header.vxlan_id < bp->p_header.vxlan_id)
1821 return (-1);
1822 if (ap->p_header.vxlan_flags > bp->p_header.vxlan_flags)
1823 return (1);
1824 if (ap->p_header.vxlan_flags < bp->p_header.vxlan_flags)
1825 return (-1);
1826
1827 for (i = 0; i < nitems(ap->p_addr.in6.s6_addr32); i++) {
1828 if (ap->p_addr.in6.s6_addr32[i] >
1829 bp->p_addr.in6.s6_addr32[i])
1830 return (1);
1831 if (ap->p_addr.in6.s6_addr32[i] <
1832 bp->p_addr.in6.s6_addr32[i])
1833 return (-1);
1834 }
1835
1836 return (0);
1837 }
1838
/* generate the red-black tree operations for the peer lookup tree */
RBT_GENERATE(vxlan_peers, vxlan_peer, p_entry, vxlan_peer_cmp);
1840