xref: /openbsd/sys/net/if_vxlan.c (revision 938ff1ae)
1 /*	$OpenBSD: if_vxlan.c,v 1.99 2023/12/23 10:52:54 bluhm Exp $ */
2 
3 /*
4  * Copyright (c) 2021 David Gwynne <dlg@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include "bpfilter.h"
20 #include "pf.h"
21 
22 #include <sys/param.h>
23 #include <sys/systm.h>
24 #include <sys/kernel.h>
25 #include <sys/mbuf.h>
26 #include <sys/socket.h>
27 #include <sys/ioctl.h>
28 #include <sys/timeout.h>
29 #include <sys/pool.h>
30 #include <sys/tree.h>
31 #include <sys/refcnt.h>
32 #include <sys/smr.h>
33 
34 #include <sys/socketvar.h>
35 
36 #include <net/if.h>
37 #include <net/if_var.h>
38 #include <net/if_dl.h>
39 #include <net/if_media.h>
40 #include <net/if_types.h>
41 #include <net/route.h>
42 #include <net/rtable.h>
43 
44 #include <netinet/in.h>
45 #include <netinet/in_var.h>
46 #include <netinet/if_ether.h>
47 #include <netinet/ip.h>
48 #include <netinet/udp.h>
49 #include <netinet/in_pcb.h>
50 #include <netinet/ip_var.h>
51 
52 #ifdef INET6
53 #include <netinet/ip6.h>
54 #include <netinet6/ip6_var.h>
55 #include <netinet6/in6_var.h>
56 #endif
57 
58 /* for bridge stuff */
59 #include <net/if_bridge.h>
60 #include <net/if_etherbridge.h>
61 
62 #if NBPFILTER > 0
63 #include <net/bpf.h>
64 #endif
65 
66 /*
67  * The protocol.
68  */
69 
70 #define VXLAN_PORT		4789
71 
72 struct vxlan_header {
73 	uint32_t		vxlan_flags;
74 #define VXLAN_F_I			(1U << 27)
75 	uint32_t		vxlan_id;
76 #define VXLAN_VNI_SHIFT			8
77 #define VXLAN_VNI_MASK			(0xffffffU << VXLAN_VNI_SHIFT)
78 };
79 
80 #define VXLAN_VNI_MAX			0x00ffffffU
81 #define VXLAN_VNI_MIN			0x00000000U
82 
83 /*
84  * The driver.
85  */
86 
87 union vxlan_addr {
88 	struct in_addr		in4;
89 	struct in6_addr		in6;
90 };
91 
92 struct vxlan_softc;
93 
94 struct vxlan_peer {
95 	RBT_ENTRY(vxlan_peer)	 p_entry;
96 
97 	struct vxlan_header	 p_header;
98 	union vxlan_addr	 p_addr;
99 
100 	struct vxlan_softc	*p_sc;
101 };
102 
103 RBT_HEAD(vxlan_peers, vxlan_peer);
104 
105 struct vxlan_tep {
106 	TAILQ_ENTRY(vxlan_tep)	 vt_entry;
107 
108 	sa_family_t		 vt_af;
109 	unsigned int		 vt_rdomain;
110 	union vxlan_addr	 vt_addr;
111 #define vt_addr4 vt_addr.in4
112 #define vt_addr6 vt_addr.in6
113 	in_port_t		 vt_port;
114 
115 	struct socket		*vt_so;
116 
117 	struct mutex		 vt_mtx;
118 	struct vxlan_peers	 vt_peers;
119 };
120 
121 TAILQ_HEAD(vxlan_teps, vxlan_tep);
122 
123 enum vxlan_tunnel_mode {
124 	VXLAN_TMODE_UNSET,
125 	VXLAN_TMODE_P2P,	 /* unicast destination, no learning */
126 	VXLAN_TMODE_LEARNING,	 /* multicast destination, learning */
127 	VXLAN_TMODE_ENDPOINT,	 /* unset destination, no learning */
128 };
129 
130 struct vxlan_softc {
131 	struct arpcom		 sc_ac;
132 	struct etherbridge	 sc_eb;
133 
134 	unsigned int		 sc_rdomain;
135 	sa_family_t		 sc_af;
136 	union vxlan_addr	 sc_src;
137 	union vxlan_addr	 sc_dst;
138 	in_port_t		 sc_port;
139 	struct vxlan_header	 sc_header;
140 	unsigned int		 sc_if_index0;
141 
142 	struct task		 sc_dtask;
143 	void			*sc_inmulti;
144 
145 	enum vxlan_tunnel_mode	 sc_mode;
146 	struct vxlan_peer	*sc_ucast_peer;
147 	struct vxlan_peer	*sc_mcast_peer;
148 	struct refcnt		 sc_refs;
149 
150 	uint16_t		 sc_df;
151 	int			 sc_ttl;
152 	int			 sc_txhprio;
153 	int			 sc_rxhprio;
154 
155 	struct task		 sc_send_task;
156 };
157 
158 void		vxlanattach(int);
159 
160 static int	vxlan_clone_create(struct if_clone *, int);
161 static int	vxlan_clone_destroy(struct ifnet *);
162 
163 static int	vxlan_output(struct ifnet *, struct mbuf *,
164 		    struct sockaddr *, struct rtentry *);
165 static int	vxlan_enqueue(struct ifnet *, struct mbuf *);
166 static void	vxlan_start(struct ifqueue *);
167 static void	vxlan_send(void *);
168 
169 static int	vxlan_ioctl(struct ifnet *, u_long, caddr_t);
170 static int	vxlan_up(struct vxlan_softc *);
171 static int	vxlan_down(struct vxlan_softc *);
172 static int	vxlan_addmulti(struct vxlan_softc *, struct ifnet *);
173 static void	vxlan_delmulti(struct vxlan_softc *);
174 
175 static struct mbuf *
176 		vxlan_input(void *, struct mbuf *,
177 		    struct ip *, struct ip6_hdr *, void *, int);
178 
179 static int	vxlan_set_rdomain(struct vxlan_softc *, const struct ifreq *);
180 static int	vxlan_get_rdomain(struct vxlan_softc *, struct ifreq *);
181 static int	vxlan_set_tunnel(struct vxlan_softc *,
182 		    const struct if_laddrreq *);
183 static int	vxlan_get_tunnel(struct vxlan_softc *, struct if_laddrreq *);
184 static int	vxlan_del_tunnel(struct vxlan_softc *);
185 static int	vxlan_set_vnetid(struct vxlan_softc *, const struct ifreq *);
186 static int	vxlan_get_vnetid(struct vxlan_softc *, struct ifreq *);
187 static int	vxlan_del_vnetid(struct vxlan_softc *);
188 static int	vxlan_set_parent(struct vxlan_softc *,
189 		    const struct if_parent *);
190 static int	vxlan_get_parent(struct vxlan_softc *, struct if_parent *);
191 static int	vxlan_del_parent(struct vxlan_softc *);
192 
193 static int	vxlan_add_addr(struct vxlan_softc *, const struct ifbareq *);
194 static int	vxlan_del_addr(struct vxlan_softc *, const struct ifbareq *);
195 
196 static void	vxlan_detach_hook(void *);
197 
198 static struct if_clone vxlan_cloner =
199     IF_CLONE_INITIALIZER("vxlan", vxlan_clone_create, vxlan_clone_destroy);
200 
201 static int	 vxlan_eb_port_eq(void *, void *, void *);
202 static void	*vxlan_eb_port_take(void *, void *);
203 static void	 vxlan_eb_port_rele(void *, void *);
204 static size_t	 vxlan_eb_port_ifname(void *, char *, size_t, void *);
205 static void	 vxlan_eb_port_sa(void *, struct sockaddr_storage *, void *);
206 
207 static const struct etherbridge_ops vxlan_etherbridge_ops = {
208 	vxlan_eb_port_eq,
209 	vxlan_eb_port_take,
210 	vxlan_eb_port_rele,
211 	vxlan_eb_port_ifname,
212 	vxlan_eb_port_sa,
213 };
214 
215 static struct rwlock vxlan_lock = RWLOCK_INITIALIZER("vteps");
216 static struct vxlan_teps vxlan_teps = TAILQ_HEAD_INITIALIZER(vxlan_teps);
217 static struct pool vxlan_endpoint_pool;
218 
219 static inline int	vxlan_peer_cmp(const struct vxlan_peer *,
220 			    const struct vxlan_peer *);
221 
222 RBT_PROTOTYPE(vxlan_peers, vxlan_peer, p_entry, vxlan_peer_cmp);
223 
void
vxlanattach(int count)
{
	/*
	 * Pseudo-device attach hook: register the vxlan(4) cloner so
	 * "ifconfig vxlanN create" works.  `count` is unused, as is
	 * conventional for cloned interfaces.
	 */
	if_clone_attach(&vxlan_cloner);
}
229 
230 static int
vxlan_clone_create(struct if_clone * ifc,int unit)231 vxlan_clone_create(struct if_clone *ifc, int unit)
232 {
233 	struct vxlan_softc *sc;
234 	struct ifnet *ifp;
235 	int error;
236 
237 	if (vxlan_endpoint_pool.pr_size == 0) {
238 		pool_init(&vxlan_endpoint_pool, sizeof(union vxlan_addr),
239 		    0, IPL_SOFTNET, 0, "vxlanep", NULL);
240 	}
241 
242 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO|M_CANFAIL);
243 	if (sc == NULL)
244 		return (ENOMEM);
245 
246 	ifp = &sc->sc_ac.ac_if;
247 
248 	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
249 	    ifc->ifc_name, unit);
250 
251 	error = etherbridge_init(&sc->sc_eb, ifp->if_xname,
252 	    &vxlan_etherbridge_ops, sc);
253 	if (error == -1) {
254 		free(sc, M_DEVBUF, sizeof(*sc));
255 		return (error);
256 	}
257 
258 	sc->sc_af = AF_UNSPEC;
259 	sc->sc_txhprio = 0;
260 	sc->sc_rxhprio = IF_HDRPRIO_OUTER;
261 	sc->sc_df = 0;
262 	sc->sc_ttl = IP_DEFAULT_MULTICAST_TTL;
263 
264 	task_set(&sc->sc_dtask, vxlan_detach_hook, sc);
265 	refcnt_init(&sc->sc_refs);
266 	task_set(&sc->sc_send_task, vxlan_send, sc);
267 
268 	ifp->if_softc = sc;
269 	ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
270 	ifp->if_ioctl = vxlan_ioctl;
271 	ifp->if_output = vxlan_output;
272 	ifp->if_enqueue = vxlan_enqueue;
273 	ifp->if_qstart = vxlan_start;
274 	ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX;
275 	ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE;
276 	ether_fakeaddr(ifp);
277 
278 	if_counters_alloc(ifp);
279 	if_attach(ifp);
280 	ether_ifattach(ifp);
281 
282 	return (0);
283 }
284 
static int
vxlan_clone_destroy(struct ifnet *ifp)
{
	struct vxlan_softc *sc = ifp->if_softc;

	/*
	 * Bring the interface down first (under NET_LOCK) so the tunnel
	 * endpoint and multicast state are torn down before detach.
	 */
	NET_LOCK();
	if (ISSET(ifp->if_flags, IFF_RUNNING))
		vxlan_down(sc);
	NET_UNLOCK();

	ether_ifdetach(ifp);
	if_detach(ifp);

	etherbridge_destroy(&sc->sc_eb);

	/* Wait for all vxlan_take() references to drain before freeing. */
	refcnt_finalize(&sc->sc_refs, "vxlanfini");

	free(sc, M_DEVBUF, sizeof(*sc));

	return (0);
}
306 
/* Take a reference on the softc; paired with vxlan_rele(). */
static struct vxlan_softc *
vxlan_take(struct vxlan_softc *sc)
{
	refcnt_take(&sc->sc_refs);
	return (sc);
}
313 
/* Release a softc reference; wakes refcnt_finalize() in clone_destroy. */
static void
vxlan_rele(struct vxlan_softc *sc)
{
	refcnt_rele_wake(&sc->sc_refs);
}
319 
/*
 * Encapsulate an Ethernet frame for transmission over the tunnel.
 *
 * Resolves the remote endpoint (fixed peer, learned bridge entry, or
 * flood destination), prepends the VXLAN and UDP headers, tags the
 * packet against encapsulation loops, and finally hands it to the
 * af-specific `ip_encap` callback to add the outer IP/IPv6 header.
 * Consumes `m`; returns the encapsulated mbuf or NULL.
 */
static struct mbuf *
vxlan_encap(struct vxlan_softc *sc, struct mbuf *m,
    struct mbuf *(ip_encap)(struct vxlan_softc *sc, struct mbuf *,
    const union vxlan_addr *, uint8_t))
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct m_tag *mtag;
	struct mbuf *m0;
	union vxlan_addr gateway;
	const union vxlan_addr *endpoint;
	struct vxlan_header *vh;
	struct udphdr *uh;
	int prio;
	uint8_t tos;

	if (sc->sc_mode == VXLAN_TMODE_UNSET)
		goto drop;

	if (sc->sc_mode == VXLAN_TMODE_P2P)
		endpoint = &sc->sc_dst;
	else { /* VXLAN_TMODE_LEARNING || VXLAN_TMODE_ENDPOINT */
		struct ether_header *eh = mtod(m, struct ether_header *);

		/*
		 * The bridge entry is only stable inside the SMR read
		 * section, so copy the address out before leaving it.
		 */
		smr_read_enter();
		endpoint = etherbridge_resolve_ea(&sc->sc_eb,
		    (struct ether_addr *)eh->ether_dhost);
		if (endpoint != NULL) {
			gateway = *endpoint;
			endpoint = &gateway;
		}
		smr_read_leave();

		if (endpoint == NULL) {
			if (sc->sc_mode == VXLAN_TMODE_ENDPOINT)
				goto drop;

			/* "flood" to unknown destinations */
			endpoint = &sc->sc_dst;
		}
	}

	/* force prepend mbuf because of payload alignment */
	m0 = m_get(M_DONTWAIT, m->m_type);
	if (m0 == NULL)
		goto drop;

	m_align(m0, 0);
	m0->m_len = 0;

	M_MOVE_PKTHDR(m0, m);
	m0->m_next = m;

	/* NB: m_prepend frees the chain itself on failure. */
	m = m_prepend(m0, sizeof(*vh), M_DONTWAIT);
	if (m == NULL)
		return (NULL);

	vh = mtod(m, struct vxlan_header *);
	*vh = sc->sc_header;

	m = m_prepend(m, sizeof(*uh), M_DONTWAIT);
	if (m == NULL)
		return (NULL);

	uh = mtod(m, struct udphdr *);
	uh->uh_sport = sc->sc_port; /* XXX */
	uh->uh_dport = sc->sc_port;
	htobem16(&uh->uh_ulen, m->m_pkthdr.len);
	uh->uh_sum = htons(0);

	SET(m->m_pkthdr.csum_flags, M_UDP_CSUM_OUT);

	/*
	 * Tag with our ifindex so vxlan_output() can detect a packet
	 * being re-encapsulated by the same interface (loop).
	 */
	mtag = m_tag_get(PACKET_TAG_GRE, sizeof(ifp->if_index), M_NOWAIT);
	if (mtag == NULL)
		goto drop;

	*(int *)(mtag + 1) = ifp->if_index;
	m_tag_prepend(m, mtag);

	/* Map the configured (or per-packet) priority to an outer TOS. */
	prio = sc->sc_txhprio;
	if (prio == IF_HDRPRIO_PACKET)
		prio = m->m_pkthdr.pf.prio;
	tos = IFQ_PRIO2TOS(prio);

	CLR(m->m_flags, M_BCAST|M_MCAST);
	m->m_pkthdr.ph_rtableid = sc->sc_rdomain;

#if NPF > 0
	pf_pkt_addr_changed(m);
#endif

	return ((*ip_encap)(sc, m, endpoint, tos));
drop:
	m_freem(m);
	return (NULL);
}
415 
/*
 * Prepend the outer IPv4 header.  The packet is finished later by
 * ip_output() (checksum, fragmentation, routing).  Returns NULL if the
 * header could not be prepended (mbuf already freed by m_prepend).
 */
static struct mbuf *
vxlan_encap_ipv4(struct vxlan_softc *sc, struct mbuf *m,
    const union vxlan_addr *endpoint, uint8_t tos)
{
	struct ip *ip;

	m = m_prepend(m, sizeof(*ip), M_DONTWAIT);
	if (m == NULL)
		return (NULL);

	ip = mtod(m, struct ip *);
	ip->ip_v = IPVERSION;
	ip->ip_hl = sizeof(*ip) >> 2;
	ip->ip_off = sc->sc_df;	/* sc_df is already htons(IP_DF) or 0 */
	ip->ip_tos = tos;
	ip->ip_len = htons(m->m_pkthdr.len);
	ip->ip_ttl = sc->sc_ttl;
	ip->ip_p = IPPROTO_UDP;
	ip->ip_src = sc->sc_src.in4;
	ip->ip_dst = endpoint->in4;

	return (m);
}
439 
#ifdef INET6
/*
 * Prepend the outer IPv6 header.  Note ip6_vfc aliases the first byte
 * of ip6_flow, so the flow label must be written before OR-ing in the
 * version bits and the traffic class.
 */
static struct mbuf *
vxlan_encap_ipv6(struct vxlan_softc *sc, struct mbuf *m,
    const union vxlan_addr *endpoint, uint8_t tos)
{
	struct ip6_hdr *ip6;
	int len = m->m_pkthdr.len;	/* payload length, pre-header */

	m = m_prepend(m, sizeof(*ip6), M_DONTWAIT);
	if (m == NULL)
		return (NULL);

	ip6 = mtod(m, struct ip6_hdr *);
	ip6->ip6_flow = ISSET(m->m_pkthdr.csum_flags, M_FLOWID) ?
	    htonl(m->m_pkthdr.ph_flowid) : 0;
	ip6->ip6_vfc |= IPV6_VERSION;
	ip6->ip6_flow |= htonl((uint32_t)tos << 20);
	ip6->ip6_plen = htons(len);
	ip6->ip6_nxt = IPPROTO_UDP;
	ip6->ip6_hlim = sc->sc_ttl;
	ip6->ip6_src = sc->sc_src.in6;
	ip6->ip6_dst = endpoint->in6;

	/* IPv6 has no DF bit; ask the stack not to fragment instead. */
	if (sc->sc_df)
		SET(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT);

	return (m);
}
#endif /* INET6 */
469 
470 static int
vxlan_output(struct ifnet * ifp,struct mbuf * m,struct sockaddr * dst,struct rtentry * rt)471 vxlan_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
472     struct rtentry *rt)
473 {
474 	struct m_tag *mtag;
475 
476 	mtag = NULL;
477 	while ((mtag = m_tag_find(m, PACKET_TAG_GRE, mtag)) != NULL) {
478 		if (*(int *)(mtag + 1) == ifp->if_index) {
479 			m_freem(m);
480 			return (EIO);
481 		}
482 	}
483 
484 	return (ether_output(ifp, m, dst, rt));
485 }
486 
487 static int
vxlan_enqueue(struct ifnet * ifp,struct mbuf * m)488 vxlan_enqueue(struct ifnet *ifp, struct mbuf *m)
489 {
490 	struct vxlan_softc *sc = ifp->if_softc;
491 	struct ifqueue *ifq = &ifp->if_snd;
492 
493 	if (ifq_enqueue(ifq, m) != 0)
494 		return (ENOBUFS);
495 
496 	task_add(ifq->ifq_softnet, &sc->sc_send_task);
497 
498 	return (0);
499 }
500 
/*
 * if_qstart hook: the queue has work pending; schedule the send task
 * to drain it on the softnet taskq.
 */
static void
vxlan_start(struct ifqueue *ifq)
{
	struct ifnet *ifp = ifq->ifq_if;
	struct vxlan_softc *sc = ifp->if_softc;

	task_add(ifq->ifq_softnet, &sc->sc_send_task);
}
509 
/*
 * Transmit a list of fully-encapsulated IPv4 packets via ip_output().
 * The multicast options pin the parent interface, TTL and loopback
 * suppression.  Returns the number of packets that failed to send.
 */
static uint64_t
vxlan_send_ipv4(struct vxlan_softc *sc, struct mbuf_list *ml)
{
	struct ip_moptions imo;
	struct mbuf *m;
	uint64_t oerrors = 0;

	imo.imo_ifidx = sc->sc_if_index0;
	imo.imo_ttl = sc->sc_ttl;
	imo.imo_loop = 0;

	/* ip_output() requires the net lock. */
	NET_LOCK();
	while ((m = ml_dequeue(ml)) != NULL) {
		if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &imo, NULL, 0) != 0)
			oerrors++;
	}
	NET_UNLOCK();

	return (oerrors);
}
530 
#ifdef INET6
/*
 * IPv6 twin of vxlan_send_ipv4(): push the encapsulated packets out
 * through ip6_output() under the net lock, counting failures.
 */
static uint64_t
vxlan_send_ipv6(struct vxlan_softc *sc, struct mbuf_list *ml)
{
	struct ip6_moptions im6o;
	struct mbuf *m;
	uint64_t oerrors = 0;

	im6o.im6o_ifidx = sc->sc_if_index0;
	im6o.im6o_hlim = sc->sc_ttl;
	im6o.im6o_loop = 0;

	NET_LOCK();
	while ((m = ml_dequeue(ml)) != NULL) {
		if (ip6_output(m, NULL, NULL, 0, &im6o, NULL) != 0)
			oerrors++;
	}
	NET_UNLOCK();

	return (oerrors);
}
#endif /* INET6 */
553 
/*
 * Send task: drain the interface send queue, encapsulating each frame
 * for the configured address family and batching the results for a
 * single locked transmit pass.  Runs on the softnet taskq.
 */
static void
vxlan_send(void *arg)
{
	struct vxlan_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct mbuf *(*ip_encap)(struct vxlan_softc *, struct mbuf *,
	    const union vxlan_addr *, uint8_t);
	uint64_t (*ip_send)(struct vxlan_softc *, struct mbuf_list *);
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct mbuf *m;
	uint64_t oerrors;

	if (!ISSET(ifp->if_flags, IFF_RUNNING))
		return;

	/* Pick the per-AF encapsulation and transmit helpers once. */
	switch (sc->sc_af) {
	case AF_INET:
		ip_encap = vxlan_encap_ipv4;
		ip_send = vxlan_send_ipv4;
		break;
#ifdef INET6
	case AF_INET6:
		ip_encap = vxlan_encap_ipv6;
		ip_send = vxlan_send_ipv6;
		break;
#endif
	default:
		unhandled_af(sc->sc_af);
		/* NOTREACHED */
	}

	while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) {
#if NBPFILTER > 0
		/* Tap the inner Ethernet frame before encapsulation. */
		caddr_t if_bpf = READ_ONCE(ifp->if_bpf);
		if (if_bpf != NULL)
			bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT);
#endif
		m = vxlan_encap(sc, m, ip_encap);
		if (m == NULL)
			continue;

		ml_enqueue(&ml, m);
	}

	oerrors = (*ip_send)(sc, &ml);

	counters_add(ifp->if_counters, ifc_oerrors, oerrors);
}
602 
603 static struct mbuf *
vxlan_input(void * arg,struct mbuf * m,struct ip * ip,struct ip6_hdr * ip6,void * uhp,int hlen)604 vxlan_input(void *arg, struct mbuf *m, struct ip *ip, struct ip6_hdr *ip6,
605     void *uhp, int hlen)
606 {
607 	struct vxlan_tep *vt = arg;
608 	union vxlan_addr addr;
609 	struct vxlan_peer key, *p;
610 	struct udphdr *uh;
611 	struct vxlan_header *vh;
612 	struct ether_header *eh;
613 	int vhlen = hlen + sizeof(*vh);
614 	struct mbuf *n;
615 	int off;
616 	in_port_t port;
617 	struct vxlan_softc *sc = NULL;
618 	struct ifnet *ifp;
619 	int rxhprio;
620 	uint8_t tos;
621 
622 	if (m->m_pkthdr.len < vhlen)
623 		goto drop;
624 
625 	uh = uhp;
626 	port = uh->uh_sport;
627 
628 	if (ip != NULL) {
629 		memset(&addr, 0, sizeof(addr));
630 		addr.in4 = ip->ip_src;
631 		tos = ip->ip_tos;
632 	}
633 #ifdef INET6
634 	else {
635 		addr.in6 = ip6->ip6_src;
636 		tos = bemtoh32(&ip6->ip6_flow) >> 20;
637 	}
638 #endif
639 
640 	if (m->m_len < vhlen) {
641 		m = m_pullup(m, vhlen);
642 		if (m == NULL)
643 			return (NULL);
644 	}
645 
646 	/* can't use ip/ip6/uh after this */
647 
648 	vh = (struct vxlan_header *)(mtod(m, caddr_t) + hlen);
649 
650 	memset(&key, 0, sizeof(key));
651 	key.p_addr = addr;
652 	key.p_header.vxlan_flags = vh->vxlan_flags & htonl(VXLAN_F_I);
653 	key.p_header.vxlan_id = vh->vxlan_id & htonl(VXLAN_VNI_MASK);
654 
655 	mtx_enter(&vt->vt_mtx);
656 	p = RBT_FIND(vxlan_peers, &vt->vt_peers, &key);
657 	if (p == NULL) {
658 		memset(&key.p_addr, 0, sizeof(key.p_addr));
659 		p = RBT_FIND(vxlan_peers, &vt->vt_peers, &key);
660 	}
661 	if (p != NULL)
662 		sc = vxlan_take(p->p_sc);
663 	mtx_leave(&vt->vt_mtx);
664 
665 	if (sc == NULL)
666 		goto drop;
667 
668 	ifp = &sc->sc_ac.ac_if;
669 	if (ISSET(ifp->if_flags, IFF_LINK0) && port != sc->sc_port)
670 		goto rele_drop;
671 
672 	m_adj(m, vhlen);
673 
674 	if (m->m_pkthdr.len < sizeof(*eh))
675 		goto rele_drop;
676 
677 	if (m->m_len < sizeof(*eh)) {
678 		m = m_pullup(m, sizeof(*eh));
679 		if (m == NULL)
680 			goto rele;
681 	}
682 
683 	n = m_getptr(m, sizeof(*eh), &off);
684 	if (n == NULL)
685 		goto rele_drop;
686 
687 	if (!ALIGNED_POINTER(mtod(n, caddr_t) + off, uint32_t)) {
688 		n = m_dup_pkt(m, ETHER_ALIGN, M_NOWAIT);
689 		m_freem(m);
690 		if (n == NULL)
691 			goto rele;
692 		m = n;
693 	}
694 
695 	if (sc->sc_mode == VXLAN_TMODE_LEARNING) {
696 		eh = mtod(m, struct ether_header *);
697 		etherbridge_map_ea(&sc->sc_eb, &addr,
698 		    (struct ether_addr *)eh->ether_shost);
699 	}
700 
701 	rxhprio = sc->sc_rxhprio;
702 	switch (rxhprio) {
703 	case IF_HDRPRIO_PACKET:
704 		/* nop */
705 		break;
706 	case IF_HDRPRIO_OUTER:
707 		m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(tos);
708 		break;
709 	default:
710 		m->m_pkthdr.pf.prio = rxhprio;
711 		break;                                                  \
712         }                                                               \
713 
714 	if_vinput(ifp, m);
715 rele:
716 	vxlan_rele(sc);
717 	return (NULL);
718 
719 rele_drop:
720 	vxlan_rele(sc);
721 drop:
722 	m_freem(m);
723 	return (NULL);
724 }
725 
/*
 * Interface ioctl handler: dispatches interface state changes, tunnel
 * configuration, header-priority, DF/TTL settings and the bridge-style
 * address-cache operations.  Runs under the net lock.
 */
static int
vxlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct vxlan_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifbrparam *bparam = (struct ifbrparam *)data;
	int error = 0;

	switch (cmd) {
	case SIOCSIFADDR:
		break;
	case SIOCSIFFLAGS:
		/* Reconcile IFF_UP with IFF_RUNNING. */
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = vxlan_up(sc);
			else
				error = 0;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = vxlan_down(sc);
		}
		break;

	case SIOCSLIFPHYRTABLE:
		error = vxlan_set_rdomain(sc, ifr);
		break;
	case SIOCGLIFPHYRTABLE:
		error = vxlan_get_rdomain(sc, ifr);
		break;

	case SIOCSLIFPHYADDR:
		error = vxlan_set_tunnel(sc, (const struct if_laddrreq *)data);
		break;
	case SIOCGLIFPHYADDR:
		error = vxlan_get_tunnel(sc, (struct if_laddrreq *)data);
		break;
	case SIOCDIFPHYADDR:
		error = vxlan_del_tunnel(sc);
		break;

	case SIOCSVNETID:
		error = vxlan_set_vnetid(sc, ifr);
		break;
	case SIOCGVNETID:
		error = vxlan_get_vnetid(sc, ifr);
		break;
	case SIOCDVNETID:
		error = vxlan_del_vnetid(sc);
		break;

	case SIOCSIFPARENT:
		error = vxlan_set_parent(sc, (struct if_parent *)data);
		break;
	case SIOCGIFPARENT:
		error = vxlan_get_parent(sc, (struct if_parent *)data);
		break;
	case SIOCDIFPARENT:
		error = vxlan_del_parent(sc);
		break;

	case SIOCSTXHPRIO:
		error = if_txhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_txhprio;
		break;

	case SIOCSRXHPRIO:
		error = if_rxhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_rxhprio;
		break;

	case SIOCSLIFPHYDF:
		/* commit: store network-order IP_DF so encap can use it raw */
		sc->sc_df = ifr->ifr_df ? htons(IP_DF) : htons(0);
		break;
	case SIOCGLIFPHYDF:
		ifr->ifr_df = sc->sc_df ? 1 : 0;
		break;

	case SIOCSLIFPHYTTL:
		if (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff) {
			error = EINVAL;
			break;
		}

		/* commit */
		sc->sc_ttl = (uint8_t)ifr->ifr_ttl;
		break;
	case SIOCGLIFPHYTTL:
		ifr->ifr_ttl = (int)sc->sc_ttl;
		break;

	case SIOCBRDGSCACHE:
		error = etherbridge_set_max(&sc->sc_eb, bparam);
		break;
	case SIOCBRDGGCACHE:
		error = etherbridge_get_max(&sc->sc_eb, bparam);
		break;
	case SIOCBRDGSTO:
		error = etherbridge_set_tmo(&sc->sc_eb, bparam);
		break;
	case SIOCBRDGGTO:
		error = etherbridge_get_tmo(&sc->sc_eb, bparam);
		break;

	case SIOCBRDGRTS:
		error = etherbridge_rtfind(&sc->sc_eb,
		    (struct ifbaconf *)data);
		break;
	case SIOCBRDGFLUSH:
		etherbridge_flush(&sc->sc_eb,
		    ((struct ifbreq *)data)->ifbr_ifsflags);
		break;
	case SIOCBRDGSADDR:
		error = vxlan_add_addr(sc, (struct ifbareq *)data);
		break;
	case SIOCBRDGDADDR:
		error = vxlan_del_addr(sc, (struct ifbareq *)data);
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		/* no hardware to program */
		break;

	default:
		error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
		break;
	}

	if (error == ENETRESET) {
		/* no hardware to program */
		error = 0;
	}

	return (error);
}
874 
/*
 * Find an existing tunnel endpoint matching this softc's address
 * family, rdomain, local address and port.  Caller must hold
 * vxlan_lock (the list is only walked under it).  Returns NULL if no
 * endpoint exists yet.
 */
static struct vxlan_tep *
vxlan_tep_get(struct vxlan_softc *sc, const union vxlan_addr *addr)
{
	struct vxlan_tep *vt;

	TAILQ_FOREACH(vt, &vxlan_teps, vt_entry) {
		if (sc->sc_af == vt->vt_af &&
		    sc->sc_rdomain == vt->vt_rdomain &&
		    memcmp(addr, &vt->vt_addr, sizeof(*addr)) == 0 &&
		    sc->sc_port == vt->vt_port)
			return (vt);
	}

	return (NULL);
}
890 
/*
 * Register peer `p` on the tunnel endpoint for `addr`, creating the
 * endpoint (including its bound UDP socket) if it does not yet exist.
 * Called with vxlan_lock held for writing.  Returns 0, EADDRINUSE if
 * an identical peer already exists, or another errno.
 */
static int
vxlan_tep_add_addr(struct vxlan_softc *sc, const union vxlan_addr *addr,
    struct vxlan_peer *p)
{
	struct mbuf m;		/* stack mbuf used for sockopt/bind args */
	struct vxlan_tep *vt;
	struct socket *so;
	struct sockaddr_in *sin;
#ifdef INET6
	struct sockaddr_in6 *sin6;
#endif
	int error;

	vt = vxlan_tep_get(sc, addr);
	if (vt != NULL) {
		struct vxlan_peer *op;

		/* Endpoint exists: just insert the peer into its tree. */
		mtx_enter(&vt->vt_mtx);
		op = RBT_INSERT(vxlan_peers, &vt->vt_peers, p);
		mtx_leave(&vt->vt_mtx);

		if (op != NULL)
			return (EADDRINUSE);

		return (0);
	}

	vt = malloc(sizeof(*vt), M_DEVBUF, M_NOWAIT|M_ZERO);
	if (vt == NULL)
		return (ENOMEM);

	vt->vt_af = sc->sc_af;
	vt->vt_rdomain = sc->sc_rdomain;
	vt->vt_addr = *addr;
	vt->vt_port = sc->sc_port;

	mtx_init(&vt->vt_mtx, IPL_SOFTNET);
	RBT_INIT(vxlan_peers, &vt->vt_peers);
	RBT_INSERT(vxlan_peers, &vt->vt_peers, p);

	/* Create the UDP socket that receives encapsulated packets. */
	error = socreate(vt->vt_af, &so, SOCK_DGRAM, IPPROTO_UDP);
	if (error != 0)
		goto free;

	/* Steer received datagrams into vxlan_input(). */
	solock(so);
	sotoinpcb(so)->inp_upcall = vxlan_input;
	sotoinpcb(so)->inp_upcall_arg = vt;
	sounlock(so);

	/* Pin the socket to the configured routing domain. */
	m_inithdr(&m);
	m.m_len = sizeof(vt->vt_rdomain);
	*mtod(&m, unsigned int *) = vt->vt_rdomain;
	error = sosetopt(so, SOL_SOCKET, SO_RTABLE, &m);
	if (error != 0)
		goto close;

	/* Bind to the local tunnel address and port. */
	m_inithdr(&m);
	switch (vt->vt_af) {
	case AF_INET:
		sin = mtod(&m, struct sockaddr_in *);
		memset(sin, 0, sizeof(*sin));
		sin->sin_len = sizeof(*sin);
		sin->sin_family = AF_INET;
		sin->sin_addr = addr->in4;
		sin->sin_port = vt->vt_port;

		m.m_len = sizeof(*sin);
		break;

#ifdef INET6
	case AF_INET6:
		sin6 = mtod(&m, struct sockaddr_in6 *);
		sin6->sin6_len = sizeof(*sin6);
		sin6->sin6_family = AF_INET6;
		in6_recoverscope(sin6, &addr->in6);
		sin6->sin6_port = sc->sc_port;

		m.m_len = sizeof(*sin6);
		break;
#endif
	default:
		unhandled_af(vt->vt_af);
	}

	solock(so);
	error = sobind(so, &m, curproc);
	sounlock(so);
	if (error != 0)
		goto close;

	rw_assert_wrlock(&vxlan_lock);
	TAILQ_INSERT_TAIL(&vxlan_teps, vt, vt_entry);

	vt->vt_so = so;

	return (0);

close:
	soclose(so, MSG_DONTWAIT);
free:
	free(vt, M_DEVBUF, sizeof(*vt));
	return (error);
}
994 
/*
 * Remove peer `p` from the endpoint for `addr`; when the endpoint's
 * peer tree becomes empty, tear down the endpoint and close its
 * socket.  Called with vxlan_lock held for writing.
 */
static void
vxlan_tep_del_addr(struct vxlan_softc *sc, const union vxlan_addr *addr,
    struct vxlan_peer *p)
{
	struct vxlan_tep *vt;
	int empty;

	vt = vxlan_tep_get(sc, addr);
	if (vt == NULL)
		panic("unable to find vxlan_tep for peer %p (sc %p)", p, sc);

	mtx_enter(&vt->vt_mtx);
	RBT_REMOVE(vxlan_peers, &vt->vt_peers, p);
	empty = RBT_EMPTY(vxlan_peers, &vt->vt_peers);
	mtx_leave(&vt->vt_mtx);

	if (!empty)
		return;

	rw_assert_wrlock(&vxlan_lock);
	TAILQ_REMOVE(&vxlan_teps, vt, vt_entry);

	soclose(vt->vt_so, MSG_DONTWAIT);
	free(vt, M_DEVBUF, sizeof(*vt));
}
1020 
1021 static int
vxlan_tep_up(struct vxlan_softc * sc)1022 vxlan_tep_up(struct vxlan_softc *sc)
1023 {
1024 	struct vxlan_peer *up, *mp;
1025 	int error;
1026 
1027 	up = malloc(sizeof(*up), M_DEVBUF, M_NOWAIT|M_ZERO);
1028 	if (up == NULL)
1029 		return (ENOMEM);
1030 
1031 	if (sc->sc_mode == VXLAN_TMODE_P2P)
1032 		up->p_addr = sc->sc_dst;
1033 	up->p_header = sc->sc_header;
1034 	up->p_sc = vxlan_take(sc);
1035 
1036 	error = vxlan_tep_add_addr(sc, &sc->sc_src, up);
1037 	if (error != 0)
1038 		goto freeup;
1039 
1040 	sc->sc_ucast_peer = up;
1041 
1042 	if (sc->sc_mode != VXLAN_TMODE_LEARNING)
1043 		return (0);
1044 
1045 	mp = malloc(sizeof(*mp), M_DEVBUF, M_NOWAIT|M_ZERO);
1046 	if (mp == NULL) {
1047 		error = ENOMEM;
1048 		goto delup;
1049 	}
1050 
1051 	/* addr is multicast, leave it as 0s */
1052 	mp->p_header = sc->sc_header;
1053 	mp->p_sc = vxlan_take(sc);
1054 
1055 	/* destination address is a multicast group we want to join */
1056 	error = vxlan_tep_add_addr(sc, &sc->sc_dst, up);
1057 	if (error != 0)
1058 		goto freemp;
1059 
1060 	sc->sc_mcast_peer = mp;
1061 
1062 	return (0);
1063 
1064 freemp:
1065 	vxlan_rele(mp->p_sc);
1066 	free(mp, M_DEVBUF, sizeof(*mp));
1067 delup:
1068 	vxlan_tep_del_addr(sc, &sc->sc_src, up);
1069 freeup:
1070 	vxlan_rele(up->p_sc);
1071 	free(up, M_DEVBUF, sizeof(*up));
1072 	return (error);
1073 }
1074 
/*
 * Undo vxlan_tep_up(): deregister (and free) the multicast peer in
 * learning mode, then the unicast peer.  Called with vxlan_lock held
 * for writing.
 */
static void
vxlan_tep_down(struct vxlan_softc *sc)
{
	struct vxlan_peer *up = sc->sc_ucast_peer;

	if (sc->sc_mode == VXLAN_TMODE_LEARNING) {
		struct vxlan_peer *mp = sc->sc_mcast_peer;
		vxlan_tep_del_addr(sc, &sc->sc_dst, mp);
		vxlan_rele(mp->p_sc);
		free(mp, M_DEVBUF, sizeof(*mp));
	}

	vxlan_tep_del_addr(sc, &sc->sc_src, up);
	vxlan_rele(up->p_sc);
	free(up, M_DEVBUF, sizeof(*up));
}
1091 
/*
 * Bring the interface up: take vxlan_lock (dropping the net lock to
 * avoid a lock-order problem), start the learning bridge if needed,
 * join the multicast group on the parent in learning mode, and
 * register the tunnel endpoint.  Returns 0 or an errno; always
 * returns with the net lock held, matching the caller's state.
 */
static int
vxlan_up(struct vxlan_softc *sc)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct ifnet *ifp0 = NULL;
	int error;

	KASSERT(!ISSET(ifp->if_flags, IFF_RUNNING));
	NET_ASSERT_LOCKED();

	if (sc->sc_af == AF_UNSPEC)
		return (EDESTADDRREQ);
	KASSERT(sc->sc_mode != VXLAN_TMODE_UNSET);

	/* vxlan_lock must be taken without the net lock held. */
	NET_UNLOCK();

	error = rw_enter(&vxlan_lock, RW_WRITE|RW_INTR);
	if (error != 0)
		goto netlock;

	NET_LOCK();
	if (ISSET(ifp->if_flags, IFF_RUNNING)) {
		/* something else beat us */
		rw_exit(&vxlan_lock);
		return (0);
	}
	NET_UNLOCK();

	if (sc->sc_mode != VXLAN_TMODE_P2P) {
		error = etherbridge_up(&sc->sc_eb);
		if (error != 0)
			goto unlock;
	}

	if (sc->sc_mode == VXLAN_TMODE_LEARNING) {
		ifp0 = if_get(sc->sc_if_index0);
		if (ifp0 == NULL) {
			error = ENXIO;
			goto down;
		}

		/* check again if multicast will work on top of the parent */
		if (!ISSET(ifp0->if_flags, IFF_MULTICAST)) {
			error = EPROTONOSUPPORT;
			goto put;
		}

		error = vxlan_addmulti(sc, ifp0);
		if (error != 0)
			goto put;

		/* Register callback if parent wants to unregister */
		if_detachhook_add(ifp0, &sc->sc_dtask);
	} else {
		/* Non-learning modes must not have a parent configured. */
		if (sc->sc_if_index0 != 0) {
			error = EPROTONOSUPPORT;
			goto down;
		}
	}

	error = vxlan_tep_up(sc);
	if (error != 0)
		goto del;

	if_put(ifp0);

	NET_LOCK();
	SET(ifp->if_flags, IFF_RUNNING);
	rw_exit(&vxlan_lock);

	return (0);

del:
	if (sc->sc_mode == VXLAN_TMODE_LEARNING) {
		if (ifp0 != NULL)
			if_detachhook_del(ifp0, &sc->sc_dtask);
		vxlan_delmulti(sc);
	}
put:
	if_put(ifp0);
down:
	if (sc->sc_mode != VXLAN_TMODE_P2P)
		etherbridge_down(&sc->sc_eb);
unlock:
	rw_exit(&vxlan_lock);
netlock:
	NET_LOCK();

	return (error);
}
1182 
/*
 * Take the interface down and undo what vxlan_up() set up.
 *
 * Called with the net lock held; the net lock is released before
 * taking vxlan_lock so the lock order (vxlan_lock before NET_LOCK)
 * matches vxlan_up().  Returns with the net lock held again.
 */
static int
vxlan_down(struct vxlan_softc *sc)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct ifnet *ifp0;
	int error;

	KASSERT(ISSET(ifp->if_flags, IFF_RUNNING));
	NET_UNLOCK();

	/* serialise against concurrent vxlan_up()/vxlan_down() */
	error = rw_enter(&vxlan_lock, RW_WRITE|RW_INTR);
	if (error != 0) {
		NET_LOCK();
		return (error);
	}

	/* recheck RUNNING now that we hold vxlan_lock */
	NET_LOCK();
	if (!ISSET(ifp->if_flags, IFF_RUNNING)) {
		/* something else beat us */
		rw_exit(&vxlan_lock);
		return (0);
	}
	NET_UNLOCK();

	vxlan_tep_down(sc);

	if (sc->sc_mode == VXLAN_TMODE_LEARNING) {
		/* drop the multicast group and the parent detach hook */
		vxlan_delmulti(sc);
		ifp0 = if_get(sc->sc_if_index0);
		if (ifp0 != NULL) {
			if_detachhook_del(ifp0, &sc->sc_dtask);
		}
		if_put(ifp0);
	}

	if (sc->sc_mode != VXLAN_TMODE_P2P)
		etherbridge_down(&sc->sc_eb);

	/* wait for any queued transmit task to finish before stopping */
	taskq_del_barrier(ifp->if_snd.ifq_softnet, &sc->sc_send_task);
	NET_LOCK();
	CLR(ifp->if_flags, IFF_RUNNING);
	rw_exit(&vxlan_lock);

	return (0);
}
1228 
/*
 * Join the tunnel destination multicast group on the parent interface
 * ifp0 (learning mode).  Called without the net lock held; takes it
 * internally.  Returns 0 or an errno; on success the membership is
 * recorded in sc_inmulti and undone by vxlan_delmulti().
 */
static int
vxlan_addmulti(struct vxlan_softc *sc, struct ifnet *ifp0)
{
	int error = 0;

	NET_LOCK();

	switch (sc->sc_af) {
	case AF_INET:
		/* in_addmulti() reports failure by returning NULL */
		sc->sc_inmulti = in_addmulti(&sc->sc_dst.in4, ifp0);
		if (sc->sc_inmulti == NULL)
			error = EADDRNOTAVAIL;
		break;
#ifdef INET6
	case AF_INET6:
		/* in6_addmulti() reports failure through &error */
		sc->sc_inmulti = in6_addmulti(&sc->sc_dst.in6, ifp0, &error);
		break;
#endif
	default:
		unhandled_af(sc->sc_af);
	}

	NET_UNLOCK();

	return (error);
}
1255 
/*
 * Leave the multicast group joined by vxlan_addmulti().  Called
 * without the net lock held; takes it internally.
 */
static void
vxlan_delmulti(struct vxlan_softc *sc)
{
	NET_LOCK();

	switch (sc->sc_af) {
	case AF_INET:
		in_delmulti(sc->sc_inmulti);
		break;
#ifdef INET6
	case AF_INET6:
		in6_delmulti(sc->sc_inmulti);
		break;
#endif
	default:
		unhandled_af(sc->sc_af);
	}

	sc->sc_inmulti = NULL; /* keep it tidy */

	NET_UNLOCK();
}
1278 
1279 static int
vxlan_set_rdomain(struct vxlan_softc * sc,const struct ifreq * ifr)1280 vxlan_set_rdomain(struct vxlan_softc *sc, const struct ifreq *ifr)
1281 {
1282 	struct ifnet *ifp = &sc->sc_ac.ac_if;
1283 
1284 	if (ifr->ifr_rdomainid < 0 ||
1285 	    ifr->ifr_rdomainid > RT_TABLEID_MAX)
1286 		return (EINVAL);
1287 	if (!rtable_exists(ifr->ifr_rdomainid))
1288 		return (EADDRNOTAVAIL);
1289 
1290 	if (sc->sc_rdomain == ifr->ifr_rdomainid)
1291 		return (0);
1292 
1293 	if (ISSET(ifp->if_flags, IFF_RUNNING))
1294 		return (EBUSY);
1295 
1296 	/* commit */
1297 	sc->sc_rdomain = ifr->ifr_rdomainid;
1298 	etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
1299 
1300 	return (0);
1301 }
1302 
/* Report the tunnel routing domain back to userland. */
static int
vxlan_get_rdomain(struct vxlan_softc *sc, struct ifreq *ifr)
{
	ifr->ifr_rdomainid = sc->sc_rdomain;

	return (0);
}
1310 
/*
 * Configure the tunnel source and optional destination addresses
 * (SIOCSLIFPHYADDR).  The destination address picks the mode: absent
 * (AF_UNSPEC) leaves a bare endpoint, a multicast destination selects
 * learning mode, a unicast destination selects point-to-point.  The
 * source port, if set, overrides the default VXLAN port; the
 * destination port must be unset.  Fails with EBUSY while running.
 */
static int
vxlan_set_tunnel(struct vxlan_softc *sc, const struct if_laddrreq *req)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct sockaddr *src = (struct sockaddr *)&req->addr;
	struct sockaddr *dst = (struct sockaddr *)&req->dstaddr;
	struct sockaddr_in *src4, *dst4;
#ifdef INET6
	struct sockaddr_in6 *src6, *dst6;
	int error;
#endif
	union vxlan_addr saddr, daddr;
	unsigned int mode = VXLAN_TMODE_ENDPOINT;
	in_port_t port = htons(VXLAN_PORT);

	memset(&saddr, 0, sizeof(saddr));
	memset(&daddr, 0, sizeof(daddr));

	/* validate */
	switch (src->sa_family) {
	case AF_INET:
		src4 = (struct sockaddr_in *)src;
		/* the local address must be a specific unicast host */
		if (in_nullhost(src4->sin_addr) ||
		    IN_MULTICAST(src4->sin_addr.s_addr))
			return (EINVAL);

		/* a non-zero source port overrides the default */
		if (src4->sin_port != htons(0))
			port = src4->sin_port;

		if (dst->sa_family != AF_UNSPEC) {
			/* both addresses must share the address family */
			if (dst->sa_family != AF_INET)
				return (EINVAL);

			dst4 = (struct sockaddr_in *)dst;
			if (in_nullhost(dst4->sin_addr))
				return (EINVAL);

			/* the port comes from the source address only */
			if (dst4->sin_port != htons(0))
				return (EINVAL);

			/* all good */
			mode = IN_MULTICAST(dst4->sin_addr.s_addr) ?
			    VXLAN_TMODE_LEARNING : VXLAN_TMODE_P2P;
			daddr.in4 = dst4->sin_addr;
		}

		saddr.in4 = src4->sin_addr;
		break;

#ifdef INET6
	case AF_INET6:
		src6 = (struct sockaddr_in6 *)src;
		/* the local address must be a specific unicast host */
		if (IN6_IS_ADDR_UNSPECIFIED(&src6->sin6_addr) ||
		    IN6_IS_ADDR_MULTICAST(&src6->sin6_addr))
			return (EINVAL);

		/* a non-zero source port overrides the default */
		if (src6->sin6_port != htons(0))
			port = src6->sin6_port;

		if (dst->sa_family != AF_UNSPEC) {
			/* both addresses must share the address family */
			if (dst->sa_family != AF_INET6)
				return (EINVAL);

			dst6 = (struct sockaddr_in6 *)dst;
			if (IN6_IS_ADDR_UNSPECIFIED(&dst6->sin6_addr))
				return (EINVAL);

			/* both ends must use the same scope */
			if (src6->sin6_scope_id != dst6->sin6_scope_id)
				return (EINVAL);

			/* the port comes from the source address only */
			if (dst6->sin6_port != htons(0))
				return (EINVAL);

			/* all good */
			mode = IN6_IS_ADDR_MULTICAST(&dst6->sin6_addr) ?
			    VXLAN_TMODE_LEARNING : VXLAN_TMODE_P2P;
			error = in6_embedscope(&daddr.in6, dst6, NULL, NULL);
			if (error != 0)
				return (error);
		}

		error = in6_embedscope(&saddr.in6, src6, NULL, NULL);
		if (error != 0)
			return (error);

		break;
#endif
	default:
		return (EAFNOSUPPORT);
	}

	/* setting the same tunnel again is a no-op */
	if (memcmp(&sc->sc_src, &saddr, sizeof(sc->sc_src)) == 0 &&
	    memcmp(&sc->sc_dst, &daddr, sizeof(sc->sc_dst)) == 0 &&
	    sc->sc_port == port)
		return (0);

	if (ISSET(ifp->if_flags, IFF_RUNNING))
		return (EBUSY);

	/* commit */
	sc->sc_af = src->sa_family;
	sc->sc_src = saddr;
	sc->sc_dst = daddr;
	sc->sc_port = port;
	sc->sc_mode = mode;
	etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);

	return (0);
}
1420 
/*
 * Report the configured tunnel addresses back to userland
 * (SIOCGLIFPHYADDR).  The local port is returned in the source
 * address; the destination is AF_UNSPEC in plain endpoint mode.
 */
static int
vxlan_get_tunnel(struct vxlan_softc *sc, struct if_laddrreq *req)
{
	struct sockaddr *dstaddr = (struct sockaddr *)&req->dstaddr;
	struct sockaddr_in *sin;
#ifdef INET6
	struct sockaddr_in6 *sin6;
#endif

	if (sc->sc_af == AF_UNSPEC)
		return (EADDRNOTAVAIL);
	KASSERT(sc->sc_mode != VXLAN_TMODE_UNSET);

	memset(&req->addr, 0, sizeof(req->addr));
	memset(&req->dstaddr, 0, sizeof(req->dstaddr));

	/* default to endpoint */
	dstaddr->sa_len = 2;
	dstaddr->sa_family = AF_UNSPEC;

	switch (sc->sc_af) {
	case AF_INET:
		sin = (struct sockaddr_in *)&req->addr;
		sin->sin_len = sizeof(*sin);
		sin->sin_family = AF_INET;
		sin->sin_addr = sc->sc_src.in4;
		sin->sin_port = sc->sc_port;

		/* no destination to report in endpoint mode */
		if (sc->sc_mode == VXLAN_TMODE_ENDPOINT)
			break;

		sin = (struct sockaddr_in *)&req->dstaddr;
		sin->sin_len = sizeof(*sin);
		sin->sin_family = AF_INET;
		sin->sin_addr = sc->sc_dst.in4;
		break;

#ifdef INET6
	case AF_INET6:
		sin6 = (struct sockaddr_in6 *)&req->addr;
		sin6->sin6_len = sizeof(*sin6);
		sin6->sin6_family = AF_INET6;
		/* turn the embedded scope back into sin6_scope_id */
		in6_recoverscope(sin6, &sc->sc_src.in6);
		sin6->sin6_port = sc->sc_port;

		/* no destination to report in endpoint mode */
		if (sc->sc_mode == VXLAN_TMODE_ENDPOINT)
			break;

		sin6 = (struct sockaddr_in6 *)&req->dstaddr;
		sin6->sin6_len = sizeof(*sin6);
		sin6->sin6_family = AF_INET6;
		in6_recoverscope(sin6, &sc->sc_dst.in6);
		break;
#endif
	default:
		unhandled_af(sc->sc_af);
	}

	return (0);
}
1481 
1482 static int
vxlan_del_tunnel(struct vxlan_softc * sc)1483 vxlan_del_tunnel(struct vxlan_softc *sc)
1484 {
1485 	struct ifnet *ifp = &sc->sc_ac.ac_if;
1486 
1487 	if (sc->sc_af == AF_UNSPEC)
1488 		return (0);
1489 
1490 	if (ISSET(ifp->if_flags, IFF_RUNNING))
1491 		return (EBUSY);
1492 
1493 	/* commit */
1494 	sc->sc_af = AF_UNSPEC;
1495 	memset(&sc->sc_src, 0, sizeof(sc->sc_src));
1496 	memset(&sc->sc_dst, 0, sizeof(sc->sc_dst));
1497 	sc->sc_port = htons(0);
1498 	sc->sc_mode = VXLAN_TMODE_UNSET;
1499 	etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
1500 
1501 	return (0);
1502 }
1503 
1504 static int
vxlan_set_vnetid(struct vxlan_softc * sc,const struct ifreq * ifr)1505 vxlan_set_vnetid(struct vxlan_softc *sc, const struct ifreq *ifr)
1506 {
1507 	struct ifnet *ifp = &sc->sc_ac.ac_if;
1508 	uint32_t vni;
1509 
1510 	if (ifr->ifr_vnetid < VXLAN_VNI_MIN ||
1511 	    ifr->ifr_vnetid > VXLAN_VNI_MAX)
1512 		return (EINVAL);
1513 
1514 	vni = htonl(ifr->ifr_vnetid << VXLAN_VNI_SHIFT);
1515 	if (ISSET(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I)) &&
1516 	    sc->sc_header.vxlan_id == vni)
1517 		return (0);
1518 
1519 	if (ISSET(ifp->if_flags, IFF_RUNNING))
1520 		return (EBUSY);
1521 
1522 	/* commit */
1523 	SET(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I));
1524 	sc->sc_header.vxlan_id = vni;
1525 	etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
1526 
1527 	return (0);
1528 }
1529 
1530 static int
vxlan_get_vnetid(struct vxlan_softc * sc,struct ifreq * ifr)1531 vxlan_get_vnetid(struct vxlan_softc *sc, struct ifreq *ifr)
1532 {
1533 	uint32_t vni;
1534 
1535 	if (!ISSET(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I)))
1536 		return (EADDRNOTAVAIL);
1537 
1538 	vni = ntohl(sc->sc_header.vxlan_id);
1539 	vni &= VXLAN_VNI_MASK;
1540 	vni >>= VXLAN_VNI_SHIFT;
1541 
1542 	ifr->ifr_vnetid = vni;
1543 
1544 	return (0);
1545 }
1546 
1547 static int
vxlan_del_vnetid(struct vxlan_softc * sc)1548 vxlan_del_vnetid(struct vxlan_softc *sc)
1549 {
1550 	struct ifnet *ifp = &sc->sc_ac.ac_if;
1551 
1552 	if (!ISSET(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I)))
1553 		return (0);
1554 
1555 	if (ISSET(ifp->if_flags, IFF_RUNNING))
1556 		return (EBUSY);
1557 
1558 	/* commit */
1559 	CLR(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I));
1560 	sc->sc_header.vxlan_id = htonl(0 << VXLAN_VNI_SHIFT);
1561 	etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
1562 
1563 	return (0);
1564 }
1565 
/*
 * Set the parent interface used by learning mode (SIOCSIFPARENT).
 * The parent must exist and be multicast capable; a running interface
 * refuses with EBUSY.  LRO is switched off on the parent before use.
 */
static int
vxlan_set_parent(struct vxlan_softc *sc, const struct if_parent *p)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct ifnet *ifp0;
	int error = 0;

	ifp0 = if_unit(p->ifp_parent);
	if (ifp0 == NULL)
		return (ENXIO);

	/* learning mode joins a multicast group on the parent */
	if (!ISSET(ifp0->if_flags, IFF_MULTICAST)) {
		error = ENXIO;
		goto put;
	}

	/* setting the same parent again is a no-op */
	if (sc->sc_if_index0 == ifp0->if_index)
		goto put;

	if (ISSET(ifp->if_flags, IFF_RUNNING)) {
		error = EBUSY;
		goto put;
	}

	ifsetlro(ifp0, 0);

	/* commit */
	sc->sc_if_index0 = ifp0->if_index;
	etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);

put:
	if_put(ifp0);
	return (error);
}
1600 
1601 static int
vxlan_get_parent(struct vxlan_softc * sc,struct if_parent * p)1602 vxlan_get_parent(struct vxlan_softc *sc, struct if_parent *p)
1603 {
1604 	struct ifnet *ifp0;
1605 	int error = 0;
1606 
1607 	ifp0 = if_get(sc->sc_if_index0);
1608 	if (ifp0 == NULL)
1609 		error = EADDRNOTAVAIL;
1610 	else
1611 		strlcpy(p->ifp_parent, ifp0->if_xname, sizeof(p->ifp_parent));
1612 	if_put(ifp0);
1613 
1614 	return (error);
1615 }
1616 
1617 static int
vxlan_del_parent(struct vxlan_softc * sc)1618 vxlan_del_parent(struct vxlan_softc *sc)
1619 {
1620 	struct ifnet *ifp = &sc->sc_ac.ac_if;
1621 
1622 	if (sc->sc_if_index0 == 0)
1623 		return (0);
1624 
1625 	if (ISSET(ifp->if_flags, IFF_RUNNING))
1626 		return (EBUSY);
1627 
1628 	/* commit */
1629 	sc->sc_if_index0 = 0;
1630 	etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL);
1631 
1632 	return (0);
1633 }
1634 
/*
 * Add a static or dynamic endpoint entry to the etherbridge table
 * (SIOCBRDGSADDR style).  Only valid once a tunnel is configured, and
 * not in point-to-point mode where there is a single fixed peer.  The
 * endpoint must be a unicast host in the tunnel's address family.
 */
static int
vxlan_add_addr(struct vxlan_softc *sc, const struct ifbareq *ifba)
{
	struct sockaddr_in *sin;
#ifdef INET6
	struct sockaddr_in6 *sin6;
	struct sockaddr_in6 src6 = {
		.sin6_len = sizeof(src6),
		.sin6_family = AF_UNSPEC,
	};
	int error;
#endif
	union vxlan_addr endpoint;
	unsigned int type;

	switch (sc->sc_mode) {
	case VXLAN_TMODE_UNSET:
		return (ENOPROTOOPT);
	case VXLAN_TMODE_P2P:
		return (EPROTONOSUPPORT);
	default:
		break;
	}

	/* ignore ifba_ifsname */

	/* only the type bits are meaningful in ifba_flags */
	if (ISSET(ifba->ifba_flags, ~IFBAF_TYPEMASK))
		return (EINVAL);
	switch (ifba->ifba_flags & IFBAF_TYPEMASK) {
	case IFBAF_DYNAMIC:
		type = EBE_DYNAMIC;
		break;
	case IFBAF_STATIC:
		type = EBE_STATIC;
		break;
	default:
		return (EINVAL);
	}

	memset(&endpoint, 0, sizeof(endpoint));

	/* the endpoint must match the tunnel's address family */
	if (ifba->ifba_dstsa.ss_family != sc->sc_af)
		return (EAFNOSUPPORT);
	switch (ifba->ifba_dstsa.ss_family) {
	case AF_INET:
		sin = (struct sockaddr_in *)&ifba->ifba_dstsa;
		/* the endpoint must be a specific unicast host */
		if (in_nullhost(sin->sin_addr) ||
		    IN_MULTICAST(sin->sin_addr.s_addr))
			return (EADDRNOTAVAIL);

		/* per-endpoint ports are not supported */
		if (sin->sin_port != htons(0))
			return (EADDRNOTAVAIL);

		endpoint.in4 = sin->sin_addr;
		break;

#ifdef INET6
	case AF_INET6:
		sin6 = (struct sockaddr_in6 *)&ifba->ifba_dstsa;
		/* the endpoint must be a specific unicast host */
		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
		    IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
			return (EADDRNOTAVAIL);

		/* the endpoint must be in the tunnel source's scope */
		in6_recoverscope(&src6, &sc->sc_src.in6);
		if (src6.sin6_scope_id != sin6->sin6_scope_id)
			return (EADDRNOTAVAIL);

		/* per-endpoint ports are not supported */
		if (sin6->sin6_port != htons(0))
			return (EADDRNOTAVAIL);

		error = in6_embedscope(&endpoint.in6, sin6, NULL, NULL);
		if (error != 0)
			return (error);

		break;
#endif
	default: /* AF_UNSPEC */
		return (EADDRNOTAVAIL);
	}

	return (etherbridge_add_addr(&sc->sc_eb, &endpoint,
	    &ifba->ifba_dst, type));
}
1718 
/* Remove the endpoint entry keyed by ifba_dst from the bridge table. */
static int
vxlan_del_addr(struct vxlan_softc *sc, const struct ifbareq *ifba)
{
	return (etherbridge_del_addr(&sc->sc_eb, &ifba->ifba_dst));
}
1724 
/*
 * if_detachhook callback, registered on the parent by vxlan_up() in
 * learning mode: the parent interface is going away, so bring this
 * interface down first and then forget the parent.  The index is
 * cleared only after vxlan_down(), which still needs it to remove
 * the hook and multicast membership.
 */
void
vxlan_detach_hook(void *arg)
{
	struct vxlan_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ac.ac_if;

	if (ISSET(ifp->if_flags, IFF_RUNNING)) {
		vxlan_down(sc);
		CLR(ifp->if_flags, IFF_UP);
	}

	sc->sc_if_index0 = 0;
}
1738 
1739 static int
vxlan_eb_port_eq(void * arg,void * a,void * b)1740 vxlan_eb_port_eq(void *arg, void *a, void *b)
1741 {
1742 	const union vxlan_addr *va = a, *vb = b;
1743 	size_t i;
1744 
1745 	for (i = 0; i < nitems(va->in6.s6_addr32); i++) {
1746 		if (va->in6.s6_addr32[i] != vb->in6.s6_addr32[i])
1747 			return (0);
1748 	}
1749 
1750 	return (1);
1751 }
1752 
1753 static void *
vxlan_eb_port_take(void * arg,void * port)1754 vxlan_eb_port_take(void *arg, void *port)
1755 {
1756 	union vxlan_addr *endpoint;
1757 
1758 	endpoint = pool_get(&vxlan_endpoint_pool, PR_NOWAIT);
1759 	if (endpoint == NULL)
1760 		return (NULL);
1761 
1762 	*endpoint = *(union vxlan_addr *)port;
1763 
1764 	return (endpoint);
1765 }
1766 
1767 static void
vxlan_eb_port_rele(void * arg,void * port)1768 vxlan_eb_port_rele(void *arg, void *port)
1769 {
1770 	union vxlan_addr *endpoint = port;
1771 
1772 	pool_put(&vxlan_endpoint_pool, endpoint);
1773 }
1774 
1775 static size_t
vxlan_eb_port_ifname(void * arg,char * dst,size_t len,void * port)1776 vxlan_eb_port_ifname(void *arg, char *dst, size_t len, void *port)
1777 {
1778 	struct vxlan_softc *sc = arg;
1779 
1780 	return (strlcpy(dst, sc->sc_ac.ac_if.if_xname, len));
1781 }
1782 
/*
 * etherbridge op: export an endpoint address as a sockaddr in the
 * tunnel's address family, for the bridge address listing ioctls.
 */
static void
vxlan_eb_port_sa(void *arg, struct sockaddr_storage *ss, void *port)
{
	struct vxlan_softc *sc = arg;
	union vxlan_addr *endpoint = port;

	switch (sc->sc_af) {
	case AF_INET: {
		struct sockaddr_in *sin = (struct sockaddr_in *)ss;

		sin->sin_len = sizeof(*sin);
		sin->sin_family = AF_INET;
		sin->sin_addr = endpoint->in4;
		break;
	}
#ifdef INET6
	case AF_INET6: {
		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ss;

		sin6->sin6_len = sizeof(*sin6);
		sin6->sin6_family = AF_INET6;
		/* turn the embedded scope back into sin6_scope_id */
		in6_recoverscope(sin6, &endpoint->in6);
		break;
	}
#endif /* INET6 */
	default:
		unhandled_af(sc->sc_af);
	}
}
1812 
1813 static inline int
vxlan_peer_cmp(const struct vxlan_peer * ap,const struct vxlan_peer * bp)1814 vxlan_peer_cmp(const struct vxlan_peer *ap, const struct vxlan_peer *bp)
1815 {
1816 	size_t i;
1817 
1818 	if (ap->p_header.vxlan_id > bp->p_header.vxlan_id)
1819 		return (1);
1820 	if (ap->p_header.vxlan_id < bp->p_header.vxlan_id)
1821 		return (-1);
1822 	if (ap->p_header.vxlan_flags > bp->p_header.vxlan_flags)
1823 		return (1);
1824 	if (ap->p_header.vxlan_flags < bp->p_header.vxlan_flags)
1825 		return (-1);
1826 
1827 	for (i = 0; i < nitems(ap->p_addr.in6.s6_addr32); i++) {
1828 		if (ap->p_addr.in6.s6_addr32[i] >
1829 		    bp->p_addr.in6.s6_addr32[i])
1830 			return (1);
1831 		if (ap->p_addr.in6.s6_addr32[i] <
1832 		    bp->p_addr.in6.s6_addr32[i])
1833 			return (-1);
1834 	}
1835 
1836 	return (0);
1837 }
1838 
/* Generate the vxlan_peers red-black tree ops, ordered by vxlan_peer_cmp(). */
RBT_GENERATE(vxlan_peers, vxlan_peer, p_entry, vxlan_peer_cmp);
1840