xref: /openbsd/sys/net/if_veb.c (revision d415bd75)
1 /*	$OpenBSD: if_veb.c,v 1.31 2023/05/16 14:32:54 jan Exp $ */
2 
3 /*
4  * Copyright (c) 2021 David Gwynne <dlg@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include "bpfilter.h"
20 #include "pf.h"
21 #include "vlan.h"
22 
23 #include <sys/param.h>
24 #include <sys/kernel.h>
25 #include <sys/malloc.h>
26 #include <sys/mbuf.h>
27 #include <sys/queue.h>
28 #include <sys/socket.h>
29 #include <sys/sockio.h>
30 #include <sys/systm.h>
31 #include <sys/syslog.h>
32 #include <sys/rwlock.h>
33 #include <sys/percpu.h>
34 #include <sys/smr.h>
35 #include <sys/task.h>
36 #include <sys/pool.h>
37 
38 #include <net/if.h>
39 #include <net/if_dl.h>
40 #include <net/if_types.h>
41 
42 #include <netinet/in.h>
43 #include <netinet/ip.h>
44 #include <netinet/if_ether.h>
45 
46 #ifdef INET6
47 #include <netinet6/in6_var.h>
48 #include <netinet/ip6.h>
49 #include <netinet6/ip6_var.h>
50 #endif
51 
52 #if 0 && defined(IPSEC)
53 /*
54  * IPsec handling is disabled in veb until getting and using tdbs is mpsafe.
55  */
56 #include <netinet/ip_ipsp.h>
57 #include <net/if_enc.h>
58 #endif
59 
60 #include <net/if_bridge.h>
61 #include <net/if_etherbridge.h>
62 
63 #if NBPFILTER > 0
64 #include <net/bpf.h>
65 #endif
66 
67 #if NPF > 0
68 #include <net/pfvar.h>
69 #endif
70 
71 #if NVLAN > 0
72 #include <net/if_vlan_var.h>
73 #endif
74 
/* SIOCBRDGSIFFLGS, SIOCBRDGGIFFLGS */
76 #define VEB_IFBIF_FLAGS	(IFBIF_LEARNING|IFBIF_DISCOVER|IFBIF_BLOCKNONIP)
77 
/*
 * veb_rule: a single filter rule attached to a veb port.  Rules sit
 * on the port's p_vrl tailq (all rules, serialised by sc_rule_lock)
 * and, per direction, on the SMR lists that veb_rule_filter() walks
 * without taking a lock.
 */
struct veb_rule {
	TAILQ_ENTRY(veb_rule)		vr_entry;	/* on veb_port p_vrl */
	SMR_TAILQ_ENTRY(veb_rule)	vr_lentry[2];	/* per-direction SMR lists */

	uint16_t			vr_flags;
#define VEB_R_F_IN				(1U << 0)
#define VEB_R_F_OUT				(1U << 1)
#define VEB_R_F_SRC				(1U << 2)	/* compare vr_src */
#define VEB_R_F_DST				(1U << 3)	/* compare vr_dst */

#define VEB_R_F_ARP				(1U << 4)	/* ARP request/reply only */
#define VEB_R_F_RARP				(1U << 5)	/* RARP request/reply only */
#define VEB_R_F_SHA				(1U << 6)	/* compare vr_arp_sha */
#define VEB_R_F_SPA				(1U << 7)	/* compare vr_arp_spa */
#define VEB_R_F_THA				(1U << 8)	/* compare vr_arp_tha */
#define VEB_R_F_TPA				(1U << 9)	/* compare vr_arp_tpa */
	uint16_t			 vr_arp_op;	/* ARP op to match, 0 for any */

	uint64_t			 vr_src;	/* source MAC as a uint64 */
	uint64_t			 vr_dst;	/* dest MAC as a uint64 */
	struct ether_addr		 vr_arp_sha;
	struct ether_addr		 vr_arp_tha;
	struct in_addr			 vr_arp_spa;
	struct in_addr			 vr_arp_tpa;

	unsigned int			 vr_action;
#define VEB_R_MATCH				0	/* tag and keep evaluating */
#define VEB_R_PASS				1	/* tag and accept */
#define VEB_R_BLOCK				2	/* drop the packet */

	int				 vr_pftag;	/* pf tag applied on match */
};
110 
111 TAILQ_HEAD(veb_rules, veb_rule);
112 SMR_TAILQ_HEAD(veb_rule_list, veb_rule);
113 
114 struct veb_softc;
115 
/*
 * veb_port: per-interface state for a member of a veb bridge,
 * used both for normal ports and span ports.
 */
struct veb_port {
	struct ifnet			*p_ifp0;	/* underlying interface */
	struct refcnt			 p_refs;	/* references on this port */

	/* how veb transmits on p_ifp0 */
	int (*p_enqueue)(struct ifnet *, struct mbuf *);

	/*
	 * saved p_ifp0 handlers — presumably swapped in while the
	 * port is on the bridge and restored on detach (the swap
	 * happens outside this view; see veb_p_ioctl/veb_p_output).
	 */
	int (*p_ioctl)(struct ifnet *, u_long, caddr_t);
	int (*p_output)(struct ifnet *, struct mbuf *, struct sockaddr *,
	    struct rtentry *);

	struct task			 p_ltask;	/* link state change task */
	struct task			 p_dtask;	/* interface detach task */

	struct veb_softc		*p_veb;		/* back pointer to the bridge */

	struct ether_brport		 p_brport;	/* bridge input registration */

	unsigned int			 p_link_state;
	unsigned int			 p_bif_flags;	/* IFBIF_* bridge port flags */
	uint32_t			 p_protected;	/* protected domain bitmap */

	struct veb_rules		 p_vrl;		/* all rules, sc_rule_lock */
	unsigned int			 p_nvrl;	/* count of rules on p_vrl */
	struct veb_rule_list		 p_vr_list[2];	/* SMR rule lists by dir */
#define VEB_RULE_LIST_OUT			0
#define VEB_RULE_LIST_IN			1
};
143 
/*
 * veb_ports: a reference counted map of the ports on a bridge,
 * published via SMR pointers (sc_ports/sc_spans).  Readers take
 * m_refs inside an SMR read section (see veb_span() and
 * veb_broadcast()) so the array stays valid after smr_read_leave().
 */
struct veb_ports {
	struct refcnt			 m_refs;	/* references on this map */
	unsigned int			 m_count;	/* entries in the array below */

	/* followed by an array of veb_port pointers */
};
150 
/*
 * veb_softc: softc for one veb(4) bridge interface.
 */
struct veb_softc {
	struct ifnet			 sc_if;		/* the veb interface itself */
	unsigned int			 sc_dead;	/* set during clone destroy */

	struct etherbridge		 sc_eb;		/* learned address table */

	struct rwlock			 sc_rule_lock;	/* serialises rule changes */
	struct veb_ports		*sc_ports;	/* SMR-published port map */
	struct veb_ports		*sc_spans;	/* SMR-published span port map */
};
161 
162 #define DPRINTF(_sc, fmt...)    do { \
163 	if (ISSET((_sc)->sc_if.if_flags, IFF_DEBUG)) \
164 		printf(fmt); \
165 } while (0)
166 
167 static int	veb_clone_create(struct if_clone *, int);
168 static int	veb_clone_destroy(struct ifnet *);
169 
170 static int	veb_ioctl(struct ifnet *, u_long, caddr_t);
171 static void	veb_input(struct ifnet *, struct mbuf *);
172 static int	veb_enqueue(struct ifnet *, struct mbuf *);
173 static int	veb_output(struct ifnet *, struct mbuf *, struct sockaddr *,
174 		    struct rtentry *);
175 static void	veb_start(struct ifqueue *);
176 
177 static int	veb_up(struct veb_softc *);
178 static int	veb_down(struct veb_softc *);
179 static int	veb_iff(struct veb_softc *);
180 
181 static void	veb_p_linkch(void *);
182 static void	veb_p_detach(void *);
183 static int	veb_p_ioctl(struct ifnet *, u_long, caddr_t);
184 static int	veb_p_output(struct ifnet *, struct mbuf *,
185 		    struct sockaddr *, struct rtentry *);
186 
187 static inline size_t
188 veb_ports_size(unsigned int n)
189 {
190 	/* use of _ALIGN is inspired by CMSGs */
191 	return _ALIGN(sizeof(struct veb_ports)) +
192 	    n * sizeof(struct veb_port *);
193 }
194 
195 static inline struct veb_port **
196 veb_ports_array(struct veb_ports *m)
197 {
198 	return (struct veb_port **)((caddr_t)m + _ALIGN(sizeof(*m)));
199 }
200 
201 static inline void veb_ports_free(struct veb_ports *);
202 
203 static void	veb_p_unlink(struct veb_softc *, struct veb_port *);
204 static void	veb_p_fini(struct veb_port *);
205 static void	veb_p_dtor(struct veb_softc *, struct veb_port *);
206 static int	veb_add_port(struct veb_softc *,
207 		    const struct ifbreq *, unsigned int);
208 static int	veb_del_port(struct veb_softc *,
209 		    const struct ifbreq *, unsigned int);
210 static int	veb_port_list(struct veb_softc *, struct ifbifconf *);
211 static int	veb_port_set_flags(struct veb_softc *, struct ifbreq *);
212 static int	veb_port_get_flags(struct veb_softc *, struct ifbreq *);
213 static int	veb_port_set_protected(struct veb_softc *,
214 		    const struct ifbreq *);
215 static int	veb_add_addr(struct veb_softc *, const struct ifbareq *);
216 static int	veb_del_addr(struct veb_softc *, const struct ifbareq *);
217 
218 static int	veb_rule_add(struct veb_softc *, const struct ifbrlreq *);
219 static int	veb_rule_list_flush(struct veb_softc *,
220 		    const struct ifbrlreq *);
221 static void	veb_rule_list_free(struct veb_rule *);
222 static int	veb_rule_list_get(struct veb_softc *, struct ifbrlconf *);
223 
224 static int	 veb_eb_port_cmp(void *, void *, void *);
225 static void	*veb_eb_port_take(void *, void *);
226 static void	 veb_eb_port_rele(void *, void *);
227 static size_t	 veb_eb_port_ifname(void *, char *, size_t, void *);
228 static void	 veb_eb_port_sa(void *, struct sockaddr_storage *, void *);
229 
230 static void	 veb_eb_brport_take(void *);
231 static void	 veb_eb_brport_rele(void *);
232 
233 static const struct etherbridge_ops veb_etherbridge_ops = {
234 	veb_eb_port_cmp,
235 	veb_eb_port_take,
236 	veb_eb_port_rele,
237 	veb_eb_port_ifname,
238 	veb_eb_port_sa,
239 };
240 
241 static struct if_clone veb_cloner =
242     IF_CLONE_INITIALIZER("veb", veb_clone_create, veb_clone_destroy);
243 
244 static struct pool veb_rule_pool;
245 
246 static int	vport_clone_create(struct if_clone *, int);
247 static int	vport_clone_destroy(struct ifnet *);
248 
249 struct vport_softc {
250 	struct arpcom		 sc_ac;
251 	unsigned int		 sc_dead;
252 };
253 
254 static int	vport_if_enqueue(struct ifnet *, struct mbuf *);
255 
256 static int	vport_ioctl(struct ifnet *, u_long, caddr_t);
257 static int	vport_enqueue(struct ifnet *, struct mbuf *);
258 static void	vport_start(struct ifqueue *);
259 
260 static int	vport_up(struct vport_softc *);
261 static int	vport_down(struct vport_softc *);
262 static int	vport_iff(struct vport_softc *);
263 
264 static struct if_clone vport_cloner =
265     IF_CLONE_INITIALIZER("vport", vport_clone_create, vport_clone_destroy);
266 
/*
 * vebattach: pseudo-device attach hook, run once at boot to
 * register the veb(4) and vport(4) interface cloners.
 */
void
vebattach(int count)
{
	if_clone_attach(&veb_cloner);
	if_clone_attach(&vport_cloner);
}
273 
/*
 * veb_clone_create: create and attach a new veb(4) interface.
 * Returns 0 on success or an errno (ENOMEM, or whatever
 * etherbridge_init fails with).
 */
static int
veb_clone_create(struct if_clone *ifc, int unit)
{
	struct veb_softc *sc;
	struct ifnet *ifp;
	int error;

	/* set up the shared rule pool the first time a veb is made */
	if (veb_rule_pool.pr_size == 0) {
		pool_init(&veb_rule_pool, sizeof(struct veb_rule),
		    0, IPL_SOFTNET, 0, "vebrpl", NULL);
	}

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO|M_CANFAIL);
	if (sc == NULL)
		return (ENOMEM);

	rw_init(&sc->sc_rule_lock, "vebrlk");
	sc->sc_ports = NULL;
	sc->sc_spans = NULL;

	ifp = &sc->sc_if;

	snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", ifc->ifc_name, unit);

	error = etherbridge_init(&sc->sc_eb, ifp->if_xname,
	    &veb_etherbridge_ops, sc);
	if (error != 0) {
		free(sc, M_DEVBUF, sizeof(*sc));
		return (error);
	}

	/* the veb interface itself never carries traffic directly */
	ifp->if_softc = sc;
	ifp->if_type = IFT_BRIDGE;
	ifp->if_hdrlen = ETHER_HDR_LEN;
	ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
	ifp->if_ioctl = veb_ioctl;
	ifp->if_input = veb_input;
	ifp->if_output = veb_output;
	ifp->if_enqueue = veb_enqueue;
	ifp->if_qstart = veb_start;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE;

	if_counters_alloc(ifp);
	if_attach(ifp);

	if_alloc_sadl(ifp);

#if NBPFILTER > 0
	bpfattach(&ifp->if_bpf, ifp, DLT_EN10MB, ETHER_HDR_LEN);
#endif

	return (0);
}
328 
329 static int
330 veb_clone_destroy(struct ifnet *ifp)
331 {
332 	struct veb_softc *sc = ifp->if_softc;
333 	struct veb_ports *mp, *ms;
334 	struct veb_port **ps;
335 	struct veb_port *p;
336 	unsigned int i;
337 
338 	NET_LOCK();
339 	sc->sc_dead = 1;
340 
341 	if (ISSET(ifp->if_flags, IFF_RUNNING))
342 		veb_down(sc);
343 	NET_UNLOCK();
344 
345 	if_detach(ifp);
346 
347 	NET_LOCK();
348 
349 	/*
350 	 * this is an upside down version of veb_p_dtor() and
351 	 * veb_ports_destroy() to avoid a lot of malloc/free and
352 	 * smr_barrier calls if we remove ports one by one.
353 	 */
354 
355 	mp = SMR_PTR_GET_LOCKED(&sc->sc_ports);
356 	SMR_PTR_SET_LOCKED(&sc->sc_ports, NULL);
357 	if (mp != NULL) {
358 		ps = veb_ports_array(mp);
359 		for (i = 0; i < mp->m_count; i++)
360 			veb_p_unlink(sc, ps[i]);
361 	}
362 
363 	ms = SMR_PTR_GET_LOCKED(&sc->sc_spans);
364 	SMR_PTR_SET_LOCKED(&sc->sc_spans, NULL);
365 	if (ms != NULL) {
366 		ps = veb_ports_array(ms);
367 		for (i = 0; i < ms->m_count; i++)
368 			veb_p_unlink(sc, ps[i]);
369 	}
370 
371 	if (mp != NULL || ms != NULL) {
372 		smr_barrier(); /* everything everywhere all at once */
373 
374 		if (mp != NULL) {
375 			refcnt_finalize(&mp->m_refs, "vebdtor");
376 
377 			ps = veb_ports_array(mp);
378 			for (i = 0; i < mp->m_count; i++) {
379 				p = ps[i];
380 				/* the ports map holds a port ref */
381 				refcnt_rele(&p->p_refs);
382 				/* now we can finalize the port */
383 				veb_p_fini(p);
384 			}
385 
386 			veb_ports_free(mp);
387 		}
388 		if (ms != NULL) {
389 			refcnt_finalize(&ms->m_refs, "vebdtor");
390 
391 			ps = veb_ports_array(ms);
392 			for (i = 0; i < ms->m_count; i++) {
393 				p = ps[i];
394 				/* the ports map holds a port ref */
395 				refcnt_rele(&p->p_refs);
396 				/* now we can finalize the port */
397 				veb_p_fini(p);
398 			}
399 
400 			veb_ports_free(ms);
401 		}
402 	}
403 	NET_UNLOCK();
404 
405 	etherbridge_destroy(&sc->sc_eb);
406 
407 	free(sc, M_DEVBUF, sizeof(*sc));
408 
409 	return (0);
410 }
411 
/*
 * veb_span_input: input handler for span ports.  Span ports are
 * transmit-only (see veb_span()), so anything received on one is
 * dropped.
 */
static struct mbuf *
veb_span_input(struct ifnet *ifp0, struct mbuf *m, uint64_t dst, void *brport)
{
	m_freem(m);
	return (NULL);
}
418 
/*
 * veb_span: copy frame m0 out every running span port on the
 * bridge.  The original mbuf is left intact for the caller.
 */
static void
veb_span(struct veb_softc *sc, struct mbuf *m0)
{
	struct veb_ports *sm;
	struct veb_port **ps;
	struct veb_port *p;
	struct ifnet *ifp0;
	struct mbuf *m;
	unsigned int i;

	/*
	 * take a reference on the current span map inside the SMR
	 * read section so the array stays valid while we walk it.
	 */
	smr_read_enter();
	sm = SMR_PTR_GET(&sc->sc_spans);
	if (sm != NULL)
		refcnt_take(&sm->m_refs);
	smr_read_leave();
	if (sm == NULL)
		return;

	ps = veb_ports_array(sm);
	for (i = 0; i < sm->m_count; i++) {
		p = ps[i];

		ifp0 = p->p_ifp0;
		if (!ISSET(ifp0->if_flags, IFF_RUNNING))
			continue;

		/* each span port gets its own copy of the frame */
		m = m_dup_pkt(m0, max_linkhdr + ETHER_ALIGN, M_NOWAIT);
		if (m == NULL) {
			/* XXX count error */
			continue;
		}

		if_enqueue(ifp0, m); /* XXX count error */
	}
	refcnt_rele_wake(&sm->m_refs);
}
455 
456 static int
457 veb_ip_filter(const struct mbuf *m)
458 {
459 	const struct ether_header *eh;
460 
461 	eh = mtod(m, struct ether_header *);
462 	switch (ntohs(eh->ether_type)) {
463 	case ETHERTYPE_IP:
464 	case ETHERTYPE_ARP:
465 	case ETHERTYPE_REVARP:
466 	case ETHERTYPE_IPV6:
467 		return (0);
468 	default:
469 		break;
470 	}
471 
472 	return (1);
473 }
474 
475 static int
476 veb_vlan_filter(const struct mbuf *m)
477 {
478 	const struct ether_header *eh;
479 
480 	eh = mtod(m, struct ether_header *);
481 	switch (ntohs(eh->ether_type)) {
482 	case ETHERTYPE_VLAN:
483 	case ETHERTYPE_QINQ:
484 		return (1);
485 	default:
486 		break;
487 	}
488 
489 	return (0);
490 }
491 
/*
 * veb_rule_arp_match: does the ARP payload of m satisfy the ARP
 * fields of rule vr?  Returns 1 on match, 0 otherwise — including
 * when the packet is not a well-formed Ethernet/IPv4 ARP at all.
 */
static int
veb_rule_arp_match(const struct veb_rule *vr, struct mbuf *m)
{
	struct ether_header *eh;
	struct ether_arp ea;

	eh = mtod(m, struct ether_header *);

	/* only complete Ethernet/IPv4 ARP packets can match */
	if (eh->ether_type != htons(ETHERTYPE_ARP))
		return (0);
	if (m->m_pkthdr.len < sizeof(*eh) + sizeof(ea))
		return (0);

	m_copydata(m, sizeof(*eh), sizeof(ea), (caddr_t)&ea);

	if (ea.arp_hrd != htons(ARPHRD_ETHER) ||
	    ea.arp_pro != htons(ETHERTYPE_IP) ||
	    ea.arp_hln != ETHER_ADDR_LEN ||
	    ea.arp_pln != sizeof(struct in_addr))
		return (0);

	/* VEB_R_F_ARP restricts the rule to ARP request/reply ops */
	if (ISSET(vr->vr_flags, VEB_R_F_ARP)) {
		if (ea.arp_op != htons(ARPOP_REQUEST) &&
		    ea.arp_op != htons(ARPOP_REPLY))
			return (0);
	}
	/* VEB_R_F_RARP restricts the rule to RARP request/reply ops */
	if (ISSET(vr->vr_flags, VEB_R_F_RARP)) {
		if (ea.arp_op != htons(ARPOP_REVREQUEST) &&
		    ea.arp_op != htons(ARPOP_REVREPLY))
			return (0);
	}

	/* a non-zero vr_arp_op pins the exact operation */
	if (vr->vr_arp_op != htons(0) && vr->vr_arp_op != ea.arp_op)
		return (0);

	/* per-field comparisons, each gated by its flag */
	if (ISSET(vr->vr_flags, VEB_R_F_SHA) &&
	    !ETHER_IS_EQ(&vr->vr_arp_sha, ea.arp_sha))
		return (0);
	if (ISSET(vr->vr_flags, VEB_R_F_THA) &&
	    !ETHER_IS_EQ(&vr->vr_arp_tha, ea.arp_tha))
		return (0);
	if (ISSET(vr->vr_flags, VEB_R_F_SPA) &&
	    memcmp(&vr->vr_arp_spa, ea.arp_spa, sizeof(vr->vr_arp_spa)) != 0)
		return (0);
	if (ISSET(vr->vr_flags, VEB_R_F_TPA) &&
	    memcmp(&vr->vr_arp_tpa, ea.arp_tpa, sizeof(vr->vr_arp_tpa)) != 0)
		return (0);

	return (1);
}
542 
/*
 * veb_rule_list_test: evaluate the packet against the SMR rule
 * list starting at vr.  Rules are first-match: a matching block
 * rule returns VEB_R_BLOCK, a matching pass rule returns
 * VEB_R_PASS, and a matching "match" rule only applies its pf tag
 * and keeps going.  Falls through to VEB_R_PASS when nothing
 * terminal matched.  Must be called in an SMR read section.
 */
static int
veb_rule_list_test(struct veb_rule *vr, int dir, struct mbuf *m,
    uint64_t src, uint64_t dst)
{
	SMR_ASSERT_CRITICAL();

	do {
		if (ISSET(vr->vr_flags, VEB_R_F_ARP|VEB_R_F_RARP) &&
		    !veb_rule_arp_match(vr, m))
			continue;

		if (ISSET(vr->vr_flags, VEB_R_F_SRC) &&
		    vr->vr_src != src)
			continue;
		if (ISSET(vr->vr_flags, VEB_R_F_DST) &&
		    vr->vr_dst != dst)
			continue;

		if (vr->vr_action == VEB_R_BLOCK)
			return (VEB_R_BLOCK);
#if NPF > 0
		/* matching (non-block) rules tag the packet for pf */
		pf_tag_packet(m, vr->vr_pftag, -1);
#endif
		if (vr->vr_action == VEB_R_PASS)
			return (VEB_R_PASS);
	} while ((vr = SMR_TAILQ_NEXT(vr, vr_lentry[dir])) != NULL);

	return (VEB_R_PASS);
}
572 
573 static inline int
574 veb_rule_filter(struct veb_port *p, int dir, struct mbuf *m,
575     uint64_t src, uint64_t dst)
576 {
577 	struct veb_rule *vr;
578 	int filter = VEB_R_PASS;
579 
580 	smr_read_enter();
581 	vr = SMR_TAILQ_FIRST(&p->p_vr_list[dir]);
582 	if (vr != NULL)
583 		filter = veb_rule_list_test(vr, dir, m, src, dst);
584 	smr_read_leave();
585 
586 	return (filter == VEB_R_BLOCK);
587 }
588 
589 #if NPF > 0
/*
 * veb_pf_ip_family: per-address-family hooks used by veb_pf().
 * ip_check is run on input before pf_test, and ip_input hands
 * diverted packets to the l3 input path.
 */
struct veb_pf_ip_family {
	sa_family_t	   af;
	struct mbuf	*(*ip_check)(struct ifnet *, struct mbuf *);
	void		 (*ip_input)(struct ifnet *, struct mbuf *);
};

static const struct veb_pf_ip_family veb_pf_ipv4 = {
	.af		= AF_INET,
	.ip_check	= ipv4_check,
	.ip_input	= ipv4_input,
};

#ifdef INET6
static const struct veb_pf_ip_family veb_pf_ipv6 = {
	.af		= AF_INET6,
	.ip_check	= ipv6_check,
	.ip_input	= ipv6_input,
};
#endif
609 
/*
 * veb_pf: run pf over a bridged IPv4/IPv6 frame.  The Ethernet
 * header is stripped, pf tests the bare IP packet, and the header
 * is glued back on afterwards.  Returns the (possibly
 * reallocated) mbuf to continue bridging, or NULL if the packet
 * was dropped, consumed, or diverted into the local stack.
 */
static struct mbuf *
veb_pf(struct ifnet *ifp0, int dir, struct mbuf *m)
{
	struct ether_header *eh, copy;
	const struct veb_pf_ip_family *fam;

	/*
	 * pf runs on vport interfaces when they enter or leave the
	 * l3 stack, so don't confuse things (even more) by running
	 * pf again here. note that because of this exception the
	 * pf direction on vport interfaces is reversed compared to
	 * other veb ports.
	 */
	if (ifp0->if_enqueue == vport_enqueue)
		return (m);

	eh = mtod(m, struct ether_header *);
	switch (ntohs(eh->ether_type)) {
	case ETHERTYPE_IP:
		fam = &veb_pf_ipv4;
		break;
#ifdef INET6
	case ETHERTYPE_IPV6:
		fam = &veb_pf_ipv6;
		break;
#endif
	default:
		/* pf only applies to IP packets, pass anything else */
		return (m);
	}

	/* remember the Ethernet header, then strip it for pf */
	copy = *eh;
	m_adj(m, sizeof(*eh));

	if (dir == PF_IN) {
		m = (*fam->ip_check)(ifp0, m);
		if (m == NULL)
			return (NULL);
	}

	if (pf_test(fam->af, dir, ifp0, &m) != PF_PASS) {
		m_freem(m);
		return (NULL);
	}
	if (m == NULL)
		return (NULL);

	/* diverted packets are handed to the local IP stack instead */
	if (dir == PF_IN && ISSET(m->m_pkthdr.pf.flags, PF_TAG_DIVERTED)) {
		pf_mbuf_unlink_state_key(m);
		pf_mbuf_unlink_inpcb(m);
		(*fam->ip_input)(ifp0, m);
		return (NULL);
	}

	/* put the Ethernet header back on */
	m = m_prepend(m, sizeof(*eh), M_DONTWAIT);
	if (m == NULL)
		return (NULL);

	/* checksum? */

	eh = mtod(m, struct ether_header *);
	*eh = copy;

	return (m);
}
674 #endif /* NPF > 0 */
675 
676 #if 0 && defined(IPSEC)
/*
 * veb_ipsec_proto_in: if m is an ESP/AH/IPCOMP packet with a
 * matching valid tdb, feed it to the IPsec input xform and return
 * NULL.  Otherwise return m to decline and keep bridging.
 * (note: this whole IPsec section is compiled out — see the
 * "#if 0 && defined(IPSEC)" above.)
 */
static struct mbuf *
veb_ipsec_proto_in(struct ifnet *ifp0, struct mbuf *m, int iphlen,
    /* const */ union sockaddr_union *dst, int poff)
{
	struct tdb *tdb;
	uint16_t cpi;
	uint32_t spi;
	uint8_t proto;

	/* ipsec_common_input checks for 8 bytes of input, so we do too */
	if (m->m_pkthdr.len < iphlen + 2 * sizeof(u_int32_t))
		return (m); /* decline */

	proto = *(mtod(m, uint8_t *) + poff);
	/* i'm not a huge fan of how these headers get picked at */
	switch (proto) {
	case IPPROTO_ESP:
		m_copydata(m, iphlen, sizeof(spi), &spi);
		break;
	case IPPROTO_AH:
		m_copydata(m, iphlen + sizeof(uint32_t), sizeof(spi), &spi);
		break;
	case IPPROTO_IPCOMP:
		m_copydata(m, iphlen + sizeof(uint16_t), sizeof(cpi), &cpi);
		spi = htonl(ntohs(cpi));
		break;
	default:
		return (m); /* decline */
	}

	tdb = gettdb(m->m_pkthdr.ph_rtableid, spi, dst, proto);
	if (tdb != NULL && !ISSET(tdb->tdb_flags, TDBF_INVALID) &&
	    tdb->tdb_xform != NULL) {
		/* record first use and arm the first-use expiry timers */
		if (tdb->tdb_first_use == 0) {
			tdb->tdb_first_use = gettime();
			if (ISSET(tdb->tdb_flags, TDBF_FIRSTUSE)) {
				timeout_add_sec(&tdb->tdb_first_tmo,
				    tdb->tdb_exp_first_use);
			}
			if (ISSET(tdb->tdb_flags, TDBF_SOFT_FIRSTUSE)) {
				timeout_add_sec(&tdb->tdb_sfirst_tmo,
				    tdb->tdb_soft_first_use);
			}
		}

		(*(tdb->tdb_xform->xf_input))(m, tdb, iphlen, poff);
		return (NULL);
	}

	return (m);
}
728 
/*
 * veb_ipsec_ipv4_in: pull up the IPv4 header, build a sockaddr
 * for the destination address, and try veb_ipsec_proto_in().
 */
static struct mbuf *
veb_ipsec_ipv4_in(struct ifnet *ifp0, struct mbuf *m)
{
	union sockaddr_union su = {
		.sin.sin_len = sizeof(su.sin),
		.sin.sin_family = AF_INET,
	};
	struct ip *ip;
	int iphlen;

	if (m->m_len < sizeof(*ip)) {
		m = m_pullup(m, sizeof(*ip));
		if (m == NULL)
			return (NULL);
	}

	ip = mtod(m, struct ip *);
	iphlen = ip->ip_hl << 2;
	if (iphlen < sizeof(*ip)) {
		/* this is a weird packet, decline */
		return (m);
	}

	su.sin.sin_addr = ip->ip_dst;

	return (veb_ipsec_proto_in(ifp0, m, iphlen, &su,
	    offsetof(struct ip, ip_p)));
}
757 
758 #ifdef INET6
/*
 * veb_ipsec_ipv6_in: pull up the IPv6 header, build a sockaddr
 * for the destination address, and try veb_ipsec_proto_in().
 */
static struct mbuf *
veb_ipsec_ipv6_in(struct ifnet *ifp0, struct mbuf *m)
{
	union sockaddr_union su = {
		.sin6.sin6_len = sizeof(su.sin6),
		.sin6.sin6_family = AF_INET6,
	};
	struct ip6_hdr *ip6;

	if (m->m_len < sizeof(*ip6)) {
		m = m_pullup(m, sizeof(*ip6));
		if (m == NULL)
			return (NULL);
	}

	ip6 = mtod(m, struct ip6_hdr *);

	su.sin6.sin6_addr = ip6->ip6_dst;

	/* XXX scope? */

	return (veb_ipsec_proto_in(ifp0, m, sizeof(*ip6), &su,
	    offsetof(struct ip6_hdr, ip6_nxt)));
}
783 #endif /* INET6 */
784 
/*
 * veb_ipsec_in: offer an inbound IPv4/IPv6 frame to IPsec.  The
 * Ethernet header is stripped around the per-family handler and
 * restored if the packet is declined.  Returns NULL if the packet
 * was consumed, or the mbuf to keep bridging.
 */
static struct mbuf *
veb_ipsec_in(struct ifnet *ifp0, struct mbuf *m)
{
	struct mbuf *(*ipsec_ip_in)(struct ifnet *, struct mbuf *);
	struct ether_header *eh, copy;

	/* vport traffic meets IPsec via the l3 stack, not here */
	if (ifp0->if_enqueue == vport_enqueue)
		return (m);

	eh = mtod(m, struct ether_header *);
	switch (ntohs(eh->ether_type)) {
	case ETHERTYPE_IP:
		ipsec_ip_in = veb_ipsec_ipv4_in;
		break;
#ifdef INET6
	case ETHERTYPE_IPV6:
		ipsec_ip_in = veb_ipsec_ipv6_in;
		break;
#endif /* INET6 */
	default:
		return (m);
	}

	copy = *eh;
	m_adj(m, sizeof(*eh));

	m = (*ipsec_ip_in)(ifp0, m);
	if (m == NULL)
		return (NULL);

	m = m_prepend(m, sizeof(*eh), M_DONTWAIT);
	if (m == NULL)
		return (NULL);

	eh = mtod(m, struct ether_header *);
	*eh = copy;

	return (m);
}
824 
/*
 * veb_ipsec_proto_out: look the packet up in the outbound SPD.
 * If a tdb matches, run pf against the corresponding enc(4)
 * interface and hand the packet to ipsp_process_packet, consuming
 * it.  Returns m to decline.
 */
static struct mbuf *
veb_ipsec_proto_out(struct mbuf *m, sa_family_t af, int iphlen)
{
	struct tdb *tdb;
	int error;
#if NPF > 0
	struct ifnet *encifp;
#endif

	tdb = ipsp_spd_lookup(m, af, iphlen, &error, IPSP_DIRECTION_OUT,
	    NULL, NULL, NULL);
	if (tdb == NULL)
		return (m);

#if NPF > 0
	encifp = enc_getif(tdb->tdb_rdomain, tdb->tdb_tap);
	if (encifp != NULL) {
		if (pf_test(af, PF_OUT, encifp, &m) != PF_PASS) {
			m_freem(m);
			return (NULL);
		}
		if (m == NULL)
			return (NULL);
	}
#endif /* NPF > 0 */

	/* XXX mtu checks */

	(void)ipsp_process_packet(m, tdb, af, 0);
	return (NULL);
}
856 
/*
 * veb_ipsec_ipv4_out: pull up the IPv4 header and try
 * veb_ipsec_proto_out() with its real header length.
 */
static struct mbuf *
veb_ipsec_ipv4_out(struct mbuf *m)
{
	struct ip *ip;
	int iphlen;

	if (m->m_len < sizeof(*ip)) {
		m = m_pullup(m, sizeof(*ip));
		if (m == NULL)
			return (NULL);
	}

	ip = mtod(m, struct ip *);
	iphlen = ip->ip_hl << 2;
	if (iphlen < sizeof(*ip)) {
		/* this is a weird packet, decline */
		return (m);
	}

	return (veb_ipsec_proto_out(m, AF_INET, iphlen));
}
878 
879 #ifdef INET6
/*
 * veb_ipsec_ipv6_out: IPv6 headers are fixed length, so go
 * straight to veb_ipsec_proto_out().
 */
static struct mbuf *
veb_ipsec_ipv6_out(struct mbuf *m)
{
	return (veb_ipsec_proto_out(m, AF_INET6, sizeof(struct ip6_hdr)));
}
885 #endif /* INET6 */
886 
/*
 * veb_ipsec_out: offer an outbound IPv4/IPv6 frame to IPsec.  The
 * Ethernet header is stripped around the per-family handler and
 * restored if the packet is declined.  Returns NULL if the packet
 * was consumed, or the mbuf to keep bridging.
 */
static struct mbuf *
veb_ipsec_out(struct ifnet *ifp0, struct mbuf *m)
{
	struct mbuf *(*ipsec_ip_out)(struct mbuf *);
	struct ether_header *eh, copy;

	/* vport traffic meets IPsec via the l3 stack, not here */
	if (ifp0->if_enqueue == vport_enqueue)
		return (m);

	eh = mtod(m, struct ether_header *);
	switch (ntohs(eh->ether_type)) {
	case ETHERTYPE_IP:
		ipsec_ip_out = veb_ipsec_ipv4_out;
		break;
#ifdef INET6
	case ETHERTYPE_IPV6:
		ipsec_ip_out = veb_ipsec_ipv6_out;
		break;
#endif /* INET6 */
	default:
		return (m);
	}

	copy = *eh;
	m_adj(m, sizeof(*eh));

	m = (*ipsec_ip_out)(m);
	if (m == NULL)
		return (NULL);

	m = m_prepend(m, sizeof(*eh), M_DONTWAIT);
	if (m == NULL)
		return (NULL);

	eh = mtod(m, struct ether_header *);
	*eh = copy;

	return (m);
}
926 #endif /* IPSEC */
927 
/*
 * veb_broadcast: flood frame m0 out every eligible port on the
 * bridge except the one it arrived on (rp).  Consumes m0.
 */
static void
veb_broadcast(struct veb_softc *sc, struct veb_port *rp, struct mbuf *m0,
    uint64_t src, uint64_t dst)
{
	struct ifnet *ifp = &sc->sc_if;
	struct veb_ports *pm;
	struct veb_port **ps;
	struct veb_port *tp;
	struct ifnet *ifp0;
	struct mbuf *m;
	unsigned int i;

#if NPF > 0
	/*
	 * we couldn't find a specific port to send this packet to,
	 * but pf should still have a chance to apply policy to it.
	 * let pf look at it, but use the veb interface as a proxy.
	 */
	if (ISSET(ifp->if_flags, IFF_LINK1) &&
	    (m0 = veb_pf(ifp, PF_OUT, m0)) == NULL)
		return;
#endif

#if 0 && defined(IPSEC)
	/* same goes for ipsec */
	if (ISSET(ifp->if_flags, IFF_LINK2) &&
	    (m0 = veb_ipsec_out(ifp, m0)) == NULL)
		return;
#endif

	counters_pkt(ifp->if_counters, ifc_opackets, ifc_obytes,
	    m0->m_pkthdr.len);

	/* take a reference on the port map so it outlives the walk */
	smr_read_enter();
	pm = SMR_PTR_GET(&sc->sc_ports);
	if (__predict_true(pm != NULL))
		refcnt_take(&pm->m_refs);
	smr_read_leave();
	if (__predict_false(pm == NULL))
		goto done;

	ps = veb_ports_array(pm);
	for (i = 0; i < pm->m_count; i++) {
		tp = ps[i];

		if (rp == tp || (rp->p_protected & tp->p_protected)) {
			/*
			 * don't let Ethernet packets hairpin or
			 * move between ports in the same protected
			 * domain(s).
			 */
			continue;
		}

		ifp0 = tp->p_ifp0;
		if (!ISSET(ifp0->if_flags, IFF_RUNNING)) {
			/* don't waste time */
			continue;
		}

		if (!ISSET(tp->p_bif_flags, IFBIF_DISCOVER) &&
		    !ISSET(m0->m_flags, M_BCAST | M_MCAST)) {
			/* don't flood unknown unicast */
			continue;
		}

		if (veb_rule_filter(tp, VEB_RULE_LIST_OUT, m0, src, dst))
			continue;

		/* every port gets its own copy; m0 stays ours */
		m = m_dup_pkt(m0, max_linkhdr + ETHER_ALIGN, M_NOWAIT);
		if (m == NULL) {
			/* XXX count error? */
			continue;
		}

		(*tp->p_enqueue)(ifp0, m); /* XXX count error */
	}
	refcnt_rele_wake(&pm->m_refs);

done:
	m_freem(m0);
}
1010 
1011 static struct mbuf *
1012 veb_transmit(struct veb_softc *sc, struct veb_port *rp, struct veb_port *tp,
1013     struct mbuf *m, uint64_t src, uint64_t dst)
1014 {
1015 	struct ifnet *ifp = &sc->sc_if;
1016 	struct ifnet *ifp0;
1017 
1018 	if (tp == NULL)
1019 		return (m);
1020 
1021 	if (rp == tp || (rp->p_protected & tp->p_protected)) {
1022 		/*
1023 		 * don't let Ethernet packets hairpin or move between
1024 		 * ports in the same protected domain(s).
1025 		 */
1026 		goto drop;
1027 	}
1028 
1029 	if (veb_rule_filter(tp, VEB_RULE_LIST_OUT, m, src, dst))
1030 		goto drop;
1031 
1032 	ifp0 = tp->p_ifp0;
1033 
1034 #if 0 && defined(IPSEC)
1035 	if (ISSET(ifp->if_flags, IFF_LINK2) &&
1036 	    (m = veb_ipsec_out(ifp0, m0)) == NULL)
1037 		return;
1038 #endif
1039 
1040 #if NPF > 0
1041 	if (ISSET(ifp->if_flags, IFF_LINK1) &&
1042 	    (m = veb_pf(ifp0, PF_OUT, m)) == NULL)
1043 		return (NULL);
1044 #endif
1045 
1046 	counters_pkt(ifp->if_counters, ifc_opackets, ifc_obytes,
1047 	    m->m_pkthdr.len);
1048 
1049 	(*tp->p_enqueue)(ifp0, m); /* XXX count error */
1050 
1051 	return (NULL);
1052 drop:
1053 	m_freem(m);
1054 	return (NULL);
1055 }
1056 
/*
 * veb_vport_input: input handler for vport ports.  The frame is
 * passed through untouched; NOTE(review): vport traffic appears
 * to reach the bridge via vport_enqueue instead — confirm against
 * the port attach code outside this view.
 */
static struct mbuf *
veb_vport_input(struct ifnet *ifp0, struct mbuf *m, uint64_t dst, void *brport)
{
	return (m);
}
1062 
/*
 * veb_port_input: ether_brport input handler for a normal veb
 * port.  Filters the frame, copies it to span ports, optionally
 * runs pf/IPsec, learns the source address, and then forwards it
 * to a single resolved port or floods it.  Returns NULL when the
 * mbuf has been consumed, or the mbuf to decline it back to the
 * normal ethernet input path.
 */
static struct mbuf *
veb_port_input(struct ifnet *ifp0, struct mbuf *m, uint64_t dst, void *brport)
{
	struct veb_port *p = brport;
	struct veb_softc *sc = p->p_veb;
	struct ifnet *ifp = &sc->sc_if;
	struct ether_header *eh;
	uint64_t src;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	if (!ISSET(ifp->if_flags, IFF_RUNNING))
		return (m);

	eh = mtod(m, struct ether_header *);
	src = ether_addr_to_e64((struct ether_addr *)eh->ether_shost);

	/* Is this a MAC Bridge component Reserved address? */
	if (ETH64_IS_8021_RSVD(dst)) {
		if (!ISSET(ifp->if_flags, IFF_LINK0)) {
			/*
			 * letting vlans through implies this is
			 * an s-vlan component.
			 */
			goto drop;
		}

		 /* look at the last nibble of the 802.1 reserved address */
		switch (dst & 0xf) {
		case 0x0: /* Nearest Customer Bridge Group Address */
		case 0xb: /* EDE-SS PEP (IEEE Std 802.1AEcg) */
		case 0xc: /* reserved */
		case 0xd: /* Provider Bridge MVRP Address */
		case 0xf: /* reserved */
			break;
		default:
			goto drop;
		}
	}

#if NVLAN > 0
	/*
	 * If the underlying interface removed the VLAN header itself,
	 * add it back.
	 */
	if (ISSET(m->m_flags, M_VLANTAG)) {
		m = vlan_inject(m, ETHERTYPE_VLAN, m->m_pkthdr.ether_vtag);
		if (m == NULL) {
			counters_inc(ifp->if_counters, ifc_ierrors);
			goto drop;
		}
	}
#endif

	counters_pkt(ifp->if_counters, ifc_ipackets, ifc_ibytes,
	    m->m_pkthdr.len);

	/* force packets into the one routing domain for pf */
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

#if NBPFILTER > 0
	if_bpf = READ_ONCE(ifp->if_bpf);
	if (if_bpf != NULL) {
		if (bpf_mtap_ether(if_bpf, m, 0) != 0)
			goto drop;
	}
#endif

	/* copy the frame to any span ports before filtering */
	veb_span(sc, m);

	if (ISSET(p->p_bif_flags, IFBIF_BLOCKNONIP) &&
	    veb_ip_filter(m))
		goto drop;

	if (!ISSET(ifp->if_flags, IFF_LINK0) &&
	    veb_vlan_filter(m))
		goto drop;

	if (veb_rule_filter(p, VEB_RULE_LIST_IN, m, src, dst))
		goto drop;

#if NPF > 0
	if (ISSET(ifp->if_flags, IFF_LINK1) &&
	    (m = veb_pf(ifp0, PF_IN, m)) == NULL)
		return (NULL);
#endif

#if 0 && defined(IPSEC)
	if (ISSET(ifp->if_flags, IFF_LINK2) &&
	    (m = veb_ipsec_in(ifp0, m)) == NULL)
		return (NULL);
#endif

	eh = mtod(m, struct ether_header *);

	/* learn which port the source address lives on */
	if (ISSET(p->p_bif_flags, IFBIF_LEARNING))
		etherbridge_map(&sc->sc_eb, p, src);

	CLR(m->m_flags, M_BCAST|M_MCAST);

	if (!ETH64_IS_MULTICAST(dst)) {
		struct veb_port *tp = NULL;

		/* try to resolve the destination to a single port */
		smr_read_enter();
		tp = etherbridge_resolve(&sc->sc_eb, dst);
		if (tp != NULL)
			veb_eb_port_take(NULL, tp);
		smr_read_leave();
		if (tp != NULL) {
			m = veb_transmit(sc, p, tp, m, src, dst);
			veb_eb_port_rele(NULL, tp);
		}

		if (m == NULL)
			return (NULL);

		/* unknown unicast address */
	} else {
		SET(m->m_flags, ETH64_IS_BROADCAST(dst) ? M_BCAST : M_MCAST);
	}

	veb_broadcast(sc, p, m, src, dst);
	return (NULL);

drop:
	m_freem(m);
	return (NULL);
}
1192 
/*
 * veb_input: if_input handler for the veb interface itself.  The
 * bridge never takes traffic this way; drop it.
 */
static void
veb_input(struct ifnet *ifp, struct mbuf *m)
{
	m_freem(m);
}
1198 
/*
 * veb_output: the veb interface has no l3 output path; drop the
 * packet and report ENODEV.
 */
static int
veb_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
    struct rtentry *rt)
{
	m_freem(m);
	return (ENODEV);
}
1206 
/*
 * veb_enqueue: nothing may be queued directly on the veb
 * interface; drop the packet and report ENODEV.
 */
static int
veb_enqueue(struct ifnet *ifp, struct mbuf *m)
{
	m_freem(m);
	return (ENODEV);
}
1213 
/*
 * veb_start: ifq start routine. Packets should never reach the
 * veb send queue, so anything found there is thrown away.
 */
static void
veb_start(struct ifqueue *ifq)
{
	ifq_purge(ifq);
}
1219 
/*
 * veb_ioctl: ioctl handler for the veb(4) interface.
 *
 * Handles interface up/down plus the bridge(4)-compatible SIOCBRDG*
 * requests for port membership, the learning cache, and bridge rule
 * lists. Called with the net lock held. Returns ENXIO once the
 * interface is being destroyed, otherwise 0 or an errno.
 */
static int
veb_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct veb_softc *sc = ifp->if_softc;
	struct ifbrparam *bparam = (struct ifbrparam *)data;
	int error = 0;

	if (sc->sc_dead)
		return (ENXIO);

	switch (cmd) {
	case SIOCSIFFLAGS:
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = veb_up(sc);
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = veb_down(sc);
		}
		break;

	/* port membership: add/delete normal and span ports (root only) */
	case SIOCBRDGADD:
		error = suser(curproc);
		if (error != 0)
			break;

		error = veb_add_port(sc, (struct ifbreq *)data, 0);
		break;
	case SIOCBRDGADDS:
		error = suser(curproc);
		if (error != 0)
			break;

		error = veb_add_port(sc, (struct ifbreq *)data, 1);
		break;
	case SIOCBRDGDEL:
		error = suser(curproc);
		if (error != 0)
			break;

		error = veb_del_port(sc, (struct ifbreq *)data, 0);
		break;
	case SIOCBRDGDELS:
		error = suser(curproc);
		if (error != 0)
			break;

		error = veb_del_port(sc, (struct ifbreq *)data, 1);
		break;

	/* learning cache tunables */
	case SIOCBRDGSCACHE:
		error = suser(curproc);
		if (error != 0)
			break;

		error = etherbridge_set_max(&sc->sc_eb, bparam);
		break;
	case SIOCBRDGGCACHE:
		error = etherbridge_get_max(&sc->sc_eb, bparam);
		break;

	case SIOCBRDGSTO:
		error = suser(curproc);
		if (error != 0)
			break;

		error = etherbridge_set_tmo(&sc->sc_eb, bparam);
		break;
	case SIOCBRDGGTO:
		error = etherbridge_get_tmo(&sc->sc_eb, bparam);
		break;

	case SIOCBRDGRTS:
		error = etherbridge_rtfind(&sc->sc_eb, (struct ifbaconf *)data);
		break;
	case SIOCBRDGIFS:
		error = veb_port_list(sc, (struct ifbifconf *)data);
		break;
	case SIOCBRDGFLUSH:
		etherbridge_flush(&sc->sc_eb,
		    ((struct ifbreq *)data)->ifbr_ifsflags);
		break;
	case SIOCBRDGSADDR:
		error = veb_add_addr(sc, (struct ifbareq *)data);
		break;
	case SIOCBRDGDADDR:
		error = veb_del_addr(sc, (struct ifbareq *)data);
		break;

	case SIOCBRDGSIFPROT:
		error = veb_port_set_protected(sc, (struct ifbreq *)data);
		break;

	case SIOCBRDGSIFFLGS:
		error = veb_port_set_flags(sc, (struct ifbreq *)data);
		break;
	case SIOCBRDGGIFFLGS:
		error = veb_port_get_flags(sc, (struct ifbreq *)data);
		break;

	/* per-port bridge rule lists */
	case SIOCBRDGARL:
		error = veb_rule_add(sc, (struct ifbrlreq *)data);
		break;
	case SIOCBRDGFRL:
		error = veb_rule_list_flush(sc, (struct ifbrlreq *)data);
		break;
	case SIOCBRDGGRL:
		error = veb_rule_list_get(sc, (struct ifbrlconf *)data);
		break;

	default:
		error = ENOTTY;
		break;
	}

	if (error == ENETRESET)
		error = veb_iff(sc);

	return (error);
}
1340 
1341 static struct veb_ports *
1342 veb_ports_insert(struct veb_ports *om, struct veb_port *p)
1343 {
1344 	struct veb_ports *nm;
1345 	struct veb_port **nps, **ops;
1346 	unsigned int ocount = om != NULL ? om->m_count : 0;
1347 	unsigned int ncount = ocount + 1;
1348 	unsigned int i;
1349 
1350 	nm = malloc(veb_ports_size(ncount), M_DEVBUF, M_WAITOK|M_ZERO);
1351 
1352 	refcnt_init(&nm->m_refs);
1353 	nm->m_count = ncount;
1354 
1355 	nps = veb_ports_array(nm);
1356 
1357 	if (om != NULL) {
1358 		ops = veb_ports_array(om);
1359 		for (i = 0; i < ocount; i++) {
1360 			struct veb_port *op = ops[i];
1361 			refcnt_take(&op->p_refs);
1362 			nps[i] = op;
1363 		}
1364 	} else
1365 		i = 0;
1366 
1367 	refcnt_take(&p->p_refs);
1368 	nps[i] = p;
1369 
1370 	return (nm);
1371 }
1372 
1373 static struct veb_ports *
1374 veb_ports_remove(struct veb_ports *om, struct veb_port *p)
1375 {
1376 	struct veb_ports *nm;
1377 	struct veb_port **nps, **ops;
1378 	unsigned int ocount = om->m_count;
1379 	unsigned int ncount = ocount - 1;
1380 	unsigned int i, j;
1381 
1382 	if (ncount == 0)
1383 		return (NULL);
1384 
1385 	nm = malloc(veb_ports_size(ncount), M_DEVBUF, M_WAITOK|M_ZERO);
1386 
1387 	refcnt_init(&nm->m_refs);
1388 	nm->m_count = ncount;
1389 
1390 	nps = veb_ports_array(nm);
1391 	j = 0;
1392 
1393 	ops = veb_ports_array(om);
1394 	for (i = 0; i < ocount; i++) {
1395 		struct veb_port *op = ops[i];
1396 		if (op == p)
1397 			continue;
1398 
1399 		refcnt_take(&op->p_refs);
1400 		nps[j++] = op;
1401 	}
1402 	KASSERT(j == ncount);
1403 
1404 	return (nm);
1405 }
1406 
/*
 * veb_ports_free: release the memory backing a ports map. The port
 * references the map holds must already have been dropped (see
 * veb_ports_destroy(), which does both).
 */
static inline void
veb_ports_free(struct veb_ports *m)
{
	free(m, M_DEVBUF, veb_ports_size(m->m_count));
}
1412 
1413 static void
1414 veb_ports_destroy(struct veb_ports *m)
1415 {
1416 	struct veb_port **ps = veb_ports_array(m);
1417 	unsigned int i;
1418 
1419 	for (i = 0; i < m->m_count; i++) {
1420 		struct veb_port *p = ps[i];
1421 		refcnt_rele_wake(&p->p_refs);
1422 	}
1423 
1424 	veb_ports_free(m);
1425 }
1426 
1427 static int
1428 veb_add_port(struct veb_softc *sc, const struct ifbreq *req, unsigned int span)
1429 {
1430 	struct ifnet *ifp = &sc->sc_if;
1431 	struct ifnet *ifp0;
1432 	struct veb_ports **ports_ptr;
1433 	struct veb_ports *om, *nm;
1434 	struct veb_port *p;
1435 	int isvport;
1436 	int error;
1437 
1438 	NET_ASSERT_LOCKED();
1439 
1440 	ifp0 = if_unit(req->ifbr_ifsname);
1441 	if (ifp0 == NULL)
1442 		return (EINVAL);
1443 
1444 	if (ifp0->if_type != IFT_ETHER) {
1445 		error = EPROTONOSUPPORT;
1446 		goto put;
1447 	}
1448 
1449 	if (ifp0 == ifp) {
1450 		error = EPROTONOSUPPORT;
1451 		goto put;
1452 	}
1453 
1454 	isvport = (ifp0->if_enqueue == vport_enqueue);
1455 
1456 	error = ether_brport_isset(ifp0);
1457 	if (error != 0)
1458 		goto put;
1459 
1460 	/* let's try */
1461 
1462 	p = malloc(sizeof(*p), M_DEVBUF, M_WAITOK|M_ZERO|M_CANFAIL);
1463 	if (p == NULL) {
1464 		error = ENOMEM;
1465 		goto put;
1466 	}
1467 
1468 	ifsetlro(ifp0, 0);
1469 
1470 	p->p_ifp0 = ifp0;
1471 	p->p_veb = sc;
1472 
1473 	refcnt_init(&p->p_refs);
1474 	TAILQ_INIT(&p->p_vrl);
1475 	SMR_TAILQ_INIT(&p->p_vr_list[0]);
1476 	SMR_TAILQ_INIT(&p->p_vr_list[1]);
1477 
1478 	p->p_enqueue = isvport ? vport_if_enqueue : if_enqueue;
1479 	p->p_ioctl = ifp0->if_ioctl;
1480 	p->p_output = ifp0->if_output;
1481 
1482 	if (span) {
1483 		ports_ptr = &sc->sc_spans;
1484 
1485 		if (isvport) {
1486 			error = EPROTONOSUPPORT;
1487 			goto free;
1488 		}
1489 
1490 		p->p_brport.eb_input = veb_span_input;
1491 		p->p_bif_flags = IFBIF_SPAN;
1492 	} else {
1493 		ports_ptr = &sc->sc_ports;
1494 
1495 		error = ifpromisc(ifp0, 1);
1496 		if (error != 0)
1497 			goto free;
1498 
1499 		p->p_bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER;
1500 		p->p_brport.eb_input = isvport ?
1501 		    veb_vport_input : veb_port_input;
1502 	}
1503 
1504 	p->p_brport.eb_port_take = veb_eb_brport_take;
1505 	p->p_brport.eb_port_rele = veb_eb_brport_rele;
1506 
1507 	om = SMR_PTR_GET_LOCKED(ports_ptr);
1508 	nm = veb_ports_insert(om, p);
1509 
1510 	/* this might have changed if we slept for malloc or ifpromisc */
1511 	error = ether_brport_isset(ifp0);
1512 	if (error != 0)
1513 		goto unpromisc;
1514 
1515 	task_set(&p->p_ltask, veb_p_linkch, p);
1516 	if_linkstatehook_add(ifp0, &p->p_ltask);
1517 
1518 	task_set(&p->p_dtask, veb_p_detach, p);
1519 	if_detachhook_add(ifp0, &p->p_dtask);
1520 
1521 	p->p_brport.eb_port = p;
1522 
1523 	/* commit */
1524 	SMR_PTR_SET_LOCKED(ports_ptr, nm);
1525 
1526 	ether_brport_set(ifp0, &p->p_brport);
1527 	if (!isvport) { /* vport is special */
1528 		ifp0->if_ioctl = veb_p_ioctl;
1529 		ifp0->if_output = veb_p_output;
1530 	}
1531 
1532 	veb_p_linkch(p);
1533 
1534 	/* clean up the old veb_ports map */
1535 	smr_barrier();
1536 	if (om != NULL) {
1537 		refcnt_finalize(&om->m_refs, "vebports");
1538 		veb_ports_destroy(om);
1539 	}
1540 
1541 	return (0);
1542 
1543 unpromisc:
1544 	if (!span)
1545 		ifpromisc(ifp0, 0);
1546 free:
1547 	free(p, M_DEVBUF, sizeof(*p));
1548 put:
1549 	if_put(ifp0);
1550 	return (error);
1551 }
1552 
1553 static struct veb_port *
1554 veb_trunkport(struct veb_softc *sc, const char *name, unsigned int span)
1555 {
1556 	struct veb_ports *m;
1557 	struct veb_port **ps;
1558 	struct veb_port *p;
1559 	unsigned int i;
1560 
1561 	m = SMR_PTR_GET_LOCKED(span ? &sc->sc_spans : &sc->sc_ports);
1562 	if (m == NULL)
1563 		return (NULL);
1564 
1565 	ps = veb_ports_array(m);
1566 	for (i = 0; i < m->m_count; i++) {
1567 		p = ps[i];
1568 
1569 		if (strncmp(p->p_ifp0->if_xname, name, IFNAMSIZ) == 0)
1570 			return (p);
1571 	}
1572 
1573 	return (NULL);
1574 }
1575 
1576 static int
1577 veb_del_port(struct veb_softc *sc, const struct ifbreq *req, unsigned int span)
1578 {
1579 	struct veb_port *p;
1580 
1581 	NET_ASSERT_LOCKED();
1582 	p = veb_trunkport(sc, req->ifbr_ifsname, span);
1583 	if (p == NULL)
1584 		return (EINVAL);
1585 
1586 	veb_p_dtor(sc, p);
1587 
1588 	return (0);
1589 }
1590 
1591 static struct veb_port *
1592 veb_port_get(struct veb_softc *sc, const char *name)
1593 {
1594 	struct veb_ports *m;
1595 	struct veb_port **ps;
1596 	struct veb_port *p;
1597 	unsigned int i;
1598 
1599 	NET_ASSERT_LOCKED();
1600 
1601 	m = SMR_PTR_GET_LOCKED(&sc->sc_ports);
1602 	if (m == NULL)
1603 		return (NULL);
1604 
1605 	ps = veb_ports_array(m);
1606 	for (i = 0; i < m->m_count; i++) {
1607 		p = ps[i];
1608 
1609 		if (strncmp(p->p_ifp0->if_xname, name, IFNAMSIZ) == 0) {
1610 			refcnt_take(&p->p_refs);
1611 			return (p);
1612 		}
1613 	}
1614 
1615 	return (NULL);
1616 }
1617 
/*
 * veb_port_put: release a reference taken by veb_port_get() (or
 * veb_eb_port_take()), waking anyone in refcnt_finalize().
 */
static void
veb_port_put(struct veb_softc *sc, struct veb_port *p)
{
	refcnt_rele_wake(&p->p_refs);
}
1623 
1624 static int
1625 veb_port_set_protected(struct veb_softc *sc, const struct ifbreq *ifbr)
1626 {
1627 	struct veb_port *p;
1628 
1629 	p = veb_port_get(sc, ifbr->ifbr_ifsname);
1630 	if (p == NULL)
1631 		return (ESRCH);
1632 
1633 	p->p_protected = ifbr->ifbr_protected;
1634 	veb_port_put(sc, p);
1635 
1636 	return (0);
1637 }
1638 
/*
 * veb_rule_add: translate an ifbrlreq from userland into a veb_rule
 * and append it to the named port's rule lists (SIOCBRDGARL).
 *
 * The rule is validated and built on the stack first, then copied
 * into a pool allocation and inserted under the per-veb rule lock:
 * once into the port's master list (p_vrl) and once into each
 * per-direction SMR list the rule applies to, so the forwarding
 * path can walk rules without blocking. Returns 0 or an errno.
 */
static int
veb_rule_add(struct veb_softc *sc, const struct ifbrlreq *ifbr)
{
	const struct ifbrarpf *brla = &ifbr->ifbr_arpf;
	struct veb_rule vr, *vrp;
	struct veb_port *p;
	int error;

	memset(&vr, 0, sizeof(vr));

	switch (ifbr->ifbr_action) {
	case BRL_ACTION_BLOCK:
		vr.vr_action = VEB_R_BLOCK;
		break;
	case BRL_ACTION_PASS:
		vr.vr_action = VEB_R_PASS;
		break;
	/* XXX VEB_R_MATCH */
	default:
		return (EINVAL);
	}

	/* a rule must apply to at least one direction */
	if (!ISSET(ifbr->ifbr_flags, BRL_FLAG_IN|BRL_FLAG_OUT))
		return (EINVAL);
	if (ISSET(ifbr->ifbr_flags, BRL_FLAG_IN))
		SET(vr.vr_flags, VEB_R_F_IN);
	if (ISSET(ifbr->ifbr_flags, BRL_FLAG_OUT))
		SET(vr.vr_flags, VEB_R_F_OUT);

	if (ISSET(ifbr->ifbr_flags, BRL_FLAG_SRCVALID)) {
		SET(vr.vr_flags, VEB_R_F_SRC);
		vr.vr_src = ether_addr_to_e64(&ifbr->ifbr_src);
	}
	if (ISSET(ifbr->ifbr_flags, BRL_FLAG_DSTVALID)) {
		SET(vr.vr_flags, VEB_R_F_DST);
		vr.vr_dst = ether_addr_to_e64(&ifbr->ifbr_dst);
	}

	/* ARP rule */
	if (ISSET(brla->brla_flags, BRLA_ARP|BRLA_RARP)) {
		if (ISSET(brla->brla_flags, BRLA_ARP))
			SET(vr.vr_flags, VEB_R_F_ARP);
		if (ISSET(brla->brla_flags, BRLA_RARP))
			SET(vr.vr_flags, VEB_R_F_RARP);

		if (ISSET(brla->brla_flags, BRLA_SHA)) {
			SET(vr.vr_flags, VEB_R_F_SHA);
			vr.vr_arp_sha = brla->brla_sha;
		}
		if (ISSET(brla->brla_flags, BRLA_THA)) {
			SET(vr.vr_flags, VEB_R_F_THA);
			vr.vr_arp_tha = brla->brla_tha;
		}
		if (ISSET(brla->brla_flags, BRLA_SPA)) {
			SET(vr.vr_flags, VEB_R_F_SPA);
			vr.vr_arp_spa = brla->brla_spa;
		}
		if (ISSET(brla->brla_flags, BRLA_TPA)) {
			SET(vr.vr_flags, VEB_R_F_TPA);
			vr.vr_arp_tpa = brla->brla_tpa;
		}
		/* store in wire byte order for comparison with packets */
		vr.vr_arp_op = htons(brla->brla_op);
	}

	if (ifbr->ifbr_tagname[0] != '\0') {
#if NPF > 0
		vr.vr_pftag = pf_tagname2tag((char *)ifbr->ifbr_tagname, 1);
		if (vr.vr_pftag == 0)
			return (ENOMEM);
#else
		return (EINVAL);
#endif
	}

	p = veb_port_get(sc, ifbr->ifbr_ifsname);
	if (p == NULL) {
		error = ESRCH;
		goto error;
	}

	vrp = pool_get(&veb_rule_pool, PR_WAITOK|PR_LIMITFAIL|PR_ZERO);
	if (vrp == NULL) {
		error = ENOMEM;
		goto port_put;
	}

	*vrp = vr;

	/* there's one big lock on a veb for all ports */
	error = rw_enter(&sc->sc_rule_lock, RW_WRITE|RW_INTR);
	if (error != 0)
		goto rule_put;

	TAILQ_INSERT_TAIL(&p->p_vrl, vrp, vr_entry);
	p->p_nvrl++;
	if (ISSET(vr.vr_flags, VEB_R_F_OUT)) {
		SMR_TAILQ_INSERT_TAIL_LOCKED(&p->p_vr_list[0],
		    vrp, vr_lentry[0]);
	}
	if (ISSET(vr.vr_flags, VEB_R_F_IN)) {
		SMR_TAILQ_INSERT_TAIL_LOCKED(&p->p_vr_list[1],
		    vrp, vr_lentry[1]);
	}

	rw_exit(&sc->sc_rule_lock);
	veb_port_put(sc, p);

	return (0);

rule_put:
	pool_put(&veb_rule_pool, vrp);
port_put:
	veb_port_put(sc, p);
error:
#if NPF > 0
	/* pf_tag_unref(0) is harmless when no tag was allocated */
	pf_tag_unref(vr.vr_pftag);
#endif
	return (error);
}
1758 
1759 static void
1760 veb_rule_list_free(struct veb_rule *nvr)
1761 {
1762 	struct veb_rule *vr;
1763 
1764 	while ((vr = nvr) != NULL) {
1765 		nvr = TAILQ_NEXT(vr, vr_entry);
1766 		pool_put(&veb_rule_pool, vr);
1767 	}
1768 }
1769 
/*
 * veb_rule_list_flush: drop every rule on the named port
 * (SIOCBRDGFRL).
 *
 * The lists are emptied under the rule lock, but the rules
 * themselves are only freed after an smr_barrier() so forwarding
 * paths still walking the SMR lists cannot touch freed memory.
 */
static int
veb_rule_list_flush(struct veb_softc *sc, const struct ifbrlreq *ifbr)
{
	struct veb_port *p;
	struct veb_rule *vr;
	int error;

	p = veb_port_get(sc, ifbr->ifbr_ifsname);
	if (p == NULL)
		return (ESRCH);

	error = rw_enter(&sc->sc_rule_lock, RW_WRITE|RW_INTR);
	if (error != 0) {
		veb_port_put(sc, p);
		return (error);
	}

	/* take all the rules away */
	vr = TAILQ_FIRST(&p->p_vrl);

	/* reset the lists and counts of rules */
	TAILQ_INIT(&p->p_vrl);
	p->p_nvrl = 0;
	SMR_TAILQ_INIT(&p->p_vr_list[0]);
	SMR_TAILQ_INIT(&p->p_vr_list[1]);

	rw_exit(&sc->sc_rule_lock);
	veb_port_put(sc, p);

	/* wait out SMR readers in the forwarding path before freeing */
	smr_barrier();
	veb_rule_list_free(vr);

	return (0);
}
1804 
/*
 * veb_rule2ifbr: translate an in-kernel veb_rule back into the
 * ifbrlreq wire format for SIOCBRDGGRL. This is the inverse of the
 * translation done in veb_rule_add(); the caller has already zeroed
 * and named the ifbr.
 */
static void
veb_rule2ifbr(struct ifbrlreq *ifbr, const struct veb_rule *vr)
{
	switch (vr->vr_action) {
	case VEB_R_PASS:
		ifbr->ifbr_action = BRL_ACTION_PASS;
		break;
	case VEB_R_BLOCK:
		ifbr->ifbr_action = BRL_ACTION_BLOCK;
		break;
	}

	if (ISSET(vr->vr_flags, VEB_R_F_IN))
		SET(ifbr->ifbr_flags, BRL_FLAG_IN);
	if (ISSET(vr->vr_flags, VEB_R_F_OUT))
		SET(ifbr->ifbr_flags, BRL_FLAG_OUT);

	if (ISSET(vr->vr_flags, VEB_R_F_SRC)) {
		SET(ifbr->ifbr_flags, BRL_FLAG_SRCVALID);
		ether_e64_to_addr(&ifbr->ifbr_src, vr->vr_src);
	}
	if (ISSET(vr->vr_flags, VEB_R_F_DST)) {
		SET(ifbr->ifbr_flags, BRL_FLAG_DSTVALID);
		ether_e64_to_addr(&ifbr->ifbr_dst, vr->vr_dst);
	}

	/* ARP rule */
	if (ISSET(vr->vr_flags, VEB_R_F_ARP|VEB_R_F_RARP)) {
		struct ifbrarpf *brla = &ifbr->ifbr_arpf;

		if (ISSET(vr->vr_flags, VEB_R_F_ARP))
			SET(brla->brla_flags, BRLA_ARP);
		if (ISSET(vr->vr_flags, VEB_R_F_RARP))
			SET(brla->brla_flags, BRLA_RARP);

		if (ISSET(vr->vr_flags, VEB_R_F_SHA)) {
			SET(brla->brla_flags, BRLA_SHA);
			brla->brla_sha = vr->vr_arp_sha;
		}
		if (ISSET(vr->vr_flags, VEB_R_F_THA)) {
			SET(brla->brla_flags, BRLA_THA);
			brla->brla_tha = vr->vr_arp_tha;
		}

		if (ISSET(vr->vr_flags, VEB_R_F_SPA)) {
			SET(brla->brla_flags, BRLA_SPA);
			brla->brla_spa = vr->vr_arp_spa;
		}
		if (ISSET(vr->vr_flags, VEB_R_F_TPA)) {
			SET(brla->brla_flags, BRLA_TPA);
			brla->brla_tpa = vr->vr_arp_tpa;
		}

		/* vr_arp_op is kept in wire byte order */
		brla->brla_op = ntohs(vr->vr_arp_op);
	}

#if NPF > 0
	if (vr->vr_pftag != 0)
		pf_tag2tagname(vr->vr_pftag, ifbr->ifbr_tagname);
#endif
}
1866 
1867 static int
1868 veb_rule_list_get(struct veb_softc *sc, struct ifbrlconf *ifbrl)
1869 {
1870 	struct veb_port *p;
1871 	struct veb_rule *vr;
1872 	struct ifbrlreq *ifbr, *ifbrs;
1873 	int error = 0;
1874 	size_t len;
1875 
1876 	p = veb_port_get(sc, ifbrl->ifbrl_ifsname);
1877 	if (p == NULL)
1878 		return (ESRCH);
1879 
1880 	len = p->p_nvrl; /* estimate */
1881 	if (ifbrl->ifbrl_len == 0 || len == 0) {
1882 		ifbrl->ifbrl_len = len * sizeof(*ifbrs);
1883 		goto port_put;
1884 	}
1885 
1886 	error = rw_enter(&sc->sc_rule_lock, RW_READ|RW_INTR);
1887 	if (error != 0)
1888 		goto port_put;
1889 
1890 	ifbrs = mallocarray(p->p_nvrl, sizeof(*ifbrs), M_TEMP,
1891 	    M_WAITOK|M_CANFAIL|M_ZERO);
1892 	if (ifbrs == NULL) {
1893 		rw_exit(&sc->sc_rule_lock);
1894 		goto port_put;
1895 	}
1896 	len = p->p_nvrl * sizeof(*ifbrs);
1897 
1898 	ifbr = ifbrs;
1899 	TAILQ_FOREACH(vr, &p->p_vrl, vr_entry) {
1900 		strlcpy(ifbr->ifbr_name, sc->sc_if.if_xname, IFNAMSIZ);
1901 		strlcpy(ifbr->ifbr_ifsname, p->p_ifp0->if_xname, IFNAMSIZ);
1902 		veb_rule2ifbr(ifbr, vr);
1903 
1904 		ifbr++;
1905 	}
1906 
1907 	rw_exit(&sc->sc_rule_lock);
1908 
1909 	error = copyout(ifbrs, ifbrl->ifbrl_buf, min(len, ifbrl->ifbrl_len));
1910 	if (error == 0)
1911 		ifbrl->ifbrl_len = len;
1912 	free(ifbrs, M_TEMP, len);
1913 
1914 port_put:
1915 	veb_port_put(sc, p);
1916 	return (error);
1917 }
1918 
/*
 * veb_port_list: copy descriptions of all normal and span ports out
 * to userland (SIOCBRDGIFS).
 *
 * When the caller passes ifbic_len == 0, only the number of entries
 * is counted so userland can size its buffer. Otherwise entries are
 * copied out until the buffer is exhausted; ifbic_len is set to the
 * total bytes copied on return.
 */
static int
veb_port_list(struct veb_softc *sc, struct ifbifconf *bifc)
{
	struct ifnet *ifp = &sc->sc_if;
	struct veb_ports *m;
	struct veb_port **ps;
	struct veb_port *p;
	struct ifnet *ifp0;
	struct ifbreq breq;
	int n = 0, error = 0;
	unsigned int i;

	NET_ASSERT_LOCKED();

	/* size request only: report how many entries exist */
	if (bifc->ifbic_len == 0) {
		m = SMR_PTR_GET_LOCKED(&sc->sc_ports);
		if (m != NULL)
			n += m->m_count;
		m = SMR_PTR_GET_LOCKED(&sc->sc_spans);
		if (m != NULL)
			n += m->m_count;
		goto done;
	}

	/* normal bridge ports */
	m = SMR_PTR_GET_LOCKED(&sc->sc_ports);
	if (m != NULL) {
		ps = veb_ports_array(m);
		for (i = 0; i < m->m_count; i++) {
			if (bifc->ifbic_len < sizeof(breq))
				break;

			p = ps[i];

			memset(&breq, 0, sizeof(breq));

			ifp0 = p->p_ifp0;

			strlcpy(breq.ifbr_name, ifp->if_xname, IFNAMSIZ);
			strlcpy(breq.ifbr_ifsname, ifp0->if_xname, IFNAMSIZ);

			breq.ifbr_ifsflags = p->p_bif_flags;
			breq.ifbr_portno = ifp0->if_index;
			breq.ifbr_protected = p->p_protected;
			if ((error = copyout(&breq, bifc->ifbic_req + n,
			    sizeof(breq))) != 0)
				goto done;

			bifc->ifbic_len -= sizeof(breq);
			n++;
		}
	}

	/* span ports */
	m = SMR_PTR_GET_LOCKED(&sc->sc_spans);
	if (m != NULL) {
		ps = veb_ports_array(m);
		for (i = 0; i < m->m_count; i++) {
			if (bifc->ifbic_len < sizeof(breq))
				break;

			p = ps[i];

			memset(&breq, 0, sizeof(breq));

			strlcpy(breq.ifbr_name, ifp->if_xname, IFNAMSIZ);
			strlcpy(breq.ifbr_ifsname, p->p_ifp0->if_xname,
			    IFNAMSIZ);

			breq.ifbr_ifsflags = p->p_bif_flags;
			if ((error = copyout(&breq, bifc->ifbic_req + n,
			    sizeof(breq))) != 0)
				goto done;

			bifc->ifbic_len -= sizeof(breq);
			n++;
		}
	}

done:
	bifc->ifbic_len = n * sizeof(breq);
	return (error);
}
2000 
2001 static int
2002 veb_port_set_flags(struct veb_softc *sc, struct ifbreq *ifbr)
2003 {
2004 	struct veb_port *p;
2005 
2006 	if (ISSET(ifbr->ifbr_ifsflags, ~VEB_IFBIF_FLAGS))
2007 		return (EINVAL);
2008 
2009 	p = veb_port_get(sc, ifbr->ifbr_ifsname);
2010 	if (p == NULL)
2011 		return (ESRCH);
2012 
2013 	p->p_bif_flags = ifbr->ifbr_ifsflags;
2014 
2015 	veb_port_put(sc, p);
2016 	return (0);
2017 }
2018 
2019 static int
2020 veb_port_get_flags(struct veb_softc *sc, struct ifbreq *ifbr)
2021 {
2022 	struct veb_port *p;
2023 
2024 	p = veb_port_get(sc, ifbr->ifbr_ifsname);
2025 	if (p == NULL)
2026 		return (ESRCH);
2027 
2028 	ifbr->ifbr_ifsflags = p->p_bif_flags;
2029 	ifbr->ifbr_portno = p->p_ifp0->if_index;
2030 	ifbr->ifbr_protected = p->p_protected;
2031 
2032 	veb_port_put(sc, p);
2033 	return (0);
2034 }
2035 
2036 static int
2037 veb_add_addr(struct veb_softc *sc, const struct ifbareq *ifba)
2038 {
2039 	struct veb_port *p;
2040 	int error = 0;
2041 	unsigned int type;
2042 
2043 	if (ISSET(ifba->ifba_flags, ~IFBAF_TYPEMASK))
2044 		return (EINVAL);
2045 	switch (ifba->ifba_flags & IFBAF_TYPEMASK) {
2046 	case IFBAF_DYNAMIC:
2047 		type = EBE_DYNAMIC;
2048 		break;
2049 	case IFBAF_STATIC:
2050 		type = EBE_STATIC;
2051 		break;
2052 	default:
2053 		return (EINVAL);
2054 	}
2055 
2056 	if (ifba->ifba_dstsa.ss_family != AF_UNSPEC)
2057 		return (EAFNOSUPPORT);
2058 
2059 	p = veb_port_get(sc, ifba->ifba_ifsname);
2060 	if (p == NULL)
2061 		return (ESRCH);
2062 
2063 	error = etherbridge_add_addr(&sc->sc_eb, p, &ifba->ifba_dst, type);
2064 
2065 	veb_port_put(sc, p);
2066 
2067 	return (error);
2068 }
2069 
/*
 * veb_del_addr: remove an entry from the learning cache
 * (SIOCBRDGDADDR).
 */
static int
veb_del_addr(struct veb_softc *sc, const struct ifbareq *ifba)
{
	return (etherbridge_del_addr(&sc->sc_eb, &ifba->ifba_dst));
}
2075 
/*
 * veb_p_ioctl: if_ioctl handler installed on member interfaces
 * (except vports) while they are bridge ports.
 *
 * Blocks SIOCSIFADDR so an address cannot be configured on a port,
 * and passes everything else through to the interface's original
 * ioctl handler saved in p_ioctl.
 */
static int
veb_p_ioctl(struct ifnet *ifp0, u_long cmd, caddr_t data)
{
	const struct ether_brport *eb = ether_brport_get_locked(ifp0);
	struct veb_port *p;
	int error = 0;

	/* this handler is only installed while the brport is set */
	KASSERTMSG(eb != NULL,
	    "%s: %s called without an ether_brport set",
	    ifp0->if_xname, __func__);
	KASSERTMSG((eb->eb_input == veb_port_input) ||
	    (eb->eb_input == veb_span_input),
	    "%s called %s, but eb_input (%p) seems wrong",
	    ifp0->if_xname, __func__, eb->eb_input);

	p = eb->eb_port;

	switch (cmd) {
	case SIOCSIFADDR:
		/* bridge ports may not carry layer 3 addresses */
		error = EBUSY;
		break;

	default:
		error = (*p->p_ioctl)(ifp0, cmd, data);
		break;
	}

	return (error);
}
2105 
/*
 * veb_p_output: if_output handler installed on member interfaces
 * while they are bridge ports.
 *
 * Only packets injected through bpf (tagged PACKET_TAG_DLT) may be
 * transmitted directly out a port; everything else must go through
 * the bridge. The original output routine is picked up under SMR so
 * a concurrent port removal cannot leave us with a stale pointer.
 */
static int
veb_p_output(struct ifnet *ifp0, struct mbuf *m, struct sockaddr *dst,
    struct rtentry *rt)
{
	int (*p_output)(struct ifnet *, struct mbuf *, struct sockaddr *,
	    struct rtentry *) = NULL;
	const struct ether_brport *eb;

	/* restrict transmission to bpf only */
	if ((m_tag_find(m, PACKET_TAG_DLT, NULL) == NULL)) {
		m_freem(m);
		return (EBUSY);
	}

	smr_read_enter();
	eb = ether_brport_get(ifp0);
	if (eb != NULL && eb->eb_input == veb_port_input) {
		struct veb_port *p = eb->eb_port;
		p_output = p->p_output; /* code doesn't go away */
	}
	smr_read_leave();

	if (p_output == NULL) {
		m_freem(m);
		return (ENXIO);
	}

	return ((*p_output)(ifp0, m, dst, rt));
}
2135 
2136 /*
2137  * there must be an smr_barrier after ether_brport_clr() and before
2138  * veb_port is freed in veb_p_fini()
2139  */
2140 
/*
 * veb_p_unlink: detach a port from its underlying interface.
 *
 * Restores the interface's original ioctl/output handlers, clears
 * the brport (the caller must follow up with an smr_barrier()
 * before freeing the port - see veb_p_dtor()), removes our hooks,
 * and for normal ports turns promiscuous mode back off and purges
 * the port's learning cache entries.
 */
static void
veb_p_unlink(struct veb_softc *sc, struct veb_port *p)
{
	struct ifnet *ifp = &sc->sc_if;
	struct ifnet *ifp0 = p->p_ifp0;

	ifp0->if_ioctl = p->p_ioctl;
	ifp0->if_output = p->p_output;

	ether_brport_clr(ifp0); /* needs an smr_barrier */

	if_detachhook_del(ifp0, &p->p_dtask);
	if_linkstatehook_del(ifp0, &p->p_ltask);

	if (!ISSET(p->p_bif_flags, IFBIF_SPAN)) {
		/* best effort; the interface may be going away anyway */
		if (ifpromisc(ifp0, 0) != 0) {
			log(LOG_WARNING, "%s %s: unable to disable promisc\n",
			    ifp->if_xname, ifp0->if_xname);
		}

		etherbridge_detach_port(&sc->sc_eb, p);
	}
}
2164 
/*
 * veb_p_fini: final teardown of a port.
 *
 * Waits for all outstanding port references to drain, frees the
 * port's rules, drops the interface reference taken by if_unit() in
 * veb_add_port(), and frees the port itself. The caller must have
 * issued an smr_barrier() after ether_brport_clr() before this runs.
 */
static void
veb_p_fini(struct veb_port *p)
{
	struct ifnet *ifp0 = p->p_ifp0;

	refcnt_finalize(&p->p_refs, "vebpdtor");
	veb_rule_list_free(TAILQ_FIRST(&p->p_vrl));

	if_put(ifp0);
	free(p, M_DEVBUF, sizeof(*p)); /* hope you didn't forget smr_barrier */
}
2176 
/*
 * veb_p_dtor: remove a port from the bridge and destroy it.
 *
 * Publishes a new ports map without the port, unhooks the port from
 * its interface, then waits out SMR readers before finalizing the
 * old map's references and freeing both the old map and the port.
 * The ordering (unpublish, unlink, smr_barrier, free) is what keeps
 * the lockless forwarding path safe.
 */
static void
veb_p_dtor(struct veb_softc *sc, struct veb_port *p)
{
	struct veb_ports **ports_ptr;
	struct veb_ports *om, *nm;

	/* the port lives in exactly one of the two maps */
	ports_ptr = ISSET(p->p_bif_flags, IFBIF_SPAN) ?
	    &sc->sc_spans : &sc->sc_ports;

	om = SMR_PTR_GET_LOCKED(ports_ptr);
	nm = veb_ports_remove(om, p);
	SMR_PTR_SET_LOCKED(ports_ptr, nm);

	veb_p_unlink(sc, p);

	smr_barrier();
	refcnt_finalize(&om->m_refs, "vebports");
	veb_ports_destroy(om);

	veb_p_fini(p);
}
2198 
2199 static void
2200 veb_p_detach(void *arg)
2201 {
2202 	struct veb_port *p = arg;
2203 	struct veb_softc *sc = p->p_veb;
2204 
2205 	NET_ASSERT_LOCKED();
2206 
2207 	veb_p_dtor(sc, p);
2208 }
2209 
2210 static int
2211 veb_p_active(struct veb_port *p)
2212 {
2213 	struct ifnet *ifp0 = p->p_ifp0;
2214 
2215 	return (ISSET(ifp0->if_flags, IFF_RUNNING) &&
2216 	    LINK_STATE_IS_UP(ifp0->if_link_state));
2217 }
2218 
2219 static void
2220 veb_p_linkch(void *arg)
2221 {
2222 	struct veb_port *p = arg;
2223 	u_char link_state = LINK_STATE_FULL_DUPLEX;
2224 
2225 	NET_ASSERT_LOCKED();
2226 
2227 	if (!veb_p_active(p))
2228 		link_state = LINK_STATE_DOWN;
2229 
2230 	p->p_link_state = link_state;
2231 }
2232 
2233 static int
2234 veb_up(struct veb_softc *sc)
2235 {
2236 	struct ifnet *ifp = &sc->sc_if;
2237 	int error;
2238 
2239 	error = etherbridge_up(&sc->sc_eb);
2240 	if (error != 0)
2241 		return (error);
2242 
2243 	NET_ASSERT_LOCKED();
2244 	SET(ifp->if_flags, IFF_RUNNING);
2245 
2246 	return (0);
2247 }
2248 
/*
 * veb_iff: interface filter reconfiguration hook (run on ENETRESET);
 * veb has no hardware filters to reprogram, so this is a no-op.
 */
static int
veb_iff(struct veb_softc *sc)
{
	return (0);
}
2254 
2255 static int
2256 veb_down(struct veb_softc *sc)
2257 {
2258 	struct ifnet *ifp = &sc->sc_if;
2259 	int error;
2260 
2261 	error = etherbridge_down(&sc->sc_eb);
2262 	if (error != 0)
2263 		return (0);
2264 
2265 	NET_ASSERT_LOCKED();
2266 	CLR(ifp->if_flags, IFF_RUNNING);
2267 
2268 	return (0);
2269 }
2270 
/*
 * veb_eb_port_cmp: etherbridge glue. Ports are compared by
 * identity; two entries refer to the same port iff the pointers
 * are equal.
 */
static int
veb_eb_port_cmp(void *arg, void *a, void *b)
{
	struct veb_port *pa = a;
	struct veb_port *pb = b;

	return (pa == pb);
}
2277 
2278 static void *
2279 veb_eb_port_take(void *arg, void *port)
2280 {
2281 	struct veb_port *p = port;
2282 
2283 	refcnt_take(&p->p_refs);
2284 
2285 	return (p);
2286 }
2287 
2288 static void
2289 veb_eb_port_rele(void *arg, void *port)
2290 {
2291 	struct veb_port *p = port;
2292 
2293 	refcnt_rele_wake(&p->p_refs);
2294 }
2295 
/*
 * veb_eb_brport_take: ether_brport glue; same as veb_eb_port_take()
 * but without the unused context argument.
 */
static void
veb_eb_brport_take(void *port)
{
	veb_eb_port_take(NULL, port);
}
2301 
/*
 * veb_eb_brport_rele: ether_brport glue; same as veb_eb_port_rele()
 * but without the unused context argument.
 */
static void
veb_eb_brport_rele(void *port)
{
	veb_eb_port_rele(NULL, port);
}
2307 
2308 static size_t
2309 veb_eb_port_ifname(void *arg, char *dst, size_t len, void *port)
2310 {
2311 	struct veb_port *p = port;
2312 
2313 	return (strlcpy(dst, p->p_ifp0->if_xname, len));
2314 }
2315 
/*
 * veb_eb_port_sa: etherbridge glue to report a port's tunnel
 * endpoint address; veb ports have none, so report AF_UNSPEC.
 */
static void
veb_eb_port_sa(void *arg, struct sockaddr_storage *ss, void *port)
{
	ss->ss_family = AF_UNSPEC;
}
2321 
2322 /*
2323  * virtual ethernet bridge port
2324  */
2325 
/*
 * vport_clone_create: create a vport(4) interface. A vport looks
 * like a normal ethernet interface to the stack but hands its
 * transmitted packets to a veb for switching (see vport_enqueue()).
 */
static int
vport_clone_create(struct if_clone *ifc, int unit)
{
	struct vport_softc *sc;
	struct ifnet *ifp;

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO|M_CANFAIL);
	if (sc == NULL)
		return (ENOMEM);

	ifp = &sc->sc_ac.ac_if;

	snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", ifc->ifc_name, unit);

	ifp->if_softc = sc;
	ifp->if_type = IFT_ETHER;
	ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
	ifp->if_ioctl = vport_ioctl;
	ifp->if_enqueue = vport_enqueue;
	ifp->if_qstart = vport_start;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE;
	/* a vport has no hardware, so generate a random MAC address */
	ether_fakeaddr(ifp);

	if_counters_alloc(ifp);
	if_attach(ifp);
	ether_ifattach(ifp);

	return (0);
}
2356 
/*
 * vport_clone_destroy: destroy a vport(4) interface. The softc is
 * marked dead under the net lock first so concurrent ioctls bail
 * out with ENXIO before the interface is detached and freed.
 */
static int
vport_clone_destroy(struct ifnet *ifp)
{
	struct vport_softc *sc = ifp->if_softc;

	NET_LOCK();
	sc->sc_dead = 1;

	if (ISSET(ifp->if_flags, IFF_RUNNING))
		vport_down(sc);
	NET_UNLOCK();

	ether_ifdetach(ifp);
	if_detach(ifp);

	free(sc, M_DEVBUF, sizeof(*sc));

	return (0);
}
2376 
2377 static int
2378 vport_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
2379 {
2380 	struct vport_softc *sc = ifp->if_softc;
2381 	int error = 0;
2382 
2383 	if (sc->sc_dead)
2384 		return (ENXIO);
2385 
2386 	switch (cmd) {
2387 	case SIOCSIFFLAGS:
2388 		if (ISSET(ifp->if_flags, IFF_UP)) {
2389 			if (!ISSET(ifp->if_flags, IFF_RUNNING))
2390 				error = vport_up(sc);
2391 		} else {
2392 			if (ISSET(ifp->if_flags, IFF_RUNNING))
2393 				error = vport_down(sc);
2394 		}
2395 		break;
2396 
2397 	case SIOCADDMULTI:
2398 	case SIOCDELMULTI:
2399 		break;
2400 
2401 	default:
2402 		error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
2403 		break;
2404 	}
2405 
2406 	if (error == ENETRESET)
2407 		error = vport_iff(sc);
2408 
2409 	return (error);
2410 }
2411 
2412 static int
2413 vport_up(struct vport_softc *sc)
2414 {
2415 	struct ifnet *ifp = &sc->sc_ac.ac_if;
2416 
2417 	NET_ASSERT_LOCKED();
2418 	SET(ifp->if_flags, IFF_RUNNING);
2419 
2420 	return (0);
2421 }
2422 
/*
 * vport_iff: interface filter reconfiguration hook (run on
 * ENETRESET); vports have no filters to reprogram, so it's a no-op.
 */
static int
vport_iff(struct vport_softc *sc)
{
	return (0);
}
2428 
2429 static int
2430 vport_down(struct vport_softc *sc)
2431 {
2432 	struct ifnet *ifp = &sc->sc_ac.ac_if;
2433 
2434 	NET_ASSERT_LOCKED();
2435 	CLR(ifp->if_flags, IFF_RUNNING);
2436 
2437 	return (0);
2438 }
2439 
/*
 * vport_if_enqueue: used as a port's p_enqueue when the port is a
 * vport. Switching a frame toward a vport means delivering it to
 * the local network stack; this adapts if_vinput() to the
 * if_enqueue() signature veb uses for every port.
 */
static int
vport_if_enqueue(struct ifnet *ifp, struct mbuf *m)
{
	if_vinput(ifp, m);

	return (0);
}
2453 
/*
 * vport_enqueue: if_enqueue handler for vport(4).
 *
 * A packet sent from the l3 stack out a vport is handed straight to
 * the veb the vport is attached to (via the SMR-protected brport)
 * for switching out another port. If the vport is not on a bridge
 * the packet is dropped with ENETDOWN.
 */
static int
vport_enqueue(struct ifnet *ifp, struct mbuf *m)
{
	struct arpcom *ac;
	const struct ether_brport *eb;
	int error = ENETDOWN;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	/*
	 * a packet sent from the l3 stack out a vport goes into
	 * veb for switching out another port.
	 */

#if NPF > 0
	/*
	 * there's no relationship between pf states in the l3 stack
	 * and the l2 bridge.
	 */
	pf_pkt_addr_changed(m);
#endif

	ac = (struct arpcom *)ifp;

	/* take a reference on the brport so it can't go away under us */
	smr_read_enter();
	eb = SMR_PTR_GET(&ac->ac_brport);
	if (eb != NULL)
		eb->eb_port_take(eb->eb_port);
	smr_read_leave();
	if (eb != NULL) {
		struct mbuf *(*input)(struct ifnet *, struct mbuf *,
		    uint64_t, void *) = eb->eb_input;
		struct ether_header *eh;
		uint64_t dst;

		counters_pkt(ifp->if_counters, ifc_opackets, ifc_obytes,
		    m->m_pkthdr.len);

#if NBPFILTER > 0
		if_bpf = READ_ONCE(ifp->if_bpf);
		if (if_bpf != NULL)
			bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT);
#endif

		eh = mtod(m, struct ether_header *);
		dst = ether_addr_to_e64((struct ether_addr *)eh->ether_dhost);

		/* a vport-to-vport frame enters the bridge like any other */
		if (input == veb_vport_input)
			input = veb_port_input;
		/* input consumes the mbuf and returns NULL on success */
		m = (*input)(ifp, m, dst, eb->eb_port);

		error = 0;

		eb->eb_port_rele(eb->eb_port);
	}

	m_freem(m);

	return (error);
}
2515 
/*
 * vport_start: ifq start routine. vport transmission is consumed
 * directly by vport_enqueue(), so anything that ends up on the
 * send queue is simply discarded.
 */
static void
vport_start(struct ifqueue *ifq)
{
	ifq_purge(ifq);
}
2521