xref: /openbsd/sys/net/if_bpe.c (revision 09467b48)
1 /*	$OpenBSD: if_bpe.c,v 1.13 2020/07/22 08:38:51 dlg Exp $ */
2 /*
3  * Copyright (c) 2018 David Gwynne <dlg@openbsd.org>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 
18 #include "bpfilter.h"
19 #include "pf.h"
20 
21 #include <sys/param.h>
22 #include <sys/systm.h>
23 #include <sys/kernel.h>
24 #include <sys/mbuf.h>
25 #include <sys/socket.h>
26 #include <sys/ioctl.h>
27 #include <sys/timeout.h>
28 #include <sys/pool.h>
29 #include <sys/tree.h>
30 
31 #include <net/if.h>
32 #include <net/if_var.h>
33 #include <net/if_dl.h>
34 #include <net/if_media.h>
35 #include <net/if_types.h>
36 #include <net/rtable.h>
37 
38 #include <netinet/in.h>
39 #include <netinet/if_ether.h>
40 
41 /* for bridge stuff */
42 #include <net/if_bridge.h>
43 
44 
45 #if NBPFILTER > 0
46 #include <net/bpf.h>
47 #endif
48 
49 #include <net/if_bpe.h>
50 
51 #define PBB_ITAG_ISID		0x00ffffff
52 #define PBB_ITAG_ISID_MIN	0x00000000
53 #define PBB_ITAG_ISID_MAX	0x00ffffff
54 #define PBB_ITAG_RES2		0x03000000	/* must be zero on input */
55 #define PBB_ITAG_RES1		0x04000000	/* ignore on input */
56 #define PBB_ITAG_UCA		0x08000000
57 #define PBB_ITAG_DEI		0x10000000
58 #define PBB_ITAG_PCP_SHIFT	29
59 #define PBB_ITAG_PCP_MASK	(0x7U << PBB_ITAG_PCP_SHIFT)
60 
61 #define BPE_BRIDGE_AGE_TMO	100 /* seconds */
62 
/*
 * Key identifying one bpe interface: the parent interface it runs on
 * plus the 24 bit backbone service instance id (I-SID).  Also serves
 * as the node in the global bpe_tree; struct bpe_softc embeds this as
 * its first member so the softc can be cast to/from a bpe_key.
 */
struct bpe_key {
	int			k_if;	/* parent interface index */
	uint32_t		k_isid;	/* 24 bit service instance id */

	/*
	 * NOTE(review): the entry tag says "bpe_tunnel" but this struct
	 * is bpe_key; RBT_ENTRY does not appear to use its argument, but
	 * confirm against <sys/tree.h>.
	 */
	RBT_ENTRY(bpe_tunnel)	k_entry;
};
69 
70 RBT_HEAD(bpe_tree, bpe_key);
71 
72 static inline int bpe_cmp(const struct bpe_key *, const struct bpe_key *);
73 
74 RBT_PROTOTYPE(bpe_tree, bpe_key, k_entry, bpe_cmp);
75 RBT_GENERATE(bpe_tree, bpe_key, k_entry, bpe_cmp);
76 
/*
 * One learned mapping from a customer MAC address to the backbone
 * MAC address it was last seen behind.  Entries are reference counted
 * because lookups under the read lock may race with removal.
 */
struct bpe_entry {
	struct ether_addr	be_c_da; /* customer address - must be first */
	struct ether_addr	be_b_da; /* bridge address */
	unsigned int		be_type; /* BPE_ENTRY_DYNAMIC or _STATIC */
#define BPE_ENTRY_DYNAMIC		0
#define BPE_ENTRY_STATIC		1
	struct refcnt		be_refs; /* held by the map and by users */
	time_t			be_age;	/* uptime the address was last seen */

	RBT_ENTRY(bpe_entry)	be_entry;
};
88 
89 RBT_HEAD(bpe_map, bpe_entry);
90 
91 static inline int bpe_entry_cmp(const struct bpe_entry *,
92     const struct bpe_entry *);
93 
94 RBT_PROTOTYPE(bpe_map, bpe_entry, be_entry, bpe_entry_cmp);
95 RBT_GENERATE(bpe_map, bpe_entry, be_entry, bpe_entry_cmp);
96 
/*
 * Per-interface state for a bpe(4) pseudo-device.
 */
struct bpe_softc {
	struct bpe_key		sc_key; /* must be first */
	struct arpcom		sc_ac;	/* ethernet interface state */
	int			sc_txhprio;	/* I-TAG PCP source on tx */
	int			sc_rxhprio;	/* packet prio source on rx */
	uint8_t			sc_group[ETHER_ADDR_LEN]; /* flood address */

	struct task		sc_ltask;	/* parent link state hook */
	struct task		sc_dtask;	/* parent detach hook */

	/* learned customer-to-backbone address map */
	struct bpe_map		sc_bridge_map;
	struct rwlock		sc_bridge_lock;	/* protects the map */
	unsigned int		sc_bridge_num;	/* entries in the map */
	unsigned int		sc_bridge_max;	/* learning limit */
	int			sc_bridge_tmo; /* seconds */
	struct timeout		sc_bridge_age;	/* periodic expiry timer */
};
114 
115 void		bpeattach(int);
116 
117 static int	bpe_clone_create(struct if_clone *, int);
118 static int	bpe_clone_destroy(struct ifnet *);
119 
120 static void	bpe_start(struct ifnet *);
121 static int	bpe_ioctl(struct ifnet *, u_long, caddr_t);
122 static int	bpe_media_get(struct bpe_softc *, struct ifreq *);
123 static int	bpe_up(struct bpe_softc *);
124 static int	bpe_down(struct bpe_softc *);
125 static int	bpe_multi(struct bpe_softc *, struct ifnet *, u_long);
126 static int	bpe_set_vnetid(struct bpe_softc *, const struct ifreq *);
127 static void	bpe_set_group(struct bpe_softc *, uint32_t);
128 static int	bpe_set_parent(struct bpe_softc *, const struct if_parent *);
129 static int	bpe_get_parent(struct bpe_softc *, struct if_parent *);
130 static int	bpe_del_parent(struct bpe_softc *);
131 static void	bpe_link_hook(void *);
132 static void	bpe_link_state(struct bpe_softc *, u_char, uint64_t);
133 static void	bpe_detach_hook(void *);
134 
135 static void	bpe_input_map(struct bpe_softc *,
136 		    const uint8_t *, const uint8_t *);
137 static void	bpe_bridge_age(void *);
138 
139 static struct if_clone bpe_cloner =
140     IF_CLONE_INITIALIZER("bpe", bpe_clone_create, bpe_clone_destroy);
141 
142 static struct bpe_tree bpe_interfaces = RBT_INITIALIZER();
143 static struct rwlock bpe_lock = RWLOCK_INITIALIZER("bpeifs");
144 static struct pool bpe_entry_pool;
145 
/*
 * bpeattach: pseudo-device attach hook, run once at boot.  "count" is
 * the number of devices hinted by config(8) and is unused because bpe
 * interfaces are only created on demand through cloning.
 */
void
bpeattach(int count)
{
	if_clone_attach(&bpe_cloner);
}
151 
152 static int
153 bpe_clone_create(struct if_clone *ifc, int unit)
154 {
155 	struct bpe_softc *sc;
156 	struct ifnet *ifp;
157 
158 	if (bpe_entry_pool.pr_size == 0) {
159 		pool_init(&bpe_entry_pool, sizeof(struct bpe_entry), 0,
160 		    IPL_NONE, 0, "bpepl", NULL);
161 	}
162 
163 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
164 	ifp = &sc->sc_ac.ac_if;
165 
166 	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
167 	    ifc->ifc_name, unit);
168 
169 	sc->sc_key.k_if = 0;
170 	sc->sc_key.k_isid = 0;
171 	bpe_set_group(sc, 0);
172 
173 	sc->sc_txhprio = IF_HDRPRIO_PACKET;
174 	sc->sc_rxhprio = IF_HDRPRIO_OUTER;
175 
176 	task_set(&sc->sc_ltask, bpe_link_hook, sc);
177 	task_set(&sc->sc_dtask, bpe_detach_hook, sc);
178 
179 	rw_init(&sc->sc_bridge_lock, "bpebr");
180 	RBT_INIT(bpe_map, &sc->sc_bridge_map);
181 	sc->sc_bridge_num = 0;
182 	sc->sc_bridge_max = 100; /* XXX */
183 	sc->sc_bridge_tmo = 240;
184 	timeout_set_proc(&sc->sc_bridge_age, bpe_bridge_age, sc);
185 
186 	ifp->if_softc = sc;
187 	ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
188 	ifp->if_ioctl = bpe_ioctl;
189 	ifp->if_start = bpe_start;
190 	ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST;
191 	ifp->if_xflags = IFXF_CLONED;
192 	ifq_set_maxlen(&ifp->if_snd, IFQ_MAXLEN);
193 	ether_fakeaddr(ifp);
194 
195 	if_counters_alloc(ifp);
196 	if_attach(ifp);
197 	ether_ifattach(ifp);
198 
199 	return (0);
200 }
201 
202 static int
203 bpe_clone_destroy(struct ifnet *ifp)
204 {
205 	struct bpe_softc *sc = ifp->if_softc;
206 
207 	NET_LOCK();
208 	if (ISSET(ifp->if_flags, IFF_RUNNING))
209 		bpe_down(sc);
210 	NET_UNLOCK();
211 
212 	ether_ifdetach(ifp);
213 	if_detach(ifp);
214 
215 	free(sc, M_DEVBUF, sizeof(*sc));
216 
217 	return (0);
218 }
219 
220 static inline int
221 bpe_entry_valid(struct bpe_softc *sc, const struct bpe_entry *be)
222 {
223 	time_t diff;
224 
225 	if (be == NULL)
226 		return (0);
227 
228 	if (be->be_type == BPE_ENTRY_STATIC)
229 		return (1);
230 
231 	diff = getuptime() - be->be_age;
232 	if (diff < sc->sc_bridge_tmo)
233 		return (1);
234 
235 	return (0);
236 }
237 
/*
 * bpe_start: output handler.  Dequeues customer frames, prepends the
 * backbone ethernet header plus the 802.1ah I-TAG, and hands the
 * encapsulated packet to the parent interface.
 */
static void
bpe_start(struct ifnet *ifp)
{
	struct bpe_softc *sc = ifp->if_softc;
	struct ifnet *ifp0;
	struct mbuf *m0, *m;
	struct ether_header *ceh;
	struct ether_header *beh;
	uint32_t itag, *itagp;
	int hlen = sizeof(*beh) + sizeof(*itagp);
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif
	int txprio;
	uint8_t prio;

	/* without a running parent there is nowhere to send packets */
	ifp0 = if_get(sc->sc_key.k_if);
	if (ifp0 == NULL || !ISSET(ifp0->if_flags, IFF_RUNNING)) {
		ifq_purge(&ifp->if_snd);
		goto done;
	}

	txprio = sc->sc_txhprio;

	while ((m0 = ifq_dequeue(&ifp->if_snd)) != NULL) {
#if NBPFILTER > 0
		if_bpf = ifp->if_bpf;
		if (if_bpf)
			bpf_mtap_ether(if_bpf, m0, BPF_DIRECTION_OUT);
#endif

		/* the customer ethernet header is at the front of m0 */
		ceh = mtod(m0, struct ether_header *);

		/* force prepend of a whole mbuf because of alignment */
		m = m_get(M_DONTWAIT, m0->m_type);
		if (m == NULL) {
			m_freem(m0);
			continue;
		}

		M_MOVE_PKTHDR(m, m0);
		m->m_next = m0;

		m_align(m, 0);
		m->m_len = 0;

		/* make room for the backbone header plus the I-TAG */
		m = m_prepend(m, hlen, M_DONTWAIT);
		if (m == NULL)
			continue;

		beh = mtod(m, struct ether_header *);

		if (ETHER_IS_BROADCAST(ceh->ether_dhost)) {
			/* broadcast goes to the per-I-SID group address */
			memcpy(beh->ether_dhost, sc->sc_group,
			    sizeof(beh->ether_dhost));
		} else {
			struct bpe_entry *be;

			/*
			 * Look up the learned backbone address.  The cast
			 * relies on be_c_da being the first member of
			 * struct bpe_entry.
			 */
			rw_enter_read(&sc->sc_bridge_lock);
			be = RBT_FIND(bpe_map, &sc->sc_bridge_map,
			    (struct bpe_entry *)ceh->ether_dhost);
			if (bpe_entry_valid(sc, be)) {
				memcpy(beh->ether_dhost, &be->be_b_da,
				    sizeof(beh->ether_dhost));
			} else {
				/* "flood" to unknown hosts */
				memcpy(beh->ether_dhost, sc->sc_group,
				    sizeof(beh->ether_dhost));
			}
			rw_exit_read(&sc->sc_bridge_lock);
		}

		/* source the frame from the parent's MAC address */
		memcpy(beh->ether_shost, ((struct arpcom *)ifp0)->ac_enaddr,
		    sizeof(beh->ether_shost));
		beh->ether_type = htons(ETHERTYPE_PBB);

		prio = (txprio == IF_HDRPRIO_PACKET) ?
		    m->m_pkthdr.pf.prio : txprio;

		/* build the I-TAG: the I-SID plus the priority bits */
		itag = sc->sc_key.k_isid;
		itag |= prio << PBB_ITAG_PCP_SHIFT;
		itagp = (uint32_t *)(beh + 1);

		htobem32(itagp, itag);

		if_enqueue(ifp0, m);
	}

done:
	if_put(ifp0);
}
329 
330 static void
331 bpe_bridge_age(void *arg)
332 {
333 	struct bpe_softc *sc = arg;
334 	struct bpe_entry *be, *nbe;
335 	time_t diff;
336 
337 	timeout_add_sec(&sc->sc_bridge_age, BPE_BRIDGE_AGE_TMO);
338 
339 	rw_enter_write(&sc->sc_bridge_lock);
340 	RBT_FOREACH_SAFE(be, bpe_map, &sc->sc_bridge_map, nbe) {
341 		if (be->be_type != BPE_ENTRY_DYNAMIC)
342 			continue;
343 
344 		diff = getuptime() - be->be_age;
345 		if (diff < sc->sc_bridge_tmo)
346 			continue;
347 
348 		sc->sc_bridge_num--;
349 		RBT_REMOVE(bpe_map, &sc->sc_bridge_map, be);
350 		if (refcnt_rele(&be->be_refs))
351 			pool_put(&bpe_entry_pool, be);
352 	}
353 	rw_exit_write(&sc->sc_bridge_lock);
354 }
355 
356 static int
357 bpe_rtfind(struct bpe_softc *sc, struct ifbaconf *baconf)
358 {
359 	struct ifnet *ifp = &sc->sc_ac.ac_if;
360 	struct bpe_entry *be;
361 	struct ifbareq bareq;
362 	caddr_t uaddr, end;
363 	int error;
364 	time_t age;
365 	struct sockaddr_dl *sdl;
366 
367 	if (baconf->ifbac_len == 0) {
368 		/* single read is atomic */
369 		baconf->ifbac_len = sc->sc_bridge_num * sizeof(bareq);
370 		return (0);
371 	}
372 
373 	uaddr = baconf->ifbac_buf;
374 	end = uaddr + baconf->ifbac_len;
375 
376 	rw_enter_read(&sc->sc_bridge_lock);
377 	RBT_FOREACH(be, bpe_map, &sc->sc_bridge_map) {
378 		if (uaddr >= end)
379 			break;
380 
381 		memcpy(bareq.ifba_name, ifp->if_xname,
382 		    sizeof(bareq.ifba_name));
383 		memcpy(bareq.ifba_ifsname, ifp->if_xname,
384 		    sizeof(bareq.ifba_ifsname));
385 		memcpy(&bareq.ifba_dst, &be->be_c_da,
386 		    sizeof(bareq.ifba_dst));
387 
388 		memset(&bareq.ifba_dstsa, 0, sizeof(bareq.ifba_dstsa));
389 
390 		bzero(&bareq.ifba_dstsa, sizeof(bareq.ifba_dstsa));
391 		sdl = (struct sockaddr_dl *)&bareq.ifba_dstsa;
392 		sdl->sdl_len = sizeof(sdl);
393 		sdl->sdl_family = AF_LINK;
394 		sdl->sdl_index = 0;
395 		sdl->sdl_type = IFT_ETHER;
396 		sdl->sdl_nlen = 0;
397 		sdl->sdl_alen = sizeof(be->be_b_da);
398 		CTASSERT(sizeof(sdl->sdl_data) >= sizeof(be->be_b_da));
399 		memcpy(sdl->sdl_data, &be->be_b_da, sizeof(be->be_b_da));
400 
401 		switch (be->be_type) {
402 		case BPE_ENTRY_DYNAMIC:
403 			age = getuptime() - be->be_age;
404 			bareq.ifba_age = MIN(age, 0xff);
405 			bareq.ifba_flags = IFBAF_DYNAMIC;
406 			break;
407 		case BPE_ENTRY_STATIC:
408 			bareq.ifba_age = 0;
409 			bareq.ifba_flags = IFBAF_STATIC;
410 			break;
411 		}
412 
413 		error = copyout(&bareq, uaddr, sizeof(bareq));
414 		if (error != 0) {
415 			rw_exit_read(&sc->sc_bridge_lock);
416 			return (error);
417 		}
418 
419 		uaddr += sizeof(bareq);
420 	}
421 	baconf->ifbac_len = sc->sc_bridge_num * sizeof(bareq);
422 	rw_exit_read(&sc->sc_bridge_lock);
423 
424 	return (0);
425 }
426 
427 static void
428 bpe_flush_map(struct bpe_softc *sc, uint32_t flags)
429 {
430 	struct bpe_entry *be, *nbe;
431 
432 	rw_enter_write(&sc->sc_bridge_lock);
433 	RBT_FOREACH_SAFE(be, bpe_map, &sc->sc_bridge_map, nbe) {
434 		if (flags == IFBF_FLUSHDYN &&
435 		    be->be_type != BPE_ENTRY_DYNAMIC)
436 			continue;
437 
438 		RBT_REMOVE(bpe_map, &sc->sc_bridge_map, be);
439 		if (refcnt_rele(&be->be_refs))
440 			pool_put(&bpe_entry_pool, be);
441 	}
442 	rw_exit_write(&sc->sc_bridge_lock);
443 }
444 
/*
 * bpe_ioctl: interface ioctl handler.  Configuration requests are
 * dispatched to their bpe_* helpers, bridge-style requests operate on
 * the learned address map, and anything unrecognized falls through to
 * ether_ioctl().
 */
static int
bpe_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct bpe_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifbrparam *bparam = (struct ifbrparam *)data;
	int error = 0;

	switch (cmd) {
	case SIOCSIFFLAGS:
		/* bring the interface state in line with IFF_UP */
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = bpe_up(sc);
			else
				error = 0;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = bpe_down(sc);
		}
		break;

	/* the virtual network id doubles as the PBB I-SID */
	case SIOCSVNETID:
		error = bpe_set_vnetid(sc, ifr);
		break;
	case SIOCGVNETID:
		ifr->ifr_vnetid = sc->sc_key.k_isid;
		break;

	case SIOCSIFPARENT:
		error = bpe_set_parent(sc, (struct if_parent *)data);
		break;
	case SIOCGIFPARENT:
		error = bpe_get_parent(sc, (struct if_parent *)data);
		break;
	case SIOCDIFPARENT:
		error = bpe_del_parent(sc);
		break;

	/* priority to encode in the I-TAG PCP field on transmit */
	case SIOCSTXHPRIO:
		error = if_txhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_txhprio;
		break;

	/* where the packet priority comes from on receive */
	case SIOCSRXHPRIO:
		error = if_rxhprio_l2_check(ifr->ifr_hdrprio);
		if (error != 0)
			break;

		sc->sc_rxhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGRXHPRIO:
		ifr->ifr_hdrprio = sc->sc_rxhprio;
		break;

	case SIOCGIFMEDIA:
		error = bpe_media_get(sc, ifr);
		break;

	/* limit on the number of learned addresses */
	case SIOCBRDGSCACHE:
		error = suser(curproc);
		if (error != 0)
			break;

		if (bparam->ifbrp_csize < 1) {
			error = EINVAL;
			break;
		}

		/* commit */
		sc->sc_bridge_max = bparam->ifbrp_csize;
		break;
	case SIOCBRDGGCACHE:
		bparam->ifbrp_csize = sc->sc_bridge_max;
		break;

	/* how long a learned address stays valid, in seconds */
	case SIOCBRDGSTO:
		error = suser(curproc);
		if (error != 0)
			break;

		if (bparam->ifbrp_ctime < 8 ||
		    bparam->ifbrp_ctime > 3600) {
			error = EINVAL;
			break;
		}
		sc->sc_bridge_tmo = bparam->ifbrp_ctime;
		break;
	case SIOCBRDGGTO:
		bparam->ifbrp_ctime = sc->sc_bridge_tmo;
		break;

	case SIOCBRDGRTS:
		error = bpe_rtfind(sc, (struct ifbaconf *)data);
		break;
	case SIOCBRDGFLUSH:
		error = suser(curproc);
		if (error != 0)
			break;

		bpe_flush_map(sc,
		    ((struct ifbreq *)data)->ifbr_ifsflags);
		break;

	default:
		error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
		break;
	}

	return (error);
}
561 
562 static int
563 bpe_media_get(struct bpe_softc *sc, struct ifreq *ifr)
564 {
565 	struct ifnet *ifp0;
566 	int error;
567 
568 	ifp0 = if_get(sc->sc_key.k_if);
569 	if (ifp0 != NULL)
570 		error = (*ifp0->if_ioctl)(ifp0, SIOCGIFMEDIA, (caddr_t)ifr);
571 	else
572 		error = ENOTTY;
573 	if_put(ifp0);
574 
575 	return (error);
576 }
577 
/*
 * bpe_up: bring the interface up.  Validates the parent interface,
 * registers this (parent, I-SID) pair in the global interface tree,
 * joins the group address on the parent, and hooks link-state and
 * detach notifications.  Unwinds with gotos on failure.
 */
static int
bpe_up(struct bpe_softc *sc)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct ifnet *ifp0;
	struct bpe_softc *osc;
	int error = 0;
	u_int hardmtu;
	/* encapsulation overhead: backbone header plus the I-TAG */
	u_int hlen = sizeof(struct ether_header) + sizeof(uint32_t);

	KASSERT(!ISSET(ifp->if_flags, IFF_RUNNING));
	NET_ASSERT_LOCKED();

	ifp0 = if_get(sc->sc_key.k_if);
	if (ifp0 == NULL)
		return (ENXIO);

	/* check again if bpe will work on top of the parent */
	if (ifp0->if_type != IFT_ETHER) {
		error = EPROTONOSUPPORT;
		goto put;
	}

	/* the parent must be able to carry the encapsulated frame */
	hardmtu = ifp0->if_hardmtu;
	if (hardmtu < hlen) {
		error = ENOBUFS;
		goto put;
	}
	hardmtu -= hlen;
	if (ifp->if_mtu > hardmtu) {
		error = ENOBUFS;
		goto put;
	}

	/* parent is fine, let's prepare the bpe to handle packets */
	ifp->if_hardmtu = hardmtu;
	SET(ifp->if_flags, ifp0->if_flags & IFF_SIMPLEX);

	/* commit the interface */
	error = rw_enter(&bpe_lock, RW_WRITE | RW_INTR);
	if (error != 0)
		goto scrub;

	/* sc_key must be first in the softc for this cast to work */
	osc = (struct bpe_softc *)RBT_INSERT(bpe_tree, &bpe_interfaces,
	    (struct bpe_key *)sc);
	rw_exit(&bpe_lock);

	/* another bpe already claims this (parent, I-SID) pair */
	if (osc != NULL) {
		error = EADDRINUSE;
		goto scrub;
	}

	/* join the per-I-SID group address on the parent */
	if (bpe_multi(sc, ifp0, SIOCADDMULTI) != 0) {
		error = ENOTCONN;
		goto remove;
	}

	/* Register callback for physical link state changes */
	if_linkstatehook_add(ifp0, &sc->sc_ltask);

	/* Register callback if parent wants to unregister */
	if_detachhook_add(ifp0, &sc->sc_dtask);

	/* we're running now */
	SET(ifp->if_flags, IFF_RUNNING);
	bpe_link_state(sc, ifp0->if_link_state, ifp0->if_baudrate);

	if_put(ifp0);

	/* start aging out learned addresses */
	timeout_add_sec(&sc->sc_bridge_age, BPE_BRIDGE_AGE_TMO);

	return (0);

remove:
	rw_enter(&bpe_lock, RW_WRITE);
	RBT_REMOVE(bpe_tree, &bpe_interfaces, (struct bpe_key *)sc);
	rw_exit(&bpe_lock);
scrub:
	CLR(ifp->if_flags, IFF_SIMPLEX);
	ifp->if_hardmtu = 0xffff;
put:
	if_put(ifp0);

	return (error);
}
663 
664 static int
665 bpe_down(struct bpe_softc *sc)
666 {
667 	struct ifnet *ifp = &sc->sc_ac.ac_if;
668 	struct ifnet *ifp0;
669 
670 	NET_ASSERT_LOCKED();
671 
672 	CLR(ifp->if_flags, IFF_RUNNING);
673 
674 	ifp0 = if_get(sc->sc_key.k_if);
675 	if (ifp0 != NULL) {
676 		if_detachhook_del(ifp0, &sc->sc_dtask);
677 		if_linkstatehook_del(ifp0, &sc->sc_ltask);
678 		bpe_multi(sc, ifp0, SIOCDELMULTI);
679 	}
680 	if_put(ifp0);
681 
682 	rw_enter(&bpe_lock, RW_WRITE);
683 	RBT_REMOVE(bpe_tree, &bpe_interfaces, (struct bpe_key *)sc);
684 	rw_exit(&bpe_lock);
685 
686 	CLR(ifp->if_flags, IFF_SIMPLEX);
687 	ifp->if_hardmtu = 0xffff;
688 
689 	return (0);
690 }
691 
692 static int
693 bpe_multi(struct bpe_softc *sc, struct ifnet *ifp0, u_long cmd)
694 {
695 	struct ifreq ifr;
696 	struct sockaddr *sa;
697 
698 	/* make it convincing */
699 	CTASSERT(sizeof(ifr.ifr_name) == sizeof(ifp0->if_xname));
700 	memcpy(ifr.ifr_name, ifp0->if_xname, sizeof(ifr.ifr_name));
701 
702 	sa = &ifr.ifr_addr;
703 	CTASSERT(sizeof(sa->sa_data) >= sizeof(sc->sc_group));
704 
705 	sa->sa_family = AF_UNSPEC;
706 	memcpy(sa->sa_data, sc->sc_group, sizeof(sc->sc_group));
707 
708 	return ((*ifp0->if_ioctl)(ifp0, cmd, (caddr_t)&ifr));
709 }
710 
711 static void
712 bpe_set_group(struct bpe_softc *sc, uint32_t isid)
713 {
714 	uint8_t *group = sc->sc_group;
715 
716 	group[0] = 0x01;
717 	group[1] = 0x1e;
718 	group[2] = 0x83;
719 	group[3] = isid >> 16;
720 	group[4] = isid >> 8;
721 	group[5] = isid >> 0;
722 }
723 
724 static int
725 bpe_set_vnetid(struct bpe_softc *sc, const struct ifreq *ifr)
726 {
727 	struct ifnet *ifp = &sc->sc_ac.ac_if;
728 	uint32_t isid;
729 
730 	if (ifr->ifr_vnetid < PBB_ITAG_ISID_MIN ||
731 	    ifr->ifr_vnetid > PBB_ITAG_ISID_MAX)
732 		return (EINVAL);
733 
734 	isid = ifr->ifr_vnetid;
735 	if (isid == sc->sc_key.k_isid)
736 		return (0);
737 
738 	if (ISSET(ifp->if_flags, IFF_RUNNING))
739 		return (EBUSY);
740 
741 	/* commit */
742 	sc->sc_key.k_isid = isid;
743 	bpe_set_group(sc, isid);
744 	bpe_flush_map(sc, IFBF_FLUSHALL);
745 
746 	return (0);
747 }
748 
749 static int
750 bpe_set_parent(struct bpe_softc *sc, const struct if_parent *p)
751 {
752 	struct ifnet *ifp = &sc->sc_ac.ac_if;
753 	struct ifnet *ifp0;
754 
755 	ifp0 = ifunit(p->ifp_parent); /* doesn't need an if_put */
756 	if (ifp0 == NULL)
757 		return (ENXIO);
758 
759 	if (ifp0->if_type != IFT_ETHER)
760 		return (ENXIO);
761 
762 	if (ifp0->if_index == sc->sc_key.k_if)
763 		return (0);
764 
765 	if (ISSET(ifp->if_flags, IFF_RUNNING))
766 		return (EBUSY);
767 
768 	/* commit */
769 	sc->sc_key.k_if = ifp0->if_index;
770 	bpe_flush_map(sc, IFBF_FLUSHALL);
771 
772 	return (0);
773 }
774 
775 static int
776 bpe_get_parent(struct bpe_softc *sc, struct if_parent *p)
777 {
778 	struct ifnet *ifp0;
779 	int error = 0;
780 
781 	ifp0 = if_get(sc->sc_key.k_if);
782 	if (ifp0 == NULL)
783 		error = EADDRNOTAVAIL;
784 	else
785 		memcpy(p->ifp_parent, ifp0->if_xname, sizeof(p->ifp_parent));
786 	if_put(ifp0);
787 
788 	return (error);
789 }
790 
791 static int
792 bpe_del_parent(struct bpe_softc *sc)
793 {
794 	struct ifnet *ifp = &sc->sc_ac.ac_if;
795 
796 	if (ISSET(ifp->if_flags, IFF_RUNNING))
797 		return (EBUSY);
798 
799 	/* commit */
800 	sc->sc_key.k_if = 0;
801 	bpe_flush_map(sc, IFBF_FLUSHALL);
802 
803 	return (0);
804 }
805 
806 static inline struct bpe_softc *
807 bpe_find(struct ifnet *ifp0, uint32_t isid)
808 {
809 	struct bpe_key k = { .k_if = ifp0->if_index, .k_isid = isid };
810 	struct bpe_softc *sc;
811 
812 	rw_enter_read(&bpe_lock);
813 	sc = (struct bpe_softc *)RBT_FIND(bpe_tree, &bpe_interfaces, &k);
814 	rw_exit_read(&bpe_lock);
815 
816 	return (sc);
817 }
818 
/*
 * bpe_input_map: learn that customer address "ca" is reachable via
 * backbone address "ba".  Creates or refreshes a dynamic entry in the
 * bridge map; static entries and multicast sources are left alone.
 */
static void
bpe_input_map(struct bpe_softc *sc, const uint8_t *ba, const uint8_t *ca)
{
	struct bpe_entry *be;
	int new = 0;

	if (ETHER_IS_MULTICAST(ca))
		return;

	/* remember where it came from */
	rw_enter_read(&sc->sc_bridge_lock);
	be = RBT_FIND(bpe_map, &sc->sc_bridge_map, (struct bpe_entry *)ca);
	if (be == NULL)
		new = 1;
	else {
		be->be_age = getuptime(); /* only a little bit racy */

		/*
		 * A reference is only needed when the backbone address
		 * changed and the entry must be rewritten under the
		 * write lock below.
		 */
		if (be->be_type != BPE_ENTRY_DYNAMIC ||
		    ETHER_IS_EQ(ba, &be->be_b_da))
			be = NULL;
		else
			refcnt_take(&be->be_refs);
	}
	rw_exit_read(&sc->sc_bridge_lock);

	if (new) {
		struct bpe_entry *obe;
		unsigned int num;

		be = pool_get(&bpe_entry_pool, PR_NOWAIT);
		if (be == NULL) {
			/* oh well */
			return;
		}

		memcpy(&be->be_c_da, ca, sizeof(be->be_c_da));
		memcpy(&be->be_b_da, ba, sizeof(be->be_b_da));
		be->be_type = BPE_ENTRY_DYNAMIC;
		refcnt_init(&be->be_refs);
		be->be_age = getuptime();

		rw_enter_write(&sc->sc_bridge_lock);
		num = sc->sc_bridge_num;
		/* don't learn past the configured limit */
		if (++num > sc->sc_bridge_max)
			obe = be;
		else {
			/* try and give the ref to the map */
			obe = RBT_INSERT(bpe_map, &sc->sc_bridge_map, be);
			if (obe == NULL) {
				/* count the insert */
				sc->sc_bridge_num = num;
			}
		}
		rw_exit_write(&sc->sc_bridge_lock);

		/* either over the limit, or someone else inserted first */
		if (obe != NULL)
			pool_put(&bpe_entry_pool, obe);
	} else if (be != NULL) {
		/* the customer address moved to another backbone address */
		rw_enter_write(&sc->sc_bridge_lock);
		memcpy(&be->be_b_da, ba, sizeof(be->be_b_da));
		rw_exit_write(&sc->sc_bridge_lock);

		if (refcnt_rele(&be->be_refs)) {
			/* ioctl may have deleted the entry */
			pool_put(&bpe_entry_pool, be);
		}
	}
}
887 
/*
 * bpe_input: handle a PBB-encapsulated frame received on parent ifp0.
 * Validates the I-TAG, learns the source mapping, strips the backbone
 * header plus I-TAG, and feeds the inner customer frame to the bpe
 * interface matching (parent, I-SID).  Consumes the mbuf.
 */
void
bpe_input(struct ifnet *ifp0, struct mbuf *m)
{
	struct bpe_softc *sc;
	struct ifnet *ifp;
	struct ether_header *beh, *ceh;
	uint32_t *itagp, itag;
	unsigned int hlen = sizeof(*beh) + sizeof(*itagp) + sizeof(*ceh);
	struct mbuf *n;
	int off;
	int prio;

	/* make the outer header, I-TAG, and inner header contiguous */
	if (m->m_len < hlen) {
		m = m_pullup(m, hlen);
		if (m == NULL) {
			/* pbb short ++ */
			return;
		}
	}

	beh = mtod(m, struct ether_header *);
	itagp = (uint32_t *)(beh + 1);
	itag = bemtoh32(itagp);

	/* reserved bits that must be zero on input */
	if (itag & PBB_ITAG_RES2) {
		/* dropped by res2 ++ */
		goto drop;
	}

	sc = bpe_find(ifp0, itag & PBB_ITAG_ISID);
	if (sc == NULL) {
		/* no interface found */
		goto drop;
	}

	ceh = (struct ether_header *)(itagp + 1);

	/* learn which backbone address the customer source sits behind */
	bpe_input_map(sc, beh->ether_shost, ceh->ether_shost);

	/* strip the encapsulation, leaving the customer frame */
	m_adj(m, sizeof(*beh) + sizeof(*itagp));

	n = m_getptr(m, sizeof(*ceh), &off);
	if (n == NULL) {
		/* no data ++ */
		goto drop;
	}

	if (!ALIGNED_POINTER(mtod(n, caddr_t) + off, uint32_t)) {
		/* unaligned ++ */
		/* copy the packet so the inner payload ends up aligned */
		n = m_dup_pkt(m, ETHER_ALIGN, M_NOWAIT);
		m_freem(m);
		if (n == NULL)
			return;

		m = n;
	}

	ifp = &sc->sc_ac.ac_if;

	/* pick where the packet priority comes from */
	prio = sc->sc_rxhprio;
	switch (prio) {
	case IF_HDRPRIO_PACKET:
		break;
	case IF_HDRPRIO_OUTER:
		m->m_pkthdr.pf.prio = (itag & PBB_ITAG_PCP_MASK) >>
		    PBB_ITAG_PCP_SHIFT;
		break;
	default:
		m->m_pkthdr.pf.prio = prio;
		break;
	}

	/* the packet now belongs to the bpe interface */
	m->m_flags &= ~(M_BCAST|M_MCAST);
	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

#if NPF > 0
	pf_pkt_addr_changed(m);
#endif

	if_vinput(ifp, m);
	return;

drop:
	m_freem(m);
}
974 
975 void
976 bpe_detach_hook(void *arg)
977 {
978 	struct bpe_softc *sc = arg;
979 	struct ifnet *ifp = &sc->sc_ac.ac_if;
980 
981 	if (ISSET(ifp->if_flags, IFF_RUNNING)) {
982 		bpe_down(sc);
983 		CLR(ifp->if_flags, IFF_UP);
984 	}
985 
986 	sc->sc_key.k_if = 0;
987 }
988 
989 static void
990 bpe_link_hook(void *arg)
991 {
992 	struct bpe_softc *sc = arg;
993 	struct ifnet *ifp0;
994 	u_char link = LINK_STATE_DOWN;
995 	uint64_t baud = 0;
996 
997 	ifp0 = if_get(sc->sc_key.k_if);
998 	if (ifp0 != NULL) {
999 		link = ifp0->if_link_state;
1000 		baud = ifp0->if_baudrate;
1001 	}
1002 	if_put(ifp0);
1003 
1004 	bpe_link_state(sc, link, baud);
1005 }
1006 
1007 void
1008 bpe_link_state(struct bpe_softc *sc, u_char link, uint64_t baud)
1009 {
1010 	struct ifnet *ifp = &sc->sc_ac.ac_if;
1011 
1012 	if (ifp->if_link_state == link)
1013 		return;
1014 
1015 	ifp->if_link_state = link;
1016 	ifp->if_baudrate = baud;
1017 
1018 	if_link_state_change(ifp);
1019 }
1020 
1021 static inline int
1022 bpe_cmp(const struct bpe_key *a, const struct bpe_key *b)
1023 {
1024 	if (a->k_if > b->k_if)
1025 		return (1);
1026 	if (a->k_if < b->k_if)
1027 		return (-1);
1028 	if (a->k_isid > b->k_isid)
1029 		return (1);
1030 	if (a->k_isid < b->k_isid)
1031 		return (-1);
1032 
1033 	return (0);
1034 }
1035 
/*
 * bpe_entry_cmp: RBT comparison for the bridge map, ordering entries
 * by customer destination address.  Lookups cast a bare ethernet
 * address to struct bpe_entry *, which works because be_c_da is the
 * struct's first member.
 */
static inline int
bpe_entry_cmp(const struct bpe_entry *a, const struct bpe_entry *b)
{
	return memcmp(&a->be_c_da, &b->be_c_da, sizeof(a->be_c_da));
}
1041