xref: /dragonfly/sys/net/netmap/netmap_vale.c (revision 896f2e3a)
1 /*
2  * Copyright (C) 2013 Universita` di Pisa. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *   1. Redistributions of source code must retain the above copyright
8  *      notice, this list of conditions and the following disclaimer.
9  *   2. Redistributions in binary form must reproduce the above copyright
10  *      notice, this list of conditions and the following disclaimer in the
11  *      documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  */
25 
26 
27 /*
28  * This module implements the VALE switch for netmap
29 
30 --- VALE SWITCH ---
31 
32 NMG_LOCK() serializes all modifications to switches and ports.
33 A switch cannot be deleted until all ports are gone.
34 
35 For each switch, an SX lock (RWlock on linux) protects
36 deletion of ports. When configuring or deleting a new port, the
37 lock is acquired in exclusive mode (after holding NMG_LOCK).
38 When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
39 The lock is held throughout the entire forwarding cycle,
40 during which the thread may incur in a page fault.
41 Hence it is important that sleepable shared locks are used.
42 
43 On the rx ring, the per-port lock is grabbed initially to reserve
44 a number of slot in the ring, then the lock is released,
45 packets are copied from source to destination, and then
46 the lock is acquired again and the receive ring is updated.
47 (A similar thing is done on the tx ring for NIC and host stack
48 ports attached to the switch)
49 
50  */
51 
52 /*
53  * OS-specific code that is used only within this file.
54  * Other OS-specific code that must be accessed by drivers
55  * is present in netmap_kern.h
56  */
57 
58 #include <sys/cdefs.h> /* prerequisite */
59 __FBSDID("$FreeBSD: head/sys/dev/netmap/netmap.c 257176 2013-10-26 17:58:36Z glebius $");
60 
61 #include <sys/types.h>
62 #include <sys/errno.h>
63 #include <sys/param.h>	/* defines used in kernel.h */
64 #include <sys/kernel.h>	/* types used in module initialization */
65 #include <sys/conf.h>	/* cdevsw struct, UID, GID */
66 #include <sys/sockio.h>
67 #include <sys/socketvar.h>	/* struct socket */
68 #include <sys/malloc.h>
69 #include <sys/poll.h>
70 #include <sys/lock.h>
71 #include <sys/socket.h> /* sockaddrs */
72 #include <sys/sysctl.h>
73 #include <net/if.h>
74 #include <net/if_var.h>
75 #include <net/bpf.h>		/* BIOCIMMEDIATE */
76 #include <sys/bus.h>	/* bus_dmamap_* */
77 #include <sys/endian.h>
78 #include <sys/refcount.h>
79 
80 
81 #define BDG_RWLOCK_T		struct lock
82 
83 #define	BDG_RWINIT(b)		\
84 	lockinit(&(b)->bdg_lock, "bdg lock", 0, LK_CANRECURSE)
85 #define BDG_WLOCK(b)		lockmgr(&(b)->bdg_lock, LK_EXCLUSIVE)
86 #define BDG_WUNLOCK(b)		lockmgr(&(b)->bdg_lock, LK_RELEASE)
87 #define BDG_RLOCK(b)		lockmgr(&(b)->bdg_lock, LK_SHARED)
88 #define BDG_RTRYLOCK(b)		lockmgr(&(b)->bdg_lock, LK_SHARED|LK_NOWAIT)
89 #define BDG_RUNLOCK(b)		lockmgr(&(b)->bdg_lock, LK_RELEASE)
90 #define BDG_RWDESTROY(b)	lockuninit(&(b)->bdg_lock)
91 
92 /*
93  * common headers
94  */
95 
96 #include <net/netmap.h>
97 #include <net/netmap/netmap_kern.h>
98 #include <net/netmap/netmap_mem2.h>
99 
100 #ifdef WITH_VALE
101 
102 /*
103  * system parameters (most of them in netmap_kern.h)
104  * NM_NAME	prefix for switch port names, default "vale"
105  * NM_BDG_MAXPORTS	number of ports
106  * NM_BRIDGES	max number of switches in the system.
107  *	XXX should become a sysctl or tunable
108  *
109  * Switch ports are named valeX:Y where X is the switch name and Y
110  * is the port. If Y matches a physical interface name, the port is
111  * connected to a physical device.
112  *
113  * Unlike physical interfaces, switch ports use their own memory region
114  * for rings and buffers.
115  * The virtual interfaces use per-queue lock instead of core lock.
116  * In the tx loop, we aggregate traffic in batches to make all operations
117  * faster. The batch size is bridge_batch.
118  */
119 #define NM_BDG_MAXRINGS		16	/* XXX unclear how many. */
120 #define NM_BDG_MAXSLOTS		4096	/* XXX same as above */
121 #define NM_BRIDGE_RINGSIZE	1024	/* in the device */
122 #define NM_BDG_HASH		1024	/* forwarding table entries */
123 #define NM_BDG_BATCH		1024	/* entries in the forwarding buffer */
124 #define NM_MULTISEG		64	/* max size of a chain of bufs */
125 /* actual size of the tables */
126 #define NM_BDG_BATCH_MAX	(NM_BDG_BATCH + NM_MULTISEG)
127 /* NM_FT_NULL terminates a list of slots in the ft */
128 #define NM_FT_NULL		NM_BDG_BATCH_MAX
129 #define	NM_BRIDGES		8	/* number of bridges */
130 
131 
132 /*
133  * bridge_batch is set via sysctl to the max batch size to be
134  * used in the bridge. The actual value may be larger as the
135  * last packet in the block may overflow the size.
136  */
137 int bridge_batch = NM_BDG_BATCH; /* bridge batch size */
138 SYSCTL_DECL(_dev_netmap);
139 SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0 , "");
140 
141 
142 static int bdg_netmap_attach(struct netmap_adapter *);
143 static int bdg_netmap_reg(struct netmap_adapter *na, int onoff);
144 static int netmap_bwrap_attach(struct ifnet *, struct ifnet *);
145 static int netmap_bwrap_register(struct netmap_adapter *, int onoff);
146 int kern_netmap_regif(struct nmreq *nmr);
147 
148 /*
149  * Each transmit queue accumulates a batch of packets into
150  * a structure before forwarding. Packets to the same
151  * destination are put in a list using ft_next as a link field.
152  * ft_frags and ft_next are valid only on the first fragment.
153  */
154 struct nm_bdg_fwd {	/* forwarding entry for a bridge */
155 	void *ft_buf;		/* netmap or indirect buffer */
156 	uint8_t ft_frags;	/* how many fragments (only on 1st frag) */
157 	uint8_t _ft_port;	/* dst port (unused) */
158 	uint16_t ft_flags;	/* flags, e.g. indirect */
159 	uint16_t ft_len;	/* src fragment len */
160 	uint16_t ft_next;	/* next packet to same destination */
161 };
162 
163 /*
164  * For each output interface, nm_bdg_q is used to construct a list.
165  * bq_len is the number of output buffers (we can have coalescing
166  * during the copy).
167  */
168 struct nm_bdg_q {
169 	uint16_t bq_head;
170 	uint16_t bq_tail;
171 	uint32_t bq_len;	/* number of buffers */
172 };
173 
174 /* XXX revise this */
175 struct nm_hash_ent {
176 	uint64_t	mac;	/* the top 2 bytes are the epoch */
177 	uint64_t	ports;
178 };
179 
180 /*
181  * nm_bridge is a descriptor for a VALE switch.
182  * Interfaces for a bridge are all in bdg_ports[].
183  * The array has fixed size, an empty entry does not terminate
184  * the search, but lookups only occur on attach/detach so we
185  * don't mind if they are slow.
186  *
187  * The bridge is non blocking on the transmit ports: excess
188  * packets are dropped if there is no room on the output port.
189  *
190  * bdg_lock protects accesses to the bdg_ports array.
191  * This is a rw lock (or equivalent).
192  */
193 struct nm_bridge {
194 	/* XXX what is the proper alignment/layout ? */
195 	BDG_RWLOCK_T	bdg_lock;	/* protects bdg_ports */
196 	int		bdg_namelen;
197 	uint32_t	bdg_active_ports; /* 0 means free */
198 	char		bdg_basename[IFNAMSIZ];
199 
200 	/* Indexes of active ports (up to active_ports)
201 	 * and all other remaining ports.
202 	 */
203 	uint8_t		bdg_port_index[NM_BDG_MAXPORTS];
204 
205 	struct netmap_vp_adapter *bdg_ports[NM_BDG_MAXPORTS];
206 
207 
208 	/*
209 	 * The function to decide the destination port.
210 	 * It returns either of an index of the destination port,
211 	 * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to
212 	 * forward this packet.  ring_nr is the source ring index, and the
213 	 * function may overwrite this value to forward this packet to a
214 	 * different ring index.
215 	 * This function must be set by netmap_bdgctl().
216 	 */
217 	bdg_lookup_fn_t nm_bdg_lookup;
218 
219 	/* the forwarding table, MAC+ports.
220 	 * XXX should be changed to an argument to be passed to
221 	 * the lookup function, and allocated on attach
222 	 */
223 	struct nm_hash_ent ht[NM_BDG_HASH];
224 };
225 
226 
227 /*
228  * XXX in principle nm_bridges could be created dynamically
229  * Right now we have a static array and deletions are protected
230  * by an exclusive lock.
231  */
232 struct nm_bridge nm_bridges[NM_BRIDGES];
233 
234 
235 /*
236  * A few function to tell which kind of port are we using.
237  * XXX should we hold a lock ?
238  *
239  * nma_is_vp()		virtual port
240  * nma_is_host()	port connected to the host stack
241  * nma_is_hw()		port connected to a NIC
242  * nma_is_generic()	generic netmap adapter XXX stop this madness
243  */
244 static __inline int
245 nma_is_vp(struct netmap_adapter *na)
246 {
247 	return na->nm_register == bdg_netmap_reg;
248 }
249 
250 
251 static __inline int
252 nma_is_host(struct netmap_adapter *na)
253 {
254 	return na->nm_register == NULL;
255 }
256 
257 
258 static __inline int
259 nma_is_hw(struct netmap_adapter *na)
260 {
261 	/* In case of sw adapter, nm_register is NULL */
262 	return !nma_is_vp(na) && !nma_is_host(na) && !nma_is_generic(na);
263 }
264 
265 static __inline int
266 nma_is_bwrap(struct netmap_adapter *na)
267 {
268 	return na->nm_register == netmap_bwrap_register;
269 }
270 
271 
272 
273 /*
274  * this is a slightly optimized copy routine which rounds
275  * to multiple of 64 bytes and is often faster than dealing
276  * with other odd sizes. We assume there is enough room
277  * in the source and destination buffers.
278  *
279  * XXX only for multiples of 64 bytes, non overlapped.
280  */
281 static inline void
282 pkt_copy(void *_src, void *_dst, int l)
283 {
284         uint64_t *src = _src;
285         uint64_t *dst = _dst;
286         if (unlikely(l >= 1024)) {
287                 memcpy(dst, src, l);
288                 return;
289         }
290         for (; likely(l > 0); l-=64) {
291                 *dst++ = *src++;
292                 *dst++ = *src++;
293                 *dst++ = *src++;
294                 *dst++ = *src++;
295                 *dst++ = *src++;
296                 *dst++ = *src++;
297                 *dst++ = *src++;
298                 *dst++ = *src++;
299         }
300 }
301 
302 
303 
304 /*
305  * locate a bridge among the existing ones.
306  * MUST BE CALLED WITH NMG_LOCK()
307  *
308  * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME.
309  * We assume that this is called with a name of at least NM_NAME chars.
310  */
311 static struct nm_bridge *
312 nm_find_bridge(const char *name, int create)
313 {
314 	int i, l, namelen;
315 	struct nm_bridge *b = NULL;
316 
317 	NMG_LOCK_ASSERT();
318 
319 	namelen = strlen(NM_NAME);	/* base length */
320 	l = name ? strlen(name) : 0;		/* actual length */
321 	if (l < namelen) {
322 		D("invalid bridge name %s", name ? name : NULL);
323 		return NULL;
324 	}
325 	for (i = namelen + 1; i < l; i++) {
326 		if (name[i] == ':') {
327 			namelen = i;
328 			break;
329 		}
330 	}
331 	if (namelen >= IFNAMSIZ)
332 		namelen = IFNAMSIZ;
333 	ND("--- prefix is '%.*s' ---", namelen, name);
334 
335 	/* lookup the name, remember empty slot if there is one */
336 	for (i = 0; i < NM_BRIDGES; i++) {
337 		struct nm_bridge *x = nm_bridges + i;
338 
339 		if (x->bdg_active_ports == 0) {
340 			if (create && b == NULL)
341 				b = x;	/* record empty slot */
342 		} else if (x->bdg_namelen != namelen) {
343 			continue;
344 		} else if (strncmp(name, x->bdg_basename, namelen) == 0) {
345 			ND("found '%.*s' at %d", namelen, name, i);
346 			b = x;
347 			break;
348 		}
349 	}
350 	if (i == NM_BRIDGES && b) { /* name not found, can create entry */
351 		/* initialize the bridge */
352 		strncpy(b->bdg_basename, name, namelen);
353 		ND("create new bridge %s with ports %d", b->bdg_basename,
354 			b->bdg_active_ports);
355 		b->bdg_namelen = namelen;
356 		b->bdg_active_ports = 0;
357 		for (i = 0; i < NM_BDG_MAXPORTS; i++)
358 			b->bdg_port_index[i] = i;
359 		/* set the default function */
360 		b->nm_bdg_lookup = netmap_bdg_learning;
361 		/* reset the MAC address table */
362 		bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
363 	}
364 	return b;
365 }
366 
367 
368 /*
369  * Free the forwarding tables for rings attached to switch ports.
370  */
371 static void
372 nm_free_bdgfwd(struct netmap_adapter *na)
373 {
374 	int nrings, i;
375 	struct netmap_kring *kring;
376 
377 	NMG_LOCK_ASSERT();
378 	nrings = nma_is_vp(na) ? na->num_tx_rings : na->num_rx_rings;
379 	kring = nma_is_vp(na) ? na->tx_rings : na->rx_rings;
380 	for (i = 0; i < nrings; i++) {
381 		if (kring[i].nkr_ft) {
382 			kfree(kring[i].nkr_ft, M_DEVBUF);
383 			kring[i].nkr_ft = NULL; /* protect from freeing twice */
384 		}
385 	}
386 }
387 
388 
389 /*
390  * Allocate the forwarding tables for the rings attached to the bridge ports.
391  */
392 static int
393 nm_alloc_bdgfwd(struct netmap_adapter *na)
394 {
395 	int nrings, l, i, num_dstq;
396 	struct netmap_kring *kring;
397 
398 	NMG_LOCK_ASSERT();
399 	/* all port:rings + broadcast */
400 	num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1;
401 	l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX;
402 	l += sizeof(struct nm_bdg_q) * num_dstq;
403 	l += sizeof(uint16_t) * NM_BDG_BATCH_MAX;
404 
405 	nrings = na->num_tx_rings + 1;
406 	kring = na->tx_rings;
407 	for (i = 0; i < nrings; i++) {
408 		struct nm_bdg_fwd *ft;
409 		struct nm_bdg_q *dstq;
410 		int j;
411 
412 		ft = kmalloc(l, M_DEVBUF, M_NOWAIT | M_ZERO);
413 		if (!ft) {
414 			nm_free_bdgfwd(na);
415 			return ENOMEM;
416 		}
417 		dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
418 		for (j = 0; j < num_dstq; j++) {
419 			dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL;
420 			dstq[j].bq_len = 0;
421 		}
422 		kring[i].nkr_ft = ft;
423 	}
424 	return 0;
425 }
426 
427 
428 static void
429 netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
430 {
431 	int s_hw = hw, s_sw = sw;
432 	int i, lim =b->bdg_active_ports;
433 	uint8_t tmp[NM_BDG_MAXPORTS];
434 
435 	/*
436 	New algorithm:
437 	make a copy of bdg_port_index;
438 	lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
439 	in the array of bdg_port_index, replacing them with
440 	entries from the bottom of the array;
441 	decrement bdg_active_ports;
442 	acquire BDG_WLOCK() and copy back the array.
443 	 */
444 
445 	D("detach %d and %d (lim %d)", hw, sw, lim);
446 	/* make a copy of the list of active ports, update it,
447 	 * and then copy back within BDG_WLOCK().
448 	 */
449 	memcpy(tmp, b->bdg_port_index, sizeof(tmp));
450 	for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
451 		if (hw >= 0 && tmp[i] == hw) {
452 			ND("detach hw %d at %d", hw, i);
453 			lim--; /* point to last active port */
454 			tmp[i] = tmp[lim]; /* swap with i */
455 			tmp[lim] = hw;	/* now this is inactive */
456 			hw = -1;
457 		} else if (sw >= 0 && tmp[i] == sw) {
458 			ND("detach sw %d at %d", sw, i);
459 			lim--;
460 			tmp[i] = tmp[lim];
461 			tmp[lim] = sw;
462 			sw = -1;
463 		} else {
464 			i++;
465 		}
466 	}
467 	if (hw >= 0 || sw >= 0) {
468 		D("XXX delete failed hw %d sw %d, should panic...", hw, sw);
469 	}
470 
471 	BDG_WLOCK(b);
472 	b->bdg_ports[s_hw] = NULL;
473 	if (s_sw >= 0) {
474 		b->bdg_ports[s_sw] = NULL;
475 	}
476 	memcpy(b->bdg_port_index, tmp, sizeof(tmp));
477 	b->bdg_active_ports = lim;
478 	BDG_WUNLOCK(b);
479 
480 	ND("now %d active ports", lim);
481 	if (lim == 0) {
482 		ND("marking bridge %s as free", b->bdg_basename);
483 		b->nm_bdg_lookup = NULL;
484 	}
485 }
486 
487 static void
488 netmap_adapter_vp_dtor(struct netmap_adapter *na)
489 {
490 	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
491 	struct nm_bridge *b = vpna->na_bdg;
492 	struct ifnet *ifp = na->ifp;
493 
494 	ND("%s has %d references", NM_IFPNAME(ifp), na->na_refcount);
495 
496 	if (b) {
497 		netmap_bdg_detach_common(b, vpna->bdg_port, -1);
498 	}
499 
500 	bzero(ifp, sizeof(*ifp));
501 	kfree(ifp, M_DEVBUF);
502 	na->ifp = NULL;
503 }
504 
505 int
506 netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
507 {
508 	const char *name = nmr->nr_name;
509 	struct ifnet *ifp;
510 	int error = 0;
511 	struct netmap_adapter *ret;
512 	struct netmap_vp_adapter *vpna;
513 	struct nm_bridge *b;
514 	int i, j, cand = -1, cand2 = -1;
515 	int needed;
516 
517 	*na = NULL;     /* default return value */
518 
519 	/* first try to see if this is a bridge port. */
520 	NMG_LOCK_ASSERT();
521 	if (strncmp(name, NM_NAME, sizeof(NM_NAME) - 1)) {
522 		return 0;  /* no error, but no VALE prefix */
523 	}
524 
525 	b = nm_find_bridge(name, create);
526 	if (b == NULL) {
527 		D("no bridges available for '%s'", name);
528 		return (ENXIO);
529 	}
530 
531 	/* Now we are sure that name starts with the bridge's name,
532 	 * lookup the port in the bridge. We need to scan the entire
533 	 * list. It is not important to hold a WLOCK on the bridge
534 	 * during the search because NMG_LOCK already guarantees
535 	 * that there are no other possible writers.
536 	 */
537 
538 	/* lookup in the local list of ports */
539 	for (j = 0; j < b->bdg_active_ports; j++) {
540 		i = b->bdg_port_index[j];
541 		vpna = b->bdg_ports[i];
542 		// KASSERT(na != NULL);
543 		ifp = vpna->up.ifp;
544 		/* XXX make sure the name only contains one : */
545 		if (!strcmp(NM_IFPNAME(ifp), name)) {
546 			netmap_adapter_get(&vpna->up);
547 			ND("found existing if %s refs %d", name,
548 				vpna->na_bdg_refcount);
549 			*na = (struct netmap_adapter *)vpna;
550 			return 0;
551 		}
552 	}
553 	/* not found, should we create it? */
554 	if (!create)
555 		return ENXIO;
556 	/* yes we should, see if we have space to attach entries */
557 	needed = 2; /* in some cases we only need 1 */
558 	if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
559 		D("bridge full %d, cannot create new port", b->bdg_active_ports);
560 		return EINVAL;
561 	}
562 	/* record the next two ports available, but do not allocate yet */
563 	cand = b->bdg_port_index[b->bdg_active_ports];
564 	cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
565 	ND("+++ bridge %s port %s used %d avail %d %d",
566 		b->bdg_basename, name, b->bdg_active_ports, cand, cand2);
567 
568 	/*
569 	 * try see if there is a matching NIC with this name
570 	 * (after the bridge's name)
571 	 */
572 	ifnet_lock();
573 	ifp = ifunit(name + b->bdg_namelen + 1);
574 	if (!ifp) { /* this is a virtual port */
575 		/* Create a temporary NA with arguments, then
576 		 * bdg_netmap_attach() will allocate the real one
577 		 * and attach it to the ifp
578 		 */
579 		struct netmap_adapter tmp_na;
580 
581 		ifnet_unlock();
582 
583 		if (nmr->nr_cmd) {
584 			/* nr_cmd must be 0 for a virtual port */
585 			return EINVAL;
586 		}
587 		bzero(&tmp_na, sizeof(tmp_na));
588 		/* bound checking */
589 		tmp_na.num_tx_rings = nmr->nr_tx_rings;
590 		nm_bound_var(&tmp_na.num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
591 		nmr->nr_tx_rings = tmp_na.num_tx_rings; // write back
592 		tmp_na.num_rx_rings = nmr->nr_rx_rings;
593 		nm_bound_var(&tmp_na.num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
594 		nmr->nr_rx_rings = tmp_na.num_rx_rings; // write back
595 		nm_bound_var(&nmr->nr_tx_slots, NM_BRIDGE_RINGSIZE,
596 				1, NM_BDG_MAXSLOTS, NULL);
597 		tmp_na.num_tx_desc = nmr->nr_tx_slots;
598 		nm_bound_var(&nmr->nr_rx_slots, NM_BRIDGE_RINGSIZE,
599 				1, NM_BDG_MAXSLOTS, NULL);
600 		tmp_na.num_rx_desc = nmr->nr_rx_slots;
601 
602 	 	/* create a struct ifnet for the new port.
603 		 * need M_NOWAIT as we are under nma_lock
604 		 */
605 		ifp = kmalloc(sizeof(*ifp), M_DEVBUF, M_NOWAIT | M_ZERO);
606 		if (!ifp)
607 			return ENOMEM;
608 
609 		strcpy(ifp->if_xname, name);
610 		tmp_na.ifp = ifp;
611 		/* bdg_netmap_attach creates a struct netmap_adapter */
612 		error = bdg_netmap_attach(&tmp_na);
613 		if (error) {
614 			D("error %d", error);
615 			kfree(ifp, M_DEVBUF);
616 			return error;
617 		}
618 		ret = NA(ifp);
619 		cand2 = -1;	/* only need one port */
620 	} else {  /* this is a NIC */
621 		struct ifnet *fake_ifp;
622 
623 		error = netmap_get_hw_na(ifp, &ret);
624 		if (error || ret == NULL)
625 			goto out;
626 
627 		/* make sure the NIC is not already in use */
628 		if (NETMAP_OWNED_BY_ANY(ret)) {
629 			D("NIC %s busy, cannot attach to bridge",
630 				NM_IFPNAME(ifp));
631 			error = EINVAL;
632 			goto out;
633 		}
634 		/* create a fake interface */
635 		fake_ifp = kmalloc(sizeof(*ifp), M_DEVBUF, M_NOWAIT | M_ZERO);
636 		if (!fake_ifp) {
637 			error = ENOMEM;
638 			goto out;
639 		}
640 		strcpy(fake_ifp->if_xname, name);
641 		error = netmap_bwrap_attach(fake_ifp, ifp);
642 		if (error) {
643 			kfree(fake_ifp, M_DEVBUF);
644 			goto out;
645 		}
646 		ret = NA(fake_ifp);
647 		if (nmr->nr_arg1 != NETMAP_BDG_HOST)
648 			cand2 = -1; /* only need one port */
649 
650 		ifnet_unlock();
651 	}
652 	vpna = (struct netmap_vp_adapter *)ret;
653 
654 	BDG_WLOCK(b);
655 	vpna->bdg_port = cand;
656 	ND("NIC  %p to bridge port %d", vpna, cand);
657 	/* bind the port to the bridge (virtual ports are not active) */
658 	b->bdg_ports[cand] = vpna;
659 	vpna->na_bdg = b;
660 	b->bdg_active_ports++;
661 	if (cand2 >= 0) {
662 		struct netmap_vp_adapter *hostna = vpna + 1;
663 		/* also bind the host stack to the bridge */
664 		b->bdg_ports[cand2] = hostna;
665 		hostna->bdg_port = cand2;
666 		hostna->na_bdg = b;
667 		b->bdg_active_ports++;
668 		ND("host %p to bridge port %d", hostna, cand2);
669 	}
670 	ND("if %s refs %d", name, vpna->up.na_refcount);
671 	BDG_WUNLOCK(b);
672 	*na = ret;
673 	netmap_adapter_get(ret);
674 	return 0;
675 
676 out:
677 	ifnet_unlock();
678 	return error;
679 }
680 
681 
682 /* Process NETMAP_BDG_ATTACH and NETMAP_BDG_DETACH */
683 static int
684 nm_bdg_attach(struct nmreq *nmr)
685 {
686 	struct netmap_adapter *na;
687 	struct netmap_if *nifp;
688 	struct netmap_priv_d *npriv;
689 	struct netmap_bwrap_adapter *bna;
690 	int error;
691 
692 	npriv = kmalloc(sizeof(*npriv), M_DEVBUF, M_NOWAIT|M_ZERO);
693 	if (npriv == NULL)
694 		return ENOMEM;
695 	NMG_LOCK();
696 	/* XXX probably netmap_get_bdg_na() */
697 	error = netmap_get_na(nmr, &na, 1 /* create if not exists */);
698 	if (error) /* no device, or another bridge or user owns the device */
699 		goto unlock_exit;
700 	/* netmap_get_na() sets na_bdg if this is a physical interface
701 	 * that we can attach to a switch.
702 	 */
703 	if (!nma_is_bwrap(na)) {
704 		/* got reference to a virtual port or direct access to a NIC.
705 		 * perhaps specified no bridge prefix or wrong NIC name
706 		 */
707 		error = EINVAL;
708 		goto unref_exit;
709 	}
710 
711 	if (na->active_fds > 0) { /* already registered */
712 		error = EBUSY;
713 		goto unref_exit;
714 	}
715 
716 	nifp = netmap_do_regif(npriv, na, nmr->nr_ringid, &error);
717 	if (!nifp) {
718 		goto unref_exit;
719 	}
720 
721 	bna = (struct netmap_bwrap_adapter*)na;
722 	bna->na_kpriv = npriv;
723 	NMG_UNLOCK();
724 	ND("registered %s to netmap-mode", NM_IFPNAME(na->ifp));
725 	return 0;
726 
727 unref_exit:
728 	netmap_adapter_put(na);
729 unlock_exit:
730 	NMG_UNLOCK();
731 	bzero(npriv, sizeof(*npriv));
732 	kfree(npriv, M_DEVBUF);
733 	return error;
734 }
735 
736 static int
737 nm_bdg_detach(struct nmreq *nmr)
738 {
739 	struct netmap_adapter *na;
740 	int error;
741 	struct netmap_bwrap_adapter *bna;
742 	int last_instance;
743 
744 	NMG_LOCK();
745 	error = netmap_get_na(nmr, &na, 0 /* don't create */);
746 	if (error) { /* no device, or another bridge or user owns the device */
747 		goto unlock_exit;
748 	}
749 	if (!nma_is_bwrap(na)) {
750 		/* got reference to a virtual port or direct access to a NIC.
751 		 * perhaps specified no bridge's prefix or wrong NIC's name
752 		 */
753 		error = EINVAL;
754 		goto unref_exit;
755 	}
756 	bna = (struct netmap_bwrap_adapter *)na;
757 
758 	if (na->active_fds == 0) { /* not registered */
759 		error = EINVAL;
760 		goto unref_exit;
761 	}
762 
763 	last_instance = netmap_dtor_locked(bna->na_kpriv); /* unregister */
764 	if (!last_instance) {
765 		D("--- error, trying to detach an entry with active mmaps");
766 		error = EINVAL;
767 	} else {
768 		struct netmap_priv_d *npriv = bna->na_kpriv;
769 
770 		bna->na_kpriv = NULL;
771 		D("deleting priv");
772 
773 		bzero(npriv, sizeof(*npriv));
774 		kfree(npriv, M_DEVBUF);
775 	}
776 
777 unref_exit:
778 	netmap_adapter_put(na);
779 unlock_exit:
780 	NMG_UNLOCK();
781 	return error;
782 
783 }
784 
785 
786 /* exported to kernel callers, e.g. OVS ?
787  * Entry point.
788  * Called without NMG_LOCK.
789  */
790 int
791 netmap_bdg_ctl(struct nmreq *nmr, bdg_lookup_fn_t func)
792 {
793 	struct nm_bridge *b;
794 	struct netmap_vp_adapter *na;
795 	struct ifnet *iter;
796 	char *name = nmr->nr_name;
797 	int cmd = nmr->nr_cmd, namelen = strlen(name);
798 	int error = 0, i, j;
799 
800 	switch (cmd) {
801 	case NETMAP_BDG_ATTACH:
802 		error = nm_bdg_attach(nmr);
803 		break;
804 
805 	case NETMAP_BDG_DETACH:
806 		error = nm_bdg_detach(nmr);
807 		break;
808 
809 	case NETMAP_BDG_LIST:
810 		/* this is used to enumerate bridges and ports */
811 		if (namelen) { /* look up indexes of bridge and port */
812 			if (strncmp(name, NM_NAME, strlen(NM_NAME))) {
813 				error = EINVAL;
814 				break;
815 			}
816 			NMG_LOCK();
817 			b = nm_find_bridge(name, 0 /* don't create */);
818 			if (!b) {
819 				error = ENOENT;
820 				NMG_UNLOCK();
821 				break;
822 			}
823 
824 			error = ENOENT;
825 			for (j = 0; j < b->bdg_active_ports; j++) {
826 				i = b->bdg_port_index[j];
827 				na = b->bdg_ports[i];
828 				if (na == NULL) {
829 					D("---AAAAAAAAARGH-------");
830 					continue;
831 				}
832 				iter = na->up.ifp;
833 				/* the former and the latter identify a
834 				 * virtual port and a NIC, respectively
835 				 */
836 				if (!strcmp(iter->if_xname, name)) {
837 					/* bridge index */
838 					nmr->nr_arg1 = b - nm_bridges;
839 					nmr->nr_arg2 = i; /* port index */
840 					error = 0;
841 					break;
842 				}
843 			}
844 			NMG_UNLOCK();
845 		} else {
846 			/* return the first non-empty entry starting from
847 			 * bridge nr_arg1 and port nr_arg2.
848 			 *
849 			 * Users can detect the end of the same bridge by
850 			 * seeing the new and old value of nr_arg1, and can
851 			 * detect the end of all the bridge by error != 0
852 			 */
853 			i = nmr->nr_arg1;
854 			j = nmr->nr_arg2;
855 
856 			NMG_LOCK();
857 			for (error = ENOENT; i < NM_BRIDGES; i++) {
858 				b = nm_bridges + i;
859 				if (j >= b->bdg_active_ports) {
860 					j = 0; /* following bridges scan from 0 */
861 					continue;
862 				}
863 				nmr->nr_arg1 = i;
864 				nmr->nr_arg2 = j;
865 				j = b->bdg_port_index[j];
866 				na = b->bdg_ports[j];
867 				iter = na->up.ifp;
868 				strncpy(name, iter->if_xname, (size_t)IFNAMSIZ);
869 				error = 0;
870 				break;
871 			}
872 			NMG_UNLOCK();
873 		}
874 		break;
875 
876 	case NETMAP_BDG_LOOKUP_REG:
877 		/* register a lookup function to the given bridge.
878 		 * nmr->nr_name may be just bridge's name (including ':'
879 		 * if it is not just NM_NAME).
880 		 */
881 		if (!func) {
882 			error = EINVAL;
883 			break;
884 		}
885 		NMG_LOCK();
886 		b = nm_find_bridge(name, 0 /* don't create */);
887 		if (!b) {
888 			error = EINVAL;
889 		} else {
890 			b->nm_bdg_lookup = func;
891 		}
892 		NMG_UNLOCK();
893 		break;
894 
895 	default:
896 		D("invalid cmd (nmr->nr_cmd) (0x%x)", cmd);
897 		error = EINVAL;
898 		break;
899 	}
900 	return error;
901 }
902 
903 
904 static int
905 netmap_vp_krings_create(struct netmap_adapter *na)
906 {
907 	u_int ntx, nrx, tailroom;
908 	int error, i;
909 	uint32_t *leases;
910 
911 	/* XXX vps do not need host rings,
912 	 * but we crash if we don't have one
913 	 */
914 	ntx = na->num_tx_rings + 1;
915 	nrx = na->num_rx_rings + 1;
916 
917 	/*
918 	 * Leases are attached to RX rings on vale ports
919 	 */
920 	tailroom = sizeof(uint32_t) * na->num_rx_desc * nrx;
921 
922 	error = netmap_krings_create(na, ntx, nrx, tailroom);
923 	if (error)
924 		return error;
925 
926 	leases = na->tailroom;
927 
928 	for (i = 0; i < nrx; i++) { /* Receive rings */
929 		na->rx_rings[i].nkr_leases = leases;
930 		leases += na->num_rx_desc;
931 	}
932 
933 	error = nm_alloc_bdgfwd(na);
934 	if (error) {
935 		netmap_krings_delete(na);
936 		return error;
937 	}
938 
939 	return 0;
940 }
941 
942 static void
943 netmap_vp_krings_delete(struct netmap_adapter *na)
944 {
945 	nm_free_bdgfwd(na);
946 	netmap_krings_delete(na);
947 }
948 
949 
950 static int
951 nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n,
952 	struct netmap_vp_adapter *na, u_int ring_nr);
953 
954 
955 /*
956  * Grab packets from a kring, move them into the ft structure
957  * associated to the tx (input) port. Max one instance per port,
958  * filtered on input (ioctl, poll or XXX).
959  * Returns the next position in the ring.
960  */
961 static int
962 nm_bdg_preflush(struct netmap_vp_adapter *na, u_int ring_nr,
963 	struct netmap_kring *kring, u_int end)
964 {
965 	struct netmap_ring *ring = kring->ring;
966 	struct nm_bdg_fwd *ft;
967 	u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1;
968 	u_int ft_i = 0;	/* start from 0 */
969 	u_int frags = 1; /* how many frags ? */
970 	struct nm_bridge *b = na->na_bdg;
971 
972 	/* To protect against modifications to the bridge we acquire a
973 	 * shared lock, waiting if we can sleep (if the source port is
974 	 * attached to a user process) or with a trylock otherwise (NICs).
975 	 */
976 	ND("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j);
977 	if (na->up.na_flags & NAF_BDG_MAYSLEEP)
978 		BDG_RLOCK(b);
979 	else if (!BDG_RTRYLOCK(b))
980 		return 0;
981 	ND(5, "rlock acquired for %d packets", ((j > end ? lim+1 : 0) + end) - j);
982 	ft = kring->nkr_ft;
983 
984 	for (; likely(j != end); j = nm_next(j, lim)) {
985 		struct netmap_slot *slot = &ring->slot[j];
986 		char *buf;
987 
988 		ft[ft_i].ft_len = slot->len;
989 		ft[ft_i].ft_flags = slot->flags;
990 
991 		ND("flags is 0x%x", slot->flags);
992 		/* this slot goes into a list so initialize the link field */
993 		ft[ft_i].ft_next = NM_FT_NULL;
994 		buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ?
995 			(void *)(uintptr_t)slot->ptr : BDG_NMB(&na->up, slot);
996 		prefetch(buf);
997 		++ft_i;
998 		if (slot->flags & NS_MOREFRAG) {
999 			frags++;
1000 			continue;
1001 		}
1002 		if (unlikely(netmap_verbose && frags > 1))
1003 			RD(5, "%d frags at %d", frags, ft_i - frags);
1004 		ft[ft_i - frags].ft_frags = frags;
1005 		frags = 1;
1006 		if (unlikely((int)ft_i >= bridge_batch))
1007 			ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
1008 	}
1009 	if (frags > 1) {
1010 		D("truncate incomplete fragment at %d (%d frags)", ft_i, frags);
1011 		// ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG
1012 		ft[ft_i - 1].ft_frags &= ~NS_MOREFRAG;
1013 		ft[ft_i - frags].ft_frags = frags - 1;
1014 	}
1015 	if (ft_i)
1016 		ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
1017 	BDG_RUNLOCK(b);
1018 	return j;
1019 }
1020 
1021 
1022 /*
1023  *---- support for virtual bridge -----
1024  */
1025 
1026 /* ----- FreeBSD if_bridge hash function ------- */
1027 
1028 /*
1029  * The following hash function is adapted from "Hash Functions" by Bob Jenkins
1030  * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
1031  *
1032  * http://www.burtleburtle.net/bob/hash/spooky.html
1033  */
1034 #define mix(a, b, c)                                                    \
1035 do {                                                                    \
1036         a -= b; a -= c; a ^= (c >> 13);                                 \
1037         b -= c; b -= a; b ^= (a << 8);                                  \
1038         c -= a; c -= b; c ^= (b >> 13);                                 \
1039         a -= b; a -= c; a ^= (c >> 12);                                 \
1040         b -= c; b -= a; b ^= (a << 16);                                 \
1041         c -= a; c -= b; c ^= (b >> 5);                                  \
1042         a -= b; a -= c; a ^= (c >> 3);                                  \
1043         b -= c; b -= a; b ^= (a << 10);                                 \
1044         c -= a; c -= b; c ^= (b >> 15);                                 \
1045 } while (/*CONSTCOND*/0)
1046 
1047 static __inline uint32_t
1048 nm_bridge_rthash(const uint8_t *addr)
1049 {
1050         uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hask key
1051 
1052         b += addr[5] << 8;
1053         b += addr[4];
1054         a += addr[3] << 24;
1055         a += addr[2] << 16;
1056         a += addr[1] << 8;
1057         a += addr[0];
1058 
1059         mix(a, b, c);
1060 #define BRIDGE_RTHASH_MASK	(NM_BDG_HASH-1)
1061         return (c & BRIDGE_RTHASH_MASK);
1062 }
1063 
1064 #undef mix
1065 
1066 
1067 static int
1068 bdg_netmap_reg(struct netmap_adapter *na, int onoff)
1069 {
1070 	struct netmap_vp_adapter *vpna =
1071 		(struct netmap_vp_adapter*)na;
1072 	struct ifnet *ifp = na->ifp;
1073 
1074 	/* the interface is already attached to the bridge,
1075 	 * so we only need to toggle IFCAP_NETMAP.
1076 	 */
1077 	BDG_WLOCK(vpna->na_bdg);
1078 	if (onoff) {
1079 		ifp->if_capenable |= IFCAP_NETMAP;
1080 	} else {
1081 		ifp->if_capenable &= ~IFCAP_NETMAP;
1082 	}
1083 	BDG_WUNLOCK(vpna->na_bdg);
1084 	return 0;
1085 }
1086 
1087 
1088 /*
1089  * Lookup function for a learning bridge.
1090  * Update the hash table with the source address,
1091  * and then returns the destination port index, and the
1092  * ring in *dst_ring (at the moment, always use ring 0)
1093  */
1094 u_int
1095 netmap_bdg_learning(char *buf, u_int buf_len, uint8_t *dst_ring,
1096 		struct netmap_vp_adapter *na)
1097 {
1098 	struct nm_hash_ent *ht = na->na_bdg->ht;
1099 	uint32_t sh, dh;
1100 	u_int dst, mysrc = na->bdg_port;
1101 	uint64_t smac, dmac;
1102 
1103 	if (buf_len < 14) {
1104 		D("invalid buf length %d", buf_len);
1105 		return NM_BDG_NOPORT;
1106 	}
1107 	dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff;
1108 	smac = le64toh(*(uint64_t *)(buf + 4));
1109 	smac >>= 16;
1110 
1111 	/*
1112 	 * The hash is somewhat expensive, there might be some
1113 	 * worthwhile optimizations here.
1114 	 */
1115 	if ((buf[6] & 1) == 0) { /* valid src */
1116 		uint8_t *s = buf+6;
1117 		sh = nm_bridge_rthash(s); // XXX hash of source
1118 		/* update source port forwarding entry */
1119 		ht[sh].mac = smac;	/* XXX expire ? */
1120 		ht[sh].ports = mysrc;
1121 		if (netmap_verbose)
1122 		    D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d",
1123 			s[0], s[1], s[2], s[3], s[4], s[5], mysrc);
1124 	}
1125 	dst = NM_BDG_BROADCAST;
1126 	if ((buf[0] & 1) == 0) { /* unicast */
1127 		dh = nm_bridge_rthash(buf); // XXX hash of dst
1128 		if (ht[dh].mac == dmac) {	/* found dst */
1129 			dst = ht[dh].ports;
1130 		}
1131 		/* XXX otherwise return NM_BDG_UNKNOWN ? */
1132 	}
1133 	*dst_ring = 0;
1134 	return dst;
1135 }
1136 
1137 
1138 /*
1139  * This flush routine supports only unicast and broadcast but a large
1140  * number of ports, and lets us replace the learn and dispatch functions.
1141  */
1142 int
1143 nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
1144 		u_int ring_nr)
1145 {
1146 	struct nm_bdg_q *dst_ents, *brddst;
1147 	uint16_t num_dsts = 0, *dsts;
1148 	struct nm_bridge *b = na->na_bdg;
1149 	u_int i, j, me = na->bdg_port;
1150 
1151 	/*
1152 	 * The work area (pointed by ft) is followed by an array of
1153 	 * pointers to queues , dst_ents; there are NM_BDG_MAXRINGS
1154 	 * queues per port plus one for the broadcast traffic.
1155 	 * Then we have an array of destination indexes.
1156 	 */
1157 	dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
1158 	dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1);
1159 
1160 	/* first pass: find a destination for each packet in the batch */
1161 	for (i = 0; likely(i < n); i += ft[i].ft_frags) {
1162 		uint8_t dst_ring = ring_nr; /* default, same ring as origin */
1163 		uint16_t dst_port, d_i;
1164 		struct nm_bdg_q *d;
1165 
1166 		ND("slot %d frags %d", i, ft[i].ft_frags);
1167 		dst_port = b->nm_bdg_lookup(ft[i].ft_buf, ft[i].ft_len,
1168 			&dst_ring, na);
1169 		if (netmap_verbose > 255)
1170 			RD(5, "slot %d port %d -> %d", i, me, dst_port);
1171 		if (dst_port == NM_BDG_NOPORT)
1172 			continue; /* this packet is identified to be dropped */
1173 		else if (unlikely(dst_port > NM_BDG_MAXPORTS))
1174 			continue;
1175 		else if (dst_port == NM_BDG_BROADCAST)
1176 			dst_ring = 0; /* broadcasts always go to ring 0 */
1177 		else if (unlikely(dst_port == me ||
1178 		    !b->bdg_ports[dst_port]))
1179 			continue;
1180 
1181 		/* get a position in the scratch pad */
1182 		d_i = dst_port * NM_BDG_MAXRINGS + dst_ring;
1183 		d = dst_ents + d_i;
1184 
1185 		/* append the first fragment to the list */
1186 		if (d->bq_head == NM_FT_NULL) { /* new destination */
1187 			d->bq_head = d->bq_tail = i;
1188 			/* remember this position to be scanned later */
1189 			if (dst_port != NM_BDG_BROADCAST)
1190 				dsts[num_dsts++] = d_i;
1191 		} else {
1192 			ft[d->bq_tail].ft_next = i;
1193 			d->bq_tail = i;
1194 		}
1195 		d->bq_len += ft[i].ft_frags;
1196 	}
1197 
1198 	/*
1199 	 * Broadcast traffic goes to ring 0 on all destinations.
1200 	 * So we need to add these rings to the list of ports to scan.
1201 	 * XXX at the moment we scan all NM_BDG_MAXPORTS ports, which is
1202 	 * expensive. We should keep a compact list of active destinations
1203 	 * so we could shorten this loop.
1204 	 */
1205 	brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS;
1206 	if (brddst->bq_head != NM_FT_NULL) {
1207 		for (j = 0; likely(j < b->bdg_active_ports); j++) {
1208 			uint16_t d_i;
1209 			i = b->bdg_port_index[j];
1210 			if (unlikely(i == me))
1211 				continue;
1212 			d_i = i * NM_BDG_MAXRINGS;
1213 			if (dst_ents[d_i].bq_head == NM_FT_NULL)
1214 				dsts[num_dsts++] = d_i;
1215 		}
1216 	}
1217 
1218 	ND(5, "pass 1 done %d pkts %d dsts", n, num_dsts);
1219 	/* second pass: scan destinations (XXX will be modular somehow) */
1220 	for (i = 0; i < num_dsts; i++) {
1221 		struct ifnet *dst_ifp;
1222 		struct netmap_vp_adapter *dst_na;
1223 		struct netmap_kring *kring;
1224 		struct netmap_ring *ring;
1225 		u_int dst_nr, lim, j, sent = 0, d_i, next, brd_next;
1226 		u_int needed, howmany;
1227 		int retry = netmap_txsync_retry;
1228 		struct nm_bdg_q *d;
1229 		uint32_t my_start = 0, lease_idx = 0;
1230 		int nrings;
1231 
1232 		d_i = dsts[i];
1233 		ND("second pass %d port %d", i, d_i);
1234 		d = dst_ents + d_i;
1235 		// XXX fix the division
1236 		dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS];
1237 		/* protect from the lookup function returning an inactive
1238 		 * destination port
1239 		 */
1240 		if (unlikely(dst_na == NULL))
1241 			goto cleanup;
1242 		if (dst_na->up.na_flags & NAF_SW_ONLY)
1243 			goto cleanup;
1244 		dst_ifp = dst_na->up.ifp;
1245 		/*
1246 		 * The interface may be in !netmap mode in two cases:
1247 		 * - when na is attached but not activated yet;
1248 		 * - when na is being deactivated but is still attached.
1249 		 */
1250 		if (unlikely(!(dst_ifp->if_capenable & IFCAP_NETMAP))) {
1251 			ND("not in netmap mode!");
1252 			goto cleanup;
1253 		}
1254 
1255 		/* there is at least one either unicast or broadcast packet */
1256 		brd_next = brddst->bq_head;
1257 		next = d->bq_head;
1258 		/* we need to reserve this many slots. If fewer are
1259 		 * available, some packets will be dropped.
1260 		 * Packets may have multiple fragments, so we may not use
1261 		 * there is a chance that we may not use all of the slots
1262 		 * we have claimed, so we will need to handle the leftover
1263 		 * ones when we regain the lock.
1264 		 */
1265 		needed = d->bq_len + brddst->bq_len;
1266 
1267 		ND(5, "pass 2 dst %d is %x %s",
1268 			i, d_i, is_vp ? "virtual" : "nic/host");
1269 		dst_nr = d_i & (NM_BDG_MAXRINGS-1);
1270 		nrings = dst_na->up.num_rx_rings;
1271 		if (dst_nr >= nrings)
1272 			dst_nr = dst_nr % nrings;
1273 		kring = &dst_na->up.rx_rings[dst_nr];
1274 		ring = kring->ring;
1275 		lim = kring->nkr_num_slots - 1;
1276 
1277 retry:
1278 
1279 		/* reserve the buffers in the queue and an entry
1280 		 * to report completion, and drop lock.
1281 		 * XXX this might become a helper function.
1282 		 */
1283 		lockmgr(&kring->q_lock, LK_EXCLUSIVE);
1284 		if (kring->nkr_stopped) {
1285 			lockmgr(&kring->q_lock, LK_RELEASE);
1286 			goto cleanup;
1287 		}
1288 		if (dst_na->retry) {
1289 			dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
1290 		}
1291 		my_start = j = kring->nkr_hwlease;
1292 		howmany = nm_kr_space(kring, 1);
1293 		if (needed < howmany)
1294 			howmany = needed;
1295 		lease_idx = nm_kr_lease(kring, howmany, 1);
1296 		lockmgr(&kring->q_lock, LK_RELEASE);
1297 
1298 		/* only retry if we need more than available slots */
1299 		if (retry && needed <= howmany)
1300 			retry = 0;
1301 
1302 		/* copy to the destination queue */
1303 		while (howmany > 0) {
1304 			struct netmap_slot *slot;
1305 			struct nm_bdg_fwd *ft_p, *ft_end;
1306 			u_int cnt;
1307 
1308 			/* find the queue from which we pick next packet.
1309 			 * NM_FT_NULL is always higher than valid indexes
1310 			 * so we never dereference it if the other list
1311 			 * has packets (and if both are empty we never
1312 			 * get here).
1313 			 */
1314 			if (next < brd_next) {
1315 				ft_p = ft + next;
1316 				next = ft_p->ft_next;
1317 			} else { /* insert broadcast */
1318 				ft_p = ft + brd_next;
1319 				brd_next = ft_p->ft_next;
1320 			}
1321 			cnt = ft_p->ft_frags; // cnt > 0
1322 			if (unlikely(cnt > howmany))
1323 			    break; /* no more space */
1324 			howmany -= cnt;
1325 			if (netmap_verbose && cnt > 1)
1326 				RD(5, "rx %d frags to %d", cnt, j);
1327 			ft_end = ft_p + cnt;
1328 			do {
1329 			    void *dst, *src = ft_p->ft_buf;
1330 			    size_t len = (ft_p->ft_len + 63) & ~63;
1331 
1332 			    slot = &ring->slot[j];
1333 			    dst = BDG_NMB(&dst_na->up, slot);
1334 			    /* round to a multiple of 64 */
1335 
1336 			    ND("send %d %d bytes at %s:%d",
1337 				i, ft_p->ft_len, NM_IFPNAME(dst_ifp), j);
1338 			    if (ft_p->ft_flags & NS_INDIRECT) {
1339 				if (copyin(src, dst, len)) {
1340 					// invalid user pointer, pretend len is 0
1341 					ft_p->ft_len = 0;
1342 				}
1343 			    } else {
1344 				//memcpy(dst, src, len);
1345 				pkt_copy(src, dst, (int)len);
1346 			    }
1347 			    slot->len = ft_p->ft_len;
1348 			    slot->flags = (cnt << 8)| NS_MOREFRAG;
1349 			    j = nm_next(j, lim);
1350 			    ft_p++;
1351 			    sent++;
1352 			} while (ft_p != ft_end);
1353 			slot->flags = (cnt << 8); /* clear flag on last entry */
1354 			/* are we done ? */
1355 			if (next == NM_FT_NULL && brd_next == NM_FT_NULL)
1356 				break;
1357 		}
1358 		{
1359 		    /* current position */
1360 		    uint32_t *p = kring->nkr_leases; /* shorthand */
1361 		    uint32_t update_pos;
1362 		    int still_locked = 1;
1363 
1364 		    lockmgr(&kring->q_lock, LK_EXCLUSIVE);
1365 		    if (unlikely(howmany > 0)) {
1366 			/* not used all bufs. If i am the last one
1367 			 * i can recover the slots, otherwise must
1368 			 * fill them with 0 to mark empty packets.
1369 			 */
1370 			ND("leftover %d bufs", howmany);
1371 			if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) {
1372 			    /* yes i am the last one */
1373 			    ND("roll back nkr_hwlease to %d", j);
1374 			    kring->nkr_hwlease = j;
1375 			} else {
1376 			    while (howmany-- > 0) {
1377 				ring->slot[j].len = 0;
1378 				ring->slot[j].flags = 0;
1379 				j = nm_next(j, lim);
1380 			    }
1381 			}
1382 		    }
1383 		    p[lease_idx] = j; /* report I am done */
1384 
1385 		    update_pos = nm_kr_rxpos(kring);
1386 
1387 		    if (my_start == update_pos) {
1388 			/* all slots before my_start have been reported,
1389 			 * so scan subsequent leases to see if other ranges
1390 			 * have been completed, and to a selwakeup or txsync.
1391 		         */
1392 			while (lease_idx != kring->nkr_lease_idx &&
1393 				p[lease_idx] != NR_NOSLOT) {
1394 			    j = p[lease_idx];
1395 			    p[lease_idx] = NR_NOSLOT;
1396 			    lease_idx = nm_next(lease_idx, lim);
1397 			}
1398 			/* j is the new 'write' position. j != my_start
1399 			 * means there are new buffers to report
1400 			 */
1401 			if (likely(j != my_start)) {
1402 				uint32_t old_avail = kring->nr_hwavail;
1403 
1404 				kring->nr_hwavail = (j >= kring->nr_hwcur) ?
1405 					j - kring->nr_hwcur :
1406 					j + lim + 1 - kring->nr_hwcur;
1407 				if (kring->nr_hwavail < old_avail) {
1408 					D("avail shrink %d -> %d",
1409 						old_avail, kring->nr_hwavail);
1410 				}
1411 				dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
1412 				still_locked = 0;
1413 				lockmgr(&kring->q_lock, LK_RELEASE);
1414 				if (dst_na->retry && retry--)
1415 					goto retry;
1416 			}
1417 		    }
1418 		    if (still_locked)
1419 			lockmgr(&kring->q_lock, LK_RELEASE);
1420 		}
1421 cleanup:
1422 		d->bq_head = d->bq_tail = NM_FT_NULL; /* cleanup */
1423 		d->bq_len = 0;
1424 	}
1425 	brddst->bq_head = brddst->bq_tail = NM_FT_NULL; /* cleanup */
1426 	brddst->bq_len = 0;
1427 	return 0;
1428 }
1429 
1430 static int
1431 netmap_vp_txsync(struct netmap_vp_adapter *na, u_int ring_nr, int flags)
1432 {
1433 	struct netmap_kring *kring = &na->up.tx_rings[ring_nr];
1434 	struct netmap_ring *ring = kring->ring;
1435 	u_int j, k, lim = kring->nkr_num_slots - 1;
1436 
1437 	k = ring->cur;
1438 	if (k > lim)
1439 		return netmap_ring_reinit(kring);
1440 
1441 	if (bridge_batch <= 0) { /* testing only */
1442 		j = k; // used all
1443 		goto done;
1444 	}
1445 	if (bridge_batch > NM_BDG_BATCH)
1446 		bridge_batch = NM_BDG_BATCH;
1447 
1448 	j = nm_bdg_preflush(na, ring_nr, kring, k);
1449 	if (j != k)
1450 		D("early break at %d/ %d, avail %d", j, k, kring->nr_hwavail);
1451 	/* k-j modulo ring size is the number of slots processed */
1452 	if (k < j)
1453 		k += kring->nkr_num_slots;
1454 	kring->nr_hwavail = lim - (k - j);
1455 
1456 done:
1457 	kring->nr_hwcur = j;
1458 	ring->avail = kring->nr_hwavail;
1459 	if (netmap_verbose)
1460 		D("%s ring %d flags %d", NM_IFPNAME(na->up.ifp), ring_nr, flags);
1461 	return 0;
1462 }
1463 
1464 
1465 /*
1466  * main dispatch routine for the bridge.
1467  * We already know that only one thread is running this.
1468  * we must run nm_bdg_preflush without lock.
1469  */
1470 static int
1471 bdg_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
1472 {
1473 	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
1474 	return netmap_vp_txsync(vpna, ring_nr, flags);
1475 }
1476 
1477 
1478 /*
1479  * user process reading from a VALE switch.
1480  * Already protected against concurrent calls from userspace,
1481  * but we must acquire the queue's lock to protect against
1482  * writers on the same queue.
1483  */
1484 static int
1485 bdg_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
1486 {
1487 	struct netmap_kring *kring = &na->rx_rings[ring_nr];
1488 	struct netmap_ring *ring = kring->ring;
1489 	u_int j, lim = kring->nkr_num_slots - 1;
1490 	u_int k = ring->cur, resvd = ring->reserved;
1491 	int n;
1492 
1493 	lockmgr(&kring->q_lock, LK_EXCLUSIVE);
1494 	if (k > lim) {
1495 		D("ouch dangerous reset!!!");
1496 		n = netmap_ring_reinit(kring);
1497 		goto done;
1498 	}
1499 
1500 	/* skip past packets that userspace has released */
1501 	j = kring->nr_hwcur;    /* netmap ring index */
1502 	if (resvd > 0) {
1503 		if (resvd + ring->avail >= lim + 1) {
1504 			D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
1505 			ring->reserved = resvd = 0; // XXX panic...
1506 		}
1507 		k = (k >= resvd) ? k - resvd : k + lim + 1 - resvd;
1508 	}
1509 
1510 	if (j != k) { /* userspace has released some packets. */
1511 		n = k - j;
1512 		if (n < 0)
1513 			n += kring->nkr_num_slots;
1514 		ND("userspace releases %d packets", n);
1515 		for (n = 0; likely(j != k); n++) {
1516 			struct netmap_slot *slot = &ring->slot[j];
1517 			void *addr = BDG_NMB(na, slot);
1518 
1519 			if (addr == netmap_buffer_base) { /* bad buf */
1520 				D("bad buffer index %d, ignore ?",
1521 					slot->buf_idx);
1522 			}
1523 			slot->flags &= ~NS_BUF_CHANGED;
1524 			j = nm_next(j, lim);
1525 		}
1526 		kring->nr_hwavail -= n;
1527 		kring->nr_hwcur = k;
1528 	}
1529 	/* tell userspace that there are new packets */
1530 	ring->avail = kring->nr_hwavail - resvd;
1531 	n = 0;
1532 done:
1533 	lockmgr(&kring->q_lock, LK_RELEASE);
1534 	return n;
1535 }
1536 
1537 static int
1538 bdg_netmap_attach(struct netmap_adapter *arg)
1539 {
1540 	struct netmap_vp_adapter *vpna;
1541 	struct netmap_adapter *na;
1542 	int error;
1543 
1544 	vpna = kmalloc(sizeof(*vpna), M_DEVBUF, M_NOWAIT | M_ZERO);
1545 	if (vpna == NULL)
1546 		return ENOMEM;
1547 	na = &vpna->up;
1548 	*na = *arg;
1549 	na->na_flags |= NAF_BDG_MAYSLEEP | NAF_MEM_OWNER;
1550 	na->nm_txsync = bdg_netmap_txsync;
1551 	na->nm_rxsync = bdg_netmap_rxsync;
1552 	na->nm_register = bdg_netmap_reg;
1553 	na->nm_dtor = netmap_adapter_vp_dtor;
1554 	na->nm_krings_create = netmap_vp_krings_create;
1555 	na->nm_krings_delete = netmap_vp_krings_delete;
1556 	na->nm_mem = netmap_mem_private_new(NM_IFPNAME(arg->ifp),
1557 			na->num_tx_rings, na->num_tx_desc,
1558 			na->num_rx_rings, na->num_rx_desc);
1559 	/* other nmd fields are set in the common routine */
1560 	error = netmap_attach_common(na);
1561 	if (error) {
1562 		kfree(vpna, M_DEVBUF);
1563 		return error;
1564 	}
1565 	return 0;
1566 }
1567 
1568 static void
1569 netmap_bwrap_dtor(struct netmap_adapter *na)
1570 {
1571 	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
1572 	struct netmap_adapter *hwna = bna->hwna;
1573 	struct nm_bridge *b = bna->up.na_bdg,
1574 		*bh = bna->host.na_bdg;
1575 	struct ifnet *ifp = na->ifp;
1576 
1577 	ND("na %p", na);
1578 
1579 	if (b) {
1580 		netmap_bdg_detach_common(b, bna->up.bdg_port,
1581 			(bh ? bna->host.bdg_port : -1));
1582 	}
1583 
1584 	hwna->na_private = NULL;
1585 	netmap_adapter_put(hwna);
1586 
1587 	bzero(ifp, sizeof(*ifp));
1588 	kfree(ifp, M_DEVBUF);
1589 	na->ifp = NULL;
1590 
1591 }
1592 
1593 /*
1594  * Pass packets from nic to the bridge.
1595  * XXX TODO check locking: this is called from the interrupt
1596  * handler so we should make sure that the interface is not
1597  * disconnected while passing down an interrupt.
1598  *
1599  * Note, no user process can access this NIC so we can ignore
1600  * the info in the 'ring'.
1601  */
1602 /* callback that overwrites the hwna notify callback.
1603  * Packets come from the outside or from the host stack and are put on an hwna rx ring.
1604  * The bridge wrapper then sends the packets through the bridge.
1605  */
1606 static int
1607 netmap_bwrap_intr_notify(struct netmap_adapter *na, u_int ring_nr, enum txrx tx, int flags)
1608 {
1609 	struct ifnet *ifp = na->ifp;
1610 	struct netmap_bwrap_adapter *bna = na->na_private;
1611 	struct netmap_vp_adapter *hostna = &bna->host;
1612 	struct netmap_kring *kring, *bkring;
1613 	struct netmap_ring *ring;
1614 	int is_host_ring = ring_nr == na->num_rx_rings;
1615 	struct netmap_vp_adapter *vpna = &bna->up;
1616 	int error = 0;
1617 
1618 	ND("%s[%d] %s %x", NM_IFPNAME(ifp), ring_nr, (tx == NR_TX ? "TX" : "RX"), flags);
1619 
1620 	if (flags & NAF_DISABLE_NOTIFY) {
1621 		kring = tx == NR_TX ? na->tx_rings : na->rx_rings;
1622 		bkring = tx == NR_TX ? vpna->up.rx_rings : vpna->up.tx_rings;
1623 		if (kring->nkr_stopped)
1624 			netmap_disable_ring(bkring);
1625 		else
1626 			bkring->nkr_stopped = 0;
1627 		return 0;
1628 	}
1629 
1630 	if (ifp == NULL || !(ifp->if_capenable & IFCAP_NETMAP))
1631 		return 0;
1632 
1633 	if (tx == NR_TX)
1634 		return 0;
1635 
1636 	kring = &na->rx_rings[ring_nr];
1637 	ring = kring->ring;
1638 
1639 	/* make sure the ring is not disabled */
1640 	if (nm_kr_tryget(kring))
1641 		return 0;
1642 
1643 	if (is_host_ring && hostna->na_bdg == NULL) {
1644 		error = bna->save_notify(na, ring_nr, tx, flags);
1645 		goto put_out;
1646 	}
1647 
1648 	if (is_host_ring) {
1649 		vpna = hostna;
1650 		ring_nr = 0;
1651 	} else {
1652 		/* fetch packets that have arrived.
1653 		 * XXX maybe do this in a loop ?
1654 		 */
1655 		error = na->nm_rxsync(na, ring_nr, 0);
1656 		if (error)
1657 			goto put_out;
1658 	}
1659 	if (kring->nr_hwavail == 0 && netmap_verbose) {
1660 		D("how strange, interrupt with no packets on %s",
1661 			NM_IFPNAME(ifp));
1662 		goto put_out;
1663 	}
1664 	/* XXX avail ? */
1665 	ring->cur = nm_kr_rxpos(kring);
1666 	netmap_vp_txsync(vpna, ring_nr, flags);
1667 
1668 	if (!is_host_ring)
1669 		error = na->nm_rxsync(na, ring_nr, 0);
1670 
1671 put_out:
1672 	nm_kr_put(kring);
1673 	return error;
1674 }
1675 
1676 static int
1677 netmap_bwrap_register(struct netmap_adapter *na, int onoff)
1678 {
1679 	struct netmap_bwrap_adapter *bna =
1680 		(struct netmap_bwrap_adapter *)na;
1681 	struct netmap_adapter *hwna = bna->hwna;
1682 	struct netmap_vp_adapter *hostna = &bna->host;
1683 	int error;
1684 
1685 	ND("%s %d", NM_IFPNAME(ifp), onoff);
1686 
1687 	if (onoff) {
1688 		int i;
1689 
1690 		hwna->na_lut = na->na_lut;
1691 		hwna->na_lut_objtotal = na->na_lut_objtotal;
1692 
1693 		if (hostna->na_bdg) {
1694 			hostna->up.na_lut = na->na_lut;
1695 			hostna->up.na_lut_objtotal = na->na_lut_objtotal;
1696 		}
1697 
1698 		/* cross-link the netmap rings */
1699 		for (i = 0; i <= na->num_tx_rings; i++) {
1700 			hwna->tx_rings[i].nkr_num_slots = na->rx_rings[i].nkr_num_slots;
1701 			hwna->tx_rings[i].ring = na->rx_rings[i].ring;
1702 		}
1703 		for (i = 0; i <= na->num_rx_rings; i++) {
1704 			hwna->rx_rings[i].nkr_num_slots = na->tx_rings[i].nkr_num_slots;
1705 			hwna->rx_rings[i].ring = na->tx_rings[i].ring;
1706 		}
1707 	}
1708 
1709 	if (hwna->ifp) {
1710 		error = hwna->nm_register(hwna, onoff);
1711 		if (error)
1712 			return error;
1713 	}
1714 
1715 	bdg_netmap_reg(na, onoff);
1716 
1717 	if (onoff) {
1718 		bna->save_notify = hwna->nm_notify;
1719 		hwna->nm_notify = netmap_bwrap_intr_notify;
1720 	} else {
1721 		hwna->nm_notify = bna->save_notify;
1722 		hwna->na_lut = NULL;
1723 		hwna->na_lut_objtotal = 0;
1724 	}
1725 
1726 	return 0;
1727 }
1728 
1729 static int
1730 netmap_bwrap_config(struct netmap_adapter *na, u_int *txr, u_int *txd,
1731 				    u_int *rxr, u_int *rxd)
1732 {
1733 	struct netmap_bwrap_adapter *bna =
1734 		(struct netmap_bwrap_adapter *)na;
1735 	struct netmap_adapter *hwna = bna->hwna;
1736 
1737 	/* forward the request */
1738 	netmap_update_config(hwna);
1739 	/* swap the results */
1740 	*txr = hwna->num_rx_rings;
1741 	*txd = hwna->num_rx_desc;
1742 	*rxr = hwna->num_tx_rings;
1743 	*rxd = hwna->num_rx_desc;
1744 
1745 	return 0;
1746 }
1747 
1748 static int
1749 netmap_bwrap_krings_create(struct netmap_adapter *na)
1750 {
1751 	struct netmap_bwrap_adapter *bna =
1752 		(struct netmap_bwrap_adapter *)na;
1753 	struct netmap_adapter *hwna = bna->hwna;
1754 	struct netmap_adapter *hostna = &bna->host.up;
1755 	int error;
1756 
1757 	ND("%s", NM_IFPNAME(na->ifp));
1758 
1759 	error = netmap_vp_krings_create(na);
1760 	if (error)
1761 		return error;
1762 
1763 	error = hwna->nm_krings_create(hwna);
1764 	if (error) {
1765 		netmap_vp_krings_delete(na);
1766 		return error;
1767 	}
1768 
1769 	hostna->tx_rings = na->tx_rings + na->num_tx_rings;
1770 	hostna->rx_rings = na->rx_rings + na->num_rx_rings;
1771 
1772 	return 0;
1773 }
1774 
1775 static void
1776 netmap_bwrap_krings_delete(struct netmap_adapter *na)
1777 {
1778 	struct netmap_bwrap_adapter *bna =
1779 		(struct netmap_bwrap_adapter *)na;
1780 	struct netmap_adapter *hwna = bna->hwna;
1781 
1782 	ND("%s", NM_IFPNAME(na->ifp));
1783 
1784 	hwna->nm_krings_delete(hwna);
1785 	netmap_vp_krings_delete(na);
1786 }
1787 
1788 /* notify method for the bridge-->hwna direction */
1789 static int
1790 netmap_bwrap_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
1791 {
1792 	struct netmap_bwrap_adapter *bna =
1793 		(struct netmap_bwrap_adapter *)na;
1794 	struct netmap_adapter *hwna = bna->hwna;
1795 	struct netmap_kring *kring, *hw_kring;
1796 	struct netmap_ring *ring;
1797 	u_int lim, k;
1798 	int error = 0;
1799 
1800 	if (tx == NR_TX)
1801 	        return ENXIO;
1802 
1803 	kring = &na->rx_rings[ring_n];
1804 	hw_kring = &hwna->tx_rings[ring_n];
1805 	ring = kring->ring;
1806 
1807 	lim = kring->nkr_num_slots - 1;
1808 	k = nm_kr_rxpos(kring);
1809 
1810 	if (hwna->ifp == NULL || !(hwna->ifp->if_capenable & IFCAP_NETMAP))
1811 		return 0;
1812 	ring->cur = k;
1813 	ND("%s[%d] PRE rx(%d, %d, %d, %d) ring(%d, %d, %d) tx(%d, %d)",
1814 		NM_IFPNAME(na->ifp), ring_n,
1815 		kring->nr_hwcur, kring->nr_hwavail, kring->nkr_hwlease, kring->nr_hwreserved,
1816 		ring->cur, ring->avail, ring->reserved,
1817 		hw_kring->nr_hwcur, hw_kring->nr_hwavail);
1818 	if (ring_n == na->num_rx_rings) {
1819 		netmap_txsync_to_host(hwna);
1820 	} else {
1821 		error = hwna->nm_txsync(hwna, ring_n, flags);
1822 	}
1823 	kring->nr_hwcur = ring->cur;
1824 	kring->nr_hwavail = 0;
1825 	kring->nr_hwreserved = lim - ring->avail;
1826 	ND("%s[%d] PST rx(%d, %d, %d, %d) ring(%d, %d, %d) tx(%d, %d)",
1827 		NM_IFPNAME(na->ifp), ring_n,
1828 		kring->nr_hwcur, kring->nr_hwavail, kring->nkr_hwlease, kring->nr_hwreserved,
1829 		ring->cur, ring->avail, ring->reserved,
1830 		hw_kring->nr_hwcur, hw_kring->nr_hwavail);
1831 
1832 	return error;
1833 }
1834 
1835 static int
1836 netmap_bwrap_host_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
1837 {
1838 	struct netmap_bwrap_adapter *bna = na->na_private;
1839 	struct netmap_adapter *port_na = &bna->up.up;
1840 	if (tx == NR_TX || ring_n != 0)
1841 		return ENXIO;
1842 	return netmap_bwrap_notify(port_na, port_na->num_rx_rings, NR_RX, flags);
1843 }
1844 
1845 /* attach a bridge wrapper to the 'real' device */
1846 static int
1847 netmap_bwrap_attach(struct ifnet *fake, struct ifnet *real)
1848 {
1849 	struct netmap_bwrap_adapter *bna;
1850 	struct netmap_adapter *na;
1851 	struct netmap_adapter *hwna = NA(real);
1852 	struct netmap_adapter *hostna;
1853 	int error;
1854 
1855 
1856 	bna = kmalloc(sizeof(*bna), M_DEVBUF, M_NOWAIT | M_ZERO);
1857 	if (bna == NULL)
1858 		return ENOMEM;
1859 
1860 	na = &bna->up.up;
1861 	na->ifp = fake;
1862 	/* fill the ring data for the bwrap adapter with rx/tx meanings
1863 	 * swapped. The real cross-linking will be done during register,
1864 	 * when all the krings will have been created.
1865 	 */
1866 	na->num_rx_rings = hwna->num_tx_rings;
1867 	na->num_tx_rings = hwna->num_rx_rings;
1868 	na->num_tx_desc = hwna->num_rx_desc;
1869 	na->num_rx_desc = hwna->num_tx_desc;
1870 	na->nm_dtor = netmap_bwrap_dtor;
1871 	na->nm_register = netmap_bwrap_register;
1872 	// na->nm_txsync = netmap_bwrap_txsync;
1873 	// na->nm_rxsync = netmap_bwrap_rxsync;
1874 	na->nm_config = netmap_bwrap_config;
1875 	na->nm_krings_create = netmap_bwrap_krings_create;
1876 	na->nm_krings_delete = netmap_bwrap_krings_delete;
1877 	na->nm_notify = netmap_bwrap_notify;
1878 	na->nm_mem = hwna->nm_mem;
1879 	na->na_private = na; /* prevent NIOCREGIF */
1880 	bna->up.retry = 1; /* XXX maybe this should depend on the hwna */
1881 
1882 	bna->hwna = hwna;
1883 	netmap_adapter_get(hwna);
1884 	hwna->na_private = bna; /* weak reference */
1885 
1886 	hostna = &bna->host.up;
1887 	hostna->ifp = hwna->ifp;
1888 	hostna->num_tx_rings = 1;
1889 	hostna->num_tx_desc = hwna->num_rx_desc;
1890 	hostna->num_rx_rings = 1;
1891 	hostna->num_rx_desc = hwna->num_tx_desc;
1892 	// hostna->nm_txsync = netmap_bwrap_host_txsync;
1893 	// hostna->nm_rxsync = netmap_bwrap_host_rxsync;
1894 	hostna->nm_notify = netmap_bwrap_host_notify;
1895 	hostna->nm_mem = na->nm_mem;
1896 	hostna->na_private = bna;
1897 
1898 	D("%s<->%s txr %d txd %d rxr %d rxd %d", fake->if_xname, real->if_xname,
1899 		na->num_tx_rings, na->num_tx_desc,
1900 		na->num_rx_rings, na->num_rx_desc);
1901 
1902 	error = netmap_attach_common(na);
1903 	if (error) {
1904 		netmap_adapter_put(hwna);
1905 		kfree(bna, M_DEVBUF);
1906 		return error;
1907 	}
1908 	return 0;
1909 }
1910 
1911 void
1912 netmap_init_bridges(void)
1913 {
1914 	int i;
1915 	bzero(nm_bridges, sizeof(struct nm_bridge) * NM_BRIDGES); /* safety */
1916 	for (i = 0; i < NM_BRIDGES; i++)
1917 		BDG_RWINIT(&nm_bridges[i]);
1918 }
1919 #endif /* WITH_VALE */
1920