xref: /dragonfly/sys/net/netmap/netmap_vale.c (revision ae071d8d)
1 /*
2  * Copyright (C) 2013 Universita` di Pisa. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *   1. Redistributions of source code must retain the above copyright
8  *      notice, this list of conditions and the following disclaimer.
9  *   2. Redistributions in binary form must reproduce the above copyright
10  *      notice, this list of conditions and the following disclaimer in the
11  *      documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  */
25 
26 
27 /*
28  * This module implements the VALE switch for netmap
29 
30 --- VALE SWITCH ---
31 
32 NMG_LOCK() serializes all modifications to switches and ports.
33 A switch cannot be deleted until all ports are gone.
34 
35 For each switch, an SX lock (RWlock on linux) protects
36 deletion of ports. When configuring or deleting a new port, the
37 lock is acquired in exclusive mode (after holding NMG_LOCK).
38 When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
39 The lock is held throughout the entire forwarding cycle,
40 during which the thread may incur in a page fault.
41 Hence it is important that sleepable shared locks are used.
42 
43 On the rx ring, the per-port lock is grabbed initially to reserve
44 a number of slot in the ring, then the lock is released,
45 packets are copied from source to destination, and then
46 the lock is acquired again and the receive ring is updated.
47 (A similar thing is done on the tx ring for NIC and host stack
48 ports attached to the switch)
49 
50  */
51 
52 /*
53  * OS-specific code that is used only within this file.
54  * Other OS-specific code that must be accessed by drivers
55  * is present in netmap_kern.h
56  */
57 
58 #include <sys/cdefs.h> /* prerequisite */
59 __FBSDID("$FreeBSD: head/sys/dev/netmap/netmap.c 257176 2013-10-26 17:58:36Z glebius $");
60 
61 #include <sys/types.h>
62 #include <sys/errno.h>
63 #include <sys/param.h>	/* defines used in kernel.h */
64 #include <sys/kernel.h>	/* types used in module initialization */
65 #include <sys/conf.h>	/* cdevsw struct, UID, GID */
66 #include <sys/sockio.h>
67 #include <sys/socketvar.h>	/* struct socket */
68 #include <sys/malloc.h>
69 #include <sys/poll.h>
70 #include <sys/lock.h>
71 #include <sys/socket.h> /* sockaddrs */
72 #include <sys/sysctl.h>
73 #include <net/if.h>
74 #include <net/if_var.h>
75 #include <net/bpf.h>		/* BIOCIMMEDIATE */
76 #include <sys/bus.h>	/* bus_dmamap_* */
77 #include <sys/endian.h>
78 #include <sys/refcount.h>
79 
80 
81 #define BDG_RWLOCK_T		struct lock
82 
83 #define	BDG_RWINIT(b)		\
84 	lockinit(&(b)->bdg_lock, "bdg lock", 0, LK_CANRECURSE)
85 #define BDG_WLOCK(b)		lockmgr(&(b)->bdg_lock, LK_EXCLUSIVE)
86 #define BDG_WUNLOCK(b)		lockmgr(&(b)->bdg_lock, LK_RELEASE)
87 #define BDG_RLOCK(b)		lockmgr(&(b)->bdg_lock, LK_SHARED)
88 #define BDG_RTRYLOCK(b)		lockmgr(&(b)->bdg_lock, LK_SHARED|LK_NOWAIT)
89 #define BDG_RUNLOCK(b)		lockmgr(&(b)->bdg_lock, LK_RELEASE)
90 #define BDG_RWDESTROY(b)	lockuninit(&(b)->bdg_lock)
91 
92 /*
93  * common headers
94  */
95 
96 #include <net/netmap.h>
97 #include <net/netmap/netmap_kern.h>
98 #include <net/netmap/netmap_mem2.h>
99 
100 #ifdef WITH_VALE
101 
102 /*
103  * system parameters (most of them in netmap_kern.h)
104  * NM_NAME	prefix for switch port names, default "vale"
105  * NM_BDG_MAXPORTS	number of ports
106  * NM_BRIDGES	max number of switches in the system.
107  *	XXX should become a sysctl or tunable
108  *
109  * Switch ports are named valeX:Y where X is the switch name and Y
110  * is the port. If Y matches a physical interface name, the port is
111  * connected to a physical device.
112  *
113  * Unlike physical interfaces, switch ports use their own memory region
114  * for rings and buffers.
115  * The virtual interfaces use per-queue lock instead of core lock.
116  * In the tx loop, we aggregate traffic in batches to make all operations
117  * faster. The batch size is bridge_batch.
118  */
119 #define NM_BDG_MAXRINGS		16	/* XXX unclear how many. */
120 #define NM_BDG_MAXSLOTS		4096	/* XXX same as above */
121 #define NM_BRIDGE_RINGSIZE	1024	/* in the device */
122 #define NM_BDG_HASH		1024	/* forwarding table entries */
123 #define NM_BDG_BATCH		1024	/* entries in the forwarding buffer */
124 #define NM_MULTISEG		64	/* max size of a chain of bufs */
125 /* actual size of the tables */
126 #define NM_BDG_BATCH_MAX	(NM_BDG_BATCH + NM_MULTISEG)
127 /* NM_FT_NULL terminates a list of slots in the ft */
128 #define NM_FT_NULL		NM_BDG_BATCH_MAX
129 #define	NM_BRIDGES		8	/* number of bridges */
130 
131 
132 /*
133  * bridge_batch is set via sysctl to the max batch size to be
134  * used in the bridge. The actual value may be larger as the
135  * last packet in the block may overflow the size.
136  */
137 int bridge_batch = NM_BDG_BATCH; /* bridge batch size */
138 SYSCTL_DECL(_dev_netmap);
139 SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0 , "");
140 
141 
142 static int bdg_netmap_attach(struct netmap_adapter *);
143 static int bdg_netmap_reg(struct netmap_adapter *na, int onoff);
144 static int netmap_bwrap_attach(struct ifnet *, struct ifnet *);
145 static int netmap_bwrap_register(struct netmap_adapter *, int onoff);
146 int kern_netmap_regif(struct nmreq *nmr);
147 
148 /*
149  * Each transmit queue accumulates a batch of packets into
150  * a structure before forwarding. Packets to the same
151  * destination are put in a list using ft_next as a link field.
152  * ft_frags and ft_next are valid only on the first fragment.
153  */
154 struct nm_bdg_fwd {	/* forwarding entry for a bridge */
155 	void *ft_buf;		/* netmap or indirect buffer */
156 	uint8_t ft_frags;	/* how many fragments (only on 1st frag) */
157 	uint8_t _ft_port;	/* dst port (unused) */
158 	uint16_t ft_flags;	/* flags, e.g. indirect */
159 	uint16_t ft_len;	/* src fragment len */
160 	uint16_t ft_next;	/* next packet to same destination */
161 };
162 
163 /*
164  * For each output interface, nm_bdg_q is used to construct a list.
165  * bq_len is the number of output buffers (we can have coalescing
166  * during the copy).
167  */
168 struct nm_bdg_q {
169 	uint16_t bq_head;
170 	uint16_t bq_tail;
171 	uint32_t bq_len;	/* number of buffers */
172 };
173 
174 /* XXX revise this */
175 struct nm_hash_ent {
176 	uint64_t	mac;	/* the top 2 bytes are the epoch */
177 	uint64_t	ports;
178 };
179 
180 /*
181  * nm_bridge is a descriptor for a VALE switch.
182  * Interfaces for a bridge are all in bdg_ports[].
183  * The array has fixed size, an empty entry does not terminate
184  * the search, but lookups only occur on attach/detach so we
185  * don't mind if they are slow.
186  *
187  * The bridge is non blocking on the transmit ports: excess
188  * packets are dropped if there is no room on the output port.
189  *
190  * bdg_lock protects accesses to the bdg_ports array.
191  * This is a rw lock (or equivalent).
192  */
193 struct nm_bridge {
194 	/* XXX what is the proper alignment/layout ? */
195 	BDG_RWLOCK_T	bdg_lock;	/* protects bdg_ports */
196 	int		bdg_namelen;
197 	uint32_t	bdg_active_ports; /* 0 means free */
198 	char		bdg_basename[IFNAMSIZ];
199 
200 	/* Indexes of active ports (up to active_ports)
201 	 * and all other remaining ports.
202 	 */
203 	uint8_t		bdg_port_index[NM_BDG_MAXPORTS];
204 
205 	struct netmap_vp_adapter *bdg_ports[NM_BDG_MAXPORTS];
206 
207 
208 	/*
209 	 * The function to decide the destination port.
210 	 * It returns either of an index of the destination port,
211 	 * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to
212 	 * forward this packet.  ring_nr is the source ring index, and the
213 	 * function may overwrite this value to forward this packet to a
214 	 * different ring index.
215 	 * This function must be set by netmap_bdgctl().
216 	 */
217 	bdg_lookup_fn_t nm_bdg_lookup;
218 
219 	/* the forwarding table, MAC+ports.
220 	 * XXX should be changed to an argument to be passed to
221 	 * the lookup function, and allocated on attach
222 	 */
223 	struct nm_hash_ent ht[NM_BDG_HASH];
224 };
225 
226 
227 /*
228  * XXX in principle nm_bridges could be created dynamically
229  * Right now we have a static array and deletions are protected
230  * by an exclusive lock.
231  */
232 struct nm_bridge nm_bridges[NM_BRIDGES];
233 
234 
235 /*
236  * A few function to tell which kind of port are we using.
237  * XXX should we hold a lock ?
238  *
239  * nma_is_vp()		virtual port
240  * nma_is_host()	port connected to the host stack
241  * nma_is_hw()		port connected to a NIC
242  * nma_is_generic()	generic netmap adapter XXX stop this madness
243  */
244 static __inline int
245 nma_is_vp(struct netmap_adapter *na)
246 {
247 	return na->nm_register == bdg_netmap_reg;
248 }
249 
250 
251 static __inline int
252 nma_is_host(struct netmap_adapter *na)
253 {
254 	return na->nm_register == NULL;
255 }
256 
257 
258 static __inline int
259 nma_is_hw(struct netmap_adapter *na)
260 {
261 	/* In case of sw adapter, nm_register is NULL */
262 	return !nma_is_vp(na) && !nma_is_host(na) && !nma_is_generic(na);
263 }
264 
265 static __inline int
266 nma_is_bwrap(struct netmap_adapter *na)
267 {
268 	return na->nm_register == netmap_bwrap_register;
269 }
270 
271 
272 
273 /*
274  * this is a slightly optimized copy routine which rounds
275  * to multiple of 64 bytes and is often faster than dealing
276  * with other odd sizes. We assume there is enough room
277  * in the source and destination buffers.
278  *
279  * XXX only for multiples of 64 bytes, non overlapped.
280  */
281 static inline void
282 pkt_copy(void *_src, void *_dst, int l)
283 {
284         uint64_t *src = _src;
285         uint64_t *dst = _dst;
286         if (unlikely(l >= 1024)) {
287                 memcpy(dst, src, l);
288                 return;
289         }
290         for (; likely(l > 0); l-=64) {
291                 *dst++ = *src++;
292                 *dst++ = *src++;
293                 *dst++ = *src++;
294                 *dst++ = *src++;
295                 *dst++ = *src++;
296                 *dst++ = *src++;
297                 *dst++ = *src++;
298                 *dst++ = *src++;
299         }
300 }
301 
302 
303 
304 /*
305  * locate a bridge among the existing ones.
306  * MUST BE CALLED WITH NMG_LOCK()
307  *
308  * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME.
309  * We assume that this is called with a name of at least NM_NAME chars.
310  */
311 static struct nm_bridge *
312 nm_find_bridge(const char *name, int create)
313 {
314 	int i, l, namelen;
315 	struct nm_bridge *b = NULL;
316 
317 	NMG_LOCK_ASSERT();
318 
319 	namelen = strlen(NM_NAME);	/* base length */
320 	l = name ? strlen(name) : 0;		/* actual length */
321 	if (l < namelen) {
322 		D("invalid bridge name %s", name ? name : NULL);
323 		return NULL;
324 	}
325 	for (i = namelen + 1; i < l; i++) {
326 		if (name[i] == ':') {
327 			namelen = i;
328 			break;
329 		}
330 	}
331 	if (namelen >= IFNAMSIZ)
332 		namelen = IFNAMSIZ;
333 	ND("--- prefix is '%.*s' ---", namelen, name);
334 
335 	/* lookup the name, remember empty slot if there is one */
336 	for (i = 0; i < NM_BRIDGES; i++) {
337 		struct nm_bridge *x = nm_bridges + i;
338 
339 		if (x->bdg_active_ports == 0) {
340 			if (create && b == NULL)
341 				b = x;	/* record empty slot */
342 		} else if (x->bdg_namelen != namelen) {
343 			continue;
344 		} else if (strncmp(name, x->bdg_basename, namelen) == 0) {
345 			ND("found '%.*s' at %d", namelen, name, i);
346 			b = x;
347 			break;
348 		}
349 	}
350 	if (i == NM_BRIDGES && b) { /* name not found, can create entry */
351 		/* initialize the bridge */
352 		strncpy(b->bdg_basename, name, namelen);
353 		ND("create new bridge %s with ports %d", b->bdg_basename,
354 			b->bdg_active_ports);
355 		b->bdg_namelen = namelen;
356 		b->bdg_active_ports = 0;
357 		for (i = 0; i < NM_BDG_MAXPORTS; i++)
358 			b->bdg_port_index[i] = i;
359 		/* set the default function */
360 		b->nm_bdg_lookup = netmap_bdg_learning;
361 		/* reset the MAC address table */
362 		bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
363 	}
364 	return b;
365 }
366 
367 
368 /*
369  * Free the forwarding tables for rings attached to switch ports.
370  */
371 static void
372 nm_free_bdgfwd(struct netmap_adapter *na)
373 {
374 	int nrings, i;
375 	struct netmap_kring *kring;
376 
377 	NMG_LOCK_ASSERT();
378 	nrings = nma_is_vp(na) ? na->num_tx_rings : na->num_rx_rings;
379 	kring = nma_is_vp(na) ? na->tx_rings : na->rx_rings;
380 	for (i = 0; i < nrings; i++) {
381 		if (kring[i].nkr_ft) {
382 			kfree(kring[i].nkr_ft, M_DEVBUF);
383 			kring[i].nkr_ft = NULL; /* protect from freeing twice */
384 		}
385 	}
386 }
387 
388 
389 /*
390  * Allocate the forwarding tables for the rings attached to the bridge ports.
391  */
392 static int
393 nm_alloc_bdgfwd(struct netmap_adapter *na)
394 {
395 	int nrings, l, i, num_dstq;
396 	struct netmap_kring *kring;
397 
398 	NMG_LOCK_ASSERT();
399 	/* all port:rings + broadcast */
400 	num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1;
401 	l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX;
402 	l += sizeof(struct nm_bdg_q) * num_dstq;
403 	l += sizeof(uint16_t) * NM_BDG_BATCH_MAX;
404 
405 	nrings = na->num_tx_rings + 1;
406 	kring = na->tx_rings;
407 	for (i = 0; i < nrings; i++) {
408 		struct nm_bdg_fwd *ft;
409 		struct nm_bdg_q *dstq;
410 		int j;
411 
412 		ft = kmalloc(l, M_DEVBUF, M_NOWAIT | M_ZERO);
413 		if (!ft) {
414 			nm_free_bdgfwd(na);
415 			return ENOMEM;
416 		}
417 		dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
418 		for (j = 0; j < num_dstq; j++) {
419 			dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL;
420 			dstq[j].bq_len = 0;
421 		}
422 		kring[i].nkr_ft = ft;
423 	}
424 	return 0;
425 }
426 
427 
428 static void
429 netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
430 {
431 	int s_hw = hw, s_sw = sw;
432 	int i, lim =b->bdg_active_ports;
433 	uint8_t tmp[NM_BDG_MAXPORTS];
434 
435 	/*
436 	New algorithm:
437 	make a copy of bdg_port_index;
438 	lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
439 	in the array of bdg_port_index, replacing them with
440 	entries from the bottom of the array;
441 	decrement bdg_active_ports;
442 	acquire BDG_WLOCK() and copy back the array.
443 	 */
444 
445 	D("detach %d and %d (lim %d)", hw, sw, lim);
446 	/* make a copy of the list of active ports, update it,
447 	 * and then copy back within BDG_WLOCK().
448 	 */
449 	memcpy(tmp, b->bdg_port_index, sizeof(tmp));
450 	for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
451 		if (hw >= 0 && tmp[i] == hw) {
452 			ND("detach hw %d at %d", hw, i);
453 			lim--; /* point to last active port */
454 			tmp[i] = tmp[lim]; /* swap with i */
455 			tmp[lim] = hw;	/* now this is inactive */
456 			hw = -1;
457 		} else if (sw >= 0 && tmp[i] == sw) {
458 			ND("detach sw %d at %d", sw, i);
459 			lim--;
460 			tmp[i] = tmp[lim];
461 			tmp[lim] = sw;
462 			sw = -1;
463 		} else {
464 			i++;
465 		}
466 	}
467 	if (hw >= 0 || sw >= 0) {
468 		D("XXX delete failed hw %d sw %d, should panic...", hw, sw);
469 	}
470 
471 	BDG_WLOCK(b);
472 	b->bdg_ports[s_hw] = NULL;
473 	if (s_sw >= 0) {
474 		b->bdg_ports[s_sw] = NULL;
475 	}
476 	memcpy(b->bdg_port_index, tmp, sizeof(tmp));
477 	b->bdg_active_ports = lim;
478 	BDG_WUNLOCK(b);
479 
480 	ND("now %d active ports", lim);
481 	if (lim == 0) {
482 		ND("marking bridge %s as free", b->bdg_basename);
483 		b->nm_bdg_lookup = NULL;
484 	}
485 }
486 
487 static void
488 netmap_adapter_vp_dtor(struct netmap_adapter *na)
489 {
490 	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
491 	struct nm_bridge *b = vpna->na_bdg;
492 	struct ifnet *ifp = na->ifp;
493 
494 	ND("%s has %d references", NM_IFPNAME(ifp), na->na_refcount);
495 
496 	if (b) {
497 		netmap_bdg_detach_common(b, vpna->bdg_port, -1);
498 	}
499 
500 	bzero(ifp, sizeof(*ifp));
501 	kfree(ifp, M_DEVBUF);
502 	na->ifp = NULL;
503 }
504 
505 int
506 netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
507 {
508 	const char *name = nmr->nr_name;
509 	struct ifnet *ifp;
510 	int error = 0;
511 	struct netmap_adapter *ret;
512 	struct netmap_vp_adapter *vpna;
513 	struct nm_bridge *b;
514 	int i, j, cand = -1, cand2 = -1;
515 	int needed;
516 
517 	*na = NULL;     /* default return value */
518 
519 	/* first try to see if this is a bridge port. */
520 	NMG_LOCK_ASSERT();
521 	if (strncmp(name, NM_NAME, sizeof(NM_NAME) - 1)) {
522 		return 0;  /* no error, but no VALE prefix */
523 	}
524 
525 	b = nm_find_bridge(name, create);
526 	if (b == NULL) {
527 		D("no bridges available for '%s'", name);
528 		return (ENXIO);
529 	}
530 
531 	/* Now we are sure that name starts with the bridge's name,
532 	 * lookup the port in the bridge. We need to scan the entire
533 	 * list. It is not important to hold a WLOCK on the bridge
534 	 * during the search because NMG_LOCK already guarantees
535 	 * that there are no other possible writers.
536 	 */
537 
538 	/* lookup in the local list of ports */
539 	for (j = 0; j < b->bdg_active_ports; j++) {
540 		i = b->bdg_port_index[j];
541 		vpna = b->bdg_ports[i];
542 		// KASSERT(na != NULL);
543 		ifp = vpna->up.ifp;
544 		/* XXX make sure the name only contains one : */
545 		if (!strcmp(NM_IFPNAME(ifp), name)) {
546 			netmap_adapter_get(&vpna->up);
547 			ND("found existing if %s refs %d", name,
548 				vpna->na_bdg_refcount);
549 			*na = (struct netmap_adapter *)vpna;
550 			return 0;
551 		}
552 	}
553 	/* not found, should we create it? */
554 	if (!create)
555 		return ENXIO;
556 	/* yes we should, see if we have space to attach entries */
557 	needed = 2; /* in some cases we only need 1 */
558 	if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
559 		D("bridge full %d, cannot create new port", b->bdg_active_ports);
560 		return EINVAL;
561 	}
562 	/* record the next two ports available, but do not allocate yet */
563 	cand = b->bdg_port_index[b->bdg_active_ports];
564 	cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
565 	ND("+++ bridge %s port %s used %d avail %d %d",
566 		b->bdg_basename, name, b->bdg_active_ports, cand, cand2);
567 
568 	/*
569 	 * try see if there is a matching NIC with this name
570 	 * (after the bridge's name)
571 	 */
572 	ifp = ifunit(name + b->bdg_namelen + 1);
573 	if (!ifp) { /* this is a virtual port */
574 		/* Create a temporary NA with arguments, then
575 		 * bdg_netmap_attach() will allocate the real one
576 		 * and attach it to the ifp
577 		 */
578 		struct netmap_adapter tmp_na;
579 
580 		if (nmr->nr_cmd) {
581 			/* nr_cmd must be 0 for a virtual port */
582 			return EINVAL;
583 		}
584 		bzero(&tmp_na, sizeof(tmp_na));
585 		/* bound checking */
586 		tmp_na.num_tx_rings = nmr->nr_tx_rings;
587 		nm_bound_var(&tmp_na.num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
588 		nmr->nr_tx_rings = tmp_na.num_tx_rings; // write back
589 		tmp_na.num_rx_rings = nmr->nr_rx_rings;
590 		nm_bound_var(&tmp_na.num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
591 		nmr->nr_rx_rings = tmp_na.num_rx_rings; // write back
592 		nm_bound_var(&nmr->nr_tx_slots, NM_BRIDGE_RINGSIZE,
593 				1, NM_BDG_MAXSLOTS, NULL);
594 		tmp_na.num_tx_desc = nmr->nr_tx_slots;
595 		nm_bound_var(&nmr->nr_rx_slots, NM_BRIDGE_RINGSIZE,
596 				1, NM_BDG_MAXSLOTS, NULL);
597 		tmp_na.num_rx_desc = nmr->nr_rx_slots;
598 
599 	 	/* create a struct ifnet for the new port.
600 		 * need M_NOWAIT as we are under nma_lock
601 		 */
602 		ifp = kmalloc(sizeof(*ifp), M_DEVBUF, M_NOWAIT | M_ZERO);
603 		if (!ifp)
604 			return ENOMEM;
605 
606 		strcpy(ifp->if_xname, name);
607 		tmp_na.ifp = ifp;
608 		/* bdg_netmap_attach creates a struct netmap_adapter */
609 		error = bdg_netmap_attach(&tmp_na);
610 		if (error) {
611 			D("error %d", error);
612 			kfree(ifp, M_DEVBUF);
613 			return error;
614 		}
615 		ret = NA(ifp);
616 		cand2 = -1;	/* only need one port */
617 	} else {  /* this is a NIC */
618 		struct ifnet *fake_ifp;
619 
620 		error = netmap_get_hw_na(ifp, &ret);
621 		if (error || ret == NULL)
622 			goto out;
623 
624 		/* make sure the NIC is not already in use */
625 		if (NETMAP_OWNED_BY_ANY(ret)) {
626 			D("NIC %s busy, cannot attach to bridge",
627 				NM_IFPNAME(ifp));
628 			error = EINVAL;
629 			goto out;
630 		}
631 		/* create a fake interface */
632 		fake_ifp = kmalloc(sizeof(*ifp), M_DEVBUF, M_NOWAIT | M_ZERO);
633 		if (!fake_ifp) {
634 			error = ENOMEM;
635 			goto out;
636 		}
637 		strcpy(fake_ifp->if_xname, name);
638 		error = netmap_bwrap_attach(fake_ifp, ifp);
639 		if (error) {
640 			kfree(fake_ifp, M_DEVBUF);
641 			goto out;
642 		}
643 		ret = NA(fake_ifp);
644 		if (nmr->nr_arg1 != NETMAP_BDG_HOST)
645 			cand2 = -1; /* only need one port */
646 #if 0
647 		if_rele(ifp);
648 #endif
649 	}
650 	vpna = (struct netmap_vp_adapter *)ret;
651 
652 	BDG_WLOCK(b);
653 	vpna->bdg_port = cand;
654 	ND("NIC  %p to bridge port %d", vpna, cand);
655 	/* bind the port to the bridge (virtual ports are not active) */
656 	b->bdg_ports[cand] = vpna;
657 	vpna->na_bdg = b;
658 	b->bdg_active_ports++;
659 	if (cand2 >= 0) {
660 		struct netmap_vp_adapter *hostna = vpna + 1;
661 		/* also bind the host stack to the bridge */
662 		b->bdg_ports[cand2] = hostna;
663 		hostna->bdg_port = cand2;
664 		hostna->na_bdg = b;
665 		b->bdg_active_ports++;
666 		ND("host %p to bridge port %d", hostna, cand2);
667 	}
668 	ND("if %s refs %d", name, vpna->up.na_refcount);
669 	BDG_WUNLOCK(b);
670 	*na = ret;
671 	netmap_adapter_get(ret);
672 	return 0;
673 
674 out:
675 #if 0
676 	if_rele(ifp);
677 #endif
678 
679 	return error;
680 }
681 
682 
683 /* Process NETMAP_BDG_ATTACH and NETMAP_BDG_DETACH */
684 static int
685 nm_bdg_attach(struct nmreq *nmr)
686 {
687 	struct netmap_adapter *na;
688 	struct netmap_if *nifp;
689 	struct netmap_priv_d *npriv;
690 	struct netmap_bwrap_adapter *bna;
691 	int error;
692 
693 	npriv = kmalloc(sizeof(*npriv), M_DEVBUF, M_NOWAIT|M_ZERO);
694 	if (npriv == NULL)
695 		return ENOMEM;
696 	NMG_LOCK();
697 	/* XXX probably netmap_get_bdg_na() */
698 	error = netmap_get_na(nmr, &na, 1 /* create if not exists */);
699 	if (error) /* no device, or another bridge or user owns the device */
700 		goto unlock_exit;
701 	/* netmap_get_na() sets na_bdg if this is a physical interface
702 	 * that we can attach to a switch.
703 	 */
704 	if (!nma_is_bwrap(na)) {
705 		/* got reference to a virtual port or direct access to a NIC.
706 		 * perhaps specified no bridge prefix or wrong NIC name
707 		 */
708 		error = EINVAL;
709 		goto unref_exit;
710 	}
711 
712 	if (na->active_fds > 0) { /* already registered */
713 		error = EBUSY;
714 		goto unref_exit;
715 	}
716 
717 	nifp = netmap_do_regif(npriv, na, nmr->nr_ringid, &error);
718 	if (!nifp) {
719 		goto unref_exit;
720 	}
721 
722 	bna = (struct netmap_bwrap_adapter*)na;
723 	bna->na_kpriv = npriv;
724 	NMG_UNLOCK();
725 	ND("registered %s to netmap-mode", NM_IFPNAME(na->ifp));
726 	return 0;
727 
728 unref_exit:
729 	netmap_adapter_put(na);
730 unlock_exit:
731 	NMG_UNLOCK();
732 	bzero(npriv, sizeof(*npriv));
733 	kfree(npriv, M_DEVBUF);
734 	return error;
735 }
736 
737 static int
738 nm_bdg_detach(struct nmreq *nmr)
739 {
740 	struct netmap_adapter *na;
741 	int error;
742 	struct netmap_bwrap_adapter *bna;
743 	int last_instance;
744 
745 	NMG_LOCK();
746 	error = netmap_get_na(nmr, &na, 0 /* don't create */);
747 	if (error) { /* no device, or another bridge or user owns the device */
748 		goto unlock_exit;
749 	}
750 	if (!nma_is_bwrap(na)) {
751 		/* got reference to a virtual port or direct access to a NIC.
752 		 * perhaps specified no bridge's prefix or wrong NIC's name
753 		 */
754 		error = EINVAL;
755 		goto unref_exit;
756 	}
757 	bna = (struct netmap_bwrap_adapter *)na;
758 
759 	if (na->active_fds == 0) { /* not registered */
760 		error = EINVAL;
761 		goto unref_exit;
762 	}
763 
764 	last_instance = netmap_dtor_locked(bna->na_kpriv); /* unregister */
765 	if (!last_instance) {
766 		D("--- error, trying to detach an entry with active mmaps");
767 		error = EINVAL;
768 	} else {
769 		struct netmap_priv_d *npriv = bna->na_kpriv;
770 
771 		bna->na_kpriv = NULL;
772 		D("deleting priv");
773 
774 		bzero(npriv, sizeof(*npriv));
775 		kfree(npriv, M_DEVBUF);
776 	}
777 
778 unref_exit:
779 	netmap_adapter_put(na);
780 unlock_exit:
781 	NMG_UNLOCK();
782 	return error;
783 
784 }
785 
786 
787 /* exported to kernel callers, e.g. OVS ?
788  * Entry point.
789  * Called without NMG_LOCK.
790  */
791 int
792 netmap_bdg_ctl(struct nmreq *nmr, bdg_lookup_fn_t func)
793 {
794 	struct nm_bridge *b;
795 	struct netmap_vp_adapter *na;
796 	struct ifnet *iter;
797 	char *name = nmr->nr_name;
798 	int cmd = nmr->nr_cmd, namelen = strlen(name);
799 	int error = 0, i, j;
800 
801 	switch (cmd) {
802 	case NETMAP_BDG_ATTACH:
803 		error = nm_bdg_attach(nmr);
804 		break;
805 
806 	case NETMAP_BDG_DETACH:
807 		error = nm_bdg_detach(nmr);
808 		break;
809 
810 	case NETMAP_BDG_LIST:
811 		/* this is used to enumerate bridges and ports */
812 		if (namelen) { /* look up indexes of bridge and port */
813 			if (strncmp(name, NM_NAME, strlen(NM_NAME))) {
814 				error = EINVAL;
815 				break;
816 			}
817 			NMG_LOCK();
818 			b = nm_find_bridge(name, 0 /* don't create */);
819 			if (!b) {
820 				error = ENOENT;
821 				NMG_UNLOCK();
822 				break;
823 			}
824 
825 			error = ENOENT;
826 			for (j = 0; j < b->bdg_active_ports; j++) {
827 				i = b->bdg_port_index[j];
828 				na = b->bdg_ports[i];
829 				if (na == NULL) {
830 					D("---AAAAAAAAARGH-------");
831 					continue;
832 				}
833 				iter = na->up.ifp;
834 				/* the former and the latter identify a
835 				 * virtual port and a NIC, respectively
836 				 */
837 				if (!strcmp(iter->if_xname, name)) {
838 					/* bridge index */
839 					nmr->nr_arg1 = b - nm_bridges;
840 					nmr->nr_arg2 = i; /* port index */
841 					error = 0;
842 					break;
843 				}
844 			}
845 			NMG_UNLOCK();
846 		} else {
847 			/* return the first non-empty entry starting from
848 			 * bridge nr_arg1 and port nr_arg2.
849 			 *
850 			 * Users can detect the end of the same bridge by
851 			 * seeing the new and old value of nr_arg1, and can
852 			 * detect the end of all the bridge by error != 0
853 			 */
854 			i = nmr->nr_arg1;
855 			j = nmr->nr_arg2;
856 
857 			NMG_LOCK();
858 			for (error = ENOENT; i < NM_BRIDGES; i++) {
859 				b = nm_bridges + i;
860 				if (j >= b->bdg_active_ports) {
861 					j = 0; /* following bridges scan from 0 */
862 					continue;
863 				}
864 				nmr->nr_arg1 = i;
865 				nmr->nr_arg2 = j;
866 				j = b->bdg_port_index[j];
867 				na = b->bdg_ports[j];
868 				iter = na->up.ifp;
869 				strncpy(name, iter->if_xname, (size_t)IFNAMSIZ);
870 				error = 0;
871 				break;
872 			}
873 			NMG_UNLOCK();
874 		}
875 		break;
876 
877 	case NETMAP_BDG_LOOKUP_REG:
878 		/* register a lookup function to the given bridge.
879 		 * nmr->nr_name may be just bridge's name (including ':'
880 		 * if it is not just NM_NAME).
881 		 */
882 		if (!func) {
883 			error = EINVAL;
884 			break;
885 		}
886 		NMG_LOCK();
887 		b = nm_find_bridge(name, 0 /* don't create */);
888 		if (!b) {
889 			error = EINVAL;
890 		} else {
891 			b->nm_bdg_lookup = func;
892 		}
893 		NMG_UNLOCK();
894 		break;
895 
896 	default:
897 		D("invalid cmd (nmr->nr_cmd) (0x%x)", cmd);
898 		error = EINVAL;
899 		break;
900 	}
901 	return error;
902 }
903 
904 
905 static int
906 netmap_vp_krings_create(struct netmap_adapter *na)
907 {
908 	u_int ntx, nrx, tailroom;
909 	int error, i;
910 	uint32_t *leases;
911 
912 	/* XXX vps do not need host rings,
913 	 * but we crash if we don't have one
914 	 */
915 	ntx = na->num_tx_rings + 1;
916 	nrx = na->num_rx_rings + 1;
917 
918 	/*
919 	 * Leases are attached to RX rings on vale ports
920 	 */
921 	tailroom = sizeof(uint32_t) * na->num_rx_desc * nrx;
922 
923 	error = netmap_krings_create(na, ntx, nrx, tailroom);
924 	if (error)
925 		return error;
926 
927 	leases = na->tailroom;
928 
929 	for (i = 0; i < nrx; i++) { /* Receive rings */
930 		na->rx_rings[i].nkr_leases = leases;
931 		leases += na->num_rx_desc;
932 	}
933 
934 	error = nm_alloc_bdgfwd(na);
935 	if (error) {
936 		netmap_krings_delete(na);
937 		return error;
938 	}
939 
940 	return 0;
941 }
942 
943 static void
944 netmap_vp_krings_delete(struct netmap_adapter *na)
945 {
946 	nm_free_bdgfwd(na);
947 	netmap_krings_delete(na);
948 }
949 
950 
951 static int
952 nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n,
953 	struct netmap_vp_adapter *na, u_int ring_nr);
954 
955 
956 /*
957  * Grab packets from a kring, move them into the ft structure
958  * associated to the tx (input) port. Max one instance per port,
959  * filtered on input (ioctl, poll or XXX).
960  * Returns the next position in the ring.
961  */
962 static int
963 nm_bdg_preflush(struct netmap_vp_adapter *na, u_int ring_nr,
964 	struct netmap_kring *kring, u_int end)
965 {
966 	struct netmap_ring *ring = kring->ring;
967 	struct nm_bdg_fwd *ft;
968 	u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1;
969 	u_int ft_i = 0;	/* start from 0 */
970 	u_int frags = 1; /* how many frags ? */
971 	struct nm_bridge *b = na->na_bdg;
972 
973 	/* To protect against modifications to the bridge we acquire a
974 	 * shared lock, waiting if we can sleep (if the source port is
975 	 * attached to a user process) or with a trylock otherwise (NICs).
976 	 */
977 	ND("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j);
978 	if (na->up.na_flags & NAF_BDG_MAYSLEEP)
979 		BDG_RLOCK(b);
980 	else if (!BDG_RTRYLOCK(b))
981 		return 0;
982 	ND(5, "rlock acquired for %d packets", ((j > end ? lim+1 : 0) + end) - j);
983 	ft = kring->nkr_ft;
984 
985 	for (; likely(j != end); j = nm_next(j, lim)) {
986 		struct netmap_slot *slot = &ring->slot[j];
987 		char *buf;
988 
989 		ft[ft_i].ft_len = slot->len;
990 		ft[ft_i].ft_flags = slot->flags;
991 
992 		ND("flags is 0x%x", slot->flags);
993 		/* this slot goes into a list so initialize the link field */
994 		ft[ft_i].ft_next = NM_FT_NULL;
995 		buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ?
996 			(void *)(uintptr_t)slot->ptr : BDG_NMB(&na->up, slot);
997 		prefetch(buf);
998 		++ft_i;
999 		if (slot->flags & NS_MOREFRAG) {
1000 			frags++;
1001 			continue;
1002 		}
1003 		if (unlikely(netmap_verbose && frags > 1))
1004 			RD(5, "%d frags at %d", frags, ft_i - frags);
1005 		ft[ft_i - frags].ft_frags = frags;
1006 		frags = 1;
1007 		if (unlikely((int)ft_i >= bridge_batch))
1008 			ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
1009 	}
1010 	if (frags > 1) {
1011 		D("truncate incomplete fragment at %d (%d frags)", ft_i, frags);
1012 		// ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG
1013 		ft[ft_i - 1].ft_frags &= ~NS_MOREFRAG;
1014 		ft[ft_i - frags].ft_frags = frags - 1;
1015 	}
1016 	if (ft_i)
1017 		ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
1018 	BDG_RUNLOCK(b);
1019 	return j;
1020 }
1021 
1022 
1023 /*
1024  *---- support for virtual bridge -----
1025  */
1026 
1027 /* ----- FreeBSD if_bridge hash function ------- */
1028 
1029 /*
1030  * The following hash function is adapted from "Hash Functions" by Bob Jenkins
1031  * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
1032  *
1033  * http://www.burtleburtle.net/bob/hash/spooky.html
1034  */
1035 #define mix(a, b, c)                                                    \
1036 do {                                                                    \
1037         a -= b; a -= c; a ^= (c >> 13);                                 \
1038         b -= c; b -= a; b ^= (a << 8);                                  \
1039         c -= a; c -= b; c ^= (b >> 13);                                 \
1040         a -= b; a -= c; a ^= (c >> 12);                                 \
1041         b -= c; b -= a; b ^= (a << 16);                                 \
1042         c -= a; c -= b; c ^= (b >> 5);                                  \
1043         a -= b; a -= c; a ^= (c >> 3);                                  \
1044         b -= c; b -= a; b ^= (a << 10);                                 \
1045         c -= a; c -= b; c ^= (b >> 15);                                 \
1046 } while (/*CONSTCOND*/0)
1047 
1048 static __inline uint32_t
1049 nm_bridge_rthash(const uint8_t *addr)
1050 {
1051         uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hask key
1052 
1053         b += addr[5] << 8;
1054         b += addr[4];
1055         a += addr[3] << 24;
1056         a += addr[2] << 16;
1057         a += addr[1] << 8;
1058         a += addr[0];
1059 
1060         mix(a, b, c);
1061 #define BRIDGE_RTHASH_MASK	(NM_BDG_HASH-1)
1062         return (c & BRIDGE_RTHASH_MASK);
1063 }
1064 
1065 #undef mix
1066 
1067 
1068 static int
1069 bdg_netmap_reg(struct netmap_adapter *na, int onoff)
1070 {
1071 	struct netmap_vp_adapter *vpna =
1072 		(struct netmap_vp_adapter*)na;
1073 	struct ifnet *ifp = na->ifp;
1074 
1075 	/* the interface is already attached to the bridge,
1076 	 * so we only need to toggle IFCAP_NETMAP.
1077 	 */
1078 	BDG_WLOCK(vpna->na_bdg);
1079 	if (onoff) {
1080 		ifp->if_capenable |= IFCAP_NETMAP;
1081 	} else {
1082 		ifp->if_capenable &= ~IFCAP_NETMAP;
1083 	}
1084 	BDG_WUNLOCK(vpna->na_bdg);
1085 	return 0;
1086 }
1087 
1088 
1089 /*
1090  * Lookup function for a learning bridge.
1091  * Update the hash table with the source address,
1092  * and then returns the destination port index, and the
1093  * ring in *dst_ring (at the moment, always use ring 0)
1094  */
1095 u_int
1096 netmap_bdg_learning(char *buf, u_int buf_len, uint8_t *dst_ring,
1097 		struct netmap_vp_adapter *na)
1098 {
1099 	struct nm_hash_ent *ht = na->na_bdg->ht;
1100 	uint32_t sh, dh;
1101 	u_int dst, mysrc = na->bdg_port;
1102 	uint64_t smac, dmac;
1103 
1104 	if (buf_len < 14) {
1105 		D("invalid buf length %d", buf_len);
1106 		return NM_BDG_NOPORT;
1107 	}
1108 	dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff;
1109 	smac = le64toh(*(uint64_t *)(buf + 4));
1110 	smac >>= 16;
1111 
1112 	/*
1113 	 * The hash is somewhat expensive, there might be some
1114 	 * worthwhile optimizations here.
1115 	 */
1116 	if ((buf[6] & 1) == 0) { /* valid src */
1117 		uint8_t *s = buf+6;
1118 		sh = nm_bridge_rthash(s); // XXX hash of source
1119 		/* update source port forwarding entry */
1120 		ht[sh].mac = smac;	/* XXX expire ? */
1121 		ht[sh].ports = mysrc;
1122 		if (netmap_verbose)
1123 		    D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d",
1124 			s[0], s[1], s[2], s[3], s[4], s[5], mysrc);
1125 	}
1126 	dst = NM_BDG_BROADCAST;
1127 	if ((buf[0] & 1) == 0) { /* unicast */
1128 		dh = nm_bridge_rthash(buf); // XXX hash of dst
1129 		if (ht[dh].mac == dmac) {	/* found dst */
1130 			dst = ht[dh].ports;
1131 		}
1132 		/* XXX otherwise return NM_BDG_UNKNOWN ? */
1133 	}
1134 	*dst_ring = 0;
1135 	return dst;
1136 }
1137 
1138 
1139 /*
1140  * This flush routine supports only unicast and broadcast but a large
1141  * number of ports, and lets us replace the learn and dispatch functions.
1142  */
1143 int
1144 nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
1145 		u_int ring_nr)
1146 {
1147 	struct nm_bdg_q *dst_ents, *brddst;
1148 	uint16_t num_dsts = 0, *dsts;
1149 	struct nm_bridge *b = na->na_bdg;
1150 	u_int i, j, me = na->bdg_port;
1151 
1152 	/*
1153 	 * The work area (pointed by ft) is followed by an array of
1154 	 * pointers to queues , dst_ents; there are NM_BDG_MAXRINGS
1155 	 * queues per port plus one for the broadcast traffic.
1156 	 * Then we have an array of destination indexes.
1157 	 */
1158 	dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
1159 	dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1);
1160 
1161 	/* first pass: find a destination for each packet in the batch */
1162 	for (i = 0; likely(i < n); i += ft[i].ft_frags) {
1163 		uint8_t dst_ring = ring_nr; /* default, same ring as origin */
1164 		uint16_t dst_port, d_i;
1165 		struct nm_bdg_q *d;
1166 
1167 		ND("slot %d frags %d", i, ft[i].ft_frags);
1168 		dst_port = b->nm_bdg_lookup(ft[i].ft_buf, ft[i].ft_len,
1169 			&dst_ring, na);
1170 		if (netmap_verbose > 255)
1171 			RD(5, "slot %d port %d -> %d", i, me, dst_port);
1172 		if (dst_port == NM_BDG_NOPORT)
1173 			continue; /* this packet is identified to be dropped */
1174 		else if (unlikely(dst_port > NM_BDG_MAXPORTS))
1175 			continue;
1176 		else if (dst_port == NM_BDG_BROADCAST)
1177 			dst_ring = 0; /* broadcasts always go to ring 0 */
1178 		else if (unlikely(dst_port == me ||
1179 		    !b->bdg_ports[dst_port]))
1180 			continue;
1181 
1182 		/* get a position in the scratch pad */
1183 		d_i = dst_port * NM_BDG_MAXRINGS + dst_ring;
1184 		d = dst_ents + d_i;
1185 
1186 		/* append the first fragment to the list */
1187 		if (d->bq_head == NM_FT_NULL) { /* new destination */
1188 			d->bq_head = d->bq_tail = i;
1189 			/* remember this position to be scanned later */
1190 			if (dst_port != NM_BDG_BROADCAST)
1191 				dsts[num_dsts++] = d_i;
1192 		} else {
1193 			ft[d->bq_tail].ft_next = i;
1194 			d->bq_tail = i;
1195 		}
1196 		d->bq_len += ft[i].ft_frags;
1197 	}
1198 
1199 	/*
1200 	 * Broadcast traffic goes to ring 0 on all destinations.
1201 	 * So we need to add these rings to the list of ports to scan.
1202 	 * XXX at the moment we scan all NM_BDG_MAXPORTS ports, which is
1203 	 * expensive. We should keep a compact list of active destinations
1204 	 * so we could shorten this loop.
1205 	 */
1206 	brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS;
1207 	if (brddst->bq_head != NM_FT_NULL) {
1208 		for (j = 0; likely(j < b->bdg_active_ports); j++) {
1209 			uint16_t d_i;
1210 			i = b->bdg_port_index[j];
1211 			if (unlikely(i == me))
1212 				continue;
1213 			d_i = i * NM_BDG_MAXRINGS;
1214 			if (dst_ents[d_i].bq_head == NM_FT_NULL)
1215 				dsts[num_dsts++] = d_i;
1216 		}
1217 	}
1218 
1219 	ND(5, "pass 1 done %d pkts %d dsts", n, num_dsts);
1220 	/* second pass: scan destinations (XXX will be modular somehow) */
1221 	for (i = 0; i < num_dsts; i++) {
1222 		struct ifnet *dst_ifp;
1223 		struct netmap_vp_adapter *dst_na;
1224 		struct netmap_kring *kring;
1225 		struct netmap_ring *ring;
1226 		u_int dst_nr, lim, j, sent = 0, d_i, next, brd_next;
1227 		u_int needed, howmany;
1228 		int retry = netmap_txsync_retry;
1229 		struct nm_bdg_q *d;
1230 		uint32_t my_start = 0, lease_idx = 0;
1231 		int nrings;
1232 
1233 		d_i = dsts[i];
1234 		ND("second pass %d port %d", i, d_i);
1235 		d = dst_ents + d_i;
1236 		// XXX fix the division
1237 		dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS];
1238 		/* protect from the lookup function returning an inactive
1239 		 * destination port
1240 		 */
1241 		if (unlikely(dst_na == NULL))
1242 			goto cleanup;
1243 		if (dst_na->up.na_flags & NAF_SW_ONLY)
1244 			goto cleanup;
1245 		dst_ifp = dst_na->up.ifp;
1246 		/*
1247 		 * The interface may be in !netmap mode in two cases:
1248 		 * - when na is attached but not activated yet;
1249 		 * - when na is being deactivated but is still attached.
1250 		 */
1251 		if (unlikely(!(dst_ifp->if_capenable & IFCAP_NETMAP))) {
1252 			ND("not in netmap mode!");
1253 			goto cleanup;
1254 		}
1255 
1256 		/* there is at least one either unicast or broadcast packet */
1257 		brd_next = brddst->bq_head;
1258 		next = d->bq_head;
1259 		/* we need to reserve this many slots. If fewer are
1260 		 * available, some packets will be dropped.
1261 		 * Packets may have multiple fragments, so we may not use
1262 		 * there is a chance that we may not use all of the slots
1263 		 * we have claimed, so we will need to handle the leftover
1264 		 * ones when we regain the lock.
1265 		 */
1266 		needed = d->bq_len + brddst->bq_len;
1267 
1268 		ND(5, "pass 2 dst %d is %x %s",
1269 			i, d_i, is_vp ? "virtual" : "nic/host");
1270 		dst_nr = d_i & (NM_BDG_MAXRINGS-1);
1271 		nrings = dst_na->up.num_rx_rings;
1272 		if (dst_nr >= nrings)
1273 			dst_nr = dst_nr % nrings;
1274 		kring = &dst_na->up.rx_rings[dst_nr];
1275 		ring = kring->ring;
1276 		lim = kring->nkr_num_slots - 1;
1277 
1278 retry:
1279 
1280 		/* reserve the buffers in the queue and an entry
1281 		 * to report completion, and drop lock.
1282 		 * XXX this might become a helper function.
1283 		 */
1284 		lockmgr(&kring->q_lock, LK_EXCLUSIVE);
1285 		if (kring->nkr_stopped) {
1286 			lockmgr(&kring->q_lock, LK_RELEASE);
1287 			goto cleanup;
1288 		}
1289 		if (dst_na->retry) {
1290 			dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
1291 		}
1292 		my_start = j = kring->nkr_hwlease;
1293 		howmany = nm_kr_space(kring, 1);
1294 		if (needed < howmany)
1295 			howmany = needed;
1296 		lease_idx = nm_kr_lease(kring, howmany, 1);
1297 		lockmgr(&kring->q_lock, LK_RELEASE);
1298 
1299 		/* only retry if we need more than available slots */
1300 		if (retry && needed <= howmany)
1301 			retry = 0;
1302 
1303 		/* copy to the destination queue */
1304 		while (howmany > 0) {
1305 			struct netmap_slot *slot;
1306 			struct nm_bdg_fwd *ft_p, *ft_end;
1307 			u_int cnt;
1308 
1309 			/* find the queue from which we pick next packet.
1310 			 * NM_FT_NULL is always higher than valid indexes
1311 			 * so we never dereference it if the other list
1312 			 * has packets (and if both are empty we never
1313 			 * get here).
1314 			 */
1315 			if (next < brd_next) {
1316 				ft_p = ft + next;
1317 				next = ft_p->ft_next;
1318 			} else { /* insert broadcast */
1319 				ft_p = ft + brd_next;
1320 				brd_next = ft_p->ft_next;
1321 			}
1322 			cnt = ft_p->ft_frags; // cnt > 0
1323 			if (unlikely(cnt > howmany))
1324 			    break; /* no more space */
1325 			howmany -= cnt;
1326 			if (netmap_verbose && cnt > 1)
1327 				RD(5, "rx %d frags to %d", cnt, j);
1328 			ft_end = ft_p + cnt;
1329 			do {
1330 			    void *dst, *src = ft_p->ft_buf;
1331 			    size_t len = (ft_p->ft_len + 63) & ~63;
1332 
1333 			    slot = &ring->slot[j];
1334 			    dst = BDG_NMB(&dst_na->up, slot);
1335 			    /* round to a multiple of 64 */
1336 
1337 			    ND("send %d %d bytes at %s:%d",
1338 				i, ft_p->ft_len, NM_IFPNAME(dst_ifp), j);
1339 			    if (ft_p->ft_flags & NS_INDIRECT) {
1340 				if (copyin(src, dst, len)) {
1341 					// invalid user pointer, pretend len is 0
1342 					ft_p->ft_len = 0;
1343 				}
1344 			    } else {
1345 				//memcpy(dst, src, len);
1346 				pkt_copy(src, dst, (int)len);
1347 			    }
1348 			    slot->len = ft_p->ft_len;
1349 			    slot->flags = (cnt << 8)| NS_MOREFRAG;
1350 			    j = nm_next(j, lim);
1351 			    ft_p++;
1352 			    sent++;
1353 			} while (ft_p != ft_end);
1354 			slot->flags = (cnt << 8); /* clear flag on last entry */
1355 			/* are we done ? */
1356 			if (next == NM_FT_NULL && brd_next == NM_FT_NULL)
1357 				break;
1358 		}
1359 		{
1360 		    /* current position */
1361 		    uint32_t *p = kring->nkr_leases; /* shorthand */
1362 		    uint32_t update_pos;
1363 		    int still_locked = 1;
1364 
1365 		    lockmgr(&kring->q_lock, LK_EXCLUSIVE);
1366 		    if (unlikely(howmany > 0)) {
1367 			/* not used all bufs. If i am the last one
1368 			 * i can recover the slots, otherwise must
1369 			 * fill them with 0 to mark empty packets.
1370 			 */
1371 			ND("leftover %d bufs", howmany);
1372 			if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) {
1373 			    /* yes i am the last one */
1374 			    ND("roll back nkr_hwlease to %d", j);
1375 			    kring->nkr_hwlease = j;
1376 			} else {
1377 			    while (howmany-- > 0) {
1378 				ring->slot[j].len = 0;
1379 				ring->slot[j].flags = 0;
1380 				j = nm_next(j, lim);
1381 			    }
1382 			}
1383 		    }
1384 		    p[lease_idx] = j; /* report I am done */
1385 
1386 		    update_pos = nm_kr_rxpos(kring);
1387 
1388 		    if (my_start == update_pos) {
1389 			/* all slots before my_start have been reported,
1390 			 * so scan subsequent leases to see if other ranges
1391 			 * have been completed, and to a selwakeup or txsync.
1392 		         */
1393 			while (lease_idx != kring->nkr_lease_idx &&
1394 				p[lease_idx] != NR_NOSLOT) {
1395 			    j = p[lease_idx];
1396 			    p[lease_idx] = NR_NOSLOT;
1397 			    lease_idx = nm_next(lease_idx, lim);
1398 			}
1399 			/* j is the new 'write' position. j != my_start
1400 			 * means there are new buffers to report
1401 			 */
1402 			if (likely(j != my_start)) {
1403 				uint32_t old_avail = kring->nr_hwavail;
1404 
1405 				kring->nr_hwavail = (j >= kring->nr_hwcur) ?
1406 					j - kring->nr_hwcur :
1407 					j + lim + 1 - kring->nr_hwcur;
1408 				if (kring->nr_hwavail < old_avail) {
1409 					D("avail shrink %d -> %d",
1410 						old_avail, kring->nr_hwavail);
1411 				}
1412 				dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
1413 				still_locked = 0;
1414 				lockmgr(&kring->q_lock, LK_RELEASE);
1415 				if (dst_na->retry && retry--)
1416 					goto retry;
1417 			}
1418 		    }
1419 		    if (still_locked)
1420 			lockmgr(&kring->q_lock, LK_RELEASE);
1421 		}
1422 cleanup:
1423 		d->bq_head = d->bq_tail = NM_FT_NULL; /* cleanup */
1424 		d->bq_len = 0;
1425 	}
1426 	brddst->bq_head = brddst->bq_tail = NM_FT_NULL; /* cleanup */
1427 	brddst->bq_len = 0;
1428 	return 0;
1429 }
1430 
1431 static int
1432 netmap_vp_txsync(struct netmap_vp_adapter *na, u_int ring_nr, int flags)
1433 {
1434 	struct netmap_kring *kring = &na->up.tx_rings[ring_nr];
1435 	struct netmap_ring *ring = kring->ring;
1436 	u_int j, k, lim = kring->nkr_num_slots - 1;
1437 
1438 	k = ring->cur;
1439 	if (k > lim)
1440 		return netmap_ring_reinit(kring);
1441 
1442 	if (bridge_batch <= 0) { /* testing only */
1443 		j = k; // used all
1444 		goto done;
1445 	}
1446 	if (bridge_batch > NM_BDG_BATCH)
1447 		bridge_batch = NM_BDG_BATCH;
1448 
1449 	j = nm_bdg_preflush(na, ring_nr, kring, k);
1450 	if (j != k)
1451 		D("early break at %d/ %d, avail %d", j, k, kring->nr_hwavail);
1452 	/* k-j modulo ring size is the number of slots processed */
1453 	if (k < j)
1454 		k += kring->nkr_num_slots;
1455 	kring->nr_hwavail = lim - (k - j);
1456 
1457 done:
1458 	kring->nr_hwcur = j;
1459 	ring->avail = kring->nr_hwavail;
1460 	if (netmap_verbose)
1461 		D("%s ring %d flags %d", NM_IFPNAME(na->up.ifp), ring_nr, flags);
1462 	return 0;
1463 }
1464 
1465 
1466 /*
1467  * main dispatch routine for the bridge.
1468  * We already know that only one thread is running this.
1469  * we must run nm_bdg_preflush without lock.
1470  */
1471 static int
1472 bdg_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
1473 {
1474 	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
1475 	return netmap_vp_txsync(vpna, ring_nr, flags);
1476 }
1477 
1478 
1479 /*
1480  * user process reading from a VALE switch.
1481  * Already protected against concurrent calls from userspace,
1482  * but we must acquire the queue's lock to protect against
1483  * writers on the same queue.
1484  */
1485 static int
1486 bdg_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
1487 {
1488 	struct netmap_kring *kring = &na->rx_rings[ring_nr];
1489 	struct netmap_ring *ring = kring->ring;
1490 	u_int j, lim = kring->nkr_num_slots - 1;
1491 	u_int k = ring->cur, resvd = ring->reserved;
1492 	int n;
1493 
1494 	lockmgr(&kring->q_lock, LK_EXCLUSIVE);
1495 	if (k > lim) {
1496 		D("ouch dangerous reset!!!");
1497 		n = netmap_ring_reinit(kring);
1498 		goto done;
1499 	}
1500 
1501 	/* skip past packets that userspace has released */
1502 	j = kring->nr_hwcur;    /* netmap ring index */
1503 	if (resvd > 0) {
1504 		if (resvd + ring->avail >= lim + 1) {
1505 			D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
1506 			ring->reserved = resvd = 0; // XXX panic...
1507 		}
1508 		k = (k >= resvd) ? k - resvd : k + lim + 1 - resvd;
1509 	}
1510 
1511 	if (j != k) { /* userspace has released some packets. */
1512 		n = k - j;
1513 		if (n < 0)
1514 			n += kring->nkr_num_slots;
1515 		ND("userspace releases %d packets", n);
1516 		for (n = 0; likely(j != k); n++) {
1517 			struct netmap_slot *slot = &ring->slot[j];
1518 			void *addr = BDG_NMB(na, slot);
1519 
1520 			if (addr == netmap_buffer_base) { /* bad buf */
1521 				D("bad buffer index %d, ignore ?",
1522 					slot->buf_idx);
1523 			}
1524 			slot->flags &= ~NS_BUF_CHANGED;
1525 			j = nm_next(j, lim);
1526 		}
1527 		kring->nr_hwavail -= n;
1528 		kring->nr_hwcur = k;
1529 	}
1530 	/* tell userspace that there are new packets */
1531 	ring->avail = kring->nr_hwavail - resvd;
1532 	n = 0;
1533 done:
1534 	lockmgr(&kring->q_lock, LK_RELEASE);
1535 	return n;
1536 }
1537 
1538 static int
1539 bdg_netmap_attach(struct netmap_adapter *arg)
1540 {
1541 	struct netmap_vp_adapter *vpna;
1542 	struct netmap_adapter *na;
1543 	int error;
1544 
1545 	vpna = kmalloc(sizeof(*vpna), M_DEVBUF, M_NOWAIT | M_ZERO);
1546 	if (vpna == NULL)
1547 		return ENOMEM;
1548  	na = &vpna->up;
1549 	*na = *arg;
1550 	na->na_flags |= NAF_BDG_MAYSLEEP | NAF_MEM_OWNER;
1551 	na->nm_txsync = bdg_netmap_txsync;
1552 	na->nm_rxsync = bdg_netmap_rxsync;
1553 	na->nm_register = bdg_netmap_reg;
1554 	na->nm_dtor = netmap_adapter_vp_dtor;
1555 	na->nm_krings_create = netmap_vp_krings_create;
1556 	na->nm_krings_delete = netmap_vp_krings_delete;
1557 	na->nm_mem = netmap_mem_private_new(NM_IFPNAME(arg->ifp),
1558 			na->num_tx_rings, na->num_tx_desc,
1559 			na->num_rx_rings, na->num_rx_desc);
1560 	/* other nmd fields are set in the common routine */
1561 	error = netmap_attach_common(na);
1562 	if (error) {
1563 		kfree(vpna, M_DEVBUF);
1564 		return error;
1565 	}
1566 	return 0;
1567 }
1568 
1569 static void
1570 netmap_bwrap_dtor(struct netmap_adapter *na)
1571 {
1572 	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
1573 	struct netmap_adapter *hwna = bna->hwna;
1574 	struct nm_bridge *b = bna->up.na_bdg,
1575 		*bh = bna->host.na_bdg;
1576 	struct ifnet *ifp = na->ifp;
1577 
1578 	ND("na %p", na);
1579 
1580 	if (b) {
1581 		netmap_bdg_detach_common(b, bna->up.bdg_port,
1582 			(bh ? bna->host.bdg_port : -1));
1583 	}
1584 
1585 	hwna->na_private = NULL;
1586 	netmap_adapter_put(hwna);
1587 
1588 	bzero(ifp, sizeof(*ifp));
1589 	kfree(ifp, M_DEVBUF);
1590 	na->ifp = NULL;
1591 
1592 }
1593 
1594 /*
1595  * Pass packets from nic to the bridge.
1596  * XXX TODO check locking: this is called from the interrupt
1597  * handler so we should make sure that the interface is not
1598  * disconnected while passing down an interrupt.
1599  *
1600  * Note, no user process can access this NIC so we can ignore
1601  * the info in the 'ring'.
1602  */
1603 /* callback that overwrites the hwna notify callback.
1604  * Packets come from the outside or from the host stack and are put on an hwna rx ring.
1605  * The bridge wrapper then sends the packets through the bridge.
1606  */
1607 static int
1608 netmap_bwrap_intr_notify(struct netmap_adapter *na, u_int ring_nr, enum txrx tx, int flags)
1609 {
1610 	struct ifnet *ifp = na->ifp;
1611 	struct netmap_bwrap_adapter *bna = na->na_private;
1612 	struct netmap_vp_adapter *hostna = &bna->host;
1613 	struct netmap_kring *kring, *bkring;
1614 	struct netmap_ring *ring;
1615 	int is_host_ring = ring_nr == na->num_rx_rings;
1616 	struct netmap_vp_adapter *vpna = &bna->up;
1617 	int error = 0;
1618 
1619 	ND("%s[%d] %s %x", NM_IFPNAME(ifp), ring_nr, (tx == NR_TX ? "TX" : "RX"), flags);
1620 
1621 	if (flags & NAF_DISABLE_NOTIFY) {
1622 		kring = tx == NR_TX ? na->tx_rings : na->rx_rings;
1623 		bkring = tx == NR_TX ? vpna->up.rx_rings : vpna->up.tx_rings;
1624 		if (kring->nkr_stopped)
1625 			netmap_disable_ring(bkring);
1626 		else
1627 			bkring->nkr_stopped = 0;
1628 		return 0;
1629 	}
1630 
1631 	if (ifp == NULL || !(ifp->if_capenable & IFCAP_NETMAP))
1632 		return 0;
1633 
1634 	if (tx == NR_TX)
1635 		return 0;
1636 
1637 	kring = &na->rx_rings[ring_nr];
1638 	ring = kring->ring;
1639 
1640 	/* make sure the ring is not disabled */
1641 	if (nm_kr_tryget(kring))
1642 		return 0;
1643 
1644 	if (is_host_ring && hostna->na_bdg == NULL) {
1645 		error = bna->save_notify(na, ring_nr, tx, flags);
1646 		goto put_out;
1647 	}
1648 
1649 	if (is_host_ring) {
1650 		vpna = hostna;
1651 		ring_nr = 0;
1652 	} else {
1653 		/* fetch packets that have arrived.
1654 		 * XXX maybe do this in a loop ?
1655 		 */
1656 		error = na->nm_rxsync(na, ring_nr, 0);
1657 		if (error)
1658 			goto put_out;
1659 	}
1660 	if (kring->nr_hwavail == 0 && netmap_verbose) {
1661 		D("how strange, interrupt with no packets on %s",
1662 			NM_IFPNAME(ifp));
1663 		goto put_out;
1664 	}
1665 	/* XXX avail ? */
1666 	ring->cur = nm_kr_rxpos(kring);
1667 	netmap_vp_txsync(vpna, ring_nr, flags);
1668 
1669 	if (!is_host_ring)
1670 		error = na->nm_rxsync(na, ring_nr, 0);
1671 
1672 put_out:
1673 	nm_kr_put(kring);
1674 	return error;
1675 }
1676 
1677 static int
1678 netmap_bwrap_register(struct netmap_adapter *na, int onoff)
1679 {
1680 	struct netmap_bwrap_adapter *bna =
1681 		(struct netmap_bwrap_adapter *)na;
1682 	struct netmap_adapter *hwna = bna->hwna;
1683 	struct netmap_vp_adapter *hostna = &bna->host;
1684 	int error;
1685 
1686 	ND("%s %d", NM_IFPNAME(ifp), onoff);
1687 
1688 	if (onoff) {
1689 		int i;
1690 
1691 		hwna->na_lut = na->na_lut;
1692 		hwna->na_lut_objtotal = na->na_lut_objtotal;
1693 
1694 		if (hostna->na_bdg) {
1695 			hostna->up.na_lut = na->na_lut;
1696 			hostna->up.na_lut_objtotal = na->na_lut_objtotal;
1697 		}
1698 
1699 		/* cross-link the netmap rings */
1700 		for (i = 0; i <= na->num_tx_rings; i++) {
1701 			hwna->tx_rings[i].nkr_num_slots = na->rx_rings[i].nkr_num_slots;
1702 			hwna->tx_rings[i].ring = na->rx_rings[i].ring;
1703 		}
1704 		for (i = 0; i <= na->num_rx_rings; i++) {
1705 			hwna->rx_rings[i].nkr_num_slots = na->tx_rings[i].nkr_num_slots;
1706 			hwna->rx_rings[i].ring = na->tx_rings[i].ring;
1707 		}
1708 	}
1709 
1710 	if (hwna->ifp) {
1711 		error = hwna->nm_register(hwna, onoff);
1712 		if (error)
1713 			return error;
1714 	}
1715 
1716 	bdg_netmap_reg(na, onoff);
1717 
1718 	if (onoff) {
1719 		bna->save_notify = hwna->nm_notify;
1720 		hwna->nm_notify = netmap_bwrap_intr_notify;
1721 	} else {
1722 		hwna->nm_notify = bna->save_notify;
1723 		hwna->na_lut = NULL;
1724 		hwna->na_lut_objtotal = 0;
1725 	}
1726 
1727 	return 0;
1728 }
1729 
1730 static int
1731 netmap_bwrap_config(struct netmap_adapter *na, u_int *txr, u_int *txd,
1732 				    u_int *rxr, u_int *rxd)
1733 {
1734 	struct netmap_bwrap_adapter *bna =
1735 		(struct netmap_bwrap_adapter *)na;
1736 	struct netmap_adapter *hwna = bna->hwna;
1737 
1738 	/* forward the request */
1739 	netmap_update_config(hwna);
1740 	/* swap the results */
1741 	*txr = hwna->num_rx_rings;
1742 	*txd = hwna->num_rx_desc;
1743 	*rxr = hwna->num_tx_rings;
1744 	*rxd = hwna->num_rx_desc;
1745 
1746 	return 0;
1747 }
1748 
1749 static int
1750 netmap_bwrap_krings_create(struct netmap_adapter *na)
1751 {
1752 	struct netmap_bwrap_adapter *bna =
1753 		(struct netmap_bwrap_adapter *)na;
1754 	struct netmap_adapter *hwna = bna->hwna;
1755 	struct netmap_adapter *hostna = &bna->host.up;
1756 	int error;
1757 
1758 	ND("%s", NM_IFPNAME(na->ifp));
1759 
1760 	error = netmap_vp_krings_create(na);
1761 	if (error)
1762 		return error;
1763 
1764 	error = hwna->nm_krings_create(hwna);
1765 	if (error) {
1766 		netmap_vp_krings_delete(na);
1767 		return error;
1768 	}
1769 
1770 	hostna->tx_rings = na->tx_rings + na->num_tx_rings;
1771 	hostna->rx_rings = na->rx_rings + na->num_rx_rings;
1772 
1773 	return 0;
1774 }
1775 
1776 static void
1777 netmap_bwrap_krings_delete(struct netmap_adapter *na)
1778 {
1779 	struct netmap_bwrap_adapter *bna =
1780 		(struct netmap_bwrap_adapter *)na;
1781 	struct netmap_adapter *hwna = bna->hwna;
1782 
1783 	ND("%s", NM_IFPNAME(na->ifp));
1784 
1785 	hwna->nm_krings_delete(hwna);
1786 	netmap_vp_krings_delete(na);
1787 }
1788 
1789 /* notify method for the bridge-->hwna direction */
1790 static int
1791 netmap_bwrap_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
1792 {
1793 	struct netmap_bwrap_adapter *bna =
1794 		(struct netmap_bwrap_adapter *)na;
1795 	struct netmap_adapter *hwna = bna->hwna;
1796 	struct netmap_kring *kring, *hw_kring;
1797 	struct netmap_ring *ring;
1798 	u_int lim, k;
1799 	int error = 0;
1800 
1801 	if (tx == NR_TX)
1802 	        return ENXIO;
1803 
1804 	kring = &na->rx_rings[ring_n];
1805 	hw_kring = &hwna->tx_rings[ring_n];
1806 	ring = kring->ring;
1807 
1808 	lim = kring->nkr_num_slots - 1;
1809 	k = nm_kr_rxpos(kring);
1810 
1811 	if (hwna->ifp == NULL || !(hwna->ifp->if_capenable & IFCAP_NETMAP))
1812 		return 0;
1813 	ring->cur = k;
1814 	ND("%s[%d] PRE rx(%d, %d, %d, %d) ring(%d, %d, %d) tx(%d, %d)",
1815 		NM_IFPNAME(na->ifp), ring_n,
1816 		kring->nr_hwcur, kring->nr_hwavail, kring->nkr_hwlease, kring->nr_hwreserved,
1817 		ring->cur, ring->avail, ring->reserved,
1818 		hw_kring->nr_hwcur, hw_kring->nr_hwavail);
1819 	if (ring_n == na->num_rx_rings) {
1820 		netmap_txsync_to_host(hwna);
1821 	} else {
1822 		error = hwna->nm_txsync(hwna, ring_n, flags);
1823 	}
1824 	kring->nr_hwcur = ring->cur;
1825 	kring->nr_hwavail = 0;
1826 	kring->nr_hwreserved = lim - ring->avail;
1827 	ND("%s[%d] PST rx(%d, %d, %d, %d) ring(%d, %d, %d) tx(%d, %d)",
1828 		NM_IFPNAME(na->ifp), ring_n,
1829 		kring->nr_hwcur, kring->nr_hwavail, kring->nkr_hwlease, kring->nr_hwreserved,
1830 		ring->cur, ring->avail, ring->reserved,
1831 		hw_kring->nr_hwcur, hw_kring->nr_hwavail);
1832 
1833 	return error;
1834 }
1835 
1836 static int
1837 netmap_bwrap_host_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
1838 {
1839 	struct netmap_bwrap_adapter *bna = na->na_private;
1840 	struct netmap_adapter *port_na = &bna->up.up;
1841 	if (tx == NR_TX || ring_n != 0)
1842 		return ENXIO;
1843 	return netmap_bwrap_notify(port_na, port_na->num_rx_rings, NR_RX, flags);
1844 }
1845 
1846 /* attach a bridge wrapper to the 'real' device */
1847 static int
1848 netmap_bwrap_attach(struct ifnet *fake, struct ifnet *real)
1849 {
1850 	struct netmap_bwrap_adapter *bna;
1851 	struct netmap_adapter *na;
1852 	struct netmap_adapter *hwna = NA(real);
1853 	struct netmap_adapter *hostna;
1854 	int error;
1855 
1856 
1857 	bna = kmalloc(sizeof(*bna), M_DEVBUF, M_NOWAIT | M_ZERO);
1858 	if (bna == NULL)
1859 		return ENOMEM;
1860 
1861 	na = &bna->up.up;
1862 	na->ifp = fake;
1863 	/* fill the ring data for the bwrap adapter with rx/tx meanings
1864 	 * swapped. The real cross-linking will be done during register,
1865 	 * when all the krings will have been created.
1866 	 */
1867 	na->num_rx_rings = hwna->num_tx_rings;
1868 	na->num_tx_rings = hwna->num_rx_rings;
1869 	na->num_tx_desc = hwna->num_rx_desc;
1870 	na->num_rx_desc = hwna->num_tx_desc;
1871 	na->nm_dtor = netmap_bwrap_dtor;
1872 	na->nm_register = netmap_bwrap_register;
1873 	// na->nm_txsync = netmap_bwrap_txsync;
1874 	// na->nm_rxsync = netmap_bwrap_rxsync;
1875 	na->nm_config = netmap_bwrap_config;
1876 	na->nm_krings_create = netmap_bwrap_krings_create;
1877 	na->nm_krings_delete = netmap_bwrap_krings_delete;
1878 	na->nm_notify = netmap_bwrap_notify;
1879 	na->nm_mem = hwna->nm_mem;
1880 	na->na_private = na; /* prevent NIOCREGIF */
1881 	bna->up.retry = 1; /* XXX maybe this should depend on the hwna */
1882 
1883 	bna->hwna = hwna;
1884 	netmap_adapter_get(hwna);
1885 	hwna->na_private = bna; /* weak reference */
1886 
1887 	hostna = &bna->host.up;
1888 	hostna->ifp = hwna->ifp;
1889 	hostna->num_tx_rings = 1;
1890 	hostna->num_tx_desc = hwna->num_rx_desc;
1891 	hostna->num_rx_rings = 1;
1892 	hostna->num_rx_desc = hwna->num_tx_desc;
1893 	// hostna->nm_txsync = netmap_bwrap_host_txsync;
1894 	// hostna->nm_rxsync = netmap_bwrap_host_rxsync;
1895 	hostna->nm_notify = netmap_bwrap_host_notify;
1896 	hostna->nm_mem = na->nm_mem;
1897 	hostna->na_private = bna;
1898 
1899 	D("%s<->%s txr %d txd %d rxr %d rxd %d", fake->if_xname, real->if_xname,
1900 		na->num_tx_rings, na->num_tx_desc,
1901 		na->num_rx_rings, na->num_rx_desc);
1902 
1903 	error = netmap_attach_common(na);
1904 	if (error) {
1905 		netmap_adapter_put(hwna);
1906 		kfree(bna, M_DEVBUF);
1907 		return error;
1908 	}
1909 	return 0;
1910 }
1911 
1912 void
1913 netmap_init_bridges(void)
1914 {
1915 	int i;
1916 	bzero(nm_bridges, sizeof(struct nm_bridge) * NM_BRIDGES); /* safety */
1917 	for (i = 0; i < NM_BRIDGES; i++)
1918 		BDG_RWINIT(&nm_bridges[i]);
1919 }
1920 #endif /* WITH_VALE */
1921