xref: /freebsd/sys/dev/netmap/netmap_monitor.c (revision d6b92ffa)
1 /*
2  * Copyright (C) 2014-2016 Giuseppe Lettieri
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *   1. Redistributions of source code must retain the above copyright
9  *      notice, this list of conditions and the following disclaimer.
10  *   2. Redistributions in binary form must reproduce the above copyright
11  *      notice, this list of conditions and the following disclaimer in the
12  *      documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 /*
28  * $FreeBSD$
29  *
30  * Monitors
31  *
32  * netmap monitors can be used to do monitoring of network traffic
33  * on another adapter, when the latter adapter is working in netmap mode.
34  *
35  * Monitors offer to userspace the same interface as any other netmap port,
36  * with as many pairs of netmap rings as the monitored adapter.
37  * However, only the rx rings are actually used. Each monitor rx ring receives
38  * the traffic transiting on both the tx and rx corresponding rings in the
39  * monitored adapter. During registration, the user can choose if she wants
40  * to intercept tx only, rx only, or both tx and rx traffic.
41  *
42  * If the monitor is not able to cope with the stream of frames, excess traffic
43  * will be dropped.
44  *
45  * If the monitored adapter leaves netmap mode, the monitor has to be restarted.
46  *
47  * Monitors can be either zero-copy or copy-based.
48  *
49  * Copy monitors see the frames before they are consumed:
50  *
51  *  - For tx traffic, this is when the application sends them, before they are
52  *    passed down to the adapter.
53  *
54  *  - For rx traffic, this is when they are received by the adapter, before
55  *    they are sent up to the application, if any (note that, if no
56  *    application is reading from a monitored ring, the ring will eventually
57  *    fill up and traffic will stop).
58  *
59  * Zero-copy monitors only see the frames after they have been consumed:
60  *
61  *  - For tx traffic, this is after the slots containing the frames have been
62  *    marked as free. Note that this may happen with a considerable delay after
63  *    frame transmission, since freeing of slots is often done lazily.
64  *
65  *  - For rx traffic, this is after the consumer on the monitored adapter
66  *    has released them. In most cases, the consumer is a userspace
67  *    application which may have modified the frame contents.
68  *
69  * Several copy monitors may be active on any ring.  Zero-copy monitors,
70  * instead, need exclusive access to each of the monitored rings.  This may
71  * change in the future, if we implement zero-copy monitor chaining.
72  *
73  */
74 
75 
76 #if defined(__FreeBSD__)
77 #include <sys/cdefs.h> /* prerequisite */
78 
79 #include <sys/types.h>
80 #include <sys/errno.h>
81 #include <sys/param.h>	/* defines used in kernel.h */
82 #include <sys/kernel.h>	/* types used in module initialization */
83 #include <sys/malloc.h>
84 #include <sys/poll.h>
85 #include <sys/lock.h>
86 #include <sys/rwlock.h>
87 #include <sys/selinfo.h>
88 #include <sys/sysctl.h>
89 #include <sys/socket.h> /* sockaddrs */
90 #include <net/if.h>
91 #include <net/if_var.h>
92 #include <machine/bus.h>	/* bus_dmamap_* */
93 #include <sys/refcount.h>
94 
95 
96 #elif defined(linux)
97 
98 #include "bsd_glue.h"
99 
100 #elif defined(__APPLE__)
101 
102 #warning OSX support is only partial
103 #include "osx_glue.h"
104 
105 #elif defined(_WIN32)
106 #include "win_glue.h"
107 #else
108 
109 #error	Unsupported platform
110 
111 #endif /* unsupported */
112 
113 /*
114  * common headers
115  */
116 
117 #include <net/netmap.h>
118 #include <dev/netmap/netmap_kern.h>
119 #include <dev/netmap/netmap_mem2.h>
120 
121 #ifdef WITH_MONITOR
122 
123 #define NM_MONITOR_MAXSLOTS 4096
124 
125 /*
126  ********************************************************************
127  * functions common to both kind of monitors
128  ********************************************************************
129  */
130 
131 static int netmap_zmon_reg(struct netmap_adapter *, int);
132 static int
133 nm_is_zmon(struct netmap_adapter *na)
134 {
135 	return na->nm_register == netmap_zmon_reg;
136 }
137 
138 /* nm_sync callback for the monitor's own tx rings.
139  * This makes no sense and always returns error
140  */
141 static int
142 netmap_monitor_txsync(struct netmap_kring *kring, int flags)
143 {
144         RD(1, "%s %x", kring->name, flags);
145 	return EIO;
146 }
147 
148 /* nm_sync callback for the monitor's own rx rings.
149  * Note that the lock in netmap_zmon_parent_sync only protects
150  * writers among themselves. Synchronization between writers
151  * (i.e., netmap_zmon_parent_txsync and netmap_zmon_parent_rxsync)
152  * and readers (i.e., netmap_zmon_rxsync) relies on memory barriers.
153  */
154 static int
155 netmap_monitor_rxsync(struct netmap_kring *kring, int flags)
156 {
157         ND("%s %x", kring->name, flags);
158 	kring->nr_hwcur = kring->rhead;
159 	mb();
160         return 0;
161 }
162 
163 /* nm_krings_create callbacks for monitors.
164  */
165 static int
166 netmap_monitor_krings_create(struct netmap_adapter *na)
167 {
168 	int error = netmap_krings_create(na, 0);
169 	if (error)
170 		return error;
171 	/* override the host rings callbacks */
172 	na->tx_rings[na->num_tx_rings].nm_sync = netmap_monitor_txsync;
173 	na->rx_rings[na->num_rx_rings].nm_sync = netmap_monitor_rxsync;
174 	return 0;
175 }
176 
/* nm_krings_delete callback for monitors: nothing monitor-specific is
 * needed, the generic netmap_krings_delete() does all the work.
 */
static void
netmap_monitor_krings_delete(struct netmap_adapter *na)
{
	netmap_krings_delete(na);
}
183 
184 
185 static u_int
186 nm_txrx2flag(enum txrx t)
187 {
188 	return (t == NR_RX ? NR_MONITOR_RX : NR_MONITOR_TX);
189 }
190 
191 /* allocate the monitors array in the monitored kring */
192 static int
193 nm_monitor_alloc(struct netmap_kring *kring, u_int n)
194 {
195 	size_t old_len, len;
196 	struct netmap_kring **nm;
197 
198 	if (n <= kring->max_monitors)
199 		/* we already have more entries that requested */
200 		return 0;
201 
202 	old_len = sizeof(struct netmap_kring *)*kring->max_monitors;
203         len = sizeof(struct netmap_kring *) * n;
204 	nm = nm_os_realloc(kring->monitors, len, old_len);
205 	if (nm == NULL)
206 		return ENOMEM;
207 
208 	kring->monitors = nm;
209 	kring->max_monitors = n;
210 
211 	return 0;
212 }
213 
214 /* deallocate the parent array in the parent adapter */
215 static void
216 nm_monitor_dealloc(struct netmap_kring *kring)
217 {
218 	if (kring->monitors) {
219 		if (kring->n_monitors > 0) {
220 			D("freeing not empty monitor array for %s (%d dangling monitors)!", kring->name,
221 					kring->n_monitors);
222 		}
223 		nm_os_free(kring->monitors);
224 		kring->monitors = NULL;
225 		kring->max_monitors = 0;
226 		kring->n_monitors = 0;
227 	}
228 }
229 
230 /* returns 1 iff kring has no monitors */
231 static inline int
232 nm_monitor_none(struct netmap_kring *kring)
233 {
234 	return kring->n_monitors == 0 &&
235 		kring->zmon_list[NR_TX].next == NULL &&
236 		kring->zmon_list[NR_RX].next == NULL;
237 }
238 
/*
 * Monitors work by replacing the nm_sync() and, for rx rings, the
 * nm_notify() callbacks of the monitored rings with the functions
 * declared below. The originals are kept in mon_sync / mon_notify.
 */
static int netmap_zmon_parent_txsync(struct netmap_kring *kring, int flags);
static int netmap_zmon_parent_rxsync(struct netmap_kring *kring, int flags);
static int netmap_monitor_parent_txsync(struct netmap_kring *kring, int flags);
static int netmap_monitor_parent_rxsync(struct netmap_kring *kring, int flags);
static int netmap_monitor_parent_notify(struct netmap_kring *kring, int flags);
248 
249 /* add the monitor mkring to the list of monitors of kring.
250  * If this is the first monitor, intercept the callbacks
251  */
252 static int
253 netmap_monitor_add(struct netmap_kring *mkring, struct netmap_kring *kring, int zmon)
254 {
255 	int error = NM_IRQ_COMPLETED;
256 	enum txrx t = kring->tx;
257 	struct netmap_zmon_list *z = &kring->zmon_list[t];
258 	struct netmap_zmon_list *mz = &mkring->zmon_list[t];
259 
260 	/* a zero-copy monitor which is not the first in the list
261 	 * must monitor the previous monitor
262 	 */
263 	if (zmon && z->prev != NULL)
264 		kring = z->prev;
265 
266 	/* sinchronize with concurrently running nm_sync()s */
267 	nm_kr_stop(kring, NM_KR_LOCKED);
268 
269 	if (nm_monitor_none(kring)) {
270 		/* this is the first monitor, intercept callbacks */
271 		ND("intercept callbacks on %s", kring->name);
272 		kring->mon_sync = kring->nm_sync;
273 		kring->mon_notify = kring->nm_notify;
274 		if (kring->tx == NR_TX) {
275 			kring->nm_sync = netmap_monitor_parent_txsync;
276 		} else {
277 			kring->nm_sync = netmap_monitor_parent_rxsync;
278 			kring->nm_notify = netmap_monitor_parent_notify;
279 			kring->mon_tail = kring->nr_hwtail;
280 		}
281 	}
282 
283 	if (zmon) {
284 		/* append the zmon to the list */
285 		struct netmap_monitor_adapter *mna =
286 			(struct netmap_monitor_adapter *)mkring->na;
287 		struct netmap_adapter *pna;
288 
289 		if (z->prev != NULL)
290 			z->prev->zmon_list[t].next = mkring;
291 		mz->prev = z->prev;
292 		z->prev = mkring;
293 		if (z->next == NULL)
294 			z->next = mkring;
295 
296 		/* grap a reference to the previous netmap adapter
297 		 * in the chain (this may be the monitored port
298 		 * or another zero-copy monitor)
299 		 */
300 		pna = kring->na;
301 		netmap_adapter_get(pna);
302 		netmap_adapter_put(mna->priv.np_na);
303 		mna->priv.np_na = pna;
304 	} else {
305 		/* make sure the monitor array exists and is big enough */
306 		error = nm_monitor_alloc(kring, kring->n_monitors + 1);
307 		if (error)
308 			goto out;
309 		kring->monitors[kring->n_monitors] = mkring;
310 		mkring->mon_pos[kring->tx] = kring->n_monitors;
311 		kring->n_monitors++;
312 	}
313 
314 out:
315 	nm_kr_start(kring);
316 	return error;
317 }
318 
319 /* remove the monitor mkring from the list of monitors of kring.
320  * If this is the last monitor, restore the original callbacks
321  */
322 static void
323 netmap_monitor_del(struct netmap_kring *mkring, struct netmap_kring *kring)
324 {
325 	struct netmap_zmon_list *mz = &mkring->zmon_list[kring->tx];
326 	int zmon = nm_is_zmon(mkring->na);
327 
328 
329 	if (zmon && mz->prev != NULL)
330 		kring = mz->prev;
331 
332 	/* sinchronize with concurrently running nm_sync()s */
333 	nm_kr_stop(kring, NM_KR_LOCKED);
334 
335 	if (zmon) {
336 		/* remove the monitor from the list */
337 		if (mz->prev != NULL)
338 			mz->prev->zmon_list[kring->tx].next = mz->next;
339 		else
340 			kring->zmon_list[kring->tx].next = mz->next;
341 		if (mz->next != NULL) {
342 			mz->next->zmon_list[kring->tx].prev = mz->prev;
343 		} else {
344 			kring->zmon_list[kring->tx].prev = mz->prev;
345 		}
346 	} else {
347 		/* this is a copy monitor */
348 		uint32_t mon_pos = mkring->mon_pos[kring->tx];
349 		kring->n_monitors--;
350 		if (mon_pos != kring->n_monitors) {
351 			kring->monitors[mon_pos] =
352 				kring->monitors[kring->n_monitors];
353 			kring->monitors[mon_pos]->mon_pos[kring->tx] = mon_pos;
354 		}
355 		kring->monitors[kring->n_monitors] = NULL;
356 		if (kring->n_monitors == 0) {
357 			nm_monitor_dealloc(kring);
358 		}
359 	}
360 
361 	if (nm_monitor_none(kring)) {
362 		/* this was the last monitor, restore the callbacks */
363 		ND("%s: restoring sync on %s: %p", mkring->name, kring->name,
364 				kring->mon_sync);
365 		kring->nm_sync = kring->mon_sync;
366 		kring->mon_sync = NULL;
367 		if (kring->tx == NR_RX) {
368 			ND("%s: restoring notify on %s: %p",
369 					mkring->name, kring->name, kring->mon_notify);
370 			kring->nm_notify = kring->mon_notify;
371 			kring->mon_notify = NULL;
372 		}
373 	}
374 
375 	nm_kr_start(kring);
376 }
377 
378 
379 /* This is called when the monitored adapter leaves netmap mode
380  * (see netmap_do_unregif).
381  * We need to notify the monitors that the monitored rings are gone.
382  * We do this by setting their mna->priv.np_na to NULL.
383  * Note that the rings are already stopped when this happens, so
384  * no monitor ring callback can be active.
385  */
386 void
387 netmap_monitor_stop(struct netmap_adapter *na)
388 {
389 	enum txrx t;
390 
391 	for_rx_tx(t) {
392 		u_int i;
393 
394 		for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
395 			struct netmap_kring *kring = &NMR(na, t)[i];
396 			struct netmap_kring *zkring;
397 			u_int j;
398 
399 			for (j = 0; j < kring->n_monitors; j++) {
400 				struct netmap_kring *mkring =
401 					kring->monitors[j];
402 				struct netmap_monitor_adapter *mna =
403 					(struct netmap_monitor_adapter *)mkring->na;
404 				/* forget about this adapter */
405 				if (mna->priv.np_na != NULL) {
406 					netmap_adapter_put(mna->priv.np_na);
407 					mna->priv.np_na = NULL;
408 				}
409 			}
410 
411 			zkring = kring->zmon_list[kring->tx].next;
412 			if (zkring != NULL) {
413 				struct netmap_monitor_adapter *next =
414 					(struct netmap_monitor_adapter *)zkring->na;
415 				struct netmap_monitor_adapter *this =
416 						(struct netmap_monitor_adapter *)na;
417 				struct netmap_adapter *pna = this->priv.np_na;
418 				/* let the next monitor forget about us */
419 				if (next->priv.np_na != NULL) {
420 					netmap_adapter_put(next->priv.np_na);
421 				}
422 				if (pna != NULL && nm_is_zmon(na)) {
423 					/* we are a monitor ourselves and we may
424 					 * need to pass down the reference to
425 					 * the previous adapter in the chain
426 					 */
427 					netmap_adapter_get(pna);
428 					next->priv.np_na = pna;
429 					continue;
430 				}
431 				next->priv.np_na = NULL;
432 			}
433 		}
434 	}
435 }
436 
437 
438 /* common functions for the nm_register() callbacks of both kind of
439  * monitors.
440  */
441 static int
442 netmap_monitor_reg_common(struct netmap_adapter *na, int onoff, int zmon)
443 {
444 	struct netmap_monitor_adapter *mna =
445 		(struct netmap_monitor_adapter *)na;
446 	struct netmap_priv_d *priv = &mna->priv;
447 	struct netmap_adapter *pna = priv->np_na;
448 	struct netmap_kring *kring, *mkring;
449 	int i;
450 	enum txrx t, s;
451 
452 	ND("%p: onoff %d", na, onoff);
453 	if (onoff) {
454 		if (pna == NULL) {
455 			/* parent left netmap mode, fatal */
456 			D("%s: internal error", na->name);
457 			return ENXIO;
458 		}
459 		for_rx_tx(t) {
460 			for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
461 				mkring = &NMR(na, t)[i];
462 				if (!nm_kring_pending_on(mkring))
463 					continue;
464 				mkring->nr_mode = NKR_NETMAP_ON;
465 				if (t == NR_TX)
466 					continue;
467 				for_rx_tx(s) {
468 					if (i > nma_get_nrings(pna, s))
469 						continue;
470 					if (mna->flags & nm_txrx2flag(s)) {
471 						kring = &NMR(pna, s)[i];
472 						netmap_monitor_add(mkring, kring, zmon);
473 					}
474 				}
475 			}
476 		}
477 		na->na_flags |= NAF_NETMAP_ON;
478 	} else {
479 		if (na->active_fds == 0)
480 			na->na_flags &= ~NAF_NETMAP_ON;
481 		for_rx_tx(t) {
482 			for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
483 				mkring = &NMR(na, t)[i];
484 				if (!nm_kring_pending_off(mkring))
485 					continue;
486 				mkring->nr_mode = NKR_NETMAP_OFF;
487 				if (t == NR_TX)
488 					continue;
489 				/* we cannot access the parent krings if the parent
490 				 * has left netmap mode. This is signaled by a NULL
491 				 * pna pointer
492 				 */
493 				if (pna == NULL)
494 					continue;
495 				for_rx_tx(s) {
496 					if (i > nma_get_nrings(pna, s))
497 						continue;
498 					if (mna->flags & nm_txrx2flag(s)) {
499 						kring = &NMR(pna, s)[i];
500 						netmap_monitor_del(mkring, kring);
501 					}
502 				}
503 			}
504 		}
505 	}
506 	return 0;
507 }
508 
509 /*
510  ****************************************************************
511  * functions specific for zero-copy monitors
512  ****************************************************************
513  */
514 
515 /*
516  * Common function for both zero-copy tx and rx nm_sync()
517  * callbacks
518  */
519 static int
520 netmap_zmon_parent_sync(struct netmap_kring *kring, int flags, enum txrx tx)
521 {
522 	struct netmap_kring *mkring = kring->zmon_list[tx].next;
523 	struct netmap_ring *ring = kring->ring, *mring;
524 	int error = 0;
525 	int rel_slots, free_slots, busy, sent = 0;
526 	u_int beg, end, i;
527 	u_int lim = kring->nkr_num_slots - 1,
528 	      mlim; // = mkring->nkr_num_slots - 1;
529 
530 	if (mkring == NULL) {
531 		RD(5, "NULL monitor on %s", kring->name);
532 		return 0;
533 	}
534 	mring = mkring->ring;
535 	mlim = mkring->nkr_num_slots - 1;
536 
537 	/* get the relased slots (rel_slots) */
538 	if (tx == NR_TX) {
539 		beg = kring->nr_hwtail + 1;
540 		error = kring->mon_sync(kring, flags);
541 		if (error)
542 			return error;
543 		end = kring->nr_hwtail + 1;
544 	} else { /* NR_RX */
545 		beg = kring->nr_hwcur;
546 		end = kring->rhead;
547 	}
548 
549 	rel_slots = end - beg;
550 	if (rel_slots < 0)
551 		rel_slots += kring->nkr_num_slots;
552 
553 	if (!rel_slots) {
554 		/* no released slots, but we still need
555 		 * to call rxsync if this is a rx ring
556 		 */
557 		goto out_rxsync;
558 	}
559 
560 	/* we need to lock the monitor receive ring, since it
561 	 * is the target of bot tx and rx traffic from the monitored
562 	 * adapter
563 	 */
564 	mtx_lock(&mkring->q_lock);
565 	/* get the free slots available on the monitor ring */
566 	i = mkring->nr_hwtail;
567 	busy = i - mkring->nr_hwcur;
568 	if (busy < 0)
569 		busy += mkring->nkr_num_slots;
570 	free_slots = mlim - busy;
571 
572 	if (!free_slots)
573 		goto out;
574 
575 	/* swap min(free_slots, rel_slots) slots */
576 	if (free_slots < rel_slots) {
577 		beg += (rel_slots - free_slots);
578 		rel_slots = free_slots;
579 	}
580 	if (unlikely(beg >= kring->nkr_num_slots))
581 		beg -= kring->nkr_num_slots;
582 
583 	sent = rel_slots;
584 	for ( ; rel_slots; rel_slots--) {
585 		struct netmap_slot *s = &ring->slot[beg];
586 		struct netmap_slot *ms = &mring->slot[i];
587 		uint32_t tmp;
588 
589 		tmp = ms->buf_idx;
590 		ms->buf_idx = s->buf_idx;
591 		s->buf_idx = tmp;
592 		ND(5, "beg %d buf_idx %d", beg, tmp);
593 
594 		tmp = ms->len;
595 		ms->len = s->len;
596 		s->len = tmp;
597 
598 		s->flags |= NS_BUF_CHANGED;
599 
600 		beg = nm_next(beg, lim);
601 		i = nm_next(i, mlim);
602 
603 	}
604 	mb();
605 	mkring->nr_hwtail = i;
606 
607 out:
608 	mtx_unlock(&mkring->q_lock);
609 
610 	if (sent) {
611 		/* notify the new frames to the monitor */
612 		mkring->nm_notify(mkring, 0);
613 	}
614 
615 out_rxsync:
616 	if (tx == NR_RX)
617 		error = kring->mon_sync(kring, flags);
618 
619 	return error;
620 }
621 
622 /* callback used to replace the nm_sync callback in the monitored tx rings */
623 static int
624 netmap_zmon_parent_txsync(struct netmap_kring *kring, int flags)
625 {
626         return netmap_zmon_parent_sync(kring, flags, NR_TX);
627 }
628 
629 /* callback used to replace the nm_sync callback in the monitored rx rings */
630 static int
631 netmap_zmon_parent_rxsync(struct netmap_kring *kring, int flags)
632 {
633         return netmap_zmon_parent_sync(kring, flags, NR_RX);
634 }
635 
/* nm_register callback for zero-copy monitors: the common registration
 * code, called with the zero-copy flag set.
 */
static int
netmap_zmon_reg(struct netmap_adapter *na, int onoff)
{
	const int zero_copy = 1;

	return netmap_monitor_reg_common(na, onoff, zero_copy);
}
641 
642 /* nm_dtor callback for monitors */
643 static void
644 netmap_zmon_dtor(struct netmap_adapter *na)
645 {
646 	struct netmap_monitor_adapter *mna =
647 		(struct netmap_monitor_adapter *)na;
648 	struct netmap_priv_d *priv = &mna->priv;
649 	struct netmap_adapter *pna = priv->np_na;
650 
651 	netmap_adapter_put(pna);
652 }
653 
654 /*
655  ****************************************************************
656  * functions specific for copy monitors
657  ****************************************************************
658  */
659 
660 static void
661 netmap_monitor_parent_sync(struct netmap_kring *kring, u_int first_new, int new_slots)
662 {
663 	u_int j;
664 
665 	for (j = 0; j < kring->n_monitors; j++) {
666 		struct netmap_kring *mkring = kring->monitors[j];
667 		u_int i, mlim, beg;
668 		int free_slots, busy, sent = 0, m;
669 		u_int lim = kring->nkr_num_slots - 1;
670 		struct netmap_ring *ring = kring->ring, *mring = mkring->ring;
671 		u_int max_len = NETMAP_BUF_SIZE(mkring->na);
672 
673 		mlim = mkring->nkr_num_slots - 1;
674 
675 		/* we need to lock the monitor receive ring, since it
676 		 * is the target of bot tx and rx traffic from the monitored
677 		 * adapter
678 		 */
679 		mtx_lock(&mkring->q_lock);
680 		/* get the free slots available on the monitor ring */
681 		i = mkring->nr_hwtail;
682 		busy = i - mkring->nr_hwcur;
683 		if (busy < 0)
684 			busy += mkring->nkr_num_slots;
685 		free_slots = mlim - busy;
686 
687 		if (!free_slots)
688 			goto out;
689 
690 		/* copy min(free_slots, new_slots) slots */
691 		m = new_slots;
692 		beg = first_new;
693 		if (free_slots < m) {
694 			beg += (m - free_slots);
695 			if (beg >= kring->nkr_num_slots)
696 				beg -= kring->nkr_num_slots;
697 			m = free_slots;
698 		}
699 
700 		for ( ; m; m--) {
701 			struct netmap_slot *s = &ring->slot[beg];
702 			struct netmap_slot *ms = &mring->slot[i];
703 			u_int copy_len = s->len;
704 			char *src = NMB(kring->na, s),
705 			     *dst = NMB(mkring->na, ms);
706 
707 			if (unlikely(copy_len > max_len)) {
708 				RD(5, "%s->%s: truncating %d to %d", kring->name,
709 						mkring->name, copy_len, max_len);
710 				copy_len = max_len;
711 			}
712 
713 			memcpy(dst, src, copy_len);
714 			ms->len = copy_len;
715 			sent++;
716 
717 			beg = nm_next(beg, lim);
718 			i = nm_next(i, mlim);
719 		}
720 		mb();
721 		mkring->nr_hwtail = i;
722 	out:
723 		mtx_unlock(&mkring->q_lock);
724 
725 		if (sent) {
726 			/* notify the new frames to the monitor */
727 			mkring->nm_notify(mkring, 0);
728 		}
729 	}
730 }
731 
732 /* callback used to replace the nm_sync callback in the monitored tx rings */
733 static int
734 netmap_monitor_parent_txsync(struct netmap_kring *kring, int flags)
735 {
736 	u_int first_new;
737 	int new_slots;
738 
739 	/* get the new slots */
740 	if (kring->n_monitors > 0) {
741 		first_new = kring->nr_hwcur;
742 		new_slots = kring->rhead - first_new;
743 		if (new_slots < 0)
744 			new_slots += kring->nkr_num_slots;
745 		if (new_slots)
746 			netmap_monitor_parent_sync(kring, first_new, new_slots);
747 	}
748 	if (kring->zmon_list[NR_TX].next != NULL) {
749 		return netmap_zmon_parent_txsync(kring, flags);
750 	}
751 	return kring->mon_sync(kring, flags);
752 }
753 
754 /* callback used to replace the nm_sync callback in the monitored rx rings */
755 static int
756 netmap_monitor_parent_rxsync(struct netmap_kring *kring, int flags)
757 {
758 	u_int first_new;
759 	int new_slots, error;
760 
761 	/* get the new slots */
762 	if (kring->zmon_list[NR_RX].next != NULL) {
763 		error = netmap_zmon_parent_rxsync(kring, flags);
764 	} else {
765 		error =  kring->mon_sync(kring, flags);
766 	}
767 	if (error)
768 		return error;
769 	if (kring->n_monitors > 0) {
770 		first_new = kring->mon_tail;
771 		new_slots = kring->nr_hwtail - first_new;
772 		if (new_slots < 0)
773 			new_slots += kring->nkr_num_slots;
774 		if (new_slots)
775 			netmap_monitor_parent_sync(kring, first_new, new_slots);
776 		kring->mon_tail = kring->nr_hwtail;
777 	}
778 	return 0;
779 }
780 
781 /* callback used to replace the nm_notify() callback in the monitored rx rings */
782 static int
783 netmap_monitor_parent_notify(struct netmap_kring *kring, int flags)
784 {
785 	int (*notify)(struct netmap_kring*, int);
786 	ND(5, "%s %x", kring->name, flags);
787 	/* ?xsync callbacks have tryget called by their callers
788 	 * (NIOCREGIF and poll()), but here we have to call it
789 	 * by ourself
790 	 */
791 	if (nm_kr_tryget(kring, 0, NULL)) {
792 		/* in all cases, just skip the sync */
793 		return NM_IRQ_COMPLETED;
794 	}
795 	if (kring->n_monitors > 0) {
796 		netmap_monitor_parent_rxsync(kring, NAF_FORCE_READ);
797 	}
798 	if (nm_monitor_none(kring)) {
799 		/* we are no longer monitoring this ring, so both
800 		 * mon_sync and mon_notify are NULL
801 		 */
802 		notify = kring->nm_notify;
803 	} else {
804 		notify = kring->mon_notify;
805 	}
806 	nm_kr_put(kring);
807         return notify(kring, flags);
808 }
809 
810 
/* nm_register callback for copy monitors: the common registration
 * code, called with the zero-copy flag clear.
 */
static int
netmap_monitor_reg(struct netmap_adapter *na, int onoff)
{
	const int zero_copy = 0;

	return netmap_monitor_reg_common(na, onoff, zero_copy);
}
816 
817 static void
818 netmap_monitor_dtor(struct netmap_adapter *na)
819 {
820 	struct netmap_monitor_adapter *mna =
821 		(struct netmap_monitor_adapter *)na;
822 	struct netmap_priv_d *priv = &mna->priv;
823 	struct netmap_adapter *pna = priv->np_na;
824 
825 	netmap_adapter_put(pna);
826 }
827 
828 
829 /* check if nmr is a request for a monitor adapter that we can satisfy */
830 int
831 netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na,
832 		struct netmap_mem_d *nmd, int create)
833 {
834 	struct nmreq pnmr;
835 	struct netmap_adapter *pna; /* parent adapter */
836 	struct netmap_monitor_adapter *mna;
837 	struct ifnet *ifp = NULL;
838 	int  error;
839 	int zcopy = (nmr->nr_flags & NR_ZCOPY_MON);
840 	char monsuff[10] = "";
841 
842 	if (zcopy) {
843 		nmr->nr_flags |= (NR_MONITOR_TX | NR_MONITOR_RX);
844 	}
845 	if ((nmr->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX)) == 0) {
846 		ND("not a monitor");
847 		return 0;
848 	}
849 	/* this is a request for a monitor adapter */
850 
851 	ND("flags %x", nmr->nr_flags);
852 
853 	/* first, try to find the adapter that we want to monitor
854 	 * We use the same nmr, after we have turned off the monitor flags.
855 	 * In this way we can potentially monitor everything netmap understands,
856 	 * except other monitors.
857 	 */
858 	memcpy(&pnmr, nmr, sizeof(pnmr));
859 	pnmr.nr_flags &= ~(NR_MONITOR_TX | NR_MONITOR_RX | NR_ZCOPY_MON);
860 	error = netmap_get_na(&pnmr, &pna, &ifp, nmd, create);
861 	if (error) {
862 		D("parent lookup failed: %d", error);
863 		return error;
864 	}
865 	ND("found parent: %s", pna->name);
866 
867 	if (!nm_netmap_on(pna)) {
868 		/* parent not in netmap mode */
869 		/* XXX we can wait for the parent to enter netmap mode,
870 		 * by intercepting its nm_register callback (2014-03-16)
871 		 */
872 		D("%s not in netmap mode", pna->name);
873 		error = EINVAL;
874 		goto put_out;
875 	}
876 
877 	mna = nm_os_malloc(sizeof(*mna));
878 	if (mna == NULL) {
879 		D("memory error");
880 		error = ENOMEM;
881 		goto put_out;
882 	}
883 	mna->priv.np_na = pna;
884 
885 	/* grab all the rings we need in the parent */
886 	error = netmap_interp_ringid(&mna->priv, nmr->nr_ringid, nmr->nr_flags);
887 	if (error) {
888 		D("ringid error");
889 		goto free_out;
890 	}
891 	if (mna->priv.np_qlast[NR_TX] - mna->priv.np_qfirst[NR_TX] == 1) {
892 		snprintf(monsuff, 10, "-%d", mna->priv.np_qfirst[NR_TX]);
893 	}
894 	snprintf(mna->up.name, sizeof(mna->up.name), "%s%s/%s%s%s", pna->name,
895 			monsuff,
896 			zcopy ? "z" : "",
897 			(nmr->nr_flags & NR_MONITOR_RX) ? "r" : "",
898 			(nmr->nr_flags & NR_MONITOR_TX) ? "t" : "");
899 
900 	/* the monitor supports the host rings iff the parent does */
901 	mna->up.na_flags |= (pna->na_flags & NAF_HOST_RINGS);
902 	/* a do-nothing txsync: monitors cannot be used to inject packets */
903 	mna->up.nm_txsync = netmap_monitor_txsync;
904 	mna->up.nm_rxsync = netmap_monitor_rxsync;
905 	mna->up.nm_krings_create = netmap_monitor_krings_create;
906 	mna->up.nm_krings_delete = netmap_monitor_krings_delete;
907 	mna->up.num_tx_rings = 1; // XXX what should we do here with chained zmons?
908 	/* we set the number of our rx_rings to be max(num_rx_rings, num_rx_rings)
909 	 * in the parent
910 	 */
911 	mna->up.num_rx_rings = pna->num_rx_rings;
912 	if (pna->num_tx_rings > pna->num_rx_rings)
913 		mna->up.num_rx_rings = pna->num_tx_rings;
914 	/* by default, the number of slots is the same as in
915 	 * the parent rings, but the user may ask for a different
916 	 * number
917 	 */
918 	mna->up.num_tx_desc = nmr->nr_tx_slots;
919 	nm_bound_var(&mna->up.num_tx_desc, pna->num_tx_desc,
920 			1, NM_MONITOR_MAXSLOTS, NULL);
921 	mna->up.num_rx_desc = nmr->nr_rx_slots;
922 	nm_bound_var(&mna->up.num_rx_desc, pna->num_rx_desc,
923 			1, NM_MONITOR_MAXSLOTS, NULL);
924 	if (zcopy) {
925 		mna->up.nm_register = netmap_zmon_reg;
926 		mna->up.nm_dtor = netmap_zmon_dtor;
927 		/* to have zero copy, we need to use the same memory allocator
928 		 * as the monitored port
929 		 */
930 		mna->up.nm_mem = netmap_mem_get(pna->nm_mem);
931 		/* and the allocator cannot be changed */
932 		mna->up.na_flags |= NAF_MEM_OWNER;
933 	} else {
934 		mna->up.nm_register = netmap_monitor_reg;
935 		mna->up.nm_dtor = netmap_monitor_dtor;
936 		mna->up.nm_mem = netmap_mem_private_new(
937 				mna->up.num_tx_rings,
938 				mna->up.num_tx_desc,
939 				mna->up.num_rx_rings,
940 				mna->up.num_rx_desc,
941 				0, /* extra bufs */
942 				0, /* pipes */
943 				&error);
944 		if (mna->up.nm_mem == NULL)
945 			goto put_out;
946 	}
947 
948 	error = netmap_attach_common(&mna->up);
949 	if (error) {
950 		D("attach_common error");
951 		goto mem_put_out;
952 	}
953 
954 	/* remember the traffic directions we have to monitor */
955 	mna->flags = (nmr->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX | NR_ZCOPY_MON));
956 
957 	*na = &mna->up;
958 	netmap_adapter_get(*na);
959 
960 	/* keep the reference to the parent */
961 	ND("monitor ok");
962 
963 	/* drop the reference to the ifp, if any */
964 	if (ifp)
965 		if_rele(ifp);
966 
967 	return 0;
968 
969 mem_put_out:
970 	netmap_mem_put(mna->up.nm_mem);
971 free_out:
972 	nm_os_free(mna);
973 put_out:
974 	netmap_unget_na(pna, ifp);
975 	return error;
976 }
977 
978 
979 #endif /* WITH_MONITOR */
980