/*
 * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Sepherosa Ziehau <sepherosa@gmail.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

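/*
 * vke(4) is the vkernel's virtual ethernet driver.  Each unit is backed
 * by a host file descriptor supplied through NetifInfo[], normally a
 * tap(4) device.  A transmit and a receive cothread move packets between
 * the kernel and the backend through lock-free mbuf FIFOs, and an
 * optional address/netmask passed in from the vkernel command line is
 * applied the first time the interface is brought up.
 */
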
#include <sys/param.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/serialize.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>

#include <machine/md_var.h>
#include <machine/cothread.h>

#include <net/ethernet.h>
#include <net/if.h>
#include <net/bpf.h>
#include <net/if_arp.h>
#include <net/if_media.h>
#include <net/ifq_var.h>
#include <net/vlan/if_vlan_ether.h>

#include <netinet/in_var.h>

#include <sys/stat.h>
#include <net/tap/if_tap.h>
#include <err.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

#define VKE_DEVNAME		"vke"

#define VKE_CHUNK	8 /* number of mbufs to queue before interrupting */

#define NETFIFOINDEX(u, sc) ((u) & ((sc)->sc_ringsize - 1))
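
/*
 * Example: the ring indices increase monotonically and are masked on
 * every access; with sc_ringsize == 256 (mask 0xff) an index of 300
 * selects slot 300 & 255 == 44.  This only works because vke_attach()
 * rounds sc_ringsize to a power of two with LOW_POW_2().
 */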

#define VKE_COTD_RUN	0
#define VKE_COTD_EXIT	1
#define VKE_COTD_DEAD	2

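/*
 * The FIFOs below are single-producer/single-consumer rings: sc_txfifo
 * is filled by the kernel (vke_start()) and drained by the TX cothread,
 * sc_txfifo_done flows the opposite way so the kernel can free
 * transmitted mbufs, and sc_rxfifo is filled by the RX cothread while
 * the kernel replaces each consumed slot with a fresh mbuf.  Only the
 * producer side advances windex (after a cpu_sfence) and only the
 * consumer side advances rindex (after a cpu_lfence), so no locking is
 * required; one slot is left empty to distinguish a full ring from an
 * empty one.
 */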
struct vke_fifo {
	struct mbuf	**array;
	int		rindex;
	int		windex;
};
typedef struct vke_fifo *fifo_t;

/* Default value for a long time */
#define VKE_DEFAULT_RINGSIZE	256
static int vke_max_ringsize = 0;
TUNABLE_INT("hw.vke.max_ringsize", &vke_max_ringsize);

#define LOW_POW_2(n)	(1 << (fls(n) - 1))
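
/*
 * LOW_POW_2() rounds n down to the nearest power of two.  fls(n)
 * returns the 1-based index of the most significant set bit, so for
 * example LOW_POW_2(300) == 1 << (9 - 1) == 256, while exact powers of
 * two are returned unchanged.
 */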

struct vke_softc {
	struct arpcom		arpcom;
	int			sc_fd;
	int			sc_unit;

	cothread_t		cotd_tx;
	cothread_t		cotd_rx;

	int			cotd_tx_exit;
	int			cotd_rx_exit;

	void			*sc_txbuf;
	int			sc_txbuf_len;

	fifo_t			sc_txfifo;
	fifo_t			sc_txfifo_done;
	fifo_t			sc_rxfifo;

	int			sc_ringsize;

	long			cotd_ipackets;
	long			cotd_oerrors;
	long			cotd_opackets;

	struct sysctl_ctx_list	sc_sysctl_ctx;
	struct sysctl_oid	*sc_sysctl_tree;

	int			sc_tap_unit;	/* unit of backend tap(4) */
	in_addr_t		sc_addr;	/* address */
	in_addr_t		sc_mask;	/* netmask */

	struct ifmedia		sc_media;
};

static void	vke_start(struct ifnet *, struct ifaltq_subque *);
static void	vke_init(void *);
static int	vke_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);

static int	vke_media_change(struct ifnet *);
static void	vke_media_status(struct ifnet *, struct ifmediareq *);

static int	vke_attach(const struct vknetif_info *, int);
static int	vke_stop(struct vke_softc *);
static int	vke_init_addr(struct ifnet *, in_addr_t, in_addr_t);
static void	vke_tx_intr(cothread_t cotd);
static void	vke_tx_thread(cothread_t cotd);
static void	vke_rx_intr(cothread_t cotd);
static void	vke_rx_thread(cothread_t cotd);

static int vke_txfifo_enqueue(struct vke_softc *sc, struct mbuf *m);
static struct mbuf *vke_txfifo_dequeue(struct vke_softc *sc);

static int vke_txfifo_done_enqueue(struct vke_softc *sc, struct mbuf *m);
static struct mbuf *vke_txfifo_done_dequeue(struct vke_softc *sc, struct mbuf *nm);

static struct mbuf *vke_rxfifo_dequeue(struct vke_softc *sc, struct mbuf *nm);
static struct mbuf *vke_rxfifo_sniff(struct vke_softc *sc);

static void
vke_sysinit(void *arg __unused)
{
	int i, unit;

	KASSERT(NetifNum <= VKNETIF_MAX, ("too many netifs: %d", NetifNum));

	unit = 0;
	for (i = 0; i < NetifNum; ++i) {
		if (vke_attach(&NetifInfo[i], unit) == 0)
			++unit;
	}
}
SYSINIT(vke, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, vke_sysinit, NULL);

/*
 * vke_txfifo_done_enqueue() - Add an mbuf to the transmit done fifo.  Since
 * the cothread cannot free transmit mbufs after processing, we put them on
 * the done fifo so the kernel can free them.  Unlike vke_txfifo_enqueue()
 * this cannot fail; it busy-waits for a free slot, because dropping the
 * mbuf here would leak it.
 */
static int
vke_txfifo_done_enqueue(struct vke_softc *sc, struct mbuf *m)
{
	fifo_t fifo = sc->sc_txfifo_done;

	while (NETFIFOINDEX(fifo->windex + 1, sc) ==
	       NETFIFOINDEX(fifo->rindex, sc)) {
		usleep(20000);
	}
	fifo->array[NETFIFOINDEX(fifo->windex, sc)] = m;
	cpu_sfence();
	++fifo->windex;

	return (0);
}

/*
 * vke_txfifo_done_dequeue() - Remove an mbuf from the transmit done fifo.
 */
static struct mbuf *
vke_txfifo_done_dequeue(struct vke_softc *sc, struct mbuf *nm)
{
	fifo_t fifo = sc->sc_txfifo_done;
	struct mbuf *m;

	if (NETFIFOINDEX(fifo->rindex, sc) == NETFIFOINDEX(fifo->windex, sc))
		return (NULL);

	cpu_lfence();
	m = fifo->array[NETFIFOINDEX(fifo->rindex, sc)];
	fifo->array[NETFIFOINDEX(fifo->rindex, sc)] = nm;
	++fifo->rindex;

	return (m);
}

/*
 * vke_txfifo_enqueue() - Add an mbuf to the transmit fifo.
 */
static int
vke_txfifo_enqueue(struct vke_softc *sc, struct mbuf *m)
{
	fifo_t fifo = sc->sc_txfifo;

	if (NETFIFOINDEX(fifo->windex + 1, sc) ==
	    NETFIFOINDEX(fifo->rindex, sc)) {
		return (-1);
	}

	fifo->array[NETFIFOINDEX(fifo->windex, sc)] = m;
	cpu_sfence();
	++fifo->windex;

	return (0);
}

/*
 * vke_txfifo_dequeue() - Return next mbuf on the transmit fifo if one
 * exists.
 */
static struct mbuf *
vke_txfifo_dequeue(struct vke_softc *sc)
{
	fifo_t fifo = sc->sc_txfifo;
	struct mbuf *m;

	if (NETFIFOINDEX(fifo->rindex, sc) == NETFIFOINDEX(fifo->windex, sc))
		return (NULL);

	cpu_lfence();
	m = fifo->array[NETFIFOINDEX(fifo->rindex, sc)];
	fifo->array[NETFIFOINDEX(fifo->rindex, sc)] = NULL;
	cpu_sfence();
	++fifo->rindex;

	return (m);
}

static int
vke_txfifo_empty(struct vke_softc *sc)
{
	fifo_t fifo = sc->sc_txfifo;

	if (NETFIFOINDEX(fifo->rindex, sc) == NETFIFOINDEX(fifo->windex, sc))
		return (1);
	return (0);
}

/*
 * vke_rxfifo_dequeue() - Return the next mbuf on the receive fifo if one
 * exists, replacing it with newm, which should point to a newly allocated
 * mbuf.
 */
static struct mbuf *
vke_rxfifo_dequeue(struct vke_softc *sc, struct mbuf *newm)
{
	fifo_t fifo = sc->sc_rxfifo;
	struct mbuf *m;

	if (NETFIFOINDEX(fifo->rindex, sc) == NETFIFOINDEX(fifo->windex, sc))
		return (NULL);

	cpu_lfence();
	m = fifo->array[NETFIFOINDEX(fifo->rindex, sc)];
	fifo->array[NETFIFOINDEX(fifo->rindex, sc)] = newm;
	cpu_sfence();
	++fifo->rindex;

	return (m);
}

/*
 * Return the next mbuf if available but do NOT remove it from the FIFO.
 */
static struct mbuf *
vke_rxfifo_sniff(struct vke_softc *sc)
{
	fifo_t fifo = sc->sc_rxfifo;
	struct mbuf *m;

	if (NETFIFOINDEX(fifo->rindex, sc) == NETFIFOINDEX(fifo->windex, sc))
		return (NULL);

	cpu_lfence();
	m = fifo->array[NETFIFOINDEX(fifo->rindex, sc)];

	return (m);
}

static void
vke_init(void *xsc)
{
	struct vke_softc *sc = xsc;
	struct ifnet *ifp = &sc->arpcom.ac_if;
	size_t ringsize = sc->sc_ringsize * sizeof(struct mbuf *);
	int i;

	ASSERT_SERIALIZED(ifp->if_serializer);

	vke_stop(sc);

	ifp->if_flags |= IFF_RUNNING;
	ifsq_clr_oactive(ifq_get_subq_default(&ifp->if_snd));

	/*
	 * Allocate memory for FIFO structures and mbufs.
	 */
	sc->sc_txfifo = kmalloc(sizeof(*sc->sc_txfifo),
				M_DEVBUF, M_WAITOK | M_ZERO);
	sc->sc_txfifo_done = kmalloc(sizeof(*sc->sc_txfifo_done),
				M_DEVBUF, M_WAITOK | M_ZERO);
	sc->sc_rxfifo = kmalloc(sizeof(*sc->sc_rxfifo),
				M_DEVBUF, M_WAITOK | M_ZERO);
	sc->sc_txfifo->array = kmalloc(ringsize,
				M_DEVBUF, M_WAITOK | M_ZERO);
	sc->sc_txfifo_done->array = kmalloc(ringsize,
				M_DEVBUF, M_WAITOK | M_ZERO);
	sc->sc_rxfifo->array = kmalloc(ringsize,
				M_DEVBUF, M_WAITOK | M_ZERO);

	for (i = 0; i < sc->sc_ringsize; i++) {
		sc->sc_rxfifo->array[i] = m_getcl(M_WAITOK, MT_DATA, M_PKTHDR);
		sc->sc_txfifo->array[i] = NULL;
		sc->sc_txfifo_done->array[i] = NULL;
	}

	sc->cotd_tx_exit = sc->cotd_rx_exit = VKE_COTD_RUN;
	sc->cotd_tx = cothread_create(vke_tx_thread, vke_tx_intr, sc, "vke_tx");
	sc->cotd_rx = cothread_create(vke_rx_thread, vke_rx_intr, sc, "vke_rx");

	if (sc->sc_addr != 0) {
		in_addr_t addr, mask;

		addr = sc->sc_addr;
		mask = sc->sc_mask;

		/*
		 * Make sure the vkernel-assigned address will
		 * not be added again.
		 */
		sc->sc_addr = 0;
		sc->sc_mask = 0;

		vke_init_addr(ifp, addr, mask);
	}
}

/*
 * Called from kernel.
 *
 * NOTE: We can't make any kernel callbacks while holding cothread lock
 *	 because the cothread lock is not governed by the kernel scheduler
 *	 (so mplock, tokens, etc will not be released).
 */
static void
vke_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
{
	struct vke_softc *sc = ifp->if_softc;
	struct mbuf *m;
	cothread_t cotd = sc->cotd_tx;
	int count;

	ASSERT_ALTQ_SQ_DEFAULT(ifp, ifsq);
	ASSERT_SERIALIZED(ifp->if_serializer);

	if ((ifp->if_flags & IFF_RUNNING) == 0 || ifsq_is_oactive(ifsq))
		return;

	count = 0;
	while ((m = ifsq_dequeue(ifsq)) != NULL) {
		if (vke_txfifo_enqueue(sc, m) != -1) {
			ETHER_BPF_MTAP(ifp, m);
			if (count++ == VKE_CHUNK) {
				cothread_lock(cotd, 0);
				cothread_signal(cotd);
				cothread_unlock(cotd, 0);
				count = 0;
			}
		} else {
			m_freem(m);
		}
	}
	if (count) {
		cothread_lock(cotd, 0);
		cothread_signal(cotd);
		cothread_unlock(cotd, 0);
	}
}

static int
vke_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
{
	struct vke_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int error = 0;

	ASSERT_SERIALIZED(ifp->if_serializer);

	switch (cmd) {
	case SIOCSIFFLAGS:
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_flags & IFF_RUNNING) == 0)
				vke_init(sc);
		} else {
			if (ifp->if_flags & IFF_RUNNING)
				vke_stop(sc);
		}
		break;
	case SIOCGIFMEDIA:
	case SIOCGIFXMEDIA:
	case SIOCSIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
		break;
	case SIOCGIFSTATUS: {
		struct ifstat *ifs = (struct ifstat *)data;
		int len;

		len = strlen(ifs->ascii);
		if (len < sizeof(ifs->ascii)) {
			if (sc->sc_tap_unit >= 0) {
				ksnprintf(ifs->ascii + len,
					  sizeof(ifs->ascii) - len,
					  "\tBacked by tap%d\n",
					  sc->sc_tap_unit);
			}
		}
		break;
	}
	case SIOCSIFADDR:
		if (((struct ifaddr *)data)->ifa_addr->sa_family == AF_INET) {
			/*
			 * If we are explicitly requested to change address,
			 * we should invalidate address/netmask passed in
			 * from vkernel command line.
			 */
			sc->sc_addr = 0;
			sc->sc_mask = 0;
		}
		/* FALL THROUGH */
	default:
		error = ether_ioctl(ifp, cmd, data);
		break;
	}
	return error;
}

static int
vke_stop(struct vke_softc *sc)
{
	struct ifnet *ifp = &sc->arpcom.ac_if;
	int i;

	ASSERT_SERIALIZED(ifp->if_serializer);

	ifp->if_flags &= ~IFF_RUNNING;
	ifsq_clr_oactive(ifq_get_subq_default(&ifp->if_snd));

	if (sc) {
		if (sc->cotd_tx) {
			cothread_lock(sc->cotd_tx, 0);
			if (sc->cotd_tx_exit == VKE_COTD_RUN)
				sc->cotd_tx_exit = VKE_COTD_EXIT;
			cothread_signal(sc->cotd_tx);
			cothread_unlock(sc->cotd_tx, 0);
			cothread_delete(&sc->cotd_tx);
		}
		if (sc->cotd_rx) {
			cothread_lock(sc->cotd_rx, 0);
			if (sc->cotd_rx_exit == VKE_COTD_RUN)
				sc->cotd_rx_exit = VKE_COTD_EXIT;
			cothread_signal(sc->cotd_rx);
			cothread_unlock(sc->cotd_rx, 0);
			cothread_delete(&sc->cotd_rx);
		}

		for (i = 0; i < sc->sc_ringsize; i++) {
			if (sc->sc_rxfifo && sc->sc_rxfifo->array[i]) {
				m_freem(sc->sc_rxfifo->array[i]);
				sc->sc_rxfifo->array[i] = NULL;
			}
			if (sc->sc_txfifo && sc->sc_txfifo->array[i]) {
				m_freem(sc->sc_txfifo->array[i]);
				sc->sc_txfifo->array[i] = NULL;
			}
			if (sc->sc_txfifo_done && sc->sc_txfifo_done->array[i]) {
				m_freem(sc->sc_txfifo_done->array[i]);
				sc->sc_txfifo_done->array[i] = NULL;
			}
		}

		if (sc->sc_txfifo) {
			if (sc->sc_txfifo->array)
				kfree(sc->sc_txfifo->array, M_DEVBUF);
			kfree(sc->sc_txfifo, M_DEVBUF);
			sc->sc_txfifo = NULL;
		}

		if (sc->sc_txfifo_done) {
			if (sc->sc_txfifo_done->array)
				kfree(sc->sc_txfifo_done->array, M_DEVBUF);
			kfree(sc->sc_txfifo_done, M_DEVBUF);
			sc->sc_txfifo_done = NULL;
		}

		if (sc->sc_rxfifo) {
			if (sc->sc_rxfifo->array)
				kfree(sc->sc_rxfifo->array, M_DEVBUF);
			kfree(sc->sc_rxfifo, M_DEVBUF);
			sc->sc_rxfifo = NULL;
		}
	}

	return 0;
}

/*
 * vke_rx_intr() is the interrupt function for the receive cothread.
 */
static void
vke_rx_intr(cothread_t cotd)
{
	struct mbuf *m;
	struct mbuf *nm;
	struct vke_softc *sc = cotd->arg;
	struct ifnet *ifp = &sc->arpcom.ac_if;
	static int count = 0;

	ifnet_serialize_all(ifp);
	cothread_lock(cotd, 0);

	if (sc->cotd_rx_exit != VKE_COTD_RUN) {
		cothread_unlock(cotd, 0);
		ifnet_deserialize_all(ifp);
		return;
	}
	if (sc->cotd_ipackets) {
		IFNET_STAT_INC(ifp, ipackets, sc->cotd_ipackets);
		sc->cotd_ipackets = 0;
	}
	cothread_unlock(cotd, 0);

	while ((m = vke_rxfifo_sniff(sc)) != NULL) {
		nm = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
		if (nm) {
			vke_rxfifo_dequeue(sc, nm);
			ifp->if_input(ifp, m, NULL, -1);
			if (count++ == VKE_CHUNK) {
				cothread_lock(cotd, 0);
				cothread_signal(cotd);
				cothread_unlock(cotd, 0);
				count = 0;
			}
		} else {
			vke_rxfifo_dequeue(sc, m);
		}
	}

	if (count) {
		cothread_lock(cotd, 0);
		cothread_signal(cotd);
		cothread_unlock(cotd, 0);
	}
	ifnet_deserialize_all(ifp);
}

/*
 * vke_tx_intr() is the interrupt function for the transmit cothread.
 * Calls vke_start() to handle processing transmit mbufs.
 */
static void
vke_tx_intr(cothread_t cotd)
{
	struct vke_softc *sc = cotd->arg;
	struct ifnet *ifp = &sc->arpcom.ac_if;
	struct mbuf *m;

	ifnet_serialize_all(ifp);
	cothread_lock(cotd, 0);
	if (sc->cotd_tx_exit != VKE_COTD_RUN) {
		cothread_unlock(cotd, 0);
		ifnet_deserialize_all(ifp);
		return;
	}
	if (sc->cotd_opackets) {
		IFNET_STAT_INC(ifp, opackets, sc->cotd_opackets);
		sc->cotd_opackets = 0;
	}
	if (sc->cotd_oerrors) {
		IFNET_STAT_INC(ifp, oerrors, sc->cotd_oerrors);
		sc->cotd_oerrors = 0;
	}
	cothread_unlock(cotd, 0);

	/*
	 * Free TX mbufs that have been processed before starting new
	 * ones, to stay pipeline friendly.
	 */
	while ((m = vke_txfifo_done_dequeue(sc, NULL)) != NULL) {
		m_freem(m);
	}

	if (ifp->if_flags & IFF_RUNNING)
		if_devstart(ifp);

	ifnet_deserialize_all(ifp);
}

/*
 * vke_rx_thread() is the body of the receive cothread.
 *
 * WARNING!  THIS IS A COTHREAD WHICH HAS NO PER-CPU GLOBALDATA!!!!!
 */
static void
vke_rx_thread(cothread_t cotd)
{
	struct mbuf *m;
	struct vke_softc *sc = cotd->arg;
	struct ifnet *ifp = &sc->arpcom.ac_if;
	fifo_t fifo = sc->sc_rxfifo;
	fd_set fdset;
	struct timeval tv;
	int count;
	int n;
	int r;

	/*
	 * The select timeout cannot be infinite since we need to check
	 * for the exit flag sc->cotd_rx_exit.
	 */
	tv.tv_sec = 0;
	tv.tv_usec = 500000;

	FD_ZERO(&fdset);
	count = 0;

	while (sc->cotd_rx_exit == VKE_COTD_RUN) {
		/*
		 * Wait for the RX FIFO to be loaded with
		 * empty mbufs.
		 */
		if (NETFIFOINDEX(fifo->windex + 1, sc) ==
		    NETFIFOINDEX(fifo->rindex, sc)) {
			usleep(20000);
			continue;
		}

		/*
		 * Load data into the rx fifo
		 */
		cpu_lfence();
		m = fifo->array[NETFIFOINDEX(fifo->windex, sc)];
		if (m == NULL) {
			fprintf(stderr,
				VKE_DEVNAME "%d: NULL rxring mbuf\n",
				sc->sc_unit);
			/* Deliberately crash; the ring must never hold NULL here */
			*(volatile int *)0 = 1;
		}
		n = read(sc->sc_fd, mtod(m, void *), MCLBYTES);
		if (n > 0) {
			/* no mycpu in cothread */
			/*IFNET_STAT_INC(ifp, ipackets, 1);*/
			++sc->cotd_ipackets;
			m->m_pkthdr.rcvif = ifp;
			m->m_pkthdr.len = m->m_len = n;
			cpu_sfence();
			++fifo->windex;
			if (count++ == VKE_CHUNK) {
				cothread_intr(cotd);
				count = 0;
			}
		} else {
			if (count) {
				cothread_intr(cotd);
				count = 0;
			}
			FD_SET(sc->sc_fd, &fdset);
			r = select(sc->sc_fd + 1, &fdset, NULL, NULL, &tv);
			if (r == -1) {
				fprintf(stderr,
					VKE_DEVNAME "%d: select failed for "
					"TAP device\n", sc->sc_unit);
				usleep(1000000);
			}
		}
	}
	cpu_sfence();
	sc->cotd_rx_exit = VKE_COTD_DEAD;
}

/*
 * vke_tx_thread() is the body of the transmit cothread.
 *
 * WARNING!  THIS IS A COTHREAD WHICH HAS NO PER-CPU GLOBALDATA!!!!!
 */
static void
vke_tx_thread(cothread_t cotd)
{
	struct mbuf *m;
	struct vke_softc *sc = cotd->arg;
	/*struct ifnet *ifp = &sc->arpcom.ac_if;*/
	int count = 0;

	while (sc->cotd_tx_exit == VKE_COTD_RUN) {
		/*
		 * Write outgoing packets to the TAP interface
		 */
		m = vke_txfifo_dequeue(sc);
		if (m) {
			if (m->m_pkthdr.len <= MCLBYTES) {
				m_copydata(m, 0, m->m_pkthdr.len, sc->sc_txbuf);
				sc->sc_txbuf_len = m->m_pkthdr.len;

				if (write(sc->sc_fd, sc->sc_txbuf,
					  sc->sc_txbuf_len) < 0) {
					/* no mycpu in cothread */
					/*IFNET_STAT_INC(ifp, oerrors, 1);*/
					++sc->cotd_oerrors;
				} else {
					/* no mycpu in cothread */
					/*IFNET_STAT_INC(ifp, opackets, 1);*/
					++sc->cotd_opackets;
				}
			}
			if (count++ == VKE_CHUNK) {
				cothread_intr(cotd);
				count = 0;
			}
			vke_txfifo_done_enqueue(sc, m);
		} else {
			if (count) {
				cothread_intr(cotd);
				count = 0;
			}
			cothread_lock(cotd, 1);
			if (vke_txfifo_empty(sc))
				cothread_wait(cotd);
			cothread_unlock(cotd, 1);
		}
	}
	cpu_sfence();
	sc->cotd_tx_exit = VKE_COTD_DEAD;
}

static void
vke_ifmedia_add(struct vke_softc *sc, int mword)
{
	ifmedia_add(&sc->sc_media, IFM_ETHER | mword, 0, NULL);
}

static void
vke_ifmedia_addfdx(struct vke_softc *sc, int mword)
{
	vke_ifmedia_add(sc, mword | IFM_FDX);
}

static int
vke_attach(const struct vknetif_info *info, int unit)
{
	struct vke_softc *sc;
	struct ifnet *ifp;
	struct tapinfo tapinfo;
	uint8_t enaddr[ETHER_ADDR_LEN];
	int nmbufs;
	int fd;

	KKASSERT(info->tap_fd >= 0);
	fd = info->tap_fd;

	/*
	 * Preset sane tap info; TAPGIFINFO only fills this in when we
	 * are backed by a real tap(4) device, yet if_mtu and if_baudrate
	 * are taken from it unconditionally further down.
	 */
	bzero(&tapinfo, sizeof(tapinfo));
	tapinfo.mtu = ETHERMTU;

	if (info->enaddr) {
		/*
		 * enaddr is supplied
		 */
		bcopy(info->enaddr, enaddr, ETHER_ADDR_LEN);
	} else {
		/*
		 * This is only a TAP device if tap_unit is non-negative.
		 * If connecting to a virtual socket we generate a unique
		 * MAC.
		 *
		 * WARNING: enaddr[0] bit 0 is the multicast bit, when
		 *          randomizing enaddr[] just leave the first
		 *          two bytes 00 00 for now.
		 */
		bzero(enaddr, sizeof(enaddr));
		if (info->tap_unit >= 0) {
			if (ioctl(fd, TAPGIFINFO, &tapinfo) < 0) {
				kprintf(VKE_DEVNAME "%d: ioctl(TAPGIFINFO) "
					"failed: %s\n", unit, strerror(errno));
				return ENXIO;
			}

			if (ioctl(fd, SIOCGIFADDR, enaddr) < 0) {
				kprintf(VKE_DEVNAME "%d: ioctl(SIOCGIFADDR) "
					"failed: %s\n", unit, strerror(errno));
				return ENXIO;
			}
		} else {
			int rfd = open("/dev/urandom", O_RDONLY);

			if (rfd >= 0) {
				read(rfd, enaddr + 2, 4);
				close(rfd);
			}
			enaddr[4] = (int)getpid() >> 8;
			enaddr[5] = (int)getpid() & 255;
		}
		enaddr[1] += 1;
	}
	if (ETHER_IS_MULTICAST(enaddr)) {
		kprintf(VKE_DEVNAME "%d: illegal MULTICAST ether mac!\n", unit);
		return ENXIO;
	}

	sc = kmalloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO);

	sc->sc_txbuf = kmalloc(MCLBYTES, M_DEVBUF, M_WAITOK);
	sc->sc_fd = fd;
	sc->sc_unit = unit;
	sc->sc_tap_unit = info->tap_unit;
	sc->sc_addr = info->netif_addr;
	sc->sc_mask = info->netif_mask;

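	/*
	 * Size the rings to a power of two so the NETFIFOINDEX() mask
	 * works.  For example, assuming nmbclusters is 4096 and two
	 * netifs are configured: 4096 / (2 * 2) = 1024, LOW_POW_2(1024)
	 * stays 1024, and the result is then clamped to
	 * VKE_DEFAULT_RINGSIZE (256).  A hw.vke.max_ringsize tunable
	 * smaller than VKE_CHUNK falls back to the minimum usable ring.
	 */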
	if (vke_max_ringsize == 0) {
		nmbufs = nmbclusters / (NetifNum * 2);
		sc->sc_ringsize = LOW_POW_2(nmbufs);
		if (sc->sc_ringsize > VKE_DEFAULT_RINGSIZE)
			sc->sc_ringsize = VKE_DEFAULT_RINGSIZE;
	} else if (vke_max_ringsize >= VKE_CHUNK) {	/* Tunable specified */
		sc->sc_ringsize = LOW_POW_2(vke_max_ringsize);
	} else {
		sc->sc_ringsize = LOW_POW_2(VKE_CHUNK);
	}

	ifp = &sc->arpcom.ac_if;
	if_initname(ifp, VKE_DEVNAME, sc->sc_unit);

	/* NB: after if_initname() */
	sysctl_ctx_init(&sc->sc_sysctl_ctx);
	sc->sc_sysctl_tree = SYSCTL_ADD_NODE(&sc->sc_sysctl_ctx,
					     SYSCTL_STATIC_CHILDREN(_hw),
					     OID_AUTO, ifp->if_xname,
					     CTLFLAG_RD, 0, "");
	if (sc->sc_sysctl_tree == NULL) {
		kprintf(VKE_DEVNAME "%d: can't add sysctl node\n", unit);
	} else {
		SYSCTL_ADD_INT(&sc->sc_sysctl_ctx,
			       SYSCTL_CHILDREN(sc->sc_sysctl_tree),
			       OID_AUTO, "tap_unit",
			       CTLFLAG_RD, &sc->sc_tap_unit, 0,
			       "Backend tap(4) unit");
	}

	ifp->if_softc = sc;
	ifp->if_ioctl = vke_ioctl;
	ifp->if_start = vke_start;
	ifp->if_init = vke_init;
	ifp->if_mtu = tapinfo.mtu;
	ifp->if_baudrate = tapinfo.baudrate;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifq_set_maxlen(&ifp->if_snd, IFQ_MAXLEN);
	ifq_set_ready(&ifp->if_snd);

	ifmedia_init(&sc->sc_media, 0, vke_media_change, vke_media_status);
	/*
	 * We support as many media types as we please for
	 * debugging purposes.
	 */
	vke_ifmedia_add(sc, IFM_10_T);
	vke_ifmedia_addfdx(sc, IFM_10_T);
	vke_ifmedia_add(sc, IFM_10_2);
	vke_ifmedia_add(sc, IFM_10_5);
	vke_ifmedia_add(sc, IFM_100_TX);
	vke_ifmedia_addfdx(sc, IFM_100_TX);
	vke_ifmedia_add(sc, IFM_100_FX);
	vke_ifmedia_add(sc, IFM_100_T4);
	vke_ifmedia_add(sc, IFM_100_VG);
	vke_ifmedia_add(sc, IFM_100_T2);
	vke_ifmedia_addfdx(sc, IFM_1000_SX);
	vke_ifmedia_add(sc, IFM_10_STP);
	vke_ifmedia_add(sc, IFM_10_FL);
	vke_ifmedia_addfdx(sc, IFM_1000_LX);
	vke_ifmedia_addfdx(sc, IFM_1000_CX);
	vke_ifmedia_addfdx(sc, IFM_1000_T);
	vke_ifmedia_add(sc, IFM_HPNA_1);
	vke_ifmedia_addfdx(sc, IFM_10G_LR);
	vke_ifmedia_addfdx(sc, IFM_10G_SR);
	vke_ifmedia_addfdx(sc, IFM_10G_CX4);
	vke_ifmedia_addfdx(sc, IFM_2500_SX);
	vke_ifmedia_addfdx(sc, IFM_10G_TWINAX);
	vke_ifmedia_addfdx(sc, IFM_10G_TWINAX_LONG);
	vke_ifmedia_addfdx(sc, IFM_10G_LRM);
	vke_ifmedia_addfdx(sc, IFM_10G_T);
	vke_ifmedia_addfdx(sc, IFM_40G_CR4);
	vke_ifmedia_addfdx(sc, IFM_40G_SR4);
	vke_ifmedia_addfdx(sc, IFM_40G_LR4);
	vke_ifmedia_addfdx(sc, IFM_1000_KX);
	vke_ifmedia_addfdx(sc, IFM_10G_KX4);
	vke_ifmedia_addfdx(sc, IFM_10G_KR);
	vke_ifmedia_addfdx(sc, IFM_10G_CR1);
	vke_ifmedia_addfdx(sc, IFM_20G_KR2);
	vke_ifmedia_addfdx(sc, IFM_2500_KX);
	vke_ifmedia_addfdx(sc, IFM_2500_T);
	vke_ifmedia_addfdx(sc, IFM_5000_T);
	vke_ifmedia_addfdx(sc, IFM_50G_PCIE);
	vke_ifmedia_addfdx(sc, IFM_25G_PCIE);
	vke_ifmedia_addfdx(sc, IFM_1000_SGMII);
	vke_ifmedia_addfdx(sc, IFM_10G_SFI);
	vke_ifmedia_addfdx(sc, IFM_40G_XLPPI);
	vke_ifmedia_addfdx(sc, IFM_1000_CX_SGMII);
	vke_ifmedia_addfdx(sc, IFM_40G_KR4);
	vke_ifmedia_addfdx(sc, IFM_10G_ER);
	vke_ifmedia_addfdx(sc, IFM_100G_CR4);
	vke_ifmedia_addfdx(sc, IFM_100G_SR4);
	vke_ifmedia_addfdx(sc, IFM_100G_KR4);
	vke_ifmedia_addfdx(sc, IFM_100G_LR4);
	vke_ifmedia_addfdx(sc, IFM_56G_R4);
	vke_ifmedia_addfdx(sc, IFM_100_T);
	vke_ifmedia_addfdx(sc, IFM_25G_CR);
	vke_ifmedia_addfdx(sc, IFM_25G_KR);
	vke_ifmedia_addfdx(sc, IFM_25G_SR);
	vke_ifmedia_addfdx(sc, IFM_50G_CR2);
	vke_ifmedia_addfdx(sc, IFM_50G_KR2);
	vke_ifmedia_add(sc, IFM_AUTO);

	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);

	ifp->if_link_state = LINK_STATE_UP;

	ether_ifattach(ifp, enaddr, NULL);

	if (bootverbose && sc->sc_addr != 0) {
		if_printf(ifp, "pre-configured "
		    "address 0x%08x, netmask 0x%08x, %d mbuf clusters\n",
		    ntohl(sc->sc_addr), ntohl(sc->sc_mask), sc->sc_ringsize);
	}

	return 0;
}

static int
vke_init_addr(struct ifnet *ifp, in_addr_t addr, in_addr_t mask)
{
	struct ifaliasreq ifra;
	struct sockaddr_in *sin;
	int ret;

	ASSERT_SERIALIZED(ifp->if_serializer);

	if (bootverbose) {
		if_printf(ifp, "add pre-configured "
			  "address 0x%08x, netmask 0x%08x\n",
			  ntohl(addr), ntohl(mask));
	}

	bzero(&ifra, sizeof(ifra));

	/* NB: no need to set ifaliasreq.ifra_name */

	sin = (struct sockaddr_in *)&ifra.ifra_addr;
	sin->sin_family = AF_INET;
	sin->sin_len = sizeof(*sin);
	sin->sin_addr.s_addr = addr;

	if (mask != 0) {
		sin = (struct sockaddr_in *)&ifra.ifra_mask;
		sin->sin_len = sizeof(*sin);
		sin->sin_addr.s_addr = mask;
	}

	/*
	 * Temporarily release serializer, in_control() will hold
	 * it again before calling ifnet.if_ioctl().
	 */
	ifnet_deserialize_all(ifp);
	ret = in_control(SIOCAIFADDR, (caddr_t)&ifra, ifp, NULL);
	ifnet_serialize_all(ifp);

	return ret;
}

static int
vke_media_change(struct ifnet *ifp)
{
	/* ignored */
	return (0);
}

static void
vke_media_status(struct ifnet *ifp, struct ifmediareq *imr)
{
	struct vke_softc *sc = (struct vke_softc *)ifp->if_softc;

	imr->ifm_status = IFM_AVALID;
	imr->ifm_status |= IFM_ACTIVE;

	if (sc->sc_media.ifm_cur) {
		if (sc->sc_media.ifm_cur->ifm_media == IFM_ETHER) {
			imr->ifm_active = IFM_ETHER | IFM_1000_T | IFM_FDX;
		} else {
			imr->ifm_active = sc->sc_media.ifm_cur->ifm_media;
		}
	} else {
		imr->ifm_active = IFM_ETHER | IFM_1000_T | IFM_FDX;
	}
}