/*
 * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Sepherosa Ziehau <sepherosa@gmail.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/serialize.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>

#include <machine/md_var.h>
#include <machine/cothread.h>

#include <net/ethernet.h>
#include <net/if.h>
#include <net/bpf.h>
#include <net/if_arp.h>
#include <net/ifq_var.h>

#include <netinet/in_var.h>

#include <sys/stat.h>
#include <net/tap/if_tap.h>
#include <err.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

#define VKE_DEVNAME		"vke"

#define VKE_CHUNK	8 /* number of mbufs to queue before interrupting */

#define NETFIFOSIZE	256
#define NETFIFOMASK	(NETFIFOSIZE - 1)
#define NETFIFOINDEX(u) ((u) & NETFIFOMASK)
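/*
 * The FIFO indices increment monotonically and are masked down to
 * NETFIFOSIZE (a power of two) slots on every access.  A ring is empty
 * when the masked indices are equal and full when the next write index
 * would collide with the read index, so one slot always stays unused.
 */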

#define VKE_COTD_RUN	0
#define VKE_COTD_EXIT	1
#define VKE_COTD_DEAD	2

struct vke_fifo {
	struct mbuf	*array[NETFIFOSIZE];
	int		rindex;
	int		windex;
};
typedef struct vke_fifo *fifo_t;
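
/*
 * Each FIFO has exactly one producer and one consumer, so no locking is
 * needed; ordering is enforced with cpu_sfence()/cpu_lfence() around the
 * index updates instead.
 */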

struct vke_softc {
	struct arpcom		arpcom;
	int			sc_fd;
	int			sc_unit;

	cothread_t		cotd_tx;
	cothread_t		cotd_rx;

	int			cotd_tx_exit;
	int			cotd_rx_exit;

	void			*sc_txbuf;
	int			sc_txbuf_len;

	fifo_t			sc_txfifo;
	fifo_t			sc_txfifo_done;
	fifo_t			sc_rxfifo;

	struct sysctl_ctx_list	sc_sysctl_ctx;
	struct sysctl_oid	*sc_sysctl_tree;

	int			sc_tap_unit;	/* unit of backend tap(4) */
	in_addr_t		sc_addr;	/* address */
	in_addr_t		sc_mask;	/* netmask */
};
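
/*
 * sc_txfifo carries outbound mbufs from the kernel to the tx cothread,
 * sc_txfifo_done hands them back so the kernel can free them, and
 * sc_rxfifo is kept loaded with empty mbuf clusters that the rx cothread
 * fills from the backend descriptor.
 */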

static void	vke_start(struct ifnet *);
static void	vke_init(void *);
static int	vke_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);

static int	vke_attach(const struct vknetif_info *, int);
static int	vke_stop(struct vke_softc *);
static int	vke_init_addr(struct ifnet *, in_addr_t, in_addr_t);
static void	vke_tx_intr(cothread_t cotd);
static void	vke_tx_thread(cothread_t cotd);
static void	vke_rx_intr(cothread_t cotd);
static void	vke_rx_thread(cothread_t cotd);

static int vke_txfifo_enqueue(struct vke_softc *sc, struct mbuf *m);
static struct mbuf *vke_txfifo_dequeue(struct vke_softc *sc);

static int vke_txfifo_done_enqueue(struct vke_softc *sc, struct mbuf *m);
static struct mbuf *vke_txfifo_done_dequeue(struct vke_softc *sc,
					    struct mbuf *nm);

static struct mbuf *vke_rxfifo_dequeue(struct vke_softc *sc, struct mbuf *nm);
static struct mbuf *vke_rxfifo_sniff(struct vke_softc *sc);

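/*
 * Attach a vke interface for each network backend the vkernel was
 * configured with.  A unit number is consumed only when the attach
 * succeeds.
 */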
static void
vke_sysinit(void *arg __unused)
{
	int i, unit;

	KASSERT(NetifNum <= VKNETIF_MAX, ("too many netifs: %d", NetifNum));

	unit = 0;
	for (i = 0; i < NetifNum; ++i) {
		if (vke_attach(&NetifInfo[i], unit) == 0)
			++unit;
	}
}
SYSINIT(vke, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, vke_sysinit, NULL);

/*
 * vke_txfifo_done_enqueue() - Add an mbuf to the transmit-done FIFO.
 * Since the cothread cannot free transmit mbufs after processing, we
 * put them on the done FIFO so the kernel can free them.
 */
static int
vke_txfifo_done_enqueue(struct vke_softc *sc, struct mbuf *m)
{
	fifo_t fifo = sc->sc_txfifo_done;

	while (NETFIFOINDEX(fifo->windex + 1) == NETFIFOINDEX(fifo->rindex)) {
		usleep(20000);
	}

	fifo->array[NETFIFOINDEX(fifo->windex)] = m;
	cpu_sfence();
	++fifo->windex;
	return (0);
}

/*
 * vke_txfifo_done_dequeue() - Remove an mbuf from the transmit done fifo.
 */
static struct mbuf *
vke_txfifo_done_dequeue(struct vke_softc *sc, struct mbuf *nm)
{
	fifo_t fifo = sc->sc_txfifo_done;
	struct mbuf *m;

	if (NETFIFOINDEX(fifo->rindex) == NETFIFOINDEX(fifo->windex))
		return (NULL);

	m = fifo->array[NETFIFOINDEX(fifo->rindex)];
	fifo->array[NETFIFOINDEX(fifo->rindex)] = nm;
	cpu_lfence();
	++fifo->rindex;
	return (m);
}

/*
 * vke_txfifo_enqueue() - Add an mbuf to the transmit fifo.
 */
static int
vke_txfifo_enqueue(struct vke_softc *sc, struct mbuf *m)
{
	fifo_t fifo = sc->sc_txfifo;

	if (NETFIFOINDEX(fifo->windex + 1) == NETFIFOINDEX(fifo->rindex))
		return (-1);

	fifo->array[NETFIFOINDEX(fifo->windex)] = m;
	cpu_sfence();
	++fifo->windex;

	return (0);
}

/*
 * vke_txfifo_dequeue() - Return next mbuf on the transmit fifo if one
 * exists.
 */
static struct mbuf *
vke_txfifo_dequeue(struct vke_softc *sc)
{
	fifo_t fifo = sc->sc_txfifo;
	struct mbuf *m;

	if (NETFIFOINDEX(fifo->rindex) == NETFIFOINDEX(fifo->windex))
		return (NULL);

	m = fifo->array[NETFIFOINDEX(fifo->rindex)];
	fifo->array[NETFIFOINDEX(fifo->rindex)] = NULL;

	cpu_lfence();
	++fifo->rindex;
	return (m);
}

static int
vke_txfifo_empty(struct vke_softc *sc)
{
	fifo_t fifo = sc->sc_txfifo;

	if (NETFIFOINDEX(fifo->rindex) == NETFIFOINDEX(fifo->windex))
		return (1);
	return (0);
}

/*
 * vke_rxfifo_dequeue() - Return the next mbuf on the receive fifo if one
 * exists, replacing it with newm, which should point to a newly allocated
 * mbuf.
 */
static struct mbuf *
vke_rxfifo_dequeue(struct vke_softc *sc, struct mbuf *newm)
{
	fifo_t fifo = sc->sc_rxfifo;
	struct mbuf *m;

	if (NETFIFOINDEX(fifo->rindex) == NETFIFOINDEX(fifo->windex))
		return (NULL);

	m = fifo->array[NETFIFOINDEX(fifo->rindex)];
	fifo->array[NETFIFOINDEX(fifo->rindex)] = newm;
	cpu_lfence();
	++fifo->rindex;
	return (m);
}

/*
 * Return the next mbuf if available but do NOT remove it from the FIFO.
 */
static struct mbuf *
vke_rxfifo_sniff(struct vke_softc *sc)
{
	fifo_t fifo = sc->sc_rxfifo;
	struct mbuf *m;

	if (NETFIFOINDEX(fifo->rindex) == NETFIFOINDEX(fifo->windex))
		return (NULL);

	m = fifo->array[NETFIFOINDEX(fifo->rindex)];
	cpu_lfence();
	return (m);
}

static void
vke_init(void *xsc)
{
	struct vke_softc *sc = xsc;
	struct ifnet *ifp = &sc->arpcom.ac_if;
	int i;

	ASSERT_SERIALIZED(ifp->if_serializer);

	vke_stop(sc);

	ifp->if_flags |= IFF_RUNNING;
	ifp->if_flags &= ~IFF_OACTIVE;

	sc->sc_txfifo = kmalloc(sizeof(*sc->sc_txfifo), M_DEVBUF, M_WAITOK);
	sc->sc_txfifo_done = kmalloc(sizeof(*sc->sc_txfifo_done), M_DEVBUF,
				     M_WAITOK);

	sc->sc_rxfifo = kmalloc(sizeof(*sc->sc_rxfifo), M_DEVBUF, M_WAITOK);
	for (i = 0; i < NETFIFOSIZE; i++) {
		sc->sc_rxfifo->array[i] = m_getcl(MB_WAIT, MT_DATA, M_PKTHDR);
		sc->sc_txfifo->array[i] = NULL;
		sc->sc_txfifo_done->array[i] = NULL;
	}

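	/*
	 * The cothreads run as host threads outside the vkernel's own
	 * scheduler, so they may block in host system calls (read, write,
	 * select, usleep) without stalling the kernel.
	 */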
	sc->cotd_tx_exit = sc->cotd_rx_exit = VKE_COTD_RUN;
	sc->cotd_tx = cothread_create(vke_tx_thread, vke_tx_intr, sc, "vke_tx");
	sc->cotd_rx = cothread_create(vke_rx_thread, vke_rx_intr, sc, "vke_rx");

	if (sc->sc_addr != 0) {
		in_addr_t addr, mask;

		addr = sc->sc_addr;
		mask = sc->sc_mask;

		/*
		 * Make sure the vkernel-assigned address will not be
		 * added again.
		 */
		sc->sc_addr = 0;
		sc->sc_mask = 0;

		vke_init_addr(ifp, addr, mask);
	}
}

/*
 * Called from kernel.
 *
 * NOTE: We can't make any kernel callbacks while holding cothread lock
 *	 because the cothread lock is not governed by the kernel scheduler
 *	 (so mplock, tokens, etc will not be released).
 */
static void
vke_start(struct ifnet *ifp)
{
	struct vke_softc *sc = ifp->if_softc;
	struct mbuf *m;
	cothread_t cotd = sc->cotd_tx;
	int count;

	ASSERT_SERIALIZED(ifp->if_serializer);

	if ((ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) != IFF_RUNNING)
		return;

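	/*
	 * Batch the cothread wakeups: signal the tx cothread once per
	 * VKE_CHUNK mbufs enqueued and once more for any remainder,
	 * rather than once per packet.
	 */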
	count = 0;
	while ((m = ifq_dequeue(&ifp->if_snd, NULL)) != NULL) {
		if (vke_txfifo_enqueue(sc, m) != -1) {
			if (count++ == VKE_CHUNK) {
				cothread_lock(cotd, 0);
				cothread_signal(cotd);
				cothread_unlock(cotd, 0);
				count = 0;
			}
		} else {
			m_freem(m);
		}
	}
	if (count) {
		cothread_lock(cotd, 0);
		cothread_signal(cotd);
		cothread_unlock(cotd, 0);
	}
}

static int
vke_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
{
	struct vke_softc *sc = ifp->if_softc;
	int error = 0;

	ASSERT_SERIALIZED(ifp->if_serializer);

	switch (cmd) {
	case SIOCSIFFLAGS:
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_flags & IFF_RUNNING) == 0)
				vke_init(sc);
		} else {
			if (ifp->if_flags & IFF_RUNNING)
				vke_stop(sc);
		}
		break;
	case SIOCGIFMEDIA:
	case SIOCSIFMEDIA:
		error = EOPNOTSUPP;
		/* TODO */
		break;
	case SIOCGIFSTATUS: {
		struct ifstat *ifs = (struct ifstat *)data;
		int len;

		len = strlen(ifs->ascii);
		if (len < sizeof(ifs->ascii)) {
			ksnprintf(ifs->ascii + len, sizeof(ifs->ascii) - len,
				  "\tBacked by tap%d\n", sc->sc_tap_unit);
		}
		break;
	}
	case SIOCSIFADDR:
		if (((struct ifaddr *)data)->ifa_addr->sa_family == AF_INET) {
			/*
			 * If we are explicitly asked to change the address,
			 * invalidate the address/netmask passed in from the
			 * vkernel command line.
			 */
			sc->sc_addr = 0;
			sc->sc_mask = 0;
		}
		/* FALL THROUGH */
	default:
		error = ether_ioctl(ifp, cmd, data);
		break;
	}
	return error;
}

static int
vke_stop(struct vke_softc *sc)
{
	struct ifnet *ifp = &sc->arpcom.ac_if;
	int i;

	ASSERT_SERIALIZED(ifp->if_serializer);

	ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);

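	/*
	 * Tell both cothreads to exit and wake them so they notice; only
	 * then tear down the FIFOs they were using.
	 */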
	if (sc) {
		if (sc->cotd_tx) {
			cothread_lock(sc->cotd_tx, 0);
			if (sc->cotd_tx_exit == VKE_COTD_RUN)
				sc->cotd_tx_exit = VKE_COTD_EXIT;
			cothread_signal(sc->cotd_tx);
			cothread_unlock(sc->cotd_tx, 0);
			cothread_delete(&sc->cotd_tx);
		}
		if (sc->cotd_rx) {
			cothread_lock(sc->cotd_rx, 0);
			if (sc->cotd_rx_exit == VKE_COTD_RUN)
				sc->cotd_rx_exit = VKE_COTD_EXIT;
			cothread_signal(sc->cotd_rx);
			cothread_unlock(sc->cotd_rx, 0);
			cothread_delete(&sc->cotd_rx);
		}

		for (i = 0; i < NETFIFOSIZE; i++) {
			if (sc->sc_rxfifo && sc->sc_rxfifo->array[i]) {
				m_freem(sc->sc_rxfifo->array[i]);
				sc->sc_rxfifo->array[i] = NULL;
			}
			if (sc->sc_txfifo && sc->sc_txfifo->array[i]) {
				m_freem(sc->sc_txfifo->array[i]);
				sc->sc_txfifo->array[i] = NULL;
			}
			if (sc->sc_txfifo_done && sc->sc_txfifo_done->array[i]) {
				m_freem(sc->sc_txfifo_done->array[i]);
				sc->sc_txfifo_done->array[i] = NULL;
			}
		}

		if (sc->sc_txfifo) {
			kfree(sc->sc_txfifo, M_DEVBUF);
			sc->sc_txfifo = NULL;
		}

		if (sc->sc_txfifo_done) {
			kfree(sc->sc_txfifo_done, M_DEVBUF);
			sc->sc_txfifo_done = NULL;
		}

		if (sc->sc_rxfifo) {
			kfree(sc->sc_rxfifo, M_DEVBUF);
			sc->sc_rxfifo = NULL;
		}
	}

	return 0;
}

/*
 * vke_rx_intr() is the interrupt function for the receive cothread.
 */
static void
vke_rx_intr(cothread_t cotd)
{
	struct mbuf *m;
	struct mbuf *nm;
	struct vke_softc *sc = cotd->arg;
	struct ifnet *ifp = &sc->arpcom.ac_if;
	static int count = 0;

	ifnet_serialize_all(ifp);
	cothread_lock(cotd, 0);

	if (sc->cotd_rx_exit != VKE_COTD_RUN) {
		cothread_unlock(cotd, 0);
		ifnet_deserialize_all(ifp);
		return;
	}
	cothread_unlock(cotd, 0);

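	/*
	 * The rx FIFO must stay fully populated, so every dequeued mbuf
	 * is replaced with a freshly allocated cluster.  If the allocation
	 * fails the packet is dropped by cycling its own mbuf back into
	 * the ring.
	 */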
	while ((m = vke_rxfifo_sniff(sc)) != NULL) {
		nm = m_getcl(MB_DONTWAIT, MT_DATA, M_PKTHDR);
		if (nm) {
			vke_rxfifo_dequeue(sc, nm);
			ifp->if_input(ifp, m);
			if (count++ == VKE_CHUNK) {
				cothread_lock(cotd, 0);
				cothread_signal(cotd);
				cothread_unlock(cotd, 0);
				count = 0;
			}
		} else {
			vke_rxfifo_dequeue(sc, m);
		}
	}

	if (count) {
		cothread_lock(cotd, 0);
		cothread_signal(cotd);
		cothread_unlock(cotd, 0);
	}
	ifnet_deserialize_all(ifp);
}

/*
 * vke_tx_intr() is the interrupt function for the transmit cothread.
 * Calls vke_start() to handle processing transmit mbufs.
 */
static void
vke_tx_intr(cothread_t cotd)
{
	struct vke_softc *sc = cotd->arg;
	struct ifnet *ifp = &sc->arpcom.ac_if;
	struct mbuf *m;

	ifnet_serialize_all(ifp);
	cothread_lock(cotd, 0);
	if (sc->cotd_tx_exit != VKE_COTD_RUN) {
		cothread_unlock(cotd, 0);
		ifnet_deserialize_all(ifp);
		return;
	}
	cothread_unlock(cotd, 0);

	/*
	 * Free TX mbufs that have been processed before starting new
	 * ones, to be pipeline friendly.
	 */
	while ((m = vke_txfifo_done_dequeue(sc, NULL)) != NULL) {
		m_freem(m);
	}

	if ((ifp->if_flags & IFF_RUNNING) == 0)
		ifp->if_start(ifp);

	ifnet_deserialize_all(ifp);
}

/*
 * vke_rx_thread() is the body of the receive cothread.
 */
static void
vke_rx_thread(cothread_t cotd)
{
	struct mbuf *m;
	struct vke_softc *sc = cotd->arg;
	struct ifnet *ifp = &sc->arpcom.ac_if;
	fifo_t fifo = sc->sc_rxfifo;
	fd_set fdset;
	struct timeval tv;
	int count;
	int n;

	/*
	 * The select timeout cannot be infinite since we need to check
	 * the exit flag sc->cotd_rx_exit periodically.
	 */
	tv.tv_sec = 0;
	tv.tv_usec = 500000;

	FD_ZERO(&fdset);
	count = 0;

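	/*
	 * Fill free rx slots straight from the backend descriptor.  When
	 * read() returns no data (the descriptor is expected to be
	 * non-blocking) we park in select() until it becomes readable
	 * again, with a timeout so the exit flag is still rechecked.
	 */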
	while (sc->cotd_rx_exit == VKE_COTD_RUN) {
		/*
		 * The ring is full; wait for the kernel to consume
		 * packets and reload the slots with empty mbufs.
		 */
		if (NETFIFOINDEX(fifo->windex + 1) ==
		    NETFIFOINDEX(fifo->rindex)) {
			usleep(20000);
			continue;
		}

		/*
		 * Load data into the rx fifo
		 */
		m = fifo->array[NETFIFOINDEX(fifo->windex)];
		if (m == NULL)
			continue;
		n = read(sc->sc_fd, mtod(m, void *), MCLBYTES);
		if (n > 0) {
			ifp->if_ipackets++;
			m->m_pkthdr.rcvif = ifp;
			m->m_pkthdr.len = m->m_len = n;
			cpu_sfence();
			++fifo->windex;
			if (count++ == VKE_CHUNK) {
				cothread_intr(cotd);
				count = 0;
			}
		} else {
			if (count) {
				cothread_intr(cotd);
				count = 0;
			}
			FD_SET(sc->sc_fd, &fdset);

			if (select(sc->sc_fd + 1, &fdset, NULL, NULL, &tv) == -1) {
				kprintf(VKE_DEVNAME "%d: select failed for "
					"TAP device\n", sc->sc_unit);
				usleep(1000000);
			}
		}
	}
	cpu_sfence();
	sc->cotd_rx_exit = VKE_COTD_DEAD;
}

/*
 * vke_tx_thread() is the body of the transmit cothread.
 */
static void
vke_tx_thread(cothread_t cotd)
{
	struct mbuf *m;
	struct vke_softc *sc = cotd->arg;
	struct ifnet *ifp = &sc->arpcom.ac_if;
	int count = 0;

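	/*
	 * Each outgoing mbuf chain is linearized into sc_txbuf before the
	 * write(); chains longer than MCLBYTES are silently dropped.  The
	 * mbuf always goes onto the done FIFO so the kernel side can free
	 * it.
	 */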
	while (sc->cotd_tx_exit == VKE_COTD_RUN) {
		/*
		 * Write outgoing packets to the TAP interface
		 */
		m = vke_txfifo_dequeue(sc);
		if (m) {
			if (m->m_pkthdr.len <= MCLBYTES) {
				m_copydata(m, 0, m->m_pkthdr.len, sc->sc_txbuf);
				sc->sc_txbuf_len = m->m_pkthdr.len;

				if (write(sc->sc_fd, sc->sc_txbuf,
					  sc->sc_txbuf_len) < 0) {
					ifp->if_oerrors++;
				} else {
					ifp->if_opackets++;
				}
			}
			if (count++ == VKE_CHUNK) {
				cothread_intr(cotd);
				count = 0;
			}
			vke_txfifo_done_enqueue(sc, m);
		} else {
			if (count) {
				cothread_intr(cotd);
				count = 0;
			}
			cothread_lock(cotd, 1);
			if (vke_txfifo_empty(sc))
				cothread_wait(cotd);
			cothread_unlock(cotd, 1);
		}
	}
	cpu_sfence();
	sc->cotd_tx_exit = VKE_COTD_DEAD;
}

static int
vke_attach(const struct vknetif_info *info, int unit)
{
	struct vke_softc *sc;
	struct ifnet *ifp;
	struct tapinfo tapinfo;
	uint8_t enaddr[ETHER_ADDR_LEN];
	int fd;

	KKASSERT(info->tap_fd >= 0);
	fd = info->tap_fd;

	/*
	 * This is only a TAP device if tap_unit is non-negative.  If
	 * we are connecting to a virtual socket we generate a unique
	 * MAC instead.
	 */
	if (info->tap_unit >= 0) {
		if (ioctl(fd, TAPGIFINFO, &tapinfo) < 0) {
			kprintf(VKE_DEVNAME "%d: ioctl(TAPGIFINFO) "
				"failed: %s\n", unit, strerror(errno));
			return ENXIO;
		}

		if (ioctl(fd, SIOCGIFADDR, enaddr) < 0) {
			kprintf(VKE_DEVNAME "%d: ioctl(SIOCGIFADDR) "
				"failed: %s\n", unit, strerror(errno));
			return ENXIO;
		}
	} else {
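		/*
		 * No backing tap(4): synthesize a locally unique MAC from
		 * random bytes and the vkernel's pid.
		 */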
		int fd = open("/dev/urandom", O_RDONLY);
		if (fd >= 0) {
			read(fd, enaddr + 2, 4);
			close(fd);
		}
		/*
		 * Bytes 0 and 1 are otherwise left uninitialized in this
		 * path; use a locally administered, unicast prefix.
		 */
		enaddr[0] = 0x02;
		enaddr[1] = 0x00;
		enaddr[4] = (int)getpid() >> 8;
		enaddr[5] = (int)getpid() & 255;
		/*
		 * There is no tap backend to query, so fall back to sane
		 * defaults for the mtu and baudrate copied into the ifnet
		 * below.
		 */
		tapinfo.mtu = ETHERMTU;
		tapinfo.baudrate = 0;
	}
	enaddr[1] += 1;

	sc = kmalloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO);

	sc->sc_txbuf = kmalloc(MCLBYTES, M_DEVBUF, M_WAITOK);
	sc->sc_fd = fd;
	sc->sc_unit = unit;
	sc->sc_tap_unit = info->tap_unit;
	sc->sc_addr = info->netif_addr;
	sc->sc_mask = info->netif_mask;

	ifp = &sc->arpcom.ac_if;
	if_initname(ifp, VKE_DEVNAME, sc->sc_unit);

	/* NB: after if_initname() */
	sysctl_ctx_init(&sc->sc_sysctl_ctx);
	sc->sc_sysctl_tree = SYSCTL_ADD_NODE(&sc->sc_sysctl_ctx,
					     SYSCTL_STATIC_CHILDREN(_hw),
					     OID_AUTO, ifp->if_xname,
					     CTLFLAG_RD, 0, "");
	if (sc->sc_sysctl_tree == NULL) {
		kprintf(VKE_DEVNAME "%d: can't add sysctl node\n", unit);
	} else {
		SYSCTL_ADD_INT(&sc->sc_sysctl_ctx,
			       SYSCTL_CHILDREN(sc->sc_sysctl_tree),
			       OID_AUTO, "tap_unit",
			       CTLFLAG_RD, &sc->sc_tap_unit, 0,
			       "Backend tap(4) unit");
	}

	ifp->if_softc = sc;
	ifp->if_ioctl = vke_ioctl;
	ifp->if_start = vke_start;
	ifp->if_init = vke_init;
	ifp->if_mtu = tapinfo.mtu;
	ifp->if_baudrate = tapinfo.baudrate;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifq_set_maxlen(&ifp->if_snd, IFQ_MAXLEN);
	ifq_set_ready(&ifp->if_snd);

	/* TODO: if_media */

	ether_ifattach(ifp, enaddr, NULL);

	if (bootverbose && sc->sc_addr != 0) {
		if_printf(ifp, "pre-configured "
			  "address 0x%08x, netmask 0x%08x\n",
			  ntohl(sc->sc_addr), ntohl(sc->sc_mask));
	}

	return 0;
}

static int
vke_init_addr(struct ifnet *ifp, in_addr_t addr, in_addr_t mask)
{
	struct ifaliasreq ifra;
	struct sockaddr_in *sin;
	int ret;

	ASSERT_SERIALIZED(ifp->if_serializer);

	if (bootverbose) {
		if_printf(ifp, "add pre-configured "
			  "address 0x%08x, netmask 0x%08x\n",
			  ntohl(addr), ntohl(mask));
	}

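	/*
	 * Build a SIOCAIFADDR request by hand; this takes the same path
	 * an "ifconfig vkeN <addr> netmask <mask>" would.
	 */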
	bzero(&ifra, sizeof(ifra));

	/* NB: no need to set ifaliasreq.ifra_name */

	sin = (struct sockaddr_in *)&ifra.ifra_addr;
	sin->sin_family = AF_INET;
	sin->sin_len = sizeof(*sin);
	sin->sin_addr.s_addr = addr;

	if (mask != 0) {
		sin = (struct sockaddr_in *)&ifra.ifra_mask;
		sin->sin_len = sizeof(*sin);
		sin->sin_addr.s_addr = mask;
	}

	/*
	 * Temporarily release the serializer; in_control() will hold
	 * it again before calling ifnet.if_ioctl().
	 */
	ifnet_deserialize_all(ifp);
	ret = in_control(NULL, SIOCAIFADDR, (caddr_t)&ifra, ifp, NULL);
	ifnet_serialize_all(ifp);

	return ret;
}