xref: /dragonfly/sys/dev/virtual/virtio/net/if_vtnet.c (revision 1318cd54)
1 /*-
2  * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice unmodified, this list of conditions, and the following
10  *    disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 /* Driver for VirtIO network devices. */
28 
29 #include <sys/cdefs.h>
30 
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/kernel.h>
34 #include <sys/sockio.h>
35 #include <sys/mbuf.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/socket.h>
39 #include <sys/sysctl.h>
40 #include <sys/taskqueue.h>
41 #include <sys/random.h>
42 #include <sys/sglist.h>
43 #include <sys/serialize.h>
44 #include <sys/bus.h>
45 #include <sys/rman.h>
46 
47 #include <machine/limits.h>
48 
49 #include <net/ethernet.h>
50 #include <net/if.h>
51 #include <net/if_arp.h>
52 #include <net/if_dl.h>
53 #include <net/if_types.h>
54 #include <net/if_media.h>
55 #include <net/vlan/if_vlan_var.h>
56 #include <net/vlan/if_vlan_ether.h>
57 #include <net/ifq_var.h>
58 
59 #include <net/bpf.h>
60 
61 #include <netinet/in_systm.h>
62 #include <netinet/in.h>
63 #include <netinet/ip.h>
64 #include <netinet/ip6.h>
65 #include <netinet/udp.h>
66 #include <netinet/tcp.h>
67 
68 #include <dev/virtual/virtio/virtio/virtio.h>
69 #include <dev/virtual/virtio/virtio/virtqueue.h>
70 #include <dev/virtual/virtio/net/virtio_net.h>
71 #include <dev/virtual/virtio/net/if_vtnetvar.h>
72 
73 #include "virtio_if.h"
74 
75 MALLOC_DEFINE(M_VTNET, "VTNET_TX", "Outgoing VTNET TX frame header");
76 
77 static int	vtnet_modevent(module_t, int, void *);
78 
79 static int	vtnet_probe(device_t);
80 static int	vtnet_attach(device_t);
81 static int	vtnet_detach(device_t);
82 static int	vtnet_suspend(device_t);
83 static int	vtnet_resume(device_t);
84 static int	vtnet_shutdown(device_t);
85 static int	vtnet_config_change(device_t);
86 
87 static void	vtnet_negotiate_features(struct vtnet_softc *);
88 static int	vtnet_alloc_virtqueues(struct vtnet_softc *);
89 static void	vtnet_get_hwaddr(struct vtnet_softc *);
90 static void	vtnet_set_hwaddr(struct vtnet_softc *);
91 static int	vtnet_is_link_up(struct vtnet_softc *);
92 static void	vtnet_update_link_status(struct vtnet_softc *);
93 #if 0
94 static void	vtnet_watchdog(struct vtnet_softc *);
95 #endif
96 static void	vtnet_config_change_task(void *, int);
97 static int	vtnet_setup_interface(struct vtnet_softc *);
98 static int	vtnet_change_mtu(struct vtnet_softc *, int);
99 static int	vtnet_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
100 
101 static int	vtnet_init_rx_vq(struct vtnet_softc *);
102 static void	vtnet_free_rx_mbufs(struct vtnet_softc *);
103 static void	vtnet_free_tx_mbufs(struct vtnet_softc *);
104 static void	vtnet_free_ctrl_vq(struct vtnet_softc *);
105 
106 static struct mbuf * vtnet_alloc_rxbuf(struct vtnet_softc *, int,
107 		    struct mbuf **);
108 static int	vtnet_replace_rxbuf(struct vtnet_softc *,
109 		    struct mbuf *, int);
110 static int	vtnet_newbuf(struct vtnet_softc *);
111 static void	vtnet_discard_merged_rxbuf(struct vtnet_softc *, int);
112 static void	vtnet_discard_rxbuf(struct vtnet_softc *, struct mbuf *);
113 static int	vtnet_enqueue_rxbuf(struct vtnet_softc *, struct mbuf *);
114 static void	vtnet_vlan_tag_remove(struct mbuf *);
115 static int	vtnet_rx_csum(struct vtnet_softc *, struct mbuf *,
116 		    struct virtio_net_hdr *);
117 static int	vtnet_rxeof_merged(struct vtnet_softc *, struct mbuf *, int);
118 static int	vtnet_rxeof(struct vtnet_softc *, int, int *);
119 static void	vtnet_rx_intr_task(void *);
120 static int	vtnet_rx_vq_intr(void *);
121 
122 static void	vtnet_enqueue_txhdr(struct vtnet_softc *,
123 		    struct vtnet_tx_header *);
124 static void	vtnet_txeof(struct vtnet_softc *);
125 static struct mbuf * vtnet_tx_offload(struct vtnet_softc *, struct mbuf *,
126 		    struct virtio_net_hdr *);
127 static int	vtnet_enqueue_txbuf(struct vtnet_softc *, struct mbuf **,
128 		    struct vtnet_tx_header *);
129 static int	vtnet_encap(struct vtnet_softc *, struct mbuf **);
130 static void	vtnet_start_locked(struct ifnet *, struct ifaltq_subque *);
131 static void	vtnet_start(struct ifnet *, struct ifaltq_subque *);
132 static void	vtnet_tick(void *);
133 static void	vtnet_tx_intr_task(void *);
134 static int	vtnet_tx_vq_intr(void *);
135 
136 static void	vtnet_stop(struct vtnet_softc *);
137 static int	vtnet_virtio_reinit(struct vtnet_softc *);
138 static void	vtnet_init_locked(struct vtnet_softc *);
139 static void	vtnet_init(void *);
140 
141 static void	vtnet_exec_ctrl_cmd(struct vtnet_softc *, void *,
142 		    struct sglist *, int, int);
143 
144 static int	vtnet_ctrl_mac_cmd(struct vtnet_softc *, uint8_t *);
145 static int	vtnet_ctrl_rx_cmd(struct vtnet_softc *, int, int);
146 static int	vtnet_set_promisc(struct vtnet_softc *, int);
147 static int	vtnet_set_allmulti(struct vtnet_softc *, int);
148 static void	vtnet_rx_filter(struct vtnet_softc *sc);
149 static void	vtnet_rx_filter_mac(struct vtnet_softc *);
150 
151 static int	vtnet_exec_vlan_filter(struct vtnet_softc *, int, uint16_t);
152 static void	vtnet_rx_filter_vlan(struct vtnet_softc *);
153 static void	vtnet_update_vlan_filter(struct vtnet_softc *, int, uint16_t);
154 static void	vtnet_register_vlan(void *, struct ifnet *, uint16_t);
155 static void	vtnet_unregister_vlan(void *, struct ifnet *, uint16_t);
156 
157 static int	vtnet_ifmedia_upd(struct ifnet *);
158 static void	vtnet_ifmedia_sts(struct ifnet *, struct ifmediareq *);
159 
160 static void	vtnet_add_statistics(struct vtnet_softc *);
161 
162 static int	vtnet_enable_rx_intr(struct vtnet_softc *);
163 static int	vtnet_enable_tx_intr(struct vtnet_softc *);
164 static void	vtnet_disable_rx_intr(struct vtnet_softc *);
165 static void	vtnet_disable_tx_intr(struct vtnet_softc *);
166 
167 /* Tunables. */
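/*
 * These are boot-time tunables fetched from the kernel environment; for
 * example, setting hw.vtnet.tso_disable="0" in /boot/loader.conf would
 * allow TSO to be negotiated.
 */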
168 static int vtnet_csum_disable = 0;
169 TUNABLE_INT("hw.vtnet.csum_disable", &vtnet_csum_disable);
170 static int vtnet_tso_disable = 1;
171 TUNABLE_INT("hw.vtnet.tso_disable", &vtnet_tso_disable);
172 static int vtnet_lro_disable = 0;
173 TUNABLE_INT("hw.vtnet.lro_disable", &vtnet_lro_disable);
174 
175 /*
176  * Reducing the number of transmit completed interrupts can
177  * improve performance. To do so, the define below keeps the
178  * Tx vq interrupt disabled and adds calls to vtnet_txeof()
179  * in the start and watchdog paths. The price to pay for this
180  * is that the m_free'ing of transmitted mbufs may be delayed
181  * until the watchdog fires.
182  */
183 #define VTNET_TX_INTR_MODERATION
184 
185 static struct virtio_feature_desc vtnet_feature_desc[] = {
186 	{ VIRTIO_NET_F_CSUM,		"TxChecksum"	},
187 	{ VIRTIO_NET_F_GUEST_CSUM,	"RxChecksum"	},
188 	{ VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, "DynOffload"	},
189 	{ VIRTIO_NET_F_MAC,		"MacAddress"	},
190 	{ VIRTIO_NET_F_GSO,		"TxAllGSO"	},
191 	{ VIRTIO_NET_F_GUEST_TSO4,	"RxTSOv4"	},
192 	{ VIRTIO_NET_F_GUEST_TSO6,	"RxTSOv6"	},
193 	{ VIRTIO_NET_F_GUEST_ECN,	"RxECN"		},
194 	{ VIRTIO_NET_F_GUEST_UFO,	"RxUFO"		},
195 	{ VIRTIO_NET_F_HOST_TSO4,	"TxTSOv4"	},
196 	{ VIRTIO_NET_F_HOST_TSO6,	"TxTSOv6"	},
197 	{ VIRTIO_NET_F_HOST_ECN,	"TxTSOECN"	},
198 	{ VIRTIO_NET_F_HOST_UFO,	"TxUFO"		},
199 	{ VIRTIO_NET_F_MRG_RXBUF,	"MrgRxBuf"	},
200 	{ VIRTIO_NET_F_STATUS,		"Status"	},
201 	{ VIRTIO_NET_F_CTRL_VQ,		"ControlVq"	},
202 	{ VIRTIO_NET_F_CTRL_RX,		"RxMode"	},
203 	{ VIRTIO_NET_F_CTRL_VLAN,	"VLanFilter"	},
204 	{ VIRTIO_NET_F_CTRL_RX_EXTRA,	"RxModeExtra"	},
205 	{ VIRTIO_NET_F_GUEST_ANNOUNCE,	"GuestAnnounce"	},
206 	{ VIRTIO_NET_F_MQ,		"RFS"		},
207 	{ VIRTIO_NET_F_CTRL_MAC_ADDR,	"SetMacAddress"	},
208 	{ 0, NULL }
209 };
210 
211 static device_method_t vtnet_methods[] = {
212 	/* Device methods. */
213 	DEVMETHOD(device_probe,		vtnet_probe),
214 	DEVMETHOD(device_attach,	vtnet_attach),
215 	DEVMETHOD(device_detach,	vtnet_detach),
216 	DEVMETHOD(device_suspend,	vtnet_suspend),
217 	DEVMETHOD(device_resume,	vtnet_resume),
218 	DEVMETHOD(device_shutdown,	vtnet_shutdown),
219 
220 	/* VirtIO methods. */
221 	DEVMETHOD(virtio_config_change, vtnet_config_change),
222 
223 	{ 0, 0 }
224 };
225 
226 static driver_t vtnet_driver = {
227 	"vtnet",
228 	vtnet_methods,
229 	sizeof(struct vtnet_softc)
230 };
231 
232 static devclass_t vtnet_devclass;
233 
234 DRIVER_MODULE(vtnet, virtio_pci, vtnet_driver, vtnet_devclass,
235     vtnet_modevent, 0);
236 MODULE_VERSION(vtnet, 1);
237 MODULE_DEPEND(vtnet, virtio, 1, 1, 1);
238 
239 static int
240 vtnet_modevent(module_t mod, int type, void *unused)
241 {
242 	int error;
243 
244 	error = 0;
245 
246 	switch (type) {
247 	case MOD_LOAD:
248 		break;
249 	case MOD_UNLOAD:
250 		break;
251 	case MOD_SHUTDOWN:
252 		break;
253 	default:
254 		error = EOPNOTSUPP;
255 		break;
256 	}
257 
258 	return (error);
259 }
260 
261 static int
262 vtnet_probe(device_t dev)
263 {
264 	if (virtio_get_device_type(dev) != VIRTIO_ID_NETWORK)
265 		return (ENXIO);
266 
267 	device_set_desc(dev, "VirtIO Networking Adapter");
268 
269 	return (BUS_PROBE_DEFAULT);
270 }
271 
272 static int
273 vtnet_attach(device_t dev)
274 {
275 	struct vtnet_softc *sc;
276 	int error;
277 
278 	sc = device_get_softc(dev);
279 	sc->vtnet_dev = dev;
280 
281 	lwkt_serialize_init(&sc->vtnet_slz);
282 	callout_init(&sc->vtnet_tick_ch);
283 
284 	ifmedia_init(&sc->vtnet_media, IFM_IMASK, vtnet_ifmedia_upd,
285 		     vtnet_ifmedia_sts);
286 	ifmedia_add(&sc->vtnet_media, VTNET_MEDIATYPE, 0, NULL);
287 	ifmedia_set(&sc->vtnet_media, VTNET_MEDIATYPE);
288 
289 	vtnet_add_statistics(sc);
290 	SLIST_INIT(&sc->vtnet_txhdr_free);
291 
292 	/* Register our feature descriptions. */
293 	virtio_set_feature_desc(dev, vtnet_feature_desc);
294 	vtnet_negotiate_features(sc);
295 
296 	if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
297 		sc->vtnet_flags |= VTNET_FLAG_INDIRECT;
298 
299 	if (virtio_with_feature(dev, VIRTIO_NET_F_MAC)) {
300 		/* This feature should always be negotiated. */
301 		sc->vtnet_flags |= VTNET_FLAG_MAC;
302 	}
303 
304 	if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) {
305 		sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS;
306 		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
307 	} else {
308 		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
309 	}
310 
311 	sc->vtnet_rx_mbuf_size = MCLBYTES;
312 	sc->vtnet_rx_mbuf_count = VTNET_NEEDED_RX_MBUFS(sc);
313 
314 	if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VQ)) {
315 		sc->vtnet_flags |= VTNET_FLAG_CTRL_VQ;
316 
317 		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX))
318 			sc->vtnet_flags |= VTNET_FLAG_CTRL_RX;
319 		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VLAN))
320 			sc->vtnet_flags |= VTNET_FLAG_VLAN_FILTER;
321 		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
322 		    virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX))
323 			sc->vtnet_flags |= VTNET_FLAG_CTRL_MAC;
324 	}
325 
326 	/* Read (or generate) the MAC address for the adapter. */
327 	vtnet_get_hwaddr(sc);
328 
329 	error = vtnet_alloc_virtqueues(sc);
330 	if (error) {
331 		device_printf(dev, "cannot allocate virtqueues\n");
332 		goto fail;
333 	}
334 
335 	error = vtnet_setup_interface(sc);
336 	if (error) {
337 		device_printf(dev, "cannot setup interface\n");
338 		goto fail;
339 	}
340 
341 	TASK_INIT(&sc->vtnet_cfgchg_task, 0, vtnet_config_change_task, sc);
342 
343 	error = virtio_setup_intr(dev, &sc->vtnet_slz);
344 	if (error) {
345 		device_printf(dev, "cannot setup virtqueue interrupts\n");
346 		ether_ifdetach(sc->vtnet_ifp);
347 		goto fail;
348 	}
349 
350 	/*
351 	 * Device defaults to promiscuous mode for backwards
352 	 * compatibility. Turn it off if possible.
353 	 */
354 	if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
355 		lwkt_serialize_enter(&sc->vtnet_slz);
356 		if (vtnet_set_promisc(sc, 0) != 0) {
357 			sc->vtnet_ifp->if_flags |= IFF_PROMISC;
358 			device_printf(dev,
359 			    "cannot disable promiscuous mode\n");
360 		}
361 		lwkt_serialize_exit(&sc->vtnet_slz);
362 	} else
363 		sc->vtnet_ifp->if_flags |= IFF_PROMISC;
364 
365 fail:
366 	if (error)
367 		vtnet_detach(dev);
368 
369 	return (error);
370 }
371 
372 static int
373 vtnet_detach(device_t dev)
374 {
375 	struct vtnet_softc *sc;
376 	struct ifnet *ifp;
377 
378 	sc = device_get_softc(dev);
379 	ifp = sc->vtnet_ifp;
380 
381 	if (device_is_attached(dev)) {
382 		lwkt_serialize_enter(&sc->vtnet_slz);
383 		vtnet_stop(sc);
384 		lwkt_serialize_exit(&sc->vtnet_slz);
385 
386 		callout_stop(&sc->vtnet_tick_ch);
387 		taskqueue_drain(taskqueue_swi, &sc->vtnet_cfgchg_task);
388 
389 		ether_ifdetach(ifp);
390 	}
391 
392 	if (sc->vtnet_vlan_attach != NULL) {
393 		EVENTHANDLER_DEREGISTER(vlan_config, sc->vtnet_vlan_attach);
394 		sc->vtnet_vlan_attach = NULL;
395 	}
396 	if (sc->vtnet_vlan_detach != NULL) {
397 		EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vtnet_vlan_detach);
398 		sc->vtnet_vlan_detach = NULL;
399 	}
400 
401 	if (ifp) {
402 		if_free(ifp);
403 		sc->vtnet_ifp = NULL;
404 	}
405 
406 	if (sc->vtnet_rx_vq != NULL)
407 		vtnet_free_rx_mbufs(sc);
408 	if (sc->vtnet_tx_vq != NULL)
409 		vtnet_free_tx_mbufs(sc);
410 	if (sc->vtnet_ctrl_vq != NULL)
411 		vtnet_free_ctrl_vq(sc);
412 
413 	if (sc->vtnet_txhdrarea != NULL) {
414 		contigfree(sc->vtnet_txhdrarea,
415 		    sc->vtnet_txhdrcount * sizeof(struct vtnet_tx_header),
416 		    M_VTNET);
417 		sc->vtnet_txhdrarea = NULL;
418 	}
419 	SLIST_INIT(&sc->vtnet_txhdr_free);
420 	if (sc->vtnet_macfilter != NULL) {
421 		contigfree(sc->vtnet_macfilter,
422 		    sizeof(struct vtnet_mac_filter), M_DEVBUF);
423 		sc->vtnet_macfilter = NULL;
424 	}
425 
426 	ifmedia_removeall(&sc->vtnet_media);
427 
428 	return (0);
429 }
430 
431 static int
432 vtnet_suspend(device_t dev)
433 {
434 	struct vtnet_softc *sc;
435 
436 	sc = device_get_softc(dev);
437 
438 	lwkt_serialize_enter(&sc->vtnet_slz);
439 	vtnet_stop(sc);
440 	sc->vtnet_flags |= VTNET_FLAG_SUSPENDED;
441 	lwkt_serialize_exit(&sc->vtnet_slz);
442 
443 	return (0);
444 }
445 
446 static int
447 vtnet_resume(device_t dev)
448 {
449 	struct vtnet_softc *sc;
450 	struct ifnet *ifp;
451 
452 	sc = device_get_softc(dev);
453 	ifp = sc->vtnet_ifp;
454 
455 	lwkt_serialize_enter(&sc->vtnet_slz);
456 	if (ifp->if_flags & IFF_UP)
457 		vtnet_init_locked(sc);
458 	sc->vtnet_flags &= ~VTNET_FLAG_SUSPENDED;
459 	lwkt_serialize_exit(&sc->vtnet_slz);
460 
461 	return (0);
462 }
463 
464 static int
465 vtnet_shutdown(device_t dev)
466 {
467 
468 	/*
469 	 * Suspend already does all of what we need to
470 	 * do here; we just never expect to be resumed.
471 	 */
472 	return (vtnet_suspend(dev));
473 }
474 
475 static int
476 vtnet_config_change(device_t dev)
477 {
478 	struct vtnet_softc *sc;
479 
480 	sc = device_get_softc(dev);
481 
482 	taskqueue_enqueue(taskqueue_thread[mycpuid], &sc->vtnet_cfgchg_task);
483 
484 	return (1);
485 }
486 
487 static void
488 vtnet_negotiate_features(struct vtnet_softc *sc)
489 {
490 	device_t dev;
491 	uint64_t mask, features;
492 
493 	dev = sc->vtnet_dev;
494 	mask = 0;
495 
496 	if (vtnet_csum_disable)
497 		mask |= VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM;
498 
499 	/*
500 	 * TSO and LRO are only available when their corresponding checksum
501 	 * offload feature is also negotiated.
502 	 */
503 
504 	if (vtnet_csum_disable || vtnet_tso_disable)
505 		mask |= VIRTIO_NET_F_HOST_TSO4 | VIRTIO_NET_F_HOST_TSO6 |
506 		    VIRTIO_NET_F_HOST_ECN;
507 
508 	if (vtnet_csum_disable || vtnet_lro_disable)
509 		mask |= VTNET_LRO_FEATURES;
510 
511 	features = VTNET_FEATURES & ~mask;
512 	features |= VIRTIO_F_NOTIFY_ON_EMPTY;
513 	features |= VIRTIO_F_ANY_LAYOUT;
514 	sc->vtnet_features = virtio_negotiate_features(dev, features);
515 
516 	if (virtio_with_feature(dev, VTNET_LRO_FEATURES) &&
517 	    virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF) == 0) {
518 		/*
519 		 * LRO without mergeable buffers requires special care. This
520 		 * is not ideal because every receive buffer must be large
521 		 * enough to hold the maximum TCP packet, the Ethernet header,
522 		 * and the VirtIO net header. This requires up to 34 descriptors with
523 		 * MCLBYTES clusters. If we do not have indirect descriptors,
524 		 * LRO is disabled since the virtqueue will not contain very
525 		 * many receive buffers.
526 		 */
527 		if (!virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) {
528 			device_printf(dev,
529 			    "LRO disabled due to both mergeable buffers and "
530 			    "indirect descriptors not negotiated\n");
531 
532 			features &= ~VTNET_LRO_FEATURES;
533 			sc->vtnet_features =
534 			    virtio_negotiate_features(dev, features);
535 		} else
536 			sc->vtnet_flags |= VTNET_FLAG_LRO_NOMRG;
537 	}
538 }
539 
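/*
 * Allocate the receive, transmit and, when negotiated, control virtqueues.
 * The segment counts chosen here bound the sglists later built in
 * vtnet_enqueue_rxbuf() and vtnet_enqueue_txbuf().
 */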
540 static int
541 vtnet_alloc_virtqueues(struct vtnet_softc *sc)
542 {
543 	device_t dev;
544 	struct vq_alloc_info vq_info[3];
545 	int nvqs;
546 
547 	dev = sc->vtnet_dev;
548 	nvqs = 2;
549 
550 	/*
551 	 * Indirect descriptors are not needed for the Rx
552 	 * virtqueue when mergeable buffers are negotiated.
553 	 * The header is placed inline with the data, not
554 	 * in a separate descriptor, and mbuf clusters are
555 	 * always physically contiguous.
556 	 */
557 	if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
558 		sc->vtnet_rx_nsegs = (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG) ?
559 		    VTNET_MAX_RX_SEGS : VTNET_MIN_RX_SEGS;
560 	} else
561 		sc->vtnet_rx_nsegs = VTNET_MRG_RX_SEGS;
562 
563 	if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4) ||
564 	    virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
565 		sc->vtnet_tx_nsegs = VTNET_MAX_TX_SEGS;
566 	else
567 		sc->vtnet_tx_nsegs = VTNET_MIN_TX_SEGS;
568 
569 	VQ_ALLOC_INFO_INIT(&vq_info[0], sc->vtnet_rx_nsegs,
570 	    vtnet_rx_vq_intr, sc, &sc->vtnet_rx_vq,
571 	    "%s receive", device_get_nameunit(dev));
572 
573 	VQ_ALLOC_INFO_INIT(&vq_info[1], sc->vtnet_tx_nsegs,
574 	    vtnet_tx_vq_intr, sc, &sc->vtnet_tx_vq,
575 	    "%s transmit", device_get_nameunit(dev));
576 
577 	if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
578 		nvqs++;
579 
580 		VQ_ALLOC_INFO_INIT(&vq_info[2], 0, NULL, NULL,
581 		    &sc->vtnet_ctrl_vq, "%s control",
582 		    device_get_nameunit(dev));
583 	}
584 
585 	return (virtio_alloc_virtqueues(dev, 0, nvqs, vq_info));
586 }
587 
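/*
 * Create and attach the ifnet, allocate the transmit header and MAC filter
 * areas, and advertise the capabilities implied by the negotiated features.
 */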
588 static int
589 vtnet_setup_interface(struct vtnet_softc *sc)
590 {
591 	device_t dev;
592 	struct ifnet *ifp;
593 	int i, tx_size;
594 
595 	dev = sc->vtnet_dev;
596 
597 	ifp = sc->vtnet_ifp = if_alloc(IFT_ETHER);
598 	if (ifp == NULL) {
599 		device_printf(dev, "cannot allocate ifnet structure\n");
600 		return (ENOSPC);
601 	}
602 
603 	ifp->if_softc = sc;
604 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
605 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
606 	ifp->if_init = vtnet_init;
607 	ifp->if_start = vtnet_start;
608 	ifp->if_ioctl = vtnet_ioctl;
609 
610 	sc->vtnet_rx_size = virtqueue_size(sc->vtnet_rx_vq);
611 	sc->vtnet_rx_process_limit = sc->vtnet_rx_size;
612 
613 	tx_size = virtqueue_size(sc->vtnet_tx_vq);
614 	sc->vtnet_tx_size = tx_size;
615 	/* Select a size such that we never run out of tx_header entries. */
616 	if (sc->vtnet_flags & VTNET_FLAG_INDIRECT)
617 		sc->vtnet_txhdrcount = sc->vtnet_tx_size;
618 	else
619 		sc->vtnet_txhdrcount = (sc->vtnet_tx_size / 2) + 1;
620 	sc->vtnet_txhdrarea = contigmalloc(
621 	    sc->vtnet_txhdrcount * sizeof(struct vtnet_tx_header),
622 	    M_VTNET, M_WAITOK, 0, BUS_SPACE_MAXADDR, 4, 0);
623 	if (sc->vtnet_txhdrarea == NULL) {
624 		device_printf(dev, "cannot contigmalloc the tx headers\n");
625 		return (ENOMEM);
626 	}
627 	for (i = 0; i < sc->vtnet_txhdrcount; i++)
628 		vtnet_enqueue_txhdr(sc, &sc->vtnet_txhdrarea[i]);
629 	sc->vtnet_macfilter = contigmalloc(
630 	    sizeof(struct vtnet_mac_filter),
631 	    M_DEVBUF, M_WAITOK, 0, BUS_SPACE_MAXADDR, 4, 0);
632 	if (sc->vtnet_macfilter == NULL) {
633 		device_printf(dev,
634 		    "cannot contigmalloc the mac filter table\n");
635 		return (ENOMEM);
636 	}
637 	ifq_set_maxlen(&ifp->if_snd, tx_size - 1);
638 	ifq_set_ready(&ifp->if_snd);
639 
640 	ether_ifattach(ifp, sc->vtnet_hwaddr, NULL);
641 
642 	if (virtio_with_feature(dev, VIRTIO_NET_F_STATUS)) {
643 		//ifp->if_capabilities |= IFCAP_LINKSTATE;
644 		kprintf("add dynamic link state\n");
645 	}
646 
647 	/* Tell the upper layer(s) we support long frames. */
648 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
649 	ifp->if_capabilities |= IFCAP_JUMBO_MTU | IFCAP_VLAN_MTU;
650 
651 	if (virtio_with_feature(dev, VIRTIO_NET_F_CSUM)) {
652 		ifp->if_capabilities |= IFCAP_TXCSUM;
653 
654 		if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4))
655 			ifp->if_capabilities |= IFCAP_TSO4;
656 		if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
657 			ifp->if_capabilities |= IFCAP_TSO6;
658 		if (ifp->if_capabilities & IFCAP_TSO)
659 			ifp->if_capabilities |= IFCAP_VLAN_HWTSO;
660 
661 		if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN))
662 			sc->vtnet_flags |= VTNET_FLAG_TSO_ECN;
663 	}
664 
665 	if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM)) {
666 		ifp->if_capabilities |= IFCAP_RXCSUM;
667 
668 		if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) ||
669 		    virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6))
670 			ifp->if_capabilities |= IFCAP_LRO;
671 	}
672 
673 	if (ifp->if_capabilities & IFCAP_HWCSUM) {
674 		/*
675 		 * VirtIO does not support VLAN tagging, but we can fake
676 		 * it by inserting and removing the 802.1Q header during
677 		 * transmit and receive. We are then able to do checksum
678 		 * offloading of VLAN frames.
679 		 */
680 		ifp->if_capabilities |=
681 			IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
682 	}
683 
684 	ifp->if_capenable = ifp->if_capabilities;
685 
686 	/*
687 	 * Capabilities after here are not enabled by default.
688 	 */
689 
690 	if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
691 		ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
692 
693 		sc->vtnet_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
694 		    vtnet_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
695 		sc->vtnet_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
696 		    vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
697 	}
698 
699 	return (0);
700 }
701 
702 static void
703 vtnet_set_hwaddr(struct vtnet_softc *sc)
704 {
705 	device_t dev;
706 
707 	dev = sc->vtnet_dev;
708 
709 	if ((sc->vtnet_flags & VTNET_FLAG_CTRL_MAC) &&
710 	    (sc->vtnet_flags & VTNET_FLAG_CTRL_RX)) {
711 		if (vtnet_ctrl_mac_cmd(sc, sc->vtnet_hwaddr) != 0)
712 			device_printf(dev, "unable to set MAC address\n");
713 	} else if (sc->vtnet_flags & VTNET_FLAG_MAC) {
714 		virtio_write_device_config(dev,
715 		    offsetof(struct virtio_net_config, mac),
716 		    sc->vtnet_hwaddr, ETHER_ADDR_LEN);
717 	}
718 }
719 
720 static void
721 vtnet_get_hwaddr(struct vtnet_softc *sc)
722 {
723 	device_t dev;
724 
725 	dev = sc->vtnet_dev;
726 
727 	if ((sc->vtnet_flags & VTNET_FLAG_MAC) == 0) {
728 		/*
729 		 * Generate a random locally administered unicast address.
730 		 *
731 		 * It would be nice to generate the same MAC address across
732 		 * reboots, but it seems all the hosts currently available
733 		 * support the MAC feature, so this isn't too important.
734 		 */
735 		sc->vtnet_hwaddr[0] = 0xB2;
736 		karc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1);
737 		vtnet_set_hwaddr(sc);
738 		return;
739 	}
740 
741 	virtio_read_device_config(dev,
742 	    offsetof(struct virtio_net_config, mac),
743 	    sc->vtnet_hwaddr, ETHER_ADDR_LEN);
744 }
745 
746 static int
747 vtnet_is_link_up(struct vtnet_softc *sc)
748 {
749 	device_t dev;
750 	struct ifnet *ifp;
751 	uint16_t status;
752 
753 	dev = sc->vtnet_dev;
754 	ifp = sc->vtnet_ifp;
755 
756 	ASSERT_SERIALIZED(&sc->vtnet_slz);
757 
758 	status = virtio_read_dev_config_2(dev,
759 			offsetof(struct virtio_net_config, status));
760 
761 	return ((status & VIRTIO_NET_S_LINK_UP) != 0);
762 }
763 
764 static void
765 vtnet_update_link_status(struct vtnet_softc *sc)
766 {
767 	device_t dev;
768 	struct ifnet *ifp;
769 	struct ifaltq_subque *ifsq;
770 	int link;
771 
772 	dev = sc->vtnet_dev;
773 	ifp = sc->vtnet_ifp;
774 	ifsq = ifq_get_subq_default(&ifp->if_snd);
775 
776 	link = vtnet_is_link_up(sc);
777 
778 	if (link && ((sc->vtnet_flags & VTNET_FLAG_LINK) == 0)) {
779 		sc->vtnet_flags |= VTNET_FLAG_LINK;
780 		if (bootverbose)
781 			device_printf(dev, "Link is up\n");
782 		ifp->if_link_state = LINK_STATE_UP;
783 		if_link_state_change(ifp);
784 		if (!ifsq_is_empty(ifsq))
785 			vtnet_start_locked(ifp, ifsq);
786 	} else if (!link && (sc->vtnet_flags & VTNET_FLAG_LINK)) {
787 		sc->vtnet_flags &= ~VTNET_FLAG_LINK;
788 		if (bootverbose)
789 			device_printf(dev, "Link is down\n");
790 
791 		ifp->if_link_state = LINK_STATE_DOWN;
792 		if_link_state_change(ifp);
793 	}
794 }
795 
796 #if 0
797 static void
798 vtnet_watchdog(struct vtnet_softc *sc)
799 {
800 	struct ifnet *ifp;
801 
802 	ifp = sc->vtnet_ifp;
803 
804 #ifdef VTNET_TX_INTR_MODERATION
805 	vtnet_txeof(sc);
806 #endif
807 
808 	if (sc->vtnet_watchdog_timer == 0 || --sc->vtnet_watchdog_timer)
809 		return;
810 
811 	if_printf(ifp, "watchdog timeout -- resetting\n");
812 #ifdef VTNET_DEBUG
813 	virtqueue_dump(sc->vtnet_tx_vq);
814 #endif
815 	ifp->if_oerrors++;
816 	ifp->if_flags &= ~IFF_RUNNING;
817 	vtnet_init_locked(sc);
818 }
819 #endif
820 
821 static void
822 vtnet_config_change_task(void *arg, int pending)
823 {
824 	struct vtnet_softc *sc;
825 
826 	sc = arg;
827 
828 	lwkt_serialize_enter(&sc->vtnet_slz);
829 	vtnet_update_link_status(sc);
830 	lwkt_serialize_exit(&sc->vtnet_slz);
831 }
832 
833 static int
834 vtnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
835 {
836 	struct vtnet_softc *sc;
837 	struct ifreq *ifr;
838 	int reinit, mask, error;
839 
840 	sc = ifp->if_softc;
841 	ifr = (struct ifreq *) data;
842 	reinit = 0;
843 	error = 0;
844 
845 	switch (cmd) {
846 	case SIOCSIFMTU:
847 		if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > VTNET_MAX_MTU)
848 			error = EINVAL;
849 		else if (ifp->if_mtu != ifr->ifr_mtu) {
850 			lwkt_serialize_enter(&sc->vtnet_slz);
851 			error = vtnet_change_mtu(sc, ifr->ifr_mtu);
852 			lwkt_serialize_exit(&sc->vtnet_slz);
853 		}
854 		break;
855 
856 	case SIOCSIFFLAGS:
857 		lwkt_serialize_enter(&sc->vtnet_slz);
858 		if ((ifp->if_flags & IFF_UP) == 0) {
859 			if (ifp->if_flags & IFF_RUNNING)
860 				vtnet_stop(sc);
861 		} else if (ifp->if_flags & IFF_RUNNING) {
862 			if ((ifp->if_flags ^ sc->vtnet_if_flags) &
863 			    (IFF_PROMISC | IFF_ALLMULTI)) {
864 				if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX)
865 					vtnet_rx_filter(sc);
866 				else
867 					error = ENOTSUP;
868 			}
869 		} else
870 			vtnet_init_locked(sc);
871 
872 		if (error == 0)
873 			sc->vtnet_if_flags = ifp->if_flags;
874 		lwkt_serialize_exit(&sc->vtnet_slz);
875 		break;
876 
877 	case SIOCADDMULTI:
878 	case SIOCDELMULTI:
879 		lwkt_serialize_enter(&sc->vtnet_slz);
880 		if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) &&
881 		    (ifp->if_flags & IFF_RUNNING))
882 			vtnet_rx_filter_mac(sc);
883 		lwkt_serialize_exit(&sc->vtnet_slz);
884 		break;
885 
886 	case SIOCSIFMEDIA:
887 	case SIOCGIFMEDIA:
888 		error = ifmedia_ioctl(ifp, ifr, &sc->vtnet_media, cmd);
889 		break;
890 
891 	case SIOCSIFCAP:
892 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
893 
894 		lwkt_serialize_enter(&sc->vtnet_slz);
895 
896 		if (mask & IFCAP_TXCSUM) {
897 			ifp->if_capenable ^= IFCAP_TXCSUM;
898 			if (ifp->if_capenable & IFCAP_TXCSUM)
899 				ifp->if_hwassist |= VTNET_CSUM_OFFLOAD;
900 			else
901 				ifp->if_hwassist &= ~VTNET_CSUM_OFFLOAD;
902 		}
903 
904 		if (mask & IFCAP_TSO4) {
905 			ifp->if_capenable ^= IFCAP_TSO4;
906 			if (ifp->if_capenable & IFCAP_TSO4)
907 				ifp->if_hwassist |= CSUM_TSO;
908 			else
909 				ifp->if_hwassist &= ~CSUM_TSO;
910 		}
911 
912 		if (mask & IFCAP_RXCSUM) {
913 			ifp->if_capenable ^= IFCAP_RXCSUM;
914 			reinit = 1;
915 		}
916 
917 		if (mask & IFCAP_LRO) {
918 			ifp->if_capenable ^= IFCAP_LRO;
919 			reinit = 1;
920 		}
921 
922 		if (mask & IFCAP_VLAN_HWFILTER) {
923 			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
924 			reinit = 1;
925 		}
926 
927 		if (mask & IFCAP_VLAN_HWTSO)
928 			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
929 
930 		if (mask & IFCAP_VLAN_HWTAGGING)
931 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
932 
933 		if (reinit && (ifp->if_flags & IFF_RUNNING)) {
934 			ifp->if_flags &= ~IFF_RUNNING;
935 			vtnet_init_locked(sc);
936 		}
937 		//VLAN_CAPABILITIES(ifp);
938 
939 		lwkt_serialize_exit(&sc->vtnet_slz);
940 		break;
941 
942 	default:
943 		error = ether_ioctl(ifp, cmd, data);
944 		break;
945 	}
946 
947 	return (error);
948 }
949 
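/*
 * Pick the receive cluster size required by the new MTU and restart the
 * interface if it is currently running.
 */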
950 static int
951 vtnet_change_mtu(struct vtnet_softc *sc, int new_mtu)
952 {
953 	struct ifnet *ifp;
954 	int new_frame_size, clsize;
955 
956 	ifp = sc->vtnet_ifp;
957 
958 	if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
959 		new_frame_size = sizeof(struct vtnet_rx_header) +
960 		    sizeof(struct ether_vlan_header) + new_mtu;
961 
962 		if (new_frame_size > MJUM9BYTES)
963 			return (EINVAL);
964 
965 		if (new_frame_size <= MCLBYTES)
966 			clsize = MCLBYTES;
967 		else
968 			clsize = MJUM9BYTES;
969 	} else {
970 		new_frame_size = sizeof(struct virtio_net_hdr_mrg_rxbuf) +
971 		    sizeof(struct ether_vlan_header) + new_mtu;
972 
973 		if (new_frame_size <= MCLBYTES)
974 			clsize = MCLBYTES;
975 		else
976 			clsize = MJUMPAGESIZE;
977 	}
978 
979 	sc->vtnet_rx_mbuf_size = clsize;
980 	sc->vtnet_rx_mbuf_count = VTNET_NEEDED_RX_MBUFS(sc);
981 	KASSERT(sc->vtnet_rx_mbuf_count < VTNET_MAX_RX_SEGS,
982 	    ("too many rx mbufs: %d", sc->vtnet_rx_mbuf_count));
983 
984 	ifp->if_mtu = new_mtu;
985 
986 	if (ifp->if_flags & IFF_RUNNING) {
987 		ifp->if_flags &= ~IFF_RUNNING;
988 		vtnet_init_locked(sc);
989 	}
990 
991 	return (0);
992 }
993 
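/* Fill the receive virtqueue with empty mbuf clusters until it is full. */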
994 static int
995 vtnet_init_rx_vq(struct vtnet_softc *sc)
996 {
997 	struct virtqueue *vq;
998 	int nbufs, error;
999 
1000 	vq = sc->vtnet_rx_vq;
1001 	nbufs = 0;
1002 	error = ENOSPC;
1003 
1004 	while (!virtqueue_full(vq)) {
1005 		if ((error = vtnet_newbuf(sc)) != 0)
1006 			break;
1007 		nbufs++;
1008 	}
1009 
1010 	if (nbufs > 0) {
1011 		virtqueue_notify(vq, &sc->vtnet_slz);
1012 
1013 		/*
1014 		 * EMSGSIZE signifies the virtqueue did not have enough
1015 		 * entries available to hold the last mbuf. This is not
1016 		 * an error. We should not get ENOSPC since we check if
1017 		 * the virtqueue is full before attempting to add a
1018 		 * buffer.
1019 		 */
1020 		if (error == EMSGSIZE)
1021 			error = 0;
1022 	}
1023 
1024 	return (error);
1025 }
1026 
1027 static void
1028 vtnet_free_rx_mbufs(struct vtnet_softc *sc)
1029 {
1030 	struct virtqueue *vq;
1031 	struct mbuf *m;
1032 	int last;
1033 
1034 	vq = sc->vtnet_rx_vq;
1035 	last = 0;
1036 
1037 	while ((m = virtqueue_drain(vq, &last)) != NULL)
1038 		m_freem(m);
1039 
1040 	KASSERT(virtqueue_empty(vq), ("mbufs remaining in Rx Vq"));
1041 }
1042 
1043 static void
1044 vtnet_free_tx_mbufs(struct vtnet_softc *sc)
1045 {
1046 	struct virtqueue *vq;
1047 	struct vtnet_tx_header *txhdr;
1048 	int last;
1049 
1050 	vq = sc->vtnet_tx_vq;
1051 	last = 0;
1052 
1053 	while ((txhdr = virtqueue_drain(vq, &last)) != NULL) {
1054 		m_freem(txhdr->vth_mbuf);
1055 		vtnet_enqueue_txhdr(sc, txhdr);
1056 	}
1057 
1058 	KASSERT(virtqueue_empty(vq), ("mbufs remaining in Tx Vq"));
1059 }
1060 
1061 static void
1062 vtnet_free_ctrl_vq(struct vtnet_softc *sc)
1063 {
1064 	/*
1065 	 * The control virtqueue is only polled, therefore
1066 	 * it should already be empty.
1067 	 */
1068 	KASSERT(virtqueue_empty(sc->vtnet_ctrl_vq),
1069 		("Ctrl Vq not empty"));
1070 }
1071 
1072 static struct mbuf *
1073 vtnet_alloc_rxbuf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp)
1074 {
1075 	struct mbuf *m_head, *m_tail, *m;
1076 	int i, clsize;
1077 
1078 	clsize = sc->vtnet_rx_mbuf_size;
1079 
1080 	/* Use m_getcl() instead of m_getjcl(); see the if_mxge.c comment at line 2398. */
1081 	//m_head = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, clsize);
1082 	m_head = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
1083 	if (m_head == NULL)
1084 		goto fail;
1085 
1086 	m_head->m_len = clsize;
1087 	m_tail = m_head;
1088 
1089 	if (nbufs > 1) {
1090 		KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
1091 			("chained Rx mbuf requested without LRO_NOMRG"));
1092 
1093 		for (i = 0; i < nbufs - 1; i++) {
1094 			//m = m_getjcl(M_DONTWAIT, MT_DATA, 0, clsize);
1095 			m = m_getcl(M_NOWAIT, MT_DATA, 0);
1096 			if (m == NULL)
1097 				goto fail;
1098 
1099 			m->m_len = clsize;
1100 			m_tail->m_next = m;
1101 			m_tail = m;
1102 		}
1103 	}
1104 
1105 	if (m_tailp != NULL)
1106 		*m_tailp = m_tail;
1107 
1108 	return (m_head);
1109 
1110 fail:
1111 	sc->vtnet_stats.mbuf_alloc_failed++;
1112 	m_freem(m_head);
1113 
1114 	return (NULL);
1115 }
1116 
1117 static int
1118 vtnet_replace_rxbuf(struct vtnet_softc *sc, struct mbuf *m0, int len0)
1119 {
1120 	struct mbuf *m, *m_prev;
1121 	struct mbuf *m_new, *m_tail;
1122 	int len, clsize, nreplace, error;
1123 
1124 	m = m0;
1125 	m_prev = NULL;
1126 	len = len0;
1127 
1128 	m_tail = NULL;
1129 	clsize = sc->vtnet_rx_mbuf_size;
1130 	nreplace = 0;
1131 
1132 	if (m->m_next != NULL)
1133 		KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
1134 		    ("chained Rx mbuf without LRO_NOMRG"));
1135 
1136 	/*
1137 	 * Since LRO_NOMRG mbuf chains are so large, we want to avoid
1138 	 * allocating an entire chain for each received frame. When
1139 	 * the received frame's length is less than that of the chain,
1140 	 * the unused mbufs are reassigned to the new chain.
1141 	 */
1142 	while (len > 0) {
1143 		/*
1144 		 * Something is seriously wrong if we received
1145 		 * a frame larger than the mbuf chain. Drop it.
1146 		 */
1147 		if (m == NULL) {
1148 			sc->vtnet_stats.rx_frame_too_large++;
1149 			return (EMSGSIZE);
1150 		}
1151 
1152 		KASSERT(m->m_len == clsize,
1153 		    ("mbuf length not expected cluster size: %d",
1154 		    m->m_len));
1155 
1156 		m->m_len = MIN(m->m_len, len);
1157 		len -= m->m_len;
1158 
1159 		m_prev = m;
1160 		m = m->m_next;
1161 		nreplace++;
1162 	}
1163 
1164 	KASSERT(m_prev != NULL, ("m_prev == NULL"));
1165 	KASSERT(nreplace <= sc->vtnet_rx_mbuf_count,
1166 		("too many replacement mbufs: %d/%d", nreplace,
1167 		sc->vtnet_rx_mbuf_count));
1168 
1169 	m_new = vtnet_alloc_rxbuf(sc, nreplace, &m_tail);
1170 	if (m_new == NULL) {
1171 		m_prev->m_len = clsize;
1172 		return (ENOBUFS);
1173 	}
1174 
1175 	/*
1176 	 * Move unused mbufs, if any, from the original chain
1177 	 * onto the end of the new chain.
1178 	 */
1179 	if (m_prev->m_next != NULL) {
1180 		m_tail->m_next = m_prev->m_next;
1181 		m_prev->m_next = NULL;
1182 	}
1183 
1184 	error = vtnet_enqueue_rxbuf(sc, m_new);
1185 	if (error) {
1186 		/*
1187 		 * BAD! We could not enqueue the replacement mbuf chain. We
1188 		 * must restore the m0 chain to the original state if it was
1189 		 * modified so we can subsequently discard it.
1190 		 *
1191 		 * NOTE: The replacement is supposed to be an identical copy
1192 		 * of the one just dequeued, so this is an unexpected error.
1193 		 */
1194 		sc->vtnet_stats.rx_enq_replacement_failed++;
1195 
1196 		if (m_tail->m_next != NULL) {
1197 			m_prev->m_next = m_tail->m_next;
1198 			m_tail->m_next = NULL;
1199 		}
1200 
1201 		m_prev->m_len = clsize;
1202 		m_freem(m_new);
1203 	}
1204 
1205 	return (error);
1206 }
1207 
1208 static int
1209 vtnet_newbuf(struct vtnet_softc *sc)
1210 {
1211 	struct mbuf *m;
1212 	int error;
1213 
1214 	m = vtnet_alloc_rxbuf(sc, sc->vtnet_rx_mbuf_count, NULL);
1215 	if (m == NULL)
1216 		return (ENOBUFS);
1217 
1218 	error = vtnet_enqueue_rxbuf(sc, m);
1219 	if (error)
1220 		m_freem(m);
1221 
1222 	return (error);
1223 }
1224 
1225 static void
1226 vtnet_discard_merged_rxbuf(struct vtnet_softc *sc, int nbufs)
1227 {
1228 	struct virtqueue *vq;
1229 	struct mbuf *m;
1230 
1231 	vq = sc->vtnet_rx_vq;
1232 
1233 	while (--nbufs > 0) {
1234 		if ((m = virtqueue_dequeue(vq, NULL)) == NULL)
1235 			break;
1236 		vtnet_discard_rxbuf(sc, m);
1237 	}
1238 }
1239 
1240 static void
1241 vtnet_discard_rxbuf(struct vtnet_softc *sc, struct mbuf *m)
1242 {
1243 	int error;
1244 
1245 	/*
1246 	 * Requeue the discarded mbuf. This should always be
1247 	 * successful since it was just dequeued.
1248 	 */
1249 	error = vtnet_enqueue_rxbuf(sc, m);
1250 	KASSERT(error == 0, ("cannot requeue discarded mbuf"));
1251 }
1252 
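/*
 * Build an sglist for the mbuf (chain) and add it to the receive virtqueue.
 * Without mergeable buffers the virtio_net header inside the leading
 * vtnet_rx_header is appended as its own segment; with mergeable buffers
 * the header simply occupies the start of the cluster data.
 */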
1253 static int
1254 vtnet_enqueue_rxbuf(struct vtnet_softc *sc, struct mbuf *m)
1255 {
1256 	struct sglist sg;
1257 	struct sglist_seg segs[VTNET_MAX_RX_SEGS];
1258 	struct vtnet_rx_header *rxhdr;
1259 	struct virtio_net_hdr *hdr;
1260 	uint8_t *mdata;
1261 	int offset, error;
1262 
1263 	ASSERT_SERIALIZED(&sc->vtnet_slz);
1264 	if ((sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG) == 0)
1265 		KASSERT(m->m_next == NULL, ("chained Rx mbuf"));
1266 
1267 	sglist_init(&sg, sc->vtnet_rx_nsegs, segs);
1268 
1269 	mdata = mtod(m, uint8_t *);
1270 	offset = 0;
1271 
1272 	if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
1273 		rxhdr = (struct vtnet_rx_header *) mdata;
1274 		hdr = &rxhdr->vrh_hdr;
1275 		offset += sizeof(struct vtnet_rx_header);
1276 
1277 		error = sglist_append(&sg, hdr, sc->vtnet_hdr_size);
1278 		KASSERT(error == 0, ("cannot add header to sglist"));
1279 	}
1280 
1281 	error = sglist_append(&sg, mdata + offset, m->m_len - offset);
1282 	if (error)
1283 		return (error);
1284 
1285 	if (m->m_next != NULL) {
1286 		error = sglist_append_mbuf(&sg, m->m_next);
1287 		if (error)
1288 			return (error);
1289 	}
1290 
1291 	return (virtqueue_enqueue(sc->vtnet_rx_vq, m, &sg, 0, sg.sg_nseg));
1292 }
1293 
1294 static void
1295 vtnet_vlan_tag_remove(struct mbuf *m)
1296 {
1297 	struct ether_vlan_header *evl;
1298 
1299 	evl = mtod(m, struct ether_vlan_header *);
1300 
1301 	m->m_pkthdr.ether_vlantag = ntohs(evl->evl_tag);
1302 	m->m_flags |= M_VLANTAG;
1303 
1304 	/* Strip the 802.1Q header. */
1305 	bcopy((char *) evl, (char *) evl + ETHER_VLAN_ENCAP_LEN,
1306 	    ETHER_HDR_LEN - ETHER_TYPE_LEN);
1307 	m_adj(m, ETHER_VLAN_ENCAP_LEN);
1308 }
1309 
1310 /*
1311  * Alternative method of doing receive checksum offloading. Rather
1312  * than parsing the received frame down to the IP header, use the
1313  * csum_offset to determine which CSUM_* flags are appropriate. We
1314  * can get by with doing this only because the checksum offsets are
1315  * unique for the things we care about.
1316  */
1317 static int
1318 vtnet_rx_csum(struct vtnet_softc *sc, struct mbuf *m,
1319     struct virtio_net_hdr *hdr)
1320 {
1321 	struct ether_header *eh;
1322 	struct ether_vlan_header *evh;
1323 	struct udphdr *udp;
1324 	int csum_len;
1325 	uint16_t eth_type;
1326 
1327 	csum_len = hdr->csum_start + hdr->csum_offset;
1328 
1329 	if (csum_len < sizeof(struct ether_header) + sizeof(struct ip))
1330 		return (1);
1331 	if (m->m_len < csum_len)
1332 		return (1);
1333 
1334 	eh = mtod(m, struct ether_header *);
1335 	eth_type = ntohs(eh->ether_type);
1336 	if (eth_type == ETHERTYPE_VLAN) {
1337 		evh = mtod(m, struct ether_vlan_header *);
1338 		eth_type = ntohs(evh->evl_proto);
1339 	}
1340 
1341 	if (eth_type != ETHERTYPE_IP && eth_type != ETHERTYPE_IPV6) {
1342 		sc->vtnet_stats.rx_csum_bad_ethtype++;
1343 		return (1);
1344 	}
1345 
1346 	/* Use the offset to determine the appropriate CSUM_* flags. */
1347 	switch (hdr->csum_offset) {
1348 	case offsetof(struct udphdr, uh_sum):
1349 		if (m->m_len < hdr->csum_start + sizeof(struct udphdr))
1350 			return (1);
1351 		udp = (struct udphdr *)(mtod(m, uint8_t *) + hdr->csum_start);
1352 		if (udp->uh_sum == 0)
1353 			return (0);
1354 
1355 		/* FALLTHROUGH */
1356 
1357 	case offsetof(struct tcphdr, th_sum):
1358 		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1359 		m->m_pkthdr.csum_data = 0xFFFF;
1360 		break;
1361 
1362 	default:
1363 		sc->vtnet_stats.rx_csum_bad_offset++;
1364 		return (1);
1365 	}
1366 
1367 	sc->vtnet_stats.rx_csum_offloaded++;
1368 
1369 	return (0);
1370 }
1371 
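/*
 * Collect the remaining buffers of a mergeable receive into one mbuf chain,
 * replacing each dequeued buffer so the virtqueue stays populated.
 */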
1372 static int
1373 vtnet_rxeof_merged(struct vtnet_softc *sc, struct mbuf *m_head, int nbufs)
1374 {
1375 	struct ifnet *ifp;
1376 	struct virtqueue *vq;
1377 	struct mbuf *m, *m_tail;
1378 	int len;
1379 
1380 	ifp = sc->vtnet_ifp;
1381 	vq = sc->vtnet_rx_vq;
1382 	m_tail = m_head;
1383 
1384 	while (--nbufs > 0) {
1385 		m = virtqueue_dequeue(vq, &len);
1386 		if (m == NULL) {
1387 			ifp->if_ierrors++;
1388 			goto fail;
1389 		}
1390 
1391 		if (vtnet_newbuf(sc) != 0) {
1392 			ifp->if_iqdrops++;
1393 			vtnet_discard_rxbuf(sc, m);
1394 			if (nbufs > 1)
1395 				vtnet_discard_merged_rxbuf(sc, nbufs);
1396 			goto fail;
1397 		}
1398 
1399 		if (m->m_len < len)
1400 			len = m->m_len;
1401 
1402 		m->m_len = len;
1403 		m->m_flags &= ~M_PKTHDR;
1404 
1405 		m_head->m_pkthdr.len += len;
1406 		m_tail->m_next = m;
1407 		m_tail = m;
1408 	}
1409 
1410 	return (0);
1411 
1412 fail:
1413 	sc->vtnet_stats.rx_mergeable_failed++;
1414 	m_freem(m_head);
1415 
1416 	return (1);
1417 }
1418 
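/*
 * Process up to count frames from the receive virtqueue: replace each
 * buffer, strip the VirtIO header, undo the software VLAN encapsulation if
 * requested, apply any checksum hints, and pass the frame up the stack.
 */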
1419 static int
1420 vtnet_rxeof(struct vtnet_softc *sc, int count, int *rx_npktsp)
1421 {
1422 	struct virtio_net_hdr lhdr;
1423 	struct ifnet *ifp;
1424 	struct virtqueue *vq;
1425 	struct mbuf *m;
1426 	struct ether_header *eh;
1427 	struct virtio_net_hdr *hdr;
1428 	struct virtio_net_hdr_mrg_rxbuf *mhdr;
1429 	int len, deq, nbufs, adjsz, rx_npkts;
1430 
1431 	ifp = sc->vtnet_ifp;
1432 	vq = sc->vtnet_rx_vq;
1433 	hdr = &lhdr;
1434 	deq = 0;
1435 	rx_npkts = 0;
1436 
1437 	ASSERT_SERIALIZED(&sc->vtnet_slz);
1438 
1439 	while (--count >= 0) {
1440 		m = virtqueue_dequeue(vq, &len);
1441 		if (m == NULL)
1442 			break;
1443 		deq++;
1444 
1445 		if (len < sc->vtnet_hdr_size + ETHER_HDR_LEN) {
1446 			ifp->if_ierrors++;
1447 			vtnet_discard_rxbuf(sc, m);
1448 			continue;
1449 		}
1450 
1451 		if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
1452 			nbufs = 1;
1453 			adjsz = sizeof(struct vtnet_rx_header);
1454 			/*
1455 			 * Account for the pad we insert between the header
1456 			 * and the actual start of the frame.
1457 			 */
1458 			len += VTNET_RX_HEADER_PAD;
1459 		} else {
1460 			mhdr = mtod(m, struct virtio_net_hdr_mrg_rxbuf *);
1461 			nbufs = mhdr->num_buffers;
1462 			adjsz = sizeof(struct virtio_net_hdr_mrg_rxbuf);
1463 		}
1464 
1465 		if (vtnet_replace_rxbuf(sc, m, len) != 0) {
1466 			ifp->if_iqdrops++;
1467 			vtnet_discard_rxbuf(sc, m);
1468 			if (nbufs > 1)
1469 				vtnet_discard_merged_rxbuf(sc, nbufs);
1470 			continue;
1471 		}
1472 
1473 		m->m_pkthdr.len = len;
1474 		m->m_pkthdr.rcvif = ifp;
1475 		m->m_pkthdr.csum_flags = 0;
1476 
1477 		if (nbufs > 1) {
1478 			if (vtnet_rxeof_merged(sc, m, nbufs) != 0)
1479 				continue;
1480 		}
1481 
1482 		ifp->if_ipackets++;
1483 
1484 		/*
1485 		 * Save a copy of the header before we strip it. For both mergeable
1486 		 * and non-mergeable, the VirtIO header is placed first in the
1487 		 * mbuf's data. We no longer need num_buffers, so always use a
1488 		 * virtio_net_hdr.
1489 		 */
1490 		memcpy(hdr, mtod(m, void *), sizeof(struct virtio_net_hdr));
1491 		m_adj(m, adjsz);
1492 
1493 		if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1494 			eh = mtod(m, struct ether_header *);
1495 			if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1496 				vtnet_vlan_tag_remove(m);
1497 
1498 				/*
1499 				 * With the 802.1Q header removed, update the
1500 				 * checksum starting location accordingly.
1501 				 */
1502 				if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
1503 					hdr->csum_start -=
1504 					    ETHER_VLAN_ENCAP_LEN;
1505 			}
1506 		}
1507 
1508 		if (ifp->if_capenable & IFCAP_RXCSUM &&
1509 		    hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
1510 			if (vtnet_rx_csum(sc, m, hdr) != 0)
1511 				sc->vtnet_stats.rx_csum_failed++;
1512 		}
1513 
1514 		lwkt_serialize_exit(&sc->vtnet_slz);
1515 		rx_npkts++;
1516 		ifp->if_input(ifp, m, NULL, -1);
1517 		lwkt_serialize_enter(&sc->vtnet_slz);
1518 
1519 		/*
1520 		 * The interface may have been stopped while we were
1521 		 * passing the packet up the network stack.
1522 		 */
1523 		if ((ifp->if_flags & IFF_RUNNING) == 0)
1524 			break;
1525 	}
1526 
1527 	virtqueue_notify(vq, &sc->vtnet_slz);
1528 
1529 	if (rx_npktsp != NULL)
1530 		*rx_npktsp = rx_npkts;
1531 
1532 	return (count > 0 ? 0 : EAGAIN);
1533 }
1534 
1535 static void
1536 vtnet_rx_intr_task(void *arg)
1537 {
1538 	struct vtnet_softc *sc;
1539 	struct ifnet *ifp;
1540 	int more;
1541 
1542 	sc = arg;
1543 	ifp = sc->vtnet_ifp;
1544 
1545 next:
1546 //	lwkt_serialize_enter(&sc->vtnet_slz);
1547 
1548 	if ((ifp->if_flags & IFF_RUNNING) == 0) {
1549 		vtnet_enable_rx_intr(sc);
1550 //		lwkt_serialize_exit(&sc->vtnet_slz);
1551 		return;
1552 	}
1553 
1554 	more = vtnet_rxeof(sc, sc->vtnet_rx_process_limit, NULL);
1555 	if (!more && vtnet_enable_rx_intr(sc) != 0) {
1556 		vtnet_disable_rx_intr(sc);
1557 		more = 1;
1558 	}
1559 
1560 //	lwkt_serialize_exit(&sc->vtnet_slz);
1561 
1562 	if (more) {
1563 		sc->vtnet_stats.rx_task_rescheduled++;
1564 		goto next;
1565 	}
1566 }
1567 
1568 static int
1569 vtnet_rx_vq_intr(void *xsc)
1570 {
1571 	struct vtnet_softc *sc;
1572 
1573 	sc = xsc;
1574 
1575 	vtnet_disable_rx_intr(sc);
1576 	vtnet_rx_intr_task(sc);
1577 
1578 	return (1);
1579 }
1580 
1581 static void
1582 vtnet_enqueue_txhdr(struct vtnet_softc *sc, struct vtnet_tx_header *txhdr)
1583 {
1584 	bzero(txhdr, sizeof(*txhdr));
1585 	SLIST_INSERT_HEAD(&sc->vtnet_txhdr_free, txhdr, link);
1586 }
1587 
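/*
 * Reclaim completed transmit buffers, freeing the mbufs and returning the
 * headers to the free list.
 */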
1588 static void
1589 vtnet_txeof(struct vtnet_softc *sc)
1590 {
1591 	struct virtqueue *vq;
1592 	struct ifnet *ifp;
1593 	struct vtnet_tx_header *txhdr;
1594 	int deq;
1595 
1596 	vq = sc->vtnet_tx_vq;
1597 	ifp = sc->vtnet_ifp;
1598 	deq = 0;
1599 
1600 	ASSERT_SERIALIZED(&sc->vtnet_slz);
1601 
1602 	while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) {
1603 		deq++;
1604 		ifp->if_opackets++;
1605 		m_freem(txhdr->vth_mbuf);
1606 		vtnet_enqueue_txhdr(sc, txhdr);
1607 	}
1608 
1609 	if (deq > 0) {
1610 		ifq_clr_oactive(&ifp->if_snd);
1611 		if (virtqueue_empty(vq))
1612 			sc->vtnet_watchdog_timer = 0;
1613 	}
1614 }
1615 
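/*
 * Fill in the virtio_net header for checksum offload and TSO based on the
 * mbuf's csum_flags. May return NULL, with the mbuf freed, if a pullup
 * fails or the host did not negotiate TSO with ECN.
 */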
1616 static struct mbuf *
1617 vtnet_tx_offload(struct vtnet_softc *sc, struct mbuf *m,
1618     struct virtio_net_hdr *hdr)
1619 {
1620 	struct ifnet *ifp;
1621 	struct ether_header *eh;
1622 	struct ether_vlan_header *evh;
1623 	struct ip *ip;
1624 	struct ip6_hdr *ip6;
1625 	struct tcphdr *tcp;
1626 	int ip_offset;
1627 	uint16_t eth_type, csum_start;
1628 	uint8_t ip_proto, gso_type;
1629 
1630 	ifp = sc->vtnet_ifp;
1631 	M_ASSERTPKTHDR(m);
1632 
1633 	ip_offset = sizeof(struct ether_header);
1634 	if (m->m_len < ip_offset) {
1635 		if ((m = m_pullup(m, ip_offset)) == NULL)
1636 			return (NULL);
1637 	}
1638 
1639 	eh = mtod(m, struct ether_header *);
1640 	eth_type = ntohs(eh->ether_type);
1641 	if (eth_type == ETHERTYPE_VLAN) {
1642 		ip_offset = sizeof(struct ether_vlan_header);
1643 		if (m->m_len < ip_offset) {
1644 			if ((m = m_pullup(m, ip_offset)) == NULL)
1645 				return (NULL);
1646 		}
1647 		evh = mtod(m, struct ether_vlan_header *);
1648 		eth_type = ntohs(evh->evl_proto);
1649 	}
1650 
1651 	switch (eth_type) {
1652 	case ETHERTYPE_IP:
1653 		if (m->m_len < ip_offset + sizeof(struct ip)) {
1654 			m = m_pullup(m, ip_offset + sizeof(struct ip));
1655 			if (m == NULL)
1656 				return (NULL);
1657 		}
1658 
1659 		ip = (struct ip *)(mtod(m, uint8_t *) + ip_offset);
1660 		ip_proto = ip->ip_p;
1661 		csum_start = ip_offset + (ip->ip_hl << 2);
1662 		gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1663 		break;
1664 
1665 	case ETHERTYPE_IPV6:
1666 		if (m->m_len < ip_offset + sizeof(struct ip6_hdr)) {
1667 			m = m_pullup(m, ip_offset + sizeof(struct ip6_hdr));
1668 			if (m == NULL)
1669 				return (NULL);
1670 		}
1671 
1672 		ip6 = (struct ip6_hdr *)(mtod(m, uint8_t *) + ip_offset);
1673 		/*
1674 		 * XXX Assume no extension headers are present. Presently,
1675 		 * this will always be true in the case of TSO, and FreeBSD
1676 		 * does not perform checksum offloading of IPv6 yet.
1677 		 */
1678 		ip_proto = ip6->ip6_nxt;
1679 		csum_start = ip_offset + sizeof(struct ip6_hdr);
1680 		gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1681 		break;
1682 
1683 	default:
1684 		return (m);
1685 	}
1686 
1687 	if (m->m_pkthdr.csum_flags & VTNET_CSUM_OFFLOAD) {
1688 		hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM;
1689 		hdr->csum_start = csum_start;
1690 		hdr->csum_offset = m->m_pkthdr.csum_data;
1691 
1692 		sc->vtnet_stats.tx_csum_offloaded++;
1693 	}
1694 
1695 	if (m->m_pkthdr.csum_flags & CSUM_TSO) {
1696 		if (ip_proto != IPPROTO_TCP)
1697 			return (m);
1698 
1699 		if (m->m_len < csum_start + sizeof(struct tcphdr)) {
1700 			m = m_pullup(m, csum_start + sizeof(struct tcphdr));
1701 			if (m == NULL)
1702 				return (NULL);
1703 		}
1704 
1705 		tcp = (struct tcphdr *)(mtod(m, uint8_t *) + csum_start);
1706 		hdr->gso_type = gso_type;
1707 		hdr->hdr_len = csum_start + (tcp->th_off << 2);
1708 		hdr->gso_size = m->m_pkthdr.tso_segsz;
1709 
1710 		if (tcp->th_flags & TH_CWR) {
1711 			/*
1712 			 * Drop if we did not negotiate VIRTIO_NET_F_HOST_ECN.
1713 			 * ECN support is only configurable globally with the
1714 			 * net.inet.tcp.ecn.enable sysctl knob.
1715 			 */
1716 			if ((sc->vtnet_flags & VTNET_FLAG_TSO_ECN) == 0) {
1717 				if_printf(ifp, "TSO with ECN not supported "
1718 				    "by host\n");
1719 				m_freem(m);
1720 				return (NULL);
1721 			}
1722 
1723 			hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
1724 		}
1725 
1726 		sc->vtnet_stats.tx_tso_offloaded++;
1727 	}
1728 
1729 	return (m);
1730 }
1731 
1732 static int
1733 vtnet_enqueue_txbuf(struct vtnet_softc *sc, struct mbuf **m_head,
1734     struct vtnet_tx_header *txhdr)
1735 {
1736 	struct sglist sg;
1737 	struct sglist_seg segs[VTNET_MAX_TX_SEGS];
1738 	struct virtqueue *vq;
1739 	struct mbuf *m;
1740 	int error;
1741 
1742 	vq = sc->vtnet_tx_vq;
1743 	m = *m_head;
1744 
1745 	sglist_init(&sg, sc->vtnet_tx_nsegs, segs);
1746 	error = sglist_append(&sg, &txhdr->vth_uhdr, sc->vtnet_hdr_size);
1747 	KASSERT(error == 0 && sg.sg_nseg == 1,
1748 	    ("%s: error %d adding header to sglist", __func__, error));
1749 
1750 	error = sglist_append_mbuf(&sg, m);
1751 	if (error) {
1752 		m = m_defrag(m, M_NOWAIT);
1753 		if (m == NULL)
1754 			goto fail;
1755 
1756 		*m_head = m;
1757 		sc->vtnet_stats.tx_defragged++;
1758 
1759 		error = sglist_append_mbuf(&sg, m);
1760 		if (error)
1761 			goto fail;
1762 	}
1763 
1764 	txhdr->vth_mbuf = m;
1765 	error = virtqueue_enqueue(vq, txhdr, &sg, sg.sg_nseg, 0);
1766 
1767 	return (error);
1768 
1769 fail:
1770 	sc->vtnet_stats.tx_defrag_failed++;
1771 	m_freem(*m_head);
1772 	*m_head = NULL;
1773 
1774 	return (ENOBUFS);
1775 }
1776 
1777 static struct mbuf *
1778 vtnet_vlan_tag_insert(struct mbuf *m)
1779 {
1780 	struct mbuf *n;
1781 	struct ether_vlan_header *evl;
1782 
1783 	if (M_WRITABLE(m) == 0) {
1784 		n = m_dup(m, M_NOWAIT);
1785 		m_freem(m);
1786 		if ((m = n) == NULL)
1787 			return (NULL);
1788 	}
1789 
1790 	M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT);
1791 	if (m == NULL)
1792 		return (NULL);
1793 	if (m->m_len < sizeof(struct ether_vlan_header)) {
1794 		m = m_pullup(m, sizeof(struct ether_vlan_header));
1795 		if (m == NULL)
1796 			return (NULL);
1797 	}
1798 
1799 	/* Insert 802.1Q header into the existing Ethernet header. */
1800 	evl = mtod(m, struct ether_vlan_header *);
1801 	bcopy((char *) evl + ETHER_VLAN_ENCAP_LEN,
1802 	      (char *) evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
1803 	evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
1804 	evl->evl_tag = htons(m->m_pkthdr.ether_vlantag);
1805 	m->m_flags &= ~M_VLANTAG;
1806 
1807 	return (m);
1808 }
1809 
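/*
 * Take a free transmit header, apply software VLAN encapsulation and any
 * requested offloads, and enqueue the frame on the transmit virtqueue.
 */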
1810 static int
1811 vtnet_encap(struct vtnet_softc *sc, struct mbuf **m_head)
1812 {
1813 	struct vtnet_tx_header *txhdr;
1814 	struct virtio_net_hdr *hdr;
1815 	struct mbuf *m;
1816 	int error;
1817 
1818 	txhdr = SLIST_FIRST(&sc->vtnet_txhdr_free);
1819 	if (txhdr == NULL)
1820 		return (ENOBUFS);
1821 	SLIST_REMOVE_HEAD(&sc->vtnet_txhdr_free, link);
1822 
1823 	/*
1824 	 * Always use the non-mergeable header to simplify things. When
1825 	 * the mergeable feature is negotiated, the num_buffers field
1826 	 * must be set to zero. We use vtnet_hdr_size later to enqueue
1827 	 * the correct header size to the host.
1828 	 */
1829 	hdr = &txhdr->vth_uhdr.hdr;
1830 	m = *m_head;
1831 
1832 	error = ENOBUFS;
1833 
1834 	if (m->m_flags & M_VLANTAG) {
1835 		//m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
1836 		m = vtnet_vlan_tag_insert(m);
1837 		if ((*m_head = m) == NULL)
1838 			goto fail;
1839 		m->m_flags &= ~M_VLANTAG;
1840 	}
1841 
1842 	if (m->m_pkthdr.csum_flags != 0) {
1843 		m = vtnet_tx_offload(sc, m, hdr);
1844 		if ((*m_head = m) == NULL)
1845 			goto fail;
1846 	}
1847 
1848 	error = vtnet_enqueue_txbuf(sc, m_head, txhdr);
1849 fail:
1850 	if (error != 0)
1851 		vtnet_enqueue_txhdr(sc, txhdr);
1852 	return (error);
1853 }
1854 
1855 static void
1856 vtnet_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
1857 {
1858 	struct vtnet_softc *sc;
1859 
1860 	sc = ifp->if_softc;
1861 
1862 	ASSERT_ALTQ_SQ_DEFAULT(ifp, ifsq);
1863 	lwkt_serialize_enter(&sc->vtnet_slz);
1864 	vtnet_start_locked(ifp, ifsq);
1865 	lwkt_serialize_exit(&sc->vtnet_slz);
1866 }
1867 
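/*
 * Dequeue packets from the send queue while the serializer is held and the
 * link is up, notifying the host once after everything has been enqueued.
 */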
1868 static void
1869 vtnet_start_locked(struct ifnet *ifp, struct ifaltq_subque *ifsq)
1870 {
1871 	struct vtnet_softc *sc;
1872 	struct virtqueue *vq;
1873 	struct mbuf *m0;
1874 	int enq;
1875 
1876 	sc = ifp->if_softc;
1877 	vq = sc->vtnet_tx_vq;
1878 	enq = 0;
1879 
1880 	ASSERT_SERIALIZED(&sc->vtnet_slz);
1881 
1882 	if ((ifp->if_flags & IFF_RUNNING) != IFF_RUNNING ||
1883 	    (sc->vtnet_flags & VTNET_FLAG_LINK) == 0)
1884 		return;
1885 
1886 #ifdef VTNET_TX_INTR_MODERATION
1887 	if (virtqueue_nused(vq) >= sc->vtnet_tx_size / 2)
1888 		vtnet_txeof(sc);
1889 #endif
1890 
1891 	while (!ifsq_is_empty(ifsq)) {
1892 		if (virtqueue_full(vq)) {
1893 			ifq_set_oactive(&ifp->if_snd);
1894 			break;
1895 		}
1896 
1897 		m0 = ifq_dequeue(&ifp->if_snd);
1898 		if (m0 == NULL)
1899 			break;
1900 
1901 		if (vtnet_encap(sc, &m0) != 0) {
1902 			if (m0 == NULL)
1903 				break;
1904 			ifq_prepend(&ifp->if_snd, m0);
1905 			ifq_set_oactive(&ifp->if_snd);
1906 			break;
1907 		}
1908 
1909 		enq++;
1910 		ETHER_BPF_MTAP(ifp, m0);
1911 	}
1912 
1913 	if (enq > 0) {
1914 		virtqueue_notify(vq, &sc->vtnet_slz);
1915 		sc->vtnet_watchdog_timer = VTNET_WATCHDOG_TIMEOUT;
1916 	}
1917 }
1918 
1919 static void
1920 vtnet_tick(void *xsc)
1921 {
1922 	struct vtnet_softc *sc;
1923 
1924 	sc = xsc;
1925 
1926 #if 0
1927 	ASSERT_SERIALIZED(&sc->vtnet_slz);
1928 #ifdef VTNET_DEBUG
1929 	virtqueue_dump(sc->vtnet_rx_vq);
1930 	virtqueue_dump(sc->vtnet_tx_vq);
1931 #endif
1932 
1933 	vtnet_watchdog(sc);
1934 	callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc);
1935 #endif
1936 }
1937 
1938 static void
1939 vtnet_tx_intr_task(void *arg)
1940 {
1941 	struct vtnet_softc *sc;
1942 	struct ifnet *ifp;
1943 	struct ifaltq_subque *ifsq;
1944 
1945 	sc = arg;
1946 	ifp = sc->vtnet_ifp;
1947 	ifsq = ifq_get_subq_default(&ifp->if_snd);
1948 
1949 next:
1950 //	lwkt_serialize_enter(&sc->vtnet_slz);
1951 
1952 	if ((ifp->if_flags & IFF_RUNNING) == 0) {
1953 		vtnet_enable_tx_intr(sc);
1954 //		lwkt_serialize_exit(&sc->vtnet_slz);
1955 		return;
1956 	}
1957 
1958 	vtnet_txeof(sc);
1959 
1960 	if (!ifsq_is_empty(ifsq))
1961 		vtnet_start_locked(ifp, ifsq);
1962 
1963 	if (vtnet_enable_tx_intr(sc) != 0) {
1964 		vtnet_disable_tx_intr(sc);
1965 		sc->vtnet_stats.tx_task_rescheduled++;
1966 //		lwkt_serialize_exit(&sc->vtnet_slz);
1967 		goto next;
1968 	}
1969 
1970 //	lwkt_serialize_exit(&sc->vtnet_slz);
1971 }
1972 
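/*
 * TX virtqueue interrupt handler: mask further TX interrupts and run the
 * completion task inline. The non-zero return presumably tells the VirtIO
 * interrupt dispatch that work was done.
 */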
1973 static int
1974 vtnet_tx_vq_intr(void *xsc)
1975 {
1976 	struct vtnet_softc *sc;
1977 
1978 	sc = xsc;
1979 
1980 	vtnet_disable_tx_intr(sc);
1981 	vtnet_tx_intr_task(sc);
1982 
1983 	return (1);
1984 }
1985 
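/*
 * Bring the interface down: cancel the tick, mask interrupts, reset the
 * host device and release all queued RX/TX mbufs. Called with the softc
 * serializer held.
 */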
1986 static void
1987 vtnet_stop(struct vtnet_softc *sc)
1988 {
1989 	device_t dev;
1990 	struct ifnet *ifp;
1991 
1992 	dev = sc->vtnet_dev;
1993 	ifp = sc->vtnet_ifp;
1994 
1995 	ASSERT_SERIALIZED(&sc->vtnet_slz);
1996 
1997 	sc->vtnet_watchdog_timer = 0;
1998 	callout_stop(&sc->vtnet_tick_ch);
1999 	ifq_clr_oactive(&ifp->if_snd);
2000 	ifp->if_flags &= ~(IFF_RUNNING);
2001 
2002 	vtnet_disable_rx_intr(sc);
2003 	vtnet_disable_tx_intr(sc);
2004 
2005 	/*
2006 	 * Stop the host VirtIO adapter. Note this will reset the host
2007 	 * adapter's state back to the pre-initialized state, so in
2008 	 * order to make the device usable again, we must drive it
2009 	 * through virtio_reinit() and virtio_reinit_complete().
2010 	 */
2011 	virtio_stop(dev);
2012 
2013 	sc->vtnet_flags &= ~VTNET_FLAG_LINK;
2014 
2015 	vtnet_free_rx_mbufs(sc);
2016 	vtnet_free_tx_mbufs(sc);
2017 }
2018 
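/*
 * Renegotiate features after a device reset, dropping any receive-side
 * features the administrator has disabled via if_capenable.
 */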
2019 static int
2020 vtnet_virtio_reinit(struct vtnet_softc *sc)
2021 {
2022 	device_t dev;
2023 	struct ifnet *ifp;
2024 	uint64_t features;
2025 	int error;
2026 
2027 	dev = sc->vtnet_dev;
2028 	ifp = sc->vtnet_ifp;
2029 	features = sc->vtnet_features;
2030 
2031 	/*
2032 	 * Re-negotiate with the host, removing any disabled receive
2033 	 * features. Transmit features are disabled only on our side
2034 	 * via if_capenable and if_hwassist.
2035 	 */
2036 
2037 	if (ifp->if_capabilities & IFCAP_RXCSUM) {
2038 		if ((ifp->if_capenable & IFCAP_RXCSUM) == 0)
2039 			features &= ~VIRTIO_NET_F_GUEST_CSUM;
2040 	}
2041 
2042 	if (ifp->if_capabilities & IFCAP_LRO) {
2043 		if ((ifp->if_capenable & IFCAP_LRO) == 0)
2044 			features &= ~VTNET_LRO_FEATURES;
2045 	}
2046 
2047 	if (ifp->if_capabilities & IFCAP_VLAN_HWFILTER) {
2048 		if ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0)
2049 			features &= ~VIRTIO_NET_F_CTRL_VLAN;
2050 	}
2051 
2052 	error = virtio_reinit(dev, features);
2053 	if (error)
2054 		device_printf(dev, "virtio reinit error %d\n", error);
2055 
2056 	return (error);
2057 }
2058 
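/*
 * Main (re)initialization path: reset and renegotiate with the host,
 * program the MAC address, repopulate the RX virtqueue, restore any
 * RX/MAC/VLAN filters, then mark the interface running.
 */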
2059 static void
2060 vtnet_init_locked(struct vtnet_softc *sc)
2061 {
2062 	device_t dev;
2063 	struct ifnet *ifp;
2064 	int error;
2065 
2066 	dev = sc->vtnet_dev;
2067 	ifp = sc->vtnet_ifp;
2068 
2069 	ASSERT_SERIALIZED(&sc->vtnet_slz);
2070 
2071 	if (ifp->if_flags & IFF_RUNNING)
2072 		return;
2073 
2074 	/* Stop host's adapter, cancel any pending I/O. */
2075 	vtnet_stop(sc);
2076 
2077 	/* Reinitialize the host device. */
2078 	error = vtnet_virtio_reinit(sc);
2079 	if (error) {
2080 		device_printf(dev,
2081 		    "reinitialization failed, stopping device...\n");
2082 		vtnet_stop(sc);
2083 		return;
2084 	}
2085 
2086 	/* Update host with assigned MAC address. */
2087 	bcopy(IF_LLADDR(ifp), sc->vtnet_hwaddr, ETHER_ADDR_LEN);
2088 	vtnet_set_hwaddr(sc);
2089 
2090 	ifp->if_hwassist = 0;
2091 	if (ifp->if_capenable & IFCAP_TXCSUM)
2092 		ifp->if_hwassist |= VTNET_CSUM_OFFLOAD;
2093 	if (ifp->if_capenable & IFCAP_TSO4)
2094 		ifp->if_hwassist |= CSUM_TSO;
2095 
2096 	error = vtnet_init_rx_vq(sc);
2097 	if (error) {
2098 		device_printf(dev,
2099 		    "cannot allocate mbufs for Rx virtqueue\n");
2100 		vtnet_stop(sc);
2101 		return;
2102 	}
2103 
2104 	if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
2105 		if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
2106 			/* Restore promiscuous and all-multicast modes. */
2107 			vtnet_rx_filter(sc);
2108 
2109 			/* Restore filtered MAC addresses. */
2110 			vtnet_rx_filter_mac(sc);
2111 		}
2112 
2113 		/* Restore VLAN filters. */
2114 		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
2115 			vtnet_rx_filter_vlan(sc);
2116 	}
2117 
2118 	{
2119 		vtnet_enable_rx_intr(sc);
2120 		vtnet_enable_tx_intr(sc);
2121 	}
2122 
2123 	ifp->if_flags |= IFF_RUNNING;
2124 	ifq_clr_oactive(&ifp->if_snd);
2125 
2126 	virtio_reinit_complete(dev);
2127 
2128 	vtnet_update_link_status(sc);
2129 	callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc);
2130 }
2131 
2132 static void
2133 vtnet_init(void *xsc)
2134 {
2135 	struct vtnet_softc *sc;
2136 
2137 	sc = xsc;
2138 
2139 	lwkt_serialize_enter(&sc->vtnet_slz);
2140 	vtnet_init_locked(sc);
2141 	lwkt_serialize_exit(&sc->vtnet_slz);
2142 }
2143 
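/*
 * Synchronously execute a command on the control virtqueue: enqueue the
 * caller's sglist (the last segment is the host-writable ack byte),
 * notify the host and poll until the cookie comes back.
 */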
2144 static void
2145 vtnet_exec_ctrl_cmd(struct vtnet_softc *sc, void *cookie,
2146     struct sglist *sg, int readable, int writable)
2147 {
2148 	struct virtqueue *vq;
2149 	void *c;
2150 
2151 	vq = sc->vtnet_ctrl_vq;
2152 
2153 	ASSERT_SERIALIZED(&sc->vtnet_slz);
2154 	KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_VQ,
2155 	    ("no control virtqueue"));
2156 	KASSERT(virtqueue_empty(vq),
2157 	    ("control command already enqueued"));
2158 
2159 	if (virtqueue_enqueue(vq, cookie, sg, readable, writable) != 0)
2160 		return;
2161 
2162 	virtqueue_notify(vq, &sc->vtnet_slz);
2163 
2164 	/*
2165 	 * Poll until the command is complete. Previously, we would
2166 	 * sleep until the control virtqueue interrupt handler woke
2167 	 * us up, but dropping the serializer (vtnet_slz) leads to
2168 	 * serialization difficulties.
2169 	 *
2170 	 * Furthermore, it appears QEMU/KVM only allocates three MSIX
2171 	 * vectors. Two of those vectors are needed for the Rx and Tx
2172 	 * virtqueues. We do not support sharing both a Vq and config
2173 	 * changed notification on the same MSIX vector.
2174 	 */
2175 	c = virtqueue_poll(vq, NULL);
2176 	KASSERT(c == cookie, ("unexpected control command response"));
2177 }
2178 
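/*
 * Issue VIRTIO_NET_CTRL_MAC_ADDR_SET to program the primary MAC address.
 * The message is header + 6-byte address + ack byte, gathered through a
 * 3-segment sglist.
 */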
2179 static int
2180 vtnet_ctrl_mac_cmd(struct vtnet_softc *sc, uint8_t *hwaddr)
2181 {
2182 	struct {
2183 		struct virtio_net_ctrl_hdr hdr __aligned(2);
2184 		uint8_t pad1;
2185 		char aligned_hwaddr[ETHER_ADDR_LEN] __aligned(8);
2186 		uint8_t pad2;
2187 		uint8_t ack;
2188 	} s;
2189 	struct sglist_seg segs[3];
2190 	struct sglist sg;
2191 	int error;
2192 
2193 	s.hdr.class = VIRTIO_NET_CTRL_MAC;
2194 	s.hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;
2195 	s.ack = VIRTIO_NET_ERR;
2196 
2197 	/* Copy the MAC address into physically contiguous memory. */
2198 	memcpy(s.aligned_hwaddr, hwaddr, ETHER_ADDR_LEN);
2199 
2200 	sglist_init(&sg, 3, segs);
2201 	error = 0;
2202 	error |= sglist_append(&sg, &s.hdr,
2203 	    sizeof(struct virtio_net_ctrl_hdr));
2204 	error |= sglist_append(&sg, s.aligned_hwaddr, ETHER_ADDR_LEN);
2205 	error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
2206 	KASSERT(error == 0 && sg.sg_nseg == 3,
2207 	    ("%s: error %d adding set MAC msg to sglist", __func__, error));
2208 
2209 	vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
2210 
2211 	return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
2212 }
2213 
2214 static void
2215 vtnet_rx_filter(struct vtnet_softc *sc)
2216 {
2217 	device_t dev;
2218 	struct ifnet *ifp;
2219 
2220 	dev = sc->vtnet_dev;
2221 	ifp = sc->vtnet_ifp;
2222 
2223 	ASSERT_SERIALIZED(&sc->vtnet_slz);
2224 	KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX,
2225 	    ("CTRL_RX feature not negotiated"));
2226 
2227 	if (vtnet_set_promisc(sc, ifp->if_flags & IFF_PROMISC) != 0)
2228 		device_printf(dev, "cannot %s promiscuous mode\n",
2229 		    (ifp->if_flags & IFF_PROMISC) ? "enable" : "disable");
2230 
2231 	if (vtnet_set_allmulti(sc, ifp->if_flags & IFF_ALLMULTI) != 0)
2232 		device_printf(dev, "cannot %s all-multicast mode\n",
2233 		    (ifp->if_flags & IFF_ALLMULTI) ? "enable" : "disable");
2234 }
2235 
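/*
 * Issue a VIRTIO_NET_CTRL_RX command (promiscuous or all-multicast) with a
 * single on/off byte of payload, returning EIO unless the host acks with
 * VIRTIO_NET_OK.
 */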
2236 static int
2237 vtnet_ctrl_rx_cmd(struct vtnet_softc *sc, int cmd, int on)
2238 {
2239 	struct sglist_seg segs[3];
2240 	struct sglist sg;
2241 	struct {
2242 		struct virtio_net_ctrl_hdr hdr __aligned(2);
2243 		uint8_t pad1;
2244 		uint8_t onoff;
2245 		uint8_t pad2;
2246 		uint8_t ack;
2247 	} s;
2248 	int error;
2249 
2250 	KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX,
2251 	    ("%s: CTRL_RX feature not negotiated", __func__));
2252 
2253 	s.hdr.class = VIRTIO_NET_CTRL_RX;
2254 	s.hdr.cmd = cmd;
2255 	s.onoff = !!on;
2256 	s.ack = VIRTIO_NET_ERR;
2257 
2258 	sglist_init(&sg, 3, segs);
2259 	error = 0;
2260 	error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
2261 	error |= sglist_append(&sg, &s.onoff, sizeof(uint8_t));
2262 	error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
2263 	KASSERT(error == 0 && sg.sg_nseg == 3,
2264 	    ("%s: error %d adding Rx message to sglist", __func__, error));
2265 
2266 	vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
2267 
2268 	return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
2269 }
2270 
2271 static int
2272 vtnet_set_promisc(struct vtnet_softc *sc, int on)
2273 {
2274 
2275 	return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_PROMISC, on));
2276 }
2277 
2278 static int
2279 vtnet_set_allmulti(struct vtnet_softc *sc, int on)
2280 {
2281 
2282 	return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, on));
2283 }
2284 
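/*
 * Program the host's unicast/multicast MAC filter tables. Each table is a
 * 32-bit entry count followed by the addresses. If either table would
 * overflow VTNET_MAX_MAC_ENTRIES, fall back to promiscuous or
 * all-multicast mode respectively.
 */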
2285 static void
2286 vtnet_rx_filter_mac(struct vtnet_softc *sc)
2287 {
2288 	struct virtio_net_ctrl_hdr hdr __aligned(2);
2289 	struct vtnet_mac_filter *filter;
2290 	struct sglist_seg segs[4];
2291 	struct sglist sg;
2292 	struct ifnet *ifp;
2293 	struct ifaddr *ifa;
2294 	struct ifaddr_container *ifac;
2295 	struct ifmultiaddr *ifma;
2296 	int ucnt, mcnt, promisc, allmulti, error;
2297 	uint8_t ack;
2298 
2299 	ifp = sc->vtnet_ifp;
2300 	ucnt = 0;
2301 	mcnt = 0;
2302 	promisc = 0;
2303 	allmulti = 0;
2304 
2305 	ASSERT_SERIALIZED(&sc->vtnet_slz);
2306 	KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX,
2307 	    ("%s: CTRL_RX feature not negotiated", __func__));
2308 
2309 	/* Use the MAC filtering table allocated in vtnet_attach. */
2310 	filter = sc->vtnet_macfilter;
2311 	memset(filter, 0, sizeof(struct vtnet_mac_filter));
2312 
2313 	/* Unicast MAC addresses: */
2314 	//if_addr_rlock(ifp);
2315 	TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
2316 		ifa = ifac->ifa;
2317 		if (ifa->ifa_addr->sa_family != AF_LINK)
2318 			continue;
2319 		else if (memcmp(LLADDR((struct sockaddr_dl *)ifa->ifa_addr),
2320 		    sc->vtnet_hwaddr, ETHER_ADDR_LEN) == 0)
2321 			continue;
2322 		else if (ucnt == VTNET_MAX_MAC_ENTRIES) {
2323 			promisc = 1;
2324 			break;
2325 		}
2326 
2327 		bcopy(LLADDR((struct sockaddr_dl *)ifa->ifa_addr),
2328 		    &filter->vmf_unicast.macs[ucnt], ETHER_ADDR_LEN);
2329 		ucnt++;
2330 	}
2331 	//if_addr_runlock(ifp);
2332 
2333 	if (promisc != 0) {
2334 		filter->vmf_unicast.nentries = 0;
2335 		if_printf(ifp, "more than %d MAC addresses assigned, "
2336 		    "falling back to promiscuous mode\n",
2337 		    VTNET_MAX_MAC_ENTRIES);
2338 	} else
2339 		filter->vmf_unicast.nentries = ucnt;
2340 
2341 	/* Multicast MAC addresses: */
2342 	//if_maddr_rlock(ifp);
2343 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2344 		if (ifma->ifma_addr->sa_family != AF_LINK)
2345 			continue;
2346 		else if (mcnt == VTNET_MAX_MAC_ENTRIES) {
2347 			allmulti = 1;
2348 			break;
2349 		}
2350 
2351 		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2352 		    &filter->vmf_multicast.macs[mcnt], ETHER_ADDR_LEN);
2353 		mcnt++;
2354 	}
2355 	//if_maddr_runlock(ifp);
2356 
2357 	if (allmulti != 0) {
2358 		filter->vmf_multicast.nentries = 0;
2359 		if_printf(ifp, "more than %d multicast MAC addresses "
2360 		    "assigned, falling back to all-multicast mode\n",
2361 		    VTNET_MAX_MAC_ENTRIES);
2362 	} else
2363 		filter->vmf_multicast.nentries = mcnt;
2364 
2365 	if (promisc != 0 && allmulti != 0)
2366 		goto out;
2367 
2368 	hdr.class = VIRTIO_NET_CTRL_MAC;
2369 	hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;
2370 	ack = VIRTIO_NET_ERR;
2371 
2372 	sglist_init(&sg, 4, segs);
2373 	error = 0;
2374 	error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr));
2375 	error |= sglist_append(&sg, &filter->vmf_unicast,
2376 	    sizeof(uint32_t) + filter->vmf_unicast.nentries * ETHER_ADDR_LEN);
2377 	error |= sglist_append(&sg, &filter->vmf_multicast,
2378 	    sizeof(uint32_t) + filter->vmf_multicast.nentries * ETHER_ADDR_LEN);
2379 	error |= sglist_append(&sg, &ack, sizeof(uint8_t));
2380 	KASSERT(error == 0 && sg.sg_nseg == 4,
2381 	    ("%s: error %d adding MAC filter msg to sglist", __func__, error));
2382 
2383 	vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1);
2384 
2385 	if (ack != VIRTIO_NET_OK)
2386 		if_printf(ifp, "error setting host MAC filter table\n");
2387 
2388 out:
2389 	if (promisc != 0 && vtnet_set_promisc(sc, 1) != 0)
2390 		if_printf(ifp, "cannot enable promiscuous mode\n");
2391 	if (allmulti != 0 && vtnet_set_allmulti(sc, 1) != 0)
2392 		if_printf(ifp, "cannot enable all-multicast mode\n");
2393 }
2394 
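/*
 * Add or remove a single VLAN tag from the host's VLAN filter via
 * VIRTIO_NET_CTRL_VLAN_ADD/DEL.
 */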
2395 static int
2396 vtnet_exec_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
2397 {
2398 	struct sglist_seg segs[3];
2399 	struct sglist sg;
2400 	struct {
2401 		struct virtio_net_ctrl_hdr hdr __aligned(2);
2402 		uint8_t pad1;
2403 		uint16_t tag;
2404 		uint8_t pad2;
2405 		uint8_t ack;
2406 	} s;
2407 	int error;
2408 
2409 	s.hdr.class = VIRTIO_NET_CTRL_VLAN;
2410 	s.hdr.cmd = add ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL;
2411 	s.tag = tag;
2412 	s.ack = VIRTIO_NET_ERR;
2413 
2414 	sglist_init(&sg, 3, segs);
2415 	error = 0;
2416 	error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
2417 	error |= sglist_append(&sg, &s.tag, sizeof(uint16_t));
2418 	error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
2419 	KASSERT(error == 0 && sg.sg_nseg == 3,
2420 	    ("%s: error %d adding VLAN message to sglist", __func__, error));
2421 
2422 	vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
2423 
2424 	return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
2425 }
2426 
2427 static void
2428 vtnet_rx_filter_vlan(struct vtnet_softc *sc)
2429 {
2430 	uint32_t w;
2431 	uint16_t tag;
2432 	int i, bit, nvlans;
2433 
2434 	ASSERT_SERIALIZED(&sc->vtnet_slz);
2435 	KASSERT(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER,
2436 	    ("%s: VLAN_FILTER feature not negotiated", __func__));
2437 
2438 	nvlans = sc->vtnet_nvlans;
2439 
2440 	/* Enable the filter for each configured VLAN. */
2441 	for (i = 0; i < VTNET_VLAN_SHADOW_SIZE && nvlans > 0; i++) {
2442 		w = sc->vtnet_vlan_shadow[i];
2443 		while ((bit = ffs(w) - 1) != -1) {
2444 			w &= ~(1 << bit);
2445 			tag = sizeof(w) * CHAR_BIT * i + bit;
2446 			nvlans--;
2447 
2448 			if (vtnet_exec_vlan_filter(sc, 1, tag) != 0) {
2449 				device_printf(sc->vtnet_dev,
2450 				    "cannot enable VLAN %d filter\n", tag);
2451 			}
2452 		}
2453 	}
2454 
2455 	KASSERT(nvlans == 0, ("VLAN count incorrect"));
2456 }
2457 
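/*
 * Track a VLAN registration in the shadow bitmap (one bit per possible
 * tag, 32 tags per word: e.g. tag 100 lives in word 3, bit 4) and, when
 * hardware VLAN filtering is enabled, mirror the change to the host.
 */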
2458 static void
2459 vtnet_update_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
2460 {
2461 	struct ifnet *ifp;
2462 	int idx, bit;
2463 
2464 	ifp = sc->vtnet_ifp;
2465 	idx = (tag >> 5) & 0x7F;
2466 	bit = tag & 0x1F;
2467 
2468 	if (tag == 0 || tag > 4095)
2469 		return;
2470 
2471 	lwkt_serialize_enter(&sc->vtnet_slz);
2472 
2473 	/* Update shadow VLAN table. */
2474 	if (add) {
2475 		sc->vtnet_nvlans++;
2476 		sc->vtnet_vlan_shadow[idx] |= (1 << bit);
2477 	} else {
2478 		sc->vtnet_nvlans--;
2479 		sc->vtnet_vlan_shadow[idx] &= ~(1 << bit);
2480 	}
2481 
2482 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER &&
2483 	    vtnet_exec_vlan_filter(sc, add, tag) != 0) {
2484 		device_printf(sc->vtnet_dev,
2485 		    "cannot %s VLAN %d %s the host filter table\n",
2486 		    add ? "add" : "remove", tag, add ? "to" : "from");
2487 	}
2488 
2489 	lwkt_serialize_exit(&sc->vtnet_slz);
2490 }
2491 
2492 static void
2493 vtnet_register_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
2494 {
2495 
2496 	if (ifp->if_softc != arg)
2497 		return;
2498 
2499 	vtnet_update_vlan_filter(arg, 1, tag);
2500 }
2501 
2502 static void
2503 vtnet_unregister_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
2504 {
2505 
2506 	if (ifp->if_softc != arg)
2507 		return;
2508 
2509 	vtnet_update_vlan_filter(arg, 0, tag);
2510 }
2511 
2512 static int
2513 vtnet_ifmedia_upd(struct ifnet *ifp)
2514 {
2515 	struct vtnet_softc *sc;
2516 	struct ifmedia *ifm;
2517 
2518 	sc = ifp->if_softc;
2519 	ifm = &sc->vtnet_media;
2520 
2521 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
2522 		return (EINVAL);
2523 
2524 	return (0);
2525 }
2526 
2527 static void
2528 vtnet_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
2529 {
2530 	struct vtnet_softc *sc;
2531 
2532 	sc = ifp->if_softc;
2533 
2534 	ifmr->ifm_status = IFM_AVALID;
2535 	ifmr->ifm_active = IFM_ETHER;
2536 
2537 	lwkt_serialize_enter(&sc->vtnet_slz);
2538 	if (vtnet_is_link_up(sc) != 0) {
2539 		ifmr->ifm_status |= IFM_ACTIVE;
2540 		ifmr->ifm_active |= VTNET_MEDIATYPE;
2541 	} else
2542 		ifmr->ifm_active |= IFM_NONE;
2543 	lwkt_serialize_exit(&sc->vtnet_slz);
2544 }
2545 
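/*
 * Attach the driver statistics as read-only sysctl nodes under the
 * device's sysctl tree.
 */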
2546 static void
2547 vtnet_add_statistics(struct vtnet_softc *sc)
2548 {
2549 	device_t dev;
2550 	struct vtnet_statistics *stats;
2551 	struct sysctl_ctx_list *ctx;
2552 	struct sysctl_oid *tree;
2553 	struct sysctl_oid_list *child;
2554 
2555 	dev = sc->vtnet_dev;
2556 	stats = &sc->vtnet_stats;
2557 	ctx = device_get_sysctl_ctx(dev);
2558 	tree = device_get_sysctl_tree(dev);
2559 	child = SYSCTL_CHILDREN(tree);
2560 
2561 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "mbuf_alloc_failed",
2562 	    CTLFLAG_RD, &stats->mbuf_alloc_failed, 0,
2563 	    "Mbuf cluster allocation failures");
2564 
2565 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_frame_too_large",
2566 	    CTLFLAG_RD, &stats->rx_frame_too_large, 0,
2567 	    "Received frame larger than the mbuf chain");
2568 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_enq_replacement_failed",
2569 	    CTLFLAG_RD, &stats->rx_enq_replacement_failed, 0,
2570 	    "Enqueuing the replacement receive mbuf failed");
2571 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_mergeable_failed",
2572 	    CTLFLAG_RD, &stats->rx_mergeable_failed, 0,
2573 	    "Mergeable buffers receive failures");
2574 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ethtype",
2575 	    CTLFLAG_RD, &stats->rx_csum_bad_ethtype, 0,
2576 	    "Received checksum offloaded buffer with unsupported "
2577 	    "Ethernet type");
2578 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ipproto",
2579 	    CTLFLAG_RD, &stats->rx_csum_bad_ipproto, 0,
2580 	    "Received checksum offloaded buffer with incorrect IP protocol");
2581 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_offset",
2582 	    CTLFLAG_RD, &stats->rx_csum_bad_offset, 0,
2583 	    "Received checksum offloaded buffer with incorrect offset");
2584 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_failed",
2585 	    CTLFLAG_RD, &stats->rx_csum_failed, 0,
2586 	    "Received buffer checksum offload failed");
2587 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_offloaded",
2588 	    CTLFLAG_RD, &stats->rx_csum_offloaded, 0,
2589 	    "Received buffer checksum offload succeeded");
2590 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_task_rescheduled",
2591 	    CTLFLAG_RD, &stats->rx_task_rescheduled, 0,
2592 	    "Times the receive interrupt task rescheduled itself");
2593 
2594 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_bad_ethtype",
2595 	    CTLFLAG_RD, &stats->tx_csum_bad_ethtype, 0,
2596 	    "Aborted transmit of checksum offloaded buffer with unknown "
2597 	    "Ethernet type");
2598 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_bad_ethtype",
2599 	    CTLFLAG_RD, &stats->tx_tso_bad_ethtype, 0,
2600 	    "Aborted transmit of TSO buffer with unknown Ethernet type");
2601 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defragged",
2602 	    CTLFLAG_RD, &stats->tx_defragged, 0,
2603 	    "Transmit mbufs defragged");
2604 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defrag_failed",
2605 	    CTLFLAG_RD, &stats->tx_defrag_failed, 0,
2606 	    "Aborted transmit of buffer because defrag failed");
2607 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_offloaded",
2608 	    CTLFLAG_RD, &stats->tx_csum_offloaded, 0,
2609 	    "Offloaded checksum of transmitted buffer");
2610 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_offloaded",
2611 	    CTLFLAG_RD, &stats->tx_tso_offloaded, 0,
2612 	    "Segmentation offload of transmitted buffer");
2613 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_task_rescheduled",
2614 	    CTLFLAG_RD, &stats->tx_task_rescheduled, 0,
2615 	    "Times the transmit interrupt task rescheduled itself");
2616 }
2617 
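/*
 * Thin wrappers around the virtqueue interrupt controls. With
 * VTNET_TX_INTR_MODERATION defined, TX interrupts stay disabled and
 * completed buffers are instead reclaimed from vtnet_start_locked().
 */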
2618 static int
2619 vtnet_enable_rx_intr(struct vtnet_softc *sc)
2620 {
2621 
2622 	return (virtqueue_enable_intr(sc->vtnet_rx_vq));
2623 }
2624 
2625 static void
2626 vtnet_disable_rx_intr(struct vtnet_softc *sc)
2627 {
2628 
2629 	virtqueue_disable_intr(sc->vtnet_rx_vq);
2630 }
2631 
2632 static int
2633 vtnet_enable_tx_intr(struct vtnet_softc *sc)
2634 {
2635 
2636 #ifdef VTNET_TX_INTR_MODERATION
2637 	return (0);
2638 #else
2639 	return (virtqueue_enable_intr(sc->vtnet_tx_vq));
2640 #endif
2641 }
2642 
2643 static void
2644 vtnet_disable_tx_intr(struct vtnet_softc *sc)
2645 {
2646 
2647 	virtqueue_disable_intr(sc->vtnet_tx_vq);
2648 }
2649