xref: /dragonfly/sys/dev/virtual/virtio/net/if_vtnet.c (revision 02fd838e)
1 /*-
2  * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice unmodified, this list of conditions, and the following
10  *    disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 /* Driver for VirtIO network devices. */
28 
29 #include <sys/cdefs.h>
30 
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/kernel.h>
34 #include <sys/sockio.h>
35 #include <sys/mbuf.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/socket.h>
39 #include <sys/sysctl.h>
40 #include <sys/taskqueue.h>
41 #include <sys/random.h>
42 #include <sys/sglist.h>
43 #include <sys/serialize.h>
44 #include <sys/bus.h>
45 #include <sys/rman.h>
46 
47 #include <machine/limits.h>
48 
49 #include <net/ethernet.h>
50 #include <net/if.h>
51 #include <net/if_arp.h>
52 #include <net/if_dl.h>
53 #include <net/if_types.h>
54 #include <net/if_media.h>
55 #include <net/vlan/if_vlan_var.h>
56 #include <net/vlan/if_vlan_ether.h>
57 #include <net/ifq_var.h>
58 
59 #include <net/bpf.h>
60 
61 #include <netinet/in_systm.h>
62 #include <netinet/in.h>
63 #include <netinet/ip.h>
64 #include <netinet/ip6.h>
65 #include <netinet/udp.h>
66 #include <netinet/tcp.h>
67 
68 #include <dev/virtual/virtio/virtio/virtio.h>
69 #include <dev/virtual/virtio/virtio/virtqueue.h>
70 #include <dev/virtual/virtio/net/virtio_net.h>
71 #include <dev/virtual/virtio/net/if_vtnetvar.h>
72 
73 #include "virtio_if.h"
74 
75 MALLOC_DEFINE(M_VTNET, "VTNET_TX", "Outgoing VTNET TX frame header");
76 
77 static int	vtnet_probe(device_t);
78 static int	vtnet_attach(device_t);
79 static int	vtnet_detach(device_t);
80 static int	vtnet_suspend(device_t);
81 static int	vtnet_resume(device_t);
82 static int	vtnet_shutdown(device_t);
83 static int	vtnet_config_change(device_t);
84 
85 static void	vtnet_negotiate_features(struct vtnet_softc *);
86 static int	vtnet_alloc_virtqueues(struct vtnet_softc *);
87 static void	vtnet_get_hwaddr(struct vtnet_softc *);
88 static void	vtnet_set_hwaddr(struct vtnet_softc *);
89 static int	vtnet_is_link_up(struct vtnet_softc *);
90 static void	vtnet_update_link_status(struct vtnet_softc *);
91 #if 0
92 static void	vtnet_watchdog(struct vtnet_softc *);
93 #endif
94 static void	vtnet_config_change_task(void *, int);
95 static int	vtnet_setup_interface(struct vtnet_softc *);
96 static int	vtnet_change_mtu(struct vtnet_softc *, int);
97 static int	vtnet_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
98 
99 static int	vtnet_init_rx_vq(struct vtnet_softc *);
100 static void	vtnet_free_rx_mbufs(struct vtnet_softc *);
101 static void	vtnet_free_tx_mbufs(struct vtnet_softc *);
102 static void	vtnet_free_ctrl_vq(struct vtnet_softc *);
103 
104 static struct mbuf * vtnet_alloc_rxbuf(struct vtnet_softc *, int,
105 		    struct mbuf **);
106 static int	vtnet_replace_rxbuf(struct vtnet_softc *,
107 		    struct mbuf *, int);
108 static int	vtnet_newbuf(struct vtnet_softc *);
109 static void	vtnet_discard_merged_rxbuf(struct vtnet_softc *, int);
110 static void	vtnet_discard_rxbuf(struct vtnet_softc *, struct mbuf *);
111 static int	vtnet_enqueue_rxbuf(struct vtnet_softc *, struct mbuf *);
112 static void	vtnet_vlan_tag_remove(struct mbuf *);
113 static int	vtnet_rx_csum(struct vtnet_softc *, struct mbuf *,
114 		    struct virtio_net_hdr *);
115 static int	vtnet_rxeof_merged(struct vtnet_softc *, struct mbuf *, int);
116 static int	vtnet_rxeof(struct vtnet_softc *, int, int *);
117 static void	vtnet_rx_intr_task(void *);
118 static int	vtnet_rx_vq_intr(void *);
119 
120 static void	vtnet_enqueue_txhdr(struct vtnet_softc *,
121 		    struct vtnet_tx_header *);
122 static void	vtnet_txeof(struct vtnet_softc *);
123 static struct mbuf * vtnet_tx_offload(struct vtnet_softc *, struct mbuf *,
124 		    struct virtio_net_hdr *);
125 static int	vtnet_enqueue_txbuf(struct vtnet_softc *, struct mbuf **,
126 		    struct vtnet_tx_header *);
127 static int	vtnet_encap(struct vtnet_softc *, struct mbuf **);
128 static void	vtnet_start_locked(struct ifnet *, struct ifaltq_subque *);
129 static void	vtnet_start(struct ifnet *, struct ifaltq_subque *);
130 static void	vtnet_tick(void *);
131 static void	vtnet_tx_intr_task(void *);
132 static int	vtnet_tx_vq_intr(void *);
133 
134 static void	vtnet_stop(struct vtnet_softc *);
135 static int	vtnet_virtio_reinit(struct vtnet_softc *);
136 static void	vtnet_init_locked(struct vtnet_softc *);
137 static void	vtnet_init(void *);
138 
139 static void	vtnet_exec_ctrl_cmd(struct vtnet_softc *, void *,
140 		    struct sglist *, int, int);
141 
142 static int	vtnet_ctrl_mac_cmd(struct vtnet_softc *, uint8_t *);
143 static int	vtnet_ctrl_rx_cmd(struct vtnet_softc *, int, int);
144 static int	vtnet_set_promisc(struct vtnet_softc *, int);
145 static int	vtnet_set_allmulti(struct vtnet_softc *, int);
146 static void	vtnet_rx_filter(struct vtnet_softc *sc);
147 static void	vtnet_rx_filter_mac(struct vtnet_softc *);
148 
149 static int	vtnet_exec_vlan_filter(struct vtnet_softc *, int, uint16_t);
150 static void	vtnet_rx_filter_vlan(struct vtnet_softc *);
151 static void	vtnet_update_vlan_filter(struct vtnet_softc *, int, uint16_t);
152 static void	vtnet_register_vlan(void *, struct ifnet *, uint16_t);
153 static void	vtnet_unregister_vlan(void *, struct ifnet *, uint16_t);
154 
155 static int	vtnet_ifmedia_upd(struct ifnet *);
156 static void	vtnet_ifmedia_sts(struct ifnet *, struct ifmediareq *);
157 
158 static void	vtnet_add_statistics(struct vtnet_softc *);
159 
160 static int	vtnet_enable_rx_intr(struct vtnet_softc *);
161 static int	vtnet_enable_tx_intr(struct vtnet_softc *);
162 static void	vtnet_disable_rx_intr(struct vtnet_softc *);
163 static void	vtnet_disable_tx_intr(struct vtnet_softc *);
164 
165 /* Tunables. */
166 static int vtnet_csum_disable = 0;
167 TUNABLE_INT("hw.vtnet.csum_disable", &vtnet_csum_disable);
168 static int vtnet_tso_disable = 1;
169 TUNABLE_INT("hw.vtnet.tso_disable", &vtnet_tso_disable);
170 static int vtnet_lro_disable = 0;
171 TUNABLE_INT("hw.vtnet.lro_disable", &vtnet_lro_disable);
172 
173 /*
174  * Reducing the number of transmit completion interrupts can
175  * improve performance. To do so, the define below keeps the
176  * Tx vq interrupt disabled and adds calls to vtnet_txeof()
177  * in the start and watchdog paths. The price is that the
178  * m_free'ing of transmitted mbufs may be delayed until the
179  * watchdog fires.
180  */
181 #define VTNET_TX_INTR_MODERATION
182 
183 static struct virtio_feature_desc vtnet_feature_desc[] = {
184 	{ VIRTIO_NET_F_CSUM,		"TxChecksum"	},
185 	{ VIRTIO_NET_F_GUEST_CSUM,	"RxChecksum"	},
186 	{ VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, "DynOffload"	},
187 	{ VIRTIO_NET_F_MAC,		"MacAddress"	},
188 	{ VIRTIO_NET_F_GSO,		"TxAllGSO"	},
189 	{ VIRTIO_NET_F_GUEST_TSO4,	"RxTSOv4"	},
190 	{ VIRTIO_NET_F_GUEST_TSO6,	"RxTSOv6"	},
191 	{ VIRTIO_NET_F_GUEST_ECN,	"RxECN"		},
192 	{ VIRTIO_NET_F_GUEST_UFO,	"RxUFO"		},
193 	{ VIRTIO_NET_F_HOST_TSO4,	"TxTSOv4"	},
194 	{ VIRTIO_NET_F_HOST_TSO6,	"TxTSOv6"	},
195 	{ VIRTIO_NET_F_HOST_ECN,	"TxTSOECN"	},
196 	{ VIRTIO_NET_F_HOST_UFO,	"TxUFO"		},
197 	{ VIRTIO_NET_F_MRG_RXBUF,	"MrgRxBuf"	},
198 	{ VIRTIO_NET_F_STATUS,		"Status"	},
199 	{ VIRTIO_NET_F_CTRL_VQ,		"ControlVq"	},
200 	{ VIRTIO_NET_F_CTRL_RX,		"RxMode"	},
201 	{ VIRTIO_NET_F_CTRL_VLAN,	"VLanFilter"	},
202 	{ VIRTIO_NET_F_CTRL_RX_EXTRA,	"RxModeExtra"	},
203 	{ VIRTIO_NET_F_GUEST_ANNOUNCE,	"GuestAnnounce"	},
204 	{ VIRTIO_NET_F_MQ,		"RFS"		},
205 	{ VIRTIO_NET_F_CTRL_MAC_ADDR,	"SetMacAddress"	},
206 	{ 0, NULL }
207 };
208 
209 static device_method_t vtnet_methods[] = {
210 	/* Device methods. */
211 	DEVMETHOD(device_probe,		vtnet_probe),
212 	DEVMETHOD(device_attach,	vtnet_attach),
213 	DEVMETHOD(device_detach,	vtnet_detach),
214 	DEVMETHOD(device_suspend,	vtnet_suspend),
215 	DEVMETHOD(device_resume,	vtnet_resume),
216 	DEVMETHOD(device_shutdown,	vtnet_shutdown),
217 
218 	/* VirtIO methods. */
219 	DEVMETHOD(virtio_config_change, vtnet_config_change),
220 
221 	DEVMETHOD_END
222 };
223 
224 static driver_t vtnet_driver = {
225 	"vtnet",
226 	vtnet_methods,
227 	sizeof(struct vtnet_softc)
228 };
229 
230 static devclass_t vtnet_devclass;
231 
232 DRIVER_MODULE(vtnet, virtio_pci, vtnet_driver, vtnet_devclass, NULL, NULL);
233 MODULE_VERSION(vtnet, 1);
234 MODULE_DEPEND(vtnet, virtio, 1, 1, 1);
235 
236 static int
237 vtnet_probe(device_t dev)
238 {
239 	if (virtio_get_device_type(dev) != VIRTIO_ID_NETWORK)
240 		return (ENXIO);
241 
242 	device_set_desc(dev, "VirtIO Networking Adapter");
243 
244 	return (BUS_PROBE_DEFAULT);
245 }
246 
247 static int
248 vtnet_attach(device_t dev)
249 {
250 	struct vtnet_softc *sc;
251 	int error;
252 
253 	sc = device_get_softc(dev);
254 	sc->vtnet_dev = dev;
255 
256 	lwkt_serialize_init(&sc->vtnet_slz);
257 	callout_init(&sc->vtnet_tick_ch);
258 
259 	ifmedia_init(&sc->vtnet_media, IFM_IMASK, vtnet_ifmedia_upd,
260 		     vtnet_ifmedia_sts);
261 	ifmedia_add(&sc->vtnet_media, VTNET_MEDIATYPE, 0, NULL);
262 	ifmedia_set(&sc->vtnet_media, VTNET_MEDIATYPE);
263 
264 	vtnet_add_statistics(sc);
265 	SLIST_INIT(&sc->vtnet_txhdr_free);
266 
267 	/* Register our feature descriptions. */
268 	virtio_set_feature_desc(dev, vtnet_feature_desc);
269 	vtnet_negotiate_features(sc);
270 
271 	if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
272 		sc->vtnet_flags |= VTNET_FLAG_INDIRECT;
273 
274 	if (virtio_with_feature(dev, VIRTIO_NET_F_MAC)) {
275 		/* This feature should always be negotiated. */
276 		sc->vtnet_flags |= VTNET_FLAG_MAC;
277 	}
278 
279 	if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) {
280 		sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS;
281 		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
282 	} else {
283 		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
284 	}
285 
286 	sc->vtnet_rx_mbuf_size = MCLBYTES;
287 	sc->vtnet_rx_mbuf_count = VTNET_NEEDED_RX_MBUFS(sc);
288 
289 	if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VQ)) {
290 		sc->vtnet_flags |= VTNET_FLAG_CTRL_VQ;
291 
292 		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX))
293 			sc->vtnet_flags |= VTNET_FLAG_CTRL_RX;
294 		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VLAN))
295 			sc->vtnet_flags |= VTNET_FLAG_VLAN_FILTER;
296 		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
297 		    virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX))
298 			sc->vtnet_flags |= VTNET_FLAG_CTRL_MAC;
299 	}
300 
301 	/* Read (or generate) the MAC address for the adapter. */
302 	vtnet_get_hwaddr(sc);
303 
304 	error = vtnet_alloc_virtqueues(sc);
305 	if (error) {
306 		device_printf(dev, "cannot allocate virtqueues\n");
307 		goto fail;
308 	}
309 
310 	error = vtnet_setup_interface(sc);
311 	if (error) {
312 		device_printf(dev, "cannot setup interface\n");
313 		goto fail;
314 	}
315 
316 	TASK_INIT(&sc->vtnet_cfgchg_task, 0, vtnet_config_change_task, sc);
317 
318 	error = virtio_setup_intr(dev, &sc->vtnet_slz);
319 	if (error) {
320 		device_printf(dev, "cannot setup virtqueue interrupts\n");
321 		ether_ifdetach(sc->vtnet_ifp);
322 		goto fail;
323 	}
324 
325 	if ((sc->vtnet_flags & VTNET_FLAG_MAC) == 0) {
326 		lwkt_serialize_enter(&sc->vtnet_slz);
327 		vtnet_set_hwaddr(sc);
328 		lwkt_serialize_exit(&sc->vtnet_slz);
329 	}
330 
331 	/*
332 	 * Device defaults to promiscuous mode for backwards
333 	 * compatibility. Turn it off if possible.
334 	 */
335 	if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
336 		lwkt_serialize_enter(&sc->vtnet_slz);
337 		if (vtnet_set_promisc(sc, 0) != 0) {
338 			sc->vtnet_ifp->if_flags |= IFF_PROMISC;
339 			device_printf(dev,
340 			    "cannot disable promiscuous mode\n");
341 		}
342 		lwkt_serialize_exit(&sc->vtnet_slz);
343 	} else
344 		sc->vtnet_ifp->if_flags |= IFF_PROMISC;
345 
346 fail:
347 	if (error)
348 		vtnet_detach(dev);
349 
350 	return (error);
351 }
352 
353 static int
354 vtnet_detach(device_t dev)
355 {
356 	struct vtnet_softc *sc;
357 	struct ifnet *ifp;
358 
359 	sc = device_get_softc(dev);
360 	ifp = sc->vtnet_ifp;
361 
362 	if (device_is_attached(dev)) {
363 		lwkt_serialize_enter(&sc->vtnet_slz);
364 		vtnet_stop(sc);
365 		lwkt_serialize_exit(&sc->vtnet_slz);
366 
367 		callout_stop(&sc->vtnet_tick_ch);
368 		taskqueue_drain(taskqueue_swi, &sc->vtnet_cfgchg_task);
369 
370 		ether_ifdetach(ifp);
371 	}
372 
373 	if (sc->vtnet_vlan_attach != NULL) {
374 		EVENTHANDLER_DEREGISTER(vlan_config, sc->vtnet_vlan_attach);
375 		sc->vtnet_vlan_attach = NULL;
376 	}
377 	if (sc->vtnet_vlan_detach != NULL) {
378 		EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vtnet_vlan_detach);
379 		sc->vtnet_vlan_detach = NULL;
380 	}
381 
382 	if (ifp) {
383 		if_free(ifp);
384 		sc->vtnet_ifp = NULL;
385 	}
386 
387 	if (sc->vtnet_rx_vq != NULL)
388 		vtnet_free_rx_mbufs(sc);
389 	if (sc->vtnet_tx_vq != NULL)
390 		vtnet_free_tx_mbufs(sc);
391 	if (sc->vtnet_ctrl_vq != NULL)
392 		vtnet_free_ctrl_vq(sc);
393 
394 	if (sc->vtnet_txhdrarea != NULL) {
395 		contigfree(sc->vtnet_txhdrarea,
396 		    sc->vtnet_txhdrcount * sizeof(struct vtnet_tx_header),
397 		    M_VTNET);
398 		sc->vtnet_txhdrarea = NULL;
399 	}
400 	SLIST_INIT(&sc->vtnet_txhdr_free);
401 	if (sc->vtnet_macfilter != NULL) {
402 		contigfree(sc->vtnet_macfilter,
403 		    sizeof(struct vtnet_mac_filter), M_DEVBUF);
404 		sc->vtnet_macfilter = NULL;
405 	}
406 
407 	ifmedia_removeall(&sc->vtnet_media);
408 
409 	return (0);
410 }
411 
412 static int
413 vtnet_suspend(device_t dev)
414 {
415 	struct vtnet_softc *sc;
416 
417 	sc = device_get_softc(dev);
418 
419 	lwkt_serialize_enter(&sc->vtnet_slz);
420 	vtnet_stop(sc);
421 	sc->vtnet_flags |= VTNET_FLAG_SUSPENDED;
422 	lwkt_serialize_exit(&sc->vtnet_slz);
423 
424 	return (0);
425 }
426 
427 static int
428 vtnet_resume(device_t dev)
429 {
430 	struct vtnet_softc *sc;
431 	struct ifnet *ifp;
432 
433 	sc = device_get_softc(dev);
434 	ifp = sc->vtnet_ifp;
435 
436 	lwkt_serialize_enter(&sc->vtnet_slz);
437 	if (ifp->if_flags & IFF_UP)
438 		vtnet_init_locked(sc);
439 	sc->vtnet_flags &= ~VTNET_FLAG_SUSPENDED;
440 	lwkt_serialize_exit(&sc->vtnet_slz);
441 
442 	return (0);
443 }
444 
445 static int
446 vtnet_shutdown(device_t dev)
447 {
448 
449 	/*
450 	 * Suspend already does all of what we need to
451 	 * do here; we just never expect to be resumed.
452 	 */
453 	return (vtnet_suspend(dev));
454 }
455 
456 static int
457 vtnet_config_change(device_t dev)
458 {
459 	struct vtnet_softc *sc;
460 
461 	sc = device_get_softc(dev);
462 
463 	taskqueue_enqueue(taskqueue_thread[mycpuid], &sc->vtnet_cfgchg_task);
464 
465 	return (1);
466 }
467 
468 static void
469 vtnet_negotiate_features(struct vtnet_softc *sc)
470 {
471 	device_t dev;
472 	uint64_t mask, features;
473 
474 	dev = sc->vtnet_dev;
475 	mask = 0;
476 
477 	if (vtnet_csum_disable)
478 		mask |= VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM;
479 
480 	/*
481 	 * XXX DragonFly doesn't support receive checksum offload for IPv6 yet,
482 	 *     so always disable this virtio feature for now.
483 	 * XXX We need to support the DynOffload feature in order to
484 	 *     enable/disable it dynamically.
485 	 */
486 	mask |= VIRTIO_NET_F_GUEST_CSUM;
487 
488 	/*
489 	 * TSO is only available when the tx checksum offload feature is also
490 	 * negotiated.
491 	 */
492 	if (vtnet_csum_disable || vtnet_tso_disable)
493 		mask |= VIRTIO_NET_F_HOST_TSO4 | VIRTIO_NET_F_HOST_TSO6 |
494 		    VIRTIO_NET_F_HOST_ECN;
495 
496 	if (vtnet_lro_disable)
497 		mask |= VTNET_LRO_FEATURES;
498 
499 	features = VTNET_FEATURES & ~mask;
500 	features |= VIRTIO_F_NOTIFY_ON_EMPTY;
501 	features |= VIRTIO_F_ANY_LAYOUT;
502 	sc->vtnet_features = virtio_negotiate_features(dev, features);
503 
504 	if (virtio_with_feature(dev, VTNET_LRO_FEATURES) &&
505 	    virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF) == 0) {
506 		/*
507 		 * LRO without mergeable buffers requires special care. This
508 		 * is not ideal because every receive buffer must be large
509 		 * enough to hold the maximum TCP packet, the Ethernet header,
510 		 * and the VirtIO header. This requires up to 34 descriptors with
511 		 * MCLBYTES clusters. If we do not have indirect descriptors,
512 		 * LRO is disabled since the virtqueue will not contain very
513 		 * many receive buffers.
514 		 */
515 		if (!virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) {
516 			device_printf(dev,
517 			    "LRO disabled due to both mergeable buffers and "
518 			    "indirect descriptors not negotiated\n");
519 
520 			features &= ~VTNET_LRO_FEATURES;
521 			sc->vtnet_features =
522 			    virtio_negotiate_features(dev, features);
523 		} else
524 			sc->vtnet_flags |= VTNET_FLAG_LRO_NOMRG;
525 	}
526 }
527 
528 static int
529 vtnet_alloc_virtqueues(struct vtnet_softc *sc)
530 {
531 	device_t dev;
532 	struct vq_alloc_info vq_info[3];
533 	int nvqs;
534 
535 	dev = sc->vtnet_dev;
536 	nvqs = 2;
537 
538 	/*
539 	 * Indirect descriptors are not needed for the Rx
540 	 * virtqueue when mergeable buffers are negotiated.
541 	 * The header is placed inline with the data, not
542 	 * in a separate descriptor, and mbuf clusters are
543 	 * always physically contiguous.
544 	 */
545 	if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
546 		sc->vtnet_rx_nsegs = (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG) ?
547 		    VTNET_MAX_RX_SEGS : VTNET_MIN_RX_SEGS;
548 	} else
549 		sc->vtnet_rx_nsegs = VTNET_MRG_RX_SEGS;
550 
551 	if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4) ||
552             virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
553 		sc->vtnet_tx_nsegs = VTNET_MAX_TX_SEGS;
554 	else
555 		sc->vtnet_tx_nsegs = VTNET_MIN_TX_SEGS;
556 
557 	VQ_ALLOC_INFO_INIT(&vq_info[0], sc->vtnet_rx_nsegs,
558 	    vtnet_rx_vq_intr, sc, &sc->vtnet_rx_vq,
559 	    "%s receive", device_get_nameunit(dev));
560 
561 	VQ_ALLOC_INFO_INIT(&vq_info[1], sc->vtnet_tx_nsegs,
562 	    vtnet_tx_vq_intr, sc, &sc->vtnet_tx_vq,
563 	    "%s transmit", device_get_nameunit(dev));
564 
565 	if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
566 		nvqs++;
567 
568 		VQ_ALLOC_INFO_INIT(&vq_info[2], 0, NULL, NULL,
569 		    &sc->vtnet_ctrl_vq, "%s control",
570 		    device_get_nameunit(dev));
571 	}
572 
573 	return (virtio_alloc_virtqueues(dev, 0, nvqs, vq_info));
574 }
575 
576 static int
577 vtnet_setup_interface(struct vtnet_softc *sc)
578 {
579 	device_t dev;
580 	struct ifnet *ifp;
581 	int i;
582 
583 	dev = sc->vtnet_dev;
584 
585 	ifp = sc->vtnet_ifp = if_alloc(IFT_ETHER);
586 	if (ifp == NULL) {
587 		device_printf(dev, "cannot allocate ifnet structure\n");
588 		return (ENOSPC);
589 	}
590 
591 	ifp->if_softc = sc;
592 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
593 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
594 	ifp->if_init = vtnet_init;
595 	ifp->if_start = vtnet_start;
596 	ifp->if_ioctl = vtnet_ioctl;
597 
598 	sc->vtnet_rx_process_limit = virtqueue_size(sc->vtnet_rx_vq);
599 	sc->vtnet_tx_size = virtqueue_size(sc->vtnet_tx_vq);
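	/*
	 * Size the Tx header pool. With indirect descriptors each packet
	 * occupies a single ring slot; otherwise the header and data need
	 * at least two slots per packet, so at most about half the ring
	 * can be in flight at once.
	 */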
600 	if (sc->vtnet_flags & VTNET_FLAG_INDIRECT)
601 		sc->vtnet_txhdrcount = sc->vtnet_tx_size;
602 	else
603 		sc->vtnet_txhdrcount = (sc->vtnet_tx_size / 2) + 1;
604 	sc->vtnet_txhdrarea = contigmalloc(
605 	    sc->vtnet_txhdrcount * sizeof(struct vtnet_tx_header),
606 	    M_VTNET, M_WAITOK, 0, BUS_SPACE_MAXADDR, 4, 0);
607 	if (sc->vtnet_txhdrarea == NULL) {
608 		device_printf(dev, "cannot contigmalloc the tx headers\n");
609 		return (ENOMEM);
610 	}
611 	for (i = 0; i < sc->vtnet_txhdrcount; i++)
612 		vtnet_enqueue_txhdr(sc, &sc->vtnet_txhdrarea[i]);
613 	sc->vtnet_macfilter = contigmalloc(
614 	    sizeof(struct vtnet_mac_filter),
615 	    M_DEVBUF, M_WAITOK, 0, BUS_SPACE_MAXADDR, 4, 0);
616 	if (sc->vtnet_macfilter == NULL) {
617 		device_printf(dev,
618 		    "cannot contigmalloc the mac filter table\n");
619 		return (ENOMEM);
620 	}
621 	ifq_set_maxlen(&ifp->if_snd, sc->vtnet_tx_size - 1);
622 	ifq_set_ready(&ifp->if_snd);
623 
624 	ether_ifattach(ifp, sc->vtnet_hwaddr, NULL);
625 
626 	/* Tell the upper layer(s) we support long frames. */
627 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
628 	ifp->if_capabilities |= IFCAP_JUMBO_MTU | IFCAP_VLAN_MTU;
629 
630 	if (virtio_with_feature(dev, VIRTIO_NET_F_CSUM)) {
631 		ifp->if_capabilities |= IFCAP_TXCSUM;
632 
633 		if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4))
634 			ifp->if_capabilities |= IFCAP_TSO4;
635 		if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
636 			ifp->if_capabilities |= IFCAP_TSO6;
637 		if (ifp->if_capabilities & IFCAP_TSO)
638 			ifp->if_capabilities |= IFCAP_VLAN_HWTSO;
639 
640 		if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN))
641 			sc->vtnet_flags |= VTNET_FLAG_TSO_ECN;
642 	}
643 
644 	if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM))
645 		ifp->if_capabilities |= IFCAP_RXCSUM;
646 
647 #if 0	/* IFCAP_LRO doesn't exist in DragonFly. */
648 	if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) ||
649 	    virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6))
650 		ifp->if_capabilities |= IFCAP_LRO;
651 #endif
652 
653 	if ((ifp->if_capabilities & IFCAP_HWCSUM) == IFCAP_HWCSUM) {
654 		/*
655 		 * VirtIO does not support VLAN tagging, but we can fake
656 		 * it by inserting and removing the 802.1Q header during
657 		 * transmit and receive. We are then able to do checksum
658 		 * offloading of VLAN frames.
659 		 */
660 		ifp->if_capabilities |=
661 			IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
662 	}
663 
664 	ifp->if_capenable = ifp->if_capabilities;
665 
666 	/*
667 	 * Capabilities after here are not enabled by default.
668 	 */
669 
670 	if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
671 		ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
672 
673 		sc->vtnet_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
674 		    vtnet_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
675 		sc->vtnet_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
676 		    vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
677 	}
678 
679 	return (0);
680 }
681 
682 static void
683 vtnet_set_hwaddr(struct vtnet_softc *sc)
684 {
685 	device_t dev;
686 
687 	dev = sc->vtnet_dev;
688 
689 	if ((sc->vtnet_flags & VTNET_FLAG_CTRL_MAC) &&
690 	    (sc->vtnet_flags & VTNET_FLAG_CTRL_RX)) {
691 		if (vtnet_ctrl_mac_cmd(sc, sc->vtnet_hwaddr) != 0)
692 			device_printf(dev, "unable to set MAC address\n");
693 	} else if (sc->vtnet_flags & VTNET_FLAG_MAC) {
694 		virtio_write_device_config(dev,
695 		    offsetof(struct virtio_net_config, mac),
696 		    sc->vtnet_hwaddr, ETHER_ADDR_LEN);
697 	}
698 }
699 
700 static void
701 vtnet_get_hwaddr(struct vtnet_softc *sc)
702 {
703 	device_t dev;
704 
705 	dev = sc->vtnet_dev;
706 
707 	if ((sc->vtnet_flags & VTNET_FLAG_MAC) == 0) {
708 		/*
709 		 * Generate a random locally administered unicast address.
710 		 *
711 		 * It would be nice to generate the same MAC address across
712 		 * reboots, but it seems all the hosts currently available
713 		 * support the MAC feature, so this isn't too important.
714 		 */
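		/* 0xB2: unicast (I/G bit clear) and locally administered (U/L bit set). */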
715 		sc->vtnet_hwaddr[0] = 0xB2;
716 		karc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1);
717 		return;
718 	}
719 
720 	virtio_read_device_config(dev,
721 	    offsetof(struct virtio_net_config, mac),
722 	    sc->vtnet_hwaddr, ETHER_ADDR_LEN);
723 }
724 
725 static int
726 vtnet_is_link_up(struct vtnet_softc *sc)
727 {
728 	device_t dev;
729 	struct ifnet *ifp;
730 	uint16_t status;
731 
732 	dev = sc->vtnet_dev;
733 	ifp = sc->vtnet_ifp;
734 
735 	ASSERT_SERIALIZED(&sc->vtnet_slz);
736 
737 	if (virtio_with_feature(dev, VIRTIO_NET_F_STATUS)) {
738 		status = virtio_read_dev_config_2(dev,
739 				offsetof(struct virtio_net_config, status));
740 	} else {
741 		status = VIRTIO_NET_S_LINK_UP;
742 	}
743 
744 	return ((status & VIRTIO_NET_S_LINK_UP) != 0);
745 }
746 
747 static void
748 vtnet_update_link_status(struct vtnet_softc *sc)
749 {
750 	device_t dev;
751 	struct ifnet *ifp;
752 	struct ifaltq_subque *ifsq;
753 	int link;
754 
755 	dev = sc->vtnet_dev;
756 	ifp = sc->vtnet_ifp;
757 	ifsq = ifq_get_subq_default(&ifp->if_snd);
758 
759 	link = vtnet_is_link_up(sc);
760 
761 	if (link && ((sc->vtnet_flags & VTNET_FLAG_LINK) == 0)) {
762 		sc->vtnet_flags |= VTNET_FLAG_LINK;
763 		if (bootverbose)
764 			device_printf(dev, "Link is up\n");
765 		ifp->if_link_state = LINK_STATE_UP;
766 		if_link_state_change(ifp);
767 		if (!ifsq_is_empty(ifsq))
768 			vtnet_start_locked(ifp, ifsq);
769 	} else if (!link && (sc->vtnet_flags & VTNET_FLAG_LINK)) {
770 		sc->vtnet_flags &= ~VTNET_FLAG_LINK;
771 		if (bootverbose)
772 			device_printf(dev, "Link is down\n");
773 
774 		ifp->if_link_state = LINK_STATE_DOWN;
775 		if_link_state_change(ifp);
776 	}
777 }
778 
779 #if 0
780 static void
781 vtnet_watchdog(struct vtnet_softc *sc)
782 {
783 	struct ifnet *ifp;
784 
785 	ifp = sc->vtnet_ifp;
786 
787 #ifdef VTNET_TX_INTR_MODERATION
788 	vtnet_txeof(sc);
789 #endif
790 
791 	if (sc->vtnet_watchdog_timer == 0 || --sc->vtnet_watchdog_timer)
792 		return;
793 
794 	if_printf(ifp, "watchdog timeout -- resetting\n");
795 #ifdef VTNET_DEBUG
796 	virtqueue_dump(sc->vtnet_tx_vq);
797 #endif
798 	ifp->if_oerrors++;
799 	ifp->if_flags &= ~IFF_RUNNING;
800 	vtnet_init_locked(sc);
801 }
802 #endif
803 
804 static void
805 vtnet_config_change_task(void *arg, int pending)
806 {
807 	struct vtnet_softc *sc;
808 
809 	sc = arg;
810 
811 	lwkt_serialize_enter(&sc->vtnet_slz);
812 	vtnet_update_link_status(sc);
813 	lwkt_serialize_exit(&sc->vtnet_slz);
814 }
815 
816 static int
817 vtnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
818 {
819 	struct vtnet_softc *sc;
820 	struct ifreq *ifr;
821 	int reinit, mask, error;
822 
823 	sc = ifp->if_softc;
824 	ifr = (struct ifreq *) data;
825 	reinit = 0;
826 	error = 0;
827 
828 	switch (cmd) {
829 	case SIOCSIFMTU:
830 		if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > VTNET_MAX_MTU)
831 			error = EINVAL;
832 		else if (ifp->if_mtu != ifr->ifr_mtu) {
833 			lwkt_serialize_enter(&sc->vtnet_slz);
834 			error = vtnet_change_mtu(sc, ifr->ifr_mtu);
835 			lwkt_serialize_exit(&sc->vtnet_slz);
836 		}
837 		break;
838 
839 	case SIOCSIFFLAGS:
840 		lwkt_serialize_enter(&sc->vtnet_slz);
841 		if ((ifp->if_flags & IFF_UP) == 0) {
842 			if (ifp->if_flags & IFF_RUNNING)
843 				vtnet_stop(sc);
844 		} else if (ifp->if_flags & IFF_RUNNING) {
845 			if ((ifp->if_flags ^ sc->vtnet_if_flags) &
846 			    (IFF_PROMISC | IFF_ALLMULTI)) {
847 				if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX)
848 					vtnet_rx_filter(sc);
849 				else
850 					error = ENOTSUP;
851 			}
852 		} else
853 			vtnet_init_locked(sc);
854 
855 		if (error == 0)
856 			sc->vtnet_if_flags = ifp->if_flags;
857 		lwkt_serialize_exit(&sc->vtnet_slz);
858 		break;
859 
860 	case SIOCADDMULTI:
861 	case SIOCDELMULTI:
862 		lwkt_serialize_enter(&sc->vtnet_slz);
863 		if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) &&
864 		    (ifp->if_flags & IFF_RUNNING))
865 			vtnet_rx_filter_mac(sc);
866 		lwkt_serialize_exit(&sc->vtnet_slz);
867 		break;
868 
869 	case SIOCSIFMEDIA:
870 	case SIOCGIFMEDIA:
871 		error = ifmedia_ioctl(ifp, ifr, &sc->vtnet_media, cmd);
872 		break;
873 
874 	case SIOCSIFCAP:
875 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
876 
877 		lwkt_serialize_enter(&sc->vtnet_slz);
878 
879 		if (mask & IFCAP_TXCSUM) {
880 			ifp->if_capenable ^= IFCAP_TXCSUM;
881 			if (ifp->if_capenable & IFCAP_TXCSUM)
882 				ifp->if_hwassist |= VTNET_CSUM_OFFLOAD;
883 			else
884 				ifp->if_hwassist &= ~VTNET_CSUM_OFFLOAD;
885 		}
886 
887 		if (mask & IFCAP_TSO4) {
888 			ifp->if_capenable ^= IFCAP_TSO4;
889 			if (ifp->if_capenable & IFCAP_TSO4)
890 				ifp->if_hwassist |= CSUM_TSO;
891 			else
892 				ifp->if_hwassist &= ~CSUM_TSO;
893 		}
894 
895 		if (mask & IFCAP_RXCSUM) {
896 			ifp->if_capenable ^= IFCAP_RXCSUM;
897 			reinit = 1;
898 		}
899 
900 #if 0	/* IFCAP_LRO doesn't exist in DragonFly. */
901 		if (mask & IFCAP_LRO) {
902 			ifp->if_capenable ^= IFCAP_LRO;
903 			reinit = 1;
904 		}
905 #endif
906 
907 		if (mask & IFCAP_VLAN_HWFILTER) {
908 			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
909 			reinit = 1;
910 		}
911 
912 		if (mask & IFCAP_VLAN_HWTSO)
913 			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
914 
915 		if (mask & IFCAP_VLAN_HWTAGGING)
916 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
917 
918 		if (reinit && (ifp->if_flags & IFF_RUNNING)) {
919 			ifp->if_flags &= ~IFF_RUNNING;
920 			vtnet_init_locked(sc);
921 		}
922 		//VLAN_CAPABILITIES(ifp);
923 
924 		lwkt_serialize_exit(&sc->vtnet_slz);
925 		break;
926 
927 	default:
928 		error = ether_ioctl(ifp, cmd, data);
929 		break;
930 	}
931 
932 	return (error);
933 }
934 
935 static int
936 vtnet_change_mtu(struct vtnet_softc *sc, int new_mtu)
937 {
938 	struct ifnet *ifp;
939 	int new_frame_size, clsize;
940 
941 	ifp = sc->vtnet_ifp;
942 
943 	if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
944 		new_frame_size = sizeof(struct vtnet_rx_header) +
945 		    sizeof(struct ether_vlan_header) + new_mtu;
946 
947 		if (new_frame_size > MJUM9BYTES)
948 			return (EINVAL);
949 
950 		if (new_frame_size <= MCLBYTES)
951 			clsize = MCLBYTES;
952 		else
953 			clsize = MJUM9BYTES;
954 	} else {
955 		new_frame_size = sizeof(struct virtio_net_hdr_mrg_rxbuf) +
956 		    sizeof(struct ether_vlan_header) + new_mtu;
957 
958 		if (new_frame_size <= MCLBYTES)
959 			clsize = MCLBYTES;
960 		else
961 			clsize = MJUMPAGESIZE;
962 	}
963 
964 	sc->vtnet_rx_mbuf_size = clsize;
965 	sc->vtnet_rx_mbuf_count = VTNET_NEEDED_RX_MBUFS(sc);
966 	KASSERT(sc->vtnet_rx_mbuf_count < VTNET_MAX_RX_SEGS,
967 	    ("too many rx mbufs: %d", sc->vtnet_rx_mbuf_count));
968 
969 	ifp->if_mtu = new_mtu;
970 
971 	if (ifp->if_flags & IFF_RUNNING) {
972 		ifp->if_flags &= ~IFF_RUNNING;
973 		vtnet_init_locked(sc);
974 	}
975 
976 	return (0);
977 }
978 
979 static int
980 vtnet_init_rx_vq(struct vtnet_softc *sc)
981 {
982 	struct virtqueue *vq;
983 	int nbufs, error;
984 
985 	vq = sc->vtnet_rx_vq;
986 	nbufs = 0;
987 	error = ENOSPC;
988 
989 	while (!virtqueue_full(vq)) {
990 		if ((error = vtnet_newbuf(sc)) != 0)
991 			break;
992 		nbufs++;
993 	}
994 
995 	if (nbufs > 0) {
996 		virtqueue_notify(vq, &sc->vtnet_slz);
997 
998 		/*
999 		 * EMSGSIZE signifies the virtqueue did not have enough
1000 		 * entries available to hold the last mbuf. This is not
1001 		 * an error. We should not get ENOSPC since we check if
1002 		 * the virtqueue is full before attempting to add a
1003 		 * buffer.
1004 		 */
1005 		if (error == EMSGSIZE)
1006 			error = 0;
1007 	}
1008 
1009 	return (error);
1010 }
1011 
1012 static void
1013 vtnet_free_rx_mbufs(struct vtnet_softc *sc)
1014 {
1015 	struct virtqueue *vq;
1016 	struct mbuf *m;
1017 	int last;
1018 
1019 	vq = sc->vtnet_rx_vq;
1020 	last = 0;
1021 
1022 	while ((m = virtqueue_drain(vq, &last)) != NULL)
1023 		m_freem(m);
1024 
1025 	KASSERT(virtqueue_empty(vq), ("mbufs remaining in Rx Vq"));
1026 }
1027 
1028 static void
1029 vtnet_free_tx_mbufs(struct vtnet_softc *sc)
1030 {
1031 	struct virtqueue *vq;
1032 	struct vtnet_tx_header *txhdr;
1033 	int last;
1034 
1035 	vq = sc->vtnet_tx_vq;
1036 	last = 0;
1037 
1038 	while ((txhdr = virtqueue_drain(vq, &last)) != NULL) {
1039 		m_freem(txhdr->vth_mbuf);
1040 		vtnet_enqueue_txhdr(sc, txhdr);
1041 	}
1042 
1043 	KASSERT(virtqueue_empty(vq), ("mbufs remaining in Tx Vq"));
1044 }
1045 
1046 static void
1047 vtnet_free_ctrl_vq(struct vtnet_softc *sc)
1048 {
1049 	/*
1050 	 * The control virtqueue is only polled, so
1051 	 * it should already be empty.
1052 	 */
1053 	KASSERT(virtqueue_empty(sc->vtnet_ctrl_vq),
1054 		("Ctrl Vq not empty"));
1055 }
1056 
1057 static struct mbuf *
1058 vtnet_alloc_rxbuf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp)
1059 {
1060 	struct mbuf *m_head, *m_tail, *m;
1061 	int i, clsize;
1062 
1063 	clsize = sc->vtnet_rx_mbuf_size;
1064 
1065 	/* Use m_getcl() instead of m_getjcl(); see the comment at if_mxge.c line 2398. */
1066 	//m_head = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, clsize);
1067 	m_head = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
1068 	if (m_head == NULL)
1069 		goto fail;
1070 
1071 	m_head->m_len = clsize;
1072 	m_tail = m_head;
1073 
1074 	if (nbufs > 1) {
1075 		KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
1076 			("chained Rx mbuf requested without LRO_NOMRG"));
1077 
1078 		for (i = 0; i < nbufs - 1; i++) {
1079 			//m = m_getjcl(M_DONTWAIT, MT_DATA, 0, clsize);
1080 			m = m_getcl(M_NOWAIT, MT_DATA, 0);
1081 			if (m == NULL)
1082 				goto fail;
1083 
1084 			m->m_len = clsize;
1085 			m_tail->m_next = m;
1086 			m_tail = m;
1087 		}
1088 	}
1089 
1090 	if (m_tailp != NULL)
1091 		*m_tailp = m_tail;
1092 
1093 	return (m_head);
1094 
1095 fail:
1096 	sc->vtnet_stats.mbuf_alloc_failed++;
1097 	m_freem(m_head);
1098 
1099 	return (NULL);
1100 }
1101 
1102 static int
1103 vtnet_replace_rxbuf(struct vtnet_softc *sc, struct mbuf *m0, int len0)
1104 {
1105 	struct mbuf *m, *m_prev;
1106 	struct mbuf *m_new, *m_tail;
1107 	int len, clsize, nreplace, error;
1108 
1109 	m = m0;
1110 	m_prev = NULL;
1111 	len = len0;
1112 
1113 	m_tail = NULL;
1114 	clsize = sc->vtnet_rx_mbuf_size;
1115 	nreplace = 0;
1116 
1117 	if (m->m_next != NULL)
1118 		KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
1119 		    ("chained Rx mbuf without LRO_NOMRG"));
1120 
1121 	/*
1122 	 * Since LRO_NOMRG mbuf chains are so large, we want to avoid
1123 	 * allocating an entire chain for each received frame. When
1124 	 * the received frame's length is less than that of the chain,
1125 	 * the unused mbufs are reassigned to the new chain.
1126 	 */
1127 	while (len > 0) {
1128 		/*
1129 		 * Something is seriously wrong if we received
1130 		 * a frame larger than the mbuf chain. Drop it.
1131 		 */
1132 		if (m == NULL) {
1133 			sc->vtnet_stats.rx_frame_too_large++;
1134 			return (EMSGSIZE);
1135 		}
1136 
1137 		KASSERT(m->m_len == clsize,
1138 		    ("mbuf length not expected cluster size: %d",
1139 		    m->m_len));
1140 
1141 		m->m_len = MIN(m->m_len, len);
1142 		len -= m->m_len;
1143 
1144 		m_prev = m;
1145 		m = m->m_next;
1146 		nreplace++;
1147 	}
1148 
1149 	KASSERT(m_prev != NULL, ("m_prev == NULL"));
1150 	KASSERT(nreplace <= sc->vtnet_rx_mbuf_count,
1151 		("too many replacement mbufs: %d/%d", nreplace,
1152 		sc->vtnet_rx_mbuf_count));
1153 
1154 	m_new = vtnet_alloc_rxbuf(sc, nreplace, &m_tail);
1155 	if (m_new == NULL) {
1156 		m_prev->m_len = clsize;
1157 		return (ENOBUFS);
1158 	}
1159 
1160 	/*
1161 	 * Move unused mbufs, if any, from the original chain
1162 	 * onto the end of the new chain.
1163 	 */
1164 	if (m_prev->m_next != NULL) {
1165 		m_tail->m_next = m_prev->m_next;
1166 		m_prev->m_next = NULL;
1167 	}
1168 
1169 	error = vtnet_enqueue_rxbuf(sc, m_new);
1170 	if (error) {
1171 		/*
1172 		 * BAD! We could not enqueue the replacement mbuf chain. We
1173 		 * must restore the m0 chain to the original state if it was
1174 		 * modified so we can subsequently discard it.
1175 		 *
1176 	 * NOTE: The replacement is supposed to be an identical copy
1177 	 * of the one just dequeued, so this is an unexpected error.
1178 		 */
1179 		sc->vtnet_stats.rx_enq_replacement_failed++;
1180 
1181 		if (m_tail->m_next != NULL) {
1182 			m_prev->m_next = m_tail->m_next;
1183 			m_tail->m_next = NULL;
1184 		}
1185 
1186 		m_prev->m_len = clsize;
1187 		m_freem(m_new);
1188 	}
1189 
1190 	return (error);
1191 }
1192 
1193 static int
1194 vtnet_newbuf(struct vtnet_softc *sc)
1195 {
1196 	struct mbuf *m;
1197 	int error;
1198 
1199 	m = vtnet_alloc_rxbuf(sc, sc->vtnet_rx_mbuf_count, NULL);
1200 	if (m == NULL)
1201 		return (ENOBUFS);
1202 
1203 	error = vtnet_enqueue_rxbuf(sc, m);
1204 	if (error)
1205 		m_freem(m);
1206 
1207 	return (error);
1208 }
1209 
1210 static void
1211 vtnet_discard_merged_rxbuf(struct vtnet_softc *sc, int nbufs)
1212 {
1213 	struct virtqueue *vq;
1214 	struct mbuf *m;
1215 
1216 	vq = sc->vtnet_rx_vq;
1217 
1218 	while (--nbufs > 0) {
1219 		if ((m = virtqueue_dequeue(vq, NULL)) == NULL)
1220 			break;
1221 		vtnet_discard_rxbuf(sc, m);
1222 	}
1223 }
1224 
1225 static void
1226 vtnet_discard_rxbuf(struct vtnet_softc *sc, struct mbuf *m)
1227 {
1228 	int error;
1229 
1230 	/*
1231 	 * Requeue the discarded mbuf. This should always be
1232 	 * successful since it was just dequeued.
1233 	 */
1234 	error = vtnet_enqueue_rxbuf(sc, m);
1235 	KASSERT(error == 0, ("cannot requeue discarded mbuf"));
1236 }
1237 
1238 static int
1239 vtnet_enqueue_rxbuf(struct vtnet_softc *sc, struct mbuf *m)
1240 {
1241 	struct sglist sg;
1242 	struct sglist_seg segs[VTNET_MAX_RX_SEGS];
1243 	struct vtnet_rx_header *rxhdr;
1244 	struct virtio_net_hdr *hdr;
1245 	uint8_t *mdata;
1246 	int offset, error;
1247 
1248 	ASSERT_SERIALIZED(&sc->vtnet_slz);
1249 	if ((sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG) == 0)
1250 		KASSERT(m->m_next == NULL, ("chained Rx mbuf"));
1251 
1252 	sglist_init(&sg, sc->vtnet_rx_nsegs, segs);
1253 
1254 	mdata = mtod(m, uint8_t *);
1255 	offset = 0;
1256 
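	/*
	 * Without mergeable buffers, the virtio_net_hdr is kept in a
	 * vtnet_rx_header at the front of the cluster and gets its own
	 * sg segment; the packet data then begins at 'offset'.
	 */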
1257 	if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
1258 		rxhdr = (struct vtnet_rx_header *) mdata;
1259 		hdr = &rxhdr->vrh_hdr;
1260 		offset += sizeof(struct vtnet_rx_header);
1261 
1262 		error = sglist_append(&sg, hdr, sc->vtnet_hdr_size);
1263 		KASSERT(error == 0, ("cannot add header to sglist"));
1264 	}
1265 
1266 	error = sglist_append(&sg, mdata + offset, m->m_len - offset);
1267 	if (error)
1268 		return (error);
1269 
1270 	if (m->m_next != NULL) {
1271 		error = sglist_append_mbuf(&sg, m->m_next);
1272 		if (error)
1273 			return (error);
1274 	}
1275 
1276 	return (virtqueue_enqueue(sc->vtnet_rx_vq, m, &sg, 0, sg.sg_nseg));
1277 }
1278 
1279 static void
1280 vtnet_vlan_tag_remove(struct mbuf *m)
1281 {
1282 	struct ether_vlan_header *evl;
1283 
1284 	evl = mtod(m, struct ether_vlan_header *);
1285 
1286 	m->m_pkthdr.ether_vlantag = ntohs(evl->evl_tag);
1287 	m->m_flags |= M_VLANTAG;
1288 
1289 	/* Strip the 802.1Q header. */
1290 	bcopy((char *) evl, (char *) evl + ETHER_VLAN_ENCAP_LEN,
1291 	    ETHER_HDR_LEN - ETHER_TYPE_LEN);
1292 	m_adj(m, ETHER_VLAN_ENCAP_LEN);
1293 }
1294 
1295 /*
1296  * Alternative method of doing receive checksum offloading. Rather
1297  * than parsing the received frame down to the IP header, use the
1298  * csum_offset to determine which CSUM_* flags are appropriate. We
1299  * can get by with doing this only because the checksum offsets are
1300  * unique for the things we care about.
1301  */
1302 static int
1303 vtnet_rx_csum(struct vtnet_softc *sc, struct mbuf *m,
1304     struct virtio_net_hdr *hdr)
1305 {
1306 	struct ether_header *eh;
1307 	struct ether_vlan_header *evh;
1308 	struct udphdr *udp;
1309 	int csum_len;
1310 	uint16_t eth_type;
1311 
1312 	csum_len = hdr->csum_start + hdr->csum_offset;
1313 
1314 	if (csum_len < sizeof(struct ether_header) + sizeof(struct ip))
1315 		return (1);
1316 	if (m->m_len < csum_len)
1317 		return (1);
1318 
1319 	eh = mtod(m, struct ether_header *);
1320 	eth_type = ntohs(eh->ether_type);
1321 	if (eth_type == ETHERTYPE_VLAN) {
1322 		evh = mtod(m, struct ether_vlan_header *);
1323 		eth_type = ntohs(evh->evl_proto);
1324 	}
1325 
1326 	if (eth_type != ETHERTYPE_IP && eth_type != ETHERTYPE_IPV6) {
1327 		sc->vtnet_stats.rx_csum_bad_ethtype++;
1328 		return (1);
1329 	}
1330 
1331 	/* Use the offset to determine the appropriate CSUM_* flags. */
1332 	switch (hdr->csum_offset) {
1333 	case offsetof(struct udphdr, uh_sum):
1334 		if (m->m_len < hdr->csum_start + sizeof(struct udphdr))
1335 			return (1);
1336 		udp = (struct udphdr *)(mtod(m, uint8_t *) + hdr->csum_start);
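		/* A zero UDP checksum means the sender did not compute one. */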
1337 		if (udp->uh_sum == 0)
1338 			return (0);
1339 
1340 		/* FALLTHROUGH */
1341 
1342 	case offsetof(struct tcphdr, th_sum):
1343 		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1344 		m->m_pkthdr.csum_data = 0xFFFF;
1345 		break;
1346 
1347 	default:
1348 		sc->vtnet_stats.rx_csum_bad_offset++;
1349 		return (1);
1350 	}
1351 
1352 	sc->vtnet_stats.rx_csum_offloaded++;
1353 
1354 	return (0);
1355 }
1356 
1357 static int
1358 vtnet_rxeof_merged(struct vtnet_softc *sc, struct mbuf *m_head, int nbufs)
1359 {
1360 	struct ifnet *ifp;
1361 	struct virtqueue *vq;
1362 	struct mbuf *m, *m_tail;
1363 	int len;
1364 
1365 	ifp = sc->vtnet_ifp;
1366 	vq = sc->vtnet_rx_vq;
1367 	m_tail = m_head;
1368 
1369 	while (--nbufs > 0) {
1370 		m = virtqueue_dequeue(vq, &len);
1371 		if (m == NULL) {
1372 			ifp->if_ierrors++;
1373 			goto fail;
1374 		}
1375 
1376 		if (vtnet_newbuf(sc) != 0) {
1377 			ifp->if_iqdrops++;
1378 			vtnet_discard_rxbuf(sc, m);
1379 			if (nbufs > 1)
1380 				vtnet_discard_merged_rxbuf(sc, nbufs);
1381 			goto fail;
1382 		}
1383 
1384 		if (m->m_len < len)
1385 			len = m->m_len;
1386 
1387 		m->m_len = len;
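		/* This mbuf becomes a non-leading fragment of the chain. */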
1388 		m->m_flags &= ~M_PKTHDR;
1389 
1390 		m_head->m_pkthdr.len += len;
1391 		m_tail->m_next = m;
1392 		m_tail = m;
1393 	}
1394 
1395 	return (0);
1396 
1397 fail:
1398 	sc->vtnet_stats.rx_mergeable_failed++;
1399 	m_freem(m_head);
1400 
1401 	return (1);
1402 }
1403 
1404 static int
1405 vtnet_rxeof(struct vtnet_softc *sc, int count, int *rx_npktsp)
1406 {
1407 	struct virtio_net_hdr lhdr;
1408 	struct ifnet *ifp;
1409 	struct virtqueue *vq;
1410 	struct mbuf *m;
1411 	struct ether_header *eh;
1412 	struct virtio_net_hdr *hdr;
1413 	struct virtio_net_hdr_mrg_rxbuf *mhdr;
1414 	int len, deq, nbufs, adjsz, rx_npkts;
1415 
1416 	ifp = sc->vtnet_ifp;
1417 	vq = sc->vtnet_rx_vq;
1418 	hdr = &lhdr;
1419 	deq = 0;
1420 	rx_npkts = 0;
1421 
1422 	ASSERT_SERIALIZED(&sc->vtnet_slz);
1423 
1424 	while (--count >= 0) {
1425 		m = virtqueue_dequeue(vq, &len);
1426 		if (m == NULL)
1427 			break;
1428 		deq++;
1429 
1430 		if (len < sc->vtnet_hdr_size + ETHER_HDR_LEN) {
1431 			ifp->if_ierrors++;
1432 			vtnet_discard_rxbuf(sc, m);
1433 			continue;
1434 		}
1435 
1436 		if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
1437 			nbufs = 1;
1438 			adjsz = sizeof(struct vtnet_rx_header);
1439 			/*
1440 			 * Account for our pad between the header and
1441 			 * the actual start of the frame.
1442 			 */
1443 			len += VTNET_RX_HEADER_PAD;
1444 		} else {
1445 			mhdr = mtod(m, struct virtio_net_hdr_mrg_rxbuf *);
1446 			nbufs = mhdr->num_buffers;
1447 			adjsz = sizeof(struct virtio_net_hdr_mrg_rxbuf);
1448 		}
1449 
1450 		if (vtnet_replace_rxbuf(sc, m, len) != 0) {
1451 			ifp->if_iqdrops++;
1452 			vtnet_discard_rxbuf(sc, m);
1453 			if (nbufs > 1)
1454 				vtnet_discard_merged_rxbuf(sc, nbufs);
1455 			continue;
1456 		}
1457 
1458 		m->m_pkthdr.len = len;
1459 		m->m_pkthdr.rcvif = ifp;
1460 		m->m_pkthdr.csum_flags = 0;
1461 
1462 		if (nbufs > 1) {
1463 			if (vtnet_rxeof_merged(sc, m, nbufs) != 0)
1464 				continue;
1465 		}
1466 
1467 		ifp->if_ipackets++;
1468 
1469 		/*
1470 		 * Save a copy of the header before we strip it. For both mergeable
1471 		 * and non-mergeable, the VirtIO header is placed first in the
1472 		 * mbuf's data. We no longer need num_buffers, so always use a
1473 		 * virtio_net_hdr.
1474 		 */
1475 		memcpy(hdr, mtod(m, void *), sizeof(struct virtio_net_hdr));
1476 		m_adj(m, adjsz);
1477 
1478 		if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1479 			eh = mtod(m, struct ether_header *);
1480 			if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1481 				vtnet_vlan_tag_remove(m);
1482 
1483 				/*
1484 				 * With the 802.1Q header removed, update the
1485 				 * checksum starting location accordingly.
1486 				 */
1487 				if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
1488 					hdr->csum_start -=
1489 					    ETHER_VLAN_ENCAP_LEN;
1490 			}
1491 		}
1492 
1493 		if (ifp->if_capenable & IFCAP_RXCSUM &&
1494 		    hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
1495 			if (vtnet_rx_csum(sc, m, hdr) != 0)
1496 				sc->vtnet_stats.rx_csum_failed++;
1497 		}
1498 
1499 		lwkt_serialize_exit(&sc->vtnet_slz);
1500 		rx_npkts++;
1501 		ifp->if_input(ifp, m, NULL, -1);
1502 		lwkt_serialize_enter(&sc->vtnet_slz);
1503 
1504 		/*
1505 		 * The interface may have been stopped while we were
1506 		 * passing the packet up the network stack.
1507 		 */
1508 		if ((ifp->if_flags & IFF_RUNNING) == 0)
1509 			break;
1510 	}
1511 
1512 	virtqueue_notify(vq, &sc->vtnet_slz);
1513 
1514 	if (rx_npktsp != NULL)
1515 		*rx_npktsp = rx_npkts;
1516 
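	/*
	 * Leftover budget means the ring was drained; otherwise return
	 * EAGAIN so the caller knows more packets may still be pending.
	 */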
1517 	return (count > 0 ? 0 : EAGAIN);
1518 }
1519 
1520 static void
1521 vtnet_rx_intr_task(void *arg)
1522 {
1523 	struct vtnet_softc *sc;
1524 	struct ifnet *ifp;
1525 	int more;
1526 
1527 	sc = arg;
1528 	ifp = sc->vtnet_ifp;
1529 
1530 next:
1531 //	lwkt_serialize_enter(&sc->vtnet_slz);
1532 
1533 	if ((ifp->if_flags & IFF_RUNNING) == 0) {
1534 		vtnet_enable_rx_intr(sc);
1535 //		lwkt_serialize_exit(&sc->vtnet_slz);
1536 		return;
1537 	}
1538 
1539 	more = vtnet_rxeof(sc, sc->vtnet_rx_process_limit, NULL);
1540 	if (!more && vtnet_enable_rx_intr(sc) != 0) {
1541 		vtnet_disable_rx_intr(sc);
1542 		more = 1;
1543 	}
1544 
1545 //	lwkt_serialize_exit(&sc->vtnet_slz);
1546 
1547 	if (more) {
1548 		sc->vtnet_stats.rx_task_rescheduled++;
1549 		goto next;
1550 	}
1551 }
1552 
1553 static int
1554 vtnet_rx_vq_intr(void *xsc)
1555 {
1556 	struct vtnet_softc *sc;
1557 
1558 	sc = xsc;
1559 
1560 	vtnet_disable_rx_intr(sc);
1561 	vtnet_rx_intr_task(sc);
1562 
1563 	return (1);
1564 }
1565 
1566 static void
1567 vtnet_enqueue_txhdr(struct vtnet_softc *sc, struct vtnet_tx_header *txhdr)
1568 {
1569 	bzero(txhdr, sizeof(*txhdr));
1570 	SLIST_INSERT_HEAD(&sc->vtnet_txhdr_free, txhdr, link);
1571 }
1572 
1573 static void
1574 vtnet_txeof(struct vtnet_softc *sc)
1575 {
1576 	struct virtqueue *vq;
1577 	struct ifnet *ifp;
1578 	struct vtnet_tx_header *txhdr;
1579 	int deq;
1580 
1581 	vq = sc->vtnet_tx_vq;
1582 	ifp = sc->vtnet_ifp;
1583 	deq = 0;
1584 
1585 	ASSERT_SERIALIZED(&sc->vtnet_slz);
1586 
1587 	while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) {
1588 		deq++;
1589 		ifp->if_opackets++;
1590 		m_freem(txhdr->vth_mbuf);
1591 		vtnet_enqueue_txhdr(sc, txhdr);
1592 	}
1593 
1594 	if (deq > 0) {
1595 		ifq_clr_oactive(&ifp->if_snd);
1596 		if (virtqueue_empty(vq))
1597 			sc->vtnet_watchdog_timer = 0;
1598 	}
1599 }
1600 
1601 static struct mbuf *
1602 vtnet_tx_offload(struct vtnet_softc *sc, struct mbuf *m,
1603     struct virtio_net_hdr *hdr)
1604 {
1605 	struct ifnet *ifp;
1606 	struct ether_header *eh;
1607 	struct ether_vlan_header *evh;
1608 	struct ip *ip;
1609 	struct ip6_hdr *ip6;
1610 	struct tcphdr *tcp;
1611 	int ip_offset;
1612 	uint16_t eth_type, csum_start;
1613 	uint8_t ip_proto, gso_type;
1614 
1615 	ifp = sc->vtnet_ifp;
1616 	M_ASSERTPKTHDR(m);
1617 
1618 	ip_offset = sizeof(struct ether_header);
1619 	if (m->m_len < ip_offset) {
1620 		if ((m = m_pullup(m, ip_offset)) == NULL)
1621 			return (NULL);
1622 	}
1623 
1624 	eh = mtod(m, struct ether_header *);
1625 	eth_type = ntohs(eh->ether_type);
1626 	if (eth_type == ETHERTYPE_VLAN) {
1627 		ip_offset = sizeof(struct ether_vlan_header);
1628 		if (m->m_len < ip_offset) {
1629 			if ((m = m_pullup(m, ip_offset)) == NULL)
1630 				return (NULL);
1631 		}
1632 		evh = mtod(m, struct ether_vlan_header *);
1633 		eth_type = ntohs(evh->evl_proto);
1634 	}
1635 
1636 	switch (eth_type) {
1637 	case ETHERTYPE_IP:
1638 		if (m->m_len < ip_offset + sizeof(struct ip)) {
1639 			m = m_pullup(m, ip_offset + sizeof(struct ip));
1640 			if (m == NULL)
1641 				return (NULL);
1642 		}
1643 
1644 		ip = (struct ip *)(mtod(m, uint8_t *) + ip_offset);
1645 		ip_proto = ip->ip_p;
1646 		csum_start = ip_offset + (ip->ip_hl << 2);
1647 		gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1648 		break;
1649 
1650 	case ETHERTYPE_IPV6:
1651 		if (m->m_len < ip_offset + sizeof(struct ip6_hdr)) {
1652 			m = m_pullup(m, ip_offset + sizeof(struct ip6_hdr));
1653 			if (m == NULL)
1654 				return (NULL);
1655 		}
1656 
1657 		ip6 = (struct ip6_hdr *)(mtod(m, uint8_t *) + ip_offset);
1658 		/*
1659 		 * XXX Assume no extension headers are present. Presently,
1660 		 * this will always be true in the case of TSO, and FreeBSD
1661 		 * does not perform checksum offloading of IPv6 yet.
1662 		 */
1663 		ip_proto = ip6->ip6_nxt;
1664 		csum_start = ip_offset + sizeof(struct ip6_hdr);
1665 		gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1666 		break;
1667 
1668 	default:
1669 		return (m);
1670 	}
1671 
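	/*
	 * csum_start is the offset at which the host begins checksumming;
	 * csum_data holds the offset of the checksum field within the
	 * protocol header, which virtio expects in csum_offset.
	 */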
1672 	if (m->m_pkthdr.csum_flags & VTNET_CSUM_OFFLOAD) {
1673 		hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM;
1674 		hdr->csum_start = csum_start;
1675 		hdr->csum_offset = m->m_pkthdr.csum_data;
1676 
1677 		sc->vtnet_stats.tx_csum_offloaded++;
1678 	}
1679 
1680 	if (m->m_pkthdr.csum_flags & CSUM_TSO) {
1681 		if (ip_proto != IPPROTO_TCP)
1682 			return (m);
1683 
1684 		if (m->m_len < csum_start + sizeof(struct tcphdr)) {
1685 			m = m_pullup(m, csum_start + sizeof(struct tcphdr));
1686 			if (m == NULL)
1687 				return (NULL);
1688 		}
1689 
1690 		tcp = (struct tcphdr *)(mtod(m, uint8_t *) + csum_start);
1691 		hdr->gso_type = gso_type;
1692 		hdr->hdr_len = csum_start + (tcp->th_off << 2);
1693 		hdr->gso_size = m->m_pkthdr.tso_segsz;
1694 
1695 		if (tcp->th_flags & TH_CWR) {
1696 			/*
1697 			 * Drop if we did not negotiate VIRTIO_NET_F_HOST_ECN.
1698 			 * ECN support is only configurable globally with the
1699 			 * net.inet.tcp.ecn.enable sysctl knob.
1700 			 */
1701 			if ((sc->vtnet_flags & VTNET_FLAG_TSO_ECN) == 0) {
1702 				if_printf(ifp, "TSO with ECN not supported "
1703 				    "by host\n");
1704 				m_freem(m);
1705 				return (NULL);
1706 			}
1707 
1708 			hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
1709 		}
1710 
1711 		sc->vtnet_stats.tx_tso_offloaded++;
1712 	}
1713 
1714 	return (m);
1715 }
1716 
1717 static int
1718 vtnet_enqueue_txbuf(struct vtnet_softc *sc, struct mbuf **m_head,
1719     struct vtnet_tx_header *txhdr)
1720 {
1721 	struct sglist sg;
1722 	struct sglist_seg segs[VTNET_MAX_TX_SEGS];
1723 	struct virtqueue *vq;
1724 	struct mbuf *m;
1725 	int error;
1726 
1727 	vq = sc->vtnet_tx_vq;
1728 	m = *m_head;
1729 
1730 	sglist_init(&sg, sc->vtnet_tx_nsegs, segs);
1731 	error = sglist_append(&sg, &txhdr->vth_uhdr, sc->vtnet_hdr_size);
1732 	KASSERT(error == 0 && sg.sg_nseg == 1,
1733 	    ("%s: error %d adding header to sglist", __func__, error));
1734 
1735 	error = sglist_append_mbuf(&sg, m);
1736 	if (error) {
1737 		m = m_defrag(m, M_NOWAIT);
1738 		if (m == NULL)
1739 			goto fail;
1740 
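		/* Without the status feature, always report the link as up. */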
1741 		*m_head = m;
1742 		sc->vtnet_stats.tx_defragged++;
1743 
1744 		error = sglist_append_mbuf(&sg, m);
1745 		if (error)
1746 			goto fail;
1747 	}
1748 
1749 	txhdr->vth_mbuf = m;
1750 	error = virtqueue_enqueue(vq, txhdr, &sg, sg.sg_nseg, 0);
1751 
1752 	return (error);
1753 
1754 fail:
1755 	sc->vtnet_stats.tx_defrag_failed++;
1756 	m_freem(*m_head);
1757 	*m_head = NULL;
1758 
1759 	return (ENOBUFS);
1760 }
1761 
1762 static struct mbuf *
1763 vtnet_vlan_tag_insert(struct mbuf *m)
1764 {
1765 	struct mbuf *n;
1766 	struct ether_vlan_header *evl;
1767 
1768 	if (M_WRITABLE(m) == 0) {
1769 		n = m_dup(m, M_NOWAIT);
1770 		m_freem(m);
1771 		if ((m = n) == NULL)
1772 			return (NULL);
1773 	}
1774 
1775 	M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT);
1776 	if (m == NULL)
1777 		return (NULL);
1778 	if (m->m_len < sizeof(struct ether_vlan_header)) {
1779 		m = m_pullup(m, sizeof(struct ether_vlan_header));
1780 		if (m == NULL)
1781 			return (NULL);
1782 	}
1783 
1784 	/* Insert 802.1Q header into the existing Ethernet header. */
1785 	evl = mtod(m, struct ether_vlan_header *);
1786 	bcopy((char *) evl + ETHER_VLAN_ENCAP_LEN,
1787 	      (char *) evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
1788 	evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
1789 	evl->evl_tag = htons(m->m_pkthdr.ether_vlantag);
1790 	m->m_flags &= ~M_VLANTAG;
1791 
1792 	return (m);
1793 }
1794 
1795 static int
1796 vtnet_encap(struct vtnet_softc *sc, struct mbuf **m_head)
1797 {
1798 	struct vtnet_tx_header *txhdr;
1799 	struct virtio_net_hdr *hdr;
1800 	struct mbuf *m;
1801 	int error;
1802 
1803 	txhdr = SLIST_FIRST(&sc->vtnet_txhdr_free);
1804 	if (txhdr == NULL)
1805 		return (ENOBUFS);
1806 	SLIST_REMOVE_HEAD(&sc->vtnet_txhdr_free, link);
1807 
1808 	/*
1809 	 * Always use the non-mergeable header to simplify things. When
1810 	 * the mergeable feature is negotiated, the num_buffers field
1811 	 * must be set to zero. We use vtnet_hdr_size later to enqueue
1812 	 * the correct header size to the host.
1813 	 */
1814 	hdr = &txhdr->vth_uhdr.hdr;
1815 	m = *m_head;
1816 
1817 	error = ENOBUFS;
1818 
1819 	if (m->m_flags & M_VLANTAG) {
1820 		//m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
1821 		m = vtnet_vlan_tag_insert(m);
1822 		if ((*m_head = m) == NULL)
1823 			goto fail;
1824 		m->m_flags &= ~M_VLANTAG;
1825 	}
1826 
1827 	if (m->m_pkthdr.csum_flags != 0) {
1828 		m = vtnet_tx_offload(sc, m, hdr);
1829 		if ((*m_head = m) == NULL)
1830 			goto fail;
1831 	}
1832 
1833 	error = vtnet_enqueue_txbuf(sc, m_head, txhdr);
1834 fail:
1835 	if (error != 0)
1836 		vtnet_enqueue_txhdr(sc, txhdr);
1837 	return (error);
1838 }
1839 
1840 static void
1841 vtnet_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
1842 {
1843 	struct vtnet_softc *sc;
1844 
1845 	sc = ifp->if_softc;
1846 
1847 	ASSERT_ALTQ_SQ_DEFAULT(ifp, ifsq);
1848 	lwkt_serialize_enter(&sc->vtnet_slz);
1849 	vtnet_start_locked(ifp, ifsq);
1850 	lwkt_serialize_exit(&sc->vtnet_slz);
1851 }
1852 
1853 static void
1854 vtnet_start_locked(struct ifnet *ifp, struct ifaltq_subque *ifsq)
1855 {
1856 	struct vtnet_softc *sc;
1857 	struct virtqueue *vq;
1858 	struct mbuf *m0;
1859 	int enq;
1860 
1861 	sc = ifp->if_softc;
1862 	vq = sc->vtnet_tx_vq;
1863 	enq = 0;
1864 
1865 	ASSERT_SERIALIZED(&sc->vtnet_slz);
1866 
1867 	if ((ifp->if_flags & (IFF_RUNNING)) !=
1868 	    IFF_RUNNING || ((sc->vtnet_flags & VTNET_FLAG_LINK) == 0))
1869 		return;
1870 
1871 #ifdef VTNET_TX_INTR_MODERATION
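	/*
	 * With the Tx interrupt kept disabled, reclaim completed transmit
	 * buffers here once the ring is at least half used.
	 */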
1872 	if (virtqueue_nused(vq) >= sc->vtnet_tx_size / 2)
1873 		vtnet_txeof(sc);
1874 #endif
1875 
1876 	while (!ifsq_is_empty(ifsq)) {
1877 		if (virtqueue_full(vq)) {
1878 			ifq_set_oactive(&ifp->if_snd);
1879 			break;
1880 		}
1881 
1882 		m0 = ifq_dequeue(&ifp->if_snd);
1883 		if (m0 == NULL)
1884 			break;
1885 
1886 		if (vtnet_encap(sc, &m0) != 0) {
1887 			if (m0 == NULL)
1888 				break;
1889 			ifq_prepend(&ifp->if_snd, m0);
1890 			ifq_set_oactive(&ifp->if_snd);
1891 			break;
1892 		}
1893 
1894 		enq++;
1895 		ETHER_BPF_MTAP(ifp, m0);
1896 	}
1897 
1898 	if (enq > 0) {
1899 		virtqueue_notify(vq, &sc->vtnet_slz);
1900 		sc->vtnet_watchdog_timer = VTNET_WATCHDOG_TIMEOUT;
1901 	}
1902 }
1903 
1904 static void
1905 vtnet_tick(void *xsc)
1906 {
1907 	struct vtnet_softc *sc;
1908 
1909 	sc = xsc;
1910 
1911 #if 0
1912 	ASSERT_SERIALIZED(&sc->vtnet_slz);
1913 #ifdef VTNET_DEBUG
1914 	virtqueue_dump(sc->vtnet_rx_vq);
1915 	virtqueue_dump(sc->vtnet_tx_vq);
1916 #endif
1917 
1918 	vtnet_watchdog(sc);
1919 	callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc);
1920 #endif
1921 }
1922 
1923 static void
1924 vtnet_tx_intr_task(void *arg)
1925 {
1926 	struct vtnet_softc *sc;
1927 	struct ifnet *ifp;
1928 	struct ifaltq_subque *ifsq;
1929 
1930 	sc = arg;
1931 	ifp = sc->vtnet_ifp;
1932 	ifsq = ifq_get_subq_default(&ifp->if_snd);
1933 
1934 next:
1935 //	lwkt_serialize_enter(&sc->vtnet_slz);
1936 
1937 	if ((ifp->if_flags & IFF_RUNNING) == 0) {
1938 		vtnet_enable_tx_intr(sc);
1939 //		lwkt_serialize_exit(&sc->vtnet_slz);
1940 		return;
1941 	}
1942 
1943 	vtnet_txeof(sc);
1944 
1945 	if (!ifsq_is_empty(ifsq))
1946 		vtnet_start_locked(ifp, ifsq);
1947 
1948 	if (vtnet_enable_tx_intr(sc) != 0) {
1949 		vtnet_disable_tx_intr(sc);
1950 		sc->vtnet_stats.tx_task_rescheduled++;
1951 //		lwkt_serialize_exit(&sc->vtnet_slz);
1952 		goto next;
1953 	}
1954 
1955 //	lwkt_serialize_exit(&sc->vtnet_slz);
1956 }
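
/*
 * The enable/recheck dance above closes the race where buffers complete
 * while the transmit interrupt is disabled: a non-zero return from
 * vtnet_enable_tx_intr() indicates used entries are already pending, so
 * the task disables the interrupt again and loops rather than waiting
 * for a notification that may never arrive.
 */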
1957 
1958 static int
1959 vtnet_tx_vq_intr(void *xsc)
1960 {
1961 	struct vtnet_softc *sc;
1962 
1963 	sc = xsc;
1964 
1965 	vtnet_disable_tx_intr(sc);
1966 	vtnet_tx_intr_task(sc);
1967 
1968 	return (1);
1969 }
1970 
1971 static void
1972 vtnet_stop(struct vtnet_softc *sc)
1973 {
1974 	device_t dev;
1975 	struct ifnet *ifp;
1976 
1977 	dev = sc->vtnet_dev;
1978 	ifp = sc->vtnet_ifp;
1979 
1980 	ASSERT_SERIALIZED(&sc->vtnet_slz);
1981 
1982 	sc->vtnet_watchdog_timer = 0;
1983 	callout_stop(&sc->vtnet_tick_ch);
1984 	ifq_clr_oactive(&ifp->if_snd);
1985 	ifp->if_flags &= ~(IFF_RUNNING);
1986 
1987 	vtnet_disable_rx_intr(sc);
1988 	vtnet_disable_tx_intr(sc);
1989 
1990 	/*
1991 	 * Stop the host VirtIO adapter. Note this will reset the host
1992 	 * adapter's state back to the pre-initialized state, so in
1993 	 * order to make the device usable again, we must drive it
1994 	 * through virtio_reinit() and virtio_reinit_complete().
1995 	 */
1996 	virtio_stop(dev);
1997 
1998 	sc->vtnet_flags &= ~VTNET_FLAG_LINK;
1999 
2000 	vtnet_free_rx_mbufs(sc);
2001 	vtnet_free_tx_mbufs(sc);
2002 }
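
/*
 * The matching bring-up sequence lives in vtnet_init_locked() below:
 * virtio_reinit(), push the MAC address to the host, repopulate the Rx
 * virtqueue, restore any Rx/MAC/VLAN filters, re-enable the virtqueue
 * interrupts, and finally virtio_reinit_complete().
 */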
2003 
2004 static int
2005 vtnet_virtio_reinit(struct vtnet_softc *sc)
2006 {
2007 	device_t dev;
2008 	struct ifnet *ifp;
2009 	uint64_t features;
2010 	int error;
2011 
2012 	dev = sc->vtnet_dev;
2013 	ifp = sc->vtnet_ifp;
2014 	features = sc->vtnet_features;
2015 
2016 	/*
2017 	 * Re-negotiate with the host, removing any disabled receive
2018 	 * features. Transmit features are disabled only on our side
2019 	 * via if_capenable and if_hwassist.
2020 	 */
2021 
2022 	if (ifp->if_capabilities & IFCAP_RXCSUM) {
2023 		if ((ifp->if_capenable & IFCAP_RXCSUM) == 0)
2024 			features &= ~VIRTIO_NET_F_GUEST_CSUM;
2025 	}
2026 
2027 #if 0	/* IFCAP_LRO doesn't exist in DragonFly. */
2028 	if (ifp->if_capabilities & IFCAP_LRO) {
2029 		if ((ifp->if_capenable & IFCAP_LRO) == 0)
2030 			features &= ~VTNET_LRO_FEATURES;
2031 	}
2032 #endif
2033 
2034 	if (ifp->if_capabilities & IFCAP_VLAN_HWFILTER) {
2035 		if ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0)
2036 			features &= ~VIRTIO_NET_F_CTRL_VLAN;
2037 	}
2038 
2039 	error = virtio_reinit(dev, features);
2040 	if (error)
2041 		device_printf(dev, "virtio reinit error %d\n", error);
2042 
2043 	return (error);
2044 }
2045 
2046 static void
2047 vtnet_init_locked(struct vtnet_softc *sc)
2048 {
2049 	device_t dev;
2050 	struct ifnet *ifp;
2051 	int error;
2052 
2053 	dev = sc->vtnet_dev;
2054 	ifp = sc->vtnet_ifp;
2055 
2056 	ASSERT_SERIALIZED(&sc->vtnet_slz);
2057 
2058 	if (ifp->if_flags & IFF_RUNNING)
2059 		return;
2060 
2061 	/* Stop host's adapter, cancel any pending I/O. */
2062 	vtnet_stop(sc);
2063 
2064 	/* Reinitialize the host device. */
2065 	error = vtnet_virtio_reinit(sc);
2066 	if (error) {
2067 		device_printf(dev,
2068 		    "reinitialization failed, stopping device...\n");
2069 		vtnet_stop(sc);
2070 		return;
2071 	}
2072 
2073 	/* Update host with assigned MAC address. */
2074 	bcopy(IF_LLADDR(ifp), sc->vtnet_hwaddr, ETHER_ADDR_LEN);
2075 	vtnet_set_hwaddr(sc);
2076 
2077 	ifp->if_hwassist = 0;
2078 	if (ifp->if_capenable & IFCAP_TXCSUM)
2079 		ifp->if_hwassist |= VTNET_CSUM_OFFLOAD;
2080 	if (ifp->if_capenable & IFCAP_TSO4)
2081 		ifp->if_hwassist |= CSUM_TSO;
2082 
2083 	error = vtnet_init_rx_vq(sc);
2084 	if (error) {
2085 		device_printf(dev,
2086 		    "cannot allocate mbufs for Rx virtqueue\n");
2087 		vtnet_stop(sc);
2088 		return;
2089 	}
2090 
2091 	if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
2092 		if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
2093 			/* Restore promiscuous and all-multicast modes. */
2094 			vtnet_rx_filter(sc);
2095 
2096 			/* Restore filtered MAC addresses. */
2097 			vtnet_rx_filter_mac(sc);
2098 		}
2099 
2100 		/* Restore VLAN filters. */
2101 		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
2102 			vtnet_rx_filter_vlan(sc);
2103 	}
2104 
2105 	/* Re-arm the virtqueue interrupts. */
2106 	vtnet_enable_rx_intr(sc);
2107 	vtnet_enable_tx_intr(sc);
2108 
2109 
2110 	ifp->if_flags |= IFF_RUNNING;
2111 	ifq_clr_oactive(&ifp->if_snd);
2112 
2113 	virtio_reinit_complete(dev);
2114 
2115 	vtnet_update_link_status(sc);
2116 	callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc);
2117 }
2118 
2119 static void
2120 vtnet_init(void *xsc)
2121 {
2122 	struct vtnet_softc *sc;
2123 
2124 	sc = xsc;
2125 
2126 	lwkt_serialize_enter(&sc->vtnet_slz);
2127 	vtnet_init_locked(sc);
2128 	lwkt_serialize_exit(&sc->vtnet_slz);
2129 }
2130 
2131 static void
2132 vtnet_exec_ctrl_cmd(struct vtnet_softc *sc, void *cookie,
2133     struct sglist *sg, int readable, int writable)
2134 {
2135 	struct virtqueue *vq;
2136 	void *c;
2137 
2138 	vq = sc->vtnet_ctrl_vq;
2139 
2140 	ASSERT_SERIALIZED(&sc->vtnet_slz);
2141 	KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_VQ,
2142 	    ("no control virtqueue"));
2143 	KASSERT(virtqueue_empty(vq),
2144 	    ("control command already enqueued"));
2145 
2146 	if (virtqueue_enqueue(vq, cookie, sg, readable, writable) != 0)
2147 		return;
2148 
2149 	virtqueue_notify(vq, &sc->vtnet_slz);
2150 
2151 	/*
2152 	 * Poll until the command is complete. Previously, we would
2153 	 * sleep until the control virtqueue interrupt handler woke
2154 	 * us up, but dropping the serializer leads to locking
2155 	 * difficulties.
2156 	 *
2157 	 * Furthermore, it appears QEMU/KVM only allocates three MSIX
2158 	 * vectors. Two of those vectors are needed for the Rx and Tx
2159 	 * virtqueues. We do not support sharing a virtqueue and the
2160 	 * config changed notification on the same MSIX vector.
2161 	 */
2162 	c = virtqueue_poll(vq, NULL);
2163 	KASSERT(c == cookie, ("unexpected control command response"));
2164 }
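
/*
 * Calling convention shared by the control command helpers below: all
 * pieces of a command are gathered into a single sglist with the
 * device-readable parts (header and payload) first and the one
 * device-writable ack byte last, so callers pass readable = sg_nseg - 1
 * and writable = 1.  A hypothetical caller (sketch only, "payload" is a
 * stand-in for the command-specific field) would look like:
 */
#if 0
	sglist_init(&sg, 3, segs);
	sglist_append(&sg, &s.hdr, sizeof(s.hdr));		/* readable */
	sglist_append(&sg, &s.payload, sizeof(s.payload));	/* readable */
	sglist_append(&sg, &s.ack, sizeof(s.ack));		/* writable */
	vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
#endif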
2165 
2166 static int
2167 vtnet_ctrl_mac_cmd(struct vtnet_softc *sc, uint8_t *hwaddr)
2168 {
2169 	struct {
2170 		struct virtio_net_ctrl_hdr hdr __aligned(2);
2171 		uint8_t pad1;
2172 		char aligned_hwaddr[ETHER_ADDR_LEN] __aligned(8);
2173 		uint8_t pad2;
2174 		uint8_t ack;
2175 	} s;
2176 	struct sglist_seg segs[3];
2177 	struct sglist sg;
2178 	int error;
2179 
2180 	s.hdr.class = VIRTIO_NET_CTRL_MAC;
2181 	s.hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;
2182 	s.ack = VIRTIO_NET_ERR;
2183 
2184 	/* Copy the MAC address into physically contiguous memory. */
2185 	memcpy(s.aligned_hwaddr, hwaddr, ETHER_ADDR_LEN);
2186 
2187 	sglist_init(&sg, 3, segs);
2188 	error = 0;
2189 	error |= sglist_append(&sg, &s.hdr,
2190 	    sizeof(struct virtio_net_ctrl_hdr));
2191 	error |= sglist_append(&sg, s.aligned_hwaddr, ETHER_ADDR_LEN);
2192 	error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
2193 	KASSERT(error == 0 && sg.sg_nseg == 3,
2194 	    ("%s: error %d adding set MAC msg to sglist", __func__, error));
2195 
2196 	vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
2197 
2198 	return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
2199 }
2200 
2201 static void
2202 vtnet_rx_filter(struct vtnet_softc *sc)
2203 {
2204 	device_t dev;
2205 	struct ifnet *ifp;
2206 
2207 	dev = sc->vtnet_dev;
2208 	ifp = sc->vtnet_ifp;
2209 
2210 	ASSERT_SERIALIZED(&sc->vtnet_slz);
2211 	KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX,
2212 	    ("CTRL_RX feature not negotiated"));
2213 
2214 	if (vtnet_set_promisc(sc, ifp->if_flags & IFF_PROMISC) != 0)
2215 		device_printf(dev, "cannot %s promiscuous mode\n",
2216 		    (ifp->if_flags & IFF_PROMISC) ? "enable" : "disable");
2217 
2218 	if (vtnet_set_allmulti(sc, ifp->if_flags & IFF_ALLMULTI) != 0)
2219 		device_printf(dev, "cannot %s all-multicast mode\n",
2220 		    (ifp->if_flags & IFF_ALLMULTI) ? "enable" : "disable");
2221 }
2222 
2223 static int
2224 vtnet_ctrl_rx_cmd(struct vtnet_softc *sc, int cmd, int on)
2225 {
2226 	struct sglist_seg segs[3];
2227 	struct sglist sg;
2228 	struct {
2229 		struct virtio_net_ctrl_hdr hdr __aligned(2);
2230 		uint8_t pad1;
2231 		uint8_t onoff;
2232 		uint8_t pad2;
2233 		uint8_t ack;
2234 	} s;
2235 	int error;
2236 
2237 	KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX,
2238 	    ("%s: CTRL_RX feature not negotiated", __func__));
2239 
2240 	s.hdr.class = VIRTIO_NET_CTRL_RX;
2241 	s.hdr.cmd = cmd;
2242 	s.onoff = !!on;
2243 	s.ack = VIRTIO_NET_ERR;
2244 
2245 	sglist_init(&sg, 3, segs);
2246 	error = 0;
2247 	error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
2248 	error |= sglist_append(&sg, &s.onoff, sizeof(uint8_t));
2249 	error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
2250 	KASSERT(error == 0 && sg.sg_nseg == 3,
2251 	    ("%s: error %d adding Rx message to sglist", __func__, error));
2252 
2253 	vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
2254 
2255 	return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
2256 }
2257 
2258 static int
2259 vtnet_set_promisc(struct vtnet_softc *sc, int on)
2260 {
2261 
2262 	return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_PROMISC, on));
2263 }
2264 
2265 static int
2266 vtnet_set_allmulti(struct vtnet_softc *sc, int on)
2267 {
2268 
2269 	return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, on));
2270 }
2271 
2272 static void
2273 vtnet_rx_filter_mac(struct vtnet_softc *sc)
2274 {
2275 	struct virtio_net_ctrl_hdr hdr __aligned(2);
2276 	struct vtnet_mac_filter *filter;
2277 	struct sglist_seg segs[4];
2278 	struct sglist sg;
2279 	struct ifnet *ifp;
2280 	struct ifaddr *ifa;
2281 	struct ifaddr_container *ifac;
2282 	struct ifmultiaddr *ifma;
2283 	int ucnt, mcnt, promisc, allmulti, error;
2284 	uint8_t ack;
2285 
2286 	ifp = sc->vtnet_ifp;
2287 	ucnt = 0;
2288 	mcnt = 0;
2289 	promisc = 0;
2290 	allmulti = 0;
2291 
2292 	ASSERT_SERIALIZED(&sc->vtnet_slz);
2293 	KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX,
2294 	    ("%s: CTRL_RX feature not negotiated", __func__));
2295 
2296 	/* Use the MAC filtering table allocated in vtnet_attach. */
2297 	filter = sc->vtnet_macfilter;
2298 	memset(filter, 0, sizeof(struct vtnet_mac_filter));
2299 
2300 	/* Unicast MAC addresses: */
2301 	//if_addr_rlock(ifp);
2302 	TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
2303 		ifa = ifac->ifa;
2304 		if (ifa->ifa_addr->sa_family != AF_LINK)
2305 			continue;
2306 		else if (memcmp(LLADDR((struct sockaddr_dl *)ifa->ifa_addr),
2307 		    sc->vtnet_hwaddr, ETHER_ADDR_LEN) == 0)
2308 			continue;
2309 		else if (ucnt == VTNET_MAX_MAC_ENTRIES) {
2310 			promisc = 1;
2311 			break;
2312 		}
2313 
2314 		bcopy(LLADDR((struct sockaddr_dl *)ifa->ifa_addr),
2315 		    &filter->vmf_unicast.macs[ucnt], ETHER_ADDR_LEN);
2316 		ucnt++;
2317 	}
2318 	//if_addr_runlock(ifp);
2319 
2320 	if (promisc != 0) {
2321 		filter->vmf_unicast.nentries = 0;
2322 		if_printf(ifp, "more than %d MAC addresses assigned, "
2323 		    "falling back to promiscuous mode\n",
2324 		    VTNET_MAX_MAC_ENTRIES);
2325 	} else
2326 		filter->vmf_unicast.nentries = ucnt;
2327 
2328 	/* Multicast MAC addresses: */
2329 	//if_maddr_rlock(ifp);
2330 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2331 		if (ifma->ifma_addr->sa_family != AF_LINK)
2332 			continue;
2333 		else if (mcnt == VTNET_MAX_MAC_ENTRIES) {
2334 			allmulti = 1;
2335 			break;
2336 		}
2337 
2338 		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2339 		    &filter->vmf_multicast.macs[mcnt], ETHER_ADDR_LEN);
2340 		mcnt++;
2341 	}
2342 	//if_maddr_runlock(ifp);
2343 
2344 	if (allmulti != 0) {
2345 		filter->vmf_multicast.nentries = 0;
2346 		if_printf(ifp, "more than %d multicast MAC addresses "
2347 		    "assigned, falling back to all-multicast mode\n",
2348 		    VTNET_MAX_MAC_ENTRIES);
2349 	} else
2350 		filter->vmf_multicast.nentries = mcnt;
2351 
2352 	if (promisc != 0 && allmulti != 0)
2353 		goto out;
2354 
2355 	hdr.class = VIRTIO_NET_CTRL_MAC;
2356 	hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;
2357 	ack = VIRTIO_NET_ERR;
2358 
2359 	sglist_init(&sg, 4, segs);
2360 	error = 0;
2361 	error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr));
2362 	error |= sglist_append(&sg, &filter->vmf_unicast,
2363 	    sizeof(uint32_t) + filter->vmf_unicast.nentries * ETHER_ADDR_LEN);
2364 	error |= sglist_append(&sg, &filter->vmf_multicast,
2365 	    sizeof(uint32_t) + filter->vmf_multicast.nentries * ETHER_ADDR_LEN);
2366 	error |= sglist_append(&sg, &ack, sizeof(uint8_t));
2367 	KASSERT(error == 0 && sg.sg_nseg == 4,
2368 	    ("%s: error %d adding MAC filter msg to sglist", __func__, error));
2369 
2370 	vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1);
2371 
2372 	if (ack != VIRTIO_NET_OK)
2373 		if_printf(ifp, "error setting host MAC filter table\n");
2374 
2375 out:
2376 	if (promisc != 0 && vtnet_set_promisc(sc, 1) != 0)
2377 		if_printf(ifp, "cannot enable promiscuous mode\n");
2378 	if (allmulti != 0 && vtnet_set_allmulti(sc, 1) != 0)
2379 		if_printf(ifp, "cannot enable all-multicast mode\n");
2380 }
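
/*
 * Sketch of the VIRTIO_NET_CTRL_MAC_TABLE_SET command built above (the
 * real layout is struct vtnet_mac_filter in if_vtnetvar.h): a control
 * header, then two tables, each a 32-bit entry count followed by that
 * many 6-byte MAC addresses (unicast first, multicast second), then the
 * ack byte.  When a table overflows VTNET_MAX_MAC_ENTRIES its count is
 * sent as zero and the promiscuous or all-multicast fallback is enabled
 * instead; if both overflow, the command is skipped entirely.
 */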
2381 
2382 static int
2383 vtnet_exec_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
2384 {
2385 	struct sglist_seg segs[3];
2386 	struct sglist sg;
2387 	struct {
2388 		struct virtio_net_ctrl_hdr hdr __aligned(2);
2389 		uint8_t pad1;
2390 		uint16_t tag;
2391 		uint8_t pad2;
2392 		uint8_t ack;
2393 	} s;
2394 	int error;
2395 
2396 	s.hdr.class = VIRTIO_NET_CTRL_VLAN;
2397 	s.hdr.cmd = add ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL;
2398 	s.tag = tag;
2399 	s.ack = VIRTIO_NET_ERR;
2400 
2401 	sglist_init(&sg, 3, segs);
2402 	error = 0;
2403 	error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
2404 	error |= sglist_append(&sg, &s.tag, sizeof(uint16_t));
2405 	error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
2406 	KASSERT(error == 0 && sg.sg_nseg == 3,
2407 	    ("%s: error %d adding VLAN message to sglist", __func__, error));
2408 
2409 	vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
2410 
2411 	return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
2412 }
2413 
2414 static void
2415 vtnet_rx_filter_vlan(struct vtnet_softc *sc)
2416 {
2417 	uint32_t w;
2418 	uint16_t tag;
2419 	int i, bit, nvlans;
2420 
2421 	ASSERT_SERIALIZED(&sc->vtnet_slz);
2422 	KASSERT(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER,
2423 	    ("%s: VLAN_FILTER feature not negotiated", __func__));
2424 
2425 	nvlans = sc->vtnet_nvlans;
2426 
2427 	/* Enable the filter for each configured VLAN. */
2428 	for (i = 0; i < VTNET_VLAN_SHADOW_SIZE && nvlans > 0; i++) {
2429 		w = sc->vtnet_vlan_shadow[i];
2430 		while ((bit = ffs(w) - 1) != -1) {
2431 			w &= ~(1 << bit);
2432 			tag = sizeof(w) * CHAR_BIT * i + bit;
2433 			nvlans--;
2434 
2435 			if (vtnet_exec_vlan_filter(sc, 1, tag) != 0) {
2436 				device_printf(sc->vtnet_dev,
2437 				    "cannot enable VLAN %d filter\n", tag);
2438 			}
2439 		}
2440 	}
2441 
2442 	KASSERT(nvlans == 0, ("VLAN count incorrect"));
2443 }
2444 
2445 static void
2446 vtnet_update_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
2447 {
2448 	struct ifnet *ifp;
2449 	int idx, bit;
2450 
2451 	ifp = sc->vtnet_ifp;
2452 	idx = (tag >> 5) & 0x7F;
2453 	bit = tag & 0x1F;
2454 
2455 	if (tag == 0 || tag > 4095)
2456 		return;
2457 
2458 	lwkt_serialize_enter(&sc->vtnet_slz);
2459 
2460 	/* Update shadow VLAN table. */
2461 	if (add) {
2462 		sc->vtnet_nvlans++;
2463 		sc->vtnet_vlan_shadow[idx] |= (1 << bit);
2464 	} else {
2465 		sc->vtnet_nvlans--;
2466 		sc->vtnet_vlan_shadow[idx] &= ~(1 << bit);
2467 	}
2468 
2469 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER &&
2470 	    vtnet_exec_vlan_filter(sc, add, tag) != 0) {
2471 		device_printf(sc->vtnet_dev,
2472 		    "cannot %s VLAN %d %s the host filter table\n",
2473 		    add ? "add" : "remove", tag, add ? "to" : "from");
2474 	}
2475 
2476 	lwkt_serialize_exit(&sc->vtnet_slz);
2477 }
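
/*
 * Worked example of the shadow table indexing used here and decoded in
 * vtnet_rx_filter_vlan(): with 32-bit words, VLAN tag 100 maps to word
 * 100 >> 5 = 3 and bit 100 & 0x1f = 4; the reverse mapping recovers
 * tag = 32 * 3 + 4 = 100.
 */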
2478 
2479 static void
2480 vtnet_register_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
2481 {
2482 
2483 	if (ifp->if_softc != arg)
2484 		return;
2485 
2486 	vtnet_update_vlan_filter(arg, 1, tag);
2487 }
2488 
2489 static void
2490 vtnet_unregister_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
2491 {
2492 
2493 	if (ifp->if_softc != arg)
2494 		return;
2495 
2496 	vtnet_update_vlan_filter(arg, 0, tag);
2497 }
2498 
2499 static int
2500 vtnet_ifmedia_upd(struct ifnet *ifp)
2501 {
2502 	struct vtnet_softc *sc;
2503 	struct ifmedia *ifm;
2504 
2505 	sc = ifp->if_softc;
2506 	ifm = &sc->vtnet_media;
2507 
2508 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
2509 		return (EINVAL);
2510 
2511 	return (0);
2512 }
2513 
2514 static void
2515 vtnet_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
2516 {
2517 	struct vtnet_softc *sc;
2518 
2519 	sc = ifp->if_softc;
2520 
2521 	ifmr->ifm_status = IFM_AVALID;
2522 	ifmr->ifm_active = IFM_ETHER;
2523 
2524 	lwkt_serialize_enter(&sc->vtnet_slz);
2525 	if (vtnet_is_link_up(sc) != 0) {
2526 		ifmr->ifm_status |= IFM_ACTIVE;
2527 		ifmr->ifm_active |= VTNET_MEDIATYPE;
2528 	} else
2529 		ifmr->ifm_active |= IFM_NONE;
2530 	lwkt_serialize_exit(&sc->vtnet_slz);
2531 }
2532 
2533 static void
2534 vtnet_add_statistics(struct vtnet_softc *sc)
2535 {
2536 	device_t dev;
2537 	struct vtnet_statistics *stats;
2538 	struct sysctl_ctx_list *ctx;
2539 	struct sysctl_oid *tree;
2540 	struct sysctl_oid_list *child;
2541 
2542 	dev = sc->vtnet_dev;
2543 	stats = &sc->vtnet_stats;
2544 	ctx = device_get_sysctl_ctx(dev);
2545 	tree = device_get_sysctl_tree(dev);
2546 	child = SYSCTL_CHILDREN(tree);
2547 
2548 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "mbuf_alloc_failed",
2549 	    CTLFLAG_RD, &stats->mbuf_alloc_failed, 0,
2550 	    "Mbuf cluster allocation failures");
2551 
2552 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_frame_too_large",
2553 	    CTLFLAG_RD, &stats->rx_frame_too_large, 0,
2554 	    "Received frame larger than the mbuf chain");
2555 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_enq_replacement_failed",
2556 	    CTLFLAG_RD, &stats->rx_enq_replacement_failed, 0,
2557 	    "Enqueuing the replacement receive mbuf failed");
2558 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_mergeable_failed",
2559 	    CTLFLAG_RD, &stats->rx_mergeable_failed, 0,
2560 	    "Mergeable buffers receive failures");
2561 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ethtype",
2562 	    CTLFLAG_RD, &stats->rx_csum_bad_ethtype, 0,
2563 	    "Received checksum offloaded buffer with unsupported "
2564 	    "Ethernet type");
2565 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ipproto",
2566 	    CTLFLAG_RD, &stats->rx_csum_bad_ipproto, 0,
2567 	    "Received checksum offloaded buffer with incorrect IP protocol");
2568 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_offset",
2569 	    CTLFLAG_RD, &stats->rx_csum_bad_offset, 0,
2570 	    "Received checksum offloaded buffer with incorrect offset");
2571 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_failed",
2572 	    CTLFLAG_RD, &stats->rx_csum_failed, 0,
2573 	    "Received buffer checksum offload failed");
2574 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_offloaded",
2575 	    CTLFLAG_RD, &stats->rx_csum_offloaded, 0,
2576 	    "Received buffer checksum offload succeeded");
2577 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_task_rescheduled",
2578 	    CTLFLAG_RD, &stats->rx_task_rescheduled, 0,
2579 	    "Times the receive interrupt task rescheduled itself");
2580 
2581 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_bad_ethtype",
2582 	    CTLFLAG_RD, &stats->tx_csum_bad_ethtype, 0,
2583 	    "Aborted transmit of checksum offloaded buffer with unknown "
2584 	    "Ethernet type");
2585 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_bad_ethtype",
2586 	    CTLFLAG_RD, &stats->tx_tso_bad_ethtype, 0,
2587 	    "Aborted transmit of TSO buffer with unknown Ethernet type");
2588 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defragged",
2589 	    CTLFLAG_RD, &stats->tx_defragged, 0,
2590 	    "Transmit mbufs defragged");
2591 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defrag_failed",
2592 	    CTLFLAG_RD, &stats->tx_defrag_failed, 0,
2593 	    "Aborted transmit of buffer because defrag failed");
2594 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_offloaded",
2595 	    CTLFLAG_RD, &stats->tx_csum_offloaded, 0,
2596 	    "Offloaded checksum of transmitted buffer");
2597 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_offloaded",
2598 	    CTLFLAG_RD, &stats->tx_tso_offloaded, 0,
2599 	    "Segmentation offload of transmitted buffer");
2600 	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_task_rescheduled",
2601 	    CTLFLAG_RD, &stats->tx_task_rescheduled, 0,
2602 	    "Times the transmit interrupt task rescheduled itself");
2603 }
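
/*
 * These counters hang off the device's sysctl tree, so on a typical
 * system the first unit should be visible as something like
 * "sysctl dev.vtnet.0" (the exact node names depend on how the bus
 * attaches the device).
 */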
2604 
2605 static int
2606 vtnet_enable_rx_intr(struct vtnet_softc *sc)
2607 {
2608 
2609 	return (virtqueue_enable_intr(sc->vtnet_rx_vq));
2610 }
2611 
2612 static void
2613 vtnet_disable_rx_intr(struct vtnet_softc *sc)
2614 {
2615 
2616 	virtqueue_disable_intr(sc->vtnet_rx_vq);
2617 }
2618 
2619 static int
2620 vtnet_enable_tx_intr(struct vtnet_softc *sc)
2621 {
2622 
2623 #ifdef VTNET_TX_INTR_MODERATION
2624 	return (0);
2625 #else
2626 	return (virtqueue_enable_intr(sc->vtnet_tx_vq));
2627 #endif
2628 }
2629 
2630 static void
2631 vtnet_disable_tx_intr(struct vtnet_softc *sc)
2632 {
2633 
2634 	virtqueue_disable_intr(sc->vtnet_tx_vq);
2635 }
2636