xref: /dragonfly/sys/dev/virtual/virtio/net/if_vtnet.c (revision 186bd7fc)
1 /*-
2  * Copyright (c) 2011, Bryan Venteicher <bryanv@daemoninthecloset.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice unmodified, this list of conditions, and the following
10  *    disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 /* Driver for VirtIO network devices. */
28 
29 #include <sys/cdefs.h>
30 
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/kernel.h>
34 #include <sys/sockio.h>
35 #include <sys/mbuf.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/socket.h>
39 #include <sys/sysctl.h>
40 #include <sys/taskqueue.h>
41 #include <sys/random.h>
42 #include <sys/sglist.h>
43 #include <sys/serialize.h>
44 #include <sys/bus.h>
45 #include <sys/rman.h>
46 
47 #include <net/ethernet.h>
48 #include <net/if.h>
49 #include <net/if_arp.h>
50 #include <net/if_dl.h>
51 #include <net/if_types.h>
52 #include <net/if_media.h>
53 #include <net/vlan/if_vlan_var.h>
54 #include <net/vlan/if_vlan_ether.h>
55 #include <net/ifq_var.h>
56 
57 #include <net/bpf.h>
58 
59 #include <netinet/in_systm.h>
60 #include <netinet/in.h>
61 #include <netinet/ip.h>
62 #include <netinet/ip6.h>
63 #include <netinet/udp.h>
64 #include <netinet/tcp.h>
65 
66 #include <dev/virtual/virtio/virtio/virtio.h>
67 #include <dev/virtual/virtio/virtio/virtqueue.h>
68 
69 #include "virtio_net.h"
70 #include "virtio_if.h"
71 
72 struct vtnet_statistics {
73 	unsigned long		mbuf_alloc_failed;
74 
75 	unsigned long		rx_frame_too_large;
76 	unsigned long		rx_enq_replacement_failed;
77 	unsigned long		rx_mergeable_failed;
78 	unsigned long		rx_csum_bad_ethtype;
79 	unsigned long		rx_csum_bad_start;
80 	unsigned long		rx_csum_bad_ipproto;
81 	unsigned long		rx_csum_bad_offset;
82 	unsigned long		rx_csum_failed;
83 	unsigned long		rx_csum_offloaded;
84 	unsigned long		rx_task_rescheduled;
85 
86 	unsigned long		tx_csum_offloaded;
87 	unsigned long		tx_tso_offloaded;
88 	unsigned long		tx_csum_bad_ethtype;
89 	unsigned long		tx_tso_bad_ethtype;
90 	unsigned long		tx_task_rescheduled;
91 };
92 
93 struct vtnet_softc {
94 	device_t		vtnet_dev;
95 	struct ifnet		*vtnet_ifp;
96 	struct lwkt_serialize	vtnet_slz;
97 
98 	uint32_t		vtnet_flags;
99 #define VTNET_FLAG_LINK		0x0001
100 #define VTNET_FLAG_SUSPENDED	0x0002
101 #define VTNET_FLAG_MAC		0x0004
102 #define VTNET_FLAG_CTRL_VQ	0x0008
103 #define VTNET_FLAG_CTRL_RX	0x0010
104 #define VTNET_FLAG_CTRL_MAC	0x0020
105 #define VTNET_FLAG_VLAN_FILTER	0x0040
106 #define VTNET_FLAG_TSO_ECN	0x0080
107 #define VTNET_FLAG_MRG_RXBUFS	0x0100
108 #define VTNET_FLAG_LRO_NOMRG	0x0200
109 
110 	struct virtqueue	*vtnet_rx_vq;
111 	struct virtqueue	*vtnet_tx_vq;
112 	struct virtqueue	*vtnet_ctrl_vq;
113 
114 	struct vtnet_tx_header	*vtnet_txhdrarea;
115 	uint32_t		vtnet_txhdridx;
116 	struct vtnet_mac_filter *vtnet_macfilter;
117 
118 	int			vtnet_hdr_size;
119 	int			vtnet_tx_size;
120 	int			vtnet_rx_size;
121 	int			vtnet_rx_process_limit;
122 	int			vtnet_rx_mbuf_size;
123 	int			vtnet_rx_mbuf_count;
124 	int			vtnet_if_flags;
125 	int			vtnet_watchdog_timer;
126 	uint64_t		vtnet_features;
127 
128 	struct task		vtnet_cfgchg_task;
129 
130 	struct vtnet_statistics	vtnet_stats;
131 
132 	struct callout		vtnet_tick_ch;
133 
134 	eventhandler_tag	vtnet_vlan_attach;
135 	eventhandler_tag	vtnet_vlan_detach;
136 
137 	struct ifmedia		vtnet_media;
138 	/*
139 	 * Fake media type; the host does not provide us with
140 	 * any real media information.
141 	 */
142 #define VTNET_MEDIATYPE		(IFM_ETHER | IFM_1000_T | IFM_FDX)
143 	char			vtnet_hwaddr[ETHER_ADDR_LEN];
144 
145 	/*
146 	 * During reset, the host's VLAN filtering table is lost. The
147 	 * array below is used to restore all the VLANs configured on
148 	 * this interface after a reset.
149 	 */
150 #define VTNET_VLAN_SHADOW_SIZE	(4096 / 32)
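	/* 4096 / 32 = 128 32-bit words, i.e. one shadow bit per VLAN ID. */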
151 	int			vtnet_nvlans;
152 	uint32_t		vtnet_vlan_shadow[VTNET_VLAN_SHADOW_SIZE];
153 
154 	char			vtnet_mtx_name[16];
155 };
156 
157 /*
158  * When mergeable buffers are not negotiated, the vtnet_rx_header structure
159  * below is placed at the beginning of the mbuf data. Use 4 bytes of pad
160  * both to keep the VirtIO header and the data non-contiguous and to keep
161  * the frame's payload 4-byte aligned.
162  *
163  * When mergeable buffers are negotiated, the host puts the VirtIO header in
164  * the beginning of the first mbuf's data.
165  */
166 #define VTNET_RX_HEADER_PAD	4
167 struct vtnet_rx_header {
168 	struct virtio_net_hdr	vrh_hdr;
169 	char			vrh_pad[VTNET_RX_HEADER_PAD];
170 } __packed;
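/*
 * Illustrative arithmetic (not part of the original comments): struct
 * virtio_net_hdr is 10 bytes, so with the 4 pad bytes the receive header
 * occupies 14 bytes. The 14-byte Ethernet header that follows then ends at
 * offset 28, leaving the frame's IP payload 4-byte aligned as intended.
 */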
171 
172 /*
173  * For each outgoing frame, the vtnet_tx_header below is taken from the
174  * preallocated vtnet_txhdrarea array.
175  */
176 struct vtnet_tx_header {
177 	union {
178 		struct virtio_net_hdr		hdr;
179 		struct virtio_net_hdr_mrg_rxbuf	mhdr;
180 	} vth_uhdr;
181 
182 	struct mbuf		*vth_mbuf;
183 };
184 
185 MALLOC_DEFINE(M_VTNET, "VTNET_TX", "Outgoing VTNET TX frame header");
186 
187 /*
188  * The VirtIO specification does not place a limit on the number of MAC
189  * addresses the guest driver may request to be filtered. In practice,
190  * the host is constrained by available resources. To simplify this driver,
191  * impose a reasonably high limit on the number of MAC addresses we will
192  * filter before falling back to promiscuous or all-multicast modes.
193  */
194 #define VTNET_MAX_MAC_ENTRIES	128
195 
196 struct vtnet_mac_table {
197 	uint32_t		nentries;
198 	uint8_t			macs[VTNET_MAX_MAC_ENTRIES][ETHER_ADDR_LEN];
199 } __packed;
200 
201 struct vtnet_mac_filter {
202 	struct vtnet_mac_table	vmf_unicast;
203 	uint32_t		vmf_pad; /* Make tables non-contiguous. */
204 	struct vtnet_mac_table	vmf_multicast;
205 };
206 
207 #define VTNET_WATCHDOG_TIMEOUT	5
208 #define VTNET_CSUM_OFFLOAD	(CSUM_TCP | CSUM_UDP)
209 
210 /* Features desired/implemented by this driver. */
211 #define VTNET_FEATURES 		\
212     (VIRTIO_NET_F_MAC		| \
213      VIRTIO_NET_F_STATUS	| \
214      VIRTIO_NET_F_CTRL_VQ	| \
215      VIRTIO_NET_F_CTRL_RX	| \
216      VIRTIO_NET_F_CTRL_MAC_ADDR	| \
217      VIRTIO_NET_F_CTRL_VLAN	| \
218      VIRTIO_NET_F_CSUM		| \
219      VIRTIO_NET_F_HOST_TSO4	| \
220      VIRTIO_NET_F_HOST_TSO6	| \
221      VIRTIO_NET_F_HOST_ECN	| \
222      VIRTIO_NET_F_GUEST_CSUM	| \
223      VIRTIO_NET_F_GUEST_TSO4	| \
224      VIRTIO_NET_F_GUEST_TSO6	| \
225      VIRTIO_NET_F_GUEST_ECN	| \
226      VIRTIO_NET_F_MRG_RXBUF)
227 
228 /*
229  * The VIRTIO_NET_F_GUEST_TSO[46] features permit the host to send us
230  * frames larger than 1514 bytes. We do not yet support software LRO
231  * via tcp_lro_rx().
232  */
233 #define VTNET_LRO_FEATURES (VIRTIO_NET_F_GUEST_TSO4 | \
234 			    VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_ECN)
235 
236 #define VTNET_MAX_MTU		65536
237 #define VTNET_MAX_RX_SIZE	65550
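/*
 * Presumably VTNET_MAX_RX_SIZE is VTNET_MAX_MTU plus ETHER_HDR_LEN,
 * i.e. 65536 + 14 = 65550 bytes for the largest receivable frame.
 */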
238 
239 /*
240  * Used to preallocate the Vq indirect descriptors. The first segment
241  * is reserved for the header.
242  */
243 #define VTNET_MIN_RX_SEGS	2
244 #define VTNET_MAX_RX_SEGS	34
245 #define VTNET_MAX_TX_SEGS	34
246 
247 #define IFCAP_TSO4              0x00100 /* can do TCP Segmentation Offload */
248 #define IFCAP_TSO6              0x00200 /* can do TCP6 Segmentation Offload */
249 #define IFCAP_LRO               0x00400 /* can do Large Receive Offload */
250 #define IFCAP_VLAN_HWFILTER     0x10000 /* interface hw can filter vlan tag */
251 #define IFCAP_VLAN_HWTSO        0x40000 /* can do IFCAP_TSO on VLANs */
252 
253 
254 /*
255  * Assert we can receive and transmit the maximum with regular
256  * size clusters.
257  */
258 CTASSERT(((VTNET_MAX_RX_SEGS - 1) * MCLBYTES) >= VTNET_MAX_RX_SIZE);
259 CTASSERT(((VTNET_MAX_TX_SEGS - 1) * MCLBYTES) >= VTNET_MAX_MTU);
260 
261 /*
262  * Determine how many mbufs are in each receive buffer. For LRO without
263  * mergeable descriptors, we must allocate an mbuf chain large enough to
264  * hold both the vtnet_rx_header and the maximum receivable data.
265  */
266 #define VTNET_NEEDED_RX_MBUFS(_sc)					\
267 	((_sc)->vtnet_flags & VTNET_FLAG_LRO_NOMRG) == 0 ? 1 :		\
268 	howmany(sizeof(struct vtnet_rx_header) + VTNET_MAX_RX_SIZE,	\
269 	(_sc)->vtnet_rx_mbuf_size)
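/*
 * Worked example for the LRO_NOMRG case (derived from the definitions
 * above, assuming the usual 2KB MCLBYTES clusters):
 * howmany(14 + 65550, 2048) = 33 mbufs per receive buffer, which together
 * with the segment reserved for the header stays within VTNET_MAX_RX_SEGS (34).
 */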
270 
271 static int	vtnet_modevent(module_t, int, void *);
272 
273 static int	vtnet_probe(device_t);
274 static int	vtnet_attach(device_t);
275 static int	vtnet_detach(device_t);
276 static int	vtnet_suspend(device_t);
277 static int	vtnet_resume(device_t);
278 static int	vtnet_shutdown(device_t);
279 static int	vtnet_config_change(device_t);
280 
281 static void	vtnet_negotiate_features(struct vtnet_softc *);
282 static int	vtnet_alloc_virtqueues(struct vtnet_softc *);
283 static void	vtnet_get_hwaddr(struct vtnet_softc *);
284 static void	vtnet_set_hwaddr(struct vtnet_softc *);
285 static int	vtnet_is_link_up(struct vtnet_softc *);
286 static void	vtnet_update_link_status(struct vtnet_softc *);
287 #if 0
288 static void	vtnet_watchdog(struct vtnet_softc *);
289 #endif
290 static void	vtnet_config_change_task(void *, int);
291 static int	vtnet_change_mtu(struct vtnet_softc *, int);
292 static int	vtnet_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
293 
294 static int	vtnet_init_rx_vq(struct vtnet_softc *);
295 static void	vtnet_free_rx_mbufs(struct vtnet_softc *);
296 static void	vtnet_free_tx_mbufs(struct vtnet_softc *);
297 static void	vtnet_free_ctrl_vq(struct vtnet_softc *);
298 
299 static struct mbuf * vtnet_alloc_rxbuf(struct vtnet_softc *, int,
300 		    struct mbuf **);
301 static int	vtnet_replace_rxbuf(struct vtnet_softc *,
302 		    struct mbuf *, int);
303 static int	vtnet_newbuf(struct vtnet_softc *);
304 static void	vtnet_discard_merged_rxbuf(struct vtnet_softc *, int);
305 static void	vtnet_discard_rxbuf(struct vtnet_softc *, struct mbuf *);
306 static int	vtnet_enqueue_rxbuf(struct vtnet_softc *, struct mbuf *);
307 static void	vtnet_vlan_tag_remove(struct mbuf *);
308 static int	vtnet_rx_csum(struct vtnet_softc *, struct mbuf *,
309 		    struct virtio_net_hdr *);
310 static int	vtnet_rxeof_merged(struct vtnet_softc *, struct mbuf *, int);
311 static int	vtnet_rxeof(struct vtnet_softc *, int, int *);
312 static void	vtnet_rx_intr_task(void *);
313 static int	vtnet_rx_vq_intr(void *);
314 
315 static void	vtnet_txeof(struct vtnet_softc *);
316 static struct mbuf * vtnet_tx_offload(struct vtnet_softc *, struct mbuf *,
317 		    struct virtio_net_hdr *);
318 static int	vtnet_enqueue_txbuf(struct vtnet_softc *, struct mbuf **,
319 		    struct vtnet_tx_header *);
320 static int	vtnet_encap(struct vtnet_softc *, struct mbuf **);
321 static void	vtnet_start_locked(struct ifnet *, struct ifaltq_subque *);
322 static void	vtnet_start(struct ifnet *, struct ifaltq_subque *);
323 static void	vtnet_tick(void *);
324 static void	vtnet_tx_intr_task(void *);
325 static int	vtnet_tx_vq_intr(void *);
326 
327 static void	vtnet_stop(struct vtnet_softc *);
328 static int	vtnet_reinit(struct vtnet_softc *);
329 static void	vtnet_init_locked(struct vtnet_softc *);
330 static void	vtnet_init(void *);
331 
332 static void	vtnet_exec_ctrl_cmd(struct vtnet_softc *, void *,
333 		    struct sglist *, int, int);
334 
335 static int	vtnet_ctrl_mac_cmd(struct vtnet_softc *, uint8_t *);
336 static int	vtnet_ctrl_rx_cmd(struct vtnet_softc *, int, int);
337 static int	vtnet_set_promisc(struct vtnet_softc *, int);
338 static int	vtnet_set_allmulti(struct vtnet_softc *, int);
339 static void	vtnet_rx_filter(struct vtnet_softc *sc);
340 static void	vtnet_rx_filter_mac(struct vtnet_softc *);
341 
342 static int	vtnet_exec_vlan_filter(struct vtnet_softc *, int, uint16_t);
343 static void	vtnet_rx_filter_vlan(struct vtnet_softc *);
344 static void	vtnet_set_vlan_filter(struct vtnet_softc *, int, uint16_t);
345 static void	vtnet_register_vlan(void *, struct ifnet *, uint16_t);
346 static void	vtnet_unregister_vlan(void *, struct ifnet *, uint16_t);
347 
348 static int	vtnet_ifmedia_upd(struct ifnet *);
349 static void	vtnet_ifmedia_sts(struct ifnet *, struct ifmediareq *);
350 
351 static void	vtnet_add_statistics(struct vtnet_softc *);
352 
353 static int	vtnet_enable_rx_intr(struct vtnet_softc *);
354 static int	vtnet_enable_tx_intr(struct vtnet_softc *);
355 static void	vtnet_disable_rx_intr(struct vtnet_softc *);
356 static void	vtnet_disable_tx_intr(struct vtnet_softc *);
357 
358 /* Tunables. */
359 static int vtnet_csum_disable = 0;
360 TUNABLE_INT("hw.vtnet.csum_disable", &vtnet_csum_disable);
361 static int vtnet_tso_disable = 1;
362 TUNABLE_INT("hw.vtnet.tso_disable", &vtnet_tso_disable);
363 static int vtnet_lro_disable = 1;
364 TUNABLE_INT("hw.vtnet.lro_disable", &vtnet_lro_disable);
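/*
 * The tunables above are read from the kernel environment at module load
 * time. As an illustrative example (the values shown are assumptions, not
 * shipped defaults), the following /boot/loader.conf lines would disable
 * checksum offload and enable TSO negotiation at the next boot:
 *
 *	hw.vtnet.csum_disable="1"
 *	hw.vtnet.tso_disable="0"
 */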
365 
366 /*
367  * Reducing the number of transmit completed interrupts can
368  * improve performance. To do so, the define below keeps the
369  * Tx vq interrupt disabled and adds calls to vtnet_txeof()
370  * in the start and watchdog paths. The price to pay for this is
371  * that the m_free'ing of transmitted mbufs may be delayed until
372  * the watchdog fires.
373  */
374 #define VTNET_TX_INTR_MODERATION
375 
376 static struct virtio_feature_desc vtnet_feature_desc[] = {
377 	{ VIRTIO_NET_F_CSUM,		"TxChecksum"	},
378 	{ VIRTIO_NET_F_GUEST_CSUM,	"RxChecksum"	},
379 	{ VIRTIO_NET_F_MAC,		"MacAddress"	},
380 	{ VIRTIO_NET_F_GSO,		"TxAllGSO"	},
381 	{ VIRTIO_NET_F_GUEST_TSO4,	"RxTSOv4"	},
382 	{ VIRTIO_NET_F_GUEST_TSO6,	"RxTSOv6"	},
383 	{ VIRTIO_NET_F_GUEST_ECN,	"RxECN"		},
384 	{ VIRTIO_NET_F_GUEST_UFO,	"RxUFO"		},
385 	{ VIRTIO_NET_F_HOST_TSO4,	"TxTSOv4"	},
386 	{ VIRTIO_NET_F_HOST_TSO6,	"TxTSOv6"	},
387 	{ VIRTIO_NET_F_HOST_ECN,	"TxTSOECN"	},
388 	{ VIRTIO_NET_F_HOST_UFO,	"TxUFO"		},
389 	{ VIRTIO_NET_F_MRG_RXBUF,	"MrgRxBuf"	},
390 	{ VIRTIO_NET_F_STATUS,		"Status"	},
391 	{ VIRTIO_NET_F_CTRL_VQ,		"ControlVq"	},
392 	{ VIRTIO_NET_F_CTRL_RX,		"RxMode"	},
393 	{ VIRTIO_NET_F_CTRL_VLAN,	"VLanFilter"	},
394 	{ VIRTIO_NET_F_CTRL_RX_EXTRA,	"RxModeExtra"	},
395 	{ VIRTIO_NET_F_GUEST_ANNOUNCE,	"GuestAnnounce"	},
396 	{ VIRTIO_NET_F_MQ,		"RFS"		},
397 	{ VIRTIO_NET_F_CTRL_MAC_ADDR,	"SetMacAddress"	},
398 	{ 0, NULL }
399 };
400 
401 static device_method_t vtnet_methods[] = {
402 	/* Device methods. */
403 	DEVMETHOD(device_probe,		vtnet_probe),
404 	DEVMETHOD(device_attach,	vtnet_attach),
405 	DEVMETHOD(device_detach,	vtnet_detach),
406 	DEVMETHOD(device_suspend,	vtnet_suspend),
407 	DEVMETHOD(device_resume,	vtnet_resume),
408 	DEVMETHOD(device_shutdown,	vtnet_shutdown),
409 
410 	/* VirtIO methods. */
411 	DEVMETHOD(virtio_config_change, vtnet_config_change),
412 
413 	{ 0, 0 }
414 };
415 
416 static driver_t vtnet_driver = {
417 	"vtnet",
418 	vtnet_methods,
419 	sizeof(struct vtnet_softc)
420 };
421 
422 static devclass_t vtnet_devclass;
423 
424 DRIVER_MODULE(vtnet, virtio_pci, vtnet_driver, vtnet_devclass,
425 	      vtnet_modevent, 0);
426 MODULE_VERSION(vtnet, 1);
427 MODULE_DEPEND(vtnet, virtio, 1, 1, 1);
428 
429 static int
430 vtnet_modevent(module_t mod, int type, void *unused)
431 {
432 	int error;
433 
434 	error = 0;
435 
436 	switch (type) {
437 	case MOD_LOAD:
438 		break;
439 	case MOD_UNLOAD:
440 		break;
441 	case MOD_SHUTDOWN:
442 		break;
443 	default:
444 		error = EOPNOTSUPP;
445 		break;
446 	}
447 
448 	return (error);
449 }
450 
451 static int
452 vtnet_probe(device_t dev)
453 {
454 	if (virtio_get_device_type(dev) != VIRTIO_ID_NETWORK)
455 		return (ENXIO);
456 
457 	device_set_desc(dev, "VirtIO Networking Adapter");
458 
459 	return (BUS_PROBE_DEFAULT);
460 }
461 
462 static int
463 vtnet_attach(device_t dev)
464 {
465 	struct vtnet_softc *sc;
466 	struct ifnet *ifp;
467 	int tx_size, error;
468 
469 	sc = device_get_softc(dev);
470 	sc->vtnet_dev = dev;
471 
472 	lwkt_serialize_init(&sc->vtnet_slz);
473 	callout_init(&sc->vtnet_tick_ch);
474 
475 	ifmedia_init(&sc->vtnet_media, IFM_IMASK, vtnet_ifmedia_upd,
476 		     vtnet_ifmedia_sts);
477 	ifmedia_add(&sc->vtnet_media, VTNET_MEDIATYPE, 0, NULL);
478 	ifmedia_set(&sc->vtnet_media, VTNET_MEDIATYPE);
479 
480 	vtnet_add_statistics(sc);
481 
482 	virtio_set_feature_desc(dev, vtnet_feature_desc);
483 	vtnet_negotiate_features(sc);
484 
485 	if (virtio_with_feature(dev, VIRTIO_NET_F_MAC)) {
486 		/* This feature should always be negotiated. */
487 		sc->vtnet_flags |= VTNET_FLAG_MAC;
488 	}
489 
490 	if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) {
491 		sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS;
492 		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
493 	} else {
494 		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
495 	}
496 
497 	sc->vtnet_rx_mbuf_size = MCLBYTES;
498 	sc->vtnet_rx_mbuf_count = VTNET_NEEDED_RX_MBUFS(sc);
499 
500 	if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VQ)) {
501 		sc->vtnet_flags |= VTNET_FLAG_CTRL_VQ;
502 
503 		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX))
504 			sc->vtnet_flags |= VTNET_FLAG_CTRL_RX;
505 		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VLAN))
506 			sc->vtnet_flags |= VTNET_FLAG_VLAN_FILTER;
507 		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
508 		    virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX))
509 			sc->vtnet_flags |= VTNET_FLAG_CTRL_MAC;
510 	}
511 
512 	/* Read (or generate) the MAC address for the adapter. */
513 	vtnet_get_hwaddr(sc);
514 
515 	error = vtnet_alloc_virtqueues(sc);
516 	if (error) {
517 		device_printf(dev, "cannot allocate virtqueues\n");
518 		goto fail;
519 	}
520 
521 	ifp = sc->vtnet_ifp = if_alloc(IFT_ETHER);
522 	if (ifp == NULL) {
523 		device_printf(dev, "cannot allocate ifnet structure\n");
524 		error = ENOSPC;
525 		goto fail;
526 	}
527 
528 	ifp->if_softc = sc;
529 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
530 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
531 	ifp->if_init = vtnet_init;
532 	ifp->if_start = vtnet_start;
533 	ifp->if_ioctl = vtnet_ioctl;
534 
535 	sc->vtnet_rx_size = virtqueue_size(sc->vtnet_rx_vq);
536 	sc->vtnet_rx_process_limit = sc->vtnet_rx_size;
537 
538 	tx_size = virtqueue_size(sc->vtnet_tx_vq);
539 	sc->vtnet_tx_size = tx_size;
540 	sc->vtnet_txhdridx = 0;
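	/*
	 * Sizing note (an inference, not from the original source): the
	 * header area is apparently sized on the assumption that each
	 * enqueued frame consumes at least two descriptors, one for the
	 * header and one or more for the mbuf data, so at most tx_size / 2
	 * frames are outstanding at once; one extra header is kept for slack.
	 */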
541 	sc->vtnet_txhdrarea = contigmalloc(
542 	    ((sc->vtnet_tx_size / 2) + 1) * sizeof(struct vtnet_tx_header),
543 	    M_VTNET, M_WAITOK, 0, BUS_SPACE_MAXADDR, 4, 0);
544 	if (sc->vtnet_txhdrarea == NULL) {
545 		device_printf(dev, "cannot contigmalloc the tx headers\n");
		error = ENOMEM;
546 		goto fail;
547 	}
548 	sc->vtnet_macfilter = contigmalloc(
549 	    sizeof(struct vtnet_mac_filter),
550 	    M_DEVBUF, M_WAITOK, 0, BUS_SPACE_MAXADDR, 4, 0);
551 	if (sc->vtnet_macfilter == NULL) {
552 		device_printf(dev,
553 		    "cannot contigmalloc the mac filter table\n");
		error = ENOMEM;
554 		goto fail;
555 	}
556 	ifq_set_maxlen(&ifp->if_snd, tx_size - 1);
557 	ifq_set_ready(&ifp->if_snd);
558 
559 	ether_ifattach(ifp, sc->vtnet_hwaddr, NULL);
560 
561 	if (virtio_with_feature(dev, VIRTIO_NET_F_STATUS)) {
562 		//ifp->if_capabilities |= IFCAP_LINKSTATE;
563 		kprintf("add dynamic link state\n");
564 	}
565 
566 	/* Tell the upper layer(s) we support long frames. */
567 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
568 	ifp->if_capabilities |= IFCAP_JUMBO_MTU | IFCAP_VLAN_MTU;
569 
570 	if (virtio_with_feature(dev, VIRTIO_NET_F_CSUM)) {
571 		ifp->if_capabilities |= IFCAP_TXCSUM;
572 
573 		if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4))
574 			ifp->if_capabilities |= IFCAP_TSO4;
575 		if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
576 			ifp->if_capabilities |= IFCAP_TSO6;
577 		if (ifp->if_capabilities & IFCAP_TSO)
578 			ifp->if_capabilities |= IFCAP_VLAN_HWTSO;
579 
580 		if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN))
581 			sc->vtnet_flags |= VTNET_FLAG_TSO_ECN;
582 	}
583 
584 	if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM)) {
585 		ifp->if_capabilities |= IFCAP_RXCSUM;
586 
587 		if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) ||
588 		    virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6))
589 			ifp->if_capabilities |= IFCAP_LRO;
590 	}
591 
592 	if (ifp->if_capabilities & IFCAP_HWCSUM) {
593 		/*
594 		 * VirtIO does not support VLAN tagging, but we can fake
595 		 * it by inserting and removing the 802.1Q header during
596 		 * transmit and receive. We are then able to do checksum
597 		 * offloading of VLAN frames.
598 		 */
599 		ifp->if_capabilities |=
600 			IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
601 	}
602 
603 	ifp->if_capenable = ifp->if_capabilities;
604 
605 	/*
606 	 * Capabilities after here are not enabled by default.
607 	 */
608 
609 	if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
610 		ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
611 
612 		sc->vtnet_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
613 		    vtnet_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
614 		sc->vtnet_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
615 		    vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
616 	}
617 
618 	TASK_INIT(&sc->vtnet_cfgchg_task, 0, vtnet_config_change_task, sc);
619 
620 	error = virtio_setup_intr(dev, &sc->vtnet_slz);
621 	if (error) {
622 		device_printf(dev, "cannot setup virtqueue interrupts\n");
623 		ether_ifdetach(ifp);
624 		goto fail;
625 	}
626 
627 	/*
628 	 * Device defaults to promiscuous mode for backwards
629 	 * compatibility. Turn it off if possible.
630 	 */
631 	if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
632 		lwkt_serialize_enter(&sc->vtnet_slz);
633 		if (vtnet_set_promisc(sc, 0) != 0) {
634 			ifp->if_flags |= IFF_PROMISC;
635 			device_printf(dev,
636 			    "cannot disable promiscuous mode\n");
637 		}
638 		lwkt_serialize_exit(&sc->vtnet_slz);
639 	} else
640 		ifp->if_flags |= IFF_PROMISC;
641 
642 fail:
643 	if (error)
644 		vtnet_detach(dev);
645 
646 	return (error);
647 }
648 
649 static int
650 vtnet_detach(device_t dev)
651 {
652 	struct vtnet_softc *sc;
653 	struct ifnet *ifp;
654 
655 	sc = device_get_softc(dev);
656 	ifp = sc->vtnet_ifp;
657 
658 	if (device_is_attached(dev)) {
659 		lwkt_serialize_enter(&sc->vtnet_slz);
660 		vtnet_stop(sc);
661 		lwkt_serialize_exit(&sc->vtnet_slz);
662 
663 		callout_stop(&sc->vtnet_tick_ch);
664 		taskqueue_drain(taskqueue_swi, &sc->vtnet_cfgchg_task);
665 
666 		ether_ifdetach(ifp);
667 	}
668 
669 	if (sc->vtnet_vlan_attach != NULL) {
670 		EVENTHANDLER_DEREGISTER(vlan_config, sc->vtnet_vlan_attach);
671 		sc->vtnet_vlan_attach = NULL;
672 	}
673 	if (sc->vtnet_vlan_detach != NULL) {
674 		EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vtnet_vlan_detach);
675 		sc->vtnet_vlan_detach = NULL;
676 	}
677 
678 	if (ifp) {
679 		if_free(ifp);
680 		sc->vtnet_ifp = NULL;
681 	}
682 
683 	if (sc->vtnet_rx_vq != NULL)
684 		vtnet_free_rx_mbufs(sc);
685 	if (sc->vtnet_tx_vq != NULL)
686 		vtnet_free_tx_mbufs(sc);
687 	if (sc->vtnet_ctrl_vq != NULL)
688 		vtnet_free_ctrl_vq(sc);
689 
690 	if (sc->vtnet_txhdrarea != NULL) {
691 		contigfree(sc->vtnet_txhdrarea,
692 		    ((sc->vtnet_tx_size / 2) + 1) *
693 		    sizeof(struct vtnet_tx_header), M_VTNET);
694 		sc->vtnet_txhdrarea = NULL;
695 	}
696 	if (sc->vtnet_macfilter != NULL) {
697 		contigfree(sc->vtnet_macfilter,
698 		    sizeof(struct vtnet_mac_filter), M_DEVBUF);
699 		sc->vtnet_macfilter = NULL;
700 	}
701 
702 	ifmedia_removeall(&sc->vtnet_media);
703 
704 	return (0);
705 }
706 
707 static int
708 vtnet_suspend(device_t dev)
709 {
710 	struct vtnet_softc *sc;
711 
712 	sc = device_get_softc(dev);
713 
714 	lwkt_serialize_enter(&sc->vtnet_slz);
715 	vtnet_stop(sc);
716 	sc->vtnet_flags |= VTNET_FLAG_SUSPENDED;
717 	lwkt_serialize_exit(&sc->vtnet_slz);
718 
719 	return (0);
720 }
721 
722 static int
723 vtnet_resume(device_t dev)
724 {
725 	struct vtnet_softc *sc;
726 	struct ifnet *ifp;
727 
728 	sc = device_get_softc(dev);
729 	ifp = sc->vtnet_ifp;
730 
731 	lwkt_serialize_enter(&sc->vtnet_slz);
732 	if (ifp->if_flags & IFF_UP)
733 		vtnet_init_locked(sc);
734 	sc->vtnet_flags &= ~VTNET_FLAG_SUSPENDED;
735 	lwkt_serialize_exit(&sc->vtnet_slz);
736 
737 	return (0);
738 }
739 
740 static int
741 vtnet_shutdown(device_t dev)
742 {
743 
744 	/*
745 	 * Suspend already does all of what we need to
746 	 * do here; we just never expect to be resumed.
747 	 */
748 	return (vtnet_suspend(dev));
749 }
750 
751 static int
752 vtnet_config_change(device_t dev)
753 {
754 	struct vtnet_softc *sc;
755 
756 	sc = device_get_softc(dev);
757 
758 	taskqueue_enqueue(taskqueue_thread[mycpuid], &sc->vtnet_cfgchg_task);
759 
760 	return (1);
761 }
762 
763 static void
764 vtnet_negotiate_features(struct vtnet_softc *sc)
765 {
766 	device_t dev;
767 	uint64_t mask, features;
768 
769 	dev = sc->vtnet_dev;
770 	mask = 0;
771 
772 	if (vtnet_csum_disable)
773 		mask |= VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM;
774 
775 	/*
776 	 * TSO and LRO are only available when their corresponding
777 	 * checksum offload feature is also negotiated.
778 	 */
779 
780 	if (vtnet_csum_disable || vtnet_tso_disable)
781 		mask |= VIRTIO_NET_F_HOST_TSO4 | VIRTIO_NET_F_HOST_TSO6 |
782 		    VIRTIO_NET_F_HOST_ECN;
783 
784 	if (vtnet_csum_disable || vtnet_lro_disable)
785 		mask |= VTNET_LRO_FEATURES;
786 
787 	features = VTNET_FEATURES & ~mask;
788 	features |= VIRTIO_F_NOTIFY_ON_EMPTY;
789 	sc->vtnet_features = virtio_negotiate_features(dev, features);
790 }
791 
792 static int
793 vtnet_alloc_virtqueues(struct vtnet_softc *sc)
794 {
795 	device_t dev;
796 	struct vq_alloc_info vq_info[3];
797 	int nvqs, rxsegs;
798 
799 	dev = sc->vtnet_dev;
800 	nvqs = 2;
801 
802 	/*
803 	 * Indirect descriptors are not needed for the Rx
804 	 * virtqueue when mergeable buffers are negotiated.
805 	 * The header is placed inline with the data, not
806 	 * in a separate descriptor, and mbuf clusters are
807 	 * always physically contiguous.
808 	 */
809 	if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
810 		rxsegs = sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG ?
811 		    VTNET_MAX_RX_SEGS : VTNET_MIN_RX_SEGS;
812 	} else
813 		rxsegs = 0;
814 
815 	VQ_ALLOC_INFO_INIT(&vq_info[0], rxsegs,
816 	    vtnet_rx_vq_intr, sc, &sc->vtnet_rx_vq,
817 	    "%s receive", device_get_nameunit(dev));
818 
819 	VQ_ALLOC_INFO_INIT(&vq_info[1], VTNET_MAX_TX_SEGS,
820 	    vtnet_tx_vq_intr, sc, &sc->vtnet_tx_vq,
821 	    "%s transmit", device_get_nameunit(dev));
822 
823 	if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
824 		nvqs++;
825 
826 		VQ_ALLOC_INFO_INIT(&vq_info[2], 0, NULL, NULL,
827 		    &sc->vtnet_ctrl_vq, "%s control",
828 		    device_get_nameunit(dev));
829 	}
830 
831 	return (virtio_alloc_virtqueues(dev, 0, nvqs, vq_info));
832 }
833 
834 static void
835 vtnet_set_hwaddr(struct vtnet_softc *sc)
836 {
837 	device_t dev;
838 
839 	dev = sc->vtnet_dev;
840 
841 	if ((sc->vtnet_flags & VTNET_FLAG_CTRL_MAC) &&
842 	    (sc->vtnet_flags & VTNET_FLAG_CTRL_RX)) {
843 		if (vtnet_ctrl_mac_cmd(sc, sc->vtnet_hwaddr) != 0)
844 			device_printf(dev, "unable to set MAC address\n");
845 	} else if (sc->vtnet_flags & VTNET_FLAG_MAC) {
846 		virtio_write_device_config(dev,
847 		    offsetof(struct virtio_net_config, mac),
848 		    sc->vtnet_hwaddr, ETHER_ADDR_LEN);
849 	}
850 }
851 
852 static void
853 vtnet_get_hwaddr(struct vtnet_softc *sc)
854 {
855 	device_t dev;
856 
857 	dev = sc->vtnet_dev;
858 
859 	if ((sc->vtnet_flags & VTNET_FLAG_MAC) == 0) {
860 		/*
861 		 * Generate a random locally administered unicast address.
862 		 *
863 		 * It would be nice to generate the same MAC address across
864 		 * reboots, but it seems all the hosts currently available
865 		 * support the MAC feature, so this isn't too important.
866 		 */
867 		sc->vtnet_hwaddr[0] = 0xB2;
868 		karc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1);
869 		vtnet_set_hwaddr(sc);
870 		return;
871 	}
872 
873 	virtio_read_device_config(dev,
874 	    offsetof(struct virtio_net_config, mac),
875 	    sc->vtnet_hwaddr, ETHER_ADDR_LEN);
876 }
877 
878 static int
879 vtnet_is_link_up(struct vtnet_softc *sc)
880 {
881 	device_t dev;
882 	struct ifnet *ifp;
883 	uint16_t status;
884 
885 	dev = sc->vtnet_dev;
886 	ifp = sc->vtnet_ifp;
887 
888 	ASSERT_SERIALIZED(&sc->vtnet_slz);
889 
890 	status = virtio_read_dev_config_2(dev,
891 			offsetof(struct virtio_net_config, status));
892 
893 	return ((status & VIRTIO_NET_S_LINK_UP) != 0);
894 }
895 
896 static void
897 vtnet_update_link_status(struct vtnet_softc *sc)
898 {
899 	device_t dev;
900 	struct ifnet *ifp;
901 	struct ifaltq_subque *ifsq;
902 	int link;
903 
904 	dev = sc->vtnet_dev;
905 	ifp = sc->vtnet_ifp;
906 	ifsq = ifq_get_subq_default(&ifp->if_snd);
907 
908 	link = vtnet_is_link_up(sc);
909 
910 	if (link && ((sc->vtnet_flags & VTNET_FLAG_LINK) == 0)) {
911 		sc->vtnet_flags |= VTNET_FLAG_LINK;
912 		if (bootverbose)
913 			device_printf(dev, "Link is up\n");
914 		ifp->if_link_state = LINK_STATE_UP;
915 		if_link_state_change(ifp);
916 		if (!ifsq_is_empty(ifsq))
917 			vtnet_start_locked(ifp, ifsq);
918 	} else if (!link && (sc->vtnet_flags & VTNET_FLAG_LINK)) {
919 		sc->vtnet_flags &= ~VTNET_FLAG_LINK;
920 		if (bootverbose)
921 			device_printf(dev, "Link is down\n");
922 
923 		ifp->if_link_state = LINK_STATE_DOWN;
924 		if_link_state_change(ifp);
925 	}
926 }
927 
928 #if 0
929 static void
930 vtnet_watchdog(struct vtnet_softc *sc)
931 {
932 	struct ifnet *ifp;
933 
934 	ifp = sc->vtnet_ifp;
935 
936 #ifdef VTNET_TX_INTR_MODERATION
937 	vtnet_txeof(sc);
938 #endif
939 
940 	if (sc->vtnet_watchdog_timer == 0 || --sc->vtnet_watchdog_timer)
941 		return;
942 
943 	if_printf(ifp, "watchdog timeout -- resetting\n");
944 #ifdef VTNET_DEBUG
945 	virtqueue_dump(sc->vtnet_tx_vq);
946 #endif
947 	ifp->if_oerrors++;
948 	ifp->if_flags &= ~IFF_RUNNING;
949 	vtnet_init_locked(sc);
950 }
951 #endif
952 
953 static void
954 vtnet_config_change_task(void *arg, int pending)
955 {
956 	struct vtnet_softc *sc;
957 
958 	sc = arg;
959 
960 	lwkt_serialize_enter(&sc->vtnet_slz);
961 	vtnet_update_link_status(sc);
962 	lwkt_serialize_exit(&sc->vtnet_slz);
963 }
964 
965 static int
966 vtnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
967 {
968 	struct vtnet_softc *sc;
969 	struct ifreq *ifr;
970 	int reinit, mask, error;
971 
972 	sc = ifp->if_softc;
973 	ifr = (struct ifreq *) data;
974 	reinit = 0;
975 	error = 0;
976 
977 	switch (cmd) {
978 	case SIOCSIFMTU:
979 		if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > VTNET_MAX_MTU)
980 			error = EINVAL;
981 		else if (ifp->if_mtu != ifr->ifr_mtu) {
982 			lwkt_serialize_enter(&sc->vtnet_slz);
983 			error = vtnet_change_mtu(sc, ifr->ifr_mtu);
984 			lwkt_serialize_exit(&sc->vtnet_slz);
985 		}
986 		break;
987 
988 	case SIOCSIFFLAGS:
989 		lwkt_serialize_enter(&sc->vtnet_slz);
990 		if ((ifp->if_flags & IFF_UP) == 0) {
991 			if (ifp->if_flags & IFF_RUNNING)
992 				vtnet_stop(sc);
993 		} else if (ifp->if_flags & IFF_RUNNING) {
994 			if ((ifp->if_flags ^ sc->vtnet_if_flags) &
995 			    (IFF_PROMISC | IFF_ALLMULTI)) {
996 				if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX)
997 					vtnet_rx_filter(sc);
998 				else
999 					error = ENOTSUP;
1000 			}
1001 		} else
1002 			vtnet_init_locked(sc);
1003 
1004 		if (error == 0)
1005 			sc->vtnet_if_flags = ifp->if_flags;
1006 		lwkt_serialize_exit(&sc->vtnet_slz);
1007 		break;
1008 
1009 	case SIOCADDMULTI:
1010 	case SIOCDELMULTI:
1011 		lwkt_serialize_enter(&sc->vtnet_slz);
1012 		if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) &&
1013 		    (ifp->if_flags & IFF_RUNNING))
1014 			vtnet_rx_filter_mac(sc);
1015 		lwkt_serialize_exit(&sc->vtnet_slz);
1016 		break;
1017 
1018 	case SIOCSIFMEDIA:
1019 	case SIOCGIFMEDIA:
1020 		error = ifmedia_ioctl(ifp, ifr, &sc->vtnet_media, cmd);
1021 		break;
1022 
1023 	case SIOCSIFCAP:
1024 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1025 
1026 		lwkt_serialize_enter(&sc->vtnet_slz);
1027 
1028 		if (mask & IFCAP_TXCSUM) {
1029 			ifp->if_capenable ^= IFCAP_TXCSUM;
1030 			if (ifp->if_capenable & IFCAP_TXCSUM)
1031 				ifp->if_hwassist |= VTNET_CSUM_OFFLOAD;
1032 			else
1033 				ifp->if_hwassist &= ~VTNET_CSUM_OFFLOAD;
1034 		}
1035 
1036 		if (mask & IFCAP_TSO4) {
1037 			ifp->if_capenable ^= IFCAP_TSO4;
1038 			if (ifp->if_capenable & IFCAP_TSO4)
1039 				ifp->if_hwassist |= CSUM_TSO;
1040 			else
1041 				ifp->if_hwassist &= ~CSUM_TSO;
1042 		}
1043 
1044 		if (mask & IFCAP_RXCSUM) {
1045 			ifp->if_capenable ^= IFCAP_RXCSUM;
1046 			reinit = 1;
1047 		}
1048 
1049 		if (mask & IFCAP_LRO) {
1050 			ifp->if_capenable ^= IFCAP_LRO;
1051 			reinit = 1;
1052 		}
1053 
1054 		if (mask & IFCAP_VLAN_HWFILTER) {
1055 			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1056 			reinit = 1;
1057 		}
1058 
1059 		if (mask & IFCAP_VLAN_HWTSO)
1060 			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1061 
1062 		if (mask & IFCAP_VLAN_HWTAGGING)
1063 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1064 
1065 		if (reinit && (ifp->if_flags & IFF_RUNNING)) {
1066 			ifp->if_flags &= ~IFF_RUNNING;
1067 			vtnet_init_locked(sc);
1068 		}
1069 		//VLAN_CAPABILITIES(ifp);
1070 
1071 		lwkt_serialize_exit(&sc->vtnet_slz);
1072 		break;
1073 
1074 	default:
1075 		error = ether_ioctl(ifp, cmd, data);
1076 		break;
1077 	}
1078 
1079 	return (error);
1080 }
1081 
1082 static int
1083 vtnet_change_mtu(struct vtnet_softc *sc, int new_mtu)
1084 {
1085 	struct ifnet *ifp;
1086 	int new_frame_size, clsize;
1087 
1088 	ifp = sc->vtnet_ifp;
1089 
1090 	if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
1091 		new_frame_size = sizeof(struct vtnet_rx_header) +
1092 		    sizeof(struct ether_vlan_header) + new_mtu;
1093 
1094 		if (new_frame_size > MJUM9BYTES)
1095 			return (EINVAL);
1096 
1097 		if (new_frame_size <= MCLBYTES)
1098 			clsize = MCLBYTES;
1099 		else
1100 			clsize = MJUM9BYTES;
1101 	} else {
1102 		new_frame_size = sizeof(struct virtio_net_hdr_mrg_rxbuf) +
1103 		    sizeof(struct ether_vlan_header) + new_mtu;
1104 
1105 		if (new_frame_size <= MCLBYTES)
1106 			clsize = MCLBYTES;
1107 		else
1108 			clsize = MJUMPAGESIZE;
1109 	}
1110 
1111 	sc->vtnet_rx_mbuf_size = clsize;
1112 	sc->vtnet_rx_mbuf_count = VTNET_NEEDED_RX_MBUFS(sc);
1113 	KASSERT(sc->vtnet_rx_mbuf_count < VTNET_MAX_RX_SEGS,
1114 	    ("too many rx mbufs: %d", sc->vtnet_rx_mbuf_count));
1115 
1116 	ifp->if_mtu = new_mtu;
1117 
1118 	if (ifp->if_flags & IFF_RUNNING) {
1119 		ifp->if_flags &= ~IFF_RUNNING;
1120 		vtnet_init_locked(sc);
1121 	}
1122 
1123 	return (0);
1124 }
1125 
1126 static int
1127 vtnet_init_rx_vq(struct vtnet_softc *sc)
1128 {
1129 	struct virtqueue *vq;
1130 	int nbufs, error;
1131 
1132 	vq = sc->vtnet_rx_vq;
1133 	nbufs = 0;
1134 	error = ENOSPC;
1135 
1136 	while (!virtqueue_full(vq)) {
1137 		if ((error = vtnet_newbuf(sc)) != 0)
1138 			break;
1139 		nbufs++;
1140 	}
1141 
1142 	if (nbufs > 0) {
1143 		virtqueue_notify(vq, &sc->vtnet_slz);
1144 
1145 		/*
1146 		 * EMSGSIZE signifies the virtqueue did not have enough
1147 		 * entries available to hold the last mbuf. This is not
1148 		 * an error. We should not get ENOSPC since we check if
1149 		 * the virtqueue is full before attempting to add a
1150 		 * buffer.
1151 		 */
1152 		if (error == EMSGSIZE)
1153 			error = 0;
1154 	}
1155 
1156 	return (error);
1157 }
1158 
1159 static void
1160 vtnet_free_rx_mbufs(struct vtnet_softc *sc)
1161 {
1162 	struct virtqueue *vq;
1163 	struct mbuf *m;
1164 	int last;
1165 
1166 	vq = sc->vtnet_rx_vq;
1167 	last = 0;
1168 
1169 	while ((m = virtqueue_drain(vq, &last)) != NULL)
1170 		m_freem(m);
1171 
1172 	KASSERT(virtqueue_empty(vq), ("mbufs remaining in Rx Vq"));
1173 }
1174 
1175 static void
1176 vtnet_free_tx_mbufs(struct vtnet_softc *sc)
1177 {
1178 	struct virtqueue *vq;
1179 	struct vtnet_tx_header *txhdr;
1180 	int last;
1181 
1182 	vq = sc->vtnet_tx_vq;
1183 	last = 0;
1184 
1185 	while ((txhdr = virtqueue_drain(vq, &last)) != NULL) {
1186 		m_freem(txhdr->vth_mbuf);
1187 	}
1188 
1189 	KASSERT(virtqueue_empty(vq), ("mbufs remaining in Tx Vq"));
1190 }
1191 
1192 static void
1193 vtnet_free_ctrl_vq(struct vtnet_softc *sc)
1194 {
1195 	/*
1196 	 * The control virtqueue is only polled, therefore
1197 	 * it should already be empty.
1198 	 */
1199 	KASSERT(virtqueue_empty(sc->vtnet_ctrl_vq),
1200 		("Ctrl Vq not empty"));
1201 }
1202 
1203 static struct mbuf *
1204 vtnet_alloc_rxbuf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp)
1205 {
1206 	struct mbuf *m_head, *m_tail, *m;
1207 	int i, clsize;
1208 
1209 	clsize = sc->vtnet_rx_mbuf_size;
1210 
1211 	/* Use m_getcl() instead of m_getjcl(); see if_mxge.c comment near line 2398. */
1212 	//m_head = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, clsize);
1213 	m_head = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
1214 	if (m_head == NULL)
1215 		goto fail;
1216 
1217 	m_head->m_len = clsize;
1218 	m_tail = m_head;
1219 
1220 	if (nbufs > 1) {
1221 		KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
1222 			("chained Rx mbuf requested without LRO_NOMRG"));
1223 
1224 		for (i = 0; i < nbufs - 1; i++) {
1225 			//m = m_getjcl(M_DONTWAIT, MT_DATA, 0, clsize);
1226 			m = m_getcl(M_NOWAIT, MT_DATA, 0);
1227 			if (m == NULL)
1228 				goto fail;
1229 
1230 			m->m_len = clsize;
1231 			m_tail->m_next = m;
1232 			m_tail = m;
1233 		}
1234 	}
1235 
1236 	if (m_tailp != NULL)
1237 		*m_tailp = m_tail;
1238 
1239 	return (m_head);
1240 
1241 fail:
1242 	sc->vtnet_stats.mbuf_alloc_failed++;
1243 	m_freem(m_head);
1244 
1245 	return (NULL);
1246 }
1247 
1248 static int
1249 vtnet_replace_rxbuf(struct vtnet_softc *sc, struct mbuf *m0, int len0)
1250 {
1251 	struct mbuf *m, *m_prev;
1252 	struct mbuf *m_new, *m_tail;
1253 	int len, clsize, nreplace, error;
1254 
1255 	m = m0;
1256 	m_prev = NULL;
1257 	len = len0;
1258 
1259 	m_tail = NULL;
1260 	clsize = sc->vtnet_rx_mbuf_size;
1261 	nreplace = 0;
1262 
1263 	if (m->m_next != NULL)
1264 		KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
1265 		    ("chained Rx mbuf without LRO_NOMRG"));
1266 
1267 	/*
1268 	 * Since LRO_NOMRG mbuf chains are so large, we want to avoid
1269 	 * allocating an entire chain for each received frame. When
1270 	 * the received frame's length is less than that of the chain,
1271 	 * the unused mbufs are reassigned to the new chain.
1272 	 */
1273 	while (len > 0) {
1274 		/*
1275 		 * Something is seriously wrong if we received
1276 		 * a frame larger than the mbuf chain. Drop it.
1277 		 */
1278 		if (m == NULL) {
1279 			sc->vtnet_stats.rx_frame_too_large++;
1280 			return (EMSGSIZE);
1281 		}
1282 
1283 		KASSERT(m->m_len == clsize,
1284 		    ("mbuf length not expected cluster size: %d",
1285 		    m->m_len));
1286 
1287 		m->m_len = MIN(m->m_len, len);
1288 		len -= m->m_len;
1289 
1290 		m_prev = m;
1291 		m = m->m_next;
1292 		nreplace++;
1293 	}
1294 
1295 	KASSERT(m_prev != NULL, ("m_prev == NULL"));
1296 	KASSERT(nreplace <= sc->vtnet_rx_mbuf_count,
1297 		("too many replacement mbufs: %d/%d", nreplace,
1298 		sc->vtnet_rx_mbuf_count));
1299 
1300 	m_new = vtnet_alloc_rxbuf(sc, nreplace, &m_tail);
1301 	if (m_new == NULL) {
1302 		m_prev->m_len = clsize;
1303 		return (ENOBUFS);
1304 	}
1305 
1306 	/*
1307 	 * Move unused mbufs, if any, from the original chain
1308 	 * onto the end of the new chain.
1309 	 */
1310 	if (m_prev->m_next != NULL) {
1311 		m_tail->m_next = m_prev->m_next;
1312 		m_prev->m_next = NULL;
1313 	}
1314 
1315 	error = vtnet_enqueue_rxbuf(sc, m_new);
1316 	if (error) {
1317 		/*
1318 		 * BAD! We could not enqueue the replacement mbuf chain. We
1319 		 * must restore the m0 chain to the original state if it was
1320 		 * modified so we can subsequently discard it.
1321 		 *
1322 		 * NOTE: The replacement is supposed to be an identical copy
1323 		 * of the one just dequeued, so this is an unexpected error.
1324 		 */
1325 		sc->vtnet_stats.rx_enq_replacement_failed++;
1326 
1327 		if (m_tail->m_next != NULL) {
1328 			m_prev->m_next = m_tail->m_next;
1329 			m_tail->m_next = NULL;
1330 		}
1331 
1332 		m_prev->m_len = clsize;
1333 		m_freem(m_new);
1334 	}
1335 
1336 	return (error);
1337 }
1338 
1339 static int
1340 vtnet_newbuf(struct vtnet_softc *sc)
1341 {
1342 	struct mbuf *m;
1343 	int error;
1344 
1345 	m = vtnet_alloc_rxbuf(sc, sc->vtnet_rx_mbuf_count, NULL);
1346 	if (m == NULL)
1347 		return (ENOBUFS);
1348 
1349 	error = vtnet_enqueue_rxbuf(sc, m);
1350 	if (error)
1351 		m_freem(m);
1352 
1353 	return (error);
1354 }
1355 
1356 static void
1357 vtnet_discard_merged_rxbuf(struct vtnet_softc *sc, int nbufs)
1358 {
1359 	struct virtqueue *vq;
1360 	struct mbuf *m;
1361 
1362 	vq = sc->vtnet_rx_vq;
1363 
1364 	while (--nbufs > 0) {
1365 		if ((m = virtqueue_dequeue(vq, NULL)) == NULL)
1366 			break;
1367 		vtnet_discard_rxbuf(sc, m);
1368 	}
1369 }
1370 
1371 static void
1372 vtnet_discard_rxbuf(struct vtnet_softc *sc, struct mbuf *m)
1373 {
1374 	int error;
1375 
1376 	/*
1377 	 * Requeue the discarded mbuf. This should always be
1378 	 * successful since it was just dequeued.
1379 	 */
1380 	error = vtnet_enqueue_rxbuf(sc, m);
1381 	KASSERT(error == 0, ("cannot requeue discarded mbuf"));
1382 }
1383 
1384 static int
1385 vtnet_enqueue_rxbuf(struct vtnet_softc *sc, struct mbuf *m)
1386 {
1387 	struct sglist sg;
1388 	struct sglist_seg segs[VTNET_MAX_RX_SEGS];
1389 	struct vtnet_rx_header *rxhdr;
1390 	struct virtio_net_hdr *hdr;
1391 	uint8_t *mdata;
1392 	int offset, error;
1393 
1394 	ASSERT_SERIALIZED(&sc->vtnet_slz);
1395 	if ((sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG) == 0)
1396 		KASSERT(m->m_next == NULL, ("chained Rx mbuf"));
1397 
1398 	sglist_init(&sg, VTNET_MAX_RX_SEGS, segs);
1399 
1400 	mdata = mtod(m, uint8_t *);
1401 	offset = 0;
1402 
1403 	if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
1404 		rxhdr = (struct vtnet_rx_header *) mdata;
1405 		hdr = &rxhdr->vrh_hdr;
1406 		offset += sizeof(struct vtnet_rx_header);
1407 
1408 		error = sglist_append(&sg, hdr, sc->vtnet_hdr_size);
1409 		KASSERT(error == 0, ("cannot add header to sglist"));
1410 	}
1411 
1412 	error = sglist_append(&sg, mdata + offset, m->m_len - offset);
1413 	if (error)
1414 		return (error);
1415 
1416 	if (m->m_next != NULL) {
1417 		error = sglist_append_mbuf(&sg, m->m_next);
1418 		if (error)
1419 			return (error);
1420 	}
1421 
1422 	return (virtqueue_enqueue(sc->vtnet_rx_vq, m, &sg, 0, sg.sg_nseg));
1423 }
1424 
1425 static void
1426 vtnet_vlan_tag_remove(struct mbuf *m)
1427 {
1428 	struct ether_vlan_header *evl;
1429 
1430 	evl = mtod(m, struct ether_vlan_header *);
1431 
1432 	m->m_pkthdr.ether_vlantag = ntohs(evl->evl_tag);
1433 	m->m_flags |= M_VLANTAG;
1434 
1435 	/* Strip the 802.1Q header. */
1436 	bcopy((char *) evl, (char *) evl + ETHER_VLAN_ENCAP_LEN,
1437 	    ETHER_HDR_LEN - ETHER_TYPE_LEN);
1438 	m_adj(m, ETHER_VLAN_ENCAP_LEN);
1439 }
1440 
1441 /*
1442  * Alternative method of doing receive checksum offloading. Rather
1443  * than parsing the received frame down to the IP header, use the
1444  * csum_offset to determine which CSUM_* flags are appropriate. We
1445  * can get by with doing this only because the checksum offsets are
1446  * unique for the things we care about.
1447  */
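/*
 * For reference (offsets from the standard BSD protocol headers): uh_sum is
 * at offset 6 within struct udphdr while th_sum is at offset 16 within
 * struct tcphdr, so csum_offset alone distinguishes UDP from TCP below.
 */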
1448 static int
1449 vtnet_rx_csum(struct vtnet_softc *sc, struct mbuf *m,
1450     struct virtio_net_hdr *hdr)
1451 {
1452 	struct ether_header *eh;
1453 	struct ether_vlan_header *evh;
1454 	struct udphdr *udp;
1455 	int csum_len;
1456 	uint16_t eth_type;
1457 
1458 	csum_len = hdr->csum_start + hdr->csum_offset;
1459 
1460 	if (csum_len < sizeof(struct ether_header) + sizeof(struct ip))
1461 		return (1);
1462 	if (m->m_len < csum_len)
1463 		return (1);
1464 
1465 	eh = mtod(m, struct ether_header *);
1466 	eth_type = ntohs(eh->ether_type);
1467 	if (eth_type == ETHERTYPE_VLAN) {
1468 		evh = mtod(m, struct ether_vlan_header *);
1469 		eth_type = ntohs(evh->evl_proto);
1470 	}
1471 
1472 	if (eth_type != ETHERTYPE_IP && eth_type != ETHERTYPE_IPV6) {
1473 		sc->vtnet_stats.rx_csum_bad_ethtype++;
1474 		return (1);
1475 	}
1476 
1477 	/* Use the offset to determine the appropriate CSUM_* flags. */
1478 	switch (hdr->csum_offset) {
1479 	case offsetof(struct udphdr, uh_sum):
1480 		if (m->m_len < hdr->csum_start + sizeof(struct udphdr))
1481 			return (1);
1482 		udp = (struct udphdr *)(mtod(m, uint8_t *) + hdr->csum_start);
1483 		if (udp->uh_sum == 0)
1484 			return (0);
1485 
1486 		/* FALLTHROUGH */
1487 
1488 	case offsetof(struct tcphdr, th_sum):
1489 		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1490 		m->m_pkthdr.csum_data = 0xFFFF;
1491 		break;
1492 
1493 	default:
1494 		sc->vtnet_stats.rx_csum_bad_offset++;
1495 		return (1);
1496 	}
1497 
1498 	sc->vtnet_stats.rx_csum_offloaded++;
1499 
1500 	return (0);
1501 }
1502 
1503 static int
1504 vtnet_rxeof_merged(struct vtnet_softc *sc, struct mbuf *m_head, int nbufs)
1505 {
1506 	struct ifnet *ifp;
1507 	struct virtqueue *vq;
1508 	struct mbuf *m, *m_tail;
1509 	int len;
1510 
1511 	ifp = sc->vtnet_ifp;
1512 	vq = sc->vtnet_rx_vq;
1513 	m_tail = m_head;
1514 
1515 	while (--nbufs > 0) {
1516 		m = virtqueue_dequeue(vq, &len);
1517 		if (m == NULL) {
1518 			ifp->if_ierrors++;
1519 			goto fail;
1520 		}
1521 
1522 		if (vtnet_newbuf(sc) != 0) {
1523 			ifp->if_iqdrops++;
1524 			vtnet_discard_rxbuf(sc, m);
1525 			if (nbufs > 1)
1526 				vtnet_discard_merged_rxbuf(sc, nbufs);
1527 			goto fail;
1528 		}
1529 
1530 		if (m->m_len < len)
1531 			len = m->m_len;
1532 
1533 		m->m_len = len;
1534 		m->m_flags &= ~M_PKTHDR;
1535 
1536 		m_head->m_pkthdr.len += len;
1537 		m_tail->m_next = m;
1538 		m_tail = m;
1539 	}
1540 
1541 	return (0);
1542 
1543 fail:
1544 	sc->vtnet_stats.rx_mergeable_failed++;
1545 	m_freem(m_head);
1546 
1547 	return (1);
1548 }
1549 
1550 static int
1551 vtnet_rxeof(struct vtnet_softc *sc, int count, int *rx_npktsp)
1552 {
1553 	struct virtio_net_hdr lhdr;
1554 	struct ifnet *ifp;
1555 	struct virtqueue *vq;
1556 	struct mbuf *m;
1557 	struct ether_header *eh;
1558 	struct virtio_net_hdr *hdr;
1559 	struct virtio_net_hdr_mrg_rxbuf *mhdr;
1560 	int len, deq, nbufs, adjsz, rx_npkts;
1561 
1562 	ifp = sc->vtnet_ifp;
1563 	vq = sc->vtnet_rx_vq;
1564 	hdr = &lhdr;
1565 	deq = 0;
1566 	rx_npkts = 0;
1567 
1568 	ASSERT_SERIALIZED(&sc->vtnet_slz);
1569 
1570 	while (--count >= 0) {
1571 		m = virtqueue_dequeue(vq, &len);
1572 		if (m == NULL)
1573 			break;
1574 		deq++;
1575 
1576 		if (len < sc->vtnet_hdr_size + ETHER_HDR_LEN) {
1577 			ifp->if_ierrors++;
1578 			vtnet_discard_rxbuf(sc, m);
1579 			continue;
1580 		}
1581 
1582 		if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
1583 			nbufs = 1;
1584 			adjsz = sizeof(struct vtnet_rx_header);
1585 			/*
1586 			 * Account for our pad between the header and
1587 			 * the actual start of the frame.
1588 			 */
1589 			len += VTNET_RX_HEADER_PAD;
1590 		} else {
1591 			mhdr = mtod(m, struct virtio_net_hdr_mrg_rxbuf *);
1592 			nbufs = mhdr->num_buffers;
1593 			adjsz = sizeof(struct virtio_net_hdr_mrg_rxbuf);
1594 		}
1595 
1596 		if (vtnet_replace_rxbuf(sc, m, len) != 0) {
1597 			ifp->if_iqdrops++;
1598 			vtnet_discard_rxbuf(sc, m);
1599 			if (nbufs > 1)
1600 				vtnet_discard_merged_rxbuf(sc, nbufs);
1601 			continue;
1602 		}
1603 
1604 		m->m_pkthdr.len = len;
1605 		m->m_pkthdr.rcvif = ifp;
1606 		m->m_pkthdr.csum_flags = 0;
1607 
1608 		if (nbufs > 1) {
1609 			if (vtnet_rxeof_merged(sc, m, nbufs) != 0)
1610 				continue;
1611 		}
1612 
1613 		ifp->if_ipackets++;
1614 
1615 		/*
1616 		 * Save copy of header before we strip it. For both mergeable
1617 		 * and non-mergeable, the VirtIO header is placed first in the
1618 		 * mbuf's data. We no longer need num_buffers, so always use a
1619 		 * virtio_net_hdr.
1620 		 */
1621 		memcpy(hdr, mtod(m, void *), sizeof(struct virtio_net_hdr));
1622 		m_adj(m, adjsz);
1623 
1624 		if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1625 			eh = mtod(m, struct ether_header *);
1626 			if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1627 				vtnet_vlan_tag_remove(m);
1628 
1629 				/*
1630 				 * With the 802.1Q header removed, update the
1631 				 * checksum starting location accordingly.
1632 				 */
1633 				if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
1634 					hdr->csum_start -=
1635 					    ETHER_VLAN_ENCAP_LEN;
1636 			}
1637 		}
1638 
1639 		if (ifp->if_capenable & IFCAP_RXCSUM &&
1640 		    hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
1641 			if (vtnet_rx_csum(sc, m, hdr) != 0)
1642 				sc->vtnet_stats.rx_csum_failed++;
1643 		}
1644 
1645 		lwkt_serialize_exit(&sc->vtnet_slz);
1646 		rx_npkts++;
1647 		ifp->if_input(ifp, m, NULL, -1);
1648 		lwkt_serialize_enter(&sc->vtnet_slz);
1649 
1650 		/*
1651 		 * The interface may have been stopped while we were
1652 		 * passing the packet up the network stack.
1653 		 */
1654 		if ((ifp->if_flags & IFF_RUNNING) == 0)
1655 			break;
1656 	}
1657 
1658 	virtqueue_notify(vq, &sc->vtnet_slz);
1659 
1660 	if (rx_npktsp != NULL)
1661 		*rx_npktsp = rx_npkts;
1662 
1663 	return (count > 0 ? 0 : EAGAIN);
1664 }
1665 
1666 static void
1667 vtnet_rx_intr_task(void *arg)
1668 {
1669 	struct vtnet_softc *sc;
1670 	struct ifnet *ifp;
1671 	int more;
1672 
1673 	sc = arg;
1674 	ifp = sc->vtnet_ifp;
1675 
1676 next:
1677 //	lwkt_serialize_enter(&sc->vtnet_slz);
1678 
1679 	if ((ifp->if_flags & IFF_RUNNING) == 0) {
1680 		vtnet_enable_rx_intr(sc);
1681 //		lwkt_serialize_exit(&sc->vtnet_slz);
1682 		return;
1683 	}
1684 
1685 	more = vtnet_rxeof(sc, sc->vtnet_rx_process_limit, NULL);
1686 	if (!more && vtnet_enable_rx_intr(sc) != 0) {
1687 		vtnet_disable_rx_intr(sc);
1688 		more = 1;
1689 	}
1690 
1691 //	lwkt_serialize_exit(&sc->vtnet_slz);
1692 
1693 	if (more) {
1694 		sc->vtnet_stats.rx_task_rescheduled++;
1695 		goto next;
1696 	}
1697 }
1698 
1699 static int
1700 vtnet_rx_vq_intr(void *xsc)
1701 {
1702 	struct vtnet_softc *sc;
1703 
1704 	sc = xsc;
1705 
1706 	vtnet_disable_rx_intr(sc);
1707 	vtnet_rx_intr_task(sc);
1708 
1709 	return (1);
1710 }
1711 
1712 static void
1713 vtnet_txeof(struct vtnet_softc *sc)
1714 {
1715 	struct virtqueue *vq;
1716 	struct ifnet *ifp;
1717 	struct vtnet_tx_header *txhdr;
1718 	int deq;
1719 
1720 	vq = sc->vtnet_tx_vq;
1721 	ifp = sc->vtnet_ifp;
1722 	deq = 0;
1723 
1724 	ASSERT_SERIALIZED(&sc->vtnet_slz);
1725 
1726 	while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) {
1727 		deq++;
1728 		ifp->if_opackets++;
1729 		m_freem(txhdr->vth_mbuf);
1730 	}
1731 
1732 	if (deq > 0) {
1733 		ifq_clr_oactive(&ifp->if_snd);
1734 		if (virtqueue_empty(vq))
1735 			sc->vtnet_watchdog_timer = 0;
1736 	}
1737 }
1738 
1739 static struct mbuf *
1740 vtnet_tx_offload(struct vtnet_softc *sc, struct mbuf *m,
1741     struct virtio_net_hdr *hdr)
1742 {
1743 	struct ifnet *ifp;
1744 	struct ether_header *eh;
1745 	struct ether_vlan_header *evh;
1746 	struct ip *ip;
1747 	struct ip6_hdr *ip6;
1748 	struct tcphdr *tcp;
1749 	int ip_offset;
1750 	uint16_t eth_type, csum_start;
1751 	uint8_t ip_proto, gso_type;
1752 
1753 	ifp = sc->vtnet_ifp;
1754 	M_ASSERTPKTHDR(m);
1755 
1756 	ip_offset = sizeof(struct ether_header);
1757 	if (m->m_len < ip_offset) {
1758 		if ((m = m_pullup(m, ip_offset)) == NULL)
1759 			return (NULL);
1760 	}
1761 
1762 	eh = mtod(m, struct ether_header *);
1763 	eth_type = ntohs(eh->ether_type);
1764 	if (eth_type == ETHERTYPE_VLAN) {
1765 		ip_offset = sizeof(struct ether_vlan_header);
1766 		if (m->m_len < ip_offset) {
1767 			if ((m = m_pullup(m, ip_offset)) == NULL)
1768 				return (NULL);
1769 		}
1770 		evh = mtod(m, struct ether_vlan_header *);
1771 		eth_type = ntohs(evh->evl_proto);
1772 	}
1773 
1774 	switch (eth_type) {
1775 	case ETHERTYPE_IP:
1776 		if (m->m_len < ip_offset + sizeof(struct ip)) {
1777 			m = m_pullup(m, ip_offset + sizeof(struct ip));
1778 			if (m == NULL)
1779 				return (NULL);
1780 		}
1781 
1782 		ip = (struct ip *)(mtod(m, uint8_t *) + ip_offset);
1783 		ip_proto = ip->ip_p;
1784 		csum_start = ip_offset + (ip->ip_hl << 2);
1785 		gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1786 		break;
1787 
1788 	case ETHERTYPE_IPV6:
1789 		if (m->m_len < ip_offset + sizeof(struct ip6_hdr)) {
1790 			m = m_pullup(m, ip_offset + sizeof(struct ip6_hdr));
1791 			if (m == NULL)
1792 				return (NULL);
1793 		}
1794 
1795 		ip6 = (struct ip6_hdr *)(mtod(m, uint8_t *) + ip_offset);
1796 		/*
1797 		 * XXX Assume no extension headers are present. Presently,
1798 		 * this will always be true in the case of TSO, and FreeBSD
1799 		 * does not perform checksum offloading of IPv6 yet.
1800 		 */
1801 		ip_proto = ip6->ip6_nxt;
1802 		csum_start = ip_offset + sizeof(struct ip6_hdr);
1803 		gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1804 		break;
1805 
1806 	default:
1807 		return (m);
1808 	}
1809 
1810 	if (m->m_pkthdr.csum_flags & VTNET_CSUM_OFFLOAD) {
1811 		hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM;
1812 		hdr->csum_start = csum_start;
1813 		hdr->csum_offset = m->m_pkthdr.csum_data;
1814 
1815 		sc->vtnet_stats.tx_csum_offloaded++;
1816 	}
1817 
1818 	if (m->m_pkthdr.csum_flags & CSUM_TSO) {
1819 		if (ip_proto != IPPROTO_TCP)
1820 			return (m);
1821 
1822 		if (m->m_len < csum_start + sizeof(struct tcphdr)) {
1823 			m = m_pullup(m, csum_start + sizeof(struct tcphdr));
1824 			if (m == NULL)
1825 				return (NULL);
1826 		}
1827 
1828 		tcp = (struct tcphdr *)(mtod(m, uint8_t *) + csum_start);
1829 		hdr->gso_type = gso_type;
1830 		hdr->hdr_len = csum_start + (tcp->th_off << 2);
1831 		hdr->gso_size = m->m_pkthdr.tso_segsz;
1832 
1833 		if (tcp->th_flags & TH_CWR) {
1834 			/*
1835 			 * Drop if we did not negotiate VIRTIO_NET_F_HOST_ECN.
1836 			 * ECN support is only configurable globally with the
1837 			 * net.inet.tcp.ecn.enable sysctl knob.
1838 			 */
1839 			if ((sc->vtnet_flags & VTNET_FLAG_TSO_ECN) == 0) {
1840 				if_printf(ifp, "TSO with ECN not supported "
1841 				    "by host\n");
1842 				m_freem(m);
1843 				return (NULL);
1844 			}
1845 
1846 			hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
1847 		}
1848 
1849 		sc->vtnet_stats.tx_tso_offloaded++;
1850 	}
1851 
1852 	return (m);
1853 }
1854 
1855 static int
1856 vtnet_enqueue_txbuf(struct vtnet_softc *sc, struct mbuf **m_head,
1857     struct vtnet_tx_header *txhdr)
1858 {
1859 	struct sglist sg;
1860 	struct sglist_seg segs[VTNET_MAX_TX_SEGS];
1861 	struct virtqueue *vq;
1862 	struct mbuf *m;
1863 	int collapsed, error;
1864 
1865 	vq = sc->vtnet_tx_vq;
1866 	m = *m_head;
1867 	collapsed = 0;
1868 
1869 	sglist_init(&sg, VTNET_MAX_TX_SEGS, segs);
1870 	error = sglist_append(&sg, &txhdr->vth_uhdr, sc->vtnet_hdr_size);
1871 	KASSERT(error == 0 && sg.sg_nseg == 1,
1872 	    ("cannot add header to sglist"));
1873 
1874 again:
1875 	error = sglist_append_mbuf(&sg, m);
1876 	if (error) {
1877 		if (collapsed)
1878 			goto fail;
1879 
1880 		//m = m_collapse(m, M_NOWAIT, VTNET_MAX_TX_SEGS - 1);
1881 		m = m_defrag(m, M_NOWAIT);
1882 		if (m == NULL)
1883 			goto fail;
1884 
1885 		*m_head = m;
1886 		collapsed = 1;
1887 		goto again;
1888 	}
1889 
1890 	txhdr->vth_mbuf = m;
1891 
1892 	return (virtqueue_enqueue(vq, txhdr, &sg, sg.sg_nseg, 0));
1893 
1894 fail:
1895 	m_freem(*m_head);
1896 	*m_head = NULL;
1897 
1898 	return (ENOBUFS);
1899 }
1900 
1901 static struct mbuf *
1902 vtnet_vlan_tag_insert(struct mbuf *m)
1903 {
1904 	struct mbuf *n;
1905 	struct ether_vlan_header *evl;
1906 
1907 	if (M_WRITABLE(m) == 0) {
1908 		n = m_dup(m, M_NOWAIT);
1909 		m_freem(m);
1910 		if ((m = n) == NULL)
1911 			return (NULL);
1912 	}
1913 
1914 	M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT);
1915 	if (m == NULL)
1916 		return (NULL);
1917 	if (m->m_len < sizeof(struct ether_vlan_header)) {
1918 		m = m_pullup(m, sizeof(struct ether_vlan_header));
1919 		if (m == NULL)
1920 			return (NULL);
1921 	}
1922 
1923 	/* Insert 802.1Q header into the existing Ethernet header. */
1924 	evl = mtod(m, struct ether_vlan_header *);
1925 	bcopy((char *) evl + ETHER_VLAN_ENCAP_LEN,
1926 	      (char *) evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
1927 	evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
1928 	evl->evl_tag = htons(m->m_pkthdr.ether_vlantag);
1929 	m->m_flags &= ~M_VLANTAG;
1930 
1931 	return (m);
1932 }
1933 
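/*
 * Build the virtio-net header for an outgoing frame, applying VLAN
 * encapsulation and any requested checksum/TSO offload, then hand the
 * frame to vtnet_enqueue_txbuf().
 */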
1934 static int
1935 vtnet_encap(struct vtnet_softc *sc, struct mbuf **m_head)
1936 {
1937 	struct vtnet_tx_header *txhdr;
1938 	struct virtio_net_hdr *hdr;
1939 	struct mbuf *m;
1940 	int error;
1941 
1942 	txhdr = &sc->vtnet_txhdrarea[sc->vtnet_txhdridx];
1943 	memset(txhdr, 0, sizeof(struct vtnet_tx_header));
1944 
1945 	/*
1946 	 * Always use the non-mergeable header to simplify things. When
1947 	 * the mergeable feature is negotiated, the num_buffers field
1948 	 * must be set to zero. We use vtnet_hdr_size later to enqueue
1949 	 * the correct header size to the host.
1950 	 */
1951 	hdr = &txhdr->vth_uhdr.hdr;
1952 	m = *m_head;
1953 
1954 	error = ENOBUFS;
1955 
1956 	if (m->m_flags & M_VLANTAG) {
1957 		//m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
1958 		m = vtnet_vlan_tag_insert(m);
1959 		if ((*m_head = m) == NULL)
1960 			goto fail;
1961 		m->m_flags &= ~M_VLANTAG;
1962 	}
1963 
1964 	if (m->m_pkthdr.csum_flags != 0) {
1965 		m = vtnet_tx_offload(sc, m, hdr);
1966 		if ((*m_head = m) == NULL)
1967 			goto fail;
1968 	}
1969 
1970 	error = vtnet_enqueue_txbuf(sc, m_head, txhdr);
1971 	if (error == 0)
1972 		sc->vtnet_txhdridx =
1973 		    (sc->vtnet_txhdridx + 1) % ((sc->vtnet_tx_size / 2) + 1);
1974 fail:
1975 	return (error);
1976 }
1977 
1978 static void
1979 vtnet_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
1980 {
1981 	struct vtnet_softc *sc;
1982 
1983 	sc = ifp->if_softc;
1984 
1985 	ASSERT_ALTQ_SQ_DEFAULT(ifp, ifsq);
1986 	lwkt_serialize_enter(&sc->vtnet_slz);
1987 	vtnet_start_locked(ifp, ifsq);
1988 	lwkt_serialize_exit(&sc->vtnet_slz);
1989 }
1990 
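/*
 * Drain the interface send queue into the transmit virtqueue until the
 * virtqueue fills up or the queue is empty, then notify the host if any
 * frames were enqueued.
 */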
1991 static void
1992 vtnet_start_locked(struct ifnet *ifp, struct ifaltq_subque *ifsq)
1993 {
1994 	struct vtnet_softc *sc;
1995 	struct virtqueue *vq;
1996 	struct mbuf *m0;
1997 	int enq;
1998 
1999 	sc = ifp->if_softc;
2000 	vq = sc->vtnet_tx_vq;
2001 	enq = 0;
2002 
2003 	ASSERT_SERIALIZED(&sc->vtnet_slz);
2004 
2005 	if ((ifp->if_flags & IFF_RUNNING) == 0 ||
2006 	    (sc->vtnet_flags & VTNET_FLAG_LINK) == 0)
2007 		return;
2008 
2009 #ifdef VTNET_TX_INTR_MODERATION
2010 	if (virtqueue_nused(vq) >= sc->vtnet_tx_size / 2)
2011 		vtnet_txeof(sc);
2012 #endif
2013 
2014 	while (!ifsq_is_empty(ifsq)) {
2015 		if (virtqueue_full(vq)) {
2016 			ifq_set_oactive(&ifp->if_snd);
2017 			break;
2018 		}
2019 
2020 		m0 = ifq_dequeue(&ifp->if_snd);
2021 		if (m0 == NULL)
2022 			break;
2023 
2024 		if (vtnet_encap(sc, &m0) != 0) {
2025 			if (m0 == NULL)
2026 				break;
2027 			ifq_prepend(&ifp->if_snd, m0);
2028 			ifq_set_oactive(&ifp->if_snd);
2029 			break;
2030 		}
2031 
2032 		enq++;
2033 		ETHER_BPF_MTAP(ifp, m0);
2034 	}
2035 
2036 	if (enq > 0) {
2037 		virtqueue_notify(vq, &sc->vtnet_slz);
2038 		sc->vtnet_watchdog_timer = VTNET_WATCHDOG_TIMEOUT;
2039 	}
2040 }
2041 
2042 static void
2043 vtnet_tick(void *xsc)
2044 {
2045 	struct vtnet_softc *sc;
2046 
2047 	sc = xsc;
2048 
2049 #if 0
2050 	ASSERT_SERIALIZED(&sc->vtnet_slz);
2051 #ifdef VTNET_DEBUG
2052 	virtqueue_dump(sc->vtnet_rx_vq);
2053 	virtqueue_dump(sc->vtnet_tx_vq);
2054 #endif
2055 
2056 	vtnet_watchdog(sc);
2057 	callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc);
2058 #endif
2059 }
2060 
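/*
 * Transmit completion processing: reclaim completed buffers, restart
 * transmission if frames are still queued, and re-check for completions
 * that raced with re-enabling the interrupt.
 */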
2061 static void
2062 vtnet_tx_intr_task(void *arg)
2063 {
2064 	struct vtnet_softc *sc;
2065 	struct ifnet *ifp;
2066 	struct ifaltq_subque *ifsq;
2067 
2068 	sc = arg;
2069 	ifp = sc->vtnet_ifp;
2070 	ifsq = ifq_get_subq_default(&ifp->if_snd);
2071 
2072 next:
2073 //	lwkt_serialize_enter(&sc->vtnet_slz);
2074 
2075 	if ((ifp->if_flags & IFF_RUNNING) == 0) {
2076 		vtnet_enable_tx_intr(sc);
2077 //		lwkt_serialize_exit(&sc->vtnet_slz);
2078 		return;
2079 	}
2080 
2081 	vtnet_txeof(sc);
2082 
2083 	if (!ifsq_is_empty(ifsq))
2084 		vtnet_start_locked(ifp, ifsq);
2085 
2086 	if (vtnet_enable_tx_intr(sc) != 0) {
2087 		vtnet_disable_tx_intr(sc);
2088 		sc->vtnet_stats.tx_task_rescheduled++;
2089 //		lwkt_serialize_exit(&sc->vtnet_slz);
2090 		goto next;
2091 	}
2092 
2093 //	lwkt_serialize_exit(&sc->vtnet_slz);
2094 }
2095 
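/* Transmit virtqueue interrupt handler. */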
2096 static int
2097 vtnet_tx_vq_intr(void *xsc)
2098 {
2099 	struct vtnet_softc *sc;
2100 
2101 	sc = xsc;
2102 
2103 	vtnet_disable_tx_intr(sc);
2104 	vtnet_tx_intr_task(sc);
2105 
2106 	return (1);
2107 }
2108 
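/*
 * Stop the interface: disable virtqueue interrupts, reset the host
 * adapter and release the mbufs held by the receive and transmit
 * virtqueues.
 */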
2109 static void
2110 vtnet_stop(struct vtnet_softc *sc)
2111 {
2112 	device_t dev;
2113 	struct ifnet *ifp;
2114 
2115 	dev = sc->vtnet_dev;
2116 	ifp = sc->vtnet_ifp;
2117 
2118 	ASSERT_SERIALIZED(&sc->vtnet_slz);
2119 
2120 	sc->vtnet_watchdog_timer = 0;
2121 	callout_stop(&sc->vtnet_tick_ch);
2122 	ifq_clr_oactive(&ifp->if_snd);
2123 	ifp->if_flags &= ~(IFF_RUNNING);
2124 
2125 	vtnet_disable_rx_intr(sc);
2126 	vtnet_disable_tx_intr(sc);
2127 
2128 	/*
2129 	 * Stop the host VirtIO adapter. Note this will reset the host
2130 	 * adapter's state back to the pre-initialized state, so in
2131 	 * order to make the device usable again, we must drive it
2132 	 * through virtio_reinit() and virtio_reinit_complete().
2133 	 */
2134 	virtio_stop(dev);
2135 
2136 	sc->vtnet_flags &= ~VTNET_FLAG_LINK;
2137 
2138 	vtnet_free_rx_mbufs(sc);
2139 	vtnet_free_tx_mbufs(sc);
2140 }
2141 
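/*
 * Renegotiate features with the host, dropping any receive features
 * that have been disabled via if_capenable.
 */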
2142 static int
2143 vtnet_reinit(struct vtnet_softc *sc)
2144 {
2145 	struct ifnet *ifp;
2146 	uint64_t features;
2147 
2148 	ifp = sc->vtnet_ifp;
2149 	features = sc->vtnet_features;
2150 
2151 	/*
2152 	 * Re-negotiate with the host, removing any disabled receive
2153 	 * features. Transmit features are disabled only on our side
2154 	 * via if_capenable and if_hwassist.
2155 	 */
2156 
2157 	if (ifp->if_capabilities & IFCAP_RXCSUM) {
2158 		if ((ifp->if_capenable & IFCAP_RXCSUM) == 0)
2159 			features &= ~VIRTIO_NET_F_GUEST_CSUM;
2160 	}
2161 
2162 	if (ifp->if_capabilities & IFCAP_LRO) {
2163 		if ((ifp->if_capenable & IFCAP_LRO) == 0)
2164 			features &= ~VTNET_LRO_FEATURES;
2165 	}
2166 
2167 	if (ifp->if_capabilities & IFCAP_VLAN_HWFILTER) {
2168 		if ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0)
2169 			features &= ~VIRTIO_NET_F_CTRL_VLAN;
2170 	}
2171 
2172 	return (virtio_reinit(sc->vtnet_dev, features));
2173 }
2174 
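/*
 * Bring the interface up: reset and renegotiate with the host, restore
 * the MAC address and filters, repopulate the receive virtqueue and
 * mark the interface running.
 */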
2175 static void
2176 vtnet_init_locked(struct vtnet_softc *sc)
2177 {
2178 	device_t dev;
2179 	struct ifnet *ifp;
2180 	int error;
2181 
2182 	dev = sc->vtnet_dev;
2183 	ifp = sc->vtnet_ifp;
2184 
2185 	ASSERT_SERIALIZED(&sc->vtnet_slz);
2186 
2187 	if (ifp->if_flags & IFF_RUNNING)
2188 		return;
2189 
2190 	/* Stop host's adapter, cancel any pending I/O. */
2191 	vtnet_stop(sc);
2192 
2193 	/* Reinitialize the host device. */
2194 	error = vtnet_reinit(sc);
2195 	if (error) {
2196 		device_printf(dev,
2197 		    "reinitialization failed, stopping device...\n");
2198 		vtnet_stop(sc);
2199 		return;
2200 	}
2201 
2202 	/* Update host with assigned MAC address. */
2203 	bcopy(IF_LLADDR(ifp), sc->vtnet_hwaddr, ETHER_ADDR_LEN);
2204 	vtnet_set_hwaddr(sc);
2205 
2206 	ifp->if_hwassist = 0;
2207 	if (ifp->if_capenable & IFCAP_TXCSUM)
2208 		ifp->if_hwassist |= VTNET_CSUM_OFFLOAD;
2209 	if (ifp->if_capenable & IFCAP_TSO4)
2210 		ifp->if_hwassist |= CSUM_TSO;
2211 
2212 	error = vtnet_init_rx_vq(sc);
2213 	if (error) {
2214 		device_printf(dev,
2215 		    "cannot allocate mbufs for Rx virtqueue\n");
2216 		vtnet_stop(sc);
2217 		return;
2218 	}
2219 
2220 	if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
2221 		if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
2222 			/* Restore promiscuous and all-multicast modes. */
2223 			vtnet_rx_filter(sc);
2224 
2225 			/* Restore filtered MAC addresses. */
2226 			vtnet_rx_filter_mac(sc);
2227 		}
2228 
2229 		/* Restore VLAN filters. */
2230 		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
2231 			vtnet_rx_filter_vlan(sc);
2232 	}
2233 
2234 	/* Enable interrupts on both virtqueues before marking the interface running. */
2235 	vtnet_enable_rx_intr(sc);
2236 	vtnet_enable_tx_intr(sc);
2238 
2239 	ifp->if_flags |= IFF_RUNNING;
2240 	ifq_clr_oactive(&ifp->if_snd);
2241 
2242 	virtio_reinit_complete(dev);
2243 
2244 	vtnet_update_link_status(sc);
2245 	callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc);
2246 }
2247 
2248 static void
2249 vtnet_init(void *xsc)
2250 {
2251 	struct vtnet_softc *sc;
2252 
2253 	sc = xsc;
2254 
2255 	lwkt_serialize_enter(&sc->vtnet_slz);
2256 	vtnet_init_locked(sc);
2257 	lwkt_serialize_exit(&sc->vtnet_slz);
2258 }
2259 
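/*
 * Enqueue a command on the control virtqueue and busy-wait for the
 * host to complete it.
 */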
2260 static void
2261 vtnet_exec_ctrl_cmd(struct vtnet_softc *sc, void *cookie,
2262     struct sglist *sg, int readable, int writable)
2263 {
2264 	struct virtqueue *vq;
2265 	void *c;
2266 
2267 	vq = sc->vtnet_ctrl_vq;
2268 
2269 	ASSERT_SERIALIZED(&sc->vtnet_slz);
2270 	KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_VQ,
2271 	    ("no control virtqueue"));
2272 	KASSERT(virtqueue_empty(vq),
2273 	    ("control command already enqueued"));
2274 
2275 	if (virtqueue_enqueue(vq, cookie, sg, readable, writable) != 0)
2276 		return;
2277 
2278 	virtqueue_notify(vq, &sc->vtnet_slz);
2279 
2280 	/*
2281 	 * Poll until the command is complete. Previously, we would
2282 	 * sleep until the control virtqueue interrupt handler woke
2283 	 * us up, but dropping the vtnet_slz serializer leads to
2284 	 * serialization difficulties.
2285 	 *
2286 	 * Furthermore, it appears QEMU/KVM only allocates three MSIX
2287 	 * vectors. Two of those vectors are needed for the Rx and Tx
2288 	 * virtqueues. We do not support sharing both a virtqueue and the
2289 	 * config-changed notification on the same MSIX vector.
2290 	 */
2291 	c = virtqueue_poll(vq, NULL);
2292 	KASSERT(c == cookie, ("unexpected control command response"));
2293 }
2294 
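/* Set the primary MAC address through the control virtqueue. */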
2295 static int
2296 vtnet_ctrl_mac_cmd(struct vtnet_softc *sc, uint8_t *hwaddr)
2297 {
2298 	struct {
2299 		struct virtio_net_ctrl_hdr hdr __aligned(2);
2300 		uint8_t pad1;
2301 		char aligned_hwaddr[ETHER_ADDR_LEN] __aligned(8);
2302 		uint8_t pad2;
2303 		uint8_t ack;
2304 	} s;
2305 	struct sglist_seg segs[3];
2306 	struct sglist sg;
2307 	int error;
2308 
2309 	s.hdr.class = VIRTIO_NET_CTRL_MAC;
2310 	s.hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;
2311 	s.ack = VIRTIO_NET_ERR;
2312 
2313 	/* Copy the MAC address into physically contiguous memory. */
2314 	memcpy(s.aligned_hwaddr, hwaddr, ETHER_ADDR_LEN);
2315 
2316 	sglist_init(&sg, 3, segs);
2317 	error = 0;
2318 	error |= sglist_append(&sg, &s.hdr,
2319 	    sizeof(struct virtio_net_ctrl_hdr));
2320 	error |= sglist_append(&sg, s.aligned_hwaddr, ETHER_ADDR_LEN);
2321 	error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
2322 	KASSERT(error == 0 && sg.sg_nseg == 3,
2323 	    ("%s: error %d adding set MAC msg to sglist", __func__, error));
2324 
2325 	vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
2326 
2327 	return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
2328 }
2329 
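/*
 * Push the interface's promiscuous and all-multicast state to the
 * host.
 */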
2330 static void
2331 vtnet_rx_filter(struct vtnet_softc *sc)
2332 {
2333 	device_t dev;
2334 	struct ifnet *ifp;
2335 
2336 	dev = sc->vtnet_dev;
2337 	ifp = sc->vtnet_ifp;
2338 
2339 	ASSERT_SERIALIZED(&sc->vtnet_slz);
2340 	KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX,
2341 	    ("CTRL_RX feature not negotiated"));
2342 
2343 	if (vtnet_set_promisc(sc, ifp->if_flags & IFF_PROMISC) != 0)
2344 		device_printf(dev, "cannot %s promiscuous mode\n",
2345 		    ifp->if_flags & IFF_PROMISC ? "enable" : "disable");
2346 
2347 	if (vtnet_set_allmulti(sc, ifp->if_flags & IFF_ALLMULTI) != 0)
2348 		device_printf(dev, "cannot %s all-multicast mode\n",
2349 		    ifp->if_flags & IFF_ALLMULTI ? "enable" : "disable");
2350 }
2351 
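/*
 * Issue a VIRTIO_NET_CTRL_RX command (promiscuous or all-multicast
 * mode) to the host.
 */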
2352 static int
2353 vtnet_ctrl_rx_cmd(struct vtnet_softc *sc, int cmd, int on)
2354 {
2355 	struct virtio_net_ctrl_hdr hdr __aligned(2);
2356 	struct sglist_seg segs[3];
2357 	struct sglist sg;
2358 	uint8_t onoff, ack;
2359 	int error;
2360 
2361 	if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0)
2362 		return (ENOTSUP);
2363 
2364 	error = 0;
2365 
2366 	hdr.class = VIRTIO_NET_CTRL_RX;
2367 	hdr.cmd = cmd;
2368 	onoff = !!on;
2369 	ack = VIRTIO_NET_ERR;
2370 
2371 	sglist_init(&sg, 3, segs);
2372 	error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr));
2373 	error |= sglist_append(&sg, &onoff, sizeof(uint8_t));
2374 	error |= sglist_append(&sg, &ack, sizeof(uint8_t));
2375 	KASSERT(error == 0 && sg.sg_nseg == 3,
2376 	    ("error adding Rx filter message to sglist"));
2377 
2378 	vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1);
2379 
2380 	return (ack == VIRTIO_NET_OK ? 0 : EIO);
2381 }
2382 
2383 static int
2384 vtnet_set_promisc(struct vtnet_softc *sc, int on)
2385 {
2386 
2387 	return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_PROMISC, on));
2388 }
2389 
2390 static int
2391 vtnet_set_allmulti(struct vtnet_softc *sc, int on)
2392 {
2393 
2394 	return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, on));
2395 }
2396 
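/*
 * Program the host's unicast and multicast MAC filter tables from the
 * interface address lists, falling back to promiscuous or all-multicast
 * mode if either table would overflow.
 */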
2397 static void
2398 vtnet_rx_filter_mac(struct vtnet_softc *sc)
2399 {
2400 	struct virtio_net_ctrl_hdr hdr __aligned(2);
2401 	struct vtnet_mac_filter *filter;
2402 	struct sglist_seg segs[4];
2403 	struct sglist sg;
2404 	struct ifnet *ifp;
2405 	struct ifaddr *ifa;
2406 	struct ifaddr_container *ifac;
2407 	struct ifmultiaddr *ifma;
2408 	int ucnt, mcnt, promisc, allmulti, error;
2409 	uint8_t ack;
2410 
2411 	ifp = sc->vtnet_ifp;
2412 	ucnt = 0;
2413 	mcnt = 0;
2414 	promisc = 0;
2415 	allmulti = 0;
2416 	error = 0;
2417 
2418 	ASSERT_SERIALIZED(&sc->vtnet_slz);
2419 	KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX,
2420 	    ("CTRL_RX feature not negotiated"));
2421 
2422 	/* Use the MAC filtering table allocated in vtnet_attach. */
2423 	filter = sc->vtnet_macfilter;
2424 	memset(filter, 0, sizeof(struct vtnet_mac_filter));
2425 
2426 	/* Unicast MAC addresses: */
2427 	//if_addr_rlock(ifp);
2428 	TAILQ_FOREACH(ifac, &ifp->if_addrheads[mycpuid], ifa_link) {
2429 		ifa = ifac->ifa;
2430 		if (ifa->ifa_addr->sa_family != AF_LINK)
2431 			continue;
2432 		else if (ucnt == VTNET_MAX_MAC_ENTRIES)
2433 			break;
2434 
2435 		bcopy(LLADDR((struct sockaddr_dl *)ifa->ifa_addr),
2436 		    &filter->vmf_unicast.macs[ucnt], ETHER_ADDR_LEN);
2437 		ucnt++;
2438 	}
2439 	//if_addr_runlock(ifp);
2440 
2441 	if (ucnt >= VTNET_MAX_MAC_ENTRIES) {
2442 		promisc = 1;
2443 		filter->vmf_unicast.nentries = 0;
2444 
2445 		if_printf(ifp, "more than %d MAC addresses assigned, "
2446 		    "falling back to promiscuous mode\n",
2447 		    VTNET_MAX_MAC_ENTRIES);
2448 	} else
2449 		filter->vmf_unicast.nentries = ucnt;
2450 
2451 	/* Multicast MAC addresses: */
2452 	//if_maddr_rlock(ifp);
2453 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2454 		if (ifma->ifma_addr->sa_family != AF_LINK)
2455 			continue;
2456 		else if (mcnt == VTNET_MAX_MAC_ENTRIES)
2457 			break;
2458 
2459 		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2460 		    &filter->vmf_multicast.macs[mcnt], ETHER_ADDR_LEN);
2461 		mcnt++;
2462 	}
2463 	//if_maddr_runlock(ifp);
2464 
2465 	if (mcnt >= VTNET_MAX_MAC_ENTRIES) {
2466 		allmulti = 1;
2467 		filter->vmf_multicast.nentries = 0;
2468 
2469 		if_printf(ifp, "more than %d multicast MAC addresses "
2470 		    "assigned, falling back to all-multicast mode\n",
2471 		    VTNET_MAX_MAC_ENTRIES);
2472 	} else
2473 		filter->vmf_multicast.nentries = mcnt;
2474 
2475 	if (promisc && allmulti)
2476 		goto out;
2477 
2478 	hdr.class = VIRTIO_NET_CTRL_MAC;
2479 	hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;
2480 	ack = VIRTIO_NET_ERR;
2481 
2482 	sglist_init(&sg, 4, segs);
2483 	error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr));
2484 	error |= sglist_append(&sg, &filter->vmf_unicast,
2485 	    sizeof(uint32_t) + filter->vmf_unicast.nentries * ETHER_ADDR_LEN);
2486 	error |= sglist_append(&sg, &filter->vmf_multicast,
2487 	    sizeof(uint32_t) + filter->vmf_multicast.nentries * ETHER_ADDR_LEN);
2488 	error |= sglist_append(&sg, &ack, sizeof(uint8_t));
2489 	KASSERT(error == 0 && sg.sg_nseg == 4,
2490 	    ("error adding MAC filtering message to sglist"));
2491 
2492 	vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1);
2493 
2494 	if (ack != VIRTIO_NET_OK)
2495 		if_printf(ifp, "error setting host MAC filter table\n");
2496 
2497 out:
2498 	if (promisc)
2499 		if (vtnet_set_promisc(sc, 1) != 0)
2500 			if_printf(ifp, "cannot enable promiscuous mode\n");
2501 	if (allmulti)
2502 		if (vtnet_set_allmulti(sc, 1) != 0)
2503 			if_printf(ifp, "cannot enable all-multicast mode\n");
2504 }
2505 
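/* Add or remove a single VLAN tag in the host's VLAN filter table. */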
2506 static int
2507 vtnet_exec_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
2508 {
2509 	struct virtio_net_ctrl_hdr hdr __aligned(2);
2510 	struct sglist_seg segs[3];
2511 	struct sglist sg;
2512 	uint8_t ack;
2513 	int error;
2514 
2515 	hdr.class = VIRTIO_NET_CTRL_VLAN;
2516 	hdr.cmd = add ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL;
2517 	ack = VIRTIO_NET_ERR;
2518 	error = 0;
2519 
2520 	sglist_init(&sg, 3, segs);
2521 	error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr));
2522 	error |= sglist_append(&sg, &tag, sizeof(uint16_t));
2523 	error |= sglist_append(&sg, &ack, sizeof(uint8_t));
2524 	KASSERT(error == 0 && sg.sg_nseg == 3,
2525 	    ("error adding VLAN control message to sglist"));
2526 
2527 	vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1);
2528 
2529 	return (ack == VIRTIO_NET_OK ? 0 : EIO);
2530 }
2531 
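/*
 * Reprogram the host's VLAN filter table from the shadow table kept in
 * the softc.
 */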
2532 static void
2533 vtnet_rx_filter_vlan(struct vtnet_softc *sc)
2534 {
2535 	device_t dev;
2536 	uint32_t w, mask;
2537 	uint16_t tag;
2538 	int i, nvlans, error;
2539 
2540 	ASSERT_SERIALIZED(&sc->vtnet_slz);
2541 	KASSERT(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER,
2542 	    ("VLAN_FILTER feature not negotiated"));
2543 
2544 	dev = sc->vtnet_dev;
2545 	nvlans = sc->vtnet_nvlans;
2546 	error = 0;
2547 
2548 	/* Enable filtering for each configured VLAN. */
2549 	for (i = 0; i < VTNET_VLAN_SHADOW_SIZE && nvlans > 0; i++) {
2550 		w = sc->vtnet_vlan_shadow[i];
2551 		for (mask = 1, tag = i * 32; w != 0; mask <<= 1, tag++) {
2552 			if ((w & mask) != 0) {
2553 				w &= ~mask;
2554 				nvlans--;
2555 				if (vtnet_exec_vlan_filter(sc, 1, tag) != 0)
2556 					error++;
2557 			}
2558 		}
2559 	}
2560 
2561 	KASSERT(nvlans == 0, ("VLAN count incorrect"));
2562 	if (error)
2563 		device_printf(dev, "cannot restore VLAN filter table\n");
2564 }
2565 
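/*
 * Update the shadow VLAN table and, when hardware VLAN filtering is
 * enabled, the host's filter table as well.
 */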
2566 static void
2567 vtnet_set_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
2568 {
2569 	struct ifnet *ifp;
2570 	int idx, bit;
2571 
2572 	KASSERT(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER,
2573 	    ("VLAN_FILTER feature not negotiated"));
2574 
2575 	if ((tag == 0) || (tag > 4095))
2576 		return;
2577 
2578 	ifp = sc->vtnet_ifp;
2579 	idx = (tag >> 5) & 0x7F;
2580 	bit = tag & 0x1F;
2581 
2582 	lwkt_serialize_enter(&sc->vtnet_slz);
2583 
2584 	/* Update shadow VLAN table. */
2585 	if (add) {
2586 		sc->vtnet_nvlans++;
2587 		sc->vtnet_vlan_shadow[idx] |= (1 << bit);
2588 	} else {
2589 		sc->vtnet_nvlans--;
2590 		sc->vtnet_vlan_shadow[idx] &= ~(1 << bit);
2591 	}
2592 
2593 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
2594 		if (vtnet_exec_vlan_filter(sc, add, tag) != 0) {
2595 			device_printf(sc->vtnet_dev,
2596 			    "cannot %s VLAN %d %s the host filter table\n",
2597 			    add ? "add" : "remove", tag,
2598 			    add ? "to" : "from");
2599 		}
2600 	}
2601 
2602 	lwkt_serialize_exit(&sc->vtnet_slz);
2603 }
2604 
2605 static void
2606 vtnet_register_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
2607 {
2608 
2609 	if (ifp->if_softc != arg)
2610 		return;
2611 
2612 	vtnet_set_vlan_filter(arg, 1, tag);
2613 }
2614 
2615 static void
2616 vtnet_unregister_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
2617 {
2618 
2619 	if (ifp->if_softc != arg)
2620 		return;
2621 
2622 	vtnet_set_vlan_filter(arg, 0, tag);
2623 }
2624 
2625 static int
2626 vtnet_ifmedia_upd(struct ifnet *ifp)
2627 {
2628 	struct vtnet_softc *sc;
2629 	struct ifmedia *ifm;
2630 
2631 	sc = ifp->if_softc;
2632 	ifm = &sc->vtnet_media;
2633 
2634 	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
2635 		return (EINVAL);
2636 
2637 	return (0);
2638 }
2639 
2640 static void
2641 vtnet_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
2642 {
2643 	struct vtnet_softc *sc;
2644 
2645 	sc = ifp->if_softc;
2646 
2647 	ifmr->ifm_status = IFM_AVALID;
2648 	ifmr->ifm_active = IFM_ETHER;
2649 
2650 	lwkt_serialize_enter(&sc->vtnet_slz);
2651 	if (vtnet_is_link_up(sc) != 0) {
2652 		ifmr->ifm_status |= IFM_ACTIVE;
2653 		ifmr->ifm_active |= VTNET_MEDIATYPE;
2654 	} else
2655 		ifmr->ifm_active |= IFM_NONE;
2656 	lwkt_serialize_exit(&sc->vtnet_slz);
2657 }
2658 
2659 static void
2660 vtnet_add_statistics(struct vtnet_softc *sc)
2661 {
2662 	device_t dev;
2663 	struct vtnet_statistics *stats;
2664 	struct sysctl_ctx_list *ctx;
2665 	struct sysctl_oid *tree;
2666 	struct sysctl_oid_list *child;
2667 
2668 	dev = sc->vtnet_dev;
2669 	stats = &sc->vtnet_stats;
2670 	ctx = device_get_sysctl_ctx(dev);
2671 	tree = device_get_sysctl_tree(dev);
2672 	child = SYSCTL_CHILDREN(tree);
2673 
2674 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_failed",
2675 	    CTLFLAG_RD, &stats->mbuf_alloc_failed,
2676 	    "Mbuf cluster allocation failures");
2677 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_frame_too_large",
2678 	    CTLFLAG_RD, &stats->rx_frame_too_large,
2679 	    "Received frame larger than the mbuf chain");
2680 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_enq_replacement_failed",
2681 	    CTLFLAG_RD, &stats->rx_enq_replacement_failed,
2682 	    "Enqueuing the replacement receive mbuf failed");
2683 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_mergeable_failed",
2684 	    CTLFLAG_RD, &stats->rx_mergeable_failed,
2685 	    "Mergeable buffers receive failures");
2686 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_csum_bad_ethtype",
2687 	    CTLFLAG_RD, &stats->rx_csum_bad_ethtype,
2688 	    "Received checksum offloaded buffer with unsupported "
2689 	    "Ethernet type");
2690 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_csum_bad_start",
2691 	    CTLFLAG_RD, &stats->rx_csum_bad_start,
2692 	    "Received checksum offloaded buffer with incorrect start offset");
2693 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_csum_bad_ipproto",
2694 	    CTLFLAG_RD, &stats->rx_csum_bad_ipproto,
2695 	    "Received checksum offloaded buffer with incorrect IP protocol");
2696 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_csum_bad_offset",
2697 	    CTLFLAG_RD, &stats->rx_csum_bad_offset,
2698 	    "Received checksum offloaded buffer with incorrect offset");
2699 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_csum_failed",
2700 	    CTLFLAG_RD, &stats->rx_csum_failed,
2701 	    "Received buffer checksum offload failed");
2702 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_csum_offloaded",
2703 	    CTLFLAG_RD, &stats->rx_csum_offloaded,
2704 	    "Received buffer checksum offload succeeded");
2705 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_task_rescheduled",
2706 	    CTLFLAG_RD, &stats->rx_task_rescheduled,
2707 	    "Times the receive interrupt task rescheduled itself");
2708 
2709 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_csum_offloaded",
2710 	    CTLFLAG_RD, &stats->tx_csum_offloaded,
2711 	    "Offloaded checksum of transmitted buffer");
2712 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_tso_offloaded",
2713 	    CTLFLAG_RD, &stats->tx_tso_offloaded,
2714 	    "Segmentation offload of transmitted buffer");
2715 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_csum_bad_ethtype",
2716 	    CTLFLAG_RD, &stats->tx_csum_bad_ethtype,
2717 	    "Aborted transmit of checksum offloaded buffer with unknown "
2718 	    "Ethernet type");
2719 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_tso_bad_ethtype",
2720 	    CTLFLAG_RD, &stats->tx_tso_bad_ethtype,
2721 	    "Aborted transmit of TSO buffer with unknown Ethernet type");
2722 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_task_rescheduled",
2723 	    CTLFLAG_RD, &stats->tx_task_rescheduled,
2724 	    "Times the transmit interrupt task rescheduled itself");
2725 }
2726 
2727 static int
2728 vtnet_enable_rx_intr(struct vtnet_softc *sc)
2729 {
2730 
2731 	return (virtqueue_enable_intr(sc->vtnet_rx_vq));
2732 }
2733 
2734 static void
2735 vtnet_disable_rx_intr(struct vtnet_softc *sc)
2736 {
2737 
2738 	virtqueue_disable_intr(sc->vtnet_rx_vq);
2739 }
2740 
2741 static int
2742 vtnet_enable_tx_intr(struct vtnet_softc *sc)
2743 {
2744 
2745 #ifdef VTNET_TX_INTR_MODERATION
2746 	return (0);
2747 #else
2748 	return (virtqueue_enable_intr(sc->vtnet_tx_vq));
2749 #endif
2750 }
2751 
2752 static void
2753 vtnet_disable_tx_intr(struct vtnet_softc *sc)
2754 {
2755 
2756 	virtqueue_disable_intr(sc->vtnet_tx_vq);
2757 }
2758