1 /*-
2  * Copyright (c) 2013 Tsubai Masanari
3  * Copyright (c) 2013 Bryan Venteicher <bryanv@FreeBSD.org>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  *
17  * $OpenBSD: src/sys/dev/pci/if_vmx.c,v 1.11 2013/06/22 00:28:10 uebayasi Exp $
18  * $FreeBSD: head/sys/dev/vmware/vmxnet3/if_vmx.c 318867 2017-05-25 10:49:56Z avg $
19  */
20 
21 /* Driver for VMware vmxnet3 virtual ethernet devices. */
22 
23 #include <sys/param.h>
24 #include <sys/systm.h>
25 #include <sys/device.h>
26 #include <sys/eventhandler.h>
27 #include <sys/kernel.h>
28 #include <sys/endian.h>
29 #include <sys/sockio.h>
30 #include <sys/mbuf.h>
31 #include <sys/malloc.h>
32 #include <sys/module.h>
33 #include <sys/socket.h>
34 #include <sys/sysctl.h>
35 #include <sys/taskqueue.h>
36 #include <vm/vm.h>
37 #include <vm/pmap.h>
38 
39 #include <net/ethernet.h>
40 #include <net/if.h>
41 #include <net/if_var.h>
42 #include <net/ifq_var.h>
43 #include <net/if_arp.h>
44 #include <net/if_dl.h>
45 #include <net/if_types.h>
46 #include <net/if_media.h>
47 #include <net/vlan/if_vlan_ether.h>
48 #include <net/vlan/if_vlan_var.h>
49 
50 #include <net/bpf.h>
51 
52 #include <netinet/in_systm.h>
53 #include <netinet/in.h>
54 #include <netinet/ip.h>
55 #include <netinet/ip6.h>
56 #include <netinet6/ip6_var.h>
57 #include <netinet/udp.h>
58 #include <netinet/tcp.h>
59 
60 #include <sys/in_cksum.h>
61 
62 #include <sys/bus.h>
63 #include <sys/rman.h>
64 
65 #include <bus/pci/pcireg.h>
66 #include <bus/pci/pcivar.h>
67 
68 #define	VMXNET3_LEGACY_TX 1	/* XXX we need this at the moment */
69 #include "if_vmxreg.h"
70 #include "if_vmxvar.h"
71 
72 #include "opt_inet.h"
73 #include "opt_inet6.h"
74 
75 #ifdef VMXNET3_FAILPOINTS
76 #include <sys/fail.h>
77 static SYSCTL_NODE(DEBUG_FP, OID_AUTO, vmxnet3, CTLFLAG_RW, 0,
78     "vmxnet3 fail points");
79 #define VMXNET3_FP	_debug_fail_point_vmxnet3
80 #endif
81 
82 static int	vmxnet3_probe(device_t);
83 static int	vmxnet3_attach(device_t);
84 static int	vmxnet3_detach(device_t);
85 static int	vmxnet3_shutdown(device_t);
86 
87 static int	vmxnet3_alloc_resources(struct vmxnet3_softc *);
88 static void	vmxnet3_free_resources(struct vmxnet3_softc *);
89 static int	vmxnet3_check_version(struct vmxnet3_softc *);
90 static void	vmxnet3_initial_config(struct vmxnet3_softc *);
91 static void	vmxnet3_check_multiqueue(struct vmxnet3_softc *);
92 
93 #ifdef __FreeBSD__
94 static int	vmxnet3_alloc_msix_interrupts(struct vmxnet3_softc *);
95 static int	vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *);
96 #else
97 static int	vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *);
98 #endif
99 static int	vmxnet3_alloc_legacy_interrupts(struct vmxnet3_softc *);
100 static int	vmxnet3_alloc_interrupt(struct vmxnet3_softc *, int, int,
101 		    struct vmxnet3_interrupt *);
102 static int	vmxnet3_alloc_intr_resources(struct vmxnet3_softc *);
103 #ifdef __FreeBSD__
104 static int	vmxnet3_setup_msix_interrupts(struct vmxnet3_softc *);
105 #endif
106 static int	vmxnet3_setup_legacy_interrupt(struct vmxnet3_softc *);
107 static int	vmxnet3_setup_interrupts(struct vmxnet3_softc *);
108 static int	vmxnet3_alloc_interrupts(struct vmxnet3_softc *);
109 
110 static void	vmxnet3_free_interrupt(struct vmxnet3_softc *,
111 		    struct vmxnet3_interrupt *);
112 static void	vmxnet3_free_interrupts(struct vmxnet3_softc *);
113 
114 #ifndef VMXNET3_LEGACY_TX
115 static int	vmxnet3_alloc_taskqueue(struct vmxnet3_softc *);
116 static void	vmxnet3_start_taskqueue(struct vmxnet3_softc *);
117 static void	vmxnet3_drain_taskqueue(struct vmxnet3_softc *);
118 static void	vmxnet3_free_taskqueue(struct vmxnet3_softc *);
119 #endif
120 
121 static int	vmxnet3_init_rxq(struct vmxnet3_softc *, int);
122 static int	vmxnet3_init_txq(struct vmxnet3_softc *, int);
123 static int	vmxnet3_alloc_rxtx_queues(struct vmxnet3_softc *);
124 static void	vmxnet3_destroy_rxq(struct vmxnet3_rxqueue *);
125 static void	vmxnet3_destroy_txq(struct vmxnet3_txqueue *);
126 static void	vmxnet3_free_rxtx_queues(struct vmxnet3_softc *);
127 
128 static int	vmxnet3_alloc_shared_data(struct vmxnet3_softc *);
129 static void	vmxnet3_free_shared_data(struct vmxnet3_softc *);
130 static int	vmxnet3_alloc_txq_data(struct vmxnet3_softc *);
131 static void	vmxnet3_free_txq_data(struct vmxnet3_softc *);
132 static int	vmxnet3_alloc_rxq_data(struct vmxnet3_softc *);
133 static void	vmxnet3_free_rxq_data(struct vmxnet3_softc *);
134 static int	vmxnet3_alloc_queue_data(struct vmxnet3_softc *);
135 static void	vmxnet3_free_queue_data(struct vmxnet3_softc *);
136 static int	vmxnet3_alloc_mcast_table(struct vmxnet3_softc *);
137 static void	vmxnet3_init_shared_data(struct vmxnet3_softc *);
138 static void	vmxnet3_init_hwassist(struct vmxnet3_softc *);
139 static void	vmxnet3_reinit_interface(struct vmxnet3_softc *);
140 static void	vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *);
141 static void	vmxnet3_reinit_shared_data(struct vmxnet3_softc *);
142 static int	vmxnet3_alloc_data(struct vmxnet3_softc *);
143 static void	vmxnet3_free_data(struct vmxnet3_softc *);
144 static int	vmxnet3_setup_interface(struct vmxnet3_softc *);
145 
146 static void	vmxnet3_evintr(struct vmxnet3_softc *);
147 static void	vmxnet3_txq_eof(struct vmxnet3_txqueue *);
148 static void	vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *, struct mbuf *);
149 static int	vmxnet3_newbuf(struct vmxnet3_softc *, struct vmxnet3_rxring *);
150 static void	vmxnet3_rxq_eof_discard(struct vmxnet3_rxqueue *,
151 		    struct vmxnet3_rxring *, int);
152 static void	vmxnet3_rxq_eof(struct vmxnet3_rxqueue *);
153 static void	vmxnet3_legacy_intr(void *);
154 #ifdef __FreeBSD__
155 static void	vmxnet3_txq_intr(void *);
156 static void	vmxnet3_rxq_intr(void *);
157 static void	vmxnet3_event_intr(void *);
158 #endif
159 
160 static void	vmxnet3_txstop(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
161 static void	vmxnet3_rxstop(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
162 static void	vmxnet3_stop(struct vmxnet3_softc *);
163 
164 static void	vmxnet3_txinit(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
165 static int	vmxnet3_rxinit(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
166 static int	vmxnet3_reinit_queues(struct vmxnet3_softc *);
167 static int	vmxnet3_enable_device(struct vmxnet3_softc *);
168 static void	vmxnet3_reinit_rxfilters(struct vmxnet3_softc *);
169 static int	vmxnet3_reinit(struct vmxnet3_softc *);
170 static void	vmxnet3_init_locked(struct vmxnet3_softc *);
171 static void	vmxnet3_init(void *);
172 
173 static int	vmxnet3_txq_offload_ctx(struct vmxnet3_txqueue *, struct mbuf *,
174 		    int *, int *, int *);
175 static int	vmxnet3_txq_load_mbuf(struct vmxnet3_txqueue *, struct mbuf **,
176 		    bus_dmamap_t, bus_dma_segment_t [], int *);
177 static void	vmxnet3_txq_unload_mbuf(struct vmxnet3_txqueue *, bus_dmamap_t);
178 static int	vmxnet3_txq_encap(struct vmxnet3_txqueue *, struct mbuf **);
179 #ifdef VMXNET3_LEGACY_TX
180 static void	vmxnet3_start_locked(struct ifnet *);
181 static void	vmxnet3_start(struct ifnet *, struct ifaltq_subque *);
182 #else
183 static int	vmxnet3_txq_mq_start_locked(struct vmxnet3_txqueue *,
184 		    struct mbuf *);
185 static int	vmxnet3_txq_mq_start(struct ifnet *, struct mbuf *);
186 static void	vmxnet3_txq_tq_deferred(void *, int);
187 #endif
188 static void	vmxnet3_txq_start(struct vmxnet3_txqueue *);
189 static void	vmxnet3_tx_start_all(struct vmxnet3_softc *);
190 
191 static void	vmxnet3_update_vlan_filter(struct vmxnet3_softc *, int,
192 		    uint16_t);
193 static void	vmxnet3_register_vlan(void *, struct ifnet *, uint16_t);
194 static void	vmxnet3_unregister_vlan(void *, struct ifnet *, uint16_t);
195 static void	vmxnet3_set_rxfilter(struct vmxnet3_softc *);
196 static int	vmxnet3_change_mtu(struct vmxnet3_softc *, int);
197 static int	vmxnet3_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
198 
199 #ifndef VMXNET3_LEGACY_TX
200 static void	vmxnet3_qflush(struct ifnet *);
201 #endif
202 
203 static int	vmxnet3_watchdog(struct vmxnet3_txqueue *);
204 static void	vmxnet3_refresh_host_stats(struct vmxnet3_softc *);
205 static void	vmxnet3_txq_accum_stats(struct vmxnet3_txqueue *,
206 		    struct vmxnet3_txq_stats *);
207 static void	vmxnet3_rxq_accum_stats(struct vmxnet3_rxqueue *,
208 		    struct vmxnet3_rxq_stats *);
209 static void	vmxnet3_tick(void *);
210 static void	vmxnet3_link_status(struct vmxnet3_softc *);
211 static void	vmxnet3_media_status(struct ifnet *, struct ifmediareq *);
212 static int	vmxnet3_media_change(struct ifnet *);
213 static void	vmxnet3_set_lladdr(struct vmxnet3_softc *);
214 static void	vmxnet3_get_lladdr(struct vmxnet3_softc *);
215 
216 static void	vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *,
217 		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
218 static void	vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *,
219 		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
220 static void	vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *,
221 		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
222 static void	vmxnet3_setup_sysctl(struct vmxnet3_softc *);
223 
224 static void	vmxnet3_write_bar0(struct vmxnet3_softc *, bus_size_t,
225 		    uint32_t);
226 static uint32_t	vmxnet3_read_bar1(struct vmxnet3_softc *, bus_size_t);
227 static void	vmxnet3_write_bar1(struct vmxnet3_softc *, bus_size_t,
228 		    uint32_t);
229 static void	vmxnet3_write_cmd(struct vmxnet3_softc *, uint32_t);
230 static uint32_t	vmxnet3_read_cmd(struct vmxnet3_softc *, uint32_t);
231 
232 static void	vmxnet3_enable_intr(struct vmxnet3_softc *, int);
233 static void	vmxnet3_disable_intr(struct vmxnet3_softc *, int);
234 static void	vmxnet3_enable_all_intrs(struct vmxnet3_softc *);
235 static void	vmxnet3_disable_all_intrs(struct vmxnet3_softc *);
236 
237 static int	vmxnet3_dma_malloc(struct vmxnet3_softc *, bus_size_t,
238 		    bus_size_t, struct vmxnet3_dma_alloc *);
239 static void	vmxnet3_dma_free(struct vmxnet3_softc *,
240 		    struct vmxnet3_dma_alloc *);
241 static int	vmxnet3_tunable_int(struct vmxnet3_softc *,
242 		    const char *, int);
243 
244 typedef enum {
245 	VMXNET3_BARRIER_RD,
246 	VMXNET3_BARRIER_WR,
247 	VMXNET3_BARRIER_RDWR,
248 } vmxnet3_barrier_t;
249 
250 static void	vmxnet3_barrier(struct vmxnet3_softc *, vmxnet3_barrier_t);
251 
252 /* Tunables. */
253 static int vmxnet3_mq_disable = 0;
254 TUNABLE_INT("hw.vmx.mq_disable", &vmxnet3_mq_disable);
255 static int vmxnet3_default_txnqueue = VMXNET3_DEF_TX_QUEUES;
256 TUNABLE_INT("hw.vmx.txnqueue", &vmxnet3_default_txnqueue);
257 static int vmxnet3_default_rxnqueue = VMXNET3_DEF_RX_QUEUES;
258 TUNABLE_INT("hw.vmx.rxnqueue", &vmxnet3_default_rxnqueue);
259 static int vmxnet3_default_txndesc = VMXNET3_DEF_TX_NDESC;
260 TUNABLE_INT("hw.vmx.txndesc", &vmxnet3_default_txndesc);
261 static int vmxnet3_default_rxndesc = VMXNET3_DEF_RX_NDESC;
262 TUNABLE_INT("hw.vmx.rxndesc", &vmxnet3_default_rxndesc);
263 
264 static device_method_t vmxnet3_methods[] = {
265 	/* Device interface. */
266 	DEVMETHOD(device_probe,		vmxnet3_probe),
267 	DEVMETHOD(device_attach,	vmxnet3_attach),
268 	DEVMETHOD(device_detach,	vmxnet3_detach),
269 	DEVMETHOD(device_shutdown,	vmxnet3_shutdown),
270 
271 	DEVMETHOD_END
272 };
273 
274 static driver_t vmxnet3_driver = {
275 	"vmx", vmxnet3_methods, sizeof(struct vmxnet3_softc)
276 };
277 
278 static devclass_t vmxnet3_devclass;
279 DRIVER_MODULE(vmx, pci, vmxnet3_driver, vmxnet3_devclass, NULL, NULL);
280 
281 MODULE_DEPEND(vmx, pci, 1, 1, 1);
282 MODULE_DEPEND(vmx, ether, 1, 1, 1);
283 
284 #define VMXNET3_VMWARE_VENDOR_ID	0x15AD
285 #define VMXNET3_VMWARE_DEVICE_ID	0x07B0
286 
287 static int
288 vmxnet3_probe(device_t dev)
289 {
290 
291 	if (pci_get_vendor(dev) == VMXNET3_VMWARE_VENDOR_ID &&
292 	    pci_get_device(dev) == VMXNET3_VMWARE_DEVICE_ID) {
293 		device_set_desc(dev, "VMware VMXNET3 Ethernet Adapter");
294 		return (BUS_PROBE_DEFAULT);
295 	}
296 
297 	return (ENXIO);
298 }
299 
300 static int
301 vmxnet3_attach(device_t dev)
302 {
303 	struct vmxnet3_softc *sc;
304 	int error;
305 
306 	sc = device_get_softc(dev);
307 	sc->vmx_dev = dev;
308 
309 	pci_enable_busmaster(dev);
310 
311 	VMXNET3_CORE_LOCK_INIT(sc, device_get_nameunit(dev));
312 	callout_init_lk(&sc->vmx_tick, &sc->vmx_lock);
313 
314 	vmxnet3_initial_config(sc);
315 
316 	error = vmxnet3_alloc_resources(sc);
317 	if (error)
318 		goto fail;
319 
320 	error = vmxnet3_check_version(sc);
321 	if (error)
322 		goto fail;
323 
324 	error = vmxnet3_alloc_rxtx_queues(sc);
325 	if (error)
326 		goto fail;
327 
328 #ifndef VMXNET3_LEGACY_TX
329 	error = vmxnet3_alloc_taskqueue(sc);
330 	if (error)
331 		goto fail;
332 #endif
333 
334 	error = vmxnet3_alloc_interrupts(sc);
335 	if (error)
336 		goto fail;
337 
338 	vmxnet3_check_multiqueue(sc);
339 
340 	error = vmxnet3_alloc_data(sc);
341 	if (error)
342 		goto fail;
343 
344 	error = vmxnet3_setup_interface(sc);
345 	if (error)
346 		goto fail;
347 
348 	error = vmxnet3_setup_interrupts(sc);
349 	if (error) {
350 		ether_ifdetach(sc->vmx_ifp);
351 		device_printf(dev, "could not set up interrupt\n");
352 		goto fail;
353 	}
354 
355 	vmxnet3_setup_sysctl(sc);
356 #ifndef VMXNET3_LEGACY_TX
357 	vmxnet3_start_taskqueue(sc);
358 #endif
359 
360 fail:
361 	if (error)
362 		vmxnet3_detach(dev);
363 
364 	return (error);
365 }
366 
367 static int
368 vmxnet3_detach(device_t dev)
369 {
370 	struct vmxnet3_softc *sc;
371 	struct ifnet *ifp;
372 
373 	sc = device_get_softc(dev);
374 	ifp = sc->vmx_ifp;
375 
376 	if (device_is_attached(dev)) {
377 		VMXNET3_CORE_LOCK(sc);
378 		vmxnet3_stop(sc);
379 		VMXNET3_CORE_UNLOCK(sc);
380 
381 		callout_terminate(&sc->vmx_tick);
382 #ifndef VMXNET3_LEGACY_TX
383 		vmxnet3_drain_taskqueue(sc);
384 #endif
385 
386 		ether_ifdetach(ifp);
387 	}
388 
389 	if (sc->vmx_vlan_attach != NULL) {
390 		EVENTHANDLER_DEREGISTER(vlan_config, sc->vmx_vlan_attach);
391 		sc->vmx_vlan_attach = NULL;
392 	}
393 	if (sc->vmx_vlan_detach != NULL) {
394 		EVENTHANDLER_DEREGISTER(vlan_config, sc->vmx_vlan_detach);
395 		sc->vmx_vlan_detach = NULL;
396 	}
397 
398 #ifndef VMXNET3_LEGACY_TX
399 	vmxnet3_free_taskqueue(sc);
400 #endif
401 	vmxnet3_free_interrupts(sc);
402 
403 	if (ifp != NULL) {
404 		if_free(ifp);
405 		sc->vmx_ifp = NULL;
406 	}
407 
408 	ifmedia_removeall(&sc->vmx_media);
409 
410 	vmxnet3_free_data(sc);
411 	vmxnet3_free_resources(sc);
412 	vmxnet3_free_rxtx_queues(sc);
413 
414 	VMXNET3_CORE_LOCK_DESTROY(sc);
415 
416 	return (0);
417 }
418 
419 static int
420 vmxnet3_shutdown(device_t dev)
421 {
422 
423 	return (0);
424 }
425 
426 static int
427 vmxnet3_alloc_resources(struct vmxnet3_softc *sc)
428 {
429 	device_t dev;
430 	int rid;
431 
432 	dev = sc->vmx_dev;
433 
434 	rid = PCIR_BAR(0);
435 	sc->vmx_res0 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
436 	    RF_ACTIVE);
437 	if (sc->vmx_res0 == NULL) {
438 		device_printf(dev,
439 		    "could not map BAR0 memory\n");
440 		return (ENXIO);
441 	}
442 
443 	sc->vmx_iot0 = rman_get_bustag(sc->vmx_res0);
444 	sc->vmx_ioh0 = rman_get_bushandle(sc->vmx_res0);
445 
446 	rid = PCIR_BAR(1);
447 	sc->vmx_res1 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
448 	    RF_ACTIVE);
449 	if (sc->vmx_res1 == NULL) {
450 		device_printf(dev,
451 		    "could not map BAR1 memory\n");
452 		return (ENXIO);
453 	}
454 
455 	sc->vmx_iot1 = rman_get_bustag(sc->vmx_res1);
456 	sc->vmx_ioh1 = rman_get_bushandle(sc->vmx_res1);
457 
458 	if (pci_find_extcap(dev, PCIY_MSIX, NULL) == 0) {
459 		rid = PCIR_BAR(2);
460 		sc->vmx_msix_res = bus_alloc_resource_any(dev,
461 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
462 	}
463 
464 	if (sc->vmx_msix_res == NULL)
465 		sc->vmx_flags |= VMXNET3_FLAG_NO_MSIX;
466 
467 	return (0);
468 }
469 
470 static void
471 vmxnet3_free_resources(struct vmxnet3_softc *sc)
472 {
473 	device_t dev;
474 	int rid;
475 
476 	dev = sc->vmx_dev;
477 
478 	if (sc->vmx_res0 != NULL) {
479 		rid = PCIR_BAR(0);
480 		bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->vmx_res0);
481 		sc->vmx_res0 = NULL;
482 	}
483 
484 	if (sc->vmx_res1 != NULL) {
485 		rid = PCIR_BAR(1);
486 		bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->vmx_res1);
487 		sc->vmx_res1 = NULL;
488 	}
489 
490 	if (sc->vmx_msix_res != NULL) {
491 		rid = PCIR_BAR(2);
492 		bus_release_resource(dev, SYS_RES_MEMORY, rid,
493 		    sc->vmx_msix_res);
494 		sc->vmx_msix_res = NULL;
495 	}
496 }
497 
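/*
 * Negotiate the device revision: read the version registers in BAR1, verify
 * that revision 1 of the device (VRRS) and of the UPT interface (UVRS) is
 * supported (bit 0), and write 1 back to each to select it.
 */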
498 static int
499 vmxnet3_check_version(struct vmxnet3_softc *sc)
500 {
501 	device_t dev;
502 	uint32_t version;
503 
504 	dev = sc->vmx_dev;
505 
506 	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_VRRS);
507 	if ((version & 0x01) == 0) {
508 		device_printf(dev, "unsupported hardware version %#x\n",
509 		    version);
510 		return (ENOTSUP);
511 	}
512 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_VRRS, 1);
513 
514 	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_UVRS);
515 	if ((version & 0x01) == 0) {
516 		device_printf(dev, "unsupported UPT version %#x\n", version);
517 		return (ENOTSUP);
518 	}
519 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_UVRS, 1);
520 
521 	return (0);
522 }
523 
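/* Round down to the nearest power of two. */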
524 static int
525 trunc_powerof2(int val)
526 {
527 
528 	return (1U << (fls(val) - 1));
529 }
530 
531 static void
532 vmxnet3_initial_config(struct vmxnet3_softc *sc)
533 {
534 	int nqueue, ndesc;
535 
536 	nqueue = vmxnet3_tunable_int(sc, "txnqueue", vmxnet3_default_txnqueue);
537 	if (nqueue > VMXNET3_MAX_TX_QUEUES || nqueue < 1)
538 		nqueue = VMXNET3_DEF_TX_QUEUES;
539 	if (nqueue > ncpus)
540 		nqueue = ncpus;
541 	sc->vmx_max_ntxqueues = trunc_powerof2(nqueue);
542 
543 	nqueue = vmxnet3_tunable_int(sc, "rxnqueue", vmxnet3_default_rxnqueue);
544 	if (nqueue > VMXNET3_MAX_RX_QUEUES || nqueue < 1)
545 		nqueue = VMXNET3_DEF_RX_QUEUES;
546 	if (nqueue > ncpus)
547 		nqueue = ncpus;
548 	sc->vmx_max_nrxqueues = trunc_powerof2(nqueue);
549 
550 	if (vmxnet3_tunable_int(sc, "mq_disable", vmxnet3_mq_disable)) {
551 		sc->vmx_max_nrxqueues = 1;
552 		sc->vmx_max_ntxqueues = 1;
553 	}
554 
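	/* Clamp the ring sizes and round down to the required multiple. */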
555 	ndesc = vmxnet3_tunable_int(sc, "txd", vmxnet3_default_txndesc);
556 	if (ndesc > VMXNET3_MAX_TX_NDESC || ndesc < VMXNET3_MIN_TX_NDESC)
557 		ndesc = VMXNET3_DEF_TX_NDESC;
558 	if (ndesc & VMXNET3_MASK_TX_NDESC)
559 		ndesc &= ~VMXNET3_MASK_TX_NDESC;
560 	sc->vmx_ntxdescs = ndesc;
561 
562 	ndesc = vmxnet3_tunable_int(sc, "rxd", vmxnet3_default_rxndesc);
563 	if (ndesc > VMXNET3_MAX_RX_NDESC || ndesc < VMXNET3_MIN_RX_NDESC)
564 		ndesc = VMXNET3_DEF_RX_NDESC;
565 	if (ndesc & VMXNET3_MASK_RX_NDESC)
566 		ndesc &= ~VMXNET3_MASK_RX_NDESC;
567 	sc->vmx_nrxdescs = ndesc;
568 	sc->vmx_max_rxsegs = VMXNET3_MAX_RX_SEGS;
569 }
570 
571 static void
572 vmxnet3_check_multiqueue(struct vmxnet3_softc *sc)
573 {
574 
575 	if (sc->vmx_intr_type != VMXNET3_IT_MSIX)
576 		goto out;
577 
578 	/* BMV: Just use the maximum configured for now. */
579 	sc->vmx_nrxqueues = sc->vmx_max_nrxqueues;
580 	sc->vmx_ntxqueues = sc->vmx_max_ntxqueues;
581 
582 	if (sc->vmx_nrxqueues > 1)
583 		sc->vmx_flags |= VMXNET3_FLAG_RSS;
584 
585 	return;
586 
587 out:
588 	sc->vmx_ntxqueues = 1;
589 	sc->vmx_nrxqueues = 1;
590 }
591 
592 #ifdef __FreeBSD__
593 static int
594 vmxnet3_alloc_msix_interrupts(struct vmxnet3_softc *sc)
595 {
596 	device_t dev;
597 	int nmsix, cnt, required;
598 
599 	dev = sc->vmx_dev;
600 
601 	if (sc->vmx_flags & VMXNET3_FLAG_NO_MSIX)
602 		return (1);
603 
604 	/* Allocate an additional vector for the events interrupt. */
605 	required = sc->vmx_max_nrxqueues + sc->vmx_max_ntxqueues + 1;
606 
607 	nmsix = pci_msix_count(dev);
608 	if (nmsix < required)
609 		return (1);
610 
611 	cnt = required;
612 	if (pci_alloc_msix(dev, &cnt) == 0 && cnt >= required) {
613 		sc->vmx_nintrs = required;
614 		return (0);
615 	} else
616 		pci_release_msi(dev);
617 
618 	/* BMV TODO: Fall back to sharing MSIX vectors if possible. */
619 
620 	return (1);
621 }
622 
623 static int
624 vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *sc)
625 {
626 	device_t dev;
627 	int nmsi, cnt, required;
628 
629 	dev = sc->vmx_dev;
630 	required = 1;
631 
632 	nmsi = pci_msi_count(dev);
633 	if (nmsi < required)
634 		return (1);
635 
636 	cnt = required;
637 	if (pci_alloc_msi(dev, &cnt) == 0 && cnt >= required) {
638 		sc->vmx_nintrs = 1;
639 		return (0);
640 	} else
641 		pci_release_msi(dev);
642 
643 	return (1);
644 }
645 #else
646 static int
647 vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *sc)
648 {
649 	int irq_flags, rid;
650 	int enable = 1;
651 
652 	sc->vmx_irq_type = pci_alloc_1intr(sc->vmx_dev, enable, &rid,
653 	    &irq_flags);
654 	sc->vmx_irq_flags = irq_flags;
655 	sc->vmx_nintrs = 1;
656 	return (0);
657 }
658 #endif
659 
660 static int
661 vmxnet3_alloc_legacy_interrupts(struct vmxnet3_softc *sc)
662 {
663 
664 	sc->vmx_nintrs = 1;
665 	return (0);
666 }
667 
668 static int
669 vmxnet3_alloc_interrupt(struct vmxnet3_softc *sc, int rid, int flags,
670     struct vmxnet3_interrupt *intr)
671 {
672 	struct resource *irq;
673 
674 	irq = bus_alloc_resource_any(sc->vmx_dev, SYS_RES_IRQ, &rid,
675 	    sc->vmx_irq_flags);
676 	if (irq == NULL)
677 		return (ENXIO);
678 
679 	intr->vmxi_irq = irq;
680 	intr->vmxi_rid = rid;
681 
682 	return (0);
683 }
684 
685 static int
686 vmxnet3_alloc_intr_resources(struct vmxnet3_softc *sc)
687 {
688 	int i, rid, flags, error;
689 
690 	rid = 0;
691 	flags = RF_ACTIVE;
692 
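	/*
	 * A legacy INTx interrupt uses the shared rid 0 resource, while
	 * MSI and MSI-X vectors are allocated starting at rid 1.
	 */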
693 	if (sc->vmx_intr_type == VMXNET3_IT_LEGACY)
694 		flags |= RF_SHAREABLE;
695 	else
696 		rid = 1;
697 
698 	for (i = 0; i < sc->vmx_nintrs; i++, rid++) {
699 		error = vmxnet3_alloc_interrupt(sc, rid, flags,
700 		    &sc->vmx_intrs[i]);
701 		if (error)
702 			return (error);
703 	}
704 
705 	return (0);
706 }
707 
708 #ifdef __FreeBSD__
709 static int
710 vmxnet3_setup_msix_interrupts(struct vmxnet3_softc *sc)
711 {
712 	device_t dev;
713 	struct vmxnet3_txqueue *txq;
714 	struct vmxnet3_rxqueue *rxq;
715 	struct vmxnet3_interrupt *intr;
716 	int i, error;
717 
718 	dev = sc->vmx_dev;
719 	intr = &sc->vmx_intrs[0];
720 
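	/*
	 * Assign one vector to each Tx queue, then one to each Rx queue,
	 * and reserve the last vector for events.  The device's interrupt
	 * index is the zero-based vector number, hence the rid - 1 below.
	 */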
721 	for (i = 0; i < sc->vmx_ntxqueues; i++, intr++) {
722 		txq = &sc->vmx_txq[i];
723 		error = bus_setup_intr(dev, intr->vmxi_irq, INTR_MPSAFE,
724 		     vmxnet3_txq_intr, txq, &intr->vmxi_handler, NULL);
725 		if (error)
726 			return (error);
727 		bus_describe_intr(dev, intr->vmxi_irq, intr->vmxi_handler,
728 		    "tq%d", i);
729 		txq->vxtxq_intr_idx = intr->vmxi_rid - 1;
730 	}
731 
732 	for (i = 0; i < sc->vmx_nrxqueues; i++, intr++) {
733 		rxq = &sc->vmx_rxq[i];
734 		error = bus_setup_intr(dev, intr->vmxi_irq, INTR_MPSAFE,
735 		    vmxnet3_rxq_intr, rxq, &intr->vmxi_handler, NULL);
736 		if (error)
737 			return (error);
738 		bus_describe_intr(dev, intr->vmxi_irq, intr->vmxi_handler,
739 		    "rq%d", i);
740 		rxq->vxrxq_intr_idx = intr->vmxi_rid - 1;
741 	}
742 
743 	error = bus_setup_intr(dev, intr->vmxi_irq, INTR_MPSAFE,
744 	    vmxnet3_event_intr, sc, &intr->vmxi_handler, NULL);
745 	if (error)
746 		return (error);
747 	bus_describe_intr(dev, intr->vmxi_irq, intr->vmxi_handler, "event");
748 	sc->vmx_event_intr_idx = intr->vmxi_rid - 1;
749 
750 	return (0);
751 }
752 #endif
753 
754 static int
755 vmxnet3_setup_legacy_interrupt(struct vmxnet3_softc *sc)
756 {
757 	struct vmxnet3_interrupt *intr;
758 	int i, error;
759 
760 	intr = &sc->vmx_intrs[0];
761 	error = bus_setup_intr(sc->vmx_dev, intr->vmxi_irq,
762 	    INTR_MPSAFE, vmxnet3_legacy_intr, sc,
763 	    &intr->vmxi_handler, NULL);
764 
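	/* A single shared vector services every queue and the event path. */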
765 	for (i = 0; i < sc->vmx_ntxqueues; i++)
766 		sc->vmx_txq[i].vxtxq_intr_idx = 0;
767 	for (i = 0; i < sc->vmx_nrxqueues; i++)
768 		sc->vmx_rxq[i].vxrxq_intr_idx = 0;
769 	sc->vmx_event_intr_idx = 0;
770 
771 	return (error);
772 }
773 
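/*
 * Publish the interrupt index chosen for each queue, and for events, into
 * the shared structures read by the device.
 */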
774 static void
775 vmxnet3_set_interrupt_idx(struct vmxnet3_softc *sc)
776 {
777 	struct vmxnet3_txqueue *txq;
778 	struct vmxnet3_txq_shared *txs;
779 	struct vmxnet3_rxqueue *rxq;
780 	struct vmxnet3_rxq_shared *rxs;
781 	int i;
782 
783 	sc->vmx_ds->evintr = sc->vmx_event_intr_idx;
784 
785 	for (i = 0; i < sc->vmx_ntxqueues; i++) {
786 		txq = &sc->vmx_txq[i];
787 		txs = txq->vxtxq_ts;
788 		txs->intr_idx = txq->vxtxq_intr_idx;
789 	}
790 
791 	for (i = 0; i < sc->vmx_nrxqueues; i++) {
792 		rxq = &sc->vmx_rxq[i];
793 		rxs = rxq->vxrxq_rs;
794 		rxs->intr_idx = rxq->vxrxq_intr_idx;
795 	}
796 }
797 
798 static int
799 vmxnet3_setup_interrupts(struct vmxnet3_softc *sc)
800 {
801 	int error;
802 
803 	error = vmxnet3_alloc_intr_resources(sc);
804 	if (error)
805 		return (error);
806 
807 	switch (sc->vmx_intr_type) {
808 	case VMXNET3_IT_MSIX:
809 #ifdef __FreeBSD__
810 		error = vmxnet3_setup_msix_interrupts(sc);
811 #else
812 		device_printf(sc->vmx_dev, "VMXNET3_IT_MSIX unsupported\n");
813 		error = ENXIO;
814 #endif
815 		break;
816 	case VMXNET3_IT_MSI:
817 	case VMXNET3_IT_LEGACY:
818 		error = vmxnet3_setup_legacy_interrupt(sc);
819 		break;
820 	default:
821 		panic("%s: invalid interrupt type %d", __func__,
822 		    sc->vmx_intr_type);
823 	}
824 
825 	if (error == 0)
826 		vmxnet3_set_interrupt_idx(sc);
827 
828 	return (error);
829 }
830 
831 #ifdef __FreeBSD__
832 static int
833 vmxnet3_alloc_interrupts(struct vmxnet3_softc *sc)
834 {
835 	device_t dev;
836 	uint32_t config;
837 	int error;
838 
839 	dev = sc->vmx_dev;
840 	config = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_INTRCFG);
841 
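	/*
	 * The low two bits of the interrupt configuration select the
	 * interrupt type preferred by the device; the next two bits select
	 * the interrupt mask mode.
	 */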
842 	sc->vmx_intr_type = config & 0x03;
843 	sc->vmx_intr_mask_mode = (config >> 2) & 0x03;
844 
845 	switch (sc->vmx_intr_type) {
846 	case VMXNET3_IT_AUTO:
847 		sc->vmx_intr_type = VMXNET3_IT_MSIX;
848 		/* FALLTHROUGH */
849 	case VMXNET3_IT_MSIX:
850 		error = vmxnet3_alloc_msix_interrupts(sc);
851 		if (error == 0)
852 			break;
853 		sc->vmx_intr_type = VMXNET3_IT_MSI;
854 		/* FALLTHROUGH */
855 	case VMXNET3_IT_MSI:
856 		error = vmxnet3_alloc_msi_interrupts(sc);
857 		if (error == 0)
858 			break;
859 		sc->vmx_intr_type = VMXNET3_IT_LEGACY;
860 		/* FALLTHROUGH */
861 	case VMXNET3_IT_LEGACY:
862 		error = vmxnet3_alloc_legacy_interrupts(sc);
863 		if (error == 0)
864 			break;
865 		/* FALLTHROUGH */
866 	default:
867 		sc->vmx_intr_type = -1;
868 		device_printf(dev, "cannot allocate any interrupt resources\n");
869 		return (ENXIO);
870 	}
871 
872 	return (error);
873 }
874 #else
875 static int
876 vmxnet3_alloc_interrupts(struct vmxnet3_softc *sc)
877 {
878 	device_t dev;
879 	uint32_t config;
880 	int error;
881 
882 	dev = sc->vmx_dev;
883 	config = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_INTRCFG);
884 
885 	sc->vmx_intr_type = config & 0x03;
886 	sc->vmx_intr_mask_mode = (config >> 2) & 0x03;
887 
888 	switch (sc->vmx_intr_type) {
889 	case VMXNET3_IT_AUTO:
890 		sc->vmx_intr_type = VMXNET3_IT_MSI;
891 		/* FALLTHROUGH */
892 	case VMXNET3_IT_MSI:
893 		error = vmxnet3_alloc_msi_interrupts(sc);
894 		if (error == 0)
895 			break;
896 		sc->vmx_intr_type = VMXNET3_IT_LEGACY;
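		/* FALLTHROUGH */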
897 	case VMXNET3_IT_LEGACY:
898 		error = vmxnet3_alloc_legacy_interrupts(sc);
899 		if (error == 0)
900 			break;
901 		/* FALLTHROUGH */
902 	case VMXNET3_IT_MSIX:
903 		/* FALLTHROUGH */
904 	default:
905 		sc->vmx_intr_type = -1;
906 		device_printf(dev, "cannot allocate any interrupt resources\n");
907 		return (ENXIO);
908 	}
909 
910 	return (error);
911 }
912 #endif
913 
914 static void
915 vmxnet3_free_interrupt(struct vmxnet3_softc *sc,
916     struct vmxnet3_interrupt *intr)
917 {
918 	device_t dev;
919 
920 	dev = sc->vmx_dev;
921 
922 	if (intr->vmxi_handler != NULL) {
923 		bus_teardown_intr(dev, intr->vmxi_irq, intr->vmxi_handler);
924 		intr->vmxi_handler = NULL;
925 	}
926 
927 	if (intr->vmxi_irq != NULL) {
928 		bus_release_resource(dev, SYS_RES_IRQ, intr->vmxi_rid,
929 		    intr->vmxi_irq);
930 		intr->vmxi_irq = NULL;
931 		intr->vmxi_rid = -1;
932 	}
933 }
934 
935 #ifdef __FreeBSD__
936 static void
937 vmxnet3_free_interrupts(struct vmxnet3_softc *sc)
938 {
939 	int i;
940 
941 	for (i = 0; i < sc->vmx_nintrs; i++)
942 		vmxnet3_free_interrupt(sc, &sc->vmx_intrs[i]);
943 
944 	if (sc->vmx_intr_type == VMXNET3_IT_MSI ||
945 	    sc->vmx_intr_type == VMXNET3_IT_MSIX)
946 		pci_release_msi(sc->vmx_dev);
947 }
948 #else
949 static void
950 vmxnet3_free_interrupts(struct vmxnet3_softc *sc)
951 {
952 	int i;
953 
954 	for (i = 0; i < sc->vmx_nintrs; i++)
955 		vmxnet3_free_interrupt(sc, &sc->vmx_intrs[i]);
956 
957 	if (sc->vmx_irq_type == PCI_INTR_TYPE_MSI)
958 		pci_release_msi(sc->vmx_dev);
959 }
960 #endif
961 
962 #ifndef VMXNET3_LEGACY_TX
963 static int
964 vmxnet3_alloc_taskqueue(struct vmxnet3_softc *sc)
965 {
966 	device_t dev;
967 
968 	dev = sc->vmx_dev;
969 
970 	sc->vmx_tq = taskqueue_create(device_get_nameunit(dev), M_NOWAIT,
971 	    taskqueue_thread_enqueue, &sc->vmx_tq);
972 	if (sc->vmx_tq == NULL)
973 		return (ENOMEM);
974 
975 	return (0);
976 }
977 
978 static void
979 vmxnet3_start_taskqueue(struct vmxnet3_softc *sc)
980 {
981 	device_t dev;
982 	int nthreads, error;
983 
984 	dev = sc->vmx_dev;
985 
986 	/*
987 	 * The taskqueue is typically not frequently used, so a dedicated
988 	 * thread for each queue is unnecessary.
989 	 */
990 	nthreads = MAX(1, sc->vmx_ntxqueues / 2);
991 
992 	/*
993 	 * Most drivers just ignore the return value - it only fails
994 	 * with ENOMEM so an error is not likely. It is hard for us
995 	 * to recover from an error here.
996 	 */
997 	error = taskqueue_start_threads(&sc->vmx_tq, nthreads, PI_NET,
998 	    "%s taskq", device_get_nameunit(dev));
999 	if (error)
1000 		device_printf(dev, "failed to start taskqueue: %d\n", error);
1001 }
1002 
1003 static void
1004 vmxnet3_drain_taskqueue(struct vmxnet3_softc *sc)
1005 {
1006 	struct vmxnet3_txqueue *txq;
1007 	int i;
1008 
1009 	if (sc->vmx_tq != NULL) {
1010 		for (i = 0; i < sc->vmx_max_ntxqueues; i++) {
1011 			txq = &sc->vmx_txq[i];
1012 			taskqueue_drain(sc->vmx_tq, &txq->vxtxq_defrtask);
1013 		}
1014 	}
1015 }
1016 
1017 static void
1018 vmxnet3_free_taskqueue(struct vmxnet3_softc *sc)
1019 {
1020 	if (sc->vmx_tq != NULL) {
1021 		taskqueue_free(sc->vmx_tq);
1022 		sc->vmx_tq = NULL;
1023 	}
1024 }
1025 #endif
1026 
1027 static int
1028 vmxnet3_init_rxq(struct vmxnet3_softc *sc, int q)
1029 {
1030 	struct vmxnet3_rxqueue *rxq;
1031 	struct vmxnet3_rxring *rxr;
1032 	int i;
1033 
1034 	rxq = &sc->vmx_rxq[q];
1035 
1036 	ksnprintf(rxq->vxrxq_name, sizeof(rxq->vxrxq_name), "%s-rx%d",
1037 	    device_get_nameunit(sc->vmx_dev), q);
1038 	lockinit(&rxq->vxrxq_lock, rxq->vxrxq_name, 0, 0);
1039 
1040 	rxq->vxrxq_sc = sc;
1041 	rxq->vxrxq_id = q;
1042 
1043 	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1044 		rxr = &rxq->vxrxq_cmd_ring[i];
1045 		rxr->vxrxr_rid = i;
1046 		rxr->vxrxr_ndesc = sc->vmx_nrxdescs;
1047 		rxr->vxrxr_rxbuf = kmalloc(rxr->vxrxr_ndesc *
1048 		    sizeof(struct vmxnet3_rxbuf), M_DEVBUF, M_INTWAIT | M_ZERO);
1049 		if (rxr->vxrxr_rxbuf == NULL)
1050 			return (ENOMEM);
1051 
1052 		rxq->vxrxq_comp_ring.vxcr_ndesc += sc->vmx_nrxdescs;
1053 	}
1054 
1055 	return (0);
1056 }
1057 
1058 static int
1059 vmxnet3_init_txq(struct vmxnet3_softc *sc, int q)
1060 {
1061 	struct vmxnet3_txqueue *txq;
1062 	struct vmxnet3_txring *txr;
1063 
1064 	txq = &sc->vmx_txq[q];
1065 	txr = &txq->vxtxq_cmd_ring;
1066 
1067 	ksnprintf(txq->vxtxq_name, sizeof(txq->vxtxq_name), "%s-tx%d",
1068 	    device_get_nameunit(sc->vmx_dev), q);
1069 	lockinit(&txq->vxtxq_lock, txq->vxtxq_name, 0, 0);
1070 
1071 	txq->vxtxq_sc = sc;
1072 	txq->vxtxq_id = q;
1073 
1074 	txr->vxtxr_ndesc = sc->vmx_ntxdescs;
1075 	txr->vxtxr_txbuf = kmalloc(txr->vxtxr_ndesc *
1076 	    sizeof(struct vmxnet3_txbuf), M_DEVBUF, M_INTWAIT | M_ZERO);
1077 	if (txr->vxtxr_txbuf == NULL)
1078 		return (ENOMEM);
1079 
1080 	txq->vxtxq_comp_ring.vxcr_ndesc = sc->vmx_ntxdescs;
1081 
1082 #ifndef VMXNET3_LEGACY_TX
1083 	TASK_INIT(&txq->vxtxq_defrtask, 0, vmxnet3_txq_tq_deferred, txq);
1084 
1085 	txq->vxtxq_br = buf_ring_alloc(VMXNET3_DEF_BUFRING_SIZE, M_DEVBUF,
1086 	    M_NOWAIT, &txq->vxtxq_lock);
1087 	if (txq->vxtxq_br == NULL)
1088 		return (ENOMEM);
1089 #endif
1090 
1091 	return (0);
1092 }
1093 
1094 static int
1095 vmxnet3_alloc_rxtx_queues(struct vmxnet3_softc *sc)
1096 {
1097 	int i, error;
1098 
1099 	/*
1100 	 * Only attempt to create multiple queues if MSIX is available. MSIX is
1101 	 * disabled by default because it is apparently broken for devices passed
1102 	 * through by at least ESXi 5.1. The hw.pci.honor_msi_blacklist tunable
1103 	 * must be set to zero for MSIX. This check prevents us from allocating
1104 	 * queue structures that we will not use.
1105 	 */
1106 	if (sc->vmx_flags & VMXNET3_FLAG_NO_MSIX) {
1107 		sc->vmx_max_nrxqueues = 1;
1108 		sc->vmx_max_ntxqueues = 1;
1109 	}
1110 
1111 	sc->vmx_rxq = kmalloc(sizeof(struct vmxnet3_rxqueue) *
1112 	    sc->vmx_max_nrxqueues, M_DEVBUF, M_INTWAIT | M_ZERO);
1113 	sc->vmx_txq = kmalloc(sizeof(struct vmxnet3_txqueue) *
1114 	    sc->vmx_max_ntxqueues, M_DEVBUF, M_INTWAIT | M_ZERO);
1115 	if (sc->vmx_rxq == NULL || sc->vmx_txq == NULL)
1116 		return (ENOMEM);
1117 
1118 	for (i = 0; i < sc->vmx_max_nrxqueues; i++) {
1119 		error = vmxnet3_init_rxq(sc, i);
1120 		if (error)
1121 			return (error);
1122 	}
1123 
1124 	for (i = 0; i < sc->vmx_max_ntxqueues; i++) {
1125 		error = vmxnet3_init_txq(sc, i);
1126 		if (error)
1127 			return (error);
1128 	}
1129 
1130 	return (0);
1131 }
1132 
1133 static void
1134 vmxnet3_destroy_rxq(struct vmxnet3_rxqueue *rxq)
1135 {
1136 	struct vmxnet3_rxring *rxr;
1137 	int i;
1138 
1139 	rxq->vxrxq_sc = NULL;
1140 	rxq->vxrxq_id = -1;
1141 
1142 	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1143 		rxr = &rxq->vxrxq_cmd_ring[i];
1144 
1145 		if (rxr->vxrxr_rxbuf != NULL) {
1146 			kfree(rxr->vxrxr_rxbuf, M_DEVBUF);
1147 			rxr->vxrxr_rxbuf = NULL;
1148 		}
1149 	}
1150 
1151 #if 0 /* XXX */
1152 	if (mtx_initialized(&rxq->vxrxq_lock) != 0)
1153 #endif
1154 		lockuninit(&rxq->vxrxq_lock);
1155 }
1156 
1157 static void
1158 vmxnet3_destroy_txq(struct vmxnet3_txqueue *txq)
1159 {
1160 	struct vmxnet3_txring *txr;
1161 
1162 	txr = &txq->vxtxq_cmd_ring;
1163 
1164 	txq->vxtxq_sc = NULL;
1165 	txq->vxtxq_id = -1;
1166 
1167 #ifndef VMXNET3_LEGACY_TX
1168 	if (txq->vxtxq_br != NULL) {
1169 		buf_ring_free(txq->vxtxq_br, M_DEVBUF);
1170 		txq->vxtxq_br = NULL;
1171 	}
1172 #endif
1173 
1174 	if (txr->vxtxr_txbuf != NULL) {
1175 		kfree(txr->vxtxr_txbuf, M_DEVBUF);
1176 		txr->vxtxr_txbuf = NULL;
1177 	}
1178 
1179 #if 0 /* XXX */
1180 	if (mtx_initialized(&txq->vxtxq_lock) != 0)
1181 #endif
1182 		lockuninit(&txq->vxtxq_lock);
1183 }
1184 
1185 static void
1186 vmxnet3_free_rxtx_queues(struct vmxnet3_softc *sc)
1187 {
1188 	int i;
1189 
1190 	if (sc->vmx_rxq != NULL) {
1191 		for (i = 0; i < sc->vmx_max_nrxqueues; i++)
1192 			vmxnet3_destroy_rxq(&sc->vmx_rxq[i]);
1193 		kfree(sc->vmx_rxq, M_DEVBUF);
1194 		sc->vmx_rxq = NULL;
1195 	}
1196 
1197 	if (sc->vmx_txq != NULL) {
1198 		for (i = 0; i < sc->vmx_max_ntxqueues; i++)
1199 			vmxnet3_destroy_txq(&sc->vmx_txq[i]);
1200 		kfree(sc->vmx_txq, M_DEVBUF);
1201 		sc->vmx_txq = NULL;
1202 	}
1203 }
1204 
1205 static int
1206 vmxnet3_alloc_shared_data(struct vmxnet3_softc *sc)
1207 {
1208 	device_t dev;
1209 	uint8_t *kva;
1210 	size_t size;
1211 	int i, error;
1212 
1213 	dev = sc->vmx_dev;
1214 
1215 	size = sizeof(struct vmxnet3_driver_shared);
1216 	error = vmxnet3_dma_malloc(sc, size, 1, &sc->vmx_ds_dma);
1217 	if (error) {
1218 		device_printf(dev, "cannot alloc shared memory\n");
1219 		return (error);
1220 	}
1221 	sc->vmx_ds = (struct vmxnet3_driver_shared *) sc->vmx_ds_dma.dma_vaddr;
1222 
1223 	size = sc->vmx_ntxqueues * sizeof(struct vmxnet3_txq_shared) +
1224 	    sc->vmx_nrxqueues * sizeof(struct vmxnet3_rxq_shared);
1225 	error = vmxnet3_dma_malloc(sc, size, 128, &sc->vmx_qs_dma);
1226 	if (error) {
1227 		device_printf(dev, "cannot alloc queue shared memory\n");
1228 		return (error);
1229 	}
1230 	sc->vmx_qs = (void *) sc->vmx_qs_dma.dma_vaddr;
1231 	kva = sc->vmx_qs;
1232 
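	/*
	 * The queue shared area is a single allocation: the Tx queue shared
	 * structures are laid out first, followed by the Rx queue shared
	 * structures.
	 */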
1233 	for (i = 0; i < sc->vmx_ntxqueues; i++) {
1234 		sc->vmx_txq[i].vxtxq_ts = (struct vmxnet3_txq_shared *) kva;
1235 		kva += sizeof(struct vmxnet3_txq_shared);
1236 	}
1237 	for (i = 0; i < sc->vmx_nrxqueues; i++) {
1238 		sc->vmx_rxq[i].vxrxq_rs = (struct vmxnet3_rxq_shared *) kva;
1239 		kva += sizeof(struct vmxnet3_rxq_shared);
1240 	}
1241 
1242 	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1243 		size = sizeof(struct vmxnet3_rss_shared);
1244 		error = vmxnet3_dma_malloc(sc, size, 128, &sc->vmx_rss_dma);
1245 		if (error) {
1246 			device_printf(dev, "cannot alloc rss shared memory\n");
1247 			return (error);
1248 		}
1249 		sc->vmx_rss =
1250 		    (struct vmxnet3_rss_shared *) sc->vmx_rss_dma.dma_vaddr;
1251 	}
1252 
1253 	return (0);
1254 }
1255 
1256 static void
1257 vmxnet3_free_shared_data(struct vmxnet3_softc *sc)
1258 {
1259 
1260 	if (sc->vmx_rss != NULL) {
1261 		vmxnet3_dma_free(sc, &sc->vmx_rss_dma);
1262 		sc->vmx_rss = NULL;
1263 	}
1264 
1265 	if (sc->vmx_qs != NULL) {
1266 		vmxnet3_dma_free(sc, &sc->vmx_qs_dma);
1267 		sc->vmx_qs = NULL;
1268 	}
1269 
1270 	if (sc->vmx_ds != NULL) {
1271 		vmxnet3_dma_free(sc, &sc->vmx_ds_dma);
1272 		sc->vmx_ds = NULL;
1273 	}
1274 }
1275 
1276 static int
1277 vmxnet3_alloc_txq_data(struct vmxnet3_softc *sc)
1278 {
1279 	device_t dev;
1280 	struct vmxnet3_txqueue *txq;
1281 	struct vmxnet3_txring *txr;
1282 	struct vmxnet3_comp_ring *txc;
1283 	size_t descsz, compsz;
1284 	int i, q, error;
1285 
1286 	dev = sc->vmx_dev;
1287 
1288 	for (q = 0; q < sc->vmx_ntxqueues; q++) {
1289 		txq = &sc->vmx_txq[q];
1290 		txr = &txq->vxtxq_cmd_ring;
1291 		txc = &txq->vxtxq_comp_ring;
1292 
1293 		descsz = txr->vxtxr_ndesc * sizeof(struct vmxnet3_txdesc);
1294 		compsz = txr->vxtxr_ndesc * sizeof(struct vmxnet3_txcompdesc);
1295 
1296 		error = bus_dma_tag_create(bus_get_dma_tag(dev),
1297 		    1, 0,			/* alignment, boundary */
1298 		    BUS_SPACE_MAXADDR,		/* lowaddr */
1299 		    BUS_SPACE_MAXADDR,		/* highaddr */
1300 		    NULL, NULL,			/* filter, filterarg */
1301 		    VMXNET3_TX_MAXSIZE,		/* maxsize */
1302 		    VMXNET3_TX_MAXSEGS,		/* nsegments */
1303 		    VMXNET3_TX_MAXSEGSIZE,	/* maxsegsize */
1304 		    0,				/* flags */
1305 		    &txr->vxtxr_txtag);
1306 		if (error) {
1307 			device_printf(dev,
1308 			    "unable to create Tx buffer tag for queue %d\n", q);
1309 			return (error);
1310 		}
1311 
1312 		error = vmxnet3_dma_malloc(sc, descsz, 512, &txr->vxtxr_dma);
1313 		if (error) {
1314 			device_printf(dev, "cannot alloc Tx descriptors for "
1315 			    "queue %d error %d\n", q, error);
1316 			return (error);
1317 		}
1318 		txr->vxtxr_txd =
1319 		    (struct vmxnet3_txdesc *) txr->vxtxr_dma.dma_vaddr;
1320 
1321 		error = vmxnet3_dma_malloc(sc, compsz, 512, &txc->vxcr_dma);
1322 		if (error) {
1323 			device_printf(dev, "cannot alloc Tx comp descriptors "
1324 			   "for queue %d error %d\n", q, error);
1325 			return (error);
1326 		}
1327 		txc->vxcr_u.txcd =
1328 		    (struct vmxnet3_txcompdesc *) txc->vxcr_dma.dma_vaddr;
1329 
1330 		for (i = 0; i < txr->vxtxr_ndesc; i++) {
1331 			error = bus_dmamap_create(txr->vxtxr_txtag, 0,
1332 			    &txr->vxtxr_txbuf[i].vtxb_dmamap);
1333 			if (error) {
1334 				device_printf(dev, "unable to create Tx buf "
1335 				    "dmamap for queue %d idx %d\n", q, i);
1336 				return (error);
1337 			}
1338 		}
1339 	}
1340 
1341 	return (0);
1342 }
1343 
1344 static void
1345 vmxnet3_free_txq_data(struct vmxnet3_softc *sc)
1346 {
1347 	device_t dev;
1348 	struct vmxnet3_txqueue *txq;
1349 	struct vmxnet3_txring *txr;
1350 	struct vmxnet3_comp_ring *txc;
1351 	struct vmxnet3_txbuf *txb;
1352 	int i, q;
1353 
1354 	dev = sc->vmx_dev;
1355 
1356 	for (q = 0; q < sc->vmx_ntxqueues; q++) {
1357 		txq = &sc->vmx_txq[q];
1358 		txr = &txq->vxtxq_cmd_ring;
1359 		txc = &txq->vxtxq_comp_ring;
1360 
1361 		for (i = 0; i < txr->vxtxr_ndesc; i++) {
1362 			txb = &txr->vxtxr_txbuf[i];
1363 			if (txb->vtxb_dmamap != NULL) {
1364 				bus_dmamap_destroy(txr->vxtxr_txtag,
1365 				    txb->vtxb_dmamap);
1366 				txb->vtxb_dmamap = NULL;
1367 			}
1368 		}
1369 
1370 		if (txc->vxcr_u.txcd != NULL) {
1371 			vmxnet3_dma_free(sc, &txc->vxcr_dma);
1372 			txc->vxcr_u.txcd = NULL;
1373 		}
1374 
1375 		if (txr->vxtxr_txd != NULL) {
1376 			vmxnet3_dma_free(sc, &txr->vxtxr_dma);
1377 			txr->vxtxr_txd = NULL;
1378 		}
1379 
1380 		if (txr->vxtxr_txtag != NULL) {
1381 			bus_dma_tag_destroy(txr->vxtxr_txtag);
1382 			txr->vxtxr_txtag = NULL;
1383 		}
1384 	}
1385 }
1386 
1387 static int
1388 vmxnet3_alloc_rxq_data(struct vmxnet3_softc *sc)
1389 {
1390 	device_t dev;
1391 	struct vmxnet3_rxqueue *rxq;
1392 	struct vmxnet3_rxring *rxr;
1393 	struct vmxnet3_comp_ring *rxc;
1394 	int descsz, compsz;
1395 	int i, j, q, error;
1396 
1397 	dev = sc->vmx_dev;
1398 
1399 	for (q = 0; q < sc->vmx_nrxqueues; q++) {
1400 		rxq = &sc->vmx_rxq[q];
1401 		rxc = &rxq->vxrxq_comp_ring;
1402 		compsz = 0;
1403 
1404 		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1405 			rxr = &rxq->vxrxq_cmd_ring[i];
1406 
1407 			descsz = rxr->vxrxr_ndesc *
1408 			    sizeof(struct vmxnet3_rxdesc);
1409 			compsz += rxr->vxrxr_ndesc *
1410 			    sizeof(struct vmxnet3_rxcompdesc);
1411 
1412 			error = bus_dma_tag_create(bus_get_dma_tag(dev),
1413 			    1, 0,		/* alignment, boundary */
1414 			    BUS_SPACE_MAXADDR,	/* lowaddr */
1415 			    BUS_SPACE_MAXADDR,	/* highaddr */
1416 			    NULL, NULL,		/* filter, filterarg */
1417 			    MJUMPAGESIZE,	/* maxsize */
1418 			    1,			/* nsegments */
1419 			    MJUMPAGESIZE,	/* maxsegsize */
1420 			    0,			/* flags */
1421 			    &rxr->vxrxr_rxtag);
1422 			if (error) {
1423 				device_printf(dev,
1424 				    "unable to create Rx buffer tag for "
1425 				    "queue %d\n", q);
1426 				return (error);
1427 			}
1428 
1429 			error = vmxnet3_dma_malloc(sc, descsz, 512,
1430 			    &rxr->vxrxr_dma);
1431 			if (error) {
1432 				device_printf(dev, "cannot allocate Rx "
1433 				    "descriptors for queue %d/%d error %d\n",
1434 				    q, i, error);
1435 				return (error);
1436 			}
1437 			rxr->vxrxr_rxd =
1438 			    (struct vmxnet3_rxdesc *) rxr->vxrxr_dma.dma_vaddr;
1439 		}
1440 
1441 		error = vmxnet3_dma_malloc(sc, compsz, 512, &rxc->vxcr_dma);
1442 		if (error) {
1443 			device_printf(dev, "cannot alloc Rx comp descriptors "
1444 			    "for queue %d error %d\n", q, error);
1445 			return (error);
1446 		}
1447 		rxc->vxcr_u.rxcd =
1448 		    (struct vmxnet3_rxcompdesc *) rxc->vxcr_dma.dma_vaddr;
1449 
1450 		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1451 			rxr = &rxq->vxrxq_cmd_ring[i];
1452 
1453 			error = bus_dmamap_create(rxr->vxrxr_rxtag, 0,
1454 			    &rxr->vxrxr_spare_dmap);
1455 			if (error) {
1456 				device_printf(dev, "unable to create spare "
1457 				    "dmamap for queue %d/%d error %d\n",
1458 				    q, i, error);
1459 				return (error);
1460 			}
1461 
1462 			for (j = 0; j < rxr->vxrxr_ndesc; j++) {
1463 				error = bus_dmamap_create(rxr->vxrxr_rxtag, 0,
1464 				    &rxr->vxrxr_rxbuf[j].vrxb_dmamap);
1465 				if (error) {
1466 					device_printf(dev, "unable to create "
1467 					    "dmamap for queue %d/%d slot %d "
1468 					    "error %d\n",
1469 					    q, i, j, error);
1470 					return (error);
1471 				}
1472 			}
1473 		}
1474 	}
1475 
1476 	return (0);
1477 }
1478 
1479 static void
1480 vmxnet3_free_rxq_data(struct vmxnet3_softc *sc)
1481 {
1482 	device_t dev;
1483 	struct vmxnet3_rxqueue *rxq;
1484 	struct vmxnet3_rxring *rxr;
1485 	struct vmxnet3_comp_ring *rxc;
1486 	struct vmxnet3_rxbuf *rxb;
1487 	int i, j, q;
1488 
1489 	dev = sc->vmx_dev;
1490 
1491 	for (q = 0; q < sc->vmx_nrxqueues; q++) {
1492 		rxq = &sc->vmx_rxq[q];
1493 		rxc = &rxq->vxrxq_comp_ring;
1494 
1495 		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1496 			rxr = &rxq->vxrxq_cmd_ring[i];
1497 
1498 			if (rxr->vxrxr_spare_dmap != NULL) {
1499 				bus_dmamap_destroy(rxr->vxrxr_rxtag,
1500 				    rxr->vxrxr_spare_dmap);
1501 				rxr->vxrxr_spare_dmap = NULL;
1502 			}
1503 
1504 			for (j = 0; j < rxr->vxrxr_ndesc; j++) {
1505 				rxb = &rxr->vxrxr_rxbuf[j];
1506 				if (rxb->vrxb_dmamap != NULL) {
1507 					bus_dmamap_destroy(rxr->vxrxr_rxtag,
1508 					    rxb->vrxb_dmamap);
1509 					rxb->vrxb_dmamap = NULL;
1510 				}
1511 			}
1512 		}
1513 
1514 		if (rxc->vxcr_u.rxcd != NULL) {
1515 			vmxnet3_dma_free(sc, &rxc->vxcr_dma);
1516 			rxc->vxcr_u.rxcd = NULL;
1517 		}
1518 
1519 		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1520 			rxr = &rxq->vxrxq_cmd_ring[i];
1521 
1522 			if (rxr->vxrxr_rxd != NULL) {
1523 				vmxnet3_dma_free(sc, &rxr->vxrxr_dma);
1524 				rxr->vxrxr_rxd = NULL;
1525 			}
1526 
1527 			if (rxr->vxrxr_rxtag != NULL) {
1528 				bus_dma_tag_destroy(rxr->vxrxr_rxtag);
1529 				rxr->vxrxr_rxtag = NULL;
1530 			}
1531 		}
1532 	}
1533 }
1534 
1535 static int
1536 vmxnet3_alloc_queue_data(struct vmxnet3_softc *sc)
1537 {
1538 	int error;
1539 
1540 	error = vmxnet3_alloc_txq_data(sc);
1541 	if (error)
1542 		return (error);
1543 
1544 	error = vmxnet3_alloc_rxq_data(sc);
1545 	if (error)
1546 		return (error);
1547 
1548 	return (0);
1549 }
1550 
1551 static void
1552 vmxnet3_free_queue_data(struct vmxnet3_softc *sc)
1553 {
1554 
1555 	if (sc->vmx_rxq != NULL)
1556 		vmxnet3_free_rxq_data(sc);
1557 
1558 	if (sc->vmx_txq != NULL)
1559 		vmxnet3_free_txq_data(sc);
1560 }
1561 
1562 static int
1563 vmxnet3_alloc_mcast_table(struct vmxnet3_softc *sc)
1564 {
1565 	int error;
1566 
1567 	error = vmxnet3_dma_malloc(sc, VMXNET3_MULTICAST_MAX * ETHER_ADDR_LEN,
1568 	    32, &sc->vmx_mcast_dma);
1569 	if (error)
1570 		device_printf(sc->vmx_dev, "unable to alloc multicast table\n");
1571 	else
1572 		sc->vmx_mcast = sc->vmx_mcast_dma.dma_vaddr;
1573 
1574 	return (error);
1575 }
1576 
1577 static void
1578 vmxnet3_free_mcast_table(struct vmxnet3_softc *sc)
1579 {
1580 
1581 	if (sc->vmx_mcast != NULL) {
1582 		vmxnet3_dma_free(sc, &sc->vmx_mcast_dma);
1583 		sc->vmx_mcast = NULL;
1584 	}
1585 }
1586 
1587 static void
1588 vmxnet3_init_shared_data(struct vmxnet3_softc *sc)
1589 {
1590 	struct vmxnet3_driver_shared *ds;
1591 	struct vmxnet3_txqueue *txq;
1592 	struct vmxnet3_txq_shared *txs;
1593 	struct vmxnet3_rxqueue *rxq;
1594 	struct vmxnet3_rxq_shared *rxs;
1595 	int i;
1596 
1597 	ds = sc->vmx_ds;
1598 
1599 	/*
1600 	 * Initialize fields of the shared data that remain the same across
1601 	 * reinits. Note the shared data is zero'd when allocated.
1602 	 */
1603 
1604 	ds->magic = VMXNET3_REV1_MAGIC;
1605 
1606 	/* DriverInfo */
1607 	ds->version = VMXNET3_DRIVER_VERSION;
1608 	ds->guest = VMXNET3_GOS_FREEBSD |
1609 #ifdef __LP64__
1610 	    VMXNET3_GOS_64BIT;
1611 #else
1612 	    VMXNET3_GOS_32BIT;
1613 #endif
1614 	ds->vmxnet3_revision = 1;
1615 	ds->upt_version = 1;
1616 
1617 	/* Misc. conf */
1618 	ds->driver_data = vtophys(sc);
1619 	ds->driver_data_len = sizeof(struct vmxnet3_softc);
1620 	ds->queue_shared = sc->vmx_qs_dma.dma_paddr;
1621 	ds->queue_shared_len = sc->vmx_qs_dma.dma_size;
1622 	ds->nrxsg_max = sc->vmx_max_rxsegs;
1623 
1624 	/* RSS conf */
1625 	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1626 		ds->rss.version = 1;
1627 		ds->rss.paddr = sc->vmx_rss_dma.dma_paddr;
1628 		ds->rss.len = sc->vmx_rss_dma.dma_size;
1629 	}
1630 
1631 	/* Interrupt control. */
1632 	ds->automask = sc->vmx_intr_mask_mode == VMXNET3_IMM_AUTO;
1633 	ds->nintr = sc->vmx_nintrs;
1634 	ds->evintr = sc->vmx_event_intr_idx;
1635 	ds->ictrl = VMXNET3_ICTRL_DISABLE_ALL;
1636 
1637 	for (i = 0; i < sc->vmx_nintrs; i++)
1638 		ds->modlevel[i] = UPT1_IMOD_ADAPTIVE;
1639 
1640 	/* Receive filter. */
1641 	ds->mcast_table = sc->vmx_mcast_dma.dma_paddr;
1642 	ds->mcast_tablelen = sc->vmx_mcast_dma.dma_size;
1643 
1644 	/* Tx queues */
1645 	for (i = 0; i < sc->vmx_ntxqueues; i++) {
1646 		txq = &sc->vmx_txq[i];
1647 		txs = txq->vxtxq_ts;
1648 
1649 		txs->cmd_ring = txq->vxtxq_cmd_ring.vxtxr_dma.dma_paddr;
1650 		txs->cmd_ring_len = txq->vxtxq_cmd_ring.vxtxr_ndesc;
1651 		txs->comp_ring = txq->vxtxq_comp_ring.vxcr_dma.dma_paddr;
1652 		txs->comp_ring_len = txq->vxtxq_comp_ring.vxcr_ndesc;
1653 		txs->driver_data = vtophys(txq);
1654 		txs->driver_data_len = sizeof(struct vmxnet3_txqueue);
1655 	}
1656 
1657 	/* Rx queues */
1658 	for (i = 0; i < sc->vmx_nrxqueues; i++) {
1659 		rxq = &sc->vmx_rxq[i];
1660 		rxs = rxq->vxrxq_rs;
1661 
1662 		rxs->cmd_ring[0] = rxq->vxrxq_cmd_ring[0].vxrxr_dma.dma_paddr;
1663 		rxs->cmd_ring_len[0] = rxq->vxrxq_cmd_ring[0].vxrxr_ndesc;
1664 		rxs->cmd_ring[1] = rxq->vxrxq_cmd_ring[1].vxrxr_dma.dma_paddr;
1665 		rxs->cmd_ring_len[1] = rxq->vxrxq_cmd_ring[1].vxrxr_ndesc;
1666 		rxs->comp_ring = rxq->vxrxq_comp_ring.vxcr_dma.dma_paddr;
1667 		rxs->comp_ring_len = rxq->vxrxq_comp_ring.vxcr_ndesc;
1668 		rxs->driver_data = vtophys(rxq);
1669 		rxs->driver_data_len = sizeof(struct vmxnet3_rxqueue);
1670 	}
1671 }
1672 
1673 static void
1674 vmxnet3_init_hwassist(struct vmxnet3_softc *sc)
1675 {
1676 	struct ifnet *ifp = sc->vmx_ifp;
1677 	uint64_t hwassist;
1678 
1679 	hwassist = 0;
1680 	if (ifp->if_capenable & IFCAP_TXCSUM)
1681 		hwassist |= VMXNET3_CSUM_OFFLOAD;
1682 	if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
1683 		hwassist |= VMXNET3_CSUM_OFFLOAD_IPV6;
1684 #if 0 /* XXX TSO */
1685 	if (ifp->if_capenable & IFCAP_TSO4)
1686 		hwassist |= CSUM_IP_TSO;
1687 	if (ifp->if_capenable & IFCAP_TSO6)
1688 		hwassist |= CSUM_IP6_TSO;
1689 #endif
1690 	ifp->if_hwassist = hwassist;
1691 }
1692 
1693 static void
1694 vmxnet3_reinit_interface(struct vmxnet3_softc *sc)
1695 {
1696 	struct ifnet *ifp;
1697 
1698 	ifp = sc->vmx_ifp;
1699 
1700 	/* Use the current MAC address. */
1701 	bcopy(IF_LLADDR(sc->vmx_ifp), sc->vmx_lladdr, ETHER_ADDR_LEN);
1702 	vmxnet3_set_lladdr(sc);
1703 
1704 	vmxnet3_init_hwassist(sc);
1705 }
1706 
1707 static void
1708 vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *sc)
1709 {
1710 	/*
1711 	 * Use the same key as the Linux driver until FreeBSD can do
1712 	 * RSS (presumably Toeplitz) in software.
1713 	 */
1714 	static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = {
1715 	    0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac,
1716 	    0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28,
1717 	    0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70,
1718 	    0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3,
1719 	    0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9,
1720 	};
1721 
1722 	struct vmxnet3_driver_shared *ds;
1723 	struct vmxnet3_rss_shared *rss;
1724 	int i;
1725 
1726 	ds = sc->vmx_ds;
1727 	rss = sc->vmx_rss;
1728 
1729 	rss->hash_type =
1730 	    UPT1_RSS_HASH_TYPE_IPV4 | UPT1_RSS_HASH_TYPE_TCP_IPV4 |
1731 	    UPT1_RSS_HASH_TYPE_IPV6 | UPT1_RSS_HASH_TYPE_TCP_IPV6;
1732 	rss->hash_func = UPT1_RSS_HASH_FUNC_TOEPLITZ;
1733 	rss->hash_key_size = UPT1_RSS_MAX_KEY_SIZE;
1734 	rss->ind_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
1735 	memcpy(rss->hash_key, rss_key, UPT1_RSS_MAX_KEY_SIZE);
1736 
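	/* Spread the indirection table across the Rx queues round-robin. */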
1737 	for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++)
1738 		rss->ind_table[i] = i % sc->vmx_nrxqueues;
1739 }
1740 
1741 static void
1742 vmxnet3_reinit_shared_data(struct vmxnet3_softc *sc)
1743 {
1744 	struct ifnet *ifp;
1745 	struct vmxnet3_driver_shared *ds;
1746 
1747 	ifp = sc->vmx_ifp;
1748 	ds = sc->vmx_ds;
1749 
1750 	ds->mtu = ifp->if_mtu;
1751 	ds->ntxqueue = sc->vmx_ntxqueues;
1752 	ds->nrxqueue = sc->vmx_nrxqueues;
1753 
1754 	ds->upt_features = 0;
1755 	if (ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6))
1756 		ds->upt_features |= UPT1_F_CSUM;
1757 	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1758 		ds->upt_features |= UPT1_F_VLAN;
1759 #if 0 /* XXX LRO */
1760 	if (ifp->if_capenable & IFCAP_LRO)
1761 		ds->upt_features |= UPT1_F_LRO;
1762 #endif
1763 
1764 	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1765 		ds->upt_features |= UPT1_F_RSS;
1766 		vmxnet3_reinit_rss_shared_data(sc);
1767 	}
1768 
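	/*
	 * Hand the 64-bit physical address of the driver shared area to the
	 * device as two 32-bit writes: the low half to DSL and the high
	 * half to DSH.
	 */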
1769 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSL, sc->vmx_ds_dma.dma_paddr);
1770 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSH,
1771 	    (uint64_t) sc->vmx_ds_dma.dma_paddr >> 32);
1772 }
1773 
1774 static int
1775 vmxnet3_alloc_data(struct vmxnet3_softc *sc)
1776 {
1777 	int error;
1778 
1779 	error = vmxnet3_alloc_shared_data(sc);
1780 	if (error)
1781 		return (error);
1782 
1783 	error = vmxnet3_alloc_queue_data(sc);
1784 	if (error)
1785 		return (error);
1786 
1787 	error = vmxnet3_alloc_mcast_table(sc);
1788 	if (error)
1789 		return (error);
1790 
1791 	vmxnet3_init_shared_data(sc);
1792 
1793 	return (0);
1794 }
1795 
1796 static void
1797 vmxnet3_free_data(struct vmxnet3_softc *sc)
1798 {
1799 
1800 	vmxnet3_free_mcast_table(sc);
1801 	vmxnet3_free_queue_data(sc);
1802 	vmxnet3_free_shared_data(sc);
1803 }
1804 
1805 static int
1806 vmxnet3_setup_interface(struct vmxnet3_softc *sc)
1807 {
1808 	device_t dev;
1809 	struct ifnet *ifp;
1810 
1811 	dev = sc->vmx_dev;
1812 
1813 	ifp = sc->vmx_ifp = if_alloc(IFT_ETHER);
1814 	if (ifp == NULL) {
1815 		device_printf(dev, "cannot allocate ifnet structure\n");
1816 		return (ENOSPC);
1817 	}
1818 
1819 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1820 	ifp->if_baudrate = IF_Gbps(10ULL);
1821 	ifp->if_softc = sc;
1822 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1823 	ifp->if_init = vmxnet3_init;
1824 	ifp->if_ioctl = vmxnet3_ioctl;
1825 #if 0 /* XXX TSO */
1826 	ifp->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
1827 	ifp->if_hw_tsomaxsegcount = VMXNET3_TX_MAXSEGS;
1828 	ifp->if_hw_tsomaxsegsize = VMXNET3_TX_MAXSEGSIZE;
1829 #endif
1830 
1831 #ifdef VMXNET3_LEGACY_TX
1832 	ifp->if_start = vmxnet3_start;
1833 	ifq_set_maxlen(&ifp->if_snd, sc->vmx_ntxdescs - 1);
1834 	ifq_set_ready(&ifp->if_snd);
1835 #else
1836 	ifp->if_transmit = vmxnet3_txq_mq_start;
1837 	ifp->if_qflush = vmxnet3_qflush;
1838 #endif
1839 
1840 	vmxnet3_get_lladdr(sc);
1841 	ether_ifattach(ifp, sc->vmx_lladdr, NULL);
1842 
1843 	ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_TXCSUM;
1844 	ifp->if_capabilities |= IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6;
1845 #if 0 /* XXX TSO */
1846 	ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6;
1847 #endif
1848 	ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING |
1849 	    IFCAP_VLAN_HWCSUM;
1850 	ifp->if_capenable = ifp->if_capabilities;
1851 
1852 #if 0 /* XXX LRO / VLAN_HWFILTER */
1853 	/* These capabilities are not enabled by default. */
1854 	ifp->if_capabilities |= /* IFCAP_LRO | */ IFCAP_VLAN_HWFILTER;
1855 #endif
1856 
1857 	sc->vmx_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
1858 	    vmxnet3_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
1859 	sc->vmx_vlan_detach = EVENTHANDLER_REGISTER(vlan_config,
1860 	    vmxnet3_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
1861 
1862 	ifmedia_init(&sc->vmx_media, 0, vmxnet3_media_change,
1863 	    vmxnet3_media_status);
1864 	ifmedia_add(&sc->vmx_media, IFM_ETHER | IFM_AUTO, 0, NULL);
1865 	ifmedia_set(&sc->vmx_media, IFM_ETHER | IFM_AUTO);
1866 
1867 	return (0);
1868 }
1869 
1870 static void
1871 vmxnet3_evintr(struct vmxnet3_softc *sc)
1872 {
1873 	device_t dev;
1874 	struct ifnet *ifp;
1875 	struct vmxnet3_txq_shared *ts;
1876 	struct vmxnet3_rxq_shared *rs;
1877 	uint32_t event;
1878 	int reset;
1879 
1880 	dev = sc->vmx_dev;
1881 	ifp = sc->vmx_ifp;
1882 	reset = 0;
1883 
1884 	VMXNET3_CORE_LOCK(sc);
1885 
1886 	/* Clear events. */
1887 	event = sc->vmx_ds->event;
1888 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_EVENT, event);
1889 
1890 	if (event & VMXNET3_EVENT_LINK) {
1891 		vmxnet3_link_status(sc);
1892 		if (sc->vmx_link_active != 0)
1893 			vmxnet3_tx_start_all(sc);
1894 	}
1895 
1896 	if (event & (VMXNET3_EVENT_TQERROR | VMXNET3_EVENT_RQERROR)) {
1897 		reset = 1;
1898 		vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_STATUS);
1899 		ts = sc->vmx_txq[0].vxtxq_ts;
1900 		if (ts->stopped != 0)
1901 			device_printf(dev, "Tx queue error %#x\n", ts->error);
1902 		rs = sc->vmx_rxq[0].vxrxq_rs;
1903 		if (rs->stopped != 0)
1904 			device_printf(dev, "Rx queue error %#x\n", rs->error);
1905 		device_printf(dev, "Rx/Tx queue error event ... resetting\n");
1906 	}
1907 
1908 	if (event & VMXNET3_EVENT_DIC)
1909 		device_printf(dev, "device implementation change event\n");
1910 	if (event & VMXNET3_EVENT_DEBUG)
1911 		device_printf(dev, "debug event\n");
1912 
1913 	if (reset != 0) {
1914 		ifp->if_flags &= ~IFF_RUNNING;
1915 		vmxnet3_init_locked(sc);
1916 	}
1917 
1918 	VMXNET3_CORE_UNLOCK(sc);
1919 }
1920 
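/*
 * Process the Tx completion ring: for every completed descriptor, sync
 * and unload the DMA map, update the queue statistics and free the mbuf
 * chain.  The watchdog is disarmed once the command ring is empty.
 */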
1921 static void
1922 vmxnet3_txq_eof(struct vmxnet3_txqueue *txq)
1923 {
1924 	struct vmxnet3_softc *sc;
1925 	struct ifnet *ifp;
1926 	struct vmxnet3_txring *txr;
1927 	struct vmxnet3_comp_ring *txc;
1928 	struct vmxnet3_txcompdesc *txcd;
1929 	struct vmxnet3_txbuf *txb;
1930 	struct mbuf *m;
1931 	u_int sop;
1932 
1933 	sc = txq->vxtxq_sc;
1934 	ifp = sc->vmx_ifp;
1935 	txr = &txq->vxtxq_cmd_ring;
1936 	txc = &txq->vxtxq_comp_ring;
1937 
1938 	VMXNET3_TXQ_LOCK_ASSERT(txq);
1939 
1940 	for (;;) {
1941 		txcd = &txc->vxcr_u.txcd[txc->vxcr_next];
1942 		if (txcd->gen != txc->vxcr_gen)
1943 			break;
1944 		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1945 
1946 		if (++txc->vxcr_next == txc->vxcr_ndesc) {
1947 			txc->vxcr_next = 0;
1948 			txc->vxcr_gen ^= 1;
1949 		}
1950 
1951 		sop = txr->vxtxr_next;
1952 		txb = &txr->vxtxr_txbuf[sop];
1953 
1954 		if ((m = txb->vtxb_m) != NULL) {
1955 			bus_dmamap_sync(txr->vxtxr_txtag, txb->vtxb_dmamap,
1956 			    BUS_DMASYNC_POSTWRITE);
1957 			bus_dmamap_unload(txr->vxtxr_txtag, txb->vtxb_dmamap);
1958 
1959 			txq->vxtxq_stats.vmtxs_opackets++;
1960 			txq->vxtxq_stats.vmtxs_obytes += m->m_pkthdr.len;
1961 			if (m->m_flags & M_MCAST)
1962 				txq->vxtxq_stats.vmtxs_omcasts++;
1963 
1964 			m_freem(m);
1965 			txb->vtxb_m = NULL;
1966 		}
1967 
1968 		txr->vxtxr_next = (txcd->eop_idx + 1) % txr->vxtxr_ndesc;
1969 	}
1970 
1971 	if (txr->vxtxr_head == txr->vxtxr_next)
1972 		txq->vxtxq_watchdog = 0;
1973 }
1974 
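/*
 * Post a fresh mbuf cluster at the current fill index of an Rx ring.
 * Frames start in ring 0 with an MCLBYTES "head" buffer (adjusted by
 * ETHER_ALIGN); the remaining ring 0 slots and all of ring 1 hold
 * MJUMPAGESIZE "body" buffers.  The spare DMA map is used for the load
 * so the previous buffer remains intact if allocation or load fails.
 */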
1975 static int
1976 vmxnet3_newbuf(struct vmxnet3_softc *sc, struct vmxnet3_rxring *rxr)
1977 {
1978 	struct ifnet *ifp;
1979 	struct mbuf *m;
1980 	struct vmxnet3_rxdesc *rxd;
1981 	struct vmxnet3_rxbuf *rxb;
1982 	bus_dma_tag_t tag;
1983 	bus_dmamap_t dmap;
1984 	bus_dma_segment_t segs[1];
1985 	int idx, clsize, btype, flags, nsegs, error;
1986 
1987 	ifp = sc->vmx_ifp;
1988 	tag = rxr->vxrxr_rxtag;
1989 	dmap = rxr->vxrxr_spare_dmap;
1990 	idx = rxr->vxrxr_fill;
1991 	rxd = &rxr->vxrxr_rxd[idx];
1992 	rxb = &rxr->vxrxr_rxbuf[idx];
1993 
1994 #ifdef VMXNET3_FAILPOINTS
1995 	KFAIL_POINT_CODE(VMXNET3_FP, newbuf, return ENOBUFS);
1996 	if (rxr->vxrxr_rid != 0)
1997 		KFAIL_POINT_CODE(VMXNET3_FP, newbuf_body_only, return ENOBUFS);
1998 #endif
1999 
2000 	if (rxr->vxrxr_rid == 0 && (idx % sc->vmx_rx_max_chain) == 0) {
2001 		flags = M_PKTHDR;
2002 		clsize = MCLBYTES;
2003 		btype = VMXNET3_BTYPE_HEAD;
2004 	} else {
2005 		flags = M_PKTHDR;
2006 		clsize = MJUMPAGESIZE;
2007 		btype = VMXNET3_BTYPE_BODY;
2008 	}
2009 
2010 	m = m_getjcl(M_NOWAIT, MT_DATA, flags, clsize);
2011 	if (m == NULL) {
2012 		sc->vmx_stats.vmst_mgetcl_failed++;
2013 		return (ENOBUFS);
2014 	}
2015 
2016 	if (btype == VMXNET3_BTYPE_HEAD) {
2017 		m->m_len = m->m_pkthdr.len = clsize;
2018 		m_adj(m, ETHER_ALIGN);
2019 	} else
2020 		m->m_len = clsize;
2021 
2022 	error = bus_dmamap_load_mbuf_segment(tag, dmap, m, &segs[0], 1, &nsegs,
2023 	    BUS_DMA_NOWAIT);
2024 	if (error) {
2025 		m_freem(m);
2026 		sc->vmx_stats.vmst_mbuf_load_failed++;
2027 		return (error);
2028 	}
2029 	KASSERT(nsegs == 1,
2030 	    ("%s: mbuf %p with too many segments %d", __func__, m, nsegs));
2031 	if (btype == VMXNET3_BTYPE_BODY)
2032 		m->m_flags &= ~M_PKTHDR;
2033 
2034 	if (rxb->vrxb_m != NULL) {
2035 		bus_dmamap_sync(tag, rxb->vrxb_dmamap, BUS_DMASYNC_POSTREAD);
2036 		bus_dmamap_unload(tag, rxb->vrxb_dmamap);
2037 	}
2038 
2039 	rxr->vxrxr_spare_dmap = rxb->vrxb_dmamap;
2040 	rxb->vrxb_dmamap = dmap;
2041 	rxb->vrxb_m = m;
2042 
2043 	rxd->addr = segs[0].ds_addr;
2044 	rxd->len = segs[0].ds_len;
2045 	rxd->btype = btype;
2046 	rxd->gen = rxr->vxrxr_gen;
2047 
2048 	vmxnet3_rxr_increment_fill(rxr);
2049 	return (0);
2050 }
2051 
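/*
 * Return a descriptor to the host without passing its buffer up the
 * stack; the mbuf already attached to the slot is simply reused.
 */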
2052 static void
2053 vmxnet3_rxq_eof_discard(struct vmxnet3_rxqueue *rxq,
2054     struct vmxnet3_rxring *rxr, int idx)
2055 {
2056 	struct vmxnet3_rxdesc *rxd;
2057 
2058 	rxd = &rxr->vxrxr_rxd[idx];
2059 	rxd->gen = rxr->vxrxr_gen;
2060 	vmxnet3_rxr_increment_fill(rxr);
2061 }
2062 
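/*
 * Discard the remaining completion entries of a frame that is being
 * dropped, returning each descriptor to the host until EOP is seen.
 */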
2063 static void
2064 vmxnet3_rxq_discard_chain(struct vmxnet3_rxqueue *rxq)
2065 {
2066 	struct vmxnet3_softc *sc;
2067 	struct vmxnet3_rxring *rxr;
2068 	struct vmxnet3_comp_ring *rxc;
2069 	struct vmxnet3_rxcompdesc *rxcd;
2070 	int idx, eof;
2071 
2072 	sc = rxq->vxrxq_sc;
2073 	rxc = &rxq->vxrxq_comp_ring;
2074 
2075 	do {
2076 		rxcd = &rxc->vxcr_u.rxcd[rxc->vxcr_next];
2077 		if (rxcd->gen != rxc->vxcr_gen)
2078 			break;		/* Not expected. */
2079 		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
2080 
2081 		if (++rxc->vxcr_next == rxc->vxcr_ndesc) {
2082 			rxc->vxcr_next = 0;
2083 			rxc->vxcr_gen ^= 1;
2084 		}
2085 
2086 		idx = rxcd->rxd_idx;
2087 		eof = rxcd->eop;
2088 		if (rxcd->qid < sc->vmx_nrxqueues)
2089 			rxr = &rxq->vxrxq_cmd_ring[0];
2090 		else
2091 			rxr = &rxq->vxrxq_cmd_ring[1];
2092 		vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2093 	} while (!eof);
2094 }
2095 
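/*
 * Translate the checksum bits of an Rx completion descriptor into
 * mbuf checksum flags.
 */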
2096 static void
2097 vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m)
2098 {
2099 
2100 	if (rxcd->ipv4) {
2101 		m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED;
2102 		if (rxcd->ipcsum_ok)
2103 			m->m_pkthdr.csum_flags |= CSUM_IP_VALID;
2104 	}
2105 
2106 	if (!rxcd->fragment) {
2107 		if (rxcd->csum_ok && (rxcd->tcp || rxcd->udp)) {
2108 			m->m_pkthdr.csum_flags |= CSUM_DATA_VALID |
2109 			    CSUM_PSEUDO_HDR;
2110 			m->m_pkthdr.csum_data = 0xFFFF;
2111 		}
2112 	}
2113 }
2114 
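/*
 * Finish a received frame: set checksum flags and the VLAN tag, update
 * the queue statistics and hand the mbuf to the stack.  The queue lock
 * is dropped around if_input() and reacquired afterwards.
 */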
2115 static void
2116 vmxnet3_rxq_input(struct vmxnet3_rxqueue *rxq,
2117     struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m)
2118 {
2119 	struct vmxnet3_softc *sc;
2120 	struct ifnet *ifp;
2121 
2122 	sc = rxq->vxrxq_sc;
2123 	ifp = sc->vmx_ifp;
2124 
2125 	if (rxcd->error) {
2126 		rxq->vxrxq_stats.vmrxs_ierrors++;
2127 		m_freem(m);
2128 		return;
2129 	}
2130 
2131 #if 0
2132 #ifdef notyet
2133 	switch (rxcd->rss_type) {
2134 	case VMXNET3_RCD_RSS_TYPE_IPV4:
2135 		m->m_pkthdr.flowid = rxcd->rss_hash;
2136 		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_IPV4);
2137 		break;
2138 	case VMXNET3_RCD_RSS_TYPE_TCPIPV4:
2139 		m->m_pkthdr.flowid = rxcd->rss_hash;
2140 		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_TCP_IPV4);
2141 		break;
2142 	case VMXNET3_RCD_RSS_TYPE_IPV6:
2143 		m->m_pkthdr.flowid = rxcd->rss_hash;
2144 		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_IPV6);
2145 		break;
2146 	case VMXNET3_RCD_RSS_TYPE_TCPIPV6:
2147 		m->m_pkthdr.flowid = rxcd->rss_hash;
2148 		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_TCP_IPV6);
2149 		break;
2150 	default: /* VMXNET3_RCD_RSS_TYPE_NONE */
2151 		m->m_pkthdr.flowid = rxq->vxrxq_id;
2152 		M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2153 		break;
2154 	}
2155 #else
2156 	m->m_pkthdr.flowid = rxq->vxrxq_id;
2157 	M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2158 #endif
2159 #endif
2160 
2161 	if (!rxcd->no_csum)
2162 		vmxnet3_rx_csum(rxcd, m);
2163 	if (rxcd->vlan) {
2164 		m->m_flags |= M_VLANTAG;
2165 		m->m_pkthdr.ether_vlantag = rxcd->vtag;
2166 	}
2167 
2168 	rxq->vxrxq_stats.vmrxs_ipackets++;
2169 	rxq->vxrxq_stats.vmrxs_ibytes += m->m_pkthdr.len;
2170 
2171 	VMXNET3_RXQ_UNLOCK(rxq);
2172 	(*ifp->if_input)(ifp, m, NULL, -1);
2173 	VMXNET3_RXQ_LOCK(rxq);
2174 }
2175 
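/*
 * Main Rx completion loop.  A frame may span several descriptors: the
 * SOP completion starts a chain from a head buffer and later
 * completions append body buffers until EOP, when the chain is handed
 * to vmxnet3_rxq_input().  Every consumed descriptor is immediately
 * replaced with a fresh buffer, and a partially assembled chain is
 * saved in the queue across calls.
 */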
2176 static void
2177 vmxnet3_rxq_eof(struct vmxnet3_rxqueue *rxq)
2178 {
2179 	struct vmxnet3_softc *sc;
2180 	struct ifnet *ifp;
2181 	struct vmxnet3_rxring *rxr;
2182 	struct vmxnet3_comp_ring *rxc;
2183 	struct vmxnet3_rxdesc *rxd;
2184 	struct vmxnet3_rxcompdesc *rxcd;
2185 	struct mbuf *m, *m_head, *m_tail;
2186 	int idx, length;
2187 
2188 	sc = rxq->vxrxq_sc;
2189 	ifp = sc->vmx_ifp;
2190 	rxc = &rxq->vxrxq_comp_ring;
2191 
2192 	VMXNET3_RXQ_LOCK_ASSERT(rxq);
2193 
2194 	if ((ifp->if_flags & IFF_RUNNING) == 0)
2195 		return;
2196 
2197 	m_head = rxq->vxrxq_mhead;
2198 	rxq->vxrxq_mhead = NULL;
2199 	m_tail = rxq->vxrxq_mtail;
2200 	rxq->vxrxq_mtail = NULL;
2201 	KKASSERT(m_head == NULL || m_tail != NULL);
2202 
2203 	for (;;) {
2204 		rxcd = &rxc->vxcr_u.rxcd[rxc->vxcr_next];
2205 		if (rxcd->gen != rxc->vxcr_gen) {
2206 			rxq->vxrxq_mhead = m_head;
2207 			rxq->vxrxq_mtail = m_tail;
2208 			break;
2209 		}
2210 		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
2211 
2212 		if (++rxc->vxcr_next == rxc->vxcr_ndesc) {
2213 			rxc->vxcr_next = 0;
2214 			rxc->vxcr_gen ^= 1;
2215 		}
2216 
2217 		idx = rxcd->rxd_idx;
2218 		length = rxcd->len;
2219 		if (rxcd->qid < sc->vmx_nrxqueues)
2220 			rxr = &rxq->vxrxq_cmd_ring[0];
2221 		else
2222 			rxr = &rxq->vxrxq_cmd_ring[1];
2223 		rxd = &rxr->vxrxr_rxd[idx];
2224 
2225 		m = rxr->vxrxr_rxbuf[idx].vrxb_m;
2226 		KASSERT(m != NULL, ("%s: queue %d idx %d without mbuf",
2227 		    __func__, rxcd->qid, idx));
2228 
2229 		/*
2230 		 * The host may skip descriptors. We detect this when this
2231 		 * descriptor does not match the previous fill index. Catch
2232 		 * up with the host now.
2233 		 */
2234 		if (__predict_false(rxr->vxrxr_fill != idx)) {
2235 			while (rxr->vxrxr_fill != idx) {
2236 				rxr->vxrxr_rxd[rxr->vxrxr_fill].gen =
2237 				    rxr->vxrxr_gen;
2238 				vmxnet3_rxr_increment_fill(rxr);
2239 			}
2240 		}
2241 
2242 		if (rxcd->sop) {
2243 			KASSERT(rxd->btype == VMXNET3_BTYPE_HEAD,
2244 			    ("%s: start of frame w/o head buffer", __func__));
2245 			KASSERT(rxr == &rxq->vxrxq_cmd_ring[0],
2246 			    ("%s: start of frame not in ring 0", __func__));
			KASSERT((idx % sc->vmx_rx_max_chain) == 0,
			    ("%s: start of frame at unexpected index %d (%d)",
			     __func__, idx, sc->vmx_rx_max_chain));
2250 			KASSERT(m_head == NULL,
2251 			    ("%s: duplicate start of frame?", __func__));
2252 
2253 			if (length == 0) {
2254 				/* Just ignore this descriptor. */
2255 				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2256 				goto nextp;
2257 			}
2258 
2259 			if (vmxnet3_newbuf(sc, rxr) != 0) {
2260 				rxq->vxrxq_stats.vmrxs_iqdrops++;
2261 				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2262 				if (!rxcd->eop)
2263 					vmxnet3_rxq_discard_chain(rxq);
2264 				goto nextp;
2265 			}
2266 
2267 			m->m_pkthdr.rcvif = ifp;
2268 			m->m_pkthdr.len = m->m_len = length;
2269 			m->m_pkthdr.csum_flags = 0;
2270 			m_head = m_tail = m;
2271 
2272 		} else {
2273 			KASSERT(rxd->btype == VMXNET3_BTYPE_BODY,
2274 			    ("%s: non start of frame w/o body buffer", __func__));
2275 
2276 			if (m_head == NULL && m_tail == NULL) {
2277 				/*
2278 				 * This is a continuation of a packet that we
2279 				 * started to drop, but could not drop entirely
2280 				 * because this segment was still owned by the
2281 				 * host.  So, drop the remainder now.
2282 				 */
2283 				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2284 				if (!rxcd->eop)
2285 					vmxnet3_rxq_discard_chain(rxq);
2286 				goto nextp;
2287 			}
2288 
2289 			KASSERT(m_head != NULL,
2290 			    ("%s: frame not started?", __func__));
2291 
2292 			if (vmxnet3_newbuf(sc, rxr) != 0) {
2293 				rxq->vxrxq_stats.vmrxs_iqdrops++;
2294 				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2295 				if (!rxcd->eop)
2296 					vmxnet3_rxq_discard_chain(rxq);
2297 				m_freem(m_head);
2298 				m_head = m_tail = NULL;
2299 				goto nextp;
2300 			}
2301 
2302 			m->m_len = length;
2303 			m_head->m_pkthdr.len += length;
2304 			m_tail->m_next = m;
2305 			m_tail = m;
2306 		}
2307 
2308 		if (rxcd->eop) {
2309 			vmxnet3_rxq_input(rxq, rxcd, m_head);
2310 			m_head = m_tail = NULL;
2311 
2312 			/* Must recheck after dropping the Rx lock. */
2313 			if ((ifp->if_flags & IFF_RUNNING) == 0)
2314 				break;
2315 		}
2316 
2317 nextp:
2318 		if (__predict_false(rxq->vxrxq_rs->update_rxhead)) {
2319 			int qid = rxcd->qid;
2320 			bus_size_t r;
2321 
2322 			idx = (idx + 1) % rxr->vxrxr_ndesc;
2323 			if (qid >= sc->vmx_nrxqueues) {
2324 				qid -= sc->vmx_nrxqueues;
2325 				r = VMXNET3_BAR0_RXH2(qid);
2326 			} else
2327 				r = VMXNET3_BAR0_RXH1(qid);
2328 			vmxnet3_write_bar0(sc, r, idx);
2329 		}
2330 	}
2331 }
2332 
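/*
 * Interrupt handler used when a single vector services the entire
 * device (legacy INTx or MSI): handle pending events and service the
 * first (only) Rx and Tx queue.  With INTx the interrupt is first
 * confirmed by reading the BAR1 interrupt status register.
 */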
2333 static void
2334 vmxnet3_legacy_intr(void *xsc)
2335 {
2336 	struct vmxnet3_softc *sc;
2337 	struct vmxnet3_rxqueue *rxq;
2338 	struct vmxnet3_txqueue *txq;
2339 
2340 	sc = xsc;
2341 	rxq = &sc->vmx_rxq[0];
2342 	txq = &sc->vmx_txq[0];
2343 
2344 	if (sc->vmx_intr_type == VMXNET3_IT_LEGACY) {
2345 		if (vmxnet3_read_bar1(sc, VMXNET3_BAR1_INTR) == 0)
2346 			return;
2347 	}
2348 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2349 		vmxnet3_disable_all_intrs(sc);
2350 
2351 	if (sc->vmx_ds->event != 0)
2352 		vmxnet3_evintr(sc);
2353 
2354 	VMXNET3_RXQ_LOCK(rxq);
2355 	vmxnet3_rxq_eof(rxq);
2356 	VMXNET3_RXQ_UNLOCK(rxq);
2357 
2358 	VMXNET3_TXQ_LOCK(txq);
2359 	vmxnet3_txq_eof(txq);
2360 	vmxnet3_txq_start(txq);
2361 	VMXNET3_TXQ_UNLOCK(txq);
2362 
2363 	vmxnet3_enable_all_intrs(sc);
2364 }
2365 
2366 #ifdef __FreeBSD__
2367 static void
2368 vmxnet3_txq_intr(void *xtxq)
2369 {
2370 	struct vmxnet3_softc *sc;
2371 	struct vmxnet3_txqueue *txq;
2372 
2373 	txq = xtxq;
2374 	sc = txq->vxtxq_sc;
2375 
2376 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2377 		vmxnet3_disable_intr(sc, txq->vxtxq_intr_idx);
2378 
2379 	VMXNET3_TXQ_LOCK(txq);
2380 	vmxnet3_txq_eof(txq);
2381 	vmxnet3_txq_start(txq);
2382 	VMXNET3_TXQ_UNLOCK(txq);
2383 
2384 	vmxnet3_enable_intr(sc, txq->vxtxq_intr_idx);
2385 }
2386 
2387 static void
2388 vmxnet3_rxq_intr(void *xrxq)
2389 {
2390 	struct vmxnet3_softc *sc;
2391 	struct vmxnet3_rxqueue *rxq;
2392 
2393 	rxq = xrxq;
2394 	sc = rxq->vxrxq_sc;
2395 
2396 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2397 		vmxnet3_disable_intr(sc, rxq->vxrxq_intr_idx);
2398 
2399 	VMXNET3_RXQ_LOCK(rxq);
2400 	vmxnet3_rxq_eof(rxq);
2401 	VMXNET3_RXQ_UNLOCK(rxq);
2402 
2403 	vmxnet3_enable_intr(sc, rxq->vxrxq_intr_idx);
2404 }
2405 
2406 static void
2407 vmxnet3_event_intr(void *xsc)
2408 {
2409 	struct vmxnet3_softc *sc;
2410 
2411 	sc = xsc;
2412 
2413 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2414 		vmxnet3_disable_intr(sc, sc->vmx_event_intr_idx);
2415 
2416 	if (sc->vmx_ds->event != 0)
2417 		vmxnet3_evintr(sc);
2418 
2419 	vmxnet3_enable_intr(sc, sc->vmx_event_intr_idx);
2420 }
2421 #endif
2422 
2423 static void
2424 vmxnet3_txstop(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
2425 {
2426 	struct vmxnet3_txring *txr;
2427 	struct vmxnet3_txbuf *txb;
2428 	int i;
2429 
2430 	txr = &txq->vxtxq_cmd_ring;
2431 
2432 	for (i = 0; i < txr->vxtxr_ndesc; i++) {
2433 		txb = &txr->vxtxr_txbuf[i];
2434 
2435 		if (txb->vtxb_m == NULL)
2436 			continue;
2437 
2438 		bus_dmamap_sync(txr->vxtxr_txtag, txb->vtxb_dmamap,
2439 		    BUS_DMASYNC_POSTWRITE);
2440 		bus_dmamap_unload(txr->vxtxr_txtag, txb->vtxb_dmamap);
2441 		m_freem(txb->vtxb_m);
2442 		txb->vtxb_m = NULL;
2443 	}
2444 }
2445 
2446 static void
2447 vmxnet3_rxstop(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
2448 {
2449 	struct vmxnet3_rxring *rxr;
2450 	struct vmxnet3_rxbuf *rxb;
2451 	int i, j;
2452 
2453 	if (rxq->vxrxq_mhead != NULL) {
2454 		m_freem(rxq->vxrxq_mhead);
2455 		rxq->vxrxq_mhead = NULL;
2456 		rxq->vxrxq_mtail = NULL;
2457 	}
2458 
2459 	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
2460 		rxr = &rxq->vxrxq_cmd_ring[i];
2461 
2462 		for (j = 0; j < rxr->vxrxr_ndesc; j++) {
2463 			rxb = &rxr->vxrxr_rxbuf[j];
2464 
2465 			if (rxb->vrxb_m == NULL)
2466 				continue;
2467 
2468 			bus_dmamap_sync(rxr->vxrxr_rxtag, rxb->vrxb_dmamap,
2469 			    BUS_DMASYNC_POSTREAD);
2470 			bus_dmamap_unload(rxr->vxrxr_rxtag, rxb->vrxb_dmamap);
2471 			m_freem(rxb->vrxb_m);
2472 			rxb->vrxb_m = NULL;
2473 		}
2474 	}
2475 }
2476 
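/*
 * Briefly acquire and release each queue lock so that any interrupt
 * handler still running on another CPU is finished with its queue
 * before the rings are torn down.
 */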
2477 static void
2478 vmxnet3_stop_rendezvous(struct vmxnet3_softc *sc)
2479 {
2480 	struct vmxnet3_rxqueue *rxq;
2481 	struct vmxnet3_txqueue *txq;
2482 	int i;
2483 
2484 	for (i = 0; i < sc->vmx_nrxqueues; i++) {
2485 		rxq = &sc->vmx_rxq[i];
2486 		VMXNET3_RXQ_LOCK(rxq);
2487 		VMXNET3_RXQ_UNLOCK(rxq);
2488 	}
2489 
2490 	for (i = 0; i < sc->vmx_ntxqueues; i++) {
2491 		txq = &sc->vmx_txq[i];
2492 		VMXNET3_TXQ_LOCK(txq);
2493 		VMXNET3_TXQ_UNLOCK(txq);
2494 	}
2495 }
2496 
2497 static void
2498 vmxnet3_stop(struct vmxnet3_softc *sc)
2499 {
2500 	struct ifnet *ifp;
2501 	int q;
2502 
2503 	ifp = sc->vmx_ifp;
2504 	VMXNET3_CORE_LOCK_ASSERT(sc);
2505 
2506 	ifp->if_flags &= ~IFF_RUNNING;
2507 	sc->vmx_link_active = 0;
2508 	callout_stop(&sc->vmx_tick);
2509 
2510 	/* Disable interrupts. */
2511 	vmxnet3_disable_all_intrs(sc);
2512 	vmxnet3_write_cmd(sc, VMXNET3_CMD_DISABLE);
2513 
2514 	vmxnet3_stop_rendezvous(sc);
2515 
2516 	for (q = 0; q < sc->vmx_ntxqueues; q++)
2517 		vmxnet3_txstop(sc, &sc->vmx_txq[q]);
2518 	for (q = 0; q < sc->vmx_nrxqueues; q++)
2519 		vmxnet3_rxstop(sc, &sc->vmx_rxq[q]);
2520 
2521 	vmxnet3_write_cmd(sc, VMXNET3_CMD_RESET);
2522 }
2523 
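/*
 * Reset a Tx queue's command and completion rings to their initial
 * empty state.
 */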
2524 static void
2525 vmxnet3_txinit(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
2526 {
2527 	struct vmxnet3_txring *txr;
2528 	struct vmxnet3_comp_ring *txc;
2529 
2530 	txr = &txq->vxtxq_cmd_ring;
2531 	txr->vxtxr_head = 0;
2532 	txr->vxtxr_next = 0;
2533 	txr->vxtxr_gen = VMXNET3_INIT_GEN;
2534 	bzero(txr->vxtxr_txd,
2535 	    txr->vxtxr_ndesc * sizeof(struct vmxnet3_txdesc));
2536 
2537 	txc = &txq->vxtxq_comp_ring;
2538 	txc->vxcr_next = 0;
2539 	txc->vxcr_gen = VMXNET3_INIT_GEN;
2540 	bzero(txc->vxcr_u.txcd,
2541 	    txc->vxcr_ndesc * sizeof(struct vmxnet3_txcompdesc));
2542 }
2543 
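/*
 * Reset an Rx queue for (re)initialization: size the per-frame buffer
 * chain for the current MTU, refill the command ring(s) with fresh
 * mbufs and reset the completion ring.  Ring 1 is only populated when
 * the frame size requires chained buffers (LRO, currently compiled
 * out, would also require it).
 */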
2544 static int
2545 vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
2546 {
2547 	struct ifnet *ifp;
2548 	struct vmxnet3_rxring *rxr;
2549 	struct vmxnet3_comp_ring *rxc;
2550 	int i, populate, idx, frame_size, error;
2551 
2552 	ifp = sc->vmx_ifp;
2553 	frame_size = ETHER_ALIGN + sizeof(struct ether_vlan_header) +
2554 	    ifp->if_mtu;
2555 
2556 	/*
2557 	 * If the MTU causes us to exceed what a regular sized cluster can
2558 	 * handle, we allocate a second MJUMPAGESIZE cluster after it in
2559 	 * ring 0. If in use, ring 1 always contains MJUMPAGESIZE clusters.
2560 	 *
2561 	 * Keep rx_max_chain a divisor of the maximum Rx ring size to make
2562 	 * our life easier. We do not support changing the ring size after
2563 	 * the attach.
2564 	 */
2565 	if (frame_size <= MCLBYTES)
2566 		sc->vmx_rx_max_chain = 1;
2567 	else
2568 		sc->vmx_rx_max_chain = 2;
2569 
2570 	/*
2571 	 * Only populate ring 1 if the configuration will take advantage
2572 	 * of it. That is either when LRO is enabled or the frame size
2573 	 * exceeds what ring 0 can contain.
2574 	 */
2575 #if 0 /* XXX LRO */
2576 	if ((ifp->if_capenable & IFCAP_LRO) == 0 &&
2577 #else
2578 	if (
2579 #endif
2580 	    frame_size <= MCLBYTES + MJUMPAGESIZE)
2581 		populate = 1;
2582 	else
2583 		populate = VMXNET3_RXRINGS_PERQ;
2584 
2585 	for (i = 0; i < populate; i++) {
2586 		rxr = &rxq->vxrxq_cmd_ring[i];
2587 		rxr->vxrxr_fill = 0;
2588 		rxr->vxrxr_gen = VMXNET3_INIT_GEN;
2589 		bzero(rxr->vxrxr_rxd,
2590 		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
2591 
2592 		for (idx = 0; idx < rxr->vxrxr_ndesc; idx++) {
2593 			error = vmxnet3_newbuf(sc, rxr);
2594 			if (error)
2595 				return (error);
2596 		}
2597 	}
2598 
2599 	for (/**/; i < VMXNET3_RXRINGS_PERQ; i++) {
2600 		rxr = &rxq->vxrxq_cmd_ring[i];
2601 		rxr->vxrxr_fill = 0;
2602 		rxr->vxrxr_gen = 0;
2603 		bzero(rxr->vxrxr_rxd,
2604 		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
2605 	}
2606 
2607 	rxc = &rxq->vxrxq_comp_ring;
2608 	rxc->vxcr_next = 0;
2609 	rxc->vxcr_gen = VMXNET3_INIT_GEN;
2610 	bzero(rxc->vxcr_u.rxcd,
2611 	    rxc->vxcr_ndesc * sizeof(struct vmxnet3_rxcompdesc));
2612 
2613 	return (0);
2614 }
2615 
2616 static int
2617 vmxnet3_reinit_queues(struct vmxnet3_softc *sc)
2618 {
2619 	device_t dev;
2620 	int q, error;
2621 
2622 	dev = sc->vmx_dev;
2623 
2624 	for (q = 0; q < sc->vmx_ntxqueues; q++)
2625 		vmxnet3_txinit(sc, &sc->vmx_txq[q]);
2626 
2627 	for (q = 0; q < sc->vmx_nrxqueues; q++) {
2628 		error = vmxnet3_rxinit(sc, &sc->vmx_rxq[q]);
2629 		if (error) {
2630 			device_printf(dev, "cannot populate Rx queue %d\n", q);
2631 			return (error);
2632 		}
2633 	}
2634 
2635 	return (0);
2636 }
2637 
2638 static int
2639 vmxnet3_enable_device(struct vmxnet3_softc *sc)
2640 {
2641 	int q;
2642 
2643 	if (vmxnet3_read_cmd(sc, VMXNET3_CMD_ENABLE) != 0) {
2644 		device_printf(sc->vmx_dev, "device enable command failed!\n");
2645 		return (1);
2646 	}
2647 
2648 	/* Reset the Rx queue heads. */
2649 	for (q = 0; q < sc->vmx_nrxqueues; q++) {
2650 		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH1(q), 0);
2651 		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH2(q), 0);
2652 	}
2653 
2654 	return (0);
2655 }
2656 
2657 static void
2658 vmxnet3_reinit_rxfilters(struct vmxnet3_softc *sc)
2659 {
2660 	struct ifnet *ifp;
2661 
2662 	ifp = sc->vmx_ifp;
2663 
2664 	vmxnet3_set_rxfilter(sc);
2665 
2666 #if 0 /* VLAN_HWFILTER */
2667 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
2668 		bcopy(sc->vmx_vlan_filter, sc->vmx_ds->vlan_filter,
2669 		    sizeof(sc->vmx_ds->vlan_filter));
2670 	else
2671 #endif
2672 		bzero(sc->vmx_ds->vlan_filter,
2673 		    sizeof(sc->vmx_ds->vlan_filter));
2674 	vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER);
2675 }
2676 
2677 static int
2678 vmxnet3_reinit(struct vmxnet3_softc *sc)
2679 {
2680 
2681 	vmxnet3_reinit_interface(sc);
2682 	vmxnet3_reinit_shared_data(sc);
2683 
2684 	if (vmxnet3_reinit_queues(sc) != 0)
2685 		return (ENXIO);
2686 
2687 	if (vmxnet3_enable_device(sc) != 0)
2688 		return (ENXIO);
2689 
2690 	vmxnet3_reinit_rxfilters(sc);
2691 
2692 	return (0);
2693 }
2694 
2695 static void
2696 vmxnet3_init_locked(struct vmxnet3_softc *sc)
2697 {
2698 	struct ifnet *ifp;
2699 
2700 	ifp = sc->vmx_ifp;
2701 
2702 	if (ifp->if_flags & IFF_RUNNING)
2703 		return;
2704 
2705 	vmxnet3_stop(sc);
2706 
2707 	if (vmxnet3_reinit(sc) != 0) {
2708 		vmxnet3_stop(sc);
2709 		return;
2710 	}
2711 
2712 	ifp->if_flags |= IFF_RUNNING;
2713 	vmxnet3_link_status(sc);
2714 
2715 	vmxnet3_enable_all_intrs(sc);
2716 	callout_reset(&sc->vmx_tick, hz, vmxnet3_tick, sc);
2717 }
2718 
2719 static void
2720 vmxnet3_init(void *xsc)
2721 {
2722 	struct vmxnet3_softc *sc;
2723 
2724 	sc = xsc;
2725 
2726 	VMXNET3_CORE_LOCK(sc);
2727 	vmxnet3_init_locked(sc);
2728 	VMXNET3_CORE_UNLOCK(sc);
2729 }
2730 
2731 /*
2732  * BMV: Much of this can go away once we finally have offsets in
2733  * the mbuf packet header. Bug andre@.
2734  */
2735 static int
2736 vmxnet3_txq_offload_ctx(struct vmxnet3_txqueue *txq, struct mbuf *m,
2737     int *etype, int *proto, int *start)
2738 {
2739 	struct ether_vlan_header *evh;
2740 	int offset;
2741 #if defined(INET)
2742 	struct ip *ip = NULL;
2743 #endif
2744 #if defined(INET6)
2745 	struct ip6_hdr *ip6 = NULL;
2746 #endif
2747 
2748 	evh = mtod(m, struct ether_vlan_header *);
2749 	if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2750 		/* BMV: We should handle nested VLAN tags too. */
2751 		*etype = ntohs(evh->evl_proto);
2752 		offset = sizeof(struct ether_vlan_header);
2753 	} else {
2754 		*etype = ntohs(evh->evl_encap_proto);
2755 		offset = sizeof(struct ether_header);
2756 	}
2757 
2758 	switch (*etype) {
2759 #if defined(INET)
2760 	case ETHERTYPE_IP:
2761 		if (__predict_false(m->m_len < offset + sizeof(struct ip))) {
2762 			m = m_pullup(m, offset + sizeof(struct ip));
2763 			if (m == NULL)
2764 				return (EINVAL);
2765 		}
2766 
2767 		ip = (struct ip *)(mtod(m, uint8_t *) + offset);
2768 		*proto = ip->ip_p;
2769 		*start = offset + (ip->ip_hl << 2);
2770 		break;
2771 #endif
2772 #if defined(INET6)
2773 	case ETHERTYPE_IPV6:
2774 		if (__predict_false(m->m_len <
2775 		    offset + sizeof(struct ip6_hdr))) {
2776 			m = m_pullup(m, offset + sizeof(struct ip6_hdr));
2777 			if (m == NULL)
2778 				return (EINVAL);
2779 		}
2780 
2781 		ip6 = (struct ip6_hdr *)(mtod(m, uint8_t *) + offset);
2782 		*proto = -1;
2783 		*start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto);
2784 		/* Assert the network stack sent us a valid packet. */
2785 		KASSERT(*start > offset,
2786 		    ("%s: mbuf %p start %d offset %d proto %d", __func__, m,
2787 		    *start, offset, *proto));
2788 		break;
2789 #endif
2790 	default:
2791 		return (EINVAL);
2792 	}
2793 
2794 #if 0 /* XXX TSO */
2795 	if (m->m_pkthdr.csum_flags & CSUM_TSO) {
2796 		struct tcphdr *tcp;
2797 
2798 		if (__predict_false(*proto != IPPROTO_TCP)) {
2799 			/* Likely failed to correctly parse the mbuf. */
2800 			return (EINVAL);
2801 		}
2802 
2803 		if (m->m_len < *start + sizeof(struct tcphdr)) {
2804 			m = m_pullup(m, *start + sizeof(struct tcphdr));
2805 			if (m == NULL)
2806 				return (EINVAL);
2807 		}
2808 
2809 		tcp = (struct tcphdr *)(mtod(m, uint8_t *) + *start);
2810 		*start += (tcp->th_off << 2);
2811 
2812 		txq->vxtxq_stats.vmtxs_tso++;
2813 	} else
2814 #endif
2815 		txq->vxtxq_stats.vmtxs_csum++;
2816 
2817 	return (0);
2818 }
2819 
2820 static int
2821 vmxnet3_txq_load_mbuf(struct vmxnet3_txqueue *txq, struct mbuf **m0,
2822     bus_dmamap_t dmap, bus_dma_segment_t segs[], int *nsegs)
2823 {
2824 	struct vmxnet3_txring *txr;
2825 	struct mbuf *m;
2826 	bus_dma_tag_t tag;
2827 	int error;
2828 
2829 	txr = &txq->vxtxq_cmd_ring;
2830 	m = *m0;
2831 	tag = txr->vxtxr_txtag;
2832 
2833 	error = bus_dmamap_load_mbuf_segment(tag, dmap, m, segs, 1, nsegs,
2834 	    BUS_DMA_NOWAIT);
2835 	if (error == 0 || error != EFBIG)
2836 		return (error);
2837 
2838 	m = m_defrag(m, M_NOWAIT);
2839 	if (m != NULL) {
2840 		*m0 = m;
2841 		error = bus_dmamap_load_mbuf_segment(tag, dmap, m, segs,
2842 		    1, nsegs, BUS_DMA_NOWAIT);
2843 	} else
2844 		error = ENOBUFS;
2845 
2846 	if (error) {
2847 		m_freem(*m0);
2848 		*m0 = NULL;
2849 		txq->vxtxq_sc->vmx_stats.vmst_defrag_failed++;
2850 	} else
2851 		txq->vxtxq_sc->vmx_stats.vmst_defragged++;
2852 
2853 	return (error);
2854 }
2855 
2856 static void
2857 vmxnet3_txq_unload_mbuf(struct vmxnet3_txqueue *txq, bus_dmamap_t dmap)
2858 {
2859 	struct vmxnet3_txring *txr;
2860 
2861 	txr = &txq->vxtxq_cmd_ring;
2862 	bus_dmamap_unload(txr->vxtxr_txtag, dmap);
2863 }
2864 
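/*
 * Encapsulate an mbuf chain into Tx descriptors.  The SOP descriptor
 * is initially written with an inverted generation bit so the device
 * ignores the chain while it is being built; the VLAN tag and checksum
 * offload state are also recorded in the SOP.  Ownership is handed to
 * the device by flipping the SOP generation bit last, and the TXH
 * doorbell is written once enough descriptors are pending to reach the
 * interrupt threshold.
 */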
2865 static int
2866 vmxnet3_txq_encap(struct vmxnet3_txqueue *txq, struct mbuf **m0)
2867 {
2868 	struct vmxnet3_softc *sc;
2869 	struct vmxnet3_txring *txr;
2870 	struct vmxnet3_txdesc *txd, *sop;
2871 	struct mbuf *m;
2872 	bus_dmamap_t dmap;
2873 	bus_dma_segment_t segs[VMXNET3_TX_MAXSEGS];
2874 	int i, gen, nsegs, etype, proto, start, error;
2875 
2876 	sc = txq->vxtxq_sc;
2877 	start = 0;
2878 	txd = NULL;
2879 	txr = &txq->vxtxq_cmd_ring;
2880 	dmap = txr->vxtxr_txbuf[txr->vxtxr_head].vtxb_dmamap;
2881 
2882 	error = vmxnet3_txq_load_mbuf(txq, m0, dmap, segs, &nsegs);
2883 	if (error)
2884 		return (error);
2885 
2886 	m = *m0;
2887 	M_ASSERTPKTHDR(m);
2888 	KASSERT(nsegs <= VMXNET3_TX_MAXSEGS,
2889 	    ("%s: mbuf %p with too many segments %d", __func__, m, nsegs));
2890 
2891 	if (VMXNET3_TXRING_AVAIL(txr) < nsegs) {
2892 		txq->vxtxq_stats.vmtxs_full++;
2893 		vmxnet3_txq_unload_mbuf(txq, dmap);
2894 		return (ENOSPC);
2895 	} else if (m->m_pkthdr.csum_flags & VMXNET3_CSUM_ALL_OFFLOAD) {
2896 		error = vmxnet3_txq_offload_ctx(txq, m, &etype, &proto, &start);
2897 		if (error) {
2898 			txq->vxtxq_stats.vmtxs_offload_failed++;
2899 			vmxnet3_txq_unload_mbuf(txq, dmap);
2900 			m_freem(m);
2901 			*m0 = NULL;
2902 			return (error);
2903 		}
2904 	}
2905 
2906 	txr->vxtxr_txbuf[txr->vxtxr_head].vtxb_m = m;
2907 	sop = &txr->vxtxr_txd[txr->vxtxr_head];
	gen = txr->vxtxr_gen ^ 1;	/* Still owned by the CPU for now */
2909 
2910 	for (i = 0; i < nsegs; i++) {
2911 		txd = &txr->vxtxr_txd[txr->vxtxr_head];
2912 
2913 		txd->addr = segs[i].ds_addr;
2914 		txd->len = segs[i].ds_len;
2915 		txd->gen = gen;
2916 		txd->dtype = 0;
2917 		txd->offload_mode = VMXNET3_OM_NONE;
2918 		txd->offload_pos = 0;
2919 		txd->hlen = 0;
2920 		txd->eop = 0;
2921 		txd->compreq = 0;
2922 		txd->vtag_mode = 0;
2923 		txd->vtag = 0;
2924 
2925 		if (++txr->vxtxr_head == txr->vxtxr_ndesc) {
2926 			txr->vxtxr_head = 0;
2927 			txr->vxtxr_gen ^= 1;
2928 		}
2929 		gen = txr->vxtxr_gen;
2930 	}
2931 	txd->eop = 1;
2932 	txd->compreq = 1;
2933 
2934 	if (m->m_flags & M_VLANTAG) {
2935 		sop->vtag_mode = 1;
2936 		sop->vtag = m->m_pkthdr.ether_vlantag;
2937 	}
2938 
2939 
2940 #if 0 /* XXX TSO */
2941 	if (m->m_pkthdr.csum_flags & CSUM_TSO) {
2942 		sop->offload_mode = VMXNET3_OM_TSO;
2943 		sop->hlen = start;
2944 		sop->offload_pos = m->m_pkthdr.tso_segsz;
2945 	} else
2946 #endif
2947 	if (m->m_pkthdr.csum_flags & (VMXNET3_CSUM_OFFLOAD |
2948 	    VMXNET3_CSUM_OFFLOAD_IPV6))	{
2949 		sop->offload_mode = VMXNET3_OM_CSUM;
2950 		sop->hlen = start;
2951 		sop->offload_pos = start + m->m_pkthdr.csum_data;
2952 	}
2953 
2954 	/* Finally, change the ownership. */
2955 	vmxnet3_barrier(sc, VMXNET3_BARRIER_WR);
2956 	sop->gen ^= 1;
2957 
2958 	txq->vxtxq_ts->npending += nsegs;
2959 	if (txq->vxtxq_ts->npending >= txq->vxtxq_ts->intr_threshold) {
2960 		txq->vxtxq_ts->npending = 0;
2961 		vmxnet3_write_bar0(sc, VMXNET3_BAR0_TXH(txq->vxtxq_id),
2962 		    txr->vxtxr_head);
2963 	}
2964 
2965 	return (0);
2966 }
2967 
2968 #ifdef VMXNET3_LEGACY_TX
2969 
2970 static void
2971 vmxnet3_start_locked(struct ifnet *ifp)
2972 {
2973 	struct vmxnet3_softc *sc;
2974 	struct vmxnet3_txqueue *txq;
2975 	struct vmxnet3_txring *txr;
2976 	struct mbuf *m_head;
2977 	int tx, avail;
2978 
2979 	sc = ifp->if_softc;
2980 	txq = &sc->vmx_txq[0];
2981 	txr = &txq->vxtxq_cmd_ring;
2982 	tx = 0;
2983 
2984 	VMXNET3_TXQ_LOCK_ASSERT(txq);
2985 
2986 	if ((ifp->if_flags & IFF_RUNNING) == 0 ||
2987 	    sc->vmx_link_active == 0)
2988 		return;
2989 
2990 	while (!ifq_is_empty(&ifp->if_snd)) {
2991 		if ((avail = VMXNET3_TXRING_AVAIL(txr)) < 2)
2992 			break;
2993 
2994 		m_head = ifq_dequeue(&ifp->if_snd);
2995 		if (m_head == NULL)
2996 			break;
2997 
		/* Assume the worst case if this mbuf is the head of a chain. */
2999 		if (m_head->m_next != NULL && avail < VMXNET3_TX_MAXSEGS) {
3000 			ifq_prepend(&ifp->if_snd, m_head);
3001 			break;
3002 		}
3003 
3004 		if (vmxnet3_txq_encap(txq, &m_head) != 0) {
3005 			if (m_head != NULL)
3006 				ifq_prepend(&ifp->if_snd, m_head);
3007 			break;
3008 		}
3009 
3010 		tx++;
3011 		ETHER_BPF_MTAP(ifp, m_head);
3012 	}
3013 
3014 	if (tx > 0)
3015 		txq->vxtxq_watchdog = VMXNET3_WATCHDOG_TIMEOUT;
3016 }
3017 
3018 static void
3019 vmxnet3_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
3020 {
3021 	struct vmxnet3_softc *sc;
3022 	struct vmxnet3_txqueue *txq;
3023 
3024 	sc = ifp->if_softc;
3025 	txq = &sc->vmx_txq[0];
3026 
3027 	VMXNET3_TXQ_LOCK(txq);
3028 	vmxnet3_start_locked(ifp);
3029 	VMXNET3_TXQ_UNLOCK(txq);
3030 }
3031 
3032 #else /* !VMXNET3_LEGACY_TX */
3033 
3034 static int
3035 vmxnet3_txq_mq_start_locked(struct vmxnet3_txqueue *txq, struct mbuf *m)
3036 {
3037 	struct vmxnet3_softc *sc;
3038 	struct vmxnet3_txring *txr;
3039 	struct buf_ring *br;
3040 	struct ifnet *ifp;
3041 	int tx, avail, error;
3042 
3043 	sc = txq->vxtxq_sc;
3044 	br = txq->vxtxq_br;
3045 	ifp = sc->vmx_ifp;
3046 	txr = &txq->vxtxq_cmd_ring;
3047 	tx = 0;
3048 	error = 0;
3049 
3050 	VMXNET3_TXQ_LOCK_ASSERT(txq);
3051 
3052 	if ((ifp->if_flags & IFF_RUNNING) == 0 ||
3053 	    sc->vmx_link_active == 0) {
3054 		if (m != NULL)
3055 			error = drbr_enqueue(ifp, br, m);
3056 		return (error);
3057 	}
3058 
3059 	if (m != NULL) {
3060 		error = drbr_enqueue(ifp, br, m);
3061 		if (error)
3062 			return (error);
3063 	}
3064 
3065 	while ((avail = VMXNET3_TXRING_AVAIL(txr)) >= 2) {
3066 		m = drbr_peek(ifp, br);
3067 		if (m == NULL)
3068 			break;
3069 
		/* Assume the worst case if this mbuf is the head of a chain. */
3071 		if (m->m_next != NULL && avail < VMXNET3_TX_MAXSEGS) {
3072 			drbr_putback(ifp, br, m);
3073 			break;
3074 		}
3075 
3076 		if (vmxnet3_txq_encap(txq, &m) != 0) {
3077 			if (m != NULL)
3078 				drbr_putback(ifp, br, m);
3079 			else
3080 				drbr_advance(ifp, br);
3081 			break;
3082 		}
3083 		drbr_advance(ifp, br);
3084 
3085 		tx++;
3086 		ETHER_BPF_MTAP(ifp, m);
3087 	}
3088 
3089 	if (tx > 0)
3090 		txq->vxtxq_watchdog = VMXNET3_WATCHDOG_TIMEOUT;
3091 
3092 	return (0);
3093 }
3094 
3095 static int
3096 vmxnet3_txq_mq_start(struct ifnet *ifp, struct mbuf *m)
3097 {
3098 	struct vmxnet3_softc *sc;
3099 	struct vmxnet3_txqueue *txq;
3100 	int i, ntxq, error;
3101 
3102 	sc = ifp->if_softc;
3103 	ntxq = sc->vmx_ntxqueues;
3104 
3105 	/* check if flowid is set */
3106 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
3107 		i = m->m_pkthdr.flowid % ntxq;
3108 	else
3109 		i = curcpu % ntxq;
3110 
3111 	txq = &sc->vmx_txq[i];
3112 
3113 	if (VMXNET3_TXQ_TRYLOCK(txq) != 0) {
3114 		error = vmxnet3_txq_mq_start_locked(txq, m);
3115 		VMXNET3_TXQ_UNLOCK(txq);
3116 	} else {
3117 		error = drbr_enqueue(ifp, txq->vxtxq_br, m);
3118 		taskqueue_enqueue(sc->vmx_tq, &txq->vxtxq_defrtask);
3119 	}
3120 
3121 	return (error);
3122 }
3123 
3124 static void
3125 vmxnet3_txq_tq_deferred(void *xtxq, int pending)
3126 {
3127 	struct vmxnet3_softc *sc;
3128 	struct vmxnet3_txqueue *txq;
3129 
3130 	txq = xtxq;
3131 	sc = txq->vxtxq_sc;
3132 
3133 	VMXNET3_TXQ_LOCK(txq);
3134 	if (!drbr_empty(sc->vmx_ifp, txq->vxtxq_br))
3135 		vmxnet3_txq_mq_start_locked(txq, NULL);
3136 	VMXNET3_TXQ_UNLOCK(txq);
3137 }
3138 
3139 #endif /* VMXNET3_LEGACY_TX */
3140 
3141 static void
3142 vmxnet3_txq_start(struct vmxnet3_txqueue *txq)
3143 {
3144 	struct vmxnet3_softc *sc;
3145 	struct ifnet *ifp;
3146 
3147 	sc = txq->vxtxq_sc;
3148 	ifp = sc->vmx_ifp;
3149 
3150 #ifdef VMXNET3_LEGACY_TX
3151 	if (!ifq_is_empty(&ifp->if_snd))
3152 		vmxnet3_start_locked(ifp);
3153 #else
3154 	if (!drbr_empty(ifp, txq->vxtxq_br))
3155 		vmxnet3_txq_mq_start_locked(txq, NULL);
3156 #endif
3157 }
3158 
3159 static void
3160 vmxnet3_tx_start_all(struct vmxnet3_softc *sc)
3161 {
3162 	struct vmxnet3_txqueue *txq;
3163 	int i;
3164 
3165 	VMXNET3_CORE_LOCK_ASSERT(sc);
3166 
3167 	for (i = 0; i < sc->vmx_ntxqueues; i++) {
3168 		txq = &sc->vmx_txq[i];
3169 
3170 		VMXNET3_TXQ_LOCK(txq);
3171 		vmxnet3_txq_start(txq);
3172 		VMXNET3_TXQ_UNLOCK(txq);
3173 	}
3174 }
3175 
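/*
 * Maintain the 4096-bit VLAN filter as an array of 32-bit words:
 * bits 5-11 of the tag select the word and the low 5 bits select the
 * bit within it.
 */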
3176 static void
3177 vmxnet3_update_vlan_filter(struct vmxnet3_softc *sc, int add, uint16_t tag)
3178 {
3179 	struct ifnet *ifp;
3180 	int idx, bit;
3181 
3182 	ifp = sc->vmx_ifp;
3183 	idx = (tag >> 5) & 0x7F;
3184 	bit = tag & 0x1F;
3185 
3186 	if (tag == 0 || tag > 4095)
3187 		return;
3188 
3189 	VMXNET3_CORE_LOCK(sc);
3190 
3191 	/* Update our private VLAN bitvector. */
3192 	if (add)
3193 		sc->vmx_vlan_filter[idx] |= (1 << bit);
3194 	else
3195 		sc->vmx_vlan_filter[idx] &= ~(1 << bit);
3196 
3197 #if 0 /* VLAN_HWFILTER */
3198 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
3199 		if (add)
3200 			sc->vmx_ds->vlan_filter[idx] |= (1 << bit);
3201 		else
3202 			sc->vmx_ds->vlan_filter[idx] &= ~(1 << bit);
3203 		vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER);
3204 	}
3205 #endif
3206 
3207 	VMXNET3_CORE_UNLOCK(sc);
3208 }
3209 
3210 static void
3211 vmxnet3_register_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
3212 {
3213 
3214 	if (ifp->if_softc == arg)
3215 		vmxnet3_update_vlan_filter(arg, 1, tag);
3216 }
3217 
3218 static void
3219 vmxnet3_unregister_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
3220 {
3221 
3222 	if (ifp->if_softc == arg)
3223 		vmxnet3_update_vlan_filter(arg, 0, tag);
3224 }
3225 
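/*
 * Program the receive filter: unicast and broadcast frames are always
 * accepted, promiscuous and allmulti modes follow the interface flags,
 * and the multicast list is copied into the shared multicast table.
 * If the table would overflow, fall back to allmulti.
 */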
3226 static void
3227 vmxnet3_set_rxfilter(struct vmxnet3_softc *sc)
3228 {
3229 	struct ifnet *ifp;
3230 	struct vmxnet3_driver_shared *ds;
3231 	struct ifmultiaddr *ifma;
3232 	u_int mode;
3233 
3234 	ifp = sc->vmx_ifp;
3235 	ds = sc->vmx_ds;
3236 
3237 	mode = VMXNET3_RXMODE_UCAST | VMXNET3_RXMODE_BCAST;
3238 	if (ifp->if_flags & IFF_PROMISC)
3239 		mode |= VMXNET3_RXMODE_PROMISC;
3240 	if (ifp->if_flags & IFF_ALLMULTI)
3241 		mode |= VMXNET3_RXMODE_ALLMULTI;
3242 	else {
3243 		int cnt = 0, overflow = 0;
3244 
3245 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
3246 			if (ifma->ifma_addr->sa_family != AF_LINK)
3247 				continue;
3248 			else if (cnt == VMXNET3_MULTICAST_MAX) {
3249 				overflow = 1;
3250 				break;
3251 			}
3252 
3253 			bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
3254 			   &sc->vmx_mcast[cnt*ETHER_ADDR_LEN], ETHER_ADDR_LEN);
3255 			cnt++;
3256 		}
3257 
3258 		if (overflow != 0) {
3259 			cnt = 0;
3260 			mode |= VMXNET3_RXMODE_ALLMULTI;
3261 		} else if (cnt > 0)
3262 			mode |= VMXNET3_RXMODE_MCAST;
3263 		ds->mcast_tablelen = cnt * ETHER_ADDR_LEN;
3264 	}
3265 
3266 	ds->rxmode = mode;
3267 
3268 	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_FILTER);
3269 	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_RXMODE);
3270 }
3271 
3272 static int
3273 vmxnet3_change_mtu(struct vmxnet3_softc *sc, int mtu)
3274 {
3275 	struct ifnet *ifp;
3276 
3277 	ifp = sc->vmx_ifp;
3278 
3279 	if (mtu < VMXNET3_MIN_MTU || mtu > VMXNET3_MAX_MTU)
3280 		return (EINVAL);
3281 
3282 	ifp->if_mtu = mtu;
3283 
3284 	if (ifp->if_flags & IFF_RUNNING) {
3285 		ifp->if_flags &= ~IFF_RUNNING;
3286 		vmxnet3_init_locked(sc);
3287 	}
3288 
3289 	return (0);
3290 }
3291 
3292 static int
3293 vmxnet3_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cred)
3294 {
3295 	struct vmxnet3_softc *sc;
3296 	struct ifreq *ifr;
3297 	int reinit, mask, error;
3298 
3299 	sc = ifp->if_softc;
3300 	ifr = (struct ifreq *) data;
3301 	error = 0;
3302 
3303 	switch (cmd) {
3304 	case SIOCSIFMTU:
3305 		if (ifp->if_mtu != ifr->ifr_mtu) {
3306 			VMXNET3_CORE_LOCK(sc);
3307 			error = vmxnet3_change_mtu(sc, ifr->ifr_mtu);
3308 			VMXNET3_CORE_UNLOCK(sc);
3309 		}
3310 		break;
3311 
3312 	case SIOCSIFFLAGS:
3313 		VMXNET3_CORE_LOCK(sc);
3314 		if (ifp->if_flags & IFF_UP) {
3315 			if ((ifp->if_flags & IFF_RUNNING)) {
3316 				if ((ifp->if_flags ^ sc->vmx_if_flags) &
3317 				    (IFF_PROMISC | IFF_ALLMULTI)) {
3318 					vmxnet3_set_rxfilter(sc);
3319 				}
3320 			} else
3321 				vmxnet3_init_locked(sc);
3322 		} else {
3323 			if (ifp->if_flags & IFF_RUNNING)
3324 				vmxnet3_stop(sc);
3325 		}
3326 		sc->vmx_if_flags = ifp->if_flags;
3327 		VMXNET3_CORE_UNLOCK(sc);
3328 		break;
3329 
3330 	case SIOCADDMULTI:
3331 	case SIOCDELMULTI:
3332 		VMXNET3_CORE_LOCK(sc);
3333 		if (ifp->if_flags & IFF_RUNNING)
3334 			vmxnet3_set_rxfilter(sc);
3335 		VMXNET3_CORE_UNLOCK(sc);
3336 		break;
3337 
3338 	case SIOCSIFMEDIA:
3339 	case SIOCGIFMEDIA:
3340 		error = ifmedia_ioctl(ifp, ifr, &sc->vmx_media, cmd);
3341 		break;
3342 
3343 	case SIOCSIFCAP:
3344 		VMXNET3_CORE_LOCK(sc);
3345 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
3346 
3347 		if (mask & IFCAP_TXCSUM)
3348 			ifp->if_capenable ^= IFCAP_TXCSUM;
3349 		if (mask & IFCAP_TXCSUM_IPV6)
3350 			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
3351 #if 0 /* XXX TSO */
3352 		if (mask & IFCAP_TSO4)
3353 			ifp->if_capenable ^= IFCAP_TSO4;
3354 		if (mask & IFCAP_TSO6)
3355 			ifp->if_capenable ^= IFCAP_TSO6;
3356 #endif
3357 
3358 		if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | /* IFCAP_LRO | */
3359 		    IFCAP_VLAN_HWTAGGING /* | IFCAP_VLAN_HWFILTER */)) {
3360 			/* Changing these features requires us to reinit. */
3361 			reinit = 1;
3362 
3363 			if (mask & IFCAP_RXCSUM)
3364 				ifp->if_capenable ^= IFCAP_RXCSUM;
3365 			if (mask & IFCAP_RXCSUM_IPV6)
3366 				ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
3367 #if 0 /* XXX LRO */
3368 			if (mask & IFCAP_LRO)
3369 				ifp->if_capenable ^= IFCAP_LRO;
3370 #endif
3371 			if (mask & IFCAP_VLAN_HWTAGGING)
3372 				ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
3373 #if 0 /* XXX VLAN_HWFILTER */
3374 			if (mask & IFCAP_VLAN_HWFILTER)
3375 				ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
3376 #endif
3377 		} else
3378 			reinit = 0;
3379 
3380 #if 0 /* XXX TSO */
3381 		if (mask & IFCAP_VLAN_HWTSO)
3382 			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
3383 #endif
3384 
3385 		if (reinit && (ifp->if_flags & IFF_RUNNING)) {
3386 			ifp->if_flags &= ~IFF_RUNNING;
3387 			vmxnet3_init_locked(sc);
3388 		} else {
3389 			vmxnet3_init_hwassist(sc);
3390 		}
3391 
3392 		VMXNET3_CORE_UNLOCK(sc);
3393 #if 0 /* XXX */
3394 		VLAN_CAPABILITIES(ifp);
3395 #endif
3396 		break;
3397 
3398 	default:
3399 		error = ether_ioctl(ifp, cmd, data);
3400 		break;
3401 	}
3402 
3403 	VMXNET3_CORE_LOCK_ASSERT_NOTOWNED(sc);
3404 
3405 	return (error);
3406 }
3407 
3408 #ifndef VMXNET3_LEGACY_TX
3409 static void
3410 vmxnet3_qflush(struct ifnet *ifp)
3411 {
3412 	struct vmxnet3_softc *sc;
3413 	struct vmxnet3_txqueue *txq;
3414 	struct mbuf *m;
3415 	int i;
3416 
3417 	sc = ifp->if_softc;
3418 
3419 	for (i = 0; i < sc->vmx_ntxqueues; i++) {
3420 		txq = &sc->vmx_txq[i];
3421 
3422 		VMXNET3_TXQ_LOCK(txq);
3423 		while ((m = buf_ring_dequeue_sc(txq->vxtxq_br)) != NULL)
3424 			m_freem(m);
3425 		VMXNET3_TXQ_UNLOCK(txq);
3426 	}
3427 
3428 	if_qflush(ifp);
3429 }
3430 #endif
3431 
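/*
 * Per-queue Tx watchdog, driven from vmxnet3_tick().  The counter is
 * armed when packets are queued and disarmed when the ring drains;
 * returning 1 on expiry causes the tick handler to reinitialize the
 * device.
 */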
3432 static int
3433 vmxnet3_watchdog(struct vmxnet3_txqueue *txq)
3434 {
3435 	struct vmxnet3_softc *sc;
3436 
3437 	sc = txq->vxtxq_sc;
3438 
3439 	VMXNET3_TXQ_LOCK(txq);
3440 	if (txq->vxtxq_watchdog == 0 || --txq->vxtxq_watchdog) {
3441 		VMXNET3_TXQ_UNLOCK(txq);
3442 		return (0);
3443 	}
3444 	VMXNET3_TXQ_UNLOCK(txq);
3445 
3446 	if_printf(sc->vmx_ifp, "watchdog timeout on queue %d\n",
3447 	    txq->vxtxq_id);
3448 	return (1);
3449 }
3450 
3451 static void
3452 vmxnet3_refresh_host_stats(struct vmxnet3_softc *sc)
3453 {
3454 
3455 	vmxnet3_write_cmd(sc, VMXNET3_CMD_GET_STATS);
3456 }
3457 
3458 static void
3459 vmxnet3_txq_accum_stats(struct vmxnet3_txqueue *txq,
3460     struct vmxnet3_txq_stats *accum)
3461 {
3462 	struct vmxnet3_txq_stats *st;
3463 
3464 	st = &txq->vxtxq_stats;
3465 
3466 	accum->vmtxs_opackets += st->vmtxs_opackets;
3467 	accum->vmtxs_obytes += st->vmtxs_obytes;
3468 	accum->vmtxs_omcasts += st->vmtxs_omcasts;
3469 	accum->vmtxs_csum += st->vmtxs_csum;
3470 	accum->vmtxs_tso += st->vmtxs_tso;
3471 	accum->vmtxs_full += st->vmtxs_full;
3472 	accum->vmtxs_offload_failed += st->vmtxs_offload_failed;
3473 }
3474 
3475 static void
3476 vmxnet3_rxq_accum_stats(struct vmxnet3_rxqueue *rxq,
3477     struct vmxnet3_rxq_stats *accum)
3478 {
3479 	struct vmxnet3_rxq_stats *st;
3480 
3481 	st = &rxq->vxrxq_stats;
3482 
3483 	accum->vmrxs_ipackets += st->vmrxs_ipackets;
3484 	accum->vmrxs_ibytes += st->vmrxs_ibytes;
3485 	accum->vmrxs_iqdrops += st->vmrxs_iqdrops;
3486 	accum->vmrxs_ierrors += st->vmrxs_ierrors;
3487 }
3488 
3489 static void
3490 vmxnet3_accumulate_stats(struct vmxnet3_softc *sc)
3491 {
3492 	struct ifnet *ifp;
3493 	struct vmxnet3_statistics *st;
3494 	struct vmxnet3_txq_stats txaccum;
3495 	struct vmxnet3_rxq_stats rxaccum;
3496 	int i;
3497 
3498 	ifp = sc->vmx_ifp;
3499 	st = &sc->vmx_stats;
3500 
3501 	bzero(&txaccum, sizeof(struct vmxnet3_txq_stats));
3502 	bzero(&rxaccum, sizeof(struct vmxnet3_rxq_stats));
3503 
3504 	for (i = 0; i < sc->vmx_ntxqueues; i++)
3505 		vmxnet3_txq_accum_stats(&sc->vmx_txq[i], &txaccum);
3506 	for (i = 0; i < sc->vmx_nrxqueues; i++)
3507 		vmxnet3_rxq_accum_stats(&sc->vmx_rxq[i], &rxaccum);
3508 
3509 	/*
3510 	 * With the exception of if_ierrors, these ifnet statistics are
3511 	 * only updated in the driver, so just set them to our accumulated
3512 	 * values. if_ierrors is updated in ether_input() for malformed
3513 	 * frames that we should have already discarded.
3514 	 */
3515 	ifp->if_ipackets = rxaccum.vmrxs_ipackets;
3516 	ifp->if_iqdrops = rxaccum.vmrxs_iqdrops;
3517 	ifp->if_ierrors = rxaccum.vmrxs_ierrors;
3518 	ifp->if_opackets = txaccum.vmtxs_opackets;
3519 #ifndef VMXNET3_LEGACY_TX
3520 	ifp->if_obytes = txaccum.vmtxs_obytes;
3521 	ifp->if_omcasts = txaccum.vmtxs_omcasts;
3522 #endif
3523 }
3524 
3525 static void
3526 vmxnet3_tick(void *xsc)
3527 {
3528 	struct vmxnet3_softc *sc;
3529 	struct ifnet *ifp;
3530 	int i, timedout;
3531 
3532 	sc = xsc;
3533 	ifp = sc->vmx_ifp;
3534 	timedout = 0;
3535 
3536 	VMXNET3_CORE_LOCK_ASSERT(sc);
3537 
3538 	vmxnet3_accumulate_stats(sc);
3539 	vmxnet3_refresh_host_stats(sc);
3540 
3541 	for (i = 0; i < sc->vmx_ntxqueues; i++)
3542 		timedout |= vmxnet3_watchdog(&sc->vmx_txq[i]);
3543 
3544 	if (timedout != 0) {
3545 		ifp->if_flags &= ~IFF_RUNNING;
3546 		vmxnet3_init_locked(sc);
3547 	} else
3548 		callout_reset(&sc->vmx_tick, hz, vmxnet3_tick, sc);
3549 }
3550 
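/*
 * The GET_LINK command reports the link state in bit 0 and the link
 * speed in the upper 16 bits of the result.
 */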
3551 static int
3552 vmxnet3_link_is_up(struct vmxnet3_softc *sc)
3553 {
3554 	uint32_t status;
3555 
3556 	/* Also update the link speed while here. */
3557 	status = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_LINK);
3558 	sc->vmx_link_speed = status >> 16;
	return (!!(status & 0x1));
3560 }
3561 
3562 static void
3563 vmxnet3_link_status(struct vmxnet3_softc *sc)
3564 {
3565 	struct ifnet *ifp;
3566 	int link;
3567 
3568 	ifp = sc->vmx_ifp;
3569 	link = vmxnet3_link_is_up(sc);
3570 
3571 	if (link != 0 && sc->vmx_link_active == 0) {
3572 		sc->vmx_link_active = 1;
3573 		ifp->if_link_state = LINK_STATE_UP;
3574 		if_link_state_change(ifp);
3575 	} else if (link == 0 && sc->vmx_link_active != 0) {
3576 		sc->vmx_link_active = 0;
3577 		ifp->if_link_state = LINK_STATE_DOWN;
3578 		if_link_state_change(ifp);
3579 	}
3580 }
3581 
3582 static void
3583 vmxnet3_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
3584 {
3585 	struct vmxnet3_softc *sc;
3586 
3587 	sc = ifp->if_softc;
3588 
	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	VMXNET3_CORE_LOCK(sc);
	if (vmxnet3_link_is_up(sc) != 0) {
		ifmr->ifm_status |= IFM_ACTIVE;
		ifmr->ifm_active |= IFM_AUTO;
	} else
		ifmr->ifm_active |= IFM_NONE;
	VMXNET3_CORE_UNLOCK(sc);
3598 }
3599 
3600 static int
3601 vmxnet3_media_change(struct ifnet *ifp)
3602 {
3603 
3604 	/* Ignore. */
3605 	return (0);
3606 }
3607 
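/*
 * The MAC address is exchanged with the device through two registers:
 * MACL carries bytes 0-3 and MACH carries bytes 4-5.
 */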
3608 static void
3609 vmxnet3_set_lladdr(struct vmxnet3_softc *sc)
3610 {
3611 	uint32_t ml, mh;
3612 
3613 	ml  = sc->vmx_lladdr[0];
3614 	ml |= sc->vmx_lladdr[1] << 8;
3615 	ml |= sc->vmx_lladdr[2] << 16;
3616 	ml |= sc->vmx_lladdr[3] << 24;
3617 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACL, ml);
3618 
3619 	mh  = sc->vmx_lladdr[4];
3620 	mh |= sc->vmx_lladdr[5] << 8;
3621 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACH, mh);
3622 }
3623 
3624 static void
3625 vmxnet3_get_lladdr(struct vmxnet3_softc *sc)
3626 {
3627 	uint32_t ml, mh;
3628 
3629 	ml = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACL);
3630 	mh = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACH);
3631 
3632 	sc->vmx_lladdr[0] = ml;
3633 	sc->vmx_lladdr[1] = ml >> 8;
3634 	sc->vmx_lladdr[2] = ml >> 16;
3635 	sc->vmx_lladdr[3] = ml >> 24;
3636 	sc->vmx_lladdr[4] = mh;
3637 	sc->vmx_lladdr[5] = mh >> 8;
3638 }
3639 
3640 static void
3641 vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *txq,
3642     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3643 {
3644 	struct sysctl_oid *node, *txsnode;
3645 	struct sysctl_oid_list *list, *txslist;
3646 	struct vmxnet3_txq_stats *stats;
3647 	struct UPT1_TxStats *txstats;
3648 	char namebuf[16];
3649 
3650 	stats = &txq->vxtxq_stats;
3651 	txstats = &txq->vxtxq_ts->stats;
3652 
3653 	ksnprintf(namebuf, sizeof(namebuf), "txq%d", txq->vxtxq_id);
3654 	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD,
3655 	    NULL, "Transmit Queue");
3656 	txq->vxtxq_sysctl = list = SYSCTL_CHILDREN(node);
3657 
3658 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD,
3659 	    &stats->vmtxs_opackets, 0, "Transmit packets");
3660 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD,
3661 	    &stats->vmtxs_obytes, 0, "Transmit bytes");
3662 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD,
3663 	    &stats->vmtxs_omcasts, 0, "Transmit multicasts");
3664 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
3665 	    &stats->vmtxs_csum, 0, "Transmit checksum offloaded");
3666 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD,
3667 	    &stats->vmtxs_tso, 0, "Transmit TCP segmentation offloaded");
3668 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ringfull", CTLFLAG_RD,
3669 	    &stats->vmtxs_full, 0, "Transmit ring full");
3670 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "offload_failed", CTLFLAG_RD,
3671 	    &stats->vmtxs_offload_failed, 0, "Transmit checksum offload failed");
3672 
3673 	/*
3674 	 * Add statistics reported by the host. These are updated once
3675 	 * per second.
3676 	 */
3677 	txsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats", CTLFLAG_RD,
3678 	    NULL, "Host Statistics");
3679 	txslist = SYSCTL_CHILDREN(txsnode);
3680 #if 0 /* XXX TSO */
3681 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_packets", CTLFLAG_RD,
3682 	    &txstats->TSO_packets, 0, "TSO packets");
3683 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_bytes", CTLFLAG_RD,
3684 	    &txstats->TSO_bytes, 0, "TSO bytes");
3685 #endif
3686 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
3687 	    &txstats->ucast_packets, 0, "Unicast packets");
3688 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
3689 	    &txstats->ucast_bytes, 0, "Unicast bytes");
3690 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
3691 	    &txstats->mcast_packets, 0, "Multicast packets");
3692 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
3693 	    &txstats->mcast_bytes, 0, "Multicast bytes");
3694 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "error", CTLFLAG_RD,
3695 	    &txstats->error, 0, "Errors");
3696 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "discard", CTLFLAG_RD,
3697 	    &txstats->discard, 0, "Discards");
3698 }
3699 
3700 static void
3701 vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *rxq,
3702     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3703 {
3704 	struct sysctl_oid *node, *rxsnode;
3705 	struct sysctl_oid_list *list, *rxslist;
3706 	struct vmxnet3_rxq_stats *stats;
3707 	struct UPT1_RxStats *rxstats;
3708 	char namebuf[16];
3709 
3710 	stats = &rxq->vxrxq_stats;
3711 	rxstats = &rxq->vxrxq_rs->stats;
3712 
3713 	ksnprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vxrxq_id);
3714 	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD,
3715 	    NULL, "Receive Queue");
3716 	rxq->vxrxq_sysctl = list = SYSCTL_CHILDREN(node);
3717 
3718 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD,
3719 	    &stats->vmrxs_ipackets, 0, "Receive packets");
3720 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD,
3721 	    &stats->vmrxs_ibytes, 0, "Receive bytes");
3722 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD,
3723 	    &stats->vmrxs_iqdrops, 0, "Receive drops");
3724 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD,
3725 	    &stats->vmrxs_ierrors, 0, "Receive errors");
3726 
3727 	/*
3728 	 * Add statistics reported by the host. These are updated once
3729 	 * per second.
3730 	 */
3731 	rxsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats", CTLFLAG_RD,
3732 	    NULL, "Host Statistics");
3733 	rxslist = SYSCTL_CHILDREN(rxsnode);
3734 #if 0 /* XXX LRO */
3735 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_packets", CTLFLAG_RD,
3736 	    &rxstats->LRO_packets, 0, "LRO packets");
3737 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_bytes", CTLFLAG_RD,
3738 	    &rxstats->LRO_bytes, 0, "LRO bytes");
3739 #endif
3740 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
3741 	    &rxstats->ucast_packets, 0, "Unicast packets");
3742 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
3743 	    &rxstats->ucast_bytes, 0, "Unicast bytes");
3744 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
3745 	    &rxstats->mcast_packets, 0, "Multicast packets");
3746 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
3747 	    &rxstats->mcast_bytes, 0, "Multicast bytes");
3748 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_packets", CTLFLAG_RD,
3749 	    &rxstats->bcast_packets, 0, "Broadcast packets");
3750 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_bytes", CTLFLAG_RD,
3751 	    &rxstats->bcast_bytes, 0, "Broadcast bytes");
3752 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "nobuffer", CTLFLAG_RD,
3753 	    &rxstats->nobuffer, 0, "No buffer");
3754 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "error", CTLFLAG_RD,
3755 	    &rxstats->error, 0, "Errors");
3756 }
3757 
3758 static void
3759 vmxnet3_setup_debug_sysctl(struct vmxnet3_softc *sc,
3760     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3761 {
3762 	struct sysctl_oid *node;
3763 	struct sysctl_oid_list *list;
3764 	int i;
3765 
3766 	for (i = 0; i < sc->vmx_ntxqueues; i++) {
3767 		struct vmxnet3_txqueue *txq = &sc->vmx_txq[i];
3768 
3769 		node = SYSCTL_ADD_NODE(ctx, txq->vxtxq_sysctl, OID_AUTO,
3770 		    "debug", CTLFLAG_RD, NULL, "");
3771 		list = SYSCTL_CHILDREN(node);
3772 
3773 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_head", CTLFLAG_RD,
3774 		    &txq->vxtxq_cmd_ring.vxtxr_head, 0, "");
3775 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_next", CTLFLAG_RD,
3776 		    &txq->vxtxq_cmd_ring.vxtxr_next, 0, "");
3777 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_ndesc", CTLFLAG_RD,
3778 		    &txq->vxtxq_cmd_ring.vxtxr_ndesc, 0, "");
3779 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd_gen", CTLFLAG_RD,
3780 		    &txq->vxtxq_cmd_ring.vxtxr_gen, 0, "");
3781 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD,
3782 		    &txq->vxtxq_comp_ring.vxcr_next, 0, "");
3783 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
		    &txq->vxtxq_comp_ring.vxcr_ndesc, 0, "");
3785 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
3786 		    &txq->vxtxq_comp_ring.vxcr_gen, 0, "");
3787 	}
3788 
3789 	for (i = 0; i < sc->vmx_nrxqueues; i++) {
3790 		struct vmxnet3_rxqueue *rxq = &sc->vmx_rxq[i];
3791 
3792 		node = SYSCTL_ADD_NODE(ctx, rxq->vxrxq_sysctl, OID_AUTO,
3793 		    "debug", CTLFLAG_RD, NULL, "");
3794 		list = SYSCTL_CHILDREN(node);
3795 
3796 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_fill", CTLFLAG_RD,
3797 		    &rxq->vxrxq_cmd_ring[0].vxrxr_fill, 0, "");
3798 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_ndesc", CTLFLAG_RD,
3799 		    &rxq->vxrxq_cmd_ring[0].vxrxr_ndesc, 0, "");
3800 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd0_gen", CTLFLAG_RD,
3801 		    &rxq->vxrxq_cmd_ring[0].vxrxr_gen, 0, "");
3802 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_fill", CTLFLAG_RD,
3803 		    &rxq->vxrxq_cmd_ring[1].vxrxr_fill, 0, "");
3804 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_ndesc", CTLFLAG_RD,
3805 		    &rxq->vxrxq_cmd_ring[1].vxrxr_ndesc, 0, "");
3806 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd1_gen", CTLFLAG_RD,
3807 		    &rxq->vxrxq_cmd_ring[1].vxrxr_gen, 0, "");
3808 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD,
3809 		    &rxq->vxrxq_comp_ring.vxcr_next, 0, "");
3810 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
		    &rxq->vxrxq_comp_ring.vxcr_ndesc, 0, "");
3812 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
3813 		    &rxq->vxrxq_comp_ring.vxcr_gen, 0, "");
3814 	}
3815 }
3816 
3817 static void
3818 vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *sc,
3819     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3820 {
3821 	int i;
3822 
3823 	for (i = 0; i < sc->vmx_ntxqueues; i++)
3824 		vmxnet3_setup_txq_sysctl(&sc->vmx_txq[i], ctx, child);
3825 	for (i = 0; i < sc->vmx_nrxqueues; i++)
3826 		vmxnet3_setup_rxq_sysctl(&sc->vmx_rxq[i], ctx, child);
3827 
3828 	vmxnet3_setup_debug_sysctl(sc, ctx, child);
3829 }
3830 
3831 static void
3832 vmxnet3_setup_sysctl(struct vmxnet3_softc *sc)
3833 {
3834 	device_t dev;
3835 	struct vmxnet3_statistics *stats;
3836 	struct sysctl_ctx_list *ctx;
3837 	struct sysctl_oid *tree;
3838 	struct sysctl_oid_list *child;
3839 
3840 	dev = sc->vmx_dev;
3841 	ctx = device_get_sysctl_ctx(dev);
3842 	tree = device_get_sysctl_tree(dev);
3843 	child = SYSCTL_CHILDREN(tree);
3844 
3845 	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_ntxqueues", CTLFLAG_RD,
3846 	    &sc->vmx_max_ntxqueues, 0, "Maximum number of Tx queues");
3847 	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_nrxqueues", CTLFLAG_RD,
3848 	    &sc->vmx_max_nrxqueues, 0, "Maximum number of Rx queues");
3849 	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "ntxqueues", CTLFLAG_RD,
3850 	    &sc->vmx_ntxqueues, 0, "Number of Tx queues");
3851 	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "nrxqueues", CTLFLAG_RD,
3852 	    &sc->vmx_nrxqueues, 0, "Number of Rx queues");
3853 
3854 	stats = &sc->vmx_stats;
3855 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "defragged", CTLFLAG_RD,
3856 	    &stats->vmst_defragged, 0, "Tx mbuf chains defragged");
3857 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "defrag_failed", CTLFLAG_RD,
3858 	    &stats->vmst_defrag_failed, 0,
3859 	    "Tx mbuf dropped because defrag failed");
3860 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "mgetcl_failed", CTLFLAG_RD,
3861 	    &stats->vmst_mgetcl_failed, 0, "mbuf cluster allocation failed");
3862 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "mbuf_load_failed", CTLFLAG_RD,
3863 	    &stats->vmst_mbuf_load_failed, 0, "mbuf load segments failed");
3864 
3865 	vmxnet3_setup_queue_sysctl(sc, ctx, child);
3866 }
3867 
3868 static void
3869 vmxnet3_write_bar0(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
3870 {
3871 
3872 	bus_space_write_4(sc->vmx_iot0, sc->vmx_ioh0, r, v);
3873 }
3874 
3875 static uint32_t
3876 vmxnet3_read_bar1(struct vmxnet3_softc *sc, bus_size_t r)
3877 {
3878 
3879 	return (bus_space_read_4(sc->vmx_iot1, sc->vmx_ioh1, r));
3880 }
3881 
3882 static void
3883 vmxnet3_write_bar1(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
3884 {
3885 
3886 	bus_space_write_4(sc->vmx_iot1, sc->vmx_ioh1, r, v);
3887 }
3888 
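/*
 * Device command protocol: writing a command code to VMXNET3_BAR1_CMD
 * kicks the hypervisor, and commands that return a result place it in the
 * same register, so vmxnet3_read_cmd() issues the write, a full barrier,
 * and then reads the result back.  A typical caller looks something like
 * (command name taken from if_vmxreg.h):
 *
 *	link = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_LINK);
 */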
3889 static void
3890 vmxnet3_write_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
3891 {
3892 
3893 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_CMD, cmd);
3894 }
3895 
3896 static uint32_t
3897 vmxnet3_read_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
3898 {
3899 
3900 	vmxnet3_write_cmd(sc, cmd);
3901 	bus_space_barrier(sc->vmx_iot1, sc->vmx_ioh1, 0, 0,
3902 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
3903 	return (vmxnet3_read_bar1(sc, VMXNET3_BAR1_CMD));
3904 }
3905 
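/*
 * Per-vector interrupt masking: writing 0 to a vector's IMASK register
 * unmasks (enables) it, writing 1 masks it.
 */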
3906 static void
3907 vmxnet3_enable_intr(struct vmxnet3_softc *sc, int irq)
3908 {
3909 
3910 	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 0);
3911 }
3912 
3913 static void
3914 vmxnet3_disable_intr(struct vmxnet3_softc *sc, int irq)
3915 {
3916 
3917 	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 1);
3918 }
3919 
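/*
 * Besides the per-vector masks, the ictrl word in the driver shared area
 * carries a global "disable all interrupts" hint, so both are updated
 * together when enabling or disabling everything.
 */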
3920 static void
3921 vmxnet3_enable_all_intrs(struct vmxnet3_softc *sc)
3922 {
3923 	int i;
3924 
3925 	sc->vmx_ds->ictrl &= ~VMXNET3_ICTRL_DISABLE_ALL;
3926 	for (i = 0; i < sc->vmx_nintrs; i++)
3927 		vmxnet3_enable_intr(sc, i);
3928 }
3929 
3930 static void
3931 vmxnet3_disable_all_intrs(struct vmxnet3_softc *sc)
3932 {
3933 	int i;
3934 
3935 	sc->vmx_ds->ictrl |= VMXNET3_ICTRL_DISABLE_ALL;
3936 	for (i = 0; i < sc->vmx_nintrs; i++)
3937 		vmxnet3_disable_intr(sc, i);
3938 }
3939 
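/*
 * bus_dmamap_load() callback.  The tags created by vmxnet3_dma_malloc()
 * allow a single segment only, so just record that segment's bus address.
 */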
3940 static void
3941 vmxnet3_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3942 {
3943 	bus_addr_t *baddr = arg;
3944 
3945 	if (error == 0)
3946 		*baddr = segs->ds_addr;
3947 }
3948 
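/*
 * Allocate a zeroed, physically contiguous DMA buffer of 'size' bytes with
 * the requested alignment and record its tag, map, kernel virtual address
 * and bus address in 'dma'.  Any partial allocation is undone on failure.
 * A minimal usage sketch (size and alignment illustrative):
 *
 *	struct vmxnet3_dma_alloc dma;
 *
 *	if (vmxnet3_dma_malloc(sc, PAGE_SIZE, 512, &dma) == 0) {
 *		... use dma.dma_vaddr (KVA) and dma.dma_paddr (bus address) ...
 *		vmxnet3_dma_free(sc, &dma);
 *	}
 */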
3949 static int
3950 vmxnet3_dma_malloc(struct vmxnet3_softc *sc, bus_size_t size, bus_size_t align,
3951     struct vmxnet3_dma_alloc *dma)
3952 {
3953 	device_t dev;
3954 	int error;
3955 
3956 	dev = sc->vmx_dev;
3957 	bzero(dma, sizeof(struct vmxnet3_dma_alloc));
3958 
3959 	error = bus_dma_tag_create(bus_get_dma_tag(dev),
3960 	    align, 0,		/* alignment, bounds */
3961 	    BUS_SPACE_MAXADDR,	/* lowaddr */
3962 	    BUS_SPACE_MAXADDR,	/* highaddr */
3963 	    NULL, NULL,		/* filter, filterarg */
3964 	    size,		/* maxsize */
3965 	    1,			/* nsegments */
3966 	    size,		/* maxsegsize */
3967 	    BUS_DMA_ALLOCNOW,	/* flags */
3968 	    &dma->dma_tag);
3969 	if (error) {
3970 		device_printf(dev, "bus_dma_tag_create failed: %d\n", error);
3971 		goto fail;
3972 	}
3973 
3974 	error = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
3975 	    BUS_DMA_ZERO | BUS_DMA_NOWAIT, &dma->dma_map);
3976 	if (error) {
3977 		device_printf(dev, "bus_dmamem_alloc failed: %d\n", error);
3978 		goto fail;
3979 	}
3980 
3981 	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3982 	    size, vmxnet3_dmamap_cb, &dma->dma_paddr, BUS_DMA_NOWAIT);
3983 	if (error) {
3984 		device_printf(dev, "bus_dmamap_load failed: %d\n", error);
3985 		goto fail;
3986 	}
3987 
3988 	dma->dma_size = size;
3989 
3990 fail:
3991 	if (error)
3992 		vmxnet3_dma_free(sc, dma);
3993 
3994 	return (error);
3995 }
3996 
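/*
 * Release everything vmxnet3_dma_malloc() set up.  Safe to call on a
 * partially constructed descriptor; the structure is re-zeroed so a later
 * call is a no-op.
 */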
3997 static void
3998 vmxnet3_dma_free(struct vmxnet3_softc *sc, struct vmxnet3_dma_alloc *dma)
3999 {
4000 
4001 	if (dma->dma_tag != NULL) {
4002 		if (dma->dma_paddr != 0) {
4003 			bus_dmamap_sync(dma->dma_tag, dma->dma_map,
4004 			    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4005 			bus_dmamap_unload(dma->dma_tag, dma->dma_map);
4006 		}
4007 
4008 		if (dma->dma_vaddr != NULL) {
4009 			bus_dmamem_free(dma->dma_tag, dma->dma_vaddr,
4010 			    dma->dma_map);
4011 		}
4012 
4013 		bus_dma_tag_destroy(dma->dma_tag);
4014 	}
4015 	bzero(dma, sizeof(struct vmxnet3_dma_alloc));
4016 }
4017 
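/*
 * Fetch a per-device integer tunable of the form hw.vmx.<unit>.<knob> from
 * the kernel environment, returning 'def' when it is not set.  For a knob
 * named "ntxqueues" (name illustrative) on unit 0 this looks up
 * hw.vmx.0.ntxqueues.
 */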
4018 static int
4019 vmxnet3_tunable_int(struct vmxnet3_softc *sc, const char *knob, int def)
4020 {
4021 	char path[64];
4022 
4023 	ksnprintf(path, sizeof(path),
4024 	    "hw.vmx.%d.%s", device_get_unit(sc->vmx_dev), knob);
4025 	TUNABLE_INT_FETCH(path, &def);
4026 
4027 	return (def);
4028 }
4029 
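/*
 * x86 fence instructions backing vmxnet3_barrier(): lfence orders loads,
 * sfence orders stores, and mfence orders both; the "memory" clobber also
 * keeps the compiler from reordering accesses across them.
 */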
4030 #define mb()	__asm volatile("mfence" ::: "memory")
4031 #define wmb()	__asm volatile("sfence" ::: "memory")
4032 #define rmb()	__asm volatile("lfence" ::: "memory")
4033 
4034 /*
4035  * Since this is a purely paravirtualized device, we do not have
4036  * to worry about DMA coherency. But at times, we must make sure
4037  * both the compiler and CPU do not reorder memory operations.
4038  */
4039 static inline void
4040 vmxnet3_barrier(struct vmxnet3_softc *sc, vmxnet3_barrier_t type)
4041 {
4042 
4043 	switch (type) {
4044 	case VMXNET3_BARRIER_RD:
4045 		rmb();
4046 		break;
4047 	case VMXNET3_BARRIER_WR:
4048 		wmb();
4049 		break;
4050 	case VMXNET3_BARRIER_RDWR:
4051 		mb();
4052 		break;
4053 	default:
4054 		panic("%s: bad barrier type %d", __func__, type);
4055 	}
4056 }
4057