1 /*-
2  * Copyright (c) 2013 Tsubai Masanari
3  * Copyright (c) 2013 Bryan Venteicher <bryanv@FreeBSD.org>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  *
17  * $OpenBSD: src/sys/dev/pci/if_vmx.c,v 1.11 2013/06/22 00:28:10 uebayasi Exp $
18  * $FreeBSD: head/sys/dev/vmware/vmxnet3/if_vmx.c 318867 2017-05-25 10:49:56Z avg $
19  */
20 
21 /* Driver for VMware vmxnet3 virtual ethernet devices. */
22 
23 #include <sys/param.h>
24 #include <sys/systm.h>
25 #include <sys/device.h>
26 #include <sys/eventhandler.h>
27 #include <sys/kernel.h>
28 #include <sys/endian.h>
29 #include <sys/sockio.h>
30 #include <sys/mbuf.h>
31 #include <sys/malloc.h>
32 #include <sys/module.h>
33 #include <sys/socket.h>
34 #include <sys/sysctl.h>
35 #include <sys/taskqueue.h>
36 #include <vm/vm.h>
37 #include <vm/pmap.h>
38 
39 #include <net/ethernet.h>
40 #include <net/if.h>
41 #include <net/if_var.h>
42 #include <net/ifq_var.h>
43 #include <net/if_arp.h>
44 #include <net/if_dl.h>
45 #include <net/if_types.h>
46 #include <net/if_media.h>
47 #include <net/vlan/if_vlan_ether.h>
48 #include <net/vlan/if_vlan_var.h>
49 
50 #include <net/bpf.h>
51 
52 #include <netinet/in_systm.h>
53 #include <netinet/in.h>
54 #include <netinet/ip.h>
55 #include <netinet/ip6.h>
56 #include <netinet6/ip6_var.h>
57 #include <netinet/udp.h>
58 #include <netinet/tcp.h>
59 
60 #include <sys/in_cksum.h>
61 
62 #include <sys/bus.h>
63 #include <sys/rman.h>
64 
65 #include <bus/pci/pcireg.h>
66 #include <bus/pci/pcivar.h>
67 
#define	VMXNET3_LEGACY_TX 1	/* XXX multiqueue (if_transmit) TX path not ported yet */
69 #include "if_vmxreg.h"
70 #include "if_vmxvar.h"
71 
72 #include "opt_inet.h"
73 #include "opt_inet6.h"
74 
75 #ifdef VMXNET3_FAILPOINTS
76 #include <sys/fail.h>
77 static SYSCTL_NODE(DEBUG_FP, OID_AUTO, vmxnet3, CTLFLAG_RW, 0,
78     "vmxnet3 fail points");
79 #define VMXNET3_FP	_debug_fail_point_vmxnet3
80 #endif
81 
82 static int	vmxnet3_probe(device_t);
83 static int	vmxnet3_attach(device_t);
84 static int	vmxnet3_detach(device_t);
85 static int	vmxnet3_shutdown(device_t);
86 
87 static int	vmxnet3_alloc_resources(struct vmxnet3_softc *);
88 static void	vmxnet3_free_resources(struct vmxnet3_softc *);
89 static int	vmxnet3_check_version(struct vmxnet3_softc *);
90 static void	vmxnet3_initial_config(struct vmxnet3_softc *);
91 static void	vmxnet3_check_multiqueue(struct vmxnet3_softc *);
92 
93 #ifdef __FreeBSD__
94 static int	vmxnet3_alloc_msix_interrupts(struct vmxnet3_softc *);
95 static int	vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *);
96 #else
97 static int	vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *);
98 #endif
99 static int	vmxnet3_alloc_legacy_interrupts(struct vmxnet3_softc *);
100 static int	vmxnet3_alloc_interrupt(struct vmxnet3_softc *, int, int,
101 		    struct vmxnet3_interrupt *);
102 static int	vmxnet3_alloc_intr_resources(struct vmxnet3_softc *);
103 #ifdef __FreeBSD__
104 static int	vmxnet3_setup_msix_interrupts(struct vmxnet3_softc *);
105 #endif
106 static int	vmxnet3_setup_legacy_interrupt(struct vmxnet3_softc *);
107 static int	vmxnet3_setup_interrupts(struct vmxnet3_softc *);
108 static int	vmxnet3_alloc_interrupts(struct vmxnet3_softc *);
109 
110 static void	vmxnet3_free_interrupt(struct vmxnet3_softc *,
111 		    struct vmxnet3_interrupt *);
112 static void	vmxnet3_free_interrupts(struct vmxnet3_softc *);
113 
114 #ifndef VMXNET3_LEGACY_TX
115 static int	vmxnet3_alloc_taskqueue(struct vmxnet3_softc *);
116 static void	vmxnet3_start_taskqueue(struct vmxnet3_softc *);
117 static void	vmxnet3_drain_taskqueue(struct vmxnet3_softc *);
118 static void	vmxnet3_free_taskqueue(struct vmxnet3_softc *);
119 #endif
120 
121 static int	vmxnet3_init_rxq(struct vmxnet3_softc *, int);
122 static int	vmxnet3_init_txq(struct vmxnet3_softc *, int);
123 static int	vmxnet3_alloc_rxtx_queues(struct vmxnet3_softc *);
124 static void	vmxnet3_destroy_rxq(struct vmxnet3_rxqueue *);
125 static void	vmxnet3_destroy_txq(struct vmxnet3_txqueue *);
126 static void	vmxnet3_free_rxtx_queues(struct vmxnet3_softc *);
127 
128 static int	vmxnet3_alloc_shared_data(struct vmxnet3_softc *);
129 static void	vmxnet3_free_shared_data(struct vmxnet3_softc *);
130 static int	vmxnet3_alloc_txq_data(struct vmxnet3_softc *);
131 static void	vmxnet3_free_txq_data(struct vmxnet3_softc *);
132 static int	vmxnet3_alloc_rxq_data(struct vmxnet3_softc *);
133 static void	vmxnet3_free_rxq_data(struct vmxnet3_softc *);
134 static int	vmxnet3_alloc_queue_data(struct vmxnet3_softc *);
135 static void	vmxnet3_free_queue_data(struct vmxnet3_softc *);
136 static int	vmxnet3_alloc_mcast_table(struct vmxnet3_softc *);
137 static void	vmxnet3_init_shared_data(struct vmxnet3_softc *);
138 static void	vmxnet3_init_hwassist(struct vmxnet3_softc *);
139 static void	vmxnet3_reinit_interface(struct vmxnet3_softc *);
140 static void	vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *);
141 static void	vmxnet3_reinit_shared_data(struct vmxnet3_softc *);
142 static int	vmxnet3_alloc_data(struct vmxnet3_softc *);
143 static void	vmxnet3_free_data(struct vmxnet3_softc *);
144 static int	vmxnet3_setup_interface(struct vmxnet3_softc *);
145 
146 static void	vmxnet3_evintr(struct vmxnet3_softc *);
147 static void	vmxnet3_txq_eof(struct vmxnet3_txqueue *);
148 static void	vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *, struct mbuf *);
149 static int	vmxnet3_newbuf(struct vmxnet3_softc *, struct vmxnet3_rxring *);
150 static void	vmxnet3_rxq_eof_discard(struct vmxnet3_rxqueue *,
151 		    struct vmxnet3_rxring *, int);
152 static void	vmxnet3_rxq_eof(struct vmxnet3_rxqueue *);
153 static void	vmxnet3_legacy_intr(void *);
154 #ifdef __FreeBSD__
155 static void	vmxnet3_txq_intr(void *);
156 static void	vmxnet3_rxq_intr(void *);
157 static void	vmxnet3_event_intr(void *);
158 #endif
159 
160 static void	vmxnet3_txstop(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
161 static void	vmxnet3_rxstop(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
162 static void	vmxnet3_stop(struct vmxnet3_softc *);
163 
164 static void	vmxnet3_txinit(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
165 static int	vmxnet3_rxinit(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
166 static int	vmxnet3_reinit_queues(struct vmxnet3_softc *);
167 static int	vmxnet3_enable_device(struct vmxnet3_softc *);
168 static void	vmxnet3_reinit_rxfilters(struct vmxnet3_softc *);
169 static int	vmxnet3_reinit(struct vmxnet3_softc *);
170 static void	vmxnet3_init_locked(struct vmxnet3_softc *);
171 static void	vmxnet3_init(void *);
172 
static int	vmxnet3_txq_offload_ctx(struct vmxnet3_txqueue *, struct mbuf *,
174 		    int *, int *, int *);
175 static int	vmxnet3_txq_load_mbuf(struct vmxnet3_txqueue *, struct mbuf **,
176 		    bus_dmamap_t, bus_dma_segment_t [], int *);
177 static void	vmxnet3_txq_unload_mbuf(struct vmxnet3_txqueue *, bus_dmamap_t);
178 static int	vmxnet3_txq_encap(struct vmxnet3_txqueue *, struct mbuf **);
179 #ifdef VMXNET3_LEGACY_TX
180 static void	vmxnet3_start_locked(struct ifnet *);
181 static void	vmxnet3_start(struct ifnet *, struct ifaltq_subque *);
182 #else
183 static int	vmxnet3_txq_mq_start_locked(struct vmxnet3_txqueue *,
184 		    struct mbuf *);
185 static int	vmxnet3_txq_mq_start(struct ifnet *, struct mbuf *);
186 static void	vmxnet3_txq_tq_deferred(void *, int);
187 #endif
188 static void	vmxnet3_txq_start(struct vmxnet3_txqueue *);
189 static void	vmxnet3_tx_start_all(struct vmxnet3_softc *);
190 
191 static void	vmxnet3_update_vlan_filter(struct vmxnet3_softc *, int,
192 		    uint16_t);
193 static void	vmxnet3_register_vlan(void *, struct ifnet *, uint16_t);
194 static void	vmxnet3_unregister_vlan(void *, struct ifnet *, uint16_t);
195 static void	vmxnet3_set_rxfilter(struct vmxnet3_softc *);
196 static int	vmxnet3_change_mtu(struct vmxnet3_softc *, int);
197 static int	vmxnet3_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
198 
199 #ifndef VMXNET3_LEGACY_TX
200 static void	vmxnet3_qflush(struct ifnet *);
201 #endif
202 
203 static int	vmxnet3_watchdog(struct vmxnet3_txqueue *);
204 static void	vmxnet3_refresh_host_stats(struct vmxnet3_softc *);
205 static void	vmxnet3_txq_accum_stats(struct vmxnet3_txqueue *,
206 		    struct vmxnet3_txq_stats *);
207 static void	vmxnet3_rxq_accum_stats(struct vmxnet3_rxqueue *,
208 		    struct vmxnet3_rxq_stats *);
209 static void	vmxnet3_tick(void *);
210 static void	vmxnet3_link_status(struct vmxnet3_softc *);
211 static void	vmxnet3_media_status(struct ifnet *, struct ifmediareq *);
212 static int	vmxnet3_media_change(struct ifnet *);
213 static void	vmxnet3_set_lladdr(struct vmxnet3_softc *);
214 static void	vmxnet3_get_lladdr(struct vmxnet3_softc *);
215 
216 static void	vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *,
217 		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
218 static void	vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *,
219 		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
220 static void	vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *,
221 		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
222 static void	vmxnet3_setup_sysctl(struct vmxnet3_softc *);
223 
224 static void	vmxnet3_write_bar0(struct vmxnet3_softc *, bus_size_t,
225 		    uint32_t);
226 static uint32_t	vmxnet3_read_bar1(struct vmxnet3_softc *, bus_size_t);
227 static void	vmxnet3_write_bar1(struct vmxnet3_softc *, bus_size_t,
228 		    uint32_t);
229 static void	vmxnet3_write_cmd(struct vmxnet3_softc *, uint32_t);
230 static uint32_t	vmxnet3_read_cmd(struct vmxnet3_softc *, uint32_t);
231 
232 static void	vmxnet3_enable_intr(struct vmxnet3_softc *, int);
233 static void	vmxnet3_disable_intr(struct vmxnet3_softc *, int);
234 static void	vmxnet3_enable_all_intrs(struct vmxnet3_softc *);
235 static void	vmxnet3_disable_all_intrs(struct vmxnet3_softc *);
236 
237 static int	vmxnet3_dma_malloc(struct vmxnet3_softc *, bus_size_t,
238 		    bus_size_t, struct vmxnet3_dma_alloc *);
239 static void	vmxnet3_dma_free(struct vmxnet3_softc *,
240 		    struct vmxnet3_dma_alloc *);
241 static int	vmxnet3_tunable_int(struct vmxnet3_softc *,
242 		    const char *, int);
243 
244 typedef enum {
245 	VMXNET3_BARRIER_RD,
246 	VMXNET3_BARRIER_WR,
247 	VMXNET3_BARRIER_RDWR,
248 } vmxnet3_barrier_t;
249 
250 static void	vmxnet3_barrier(struct vmxnet3_softc *, vmxnet3_barrier_t);
251 
252 /* Tunables. */
253 static int vmxnet3_mq_disable = 0;
254 TUNABLE_INT("hw.vmx.mq_disable", &vmxnet3_mq_disable);
255 static int vmxnet3_default_txnqueue = VMXNET3_DEF_TX_QUEUES;
256 TUNABLE_INT("hw.vmx.txnqueue", &vmxnet3_default_txnqueue);
257 static int vmxnet3_default_rxnqueue = VMXNET3_DEF_RX_QUEUES;
258 TUNABLE_INT("hw.vmx.rxnqueue", &vmxnet3_default_rxnqueue);
259 static int vmxnet3_default_txndesc = VMXNET3_DEF_TX_NDESC;
260 TUNABLE_INT("hw.vmx.txndesc", &vmxnet3_default_txndesc);
261 static int vmxnet3_default_rxndesc = VMXNET3_DEF_RX_NDESC;
262 TUNABLE_INT("hw.vmx.rxndesc", &vmxnet3_default_rxndesc);
263 
264 static device_method_t vmxnet3_methods[] = {
265 	/* Device interface. */
266 	DEVMETHOD(device_probe,		vmxnet3_probe),
267 	DEVMETHOD(device_attach,	vmxnet3_attach),
268 	DEVMETHOD(device_detach,	vmxnet3_detach),
269 	DEVMETHOD(device_shutdown,	vmxnet3_shutdown),
270 
271 	DEVMETHOD_END
272 };
273 
274 static driver_t vmxnet3_driver = {
275 	"vmx", vmxnet3_methods, sizeof(struct vmxnet3_softc)
276 };
277 
278 static devclass_t vmxnet3_devclass;
279 DRIVER_MODULE(vmx, pci, vmxnet3_driver, vmxnet3_devclass, 0, 0);
280 
281 MODULE_DEPEND(vmx, pci, 1, 1, 1);
282 MODULE_DEPEND(vmx, ether, 1, 1, 1);
283 
284 #define VMXNET3_VMWARE_VENDOR_ID	0x15AD
285 #define VMXNET3_VMWARE_DEVICE_ID	0x07B0
286 
287 static int
288 vmxnet3_probe(device_t dev)
289 {
290 
291 	if (pci_get_vendor(dev) == VMXNET3_VMWARE_VENDOR_ID &&
292 	    pci_get_device(dev) == VMXNET3_VMWARE_DEVICE_ID) {
293 		device_set_desc(dev, "VMware VMXNET3 Ethernet Adapter");
294 		return (BUS_PROBE_DEFAULT);
295 	}
296 
297 	return (ENXIO);
298 }
299 
300 static int
301 vmxnet3_attach(device_t dev)
302 {
303 	struct vmxnet3_softc *sc;
304 	int error;
305 
306 	sc = device_get_softc(dev);
307 	sc->vmx_dev = dev;
308 
309 	pci_enable_busmaster(dev);
310 
311 	VMXNET3_CORE_LOCK_INIT(sc, device_get_nameunit(dev));
312 	callout_init_lk(&sc->vmx_tick, &sc->vmx_lock);
313 
314 	vmxnet3_initial_config(sc);
315 
316 	error = vmxnet3_alloc_resources(sc);
317 	if (error)
318 		goto fail;
319 
320 	error = vmxnet3_check_version(sc);
321 	if (error)
322 		goto fail;
323 
324 	error = vmxnet3_alloc_rxtx_queues(sc);
325 	if (error)
326 		goto fail;
327 
328 #ifndef VMXNET3_LEGACY_TX
329 	error = vmxnet3_alloc_taskqueue(sc);
330 	if (error)
331 		goto fail;
332 #endif
333 
334 	error = vmxnet3_alloc_interrupts(sc);
335 	if (error)
336 		goto fail;
337 
338 	vmxnet3_check_multiqueue(sc);
339 
340 	error = vmxnet3_alloc_data(sc);
341 	if (error)
342 		goto fail;
343 
344 	error = vmxnet3_setup_interface(sc);
345 	if (error)
346 		goto fail;
347 
348 	error = vmxnet3_setup_interrupts(sc);
349 	if (error) {
350 		ether_ifdetach(sc->vmx_ifp);
351 		device_printf(dev, "could not set up interrupt\n");
352 		goto fail;
353 	}
354 
355 	vmxnet3_setup_sysctl(sc);
356 #ifndef VMXNET3_LEGACY_TX
357 	vmxnet3_start_taskqueue(sc);
358 #endif
359 
360 fail:
361 	if (error)
362 		vmxnet3_detach(dev);
363 
364 	return (error);
365 }
366 
367 static int
368 vmxnet3_detach(device_t dev)
369 {
370 	struct vmxnet3_softc *sc;
371 	struct ifnet *ifp;
372 
373 	sc = device_get_softc(dev);
374 	ifp = sc->vmx_ifp;
375 
376 	if (device_is_attached(dev)) {
377 		VMXNET3_CORE_LOCK(sc);
378 		vmxnet3_stop(sc);
379 		VMXNET3_CORE_UNLOCK(sc);
380 
381 		callout_stop_sync(&sc->vmx_tick);
382 #ifndef VMXNET3_LEGACY_TX
383 		vmxnet3_drain_taskqueue(sc);
384 #endif
385 
386 		ether_ifdetach(ifp);
387 	}
388 
389 	if (sc->vmx_vlan_attach != NULL) {
390 		EVENTHANDLER_DEREGISTER(vlan_config, sc->vmx_vlan_attach);
391 		sc->vmx_vlan_attach = NULL;
392 	}
393 	if (sc->vmx_vlan_detach != NULL) {
394 		EVENTHANDLER_DEREGISTER(vlan_config, sc->vmx_vlan_detach);
395 		sc->vmx_vlan_detach = NULL;
396 	}
397 
398 #ifndef VMXNET3_LEGACY_TX
399 	vmxnet3_free_taskqueue(sc);
400 #endif
401 	vmxnet3_free_interrupts(sc);
402 
403 	if (ifp != NULL) {
404 		if_free(ifp);
405 		sc->vmx_ifp = NULL;
406 	}
407 
408 	ifmedia_removeall(&sc->vmx_media);
409 
410 	vmxnet3_free_data(sc);
411 	vmxnet3_free_resources(sc);
412 	vmxnet3_free_rxtx_queues(sc);
413 
414 	VMXNET3_CORE_LOCK_DESTROY(sc);
415 
416 	return (0);
417 }
418 
419 static int
420 vmxnet3_shutdown(device_t dev)
421 {
422 
423 	return (0);
424 }
425 
426 static int
427 vmxnet3_alloc_resources(struct vmxnet3_softc *sc)
428 {
429 	device_t dev;
430 	int rid;
431 
432 	dev = sc->vmx_dev;
433 
434 	rid = PCIR_BAR(0);
435 	sc->vmx_res0 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
436 	    RF_ACTIVE);
437 	if (sc->vmx_res0 == NULL) {
438 		device_printf(dev,
439 		    "could not map BAR0 memory\n");
440 		return (ENXIO);
441 	}
442 
443 	sc->vmx_iot0 = rman_get_bustag(sc->vmx_res0);
444 	sc->vmx_ioh0 = rman_get_bushandle(sc->vmx_res0);
445 
446 	rid = PCIR_BAR(1);
447 	sc->vmx_res1 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
448 	    RF_ACTIVE);
449 	if (sc->vmx_res1 == NULL) {
450 		device_printf(dev,
451 		    "could not map BAR1 memory\n");
452 		return (ENXIO);
453 	}
454 
455 	sc->vmx_iot1 = rman_get_bustag(sc->vmx_res1);
456 	sc->vmx_ioh1 = rman_get_bushandle(sc->vmx_res1);
457 
458 	if (pci_find_extcap(dev, PCIY_MSIX, NULL) == 0) {
459 		rid = PCIR_BAR(2);
460 		sc->vmx_msix_res = bus_alloc_resource_any(dev,
461 		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
462 	}
463 
464 	if (sc->vmx_msix_res == NULL)
465 		sc->vmx_flags |= VMXNET3_FLAG_NO_MSIX;
466 
467 	return (0);
468 }
469 
470 static void
471 vmxnet3_free_resources(struct vmxnet3_softc *sc)
472 {
473 	device_t dev;
474 	int rid;
475 
476 	dev = sc->vmx_dev;
477 
478 	if (sc->vmx_res0 != NULL) {
479 		rid = PCIR_BAR(0);
480 		bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->vmx_res0);
481 		sc->vmx_res0 = NULL;
482 	}
483 
484 	if (sc->vmx_res1 != NULL) {
485 		rid = PCIR_BAR(1);
486 		bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->vmx_res1);
487 		sc->vmx_res1 = NULL;
488 	}
489 
490 	if (sc->vmx_msix_res != NULL) {
491 		rid = PCIR_BAR(2);
492 		bus_release_resource(dev, SYS_RES_MEMORY, rid,
493 		    sc->vmx_msix_res);
494 		sc->vmx_msix_res = NULL;
495 	}
496 }
497 
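/*
 * Negotiate the device and UPT revisions: verify that the device supports
 * revision 1 of each interface and write back the revision we will use.
 */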
498 static int
499 vmxnet3_check_version(struct vmxnet3_softc *sc)
500 {
501 	device_t dev;
502 	uint32_t version;
503 
504 	dev = sc->vmx_dev;
505 
506 	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_VRRS);
507 	if ((version & 0x01) == 0) {
508 		device_printf(dev, "unsupported hardware version %#x\n",
509 		    version);
510 		return (ENOTSUP);
511 	}
512 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_VRRS, 1);
513 
514 	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_UVRS);
515 	if ((version & 0x01) == 0) {
516 		device_printf(dev, "unsupported UPT version %#x\n", version);
517 		return (ENOTSUP);
518 	}
519 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_UVRS, 1);
520 
521 	return (0);
522 }
523 
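/* Round down to the largest power of two less than or equal to val. */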
524 static int
525 trunc_powerof2(int val)
526 {
527 
528 	return (1U << (fls(val) - 1));
529 }
530 
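/*
 * Apply the queue and descriptor count tunables, clamping each value to the
 * range supported by the device and rounding the queue counts down to a
 * power of two.
 */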
531 static void
532 vmxnet3_initial_config(struct vmxnet3_softc *sc)
533 {
534 	int nqueue, ndesc;
535 
536 	nqueue = vmxnet3_tunable_int(sc, "txnqueue", vmxnet3_default_txnqueue);
537 	if (nqueue > VMXNET3_MAX_TX_QUEUES || nqueue < 1)
538 		nqueue = VMXNET3_DEF_TX_QUEUES;
539 	if (nqueue > ncpus)
540 		nqueue = ncpus;
541 	sc->vmx_max_ntxqueues = trunc_powerof2(nqueue);
542 
543 	nqueue = vmxnet3_tunable_int(sc, "rxnqueue", vmxnet3_default_rxnqueue);
544 	if (nqueue > VMXNET3_MAX_RX_QUEUES || nqueue < 1)
545 		nqueue = VMXNET3_DEF_RX_QUEUES;
546 	if (nqueue > ncpus)
547 		nqueue = ncpus;
548 	sc->vmx_max_nrxqueues = trunc_powerof2(nqueue);
549 
550 	if (vmxnet3_tunable_int(sc, "mq_disable", vmxnet3_mq_disable)) {
551 		sc->vmx_max_nrxqueues = 1;
552 		sc->vmx_max_ntxqueues = 1;
553 	}
554 
555 	ndesc = vmxnet3_tunable_int(sc, "txd", vmxnet3_default_txndesc);
556 	if (ndesc > VMXNET3_MAX_TX_NDESC || ndesc < VMXNET3_MIN_TX_NDESC)
557 		ndesc = VMXNET3_DEF_TX_NDESC;
558 	if (ndesc & VMXNET3_MASK_TX_NDESC)
559 		ndesc &= ~VMXNET3_MASK_TX_NDESC;
560 	sc->vmx_ntxdescs = ndesc;
561 
562 	ndesc = vmxnet3_tunable_int(sc, "rxd", vmxnet3_default_rxndesc);
563 	if (ndesc > VMXNET3_MAX_RX_NDESC || ndesc < VMXNET3_MIN_RX_NDESC)
564 		ndesc = VMXNET3_DEF_RX_NDESC;
565 	if (ndesc & VMXNET3_MASK_RX_NDESC)
566 		ndesc &= ~VMXNET3_MASK_RX_NDESC;
567 	sc->vmx_nrxdescs = ndesc;
568 	sc->vmx_max_rxsegs = VMXNET3_MAX_RX_SEGS;
569 }
570 
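/*
 * Multiple queues (and thus RSS) are only used when MSI-X is available so
 * that each queue can have its own vector; otherwise fall back to a single
 * TX and RX queue.
 */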
571 static void
572 vmxnet3_check_multiqueue(struct vmxnet3_softc *sc)
573 {
574 
575 	if (sc->vmx_intr_type != VMXNET3_IT_MSIX)
576 		goto out;
577 
578 	/* BMV: Just use the maximum configured for now. */
579 	sc->vmx_nrxqueues = sc->vmx_max_nrxqueues;
580 	sc->vmx_ntxqueues = sc->vmx_max_ntxqueues;
581 
582 	if (sc->vmx_nrxqueues > 1)
583 		sc->vmx_flags |= VMXNET3_FLAG_RSS;
584 
585 	return;
586 
587 out:
588 	sc->vmx_ntxqueues = 1;
589 	sc->vmx_nrxqueues = 1;
590 }
591 
592 #ifdef __FreeBSD__
593 static int
594 vmxnet3_alloc_msix_interrupts(struct vmxnet3_softc *sc)
595 {
596 	device_t dev;
597 	int nmsix, cnt, required;
598 
599 	dev = sc->vmx_dev;
600 
601 	if (sc->vmx_flags & VMXNET3_FLAG_NO_MSIX)
602 		return (1);
603 
604 	/* Allocate an additional vector for the events interrupt. */
605 	required = sc->vmx_max_nrxqueues + sc->vmx_max_ntxqueues + 1;
606 
607 	nmsix = pci_msix_count(dev);
608 	if (nmsix < required)
609 		return (1);
610 
611 	cnt = required;
612 	if (pci_alloc_msix(dev, &cnt) == 0 && cnt >= required) {
613 		sc->vmx_nintrs = required;
614 		return (0);
615 	} else
616 		pci_release_msi(dev);
617 
618 	/* BMV TODO Fallback to sharing MSIX vectors if possible. */
619 
620 	return (1);
621 }
622 
623 static int
624 vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *sc)
625 {
626 	device_t dev;
627 	int nmsi, cnt, required;
628 
629 	dev = sc->vmx_dev;
630 	required = 1;
631 
632 	nmsi = pci_msi_count(dev);
633 	if (nmsi < required)
634 		return (1);
635 
636 	cnt = required;
637 	if (pci_alloc_msi(dev, &cnt) == 0 && cnt >= required) {
638 		sc->vmx_nintrs = 1;
639 		return (0);
640 	} else
641 		pci_release_msi(dev);
642 
643 	return (1);
644 }
645 #else
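/*
 * DragonFly: pci_alloc_1intr() hands back a single vector, using MSI when it
 * is available and the legacy INTx line otherwise.
 */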
646 static int
647 vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *sc)
648 {
649 	int irq_flags, rid;
650 	int enable = 1;
651 
652 	sc->vmx_irq_type = pci_alloc_1intr(sc->vmx_dev, enable, &rid,
653 	    &irq_flags);
654 	sc->vmx_irq_flags = irq_flags;
655 	sc->vmx_nintrs = 1;
656 	return (0);
657 }
658 #endif
659 
660 static int
661 vmxnet3_alloc_legacy_interrupts(struct vmxnet3_softc *sc)
662 {
663 
664 	sc->vmx_nintrs = 1;
665 	return (0);
666 }
667 
668 static int
669 vmxnet3_alloc_interrupt(struct vmxnet3_softc *sc, int rid, int flags,
670     struct vmxnet3_interrupt *intr)
671 {
672 	struct resource *irq;
673 
674 	irq = bus_alloc_resource_any(sc->vmx_dev, SYS_RES_IRQ, &rid,
675 	    sc->vmx_irq_flags);
676 	if (irq == NULL)
677 		return (ENXIO);
678 
679 	intr->vmxi_irq = irq;
680 	intr->vmxi_rid = rid;
681 
682 	return (0);
683 }
684 
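/*
 * Allocate the bus IRQ resource backing each vector. A legacy interrupt uses
 * rid 0 and may be shared; MSI/MSI-X vectors start at rid 1.
 */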
685 static int
686 vmxnet3_alloc_intr_resources(struct vmxnet3_softc *sc)
687 {
688 	int i, rid, flags, error;
689 
690 	rid = 0;
691 	flags = RF_ACTIVE;
692 
693 	if (sc->vmx_intr_type == VMXNET3_IT_LEGACY)
694 		flags |= RF_SHAREABLE;
695 	else
696 		rid = 1;
697 
698 	for (i = 0; i < sc->vmx_nintrs; i++, rid++) {
699 		error = vmxnet3_alloc_interrupt(sc, rid, flags,
700 		    &sc->vmx_intrs[i]);
701 		if (error)
702 			return (error);
703 	}
704 
705 	return (0);
706 }
707 
708 #ifdef __FreeBSD__
709 static int
710 vmxnet3_setup_msix_interrupts(struct vmxnet3_softc *sc)
711 {
712 	device_t dev;
713 	struct vmxnet3_txqueue *txq;
714 	struct vmxnet3_rxqueue *rxq;
715 	struct vmxnet3_interrupt *intr;
716 	int i, error;
717 
718 	dev = sc->vmx_dev;
719 	intr = &sc->vmx_intrs[0];
720 
721 	for (i = 0; i < sc->vmx_ntxqueues; i++, intr++) {
722 		txq = &sc->vmx_txq[i];
723 		error = bus_setup_intr(dev, intr->vmxi_irq, INTR_MPSAFE,
724 		     vmxnet3_txq_intr, txq, &intr->vmxi_handler, NULL);
725 		if (error)
726 			return (error);
727 		bus_describe_intr(dev, intr->vmxi_irq, intr->vmxi_handler,
728 		    "tq%d", i);
729 		txq->vxtxq_intr_idx = intr->vmxi_rid - 1;
730 	}
731 
732 	for (i = 0; i < sc->vmx_nrxqueues; i++, intr++) {
733 		rxq = &sc->vmx_rxq[i];
734 		error = bus_setup_intr(dev, intr->vmxi_irq, INTR_MPSAFE,
735 		    vmxnet3_rxq_intr, rxq, &intr->vmxi_handler, NULL);
736 		if (error)
737 			return (error);
738 		bus_describe_intr(dev, intr->vmxi_irq, intr->vmxi_handler,
739 		    "rq%d", i);
740 		rxq->vxrxq_intr_idx = intr->vmxi_rid - 1;
741 	}
742 
743 	error = bus_setup_intr(dev, intr->vmxi_irq, INTR_MPSAFE,
744 	    vmxnet3_event_intr, sc, &intr->vmxi_handler, NULL);
745 	if (error)
746 		return (error);
747 	bus_describe_intr(dev, intr->vmxi_irq, intr->vmxi_handler, "event");
748 	sc->vmx_event_intr_idx = intr->vmxi_rid - 1;
749 
750 	return (0);
751 }
752 #endif
753 
754 static int
755 vmxnet3_setup_legacy_interrupt(struct vmxnet3_softc *sc)
756 {
757 	struct vmxnet3_interrupt *intr;
758 	int i, error;
759 
760 	intr = &sc->vmx_intrs[0];
761 	error = bus_setup_intr(sc->vmx_dev, intr->vmxi_irq,
762 	    INTR_MPSAFE, vmxnet3_legacy_intr, sc,
763 	    &intr->vmxi_handler, NULL);
764 
765 	for (i = 0; i < sc->vmx_ntxqueues; i++)
766 		sc->vmx_txq[i].vxtxq_intr_idx = 0;
767 	for (i = 0; i < sc->vmx_nrxqueues; i++)
768 		sc->vmx_rxq[i].vxrxq_intr_idx = 0;
769 	sc->vmx_event_intr_idx = 0;
770 
771 	return (error);
772 }
773 
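/*
 * Publish the interrupt vector index assigned to each queue, and to the
 * event interrupt, in the shared structures read by the device.
 */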
774 static void
775 vmxnet3_set_interrupt_idx(struct vmxnet3_softc *sc)
776 {
777 	struct vmxnet3_txqueue *txq;
778 	struct vmxnet3_txq_shared *txs;
779 	struct vmxnet3_rxqueue *rxq;
780 	struct vmxnet3_rxq_shared *rxs;
781 	int i;
782 
783 	sc->vmx_ds->evintr = sc->vmx_event_intr_idx;
784 
785 	for (i = 0; i < sc->vmx_ntxqueues; i++) {
786 		txq = &sc->vmx_txq[i];
787 		txs = txq->vxtxq_ts;
788 		txs->intr_idx = txq->vxtxq_intr_idx;
789 	}
790 
791 	for (i = 0; i < sc->vmx_nrxqueues; i++) {
792 		rxq = &sc->vmx_rxq[i];
793 		rxs = rxq->vxrxq_rs;
794 		rxs->intr_idx = rxq->vxrxq_intr_idx;
795 	}
796 }
797 
798 static int
799 vmxnet3_setup_interrupts(struct vmxnet3_softc *sc)
800 {
801 	int error;
802 
803 	error = vmxnet3_alloc_intr_resources(sc);
804 	if (error)
805 		return (error);
806 
807 	switch (sc->vmx_intr_type) {
808 	case VMXNET3_IT_MSIX:
809 #ifdef __FreeBSD__
810 		error = vmxnet3_setup_msix_interrupts(sc);
811 #else
812 		device_printf(sc->vmx_dev, "VMXNET3_IT_MSIX unsupported\n");
813 		error = ENXIO;
814 #endif
815 		break;
816 	case VMXNET3_IT_MSI:
817 	case VMXNET3_IT_LEGACY:
818 		error = vmxnet3_setup_legacy_interrupt(sc);
819 		break;
820 	default:
821 		panic("%s: invalid interrupt type %d", __func__,
822 		    sc->vmx_intr_type);
823 	}
824 
825 	if (error == 0)
826 		vmxnet3_set_interrupt_idx(sc);
827 
828 	return (error);
829 }
830 
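/*
 * Query the device's preferred interrupt configuration (GET_INTRCFG) and
 * allocate vectors in decreasing order of preference: MSI-X (FreeBSD only),
 * then MSI, then the legacy INTx line.
 */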
831 #ifdef __FreeBSD__
832 static int
833 vmxnet3_alloc_interrupts(struct vmxnet3_softc *sc)
834 {
835 	device_t dev;
836 	uint32_t config;
837 	int error;
838 
839 	dev = sc->vmx_dev;
840 	config = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_INTRCFG);
841 
842 	sc->vmx_intr_type = config & 0x03;
843 	sc->vmx_intr_mask_mode = (config >> 2) & 0x03;
844 
845 	switch (sc->vmx_intr_type) {
846 	case VMXNET3_IT_AUTO:
847 		sc->vmx_intr_type = VMXNET3_IT_MSIX;
848 		/* FALLTHROUGH */
849 	case VMXNET3_IT_MSIX:
850 		error = vmxnet3_alloc_msix_interrupts(sc);
851 		if (error == 0)
852 			break;
853 		sc->vmx_intr_type = VMXNET3_IT_MSI;
854 		/* FALLTHROUGH */
855 	case VMXNET3_IT_MSI:
856 		error = vmxnet3_alloc_msi_interrupts(sc);
857 		if (error == 0)
858 			break;
859 		sc->vmx_intr_type = VMXNET3_IT_LEGACY;
860 		/* FALLTHROUGH */
861 	case VMXNET3_IT_LEGACY:
862 		error = vmxnet3_alloc_legacy_interrupts(sc);
863 		if (error == 0)
864 			break;
865 		/* FALLTHROUGH */
866 	default:
867 		sc->vmx_intr_type = -1;
868 		device_printf(dev, "cannot allocate any interrupt resources\n");
869 		return (ENXIO);
870 	}
871 
872 	return (error);
873 }
874 #else
875 static int
876 vmxnet3_alloc_interrupts(struct vmxnet3_softc *sc)
877 {
878 	device_t dev;
879 	uint32_t config;
880 	int error;
881 
882 	dev = sc->vmx_dev;
883 	config = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_INTRCFG);
884 
885 	sc->vmx_intr_type = config & 0x03;
886 	sc->vmx_intr_mask_mode = (config >> 2) & 0x03;
887 
888 	switch (sc->vmx_intr_type) {
889 	case VMXNET3_IT_AUTO:
890 		sc->vmx_intr_type = VMXNET3_IT_MSI;
891 		/* FALLTHROUGH */
892 	case VMXNET3_IT_MSI:
893 		error = vmxnet3_alloc_msi_interrupts(sc);
894 		if (error == 0)
895 			break;
		sc->vmx_intr_type = VMXNET3_IT_LEGACY;
		/* FALLTHROUGH */
897 	case VMXNET3_IT_LEGACY:
898 		error = vmxnet3_alloc_legacy_interrupts(sc);
899 		if (error == 0)
900 			break;
901 		/* FALLTHROUGH */
902 	case VMXNET3_IT_MSIX:
903 		/* FALLTHROUGH */
904 	default:
905 		sc->vmx_intr_type = -1;
906 		device_printf(dev, "cannot allocate any interrupt resources\n");
907 		return (ENXIO);
908 	}
909 
910 	return (error);
911 }
912 #endif
913 
914 static void
915 vmxnet3_free_interrupt(struct vmxnet3_softc *sc,
916     struct vmxnet3_interrupt *intr)
917 {
918 	device_t dev;
919 
920 	dev = sc->vmx_dev;
921 
922 	if (intr->vmxi_handler != NULL) {
923 		bus_teardown_intr(dev, intr->vmxi_irq, intr->vmxi_handler);
924 		intr->vmxi_handler = NULL;
925 	}
926 
927 	if (intr->vmxi_irq != NULL) {
928 		bus_release_resource(dev, SYS_RES_IRQ, intr->vmxi_rid,
929 		    intr->vmxi_irq);
930 		intr->vmxi_irq = NULL;
931 		intr->vmxi_rid = -1;
932 	}
933 }
934 
935 #ifdef __FreeBSD__
936 static void
937 vmxnet3_free_interrupts(struct vmxnet3_softc *sc)
938 {
939 	int i;
940 
941 	for (i = 0; i < sc->vmx_nintrs; i++)
942 		vmxnet3_free_interrupt(sc, &sc->vmx_intrs[i]);
943 
944 	if (sc->vmx_intr_type == VMXNET3_IT_MSI ||
945 	    sc->vmx_intr_type == VMXNET3_IT_MSIX)
946 		pci_release_msi(sc->vmx_dev);
947 }
948 #else
949 static void
950 vmxnet3_free_interrupts(struct vmxnet3_softc *sc)
951 {
952 	int i;
953 
954 	for (i = 0; i < sc->vmx_nintrs; i++)
955 		vmxnet3_free_interrupt(sc, &sc->vmx_intrs[i]);
956 
957 	if (sc->vmx_irq_type == PCI_INTR_TYPE_MSI)
958 		pci_release_msi(sc->vmx_dev);
959 }
960 #endif
961 
962 #ifndef VMXNET3_LEGACY_TX
963 static int
964 vmxnet3_alloc_taskqueue(struct vmxnet3_softc *sc)
965 {
966 	device_t dev;
967 
968 	dev = sc->vmx_dev;
969 
970 	sc->vmx_tq = taskqueue_create(device_get_nameunit(dev), M_NOWAIT,
971 	    taskqueue_thread_enqueue, &sc->vmx_tq);
972 	if (sc->vmx_tq == NULL)
973 		return (ENOMEM);
974 
975 	return (0);
976 }
977 
978 static void
979 vmxnet3_start_taskqueue(struct vmxnet3_softc *sc)
980 {
981 	device_t dev;
982 	int nthreads, error;
983 
984 	dev = sc->vmx_dev;
985 
986 	/*
987 	 * The taskqueue is typically not frequently used, so a dedicated
988 	 * thread for each queue is unnecessary.
989 	 */
990 	nthreads = MAX(1, sc->vmx_ntxqueues / 2);
991 
	/*
	 * Most drivers just ignore the return value; it can only fail
	 * with ENOMEM, so an error is unlikely, and there is little we
	 * could do to recover from one here anyway.
	 */
997 	error = taskqueue_start_threads(&sc->vmx_tq, nthreads, PI_NET,
998 	    "%s taskq", device_get_nameunit(dev));
999 	if (error)
		device_printf(dev, "failed to start taskqueue: %d\n", error);
1001 }
1002 
1003 static void
1004 vmxnet3_drain_taskqueue(struct vmxnet3_softc *sc)
1005 {
1006 	struct vmxnet3_txqueue *txq;
1007 	int i;
1008 
1009 	if (sc->vmx_tq != NULL) {
1010 		for (i = 0; i < sc->vmx_max_ntxqueues; i++) {
1011 			txq = &sc->vmx_txq[i];
1012 			taskqueue_drain(sc->vmx_tq, &txq->vxtxq_defrtask);
1013 		}
1014 	}
1015 }
1016 
1017 static void
1018 vmxnet3_free_taskqueue(struct vmxnet3_softc *sc)
1019 {
1020 	if (sc->vmx_tq != NULL) {
1021 		taskqueue_free(sc->vmx_tq);
1022 		sc->vmx_tq = NULL;
1023 	}
1024 }
1025 #endif
1026 
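/*
 * Set up the software state for one receive queue: its lock, the buffer
 * arrays for both command (fill) rings, and the completion ring size, which
 * is the sum of the two command ring sizes.
 */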
1027 static int
1028 vmxnet3_init_rxq(struct vmxnet3_softc *sc, int q)
1029 {
1030 	struct vmxnet3_rxqueue *rxq;
1031 	struct vmxnet3_rxring *rxr;
1032 	int i;
1033 
1034 	rxq = &sc->vmx_rxq[q];
1035 
1036 	ksnprintf(rxq->vxrxq_name, sizeof(rxq->vxrxq_name), "%s-rx%d",
1037 	    device_get_nameunit(sc->vmx_dev), q);
1038 	lockinit(&rxq->vxrxq_lock, rxq->vxrxq_name, 0, 0);
1039 
1040 	rxq->vxrxq_sc = sc;
1041 	rxq->vxrxq_id = q;
1042 
1043 	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1044 		rxr = &rxq->vxrxq_cmd_ring[i];
1045 		rxr->vxrxr_rid = i;
1046 		rxr->vxrxr_ndesc = sc->vmx_nrxdescs;
1047 		rxr->vxrxr_rxbuf = kmalloc(rxr->vxrxr_ndesc *
1048 		    sizeof(struct vmxnet3_rxbuf), M_DEVBUF, M_INTWAIT | M_ZERO);
1049 		if (rxr->vxrxr_rxbuf == NULL)
1050 			return (ENOMEM);
1051 
1052 		rxq->vxrxq_comp_ring.vxcr_ndesc += sc->vmx_nrxdescs;
1053 	}
1054 
1055 	return (0);
1056 }
1057 
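/*
 * Set up the software state for one transmit queue: its lock, the command
 * ring buffer array, and, when the multiqueue TX path is enabled, its
 * buf_ring and deferred-start task.
 */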
1058 static int
1059 vmxnet3_init_txq(struct vmxnet3_softc *sc, int q)
1060 {
1061 	struct vmxnet3_txqueue *txq;
1062 	struct vmxnet3_txring *txr;
1063 
1064 	txq = &sc->vmx_txq[q];
1065 	txr = &txq->vxtxq_cmd_ring;
1066 
1067 	ksnprintf(txq->vxtxq_name, sizeof(txq->vxtxq_name), "%s-tx%d",
1068 	    device_get_nameunit(sc->vmx_dev), q);
1069 	lockinit(&txq->vxtxq_lock, txq->vxtxq_name, 0, 0);
1070 
1071 	txq->vxtxq_sc = sc;
1072 	txq->vxtxq_id = q;
1073 
1074 	txr->vxtxr_ndesc = sc->vmx_ntxdescs;
1075 	txr->vxtxr_txbuf = kmalloc(txr->vxtxr_ndesc *
1076 	    sizeof(struct vmxnet3_txbuf), M_DEVBUF, M_INTWAIT | M_ZERO);
1077 	if (txr->vxtxr_txbuf == NULL)
1078 		return (ENOMEM);
1079 
1080 	txq->vxtxq_comp_ring.vxcr_ndesc = sc->vmx_ntxdescs;
1081 
1082 #ifndef VMXNET3_LEGACY_TX
1083 	TASK_INIT(&txq->vxtxq_defrtask, 0, vmxnet3_txq_tq_deferred, txq);
1084 
1085 	txq->vxtxq_br = buf_ring_alloc(VMXNET3_DEF_BUFRING_SIZE, M_DEVBUF,
1086 	    M_NOWAIT, &txq->vxtxq_lock);
1087 	if (txq->vxtxq_br == NULL)
1088 		return (ENOMEM);
1089 #endif
1090 
1091 	return (0);
1092 }
1093 
1094 static int
1095 vmxnet3_alloc_rxtx_queues(struct vmxnet3_softc *sc)
1096 {
1097 	int i, error;
1098 
	/*
	 * Only attempt to create multiple queues if MSIX is available. MSIX is
	 * disabled by default because it is apparently broken for devices
	 * passed through by at least ESXi 5.1; the hw.pci.honor_msi_blacklist
	 * tunable must be set to zero to enable it. This check prevents us
	 * from allocating queue structures that we will not use.
	 */
1106 	if (sc->vmx_flags & VMXNET3_FLAG_NO_MSIX) {
1107 		sc->vmx_max_nrxqueues = 1;
1108 		sc->vmx_max_ntxqueues = 1;
1109 	}
1110 
1111 	sc->vmx_rxq = kmalloc(sizeof(struct vmxnet3_rxqueue) *
1112 	    sc->vmx_max_nrxqueues, M_DEVBUF, M_INTWAIT | M_ZERO);
1113 	sc->vmx_txq = kmalloc(sizeof(struct vmxnet3_txqueue) *
1114 	    sc->vmx_max_ntxqueues, M_DEVBUF, M_INTWAIT | M_ZERO);
1115 	if (sc->vmx_rxq == NULL || sc->vmx_txq == NULL)
1116 		return (ENOMEM);
1117 
1118 	for (i = 0; i < sc->vmx_max_nrxqueues; i++) {
1119 		error = vmxnet3_init_rxq(sc, i);
1120 		if (error)
1121 			return (error);
1122 	}
1123 
1124 	for (i = 0; i < sc->vmx_max_ntxqueues; i++) {
1125 		error = vmxnet3_init_txq(sc, i);
1126 		if (error)
1127 			return (error);
1128 	}
1129 
1130 	return (0);
1131 }
1132 
1133 static void
1134 vmxnet3_destroy_rxq(struct vmxnet3_rxqueue *rxq)
1135 {
1136 	struct vmxnet3_rxring *rxr;
1137 	int i;
1138 
1139 	rxq->vxrxq_sc = NULL;
1140 	rxq->vxrxq_id = -1;
1141 
1142 	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1143 		rxr = &rxq->vxrxq_cmd_ring[i];
1144 
1145 		if (rxr->vxrxr_rxbuf != NULL) {
1146 			kfree(rxr->vxrxr_rxbuf, M_DEVBUF);
1147 			rxr->vxrxr_rxbuf = NULL;
1148 		}
1149 	}
1150 
1151 #if 0 /* XXX */
1152 	if (mtx_initialized(&rxq->vxrxq_lock) != 0)
1153 #endif
1154 		lockuninit(&rxq->vxrxq_lock);
1155 }
1156 
1157 static void
1158 vmxnet3_destroy_txq(struct vmxnet3_txqueue *txq)
1159 {
1160 	struct vmxnet3_txring *txr;
1161 
1162 	txr = &txq->vxtxq_cmd_ring;
1163 
1164 	txq->vxtxq_sc = NULL;
1165 	txq->vxtxq_id = -1;
1166 
1167 #ifndef VMXNET3_LEGACY_TX
1168 	if (txq->vxtxq_br != NULL) {
1169 		buf_ring_free(txq->vxtxq_br, M_DEVBUF);
1170 		txq->vxtxq_br = NULL;
1171 	}
1172 #endif
1173 
1174 	if (txr->vxtxr_txbuf != NULL) {
1175 		kfree(txr->vxtxr_txbuf, M_DEVBUF);
1176 		txr->vxtxr_txbuf = NULL;
1177 	}
1178 
1179 #if 0 /* XXX */
1180 	if (mtx_initialized(&txq->vxtxq_lock) != 0)
1181 #endif
1182 		lockuninit(&txq->vxtxq_lock);
1183 }
1184 
1185 static void
1186 vmxnet3_free_rxtx_queues(struct vmxnet3_softc *sc)
1187 {
1188 	int i;
1189 
1190 	if (sc->vmx_rxq != NULL) {
1191 		for (i = 0; i < sc->vmx_max_nrxqueues; i++)
1192 			vmxnet3_destroy_rxq(&sc->vmx_rxq[i]);
1193 		kfree(sc->vmx_rxq, M_DEVBUF);
1194 		sc->vmx_rxq = NULL;
1195 	}
1196 
1197 	if (sc->vmx_txq != NULL) {
1198 		for (i = 0; i < sc->vmx_max_ntxqueues; i++)
1199 			vmxnet3_destroy_txq(&sc->vmx_txq[i]);
1200 		kfree(sc->vmx_txq, M_DEVBUF);
1201 		sc->vmx_txq = NULL;
1202 	}
1203 }
1204 
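/*
 * Allocate the DMA memory shared with the device: the driver_shared
 * structure, one allocation holding all per-queue shared structures (the
 * per-queue pointers are carved out of it below), and, when RSS is enabled,
 * the RSS configuration block.
 */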
1205 static int
1206 vmxnet3_alloc_shared_data(struct vmxnet3_softc *sc)
1207 {
1208 	device_t dev;
1209 	uint8_t *kva;
1210 	size_t size;
1211 	int i, error;
1212 
1213 	dev = sc->vmx_dev;
1214 
1215 	size = sizeof(struct vmxnet3_driver_shared);
1216 	error = vmxnet3_dma_malloc(sc, size, 1, &sc->vmx_ds_dma);
1217 	if (error) {
1218 		device_printf(dev, "cannot alloc shared memory\n");
1219 		return (error);
1220 	}
1221 	sc->vmx_ds = (struct vmxnet3_driver_shared *) sc->vmx_ds_dma.dma_vaddr;
1222 
1223 	size = sc->vmx_ntxqueues * sizeof(struct vmxnet3_txq_shared) +
1224 	    sc->vmx_nrxqueues * sizeof(struct vmxnet3_rxq_shared);
1225 	error = vmxnet3_dma_malloc(sc, size, 128, &sc->vmx_qs_dma);
1226 	if (error) {
1227 		device_printf(dev, "cannot alloc queue shared memory\n");
1228 		return (error);
1229 	}
1230 	sc->vmx_qs = (void *) sc->vmx_qs_dma.dma_vaddr;
1231 	kva = sc->vmx_qs;
1232 
1233 	for (i = 0; i < sc->vmx_ntxqueues; i++) {
1234 		sc->vmx_txq[i].vxtxq_ts = (struct vmxnet3_txq_shared *) kva;
1235 		kva += sizeof(struct vmxnet3_txq_shared);
1236 	}
1237 	for (i = 0; i < sc->vmx_nrxqueues; i++) {
1238 		sc->vmx_rxq[i].vxrxq_rs = (struct vmxnet3_rxq_shared *) kva;
1239 		kva += sizeof(struct vmxnet3_rxq_shared);
1240 	}
1241 
1242 	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1243 		size = sizeof(struct vmxnet3_rss_shared);
1244 		error = vmxnet3_dma_malloc(sc, size, 128, &sc->vmx_rss_dma);
1245 		if (error) {
1246 			device_printf(dev, "cannot alloc rss shared memory\n");
1247 			return (error);
1248 		}
1249 		sc->vmx_rss =
1250 		    (struct vmxnet3_rss_shared *) sc->vmx_rss_dma.dma_vaddr;
1251 	}
1252 
1253 	return (0);
1254 }
1255 
1256 static void
1257 vmxnet3_free_shared_data(struct vmxnet3_softc *sc)
1258 {
1259 
1260 	if (sc->vmx_rss != NULL) {
1261 		vmxnet3_dma_free(sc, &sc->vmx_rss_dma);
1262 		sc->vmx_rss = NULL;
1263 	}
1264 
1265 	if (sc->vmx_qs != NULL) {
1266 		vmxnet3_dma_free(sc, &sc->vmx_qs_dma);
1267 		sc->vmx_qs = NULL;
1268 	}
1269 
1270 	if (sc->vmx_ds != NULL) {
1271 		vmxnet3_dma_free(sc, &sc->vmx_ds_dma);
1272 		sc->vmx_ds = NULL;
1273 	}
1274 }
1275 
1276 static int
1277 vmxnet3_alloc_txq_data(struct vmxnet3_softc *sc)
1278 {
1279 	device_t dev;
1280 	struct vmxnet3_txqueue *txq;
1281 	struct vmxnet3_txring *txr;
1282 	struct vmxnet3_comp_ring *txc;
1283 	size_t descsz, compsz;
1284 	int i, q, error;
1285 
1286 	dev = sc->vmx_dev;
1287 
1288 	for (q = 0; q < sc->vmx_ntxqueues; q++) {
1289 		txq = &sc->vmx_txq[q];
1290 		txr = &txq->vxtxq_cmd_ring;
1291 		txc = &txq->vxtxq_comp_ring;
1292 
1293 		descsz = txr->vxtxr_ndesc * sizeof(struct vmxnet3_txdesc);
1294 		compsz = txr->vxtxr_ndesc * sizeof(struct vmxnet3_txcompdesc);
1295 
1296 		error = bus_dma_tag_create(bus_get_dma_tag(dev),
1297 		    1, 0,			/* alignment, boundary */
1298 		    BUS_SPACE_MAXADDR,		/* lowaddr */
1299 		    BUS_SPACE_MAXADDR,		/* highaddr */
1300 		    NULL, NULL,			/* filter, filterarg */
1301 		    VMXNET3_TX_MAXSIZE,		/* maxsize */
1302 		    VMXNET3_TX_MAXSEGS,		/* nsegments */
1303 		    VMXNET3_TX_MAXSEGSIZE,	/* maxsegsize */
1304 		    0,				/* flags */
1305 		    &txr->vxtxr_txtag);
1306 		if (error) {
1307 			device_printf(dev,
1308 			    "unable to create Tx buffer tag for queue %d\n", q);
1309 			return (error);
1310 		}
1311 
1312 		error = vmxnet3_dma_malloc(sc, descsz, 512, &txr->vxtxr_dma);
1313 		if (error) {
1314 			device_printf(dev, "cannot alloc Tx descriptors for "
1315 			    "queue %d error %d\n", q, error);
1316 			return (error);
1317 		}
1318 		txr->vxtxr_txd =
1319 		    (struct vmxnet3_txdesc *) txr->vxtxr_dma.dma_vaddr;
1320 
1321 		error = vmxnet3_dma_malloc(sc, compsz, 512, &txc->vxcr_dma);
1322 		if (error) {
1323 			device_printf(dev, "cannot alloc Tx comp descriptors "
1324 			   "for queue %d error %d\n", q, error);
1325 			return (error);
1326 		}
1327 		txc->vxcr_u.txcd =
1328 		    (struct vmxnet3_txcompdesc *) txc->vxcr_dma.dma_vaddr;
1329 
1330 		for (i = 0; i < txr->vxtxr_ndesc; i++) {
1331 			error = bus_dmamap_create(txr->vxtxr_txtag, 0,
1332 			    &txr->vxtxr_txbuf[i].vtxb_dmamap);
1333 			if (error) {
1334 				device_printf(dev, "unable to create Tx buf "
1335 				    "dmamap for queue %d idx %d\n", q, i);
1336 				return (error);
1337 			}
1338 		}
1339 	}
1340 
1341 	return (0);
1342 }
1343 
1344 static void
1345 vmxnet3_free_txq_data(struct vmxnet3_softc *sc)
1346 {
1347 	device_t dev;
1348 	struct vmxnet3_txqueue *txq;
1349 	struct vmxnet3_txring *txr;
1350 	struct vmxnet3_comp_ring *txc;
1351 	struct vmxnet3_txbuf *txb;
1352 	int i, q;
1353 
1354 	dev = sc->vmx_dev;
1355 
1356 	for (q = 0; q < sc->vmx_ntxqueues; q++) {
1357 		txq = &sc->vmx_txq[q];
1358 		txr = &txq->vxtxq_cmd_ring;
1359 		txc = &txq->vxtxq_comp_ring;
1360 
1361 		for (i = 0; i < txr->vxtxr_ndesc; i++) {
1362 			txb = &txr->vxtxr_txbuf[i];
1363 			if (txb->vtxb_dmamap != NULL) {
1364 				bus_dmamap_destroy(txr->vxtxr_txtag,
1365 				    txb->vtxb_dmamap);
1366 				txb->vtxb_dmamap = NULL;
1367 			}
1368 		}
1369 
1370 		if (txc->vxcr_u.txcd != NULL) {
1371 			vmxnet3_dma_free(sc, &txc->vxcr_dma);
1372 			txc->vxcr_u.txcd = NULL;
1373 		}
1374 
1375 		if (txr->vxtxr_txd != NULL) {
1376 			vmxnet3_dma_free(sc, &txr->vxtxr_dma);
1377 			txr->vxtxr_txd = NULL;
1378 		}
1379 
1380 		if (txr->vxtxr_txtag != NULL) {
1381 			bus_dma_tag_destroy(txr->vxtxr_txtag);
1382 			txr->vxtxr_txtag = NULL;
1383 		}
1384 	}
1385 }
1386 
1387 static int
1388 vmxnet3_alloc_rxq_data(struct vmxnet3_softc *sc)
1389 {
1390 	device_t dev;
1391 	struct vmxnet3_rxqueue *rxq;
1392 	struct vmxnet3_rxring *rxr;
1393 	struct vmxnet3_comp_ring *rxc;
1394 	int descsz, compsz;
1395 	int i, j, q, error;
1396 
1397 	dev = sc->vmx_dev;
1398 
1399 	for (q = 0; q < sc->vmx_nrxqueues; q++) {
1400 		rxq = &sc->vmx_rxq[q];
1401 		rxc = &rxq->vxrxq_comp_ring;
1402 		compsz = 0;
1403 
1404 		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1405 			rxr = &rxq->vxrxq_cmd_ring[i];
1406 
1407 			descsz = rxr->vxrxr_ndesc *
1408 			    sizeof(struct vmxnet3_rxdesc);
1409 			compsz += rxr->vxrxr_ndesc *
1410 			    sizeof(struct vmxnet3_rxcompdesc);
1411 
1412 			error = bus_dma_tag_create(bus_get_dma_tag(dev),
1413 			    1, 0,		/* alignment, boundary */
1414 			    BUS_SPACE_MAXADDR,	/* lowaddr */
1415 			    BUS_SPACE_MAXADDR,	/* highaddr */
1416 			    NULL, NULL,		/* filter, filterarg */
1417 			    MJUMPAGESIZE,	/* maxsize */
1418 			    1,			/* nsegments */
1419 			    MJUMPAGESIZE,	/* maxsegsize */
1420 			    0,			/* flags */
1421 			    &rxr->vxrxr_rxtag);
1422 			if (error) {
1423 				device_printf(dev,
1424 				    "unable to create Rx buffer tag for "
1425 				    "queue %d\n", q);
1426 				return (error);
1427 			}
1428 
1429 			error = vmxnet3_dma_malloc(sc, descsz, 512,
1430 			    &rxr->vxrxr_dma);
1431 			if (error) {
				device_printf(dev, "cannot allocate Rx "
				    "descriptors for queue %d/%d error %d\n",
				    q, i, error);
1435 				return (error);
1436 			}
1437 			rxr->vxrxr_rxd =
1438 			    (struct vmxnet3_rxdesc *) rxr->vxrxr_dma.dma_vaddr;
1439 		}
1440 
1441 		error = vmxnet3_dma_malloc(sc, compsz, 512, &rxc->vxcr_dma);
1442 		if (error) {
1443 			device_printf(dev, "cannot alloc Rx comp descriptors "
1444 			    "for queue %d error %d\n", q, error);
1445 			return (error);
1446 		}
1447 		rxc->vxcr_u.rxcd =
1448 		    (struct vmxnet3_rxcompdesc *) rxc->vxcr_dma.dma_vaddr;
1449 
1450 		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1451 			rxr = &rxq->vxrxq_cmd_ring[i];
1452 
1453 			error = bus_dmamap_create(rxr->vxrxr_rxtag, 0,
1454 			    &rxr->vxrxr_spare_dmap);
1455 			if (error) {
1456 				device_printf(dev, "unable to create spare "
1457 				    "dmamap for queue %d/%d error %d\n",
1458 				    q, i, error);
1459 				return (error);
1460 			}
1461 
1462 			for (j = 0; j < rxr->vxrxr_ndesc; j++) {
1463 				error = bus_dmamap_create(rxr->vxrxr_rxtag, 0,
1464 				    &rxr->vxrxr_rxbuf[j].vrxb_dmamap);
1465 				if (error) {
1466 					device_printf(dev, "unable to create "
1467 					    "dmamap for queue %d/%d slot %d "
1468 					    "error %d\n",
1469 					    q, i, j, error);
1470 					return (error);
1471 				}
1472 			}
1473 		}
1474 	}
1475 
1476 	return (0);
1477 }
1478 
1479 static void
1480 vmxnet3_free_rxq_data(struct vmxnet3_softc *sc)
1481 {
1482 	device_t dev;
1483 	struct vmxnet3_rxqueue *rxq;
1484 	struct vmxnet3_rxring *rxr;
1485 	struct vmxnet3_comp_ring *rxc;
1486 	struct vmxnet3_rxbuf *rxb;
1487 	int i, j, q;
1488 
1489 	dev = sc->vmx_dev;
1490 
1491 	for (q = 0; q < sc->vmx_nrxqueues; q++) {
1492 		rxq = &sc->vmx_rxq[q];
1493 		rxc = &rxq->vxrxq_comp_ring;
1494 
1495 		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1496 			rxr = &rxq->vxrxq_cmd_ring[i];
1497 
1498 			if (rxr->vxrxr_spare_dmap != NULL) {
1499 				bus_dmamap_destroy(rxr->vxrxr_rxtag,
1500 				    rxr->vxrxr_spare_dmap);
1501 				rxr->vxrxr_spare_dmap = NULL;
1502 			}
1503 
1504 			for (j = 0; j < rxr->vxrxr_ndesc; j++) {
1505 				rxb = &rxr->vxrxr_rxbuf[j];
1506 				if (rxb->vrxb_dmamap != NULL) {
1507 					bus_dmamap_destroy(rxr->vxrxr_rxtag,
1508 					    rxb->vrxb_dmamap);
1509 					rxb->vrxb_dmamap = NULL;
1510 				}
1511 			}
1512 		}
1513 
1514 		if (rxc->vxcr_u.rxcd != NULL) {
1515 			vmxnet3_dma_free(sc, &rxc->vxcr_dma);
1516 			rxc->vxcr_u.rxcd = NULL;
1517 		}
1518 
1519 		for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1520 			rxr = &rxq->vxrxq_cmd_ring[i];
1521 
1522 			if (rxr->vxrxr_rxd != NULL) {
1523 				vmxnet3_dma_free(sc, &rxr->vxrxr_dma);
1524 				rxr->vxrxr_rxd = NULL;
1525 			}
1526 
1527 			if (rxr->vxrxr_rxtag != NULL) {
1528 				bus_dma_tag_destroy(rxr->vxrxr_rxtag);
1529 				rxr->vxrxr_rxtag = NULL;
1530 			}
1531 		}
1532 	}
1533 }
1534 
1535 static int
1536 vmxnet3_alloc_queue_data(struct vmxnet3_softc *sc)
1537 {
1538 	int error;
1539 
1540 	error = vmxnet3_alloc_txq_data(sc);
1541 	if (error)
1542 		return (error);
1543 
1544 	error = vmxnet3_alloc_rxq_data(sc);
1545 	if (error)
1546 		return (error);
1547 
1548 	return (0);
1549 }
1550 
1551 static void
1552 vmxnet3_free_queue_data(struct vmxnet3_softc *sc)
1553 {
1554 
1555 	if (sc->vmx_rxq != NULL)
1556 		vmxnet3_free_rxq_data(sc);
1557 
1558 	if (sc->vmx_txq != NULL)
1559 		vmxnet3_free_txq_data(sc);
1560 }
1561 
1562 static int
1563 vmxnet3_alloc_mcast_table(struct vmxnet3_softc *sc)
1564 {
1565 	int error;
1566 
1567 	error = vmxnet3_dma_malloc(sc, VMXNET3_MULTICAST_MAX * ETHER_ADDR_LEN,
1568 	    32, &sc->vmx_mcast_dma);
1569 	if (error)
1570 		device_printf(sc->vmx_dev, "unable to alloc multicast table\n");
1571 	else
1572 		sc->vmx_mcast = sc->vmx_mcast_dma.dma_vaddr;
1573 
1574 	return (error);
1575 }
1576 
1577 static void
1578 vmxnet3_free_mcast_table(struct vmxnet3_softc *sc)
1579 {
1580 
1581 	if (sc->vmx_mcast != NULL) {
1582 		vmxnet3_dma_free(sc, &sc->vmx_mcast_dma);
1583 		sc->vmx_mcast = NULL;
1584 	}
1585 }
1586 
1587 static void
1588 vmxnet3_init_shared_data(struct vmxnet3_softc *sc)
1589 {
1590 	struct vmxnet3_driver_shared *ds;
1591 	struct vmxnet3_txqueue *txq;
1592 	struct vmxnet3_txq_shared *txs;
1593 	struct vmxnet3_rxqueue *rxq;
1594 	struct vmxnet3_rxq_shared *rxs;
1595 	int i;
1596 
1597 	ds = sc->vmx_ds;
1598 
	/*
	 * Initialize the fields of the shared data that remain the same
	 * across reinits. Note the shared data is zeroed when allocated.
	 */
1603 
1604 	ds->magic = VMXNET3_REV1_MAGIC;
1605 
1606 	/* DriverInfo */
1607 	ds->version = VMXNET3_DRIVER_VERSION;
1608 	ds->guest = VMXNET3_GOS_FREEBSD |
1609 #ifdef __LP64__
1610 	    VMXNET3_GOS_64BIT;
1611 #else
1612 	    VMXNET3_GOS_32BIT;
1613 #endif
1614 	ds->vmxnet3_revision = 1;
1615 	ds->upt_version = 1;
1616 
1617 	/* Misc. conf */
1618 	ds->driver_data = vtophys(sc);
1619 	ds->driver_data_len = sizeof(struct vmxnet3_softc);
1620 	ds->queue_shared = sc->vmx_qs_dma.dma_paddr;
1621 	ds->queue_shared_len = sc->vmx_qs_dma.dma_size;
1622 	ds->nrxsg_max = sc->vmx_max_rxsegs;
1623 
1624 	/* RSS conf */
1625 	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1626 		ds->rss.version = 1;
1627 		ds->rss.paddr = sc->vmx_rss_dma.dma_paddr;
1628 		ds->rss.len = sc->vmx_rss_dma.dma_size;
1629 	}
1630 
1631 	/* Interrupt control. */
1632 	ds->automask = sc->vmx_intr_mask_mode == VMXNET3_IMM_AUTO;
1633 	ds->nintr = sc->vmx_nintrs;
1634 	ds->evintr = sc->vmx_event_intr_idx;
1635 	ds->ictrl = VMXNET3_ICTRL_DISABLE_ALL;
1636 
1637 	for (i = 0; i < sc->vmx_nintrs; i++)
1638 		ds->modlevel[i] = UPT1_IMOD_ADAPTIVE;
1639 
1640 	/* Receive filter. */
1641 	ds->mcast_table = sc->vmx_mcast_dma.dma_paddr;
1642 	ds->mcast_tablelen = sc->vmx_mcast_dma.dma_size;
1643 
1644 	/* Tx queues */
1645 	for (i = 0; i < sc->vmx_ntxqueues; i++) {
1646 		txq = &sc->vmx_txq[i];
1647 		txs = txq->vxtxq_ts;
1648 
1649 		txs->cmd_ring = txq->vxtxq_cmd_ring.vxtxr_dma.dma_paddr;
1650 		txs->cmd_ring_len = txq->vxtxq_cmd_ring.vxtxr_ndesc;
1651 		txs->comp_ring = txq->vxtxq_comp_ring.vxcr_dma.dma_paddr;
1652 		txs->comp_ring_len = txq->vxtxq_comp_ring.vxcr_ndesc;
1653 		txs->driver_data = vtophys(txq);
1654 		txs->driver_data_len = sizeof(struct vmxnet3_txqueue);
1655 	}
1656 
1657 	/* Rx queues */
1658 	for (i = 0; i < sc->vmx_nrxqueues; i++) {
1659 		rxq = &sc->vmx_rxq[i];
1660 		rxs = rxq->vxrxq_rs;
1661 
1662 		rxs->cmd_ring[0] = rxq->vxrxq_cmd_ring[0].vxrxr_dma.dma_paddr;
1663 		rxs->cmd_ring_len[0] = rxq->vxrxq_cmd_ring[0].vxrxr_ndesc;
1664 		rxs->cmd_ring[1] = rxq->vxrxq_cmd_ring[1].vxrxr_dma.dma_paddr;
1665 		rxs->cmd_ring_len[1] = rxq->vxrxq_cmd_ring[1].vxrxr_ndesc;
1666 		rxs->comp_ring = rxq->vxrxq_comp_ring.vxcr_dma.dma_paddr;
1667 		rxs->comp_ring_len = rxq->vxrxq_comp_ring.vxcr_ndesc;
1668 		rxs->driver_data = vtophys(rxq);
1669 		rxs->driver_data_len = sizeof(struct vmxnet3_rxqueue);
1670 	}
1671 }
1672 
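/* Translate the enabled TX checksum capabilities into if_hwassist flags. */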
1673 static void
1674 vmxnet3_init_hwassist(struct vmxnet3_softc *sc)
1675 {
1676 	struct ifnet *ifp = sc->vmx_ifp;
1677 	uint64_t hwassist;
1678 
1679 	hwassist = 0;
1680 	if (ifp->if_capenable & IFCAP_TXCSUM)
1681 		hwassist |= VMXNET3_CSUM_OFFLOAD;
1682 	if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
1683 		hwassist |= VMXNET3_CSUM_OFFLOAD_IPV6;
1684 #if 0 /* XXX TSO */
1685 	if (ifp->if_capenable & IFCAP_TSO4)
1686 		hwassist |= CSUM_IP_TSO;
1687 	if (ifp->if_capenable & IFCAP_TSO6)
1688 		hwassist |= CSUM_IP6_TSO;
1689 #endif
1690 	ifp->if_hwassist = hwassist;
1691 }
1692 
1693 static void
1694 vmxnet3_reinit_interface(struct vmxnet3_softc *sc)
1695 {
1696 	struct ifnet *ifp;
1697 
1698 	ifp = sc->vmx_ifp;
1699 
1700 	/* Use the current MAC address. */
1701 	bcopy(IF_LLADDR(sc->vmx_ifp), sc->vmx_lladdr, ETHER_ADDR_LEN);
1702 	vmxnet3_set_lladdr(sc);
1703 
1704 	vmxnet3_init_hwassist(sc);
1705 }
1706 
1707 static void
1708 vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *sc)
1709 {
1710 	/*
1711 	 * Use the same key as the Linux driver until FreeBSD can do
1712 	 * RSS (presumably Toeplitz) in software.
1713 	 */
1714 	static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = {
1715 	    0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac,
1716 	    0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28,
1717 	    0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70,
1718 	    0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3,
1719 	    0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9,
1720 	};
1721 
1722 	struct vmxnet3_driver_shared *ds;
1723 	struct vmxnet3_rss_shared *rss;
1724 	int i;
1725 
1726 	ds = sc->vmx_ds;
1727 	rss = sc->vmx_rss;
1728 
1729 	rss->hash_type =
1730 	    UPT1_RSS_HASH_TYPE_IPV4 | UPT1_RSS_HASH_TYPE_TCP_IPV4 |
1731 	    UPT1_RSS_HASH_TYPE_IPV6 | UPT1_RSS_HASH_TYPE_TCP_IPV6;
1732 	rss->hash_func = UPT1_RSS_HASH_FUNC_TOEPLITZ;
1733 	rss->hash_key_size = UPT1_RSS_MAX_KEY_SIZE;
1734 	rss->ind_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
1735 	memcpy(rss->hash_key, rss_key, UPT1_RSS_MAX_KEY_SIZE);
1736 
1737 	for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++)
1738 		rss->ind_table[i] = i % sc->vmx_nrxqueues;
1739 }
1740 
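/*
 * Refresh the shared data fields that may change across reinits (MTU, queue
 * counts, offload features, RSS configuration) and pass the physical address
 * of the shared area to the device.
 */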
1741 static void
1742 vmxnet3_reinit_shared_data(struct vmxnet3_softc *sc)
1743 {
1744 	struct ifnet *ifp;
1745 	struct vmxnet3_driver_shared *ds;
1746 
1747 	ifp = sc->vmx_ifp;
1748 	ds = sc->vmx_ds;
1749 
1750 	ds->mtu = ifp->if_mtu;
1751 	ds->ntxqueue = sc->vmx_ntxqueues;
1752 	ds->nrxqueue = sc->vmx_nrxqueues;
1753 
1754 	ds->upt_features = 0;
1755 	if (ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6))
1756 		ds->upt_features |= UPT1_F_CSUM;
1757 	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1758 		ds->upt_features |= UPT1_F_VLAN;
1759 #if 0 /* XXX LRO */
1760 	if (ifp->if_capenable & IFCAP_LRO)
1761 		ds->upt_features |= UPT1_F_LRO;
1762 #endif
1763 
1764 	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1765 		ds->upt_features |= UPT1_F_RSS;
1766 		vmxnet3_reinit_rss_shared_data(sc);
1767 	}
1768 
1769 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSL, sc->vmx_ds_dma.dma_paddr);
1770 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSH,
1771 	    (uint64_t) sc->vmx_ds_dma.dma_paddr >> 32);
1772 }
1773 
1774 static int
1775 vmxnet3_alloc_data(struct vmxnet3_softc *sc)
1776 {
1777 	int error;
1778 
1779 	error = vmxnet3_alloc_shared_data(sc);
1780 	if (error)
1781 		return (error);
1782 
1783 	error = vmxnet3_alloc_queue_data(sc);
1784 	if (error)
1785 		return (error);
1786 
1787 	error = vmxnet3_alloc_mcast_table(sc);
1788 	if (error)
1789 		return (error);
1790 
1791 	vmxnet3_init_shared_data(sc);
1792 
1793 	return (0);
1794 }
1795 
1796 static void
1797 vmxnet3_free_data(struct vmxnet3_softc *sc)
1798 {
1799 
1800 	vmxnet3_free_mcast_table(sc);
1801 	vmxnet3_free_queue_data(sc);
1802 	vmxnet3_free_shared_data(sc);
1803 }
1804 
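/*
 * Create and attach the ifnet: set the standard callbacks, advertise the
 * checksum and VLAN capabilities the device supports, register the VLAN
 * configuration event handlers, and add a single auto-select media entry.
 */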
1805 static int
1806 vmxnet3_setup_interface(struct vmxnet3_softc *sc)
1807 {
1808 	device_t dev;
1809 	struct ifnet *ifp;
1810 
1811 	dev = sc->vmx_dev;
1812 
1813 	ifp = sc->vmx_ifp = if_alloc(IFT_ETHER);
1814 	if (ifp == NULL) {
1815 		device_printf(dev, "cannot allocate ifnet structure\n");
1816 		return (ENOSPC);
1817 	}
1818 
1819 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1820 	ifp->if_baudrate = IF_Gbps(10ULL);
1821 	ifp->if_softc = sc;
1822 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1823 	ifp->if_init = vmxnet3_init;
1824 	ifp->if_ioctl = vmxnet3_ioctl;
1825 #if 0 /* XXX TSO */
1826 	ifp->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
1827 	ifp->if_hw_tsomaxsegcount = VMXNET3_TX_MAXSEGS;
1828 	ifp->if_hw_tsomaxsegsize = VMXNET3_TX_MAXSEGSIZE;
1829 #endif
1830 
1831 #ifdef VMXNET3_LEGACY_TX
1832 	ifp->if_start = vmxnet3_start;
1833 	ifq_set_maxlen(&ifp->if_snd, sc->vmx_ntxdescs - 1);
1834 	ifq_set_ready(&ifp->if_snd);
1835 #else
1836 	ifp->if_transmit = vmxnet3_txq_mq_start;
1837 	ifp->if_qflush = vmxnet3_qflush;
1838 #endif
1839 
1840 	vmxnet3_get_lladdr(sc);
1841 	ether_ifattach(ifp, sc->vmx_lladdr, NULL);
1842 
1843 	ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_TXCSUM;
1844 	ifp->if_capabilities |= IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6;
1845 #if 0 /* XXX TSO */
1846 	ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6;
1847 #endif
1848 	ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING |
1849 	    IFCAP_VLAN_HWCSUM;
1850 	ifp->if_capenable = ifp->if_capabilities;
1851 
1852 #if 0 /* XXX LRO / VLAN_HWFILTER */
1853 	/* These capabilities are not enabled by default. */
1854 	ifp->if_capabilities |= /* IFCAP_LRO | */ IFCAP_VLAN_HWFILTER;
1855 #endif
1856 
1857 	sc->vmx_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
1858 	    vmxnet3_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
1859 	sc->vmx_vlan_detach = EVENTHANDLER_REGISTER(vlan_config,
1860 	    vmxnet3_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
1861 
1862 	ifmedia_init(&sc->vmx_media, 0, vmxnet3_media_change,
1863 	    vmxnet3_media_status);
1864 	ifmedia_add(&sc->vmx_media, IFM_ETHER | IFM_AUTO, 0, NULL);
1865 	ifmedia_set(&sc->vmx_media, IFM_ETHER | IFM_AUTO);
1866 
1867 	return (0);
1868 }
1869 
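/*
 * Handle an event interrupt: acknowledge the pending events, update the link
 * state, report queue errors, and reinitialize the device if a TX or RX
 * queue error was signalled.
 */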
1870 static void
1871 vmxnet3_evintr(struct vmxnet3_softc *sc)
1872 {
1873 	device_t dev;
1874 	struct ifnet *ifp;
1875 	struct vmxnet3_txq_shared *ts;
1876 	struct vmxnet3_rxq_shared *rs;
1877 	uint32_t event;
1878 	int reset;
1879 
1880 	dev = sc->vmx_dev;
1881 	ifp = sc->vmx_ifp;
1882 	reset = 0;
1883 
1884 	VMXNET3_CORE_LOCK(sc);
1885 
1886 	/* Clear events. */
1887 	event = sc->vmx_ds->event;
1888 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_EVENT, event);
1889 
1890 	if (event & VMXNET3_EVENT_LINK) {
1891 		vmxnet3_link_status(sc);
1892 		if (sc->vmx_link_active != 0)
1893 			vmxnet3_tx_start_all(sc);
1894 	}
1895 
1896 	if (event & (VMXNET3_EVENT_TQERROR | VMXNET3_EVENT_RQERROR)) {
1897 		reset = 1;
1898 		vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_STATUS);
1899 		ts = sc->vmx_txq[0].vxtxq_ts;
1900 		if (ts->stopped != 0)
1901 			device_printf(dev, "Tx queue error %#x\n", ts->error);
1902 		rs = sc->vmx_rxq[0].vxrxq_rs;
1903 		if (rs->stopped != 0)
1904 			device_printf(dev, "Rx queue error %#x\n", rs->error);
1905 		device_printf(dev, "Rx/Tx queue error event ... resetting\n");
1906 	}
1907 
1908 	if (event & VMXNET3_EVENT_DIC)
1909 		device_printf(dev, "device implementation change event\n");
1910 	if (event & VMXNET3_EVENT_DEBUG)
1911 		device_printf(dev, "debug event\n");
1912 
1913 	if (reset != 0) {
1914 		ifp->if_flags &= ~IFF_RUNNING;
1915 		vmxnet3_init_locked(sc);
1916 	}
1917 
1918 	VMXNET3_CORE_UNLOCK(sc);
1919 }
1920 
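/*
 * Reclaim transmitted descriptors from the Tx completion ring. An entry
 * is valid when its generation bit matches the ring's current generation;
 * for each completed packet the DMA map is unloaded, the mbuf freed, and
 * the command ring's next index advanced past the end-of-packet slot.
 */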
1921 static void
1922 vmxnet3_txq_eof(struct vmxnet3_txqueue *txq)
1923 {
1924 	struct vmxnet3_softc *sc;
1925 	struct ifnet *ifp;
1926 	struct vmxnet3_txring *txr;
1927 	struct vmxnet3_comp_ring *txc;
1928 	struct vmxnet3_txcompdesc *txcd;
1929 	struct vmxnet3_txbuf *txb;
1930 	struct mbuf *m;
1931 	u_int sop;
1932 
1933 	sc = txq->vxtxq_sc;
1934 	ifp = sc->vmx_ifp;
1935 	txr = &txq->vxtxq_cmd_ring;
1936 	txc = &txq->vxtxq_comp_ring;
1937 
1938 	VMXNET3_TXQ_LOCK_ASSERT(txq);
1939 
1940 	for (;;) {
1941 		txcd = &txc->vxcr_u.txcd[txc->vxcr_next];
1942 		if (txcd->gen != txc->vxcr_gen)
1943 			break;
1944 		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1945 
1946 		if (++txc->vxcr_next == txc->vxcr_ndesc) {
1947 			txc->vxcr_next = 0;
1948 			txc->vxcr_gen ^= 1;
1949 		}
1950 
1951 		sop = txr->vxtxr_next;
1952 		txb = &txr->vxtxr_txbuf[sop];
1953 
1954 		if ((m = txb->vtxb_m) != NULL) {
1955 			bus_dmamap_sync(txr->vxtxr_txtag, txb->vtxb_dmamap,
1956 			    BUS_DMASYNC_POSTWRITE);
1957 			bus_dmamap_unload(txr->vxtxr_txtag, txb->vtxb_dmamap);
1958 
1959 			txq->vxtxq_stats.vmtxs_opackets++;
1960 			txq->vxtxq_stats.vmtxs_obytes += m->m_pkthdr.len;
1961 			if (m->m_flags & M_MCAST)
1962 				txq->vxtxq_stats.vmtxs_omcasts++;
1963 
1964 			m_freem(m);
1965 			txb->vtxb_m = NULL;
1966 		}
1967 
1968 		txr->vxtxr_next = (txcd->eop_idx + 1) % txr->vxtxr_ndesc;
1969 	}
1970 
1971 	if (txr->vxtxr_head == txr->vxtxr_next)
1972 		txq->vxtxq_watchdog = 0;
1973 }
1974 
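/*
 * Allocate and DMA-load a replacement Rx buffer at the ring's fill index.
 * Head buffers (start of frame, ring 0) use regular clusters while body
 * buffers use MJUMPAGESIZE clusters. The spare DMA map is loaded first so
 * the previously posted buffer is left intact if allocation or the load
 * fails.
 */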
1975 static int
1976 vmxnet3_newbuf(struct vmxnet3_softc *sc, struct vmxnet3_rxring *rxr)
1977 {
1978 	struct ifnet *ifp;
1979 	struct mbuf *m;
1980 	struct vmxnet3_rxdesc *rxd;
1981 	struct vmxnet3_rxbuf *rxb;
1982 	bus_dma_tag_t tag;
1983 	bus_dmamap_t dmap;
1984 	bus_dma_segment_t segs[1];
1985 	int idx, clsize, btype, flags, nsegs, error;
1986 
1987 	ifp = sc->vmx_ifp;
1988 	tag = rxr->vxrxr_rxtag;
1989 	dmap = rxr->vxrxr_spare_dmap;
1990 	idx = rxr->vxrxr_fill;
1991 	rxd = &rxr->vxrxr_rxd[idx];
1992 	rxb = &rxr->vxrxr_rxbuf[idx];
1993 
1994 #ifdef VMXNET3_FAILPOINTS
1995 	KFAIL_POINT_CODE(VMXNET3_FP, newbuf, return ENOBUFS);
1996 	if (rxr->vxrxr_rid != 0)
1997 		KFAIL_POINT_CODE(VMXNET3_FP, newbuf_body_only, return ENOBUFS);
1998 #endif
1999 
2000 	if (rxr->vxrxr_rid == 0 && (idx % sc->vmx_rx_max_chain) == 0) {
2001 		flags = M_PKTHDR;
2002 		clsize = MCLBYTES;
2003 		btype = VMXNET3_BTYPE_HEAD;
2004 	} else {
2005 #if __FreeBSD_version < 902001
2006 		/*
2007 		 * These mbufs will never be used for the start of a frame.
2008 		 * Roughly prior to branching releng/9.2, the load_mbuf_sg()
2009 		 * required the mbuf to always be a packet header. Avoid
2010 		 * unnecessary mbuf initialization in newer versions where
2011 		 * that is not the case.
2012 		 */
2013 		flags = M_PKTHDR;
2014 #else
2015 		flags = 0;
2016 #endif
2017 		clsize = MJUMPAGESIZE;
2018 		btype = VMXNET3_BTYPE_BODY;
2019 	}
2020 
2021 	m = m_getjcl(M_NOWAIT, MT_DATA, flags, clsize);
2022 	if (m == NULL) {
2023 		sc->vmx_stats.vmst_mgetcl_failed++;
2024 		return (ENOBUFS);
2025 	}
2026 
2027 	if (btype == VMXNET3_BTYPE_HEAD) {
2028 		m->m_len = m->m_pkthdr.len = clsize;
2029 		m_adj(m, ETHER_ALIGN);
2030 	} else
2031 		m->m_len = clsize;
2032 
2033 	error = bus_dmamap_load_mbuf_segment(tag, dmap, m, &segs[0], 1, &nsegs,
2034 	    BUS_DMA_NOWAIT);
2035 	if (error) {
2036 		m_freem(m);
2037 		sc->vmx_stats.vmst_mbuf_load_failed++;
2038 		return (error);
2039 	}
2040 	KASSERT(nsegs == 1,
2041 	    ("%s: mbuf %p with too many segments %d", __func__, m, nsegs));
2042 #if __FreeBSD_version < 902001
2043 	if (btype == VMXNET3_BTYPE_BODY)
2044 		m->m_flags &= ~M_PKTHDR;
2045 #endif
2046 
2047 	if (rxb->vrxb_m != NULL) {
2048 		bus_dmamap_sync(tag, rxb->vrxb_dmamap, BUS_DMASYNC_POSTREAD);
2049 		bus_dmamap_unload(tag, rxb->vrxb_dmamap);
2050 	}
2051 
2052 	rxr->vxrxr_spare_dmap = rxb->vrxb_dmamap;
2053 	rxb->vrxb_dmamap = dmap;
2054 	rxb->vrxb_m = m;
2055 
2056 	rxd->addr = segs[0].ds_addr;
2057 	rxd->len = segs[0].ds_len;
2058 	rxd->btype = btype;
2059 	rxd->gen = rxr->vxrxr_gen;
2060 
2061 	vmxnet3_rxr_increment_fill(rxr);
2062 	return (0);
2063 }
2064 
2065 static void
2066 vmxnet3_rxq_eof_discard(struct vmxnet3_rxqueue *rxq,
2067     struct vmxnet3_rxring *rxr, int idx)
2068 {
2069 	struct vmxnet3_rxdesc *rxd;
2070 
2071 	rxd = &rxr->vxrxr_rxd[idx];
2072 	rxd->gen = rxr->vxrxr_gen;
2073 	vmxnet3_rxr_increment_fill(rxr);
2074 }
2075 
2076 static void
2077 vmxnet3_rxq_discard_chain(struct vmxnet3_rxqueue *rxq)
2078 {
2079 	struct vmxnet3_softc *sc;
2080 	struct vmxnet3_rxring *rxr;
2081 	struct vmxnet3_comp_ring *rxc;
2082 	struct vmxnet3_rxcompdesc *rxcd;
2083 	int idx, eof;
2084 
2085 	sc = rxq->vxrxq_sc;
2086 	rxc = &rxq->vxrxq_comp_ring;
2087 
2088 	do {
2089 		rxcd = &rxc->vxcr_u.rxcd[rxc->vxcr_next];
2090 		if (rxcd->gen != rxc->vxcr_gen)
2091 			break;		/* Not expected. */
2092 		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
2093 
2094 		if (++rxc->vxcr_next == rxc->vxcr_ndesc) {
2095 			rxc->vxcr_next = 0;
2096 			rxc->vxcr_gen ^= 1;
2097 		}
2098 
2099 		idx = rxcd->rxd_idx;
2100 		eof = rxcd->eop;
2101 		if (rxcd->qid < sc->vmx_nrxqueues)
2102 			rxr = &rxq->vxrxq_cmd_ring[0];
2103 		else
2104 			rxr = &rxq->vxrxq_cmd_ring[1];
2105 		vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2106 	} while (!eof);
2107 }
2108 
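/*
 * Translate the checksum bits of an Rx completion descriptor into mbuf
 * csum_flags: the IPv4 header checksum is marked checked/valid, and a
 * non-fragmented TCP or UDP packet with a good checksum is reported as a
 * valid pseudo-header checksum with csum_data of 0xFFFF.
 */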
2109 static void
2110 vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m)
2111 {
2112 
2113 	if (rxcd->ipv4) {
2114 		m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED;
2115 		if (rxcd->ipcsum_ok)
2116 			m->m_pkthdr.csum_flags |= CSUM_IP_VALID;
2117 	}
2118 
2119 	if (!rxcd->fragment) {
2120 		if (rxcd->csum_ok && (rxcd->tcp || rxcd->udp)) {
2121 			m->m_pkthdr.csum_flags |= CSUM_DATA_VALID |
2122 			    CSUM_PSEUDO_HDR;
2123 			m->m_pkthdr.csum_data = 0xFFFF;
2124 		}
2125 	}
2126 }
2127 
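/*
 * Hand a completed frame to the network stack. Frames the device flagged
 * in error are counted and dropped. Checksum and VLAN metadata from the
 * completion descriptor are attached first, and the Rx queue lock is
 * dropped around the call into if_input().
 */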
2128 static void
2129 vmxnet3_rxq_input(struct vmxnet3_rxqueue *rxq,
2130     struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m)
2131 {
2132 	struct vmxnet3_softc *sc;
2133 	struct ifnet *ifp;
2134 
2135 	sc = rxq->vxrxq_sc;
2136 	ifp = sc->vmx_ifp;
2137 
2138 	if (rxcd->error) {
2139 		rxq->vxrxq_stats.vmrxs_ierrors++;
2140 		m_freem(m);
2141 		return;
2142 	}
2143 
2144 #if 0
2145 #ifdef notyet
2146 	switch (rxcd->rss_type) {
2147 	case VMXNET3_RCD_RSS_TYPE_IPV4:
2148 		m->m_pkthdr.flowid = rxcd->rss_hash;
2149 		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_IPV4);
2150 		break;
2151 	case VMXNET3_RCD_RSS_TYPE_TCPIPV4:
2152 		m->m_pkthdr.flowid = rxcd->rss_hash;
2153 		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_TCP_IPV4);
2154 		break;
2155 	case VMXNET3_RCD_RSS_TYPE_IPV6:
2156 		m->m_pkthdr.flowid = rxcd->rss_hash;
2157 		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_IPV6);
2158 		break;
2159 	case VMXNET3_RCD_RSS_TYPE_TCPIPV6:
2160 		m->m_pkthdr.flowid = rxcd->rss_hash;
2161 		M_HASHTYPE_SET(m, M_HASHTYPE_RSS_TCP_IPV6);
2162 		break;
2163 	default: /* VMXNET3_RCD_RSS_TYPE_NONE */
2164 		m->m_pkthdr.flowid = rxq->vxrxq_id;
2165 		M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2166 		break;
2167 	}
2168 #else
2169 	m->m_pkthdr.flowid = rxq->vxrxq_id;
2170 	M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2171 #endif
2172 #endif
2173 
2174 	if (!rxcd->no_csum)
2175 		vmxnet3_rx_csum(rxcd, m);
2176 	if (rxcd->vlan) {
2177 		m->m_flags |= M_VLANTAG;
2178 		m->m_pkthdr.ether_vlantag = rxcd->vtag;
2179 	}
2180 
2181 	rxq->vxrxq_stats.vmrxs_ipackets++;
2182 	rxq->vxrxq_stats.vmrxs_ibytes += m->m_pkthdr.len;
2183 
2184 	VMXNET3_RXQ_UNLOCK(rxq);
2185 	(*ifp->if_input)(ifp, m, NULL, -1);
2186 	VMXNET3_RXQ_LOCK(rxq);
2187 }
2188 
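/*
 * Process the Rx completion ring. A frame may span multiple descriptors:
 * the start-of-packet buffer always comes from command ring 0 while
 * continuation buffers may come from either ring. Buffers are chained
 * into a single mbuf chain until the end-of-packet descriptor is seen;
 * a partially assembled chain is preserved in vxrxq_mhead/vxrxq_mtail
 * across calls.
 */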
2189 static void
2190 vmxnet3_rxq_eof(struct vmxnet3_rxqueue *rxq)
2191 {
2192 	struct vmxnet3_softc *sc;
2193 	struct ifnet *ifp;
2194 	struct vmxnet3_rxring *rxr;
2195 	struct vmxnet3_comp_ring *rxc;
2196 	struct vmxnet3_rxdesc *rxd;
2197 	struct vmxnet3_rxcompdesc *rxcd;
2198 	struct mbuf *m, *m_head, *m_tail;
2199 	int idx, length;
2200 
2201 	sc = rxq->vxrxq_sc;
2202 	ifp = sc->vmx_ifp;
2203 	rxc = &rxq->vxrxq_comp_ring;
2204 
2205 	VMXNET3_RXQ_LOCK_ASSERT(rxq);
2206 
2207 	if ((ifp->if_flags & IFF_RUNNING) == 0)
2208 		return;
2209 
2210 	m_head = rxq->vxrxq_mhead;
2211 	rxq->vxrxq_mhead = NULL;
2212 	m_tail = rxq->vxrxq_mtail;
2213 	rxq->vxrxq_mtail = NULL;
2214 	KKASSERT(m_head == NULL || m_tail != NULL);
2215 
2216 	for (;;) {
2217 		rxcd = &rxc->vxcr_u.rxcd[rxc->vxcr_next];
2218 		if (rxcd->gen != rxc->vxcr_gen) {
2219 			rxq->vxrxq_mhead = m_head;
2220 			rxq->vxrxq_mtail = m_tail;
2221 			break;
2222 		}
2223 		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
2224 
2225 		if (++rxc->vxcr_next == rxc->vxcr_ndesc) {
2226 			rxc->vxcr_next = 0;
2227 			rxc->vxcr_gen ^= 1;
2228 		}
2229 
2230 		idx = rxcd->rxd_idx;
2231 		length = rxcd->len;
2232 		if (rxcd->qid < sc->vmx_nrxqueues)
2233 			rxr = &rxq->vxrxq_cmd_ring[0];
2234 		else
2235 			rxr = &rxq->vxrxq_cmd_ring[1];
2236 		rxd = &rxr->vxrxr_rxd[idx];
2237 
2238 		m = rxr->vxrxr_rxbuf[idx].vrxb_m;
2239 		KASSERT(m != NULL, ("%s: queue %d idx %d without mbuf",
2240 		    __func__, rxcd->qid, idx));
2241 
2242 		/*
2243 		 * The host may skip descriptors. We detect this when this
2244 		 * descriptor does not match the previous fill index. Catch
2245 		 * up with the host now.
2246 		 */
2247 		if (__predict_false(rxr->vxrxr_fill != idx)) {
2248 			while (rxr->vxrxr_fill != idx) {
2249 				rxr->vxrxr_rxd[rxr->vxrxr_fill].gen =
2250 				    rxr->vxrxr_gen;
2251 				vmxnet3_rxr_increment_fill(rxr);
2252 			}
2253 		}
2254 
2255 		if (rxcd->sop) {
2256 			KASSERT(rxd->btype == VMXNET3_BTYPE_HEAD,
2257 			    ("%s: start of frame w/o head buffer", __func__));
2258 			KASSERT(rxr == &rxq->vxrxq_cmd_ring[0],
2259 			    ("%s: start of frame not in ring 0", __func__));
2260 			KASSERT((idx % sc->vmx_rx_max_chain) == 0,
2261 			    ("%s: start of frame at unexpected index %d (%d)",
2262 			     __func__, idx, sc->vmx_rx_max_chain));
2263 			KASSERT(m_head == NULL,
2264 			    ("%s: duplicate start of frame?", __func__));
2265 
2266 			if (length == 0) {
2267 				/* Just ignore this descriptor. */
2268 				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2269 				goto nextp;
2270 			}
2271 
2272 			if (vmxnet3_newbuf(sc, rxr) != 0) {
2273 				rxq->vxrxq_stats.vmrxs_iqdrops++;
2274 				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2275 				if (!rxcd->eop)
2276 					vmxnet3_rxq_discard_chain(rxq);
2277 				goto nextp;
2278 			}
2279 
2280 			m->m_pkthdr.rcvif = ifp;
2281 			m->m_pkthdr.len = m->m_len = length;
2282 			m->m_pkthdr.csum_flags = 0;
2283 			m_head = m_tail = m;
2284 
2285 		} else {
2286 			KASSERT(rxd->btype == VMXNET3_BTYPE_BODY,
2287 			    ("%s: non start of frame w/o body buffer", __func__));
2288 
2289 			if (m_head == NULL && m_tail == NULL) {
2290 				/*
2291 				 * This is a continuation of a packet that we
2292 				 * started to drop, but could not drop entirely
2293 				 * because this segment was still owned by the
2294 				 * host.  So, drop the remainder now.
2295 				 */
2296 				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2297 				if (!rxcd->eop)
2298 					vmxnet3_rxq_discard_chain(rxq);
2299 				goto nextp;
2300 			}
2301 
2302 			KASSERT(m_head != NULL,
2303 			    ("%s: frame not started?", __func__));
2304 
2305 			if (vmxnet3_newbuf(sc, rxr) != 0) {
2306 				rxq->vxrxq_stats.vmrxs_iqdrops++;
2307 				vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2308 				if (!rxcd->eop)
2309 					vmxnet3_rxq_discard_chain(rxq);
2310 				m_freem(m_head);
2311 				m_head = m_tail = NULL;
2312 				goto nextp;
2313 			}
2314 
2315 			m->m_len = length;
2316 			m_head->m_pkthdr.len += length;
2317 			m_tail->m_next = m;
2318 			m_tail = m;
2319 		}
2320 
2321 		if (rxcd->eop) {
2322 			vmxnet3_rxq_input(rxq, rxcd, m_head);
2323 			m_head = m_tail = NULL;
2324 
2325 			/* Must recheck after dropping the Rx lock. */
2326 			if ((ifp->if_flags & IFF_RUNNING) == 0)
2327 				break;
2328 		}
2329 
2330 nextp:
2331 		if (__predict_false(rxq->vxrxq_rs->update_rxhead)) {
2332 			int qid = rxcd->qid;
2333 			bus_size_t r;
2334 
2335 			idx = (idx + 1) % rxr->vxrxr_ndesc;
2336 			if (qid >= sc->vmx_nrxqueues) {
2337 				qid -= sc->vmx_nrxqueues;
2338 				r = VMXNET3_BAR0_RXH2(qid);
2339 			} else
2340 				r = VMXNET3_BAR0_RXH1(qid);
2341 			vmxnet3_write_bar0(sc, r, idx);
2342 		}
2343 	}
2344 }
2345 
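/*
 * Single-vector interrupt handler shared by INTx and MSI. For legacy
 * interrupts the interrupt status register is read first to determine
 * whether the interrupt is ours. Events, Rx queue 0, and Tx queue 0 are
 * all serviced from this one handler.
 */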
2346 static void
2347 vmxnet3_legacy_intr(void *xsc)
2348 {
2349 	struct vmxnet3_softc *sc;
2350 	struct vmxnet3_rxqueue *rxq;
2351 	struct vmxnet3_txqueue *txq;
2352 
2353 	sc = xsc;
2354 	rxq = &sc->vmx_rxq[0];
2355 	txq = &sc->vmx_txq[0];
2356 
2357 	if (sc->vmx_intr_type == VMXNET3_IT_LEGACY) {
2358 		if (vmxnet3_read_bar1(sc, VMXNET3_BAR1_INTR) == 0)
2359 			return;
2360 	}
2361 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2362 		vmxnet3_disable_all_intrs(sc);
2363 
2364 	if (sc->vmx_ds->event != 0)
2365 		vmxnet3_evintr(sc);
2366 
2367 	VMXNET3_RXQ_LOCK(rxq);
2368 	vmxnet3_rxq_eof(rxq);
2369 	VMXNET3_RXQ_UNLOCK(rxq);
2370 
2371 	VMXNET3_TXQ_LOCK(txq);
2372 	vmxnet3_txq_eof(txq);
2373 	vmxnet3_txq_start(txq);
2374 	VMXNET3_TXQ_UNLOCK(txq);
2375 
2376 	vmxnet3_enable_all_intrs(sc);
2377 }
2378 
2379 #ifdef __FreeBSD__
2380 static void
2381 vmxnet3_txq_intr(void *xtxq)
2382 {
2383 	struct vmxnet3_softc *sc;
2384 	struct vmxnet3_txqueue *txq;
2385 
2386 	txq = xtxq;
2387 	sc = txq->vxtxq_sc;
2388 
2389 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2390 		vmxnet3_disable_intr(sc, txq->vxtxq_intr_idx);
2391 
2392 	VMXNET3_TXQ_LOCK(txq);
2393 	vmxnet3_txq_eof(txq);
2394 	vmxnet3_txq_start(txq);
2395 	VMXNET3_TXQ_UNLOCK(txq);
2396 
2397 	vmxnet3_enable_intr(sc, txq->vxtxq_intr_idx);
2398 }
2399 
2400 static void
2401 vmxnet3_rxq_intr(void *xrxq)
2402 {
2403 	struct vmxnet3_softc *sc;
2404 	struct vmxnet3_rxqueue *rxq;
2405 
2406 	rxq = xrxq;
2407 	sc = rxq->vxrxq_sc;
2408 
2409 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2410 		vmxnet3_disable_intr(sc, rxq->vxrxq_intr_idx);
2411 
2412 	VMXNET3_RXQ_LOCK(rxq);
2413 	vmxnet3_rxq_eof(rxq);
2414 	VMXNET3_RXQ_UNLOCK(rxq);
2415 
2416 	vmxnet3_enable_intr(sc, rxq->vxrxq_intr_idx);
2417 }
2418 
2419 static void
2420 vmxnet3_event_intr(void *xsc)
2421 {
2422 	struct vmxnet3_softc *sc;
2423 
2424 	sc = xsc;
2425 
2426 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2427 		vmxnet3_disable_intr(sc, sc->vmx_event_intr_idx);
2428 
2429 	if (sc->vmx_ds->event != 0)
2430 		vmxnet3_evintr(sc);
2431 
2432 	vmxnet3_enable_intr(sc, sc->vmx_event_intr_idx);
2433 }
2434 #endif
2435 
2436 static void
2437 vmxnet3_txstop(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
2438 {
2439 	struct vmxnet3_txring *txr;
2440 	struct vmxnet3_txbuf *txb;
2441 	int i;
2442 
2443 	txr = &txq->vxtxq_cmd_ring;
2444 
2445 	for (i = 0; i < txr->vxtxr_ndesc; i++) {
2446 		txb = &txr->vxtxr_txbuf[i];
2447 
2448 		if (txb->vtxb_m == NULL)
2449 			continue;
2450 
2451 		bus_dmamap_sync(txr->vxtxr_txtag, txb->vtxb_dmamap,
2452 		    BUS_DMASYNC_POSTWRITE);
2453 		bus_dmamap_unload(txr->vxtxr_txtag, txb->vtxb_dmamap);
2454 		m_freem(txb->vtxb_m);
2455 		txb->vtxb_m = NULL;
2456 	}
2457 }
2458 
2459 static void
2460 vmxnet3_rxstop(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
2461 {
2462 	struct vmxnet3_rxring *rxr;
2463 	struct vmxnet3_rxbuf *rxb;
2464 	int i, j;
2465 
2466 	if (rxq->vxrxq_mhead != NULL) {
2467 		m_freem(rxq->vxrxq_mhead);
2468 		rxq->vxrxq_mhead = NULL;
2469 		rxq->vxrxq_mtail = NULL;
2470 	}
2471 
2472 	for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
2473 		rxr = &rxq->vxrxq_cmd_ring[i];
2474 
2475 		for (j = 0; j < rxr->vxrxr_ndesc; j++) {
2476 			rxb = &rxr->vxrxr_rxbuf[j];
2477 
2478 			if (rxb->vrxb_m == NULL)
2479 				continue;
2480 
2481 			bus_dmamap_sync(rxr->vxrxr_rxtag, rxb->vrxb_dmamap,
2482 			    BUS_DMASYNC_POSTREAD);
2483 			bus_dmamap_unload(rxr->vxrxr_rxtag, rxb->vrxb_dmamap);
2484 			m_freem(rxb->vrxb_m);
2485 			rxb->vrxb_m = NULL;
2486 		}
2487 	}
2488 }
2489 
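/*
 * Take and release each queue lock so that any interrupt handler still
 * running in the Rx/Tx paths has drained before the rings are torn down.
 */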
2490 static void
2491 vmxnet3_stop_rendezvous(struct vmxnet3_softc *sc)
2492 {
2493 	struct vmxnet3_rxqueue *rxq;
2494 	struct vmxnet3_txqueue *txq;
2495 	int i;
2496 
2497 	for (i = 0; i < sc->vmx_nrxqueues; i++) {
2498 		rxq = &sc->vmx_rxq[i];
2499 		VMXNET3_RXQ_LOCK(rxq);
2500 		VMXNET3_RXQ_UNLOCK(rxq);
2501 	}
2502 
2503 	for (i = 0; i < sc->vmx_ntxqueues; i++) {
2504 		txq = &sc->vmx_txq[i];
2505 		VMXNET3_TXQ_LOCK(txq);
2506 		VMXNET3_TXQ_UNLOCK(txq);
2507 	}
2508 }
2509 
2510 static void
2511 vmxnet3_stop(struct vmxnet3_softc *sc)
2512 {
2513 	struct ifnet *ifp;
2514 	int q;
2515 
2516 	ifp = sc->vmx_ifp;
2517 	VMXNET3_CORE_LOCK_ASSERT(sc);
2518 
2519 	ifp->if_flags &= ~IFF_RUNNING;
2520 	sc->vmx_link_active = 0;
2521 	callout_stop(&sc->vmx_tick);
2522 
2523 	/* Disable interrupts. */
2524 	vmxnet3_disable_all_intrs(sc);
2525 	vmxnet3_write_cmd(sc, VMXNET3_CMD_DISABLE);
2526 
2527 	vmxnet3_stop_rendezvous(sc);
2528 
2529 	for (q = 0; q < sc->vmx_ntxqueues; q++)
2530 		vmxnet3_txstop(sc, &sc->vmx_txq[q]);
2531 	for (q = 0; q < sc->vmx_nrxqueues; q++)
2532 		vmxnet3_rxstop(sc, &sc->vmx_rxq[q]);
2533 
2534 	vmxnet3_write_cmd(sc, VMXNET3_CMD_RESET);
2535 }
2536 
2537 static void
2538 vmxnet3_txinit(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
2539 {
2540 	struct vmxnet3_txring *txr;
2541 	struct vmxnet3_comp_ring *txc;
2542 
2543 	txr = &txq->vxtxq_cmd_ring;
2544 	txr->vxtxr_head = 0;
2545 	txr->vxtxr_next = 0;
2546 	txr->vxtxr_gen = VMXNET3_INIT_GEN;
2547 	bzero(txr->vxtxr_txd,
2548 	    txr->vxtxr_ndesc * sizeof(struct vmxnet3_txdesc));
2549 
2550 	txc = &txq->vxtxq_comp_ring;
2551 	txc->vxcr_next = 0;
2552 	txc->vxcr_gen = VMXNET3_INIT_GEN;
2553 	bzero(txc->vxcr_u.txcd,
2554 	    txc->vxcr_ndesc * sizeof(struct vmxnet3_txcompdesc));
2555 }
2556 
2557 static int
2558 vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
2559 {
2560 	struct ifnet *ifp;
2561 	struct vmxnet3_rxring *rxr;
2562 	struct vmxnet3_comp_ring *rxc;
2563 	int i, populate, idx, frame_size, error;
2564 
2565 	ifp = sc->vmx_ifp;
2566 	frame_size = ETHER_ALIGN + sizeof(struct ether_vlan_header) +
2567 	    ifp->if_mtu;
2568 
2569 	/*
2570 	 * If the MTU causes us to exceed what a regular sized cluster can
2571 	 * handle, we allocate a second MJUMPAGESIZE cluster after it in
2572 	 * ring 0. If in use, ring 1 always contains MJUMPAGESIZE clusters.
2573 	 *
2574 	 * Keep rx_max_chain a divisor of the maximum Rx ring size to make
2575 	 * our life easier. We do not support changing the ring size after
2576 	 * the attach.
2577 	 */
2578 	if (frame_size <= MCLBYTES)
2579 		sc->vmx_rx_max_chain = 1;
2580 	else
2581 		sc->vmx_rx_max_chain = 2;
2582 
2583 	/*
2584 	 * Only populate ring 1 if the configuration will take advantage
2585 	 * of it. That is either when LRO is enabled or the frame size
2586 	 * exceeds what ring 0 can contain.
2587 	 */
2588 #if 0 /* XXX LRO */
2589 	if ((ifp->if_capenable & IFCAP_LRO) == 0 &&
2590 #else
2591 	if (
2592 #endif
2593 	    frame_size <= MCLBYTES + MJUMPAGESIZE)
2594 		populate = 1;
2595 	else
2596 		populate = VMXNET3_RXRINGS_PERQ;
2597 
2598 	for (i = 0; i < populate; i++) {
2599 		rxr = &rxq->vxrxq_cmd_ring[i];
2600 		rxr->vxrxr_fill = 0;
2601 		rxr->vxrxr_gen = VMXNET3_INIT_GEN;
2602 		bzero(rxr->vxrxr_rxd,
2603 		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
2604 
2605 		for (idx = 0; idx < rxr->vxrxr_ndesc; idx++) {
2606 			error = vmxnet3_newbuf(sc, rxr);
2607 			if (error)
2608 				return (error);
2609 		}
2610 	}
2611 
2612 	for (/**/; i < VMXNET3_RXRINGS_PERQ; i++) {
2613 		rxr = &rxq->vxrxq_cmd_ring[i];
2614 		rxr->vxrxr_fill = 0;
2615 		rxr->vxrxr_gen = 0;
2616 		bzero(rxr->vxrxr_rxd,
2617 		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
2618 	}
2619 
2620 	rxc = &rxq->vxrxq_comp_ring;
2621 	rxc->vxcr_next = 0;
2622 	rxc->vxcr_gen = VMXNET3_INIT_GEN;
2623 	bzero(rxc->vxcr_u.rxcd,
2624 	    rxc->vxcr_ndesc * sizeof(struct vmxnet3_rxcompdesc));
2625 
2626 	return (0);
2627 }
2628 
2629 static int
2630 vmxnet3_reinit_queues(struct vmxnet3_softc *sc)
2631 {
2632 	device_t dev;
2633 	int q, error;
2634 
2635 	dev = sc->vmx_dev;
2636 
2637 	for (q = 0; q < sc->vmx_ntxqueues; q++)
2638 		vmxnet3_txinit(sc, &sc->vmx_txq[q]);
2639 
2640 	for (q = 0; q < sc->vmx_nrxqueues; q++) {
2641 		error = vmxnet3_rxinit(sc, &sc->vmx_rxq[q]);
2642 		if (error) {
2643 			device_printf(dev, "cannot populate Rx queue %d\n", q);
2644 			return (error);
2645 		}
2646 	}
2647 
2648 	return (0);
2649 }
2650 
2651 static int
2652 vmxnet3_enable_device(struct vmxnet3_softc *sc)
2653 {
2654 	int q;
2655 
2656 	if (vmxnet3_read_cmd(sc, VMXNET3_CMD_ENABLE) != 0) {
2657 		device_printf(sc->vmx_dev, "device enable command failed!\n");
2658 		return (1);
2659 	}
2660 
2661 	/* Reset the Rx queue heads. */
2662 	for (q = 0; q < sc->vmx_nrxqueues; q++) {
2663 		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH1(q), 0);
2664 		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH2(q), 0);
2665 	}
2666 
2667 	return (0);
2668 }
2669 
2670 static void
2671 vmxnet3_reinit_rxfilters(struct vmxnet3_softc *sc)
2672 {
2673 	struct ifnet *ifp;
2674 
2675 	ifp = sc->vmx_ifp;
2676 
2677 	vmxnet3_set_rxfilter(sc);
2678 
2679 #if 0 /* VLAN_HWFILTER */
2680 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
2681 		bcopy(sc->vmx_vlan_filter, sc->vmx_ds->vlan_filter,
2682 		    sizeof(sc->vmx_ds->vlan_filter));
2683 	else
2684 #endif
2685 		bzero(sc->vmx_ds->vlan_filter,
2686 		    sizeof(sc->vmx_ds->vlan_filter));
2687 	vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER);
2688 }
2689 
2690 static int
2691 vmxnet3_reinit(struct vmxnet3_softc *sc)
2692 {
2693 
2694 	vmxnet3_reinit_interface(sc);
2695 	vmxnet3_reinit_shared_data(sc);
2696 
2697 	if (vmxnet3_reinit_queues(sc) != 0)
2698 		return (ENXIO);
2699 
2700 	if (vmxnet3_enable_device(sc) != 0)
2701 		return (ENXIO);
2702 
2703 	vmxnet3_reinit_rxfilters(sc);
2704 
2705 	return (0);
2706 }
2707 
2708 static void
2709 vmxnet3_init_locked(struct vmxnet3_softc *sc)
2710 {
2711 	struct ifnet *ifp;
2712 
2713 	ifp = sc->vmx_ifp;
2714 
2715 	if (ifp->if_flags & IFF_RUNNING)
2716 		return;
2717 
2718 	vmxnet3_stop(sc);
2719 
2720 	if (vmxnet3_reinit(sc) != 0) {
2721 		vmxnet3_stop(sc);
2722 		return;
2723 	}
2724 
2725 	ifp->if_flags |= IFF_RUNNING;
2726 	vmxnet3_link_status(sc);
2727 
2728 	vmxnet3_enable_all_intrs(sc);
2729 	callout_reset(&sc->vmx_tick, hz, vmxnet3_tick, sc);
2730 }
2731 
2732 static void
2733 vmxnet3_init(void *xsc)
2734 {
2735 	struct vmxnet3_softc *sc;
2736 
2737 	sc = xsc;
2738 
2739 	VMXNET3_CORE_LOCK(sc);
2740 	vmxnet3_init_locked(sc);
2741 	VMXNET3_CORE_UNLOCK(sc);
2742 }
2743 
2744 /*
2745  * BMV: Much of this can go away once we finally have offsets in
2746  * the mbuf packet header. Bug andre@.
2747  */
2748 static int
2749 vmxnet3_txq_offload_ctx(struct vmxnet3_txqueue *txq, struct mbuf *m,
2750     int *etype, int *proto, int *start)
2751 {
2752 	struct ether_vlan_header *evh;
2753 	int offset;
2754 #if defined(INET)
2755 	struct ip *ip = NULL;
2756 #endif
2757 #if defined(INET6)
2758 	struct ip6_hdr *ip6 = NULL;
2759 #endif
2760 
2761 	evh = mtod(m, struct ether_vlan_header *);
2762 	if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2763 		/* BMV: We should handle nested VLAN tags too. */
2764 		*etype = ntohs(evh->evl_proto);
2765 		offset = sizeof(struct ether_vlan_header);
2766 	} else {
2767 		*etype = ntohs(evh->evl_encap_proto);
2768 		offset = sizeof(struct ether_header);
2769 	}
2770 
2771 	switch (*etype) {
2772 #if defined(INET)
2773 	case ETHERTYPE_IP:
2774 		if (__predict_false(m->m_len < offset + sizeof(struct ip))) {
2775 			m = m_pullup(m, offset + sizeof(struct ip));
2776 			if (m == NULL)
2777 				return (EINVAL);
2778 		}
2779 
2780 		ip = (struct ip *)(mtod(m, uint8_t *) + offset);
2781 		*proto = ip->ip_p;
2782 		*start = offset + (ip->ip_hl << 2);
2783 		break;
2784 #endif
2785 #if defined(INET6)
2786 	case ETHERTYPE_IPV6:
2787 		if (__predict_false(m->m_len <
2788 		    offset + sizeof(struct ip6_hdr))) {
2789 			m = m_pullup(m, offset + sizeof(struct ip6_hdr));
2790 			if (m == NULL)
2791 				return (EINVAL);
2792 		}
2793 
2794 		ip6 = (struct ip6_hdr *)(mtod(m, uint8_t *) + offset);
2795 		*proto = -1;
2796 		*start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto);
2797 		/* Assert the network stack sent us a valid packet. */
2798 		KASSERT(*start > offset,
2799 		    ("%s: mbuf %p start %d offset %d proto %d", __func__, m,
2800 		    *start, offset, *proto));
2801 		break;
2802 #endif
2803 	default:
2804 		return (EINVAL);
2805 	}
2806 
2807 #if 0 /* XXX TSO */
2808 	if (m->m_pkthdr.csum_flags & CSUM_TSO) {
2809 		struct tcphdr *tcp;
2810 
2811 		if (__predict_false(*proto != IPPROTO_TCP)) {
2812 			/* Likely failed to correctly parse the mbuf. */
2813 			return (EINVAL);
2814 		}
2815 
2816 		if (m->m_len < *start + sizeof(struct tcphdr)) {
2817 			m = m_pullup(m, *start + sizeof(struct tcphdr));
2818 			if (m == NULL)
2819 				return (EINVAL);
2820 		}
2821 
2822 		tcp = (struct tcphdr *)(mtod(m, uint8_t *) + *start);
2823 		*start += (tcp->th_off << 2);
2824 
2825 		txq->vxtxq_stats.vmtxs_tso++;
2826 	} else
2827 #endif
2828 		txq->vxtxq_stats.vmtxs_csum++;
2829 
2830 	return (0);
2831 }
2832 
2833 static int
2834 vmxnet3_txq_load_mbuf(struct vmxnet3_txqueue *txq, struct mbuf **m0,
2835     bus_dmamap_t dmap, bus_dma_segment_t segs[], int *nsegs)
2836 {
2837 	struct vmxnet3_txring *txr;
2838 	struct mbuf *m;
2839 	bus_dma_tag_t tag;
2840 	int error;
2841 
2842 	txr = &txq->vxtxq_cmd_ring;
2843 	m = *m0;
2844 	tag = txr->vxtxr_txtag;
2845 
2846 	error = bus_dmamap_load_mbuf_segment(tag, dmap, m, segs, 1, nsegs,
2847 	    BUS_DMA_NOWAIT);
2848 	if (error == 0 || error != EFBIG)
2849 		return (error);
2850 
2851 	m = m_defrag(m, M_NOWAIT);
2852 	if (m != NULL) {
2853 		*m0 = m;
2854 		error = bus_dmamap_load_mbuf_segment(tag, dmap, m, segs,
2855 		    1, nsegs, BUS_DMA_NOWAIT);
2856 	} else
2857 		error = ENOBUFS;
2858 
2859 	if (error) {
2860 		m_freem(*m0);
2861 		*m0 = NULL;
2862 		txq->vxtxq_sc->vmx_stats.vmst_defrag_failed++;
2863 	} else
2864 		txq->vxtxq_sc->vmx_stats.vmst_defragged++;
2865 
2866 	return (error);
2867 }
2868 
2869 static void
2870 vmxnet3_txq_unload_mbuf(struct vmxnet3_txqueue *txq, bus_dmamap_t dmap)
2871 {
2872 	struct vmxnet3_txring *txr;
2873 
2874 	txr = &txq->vxtxq_cmd_ring;
2875 	bus_dmamap_unload(txr->vxtxr_txtag, dmap);
2876 }
2877 
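/*
 * Enqueue a single packet on the Tx command ring: DMA-load the mbuf chain
 * (defragmenting it once if necessary), fill one descriptor per segment,
 * and set the VLAN and checksum offload fields on the start-of-packet
 * descriptor. The SOP generation bit is flipped last, after a write
 * barrier, so the device never sees a partially built chain. The Tx
 * doorbell is written once enough descriptors are pending.
 */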
2878 static int
2879 vmxnet3_txq_encap(struct vmxnet3_txqueue *txq, struct mbuf **m0)
2880 {
2881 	struct vmxnet3_softc *sc;
2882 	struct vmxnet3_txring *txr;
2883 	struct vmxnet3_txdesc *txd, *sop;
2884 	struct mbuf *m;
2885 	bus_dmamap_t dmap;
2886 	bus_dma_segment_t segs[VMXNET3_TX_MAXSEGS];
2887 	int i, gen, nsegs, etype, proto, start, error;
2888 
2889 	sc = txq->vxtxq_sc;
2890 	start = 0;
2891 	txd = NULL;
2892 	txr = &txq->vxtxq_cmd_ring;
2893 	dmap = txr->vxtxr_txbuf[txr->vxtxr_head].vtxb_dmamap;
2894 
2895 	error = vmxnet3_txq_load_mbuf(txq, m0, dmap, segs, &nsegs);
2896 	if (error)
2897 		return (error);
2898 
2899 	m = *m0;
2900 	M_ASSERTPKTHDR(m);
2901 	KASSERT(nsegs <= VMXNET3_TX_MAXSEGS,
2902 	    ("%s: mbuf %p with too many segments %d", __func__, m, nsegs));
2903 
2904 	if (VMXNET3_TXRING_AVAIL(txr) < nsegs) {
2905 		txq->vxtxq_stats.vmtxs_full++;
2906 		vmxnet3_txq_unload_mbuf(txq, dmap);
2907 		return (ENOSPC);
2908 	} else if (m->m_pkthdr.csum_flags & VMXNET3_CSUM_ALL_OFFLOAD) {
2909 		error = vmxnet3_txq_offload_ctx(txq, m, &etype, &proto, &start);
2910 		if (error) {
2911 			txq->vxtxq_stats.vmtxs_offload_failed++;
2912 			vmxnet3_txq_unload_mbuf(txq, dmap);
2913 			m_freem(m);
2914 			*m0 = NULL;
2915 			return (error);
2916 		}
2917 	}
2918 
2919 	txr->vxtxr_txbuf[txr->vxtxr_head].vtxb_m = m;
2920 	sop = &txr->vxtxr_txd[txr->vxtxr_head];
2921 	gen = txr->vxtxr_gen ^ 1;	/* Still owned by the CPU. */
2922 
2923 	for (i = 0; i < nsegs; i++) {
2924 		txd = &txr->vxtxr_txd[txr->vxtxr_head];
2925 
2926 		txd->addr = segs[i].ds_addr;
2927 		txd->len = segs[i].ds_len;
2928 		txd->gen = gen;
2929 		txd->dtype = 0;
2930 		txd->offload_mode = VMXNET3_OM_NONE;
2931 		txd->offload_pos = 0;
2932 		txd->hlen = 0;
2933 		txd->eop = 0;
2934 		txd->compreq = 0;
2935 		txd->vtag_mode = 0;
2936 		txd->vtag = 0;
2937 
2938 		if (++txr->vxtxr_head == txr->vxtxr_ndesc) {
2939 			txr->vxtxr_head = 0;
2940 			txr->vxtxr_gen ^= 1;
2941 		}
2942 		gen = txr->vxtxr_gen;
2943 	}
2944 	txd->eop = 1;
2945 	txd->compreq = 1;
2946 
2947 	if (m->m_flags & M_VLANTAG) {
2948 		sop->vtag_mode = 1;
2949 		sop->vtag = m->m_pkthdr.ether_vlantag;
2950 	}
2951 
2953 #if 0 /* XXX TSO */
2954 	if (m->m_pkthdr.csum_flags & CSUM_TSO) {
2955 		sop->offload_mode = VMXNET3_OM_TSO;
2956 		sop->hlen = start;
2957 		sop->offload_pos = m->m_pkthdr.tso_segsz;
2958 	} else
2959 #endif
2960 	if (m->m_pkthdr.csum_flags & (VMXNET3_CSUM_OFFLOAD |
2961 	    VMXNET3_CSUM_OFFLOAD_IPV6))	{
2962 		sop->offload_mode = VMXNET3_OM_CSUM;
2963 		sop->hlen = start;
2964 		sop->offload_pos = start + m->m_pkthdr.csum_data;
2965 	}
2966 
2967 	/* Finally, change the ownership. */
2968 	vmxnet3_barrier(sc, VMXNET3_BARRIER_WR);
2969 	sop->gen ^= 1;
2970 
2971 	txq->vxtxq_ts->npending += nsegs;
2972 	if (txq->vxtxq_ts->npending >= txq->vxtxq_ts->intr_threshold) {
2973 		txq->vxtxq_ts->npending = 0;
2974 		vmxnet3_write_bar0(sc, VMXNET3_BAR0_TXH(txq->vxtxq_id),
2975 		    txr->vxtxr_head);
2976 	}
2977 
2978 	return (0);
2979 }
2980 
2981 #ifdef VMXNET3_LEGACY_TX
2982 
2983 static void
2984 vmxnet3_start_locked(struct ifnet *ifp)
2985 {
2986 	struct vmxnet3_softc *sc;
2987 	struct vmxnet3_txqueue *txq;
2988 	struct vmxnet3_txring *txr;
2989 	struct mbuf *m_head;
2990 	int tx, avail;
2991 
2992 	sc = ifp->if_softc;
2993 	txq = &sc->vmx_txq[0];
2994 	txr = &txq->vxtxq_cmd_ring;
2995 	tx = 0;
2996 
2997 	VMXNET3_TXQ_LOCK_ASSERT(txq);
2998 
2999 	if ((ifp->if_flags & IFF_RUNNING) == 0 ||
3000 	    sc->vmx_link_active == 0)
3001 		return;
3002 
3003 	while (!ifq_is_empty(&ifp->if_snd)) {
3004 		if ((avail = VMXNET3_TXRING_AVAIL(txr)) < 2)
3005 			break;
3006 
3007 		m_head = ifq_dequeue(&ifp->if_snd);
3008 		if (m_head == NULL)
3009 			break;
3010 
3011 		/* Assume the worst case if this mbuf is the head of a chain. */
3012 		if (m_head->m_next != NULL && avail < VMXNET3_TX_MAXSEGS) {
3013 			ifq_prepend(&ifp->if_snd, m_head);
3014 			break;
3015 		}
3016 
3017 		if (vmxnet3_txq_encap(txq, &m_head) != 0) {
3018 			if (m_head != NULL)
3019 				ifq_prepend(&ifp->if_snd, m_head);
3020 			break;
3021 		}
3022 
3023 		tx++;
3024 		ETHER_BPF_MTAP(ifp, m_head);
3025 	}
3026 
3027 	if (tx > 0)
3028 		txq->vxtxq_watchdog = VMXNET3_WATCHDOG_TIMEOUT;
3029 }
3030 
3031 static void
3032 vmxnet3_start(struct ifnet *ifp, struct ifaltq_subque *ifsq)
3033 {
3034 	struct vmxnet3_softc *sc;
3035 	struct vmxnet3_txqueue *txq;
3036 
3037 	sc = ifp->if_softc;
3038 	txq = &sc->vmx_txq[0];
3039 
3040 	VMXNET3_TXQ_LOCK(txq);
3041 	vmxnet3_start_locked(ifp);
3042 	VMXNET3_TXQ_UNLOCK(txq);
3043 }
3044 
3045 #else /* !VMXNET3_LEGACY_TX */
3046 
3047 static int
3048 vmxnet3_txq_mq_start_locked(struct vmxnet3_txqueue *txq, struct mbuf *m)
3049 {
3050 	struct vmxnet3_softc *sc;
3051 	struct vmxnet3_txring *txr;
3052 	struct buf_ring *br;
3053 	struct ifnet *ifp;
3054 	int tx, avail, error;
3055 
3056 	sc = txq->vxtxq_sc;
3057 	br = txq->vxtxq_br;
3058 	ifp = sc->vmx_ifp;
3059 	txr = &txq->vxtxq_cmd_ring;
3060 	tx = 0;
3061 	error = 0;
3062 
3063 	VMXNET3_TXQ_LOCK_ASSERT(txq);
3064 
3065 	if ((ifp->if_flags & IFF_RUNNING) == 0 ||
3066 	    sc->vmx_link_active == 0) {
3067 		if (m != NULL)
3068 			error = drbr_enqueue(ifp, br, m);
3069 		return (error);
3070 	}
3071 
3072 	if (m != NULL) {
3073 		error = drbr_enqueue(ifp, br, m);
3074 		if (error)
3075 			return (error);
3076 	}
3077 
3078 	while ((avail = VMXNET3_TXRING_AVAIL(txr)) >= 2) {
3079 		m = drbr_peek(ifp, br);
3080 		if (m == NULL)
3081 			break;
3082 
3083 		/* Assume the worst case if this mbuf is the head of a chain. */
3084 		if (m->m_next != NULL && avail < VMXNET3_TX_MAXSEGS) {
3085 			drbr_putback(ifp, br, m);
3086 			break;
3087 		}
3088 
3089 		if (vmxnet3_txq_encap(txq, &m) != 0) {
3090 			if (m != NULL)
3091 				drbr_putback(ifp, br, m);
3092 			else
3093 				drbr_advance(ifp, br);
3094 			break;
3095 		}
3096 		drbr_advance(ifp, br);
3097 
3098 		tx++;
3099 		ETHER_BPF_MTAP(ifp, m);
3100 	}
3101 
3102 	if (tx > 0)
3103 		txq->vxtxq_watchdog = VMXNET3_WATCHDOG_TIMEOUT;
3104 
3105 	return (0);
3106 }
3107 
3108 static int
3109 vmxnet3_txq_mq_start(struct ifnet *ifp, struct mbuf *m)
3110 {
3111 	struct vmxnet3_softc *sc;
3112 	struct vmxnet3_txqueue *txq;
3113 	int i, ntxq, error;
3114 
3115 	sc = ifp->if_softc;
3116 	ntxq = sc->vmx_ntxqueues;
3117 
3118 	/* check if flowid is set */
3119 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
3120 		i = m->m_pkthdr.flowid % ntxq;
3121 	else
3122 		i = curcpu % ntxq;
3123 
3124 	txq = &sc->vmx_txq[i];
3125 
3126 	if (VMXNET3_TXQ_TRYLOCK(txq) != 0) {
3127 		error = vmxnet3_txq_mq_start_locked(txq, m);
3128 		VMXNET3_TXQ_UNLOCK(txq);
3129 	} else {
3130 		error = drbr_enqueue(ifp, txq->vxtxq_br, m);
3131 		taskqueue_enqueue(sc->vmx_tq, &txq->vxtxq_defrtask);
3132 	}
3133 
3134 	return (error);
3135 }
3136 
3137 static void
3138 vmxnet3_txq_tq_deferred(void *xtxq, int pending)
3139 {
3140 	struct vmxnet3_softc *sc;
3141 	struct vmxnet3_txqueue *txq;
3142 
3143 	txq = xtxq;
3144 	sc = txq->vxtxq_sc;
3145 
3146 	VMXNET3_TXQ_LOCK(txq);
3147 	if (!drbr_empty(sc->vmx_ifp, txq->vxtxq_br))
3148 		vmxnet3_txq_mq_start_locked(txq, NULL);
3149 	VMXNET3_TXQ_UNLOCK(txq);
3150 }
3151 
3152 #endif /* VMXNET3_LEGACY_TX */
3153 
3154 static void
3155 vmxnet3_txq_start(struct vmxnet3_txqueue *txq)
3156 {
3157 	struct vmxnet3_softc *sc;
3158 	struct ifnet *ifp;
3159 
3160 	sc = txq->vxtxq_sc;
3161 	ifp = sc->vmx_ifp;
3162 
3163 #ifdef VMXNET3_LEGACY_TX
3164 	if (!ifq_is_empty(&ifp->if_snd))
3165 		vmxnet3_start_locked(ifp);
3166 #else
3167 	if (!drbr_empty(ifp, txq->vxtxq_br))
3168 		vmxnet3_txq_mq_start_locked(txq, NULL);
3169 #endif
3170 }
3171 
3172 static void
3173 vmxnet3_tx_start_all(struct vmxnet3_softc *sc)
3174 {
3175 	struct vmxnet3_txqueue *txq;
3176 	int i;
3177 
3178 	VMXNET3_CORE_LOCK_ASSERT(sc);
3179 
3180 	for (i = 0; i < sc->vmx_ntxqueues; i++) {
3181 		txq = &sc->vmx_txq[i];
3182 
3183 		VMXNET3_TXQ_LOCK(txq);
3184 		vmxnet3_txq_start(txq);
3185 		VMXNET3_TXQ_UNLOCK(txq);
3186 	}
3187 }
3188 
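/*
 * The VLAN filter is a 4096-bit bitmap kept as 32-bit words: tag >> 5
 * selects the word and tag & 0x1F the bit within it. Only the driver's
 * private copy is updated here; pushing it to the device requires
 * hardware VLAN filtering, which is currently compiled out.
 */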
3189 static void
3190 vmxnet3_update_vlan_filter(struct vmxnet3_softc *sc, int add, uint16_t tag)
3191 {
3192 	struct ifnet *ifp;
3193 	int idx, bit;
3194 
3195 	ifp = sc->vmx_ifp;
3196 	idx = (tag >> 5) & 0x7F;
3197 	bit = tag & 0x1F;
3198 
3199 	if (tag == 0 || tag > 4095)
3200 		return;
3201 
3202 	VMXNET3_CORE_LOCK(sc);
3203 
3204 	/* Update our private VLAN bitvector. */
3205 	if (add)
3206 		sc->vmx_vlan_filter[idx] |= (1 << bit);
3207 	else
3208 		sc->vmx_vlan_filter[idx] &= ~(1 << bit);
3209 
3210 #if 0 /* VLAN_HWFILTER */
3211 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
3212 		if (add)
3213 			sc->vmx_ds->vlan_filter[idx] |= (1 << bit);
3214 		else
3215 			sc->vmx_ds->vlan_filter[idx] &= ~(1 << bit);
3216 		vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER);
3217 	}
3218 #endif
3219 
3220 	VMXNET3_CORE_UNLOCK(sc);
3221 }
3222 
3223 static void
3224 vmxnet3_register_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
3225 {
3226 
3227 	if (ifp->if_softc == arg)
3228 		vmxnet3_update_vlan_filter(arg, 1, tag);
3229 }
3230 
3231 static void
3232 vmxnet3_unregister_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
3233 {
3234 
3235 	if (ifp->if_softc == arg)
3236 		vmxnet3_update_vlan_filter(arg, 0, tag);
3237 }
3238 
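/*
 * Program the device Rx mode and multicast filter. Unicast and broadcast
 * frames are always accepted; promiscuous and all-multicast modes follow
 * the interface flags. Up to VMXNET3_MULTICAST_MAX addresses fit in the
 * multicast table; on overflow the driver falls back to all-multicast.
 */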
3239 static void
3240 vmxnet3_set_rxfilter(struct vmxnet3_softc *sc)
3241 {
3242 	struct ifnet *ifp;
3243 	struct vmxnet3_driver_shared *ds;
3244 	struct ifmultiaddr *ifma;
3245 	u_int mode;
3246 
3247 	ifp = sc->vmx_ifp;
3248 	ds = sc->vmx_ds;
3249 
3250 	mode = VMXNET3_RXMODE_UCAST | VMXNET3_RXMODE_BCAST;
3251 	if (ifp->if_flags & IFF_PROMISC)
3252 		mode |= VMXNET3_RXMODE_PROMISC;
3253 	if (ifp->if_flags & IFF_ALLMULTI)
3254 		mode |= VMXNET3_RXMODE_ALLMULTI;
3255 	else {
3256 		int cnt = 0, overflow = 0;
3257 
3258 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
3259 			if (ifma->ifma_addr->sa_family != AF_LINK)
3260 				continue;
3261 			else if (cnt == VMXNET3_MULTICAST_MAX) {
3262 				overflow = 1;
3263 				break;
3264 			}
3265 
3266 			bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
3267 			   &sc->vmx_mcast[cnt*ETHER_ADDR_LEN], ETHER_ADDR_LEN);
3268 			cnt++;
3269 		}
3270 
3271 		if (overflow != 0) {
3272 			cnt = 0;
3273 			mode |= VMXNET3_RXMODE_ALLMULTI;
3274 		} else if (cnt > 0)
3275 			mode |= VMXNET3_RXMODE_MCAST;
3276 		ds->mcast_tablelen = cnt * ETHER_ADDR_LEN;
3277 	}
3278 
3279 	ds->rxmode = mode;
3280 
3281 	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_FILTER);
3282 	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_RXMODE);
3283 }
3284 
3285 static int
3286 vmxnet3_change_mtu(struct vmxnet3_softc *sc, int mtu)
3287 {
3288 	struct ifnet *ifp;
3289 
3290 	ifp = sc->vmx_ifp;
3291 
3292 	if (mtu < VMXNET3_MIN_MTU || mtu > VMXNET3_MAX_MTU)
3293 		return (EINVAL);
3294 
3295 	ifp->if_mtu = mtu;
3296 
3297 	if (ifp->if_flags & IFF_RUNNING) {
3298 		ifp->if_flags &= ~IFF_RUNNING;
3299 		vmxnet3_init_locked(sc);
3300 	}
3301 
3302 	return (0);
3303 }
3304 
3305 static int
3306 vmxnet3_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cred)
3307 {
3308 	struct vmxnet3_softc *sc;
3309 	struct ifreq *ifr;
3310 	int reinit, mask, error;
3311 
3312 	sc = ifp->if_softc;
3313 	ifr = (struct ifreq *) data;
3314 	error = 0;
3315 
3316 	switch (cmd) {
3317 	case SIOCSIFMTU:
3318 		if (ifp->if_mtu != ifr->ifr_mtu) {
3319 			VMXNET3_CORE_LOCK(sc);
3320 			error = vmxnet3_change_mtu(sc, ifr->ifr_mtu);
3321 			VMXNET3_CORE_UNLOCK(sc);
3322 		}
3323 		break;
3324 
3325 	case SIOCSIFFLAGS:
3326 		VMXNET3_CORE_LOCK(sc);
3327 		if (ifp->if_flags & IFF_UP) {
3328 			if ((ifp->if_flags & IFF_RUNNING)) {
3329 				if ((ifp->if_flags ^ sc->vmx_if_flags) &
3330 				    (IFF_PROMISC | IFF_ALLMULTI)) {
3331 					vmxnet3_set_rxfilter(sc);
3332 				}
3333 			} else
3334 				vmxnet3_init_locked(sc);
3335 		} else {
3336 			if (ifp->if_flags & IFF_RUNNING)
3337 				vmxnet3_stop(sc);
3338 		}
3339 		sc->vmx_if_flags = ifp->if_flags;
3340 		VMXNET3_CORE_UNLOCK(sc);
3341 		break;
3342 
3343 	case SIOCADDMULTI:
3344 	case SIOCDELMULTI:
3345 		VMXNET3_CORE_LOCK(sc);
3346 		if (ifp->if_flags & IFF_RUNNING)
3347 			vmxnet3_set_rxfilter(sc);
3348 		VMXNET3_CORE_UNLOCK(sc);
3349 		break;
3350 
3351 	case SIOCSIFMEDIA:
3352 	case SIOCGIFMEDIA:
3353 		error = ifmedia_ioctl(ifp, ifr, &sc->vmx_media, cmd);
3354 		break;
3355 
3356 	case SIOCSIFCAP:
3357 		VMXNET3_CORE_LOCK(sc);
3358 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
3359 
3360 		if (mask & IFCAP_TXCSUM)
3361 			ifp->if_capenable ^= IFCAP_TXCSUM;
3362 		if (mask & IFCAP_TXCSUM_IPV6)
3363 			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
3364 #if 0 /* XXX TSO */
3365 		if (mask & IFCAP_TSO4)
3366 			ifp->if_capenable ^= IFCAP_TSO4;
3367 		if (mask & IFCAP_TSO6)
3368 			ifp->if_capenable ^= IFCAP_TSO6;
3369 #endif
3370 
3371 		if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | /* IFCAP_LRO | */
3372 		    IFCAP_VLAN_HWTAGGING /* | IFCAP_VLAN_HWFILTER */)) {
3373 			/* Changing these features requires us to reinit. */
3374 			reinit = 1;
3375 
3376 			if (mask & IFCAP_RXCSUM)
3377 				ifp->if_capenable ^= IFCAP_RXCSUM;
3378 			if (mask & IFCAP_RXCSUM_IPV6)
3379 				ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
3380 #if 0 /* XXX LRO */
3381 			if (mask & IFCAP_LRO)
3382 				ifp->if_capenable ^= IFCAP_LRO;
3383 #endif
3384 			if (mask & IFCAP_VLAN_HWTAGGING)
3385 				ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
3386 #if 0 /* XXX VLAN_HWFILTER */
3387 			if (mask & IFCAP_VLAN_HWFILTER)
3388 				ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
3389 #endif
3390 		} else
3391 			reinit = 0;
3392 
3393 #if 0 /* XXX TSO */
3394 		if (mask & IFCAP_VLAN_HWTSO)
3395 			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
3396 #endif
3397 
3398 		if (reinit && (ifp->if_flags & IFF_RUNNING)) {
3399 			ifp->if_flags &= ~IFF_RUNNING;
3400 			vmxnet3_init_locked(sc);
3401 		} else {
3402 			vmxnet3_init_hwassist(sc);
3403 		}
3404 
3405 		VMXNET3_CORE_UNLOCK(sc);
3406 #if 0 /* XXX */
3407 		VLAN_CAPABILITIES(ifp);
3408 #endif
3409 		break;
3410 
3411 	default:
3412 		error = ether_ioctl(ifp, cmd, data);
3413 		break;
3414 	}
3415 
3416 	VMXNET3_CORE_LOCK_ASSERT_NOTOWNED(sc);
3417 
3418 	return (error);
3419 }
3420 
3421 #ifndef VMXNET3_LEGACY_TX
3422 static void
3423 vmxnet3_qflush(struct ifnet *ifp)
3424 {
3425 	struct vmxnet3_softc *sc;
3426 	struct vmxnet3_txqueue *txq;
3427 	struct mbuf *m;
3428 	int i;
3429 
3430 	sc = ifp->if_softc;
3431 
3432 	for (i = 0; i < sc->vmx_ntxqueues; i++) {
3433 		txq = &sc->vmx_txq[i];
3434 
3435 		VMXNET3_TXQ_LOCK(txq);
3436 		while ((m = buf_ring_dequeue_sc(txq->vxtxq_br)) != NULL)
3437 			m_freem(m);
3438 		VMXNET3_TXQ_UNLOCK(txq);
3439 	}
3440 
3441 	if_qflush(ifp);
3442 }
3443 #endif
3444 
3445 static int
3446 vmxnet3_watchdog(struct vmxnet3_txqueue *txq)
3447 {
3448 	struct vmxnet3_softc *sc;
3449 
3450 	sc = txq->vxtxq_sc;
3451 
3452 	VMXNET3_TXQ_LOCK(txq);
3453 	if (txq->vxtxq_watchdog == 0 || --txq->vxtxq_watchdog) {
3454 		VMXNET3_TXQ_UNLOCK(txq);
3455 		return (0);
3456 	}
3457 	VMXNET3_TXQ_UNLOCK(txq);
3458 
3459 	if_printf(sc->vmx_ifp, "watchdog timeout on queue %d\n",
3460 	    txq->vxtxq_id);
3461 	return (1);
3462 }
3463 
3464 static void
3465 vmxnet3_refresh_host_stats(struct vmxnet3_softc *sc)
3466 {
3467 
3468 	vmxnet3_write_cmd(sc, VMXNET3_CMD_GET_STATS);
3469 }
3470 
3471 static void
3472 vmxnet3_txq_accum_stats(struct vmxnet3_txqueue *txq,
3473     struct vmxnet3_txq_stats *accum)
3474 {
3475 	struct vmxnet3_txq_stats *st;
3476 
3477 	st = &txq->vxtxq_stats;
3478 
3479 	accum->vmtxs_opackets += st->vmtxs_opackets;
3480 	accum->vmtxs_obytes += st->vmtxs_obytes;
3481 	accum->vmtxs_omcasts += st->vmtxs_omcasts;
3482 	accum->vmtxs_csum += st->vmtxs_csum;
3483 	accum->vmtxs_tso += st->vmtxs_tso;
3484 	accum->vmtxs_full += st->vmtxs_full;
3485 	accum->vmtxs_offload_failed += st->vmtxs_offload_failed;
3486 }
3487 
3488 static void
3489 vmxnet3_rxq_accum_stats(struct vmxnet3_rxqueue *rxq,
3490     struct vmxnet3_rxq_stats *accum)
3491 {
3492 	struct vmxnet3_rxq_stats *st;
3493 
3494 	st = &rxq->vxrxq_stats;
3495 
3496 	accum->vmrxs_ipackets += st->vmrxs_ipackets;
3497 	accum->vmrxs_ibytes += st->vmrxs_ibytes;
3498 	accum->vmrxs_iqdrops += st->vmrxs_iqdrops;
3499 	accum->vmrxs_ierrors += st->vmrxs_ierrors;
3500 }
3501 
3502 static void
3503 vmxnet3_accumulate_stats(struct vmxnet3_softc *sc)
3504 {
3505 	struct ifnet *ifp;
3506 	struct vmxnet3_statistics *st;
3507 	struct vmxnet3_txq_stats txaccum;
3508 	struct vmxnet3_rxq_stats rxaccum;
3509 	int i;
3510 
3511 	ifp = sc->vmx_ifp;
3512 	st = &sc->vmx_stats;
3513 
3514 	bzero(&txaccum, sizeof(struct vmxnet3_txq_stats));
3515 	bzero(&rxaccum, sizeof(struct vmxnet3_rxq_stats));
3516 
3517 	for (i = 0; i < sc->vmx_ntxqueues; i++)
3518 		vmxnet3_txq_accum_stats(&sc->vmx_txq[i], &txaccum);
3519 	for (i = 0; i < sc->vmx_nrxqueues; i++)
3520 		vmxnet3_rxq_accum_stats(&sc->vmx_rxq[i], &rxaccum);
3521 
3522 	/*
3523 	 * With the exception of if_ierrors, these ifnet statistics are
3524 	 * only updated in the driver, so just set them to our accumulated
3525 	 * values. if_ierrors is updated in ether_input() for malformed
3526 	 * frames that we should have already discarded.
3527 	 */
3528 	ifp->if_ipackets = rxaccum.vmrxs_ipackets;
3529 	ifp->if_iqdrops = rxaccum.vmrxs_iqdrops;
3530 	ifp->if_ierrors = rxaccum.vmrxs_ierrors;
3531 	ifp->if_opackets = txaccum.vmtxs_opackets;
3532 #ifndef VMXNET3_LEGACY_TX
3533 	ifp->if_obytes = txaccum.vmtxs_obytes;
3534 	ifp->if_omcasts = txaccum.vmtxs_omcasts;
3535 #endif
3536 }
3537 
3538 static void
3539 vmxnet3_tick(void *xsc)
3540 {
3541 	struct vmxnet3_softc *sc;
3542 	struct ifnet *ifp;
3543 	int i, timedout;
3544 
3545 	sc = xsc;
3546 	ifp = sc->vmx_ifp;
3547 	timedout = 0;
3548 
3549 	VMXNET3_CORE_LOCK_ASSERT(sc);
3550 
3551 	vmxnet3_accumulate_stats(sc);
3552 	vmxnet3_refresh_host_stats(sc);
3553 
3554 	for (i = 0; i < sc->vmx_ntxqueues; i++)
3555 		timedout |= vmxnet3_watchdog(&sc->vmx_txq[i]);
3556 
3557 	if (timedout != 0) {
3558 		ifp->if_flags &= ~IFF_RUNNING;
3559 		vmxnet3_init_locked(sc);
3560 	} else
3561 		callout_reset(&sc->vmx_tick, hz, vmxnet3_tick, sc);
3562 }
3563 
3564 static int
3565 vmxnet3_link_is_up(struct vmxnet3_softc *sc)
3566 {
3567 	uint32_t status;
3568 
3569 	/* Also update the link speed while here. */
3570 	status = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_LINK);
3571 	sc->vmx_link_speed = status >> 16;
3572 	return (!!(status & 0x1));
3573 }
3574 
3575 static void
3576 vmxnet3_link_status(struct vmxnet3_softc *sc)
3577 {
3578 	struct ifnet *ifp;
3579 	int link;
3580 
3581 	ifp = sc->vmx_ifp;
3582 	link = vmxnet3_link_is_up(sc);
3583 
3584 	if (link != 0 && sc->vmx_link_active == 0) {
3585 		sc->vmx_link_active = 1;
3586 		ifp->if_link_state = LINK_STATE_UP;
3587 		if_link_state_change(ifp);
3588 	} else if (link == 0 && sc->vmx_link_active != 0) {
3589 		sc->vmx_link_active = 0;
3590 		ifp->if_link_state = LINK_STATE_DOWN;
3591 		if_link_state_change(ifp);
3592 	}
3593 }
3594 
3595 static void
3596 vmxnet3_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
3597 {
3598 	struct vmxnet3_softc *sc;
3599 
3600 	sc = ifp->if_softc;
3601 
3602 	ifmr->ifm_active = IFM_ETHER | IFM_AUTO;
3603 	ifmr->ifm_status = IFM_AVALID;
3604 
3605 	VMXNET3_CORE_LOCK(sc);
3606 	if (vmxnet3_link_is_up(sc) != 0)
3607 		ifmr->ifm_status |= IFM_ACTIVE;
3608 	else
3609 		ifmr->ifm_status |= IFM_NONE;
3610 	VMXNET3_CORE_UNLOCK(sc);
3611 }
3612 
3613 static int
3614 vmxnet3_media_change(struct ifnet *ifp)
3615 {
3616 
3617 	/* Ignore. */
3618 	return (0);
3619 }
3620 
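/*
 * The MAC address is exchanged with the device through two 32-bit
 * registers: MACL carries bytes 0-3 and MACH carries bytes 4-5, packed
 * least significant byte first.
 */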
3621 static void
3622 vmxnet3_set_lladdr(struct vmxnet3_softc *sc)
3623 {
3624 	uint32_t ml, mh;
3625 
3626 	ml  = sc->vmx_lladdr[0];
3627 	ml |= sc->vmx_lladdr[1] << 8;
3628 	ml |= sc->vmx_lladdr[2] << 16;
3629 	ml |= sc->vmx_lladdr[3] << 24;
3630 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACL, ml);
3631 
3632 	mh  = sc->vmx_lladdr[4];
3633 	mh |= sc->vmx_lladdr[5] << 8;
3634 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACH, mh);
3635 }
3636 
3637 static void
3638 vmxnet3_get_lladdr(struct vmxnet3_softc *sc)
3639 {
3640 	uint32_t ml, mh;
3641 
3642 	ml = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACL);
3643 	mh = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACH);
3644 
3645 	sc->vmx_lladdr[0] = ml;
3646 	sc->vmx_lladdr[1] = ml >> 8;
3647 	sc->vmx_lladdr[2] = ml >> 16;
3648 	sc->vmx_lladdr[3] = ml >> 24;
3649 	sc->vmx_lladdr[4] = mh;
3650 	sc->vmx_lladdr[5] = mh >> 8;
3651 }
3652 
3653 static void
3654 vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *txq,
3655     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3656 {
3657 	struct sysctl_oid *node, *txsnode;
3658 	struct sysctl_oid_list *list, *txslist;
3659 	struct vmxnet3_txq_stats *stats;
3660 	struct UPT1_TxStats *txstats;
3661 	char namebuf[16];
3662 
3663 	stats = &txq->vxtxq_stats;
3664 	txstats = &txq->vxtxq_ts->stats;
3665 
3666 	ksnprintf(namebuf, sizeof(namebuf), "txq%d", txq->vxtxq_id);
3667 	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD,
3668 	    NULL, "Transmit Queue");
3669 	txq->vxtxq_sysctl = list = SYSCTL_CHILDREN(node);
3670 
3671 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD,
3672 	    &stats->vmtxs_opackets, 0, "Transmit packets");
3673 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD,
3674 	    &stats->vmtxs_obytes, 0, "Transmit bytes");
3675 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD,
3676 	    &stats->vmtxs_omcasts, 0, "Transmit multicasts");
3677 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
3678 	    &stats->vmtxs_csum, 0, "Transmit checksum offloaded");
3679 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD,
3680 	    &stats->vmtxs_tso, 0, "Transmit TCP segmentation offloaded");
3681 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ringfull", CTLFLAG_RD,
3682 	    &stats->vmtxs_full, 0, "Transmit ring full");
3683 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "offload_failed", CTLFLAG_RD,
3684 	    &stats->vmtxs_offload_failed, 0, "Transmit checksum offload failed");
3685 
3686 	/*
3687 	 * Add statistics reported by the host. These are updated once
3688 	 * per second.
3689 	 */
3690 	txsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats", CTLFLAG_RD,
3691 	    NULL, "Host Statistics");
3692 	txslist = SYSCTL_CHILDREN(txsnode);
3693 #if 0 /* XXX TSO */
3694 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_packets", CTLFLAG_RD,
3695 	    &txstats->TSO_packets, 0, "TSO packets");
3696 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_bytes", CTLFLAG_RD,
3697 	    &txstats->TSO_bytes, 0, "TSO bytes");
3698 #endif
3699 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
3700 	    &txstats->ucast_packets, 0, "Unicast packets");
3701 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
3702 	    &txstats->ucast_bytes, 0, "Unicast bytes");
3703 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
3704 	    &txstats->mcast_packets, 0, "Multicast packets");
3705 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
3706 	    &txstats->mcast_bytes, 0, "Multicast bytes");
3707 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "error", CTLFLAG_RD,
3708 	    &txstats->error, 0, "Errors");
3709 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "discard", CTLFLAG_RD,
3710 	    &txstats->discard, 0, "Discards");
3711 }
3712 
3713 static void
3714 vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *rxq,
3715     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3716 {
3717 	struct sysctl_oid *node, *rxsnode;
3718 	struct sysctl_oid_list *list, *rxslist;
3719 	struct vmxnet3_rxq_stats *stats;
3720 	struct UPT1_RxStats *rxstats;
3721 	char namebuf[16];
3722 
3723 	stats = &rxq->vxrxq_stats;
3724 	rxstats = &rxq->vxrxq_rs->stats;
3725 
3726 	ksnprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vxrxq_id);
3727 	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD,
3728 	    NULL, "Receive Queue");
3729 	rxq->vxrxq_sysctl = list = SYSCTL_CHILDREN(node);
3730 
3731 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD,
3732 	    &stats->vmrxs_ipackets, 0, "Receive packets");
3733 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD,
3734 	    &stats->vmrxs_ibytes, 0, "Receive bytes");
3735 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD,
3736 	    &stats->vmrxs_iqdrops, 0, "Receive drops");
3737 	SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD,
3738 	    &stats->vmrxs_ierrors, 0, "Receive errors");
3739 
3740 	/*
3741 	 * Add statistics reported by the host. These are updated once
3742 	 * per second.
3743 	 */
3744 	rxsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats", CTLFLAG_RD,
3745 	    NULL, "Host Statistics");
3746 	rxslist = SYSCTL_CHILDREN(rxsnode);
3747 #if 0 /* XXX LRO */
3748 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_packets", CTLFLAG_RD,
3749 	    &rxstats->LRO_packets, 0, "LRO packets");
3750 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_bytes", CTLFLAG_RD,
3751 	    &rxstats->LRO_bytes, 0, "LRO bytes");
3752 #endif
3753 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
3754 	    &rxstats->ucast_packets, 0, "Unicast packets");
3755 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
3756 	    &rxstats->ucast_bytes, 0, "Unicast bytes");
3757 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
3758 	    &rxstats->mcast_packets, 0, "Multicast packets");
3759 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
3760 	    &rxstats->mcast_bytes, 0, "Multicast bytes");
3761 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_packets", CTLFLAG_RD,
3762 	    &rxstats->bcast_packets, 0, "Broadcast packets");
3763 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_bytes", CTLFLAG_RD,
3764 	    &rxstats->bcast_bytes, 0, "Broadcast bytes");
3765 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "nobuffer", CTLFLAG_RD,
3766 	    &rxstats->nobuffer, 0, "No buffer");
3767 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "error", CTLFLAG_RD,
3768 	    &rxstats->error, 0, "Errors");
3769 }
3770 
3771 static void
3772 vmxnet3_setup_debug_sysctl(struct vmxnet3_softc *sc,
3773     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3774 {
3775 	struct sysctl_oid *node;
3776 	struct sysctl_oid_list *list;
3777 	int i;
3778 
3779 	for (i = 0; i < sc->vmx_ntxqueues; i++) {
3780 		struct vmxnet3_txqueue *txq = &sc->vmx_txq[i];
3781 
3782 		node = SYSCTL_ADD_NODE(ctx, txq->vxtxq_sysctl, OID_AUTO,
3783 		    "debug", CTLFLAG_RD, NULL, "");
3784 		list = SYSCTL_CHILDREN(node);
3785 
3786 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_head", CTLFLAG_RD,
3787 		    &txq->vxtxq_cmd_ring.vxtxr_head, 0, "");
3788 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_next", CTLFLAG_RD,
3789 		    &txq->vxtxq_cmd_ring.vxtxr_next, 0, "");
3790 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_ndesc", CTLFLAG_RD,
3791 		    &txq->vxtxq_cmd_ring.vxtxr_ndesc, 0, "");
3792 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd_gen", CTLFLAG_RD,
3793 		    &txq->vxtxq_cmd_ring.vxtxr_gen, 0, "");
3794 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD,
3795 		    &txq->vxtxq_comp_ring.vxcr_next, 0, "");
3796 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
3797 		    &txq->vxtxq_comp_ring.vxcr_ndesc, 0, "");
3798 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
3799 		    &txq->vxtxq_comp_ring.vxcr_gen, 0, "");
3800 	}
3801 
3802 	for (i = 0; i < sc->vmx_nrxqueues; i++) {
3803 		struct vmxnet3_rxqueue *rxq = &sc->vmx_rxq[i];
3804 
3805 		node = SYSCTL_ADD_NODE(ctx, rxq->vxrxq_sysctl, OID_AUTO,
3806 		    "debug", CTLFLAG_RD, NULL, "");
3807 		list = SYSCTL_CHILDREN(node);
3808 
3809 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_fill", CTLFLAG_RD,
3810 		    &rxq->vxrxq_cmd_ring[0].vxrxr_fill, 0, "");
3811 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_ndesc", CTLFLAG_RD,
3812 		    &rxq->vxrxq_cmd_ring[0].vxrxr_ndesc, 0, "");
3813 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd0_gen", CTLFLAG_RD,
3814 		    &rxq->vxrxq_cmd_ring[0].vxrxr_gen, 0, "");
3815 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_fill", CTLFLAG_RD,
3816 		    &rxq->vxrxq_cmd_ring[1].vxrxr_fill, 0, "");
3817 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_ndesc", CTLFLAG_RD,
3818 		    &rxq->vxrxq_cmd_ring[1].vxrxr_ndesc, 0, "");
3819 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd1_gen", CTLFLAG_RD,
3820 		    &rxq->vxrxq_cmd_ring[1].vxrxr_gen, 0, "");
3821 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD,
3822 		    &rxq->vxrxq_comp_ring.vxcr_next, 0, "");
3823 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
3824 		    &rxq->vxrxq_comp_ring.vxcr_ndesc, 0, "");
3825 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
3826 		    &rxq->vxrxq_comp_ring.vxcr_gen, 0, "");
3827 	}
3828 }
3829 
3830 static void
3831 vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *sc,
3832     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3833 {
3834 	int i;
3835 
3836 	for (i = 0; i < sc->vmx_ntxqueues; i++)
3837 		vmxnet3_setup_txq_sysctl(&sc->vmx_txq[i], ctx, child);
3838 	for (i = 0; i < sc->vmx_nrxqueues; i++)
3839 		vmxnet3_setup_rxq_sysctl(&sc->vmx_rxq[i], ctx, child);
3840 
3841 	vmxnet3_setup_debug_sysctl(sc, ctx, child);
3842 }
3843 
static void
vmxnet3_setup_sysctl(struct vmxnet3_softc *sc)
{
	device_t dev;
	struct vmxnet3_statistics *stats;
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid *tree;
	struct sysctl_oid_list *child;

	dev = sc->vmx_dev;
	ctx = device_get_sysctl_ctx(dev);
	tree = device_get_sysctl_tree(dev);
	child = SYSCTL_CHILDREN(tree);

	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_ntxqueues", CTLFLAG_RD,
	    &sc->vmx_max_ntxqueues, 0, "Maximum number of Tx queues");
	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_nrxqueues", CTLFLAG_RD,
	    &sc->vmx_max_nrxqueues, 0, "Maximum number of Rx queues");
	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "ntxqueues", CTLFLAG_RD,
	    &sc->vmx_ntxqueues, 0, "Number of Tx queues");
	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "nrxqueues", CTLFLAG_RD,
	    &sc->vmx_nrxqueues, 0, "Number of Rx queues");

	stats = &sc->vmx_stats;
	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "defragged", CTLFLAG_RD,
	    &stats->vmst_defragged, 0, "Tx mbuf chains defragged");
	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "defrag_failed", CTLFLAG_RD,
	    &stats->vmst_defrag_failed, 0,
	    "Tx mbuf dropped because defrag failed");
	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "mgetcl_failed", CTLFLAG_RD,
	    &stats->vmst_mgetcl_failed, 0, "mbuf cluster allocation failed");
	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "mbuf_load_failed", CTLFLAG_RD,
	    &stats->vmst_mbuf_load_failed, 0, "mbuf load segments failed");

	vmxnet3_setup_queue_sysctl(sc, ctx, child);
}

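/*
 * BAR0/BAR1 register access helpers.  BAR0 carries the interrupt mask and
 * queue doorbell registers; BAR1 carries the command and configuration
 * registers.  All device registers are 32 bits wide.
 */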
static void
vmxnet3_write_bar0(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
{

	bus_space_write_4(sc->vmx_iot0, sc->vmx_ioh0, r, v);
}

static uint32_t
vmxnet3_read_bar1(struct vmxnet3_softc *sc, bus_size_t r)
{

	return (bus_space_read_4(sc->vmx_iot1, sc->vmx_ioh1, r));
}

static void
vmxnet3_write_bar1(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
{

	bus_space_write_4(sc->vmx_iot1, sc->vmx_ioh1, r, v);
}

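/*
 * Command register protocol: writing a command code to VMXNET3_BAR1_CMD
 * executes it, and commands that return a value place the result in the
 * same register, which vmxnet3_read_cmd() reads back after a full barrier.
 */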
static void
vmxnet3_write_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
{

	vmxnet3_write_bar1(sc, VMXNET3_BAR1_CMD, cmd);
}

static uint32_t
vmxnet3_read_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
{

	vmxnet3_write_cmd(sc, cmd);
	bus_space_barrier(sc->vmx_iot1, sc->vmx_ioh1, 0, 0,
	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
	return (vmxnet3_read_bar1(sc, VMXNET3_BAR1_CMD));
}

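/*
 * Per-vector interrupt masking: writing 0 to the IMASK register unmasks
 * (enables) the vector, writing 1 masks (disables) it.
 */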
static void
vmxnet3_enable_intr(struct vmxnet3_softc *sc, int irq)
{

	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 0);
}

static void
vmxnet3_disable_intr(struct vmxnet3_softc *sc, int irq)
{

	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 1);
}

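/*
 * Global interrupt control: the ICTRL_DISABLE_ALL bit in the shared data
 * area is cleared or set together with the per-vector IMASK registers.
 */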
static void
vmxnet3_enable_all_intrs(struct vmxnet3_softc *sc)
{
	int i;

	sc->vmx_ds->ictrl &= ~VMXNET3_ICTRL_DISABLE_ALL;
	for (i = 0; i < sc->vmx_nintrs; i++)
		vmxnet3_enable_intr(sc, i);
}

static void
vmxnet3_disable_all_intrs(struct vmxnet3_softc *sc)
{
	int i;

	sc->vmx_ds->ictrl |= VMXNET3_ICTRL_DISABLE_ALL;
	for (i = 0; i < sc->vmx_nintrs; i++)
		vmxnet3_disable_intr(sc, i);
}

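/*
 * bus_dmamap_load() callback; the allocations below use a single segment,
 * so only the first segment's bus address needs to be recorded.
 */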
static void
vmxnet3_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	bus_addr_t *baddr = arg;

	if (error == 0)
		*baddr = segs->ds_addr;
}

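/*
 * Allocate a zeroed DMA buffer backed by a dedicated tag with a single
 * contiguous segment, and record its bus address for handing to the device.
 */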
static int
vmxnet3_dma_malloc(struct vmxnet3_softc *sc, bus_size_t size, bus_size_t align,
    struct vmxnet3_dma_alloc *dma)
{
	device_t dev;
	int error;

	dev = sc->vmx_dev;
	bzero(dma, sizeof(struct vmxnet3_dma_alloc));

	error = bus_dma_tag_create(bus_get_dma_tag(dev),
	    align, 0,		/* alignment, bounds */
	    BUS_SPACE_MAXADDR,	/* lowaddr */
	    BUS_SPACE_MAXADDR,	/* highaddr */
	    NULL, NULL,		/* filter, filterarg */
	    size,		/* maxsize */
	    1,			/* nsegments */
	    size,		/* maxsegsize */
	    BUS_DMA_ALLOCNOW,	/* flags */
	    &dma->dma_tag);
	if (error) {
		device_printf(dev, "bus_dma_tag_create failed: %d\n", error);
		goto fail;
	}

	error = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
	    BUS_DMA_ZERO | BUS_DMA_NOWAIT, &dma->dma_map);
	if (error) {
		device_printf(dev, "bus_dmamem_alloc failed: %d\n", error);
		goto fail;
	}

	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
	    size, vmxnet3_dmamap_cb, &dma->dma_paddr, BUS_DMA_NOWAIT);
	if (error) {
		device_printf(dev, "bus_dmamap_load failed: %d\n", error);
		goto fail;
	}

	dma->dma_size = size;

fail:
	if (error)
		vmxnet3_dma_free(sc, dma);

	return (error);
}

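/*
 * Release a DMA allocation in the reverse order of vmxnet3_dma_malloc():
 * sync and unload the map, free the memory, destroy the tag, and clear the
 * descriptor so a repeated free is harmless.
 */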
static void
vmxnet3_dma_free(struct vmxnet3_softc *sc, struct vmxnet3_dma_alloc *dma)
{

	if (dma->dma_tag != NULL) {
		if (dma->dma_paddr != 0) {
			bus_dmamap_sync(dma->dma_tag, dma->dma_map,
			    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(dma->dma_tag, dma->dma_map);
		}

		if (dma->dma_vaddr != NULL) {
			bus_dmamem_free(dma->dma_tag, dma->dma_vaddr,
			    dma->dma_map);
		}

		bus_dma_tag_destroy(dma->dma_tag);
	}
	bzero(dma, sizeof(struct vmxnet3_dma_alloc));
}

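/*
 * Fetch a per-device loader tunable, e.g. "hw.vmx.0.<knob>" for unit 0,
 * falling back to the supplied default when the tunable is not set.
 */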
static int
vmxnet3_tunable_int(struct vmxnet3_softc *sc, const char *knob, int def)
{
	char path[64];

	ksnprintf(path, sizeof(path),
	    "hw.vmx.%d.%s", device_get_unit(sc->vmx_dev), knob);
	TUNABLE_INT_FETCH(path, &def);

	return (def);
}

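/* x86 fence instructions: full, store, and load memory barriers. */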
#define mb()	__asm volatile("mfence" ::: "memory")
#define wmb()	__asm volatile("sfence" ::: "memory")
#define rmb()	__asm volatile("lfence" ::: "memory")

/*
 * Since this is a purely paravirtualized device, we do not have
 * to worry about DMA coherency. But at times, we must make sure
 * both the compiler and CPU do not reorder memory operations.
 */
static inline void
vmxnet3_barrier(struct vmxnet3_softc *sc, vmxnet3_barrier_t type)
{

	switch (type) {
	case VMXNET3_BARRIER_RD:
		rmb();
		break;
	case VMXNET3_BARRIER_WR:
		wmb();
		break;
	case VMXNET3_BARRIER_RDWR:
		mb();
		break;
	default:
		panic("%s: bad barrier type %d", __func__, type);
	}
}
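
/*
 * Illustrative usage (not a call site in this section): a producer typically
 * fills in a descriptor, issues VMXNET3_BARRIER_WR, and only then flips the
 * descriptor's generation bit, so the device never observes a partially
 * written descriptor.
 */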