xref: /freebsd/sys/dev/vmware/vmxnet3/if_vmx.c (revision d0b2dbfa)
1 /*-
2  * Copyright (c) 2013 Tsubai Masanari
3  * Copyright (c) 2013 Bryan Venteicher <bryanv@FreeBSD.org>
4  * Copyright (c) 2018 Patrick Kelsey
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  *
18  * $OpenBSD: src/sys/dev/pci/if_vmx.c,v 1.11 2013/06/22 00:28:10 uebayasi Exp $
19  */
20 
21 /* Driver for VMware vmxnet3 virtual ethernet devices. */
22 
23 #include <sys/cdefs.h>
24 #include "opt_rss.h"
25 
26 #include <sys/param.h>
27 #include <sys/systm.h>
28 #include <sys/kernel.h>
29 #include <sys/endian.h>
30 #include <sys/sockio.h>
31 #include <sys/mbuf.h>
32 #include <sys/malloc.h>
33 #include <sys/module.h>
34 #include <sys/socket.h>
35 #include <sys/sysctl.h>
36 #include <sys/smp.h>
37 #include <vm/vm.h>
38 #include <vm/pmap.h>
39 
40 #include <net/ethernet.h>
41 #include <net/if.h>
42 #include <net/if_var.h>
43 #include <net/if_arp.h>
44 #include <net/if_dl.h>
45 #include <net/if_types.h>
46 #include <net/if_media.h>
47 #include <net/if_vlan_var.h>
48 #include <net/iflib.h>
49 #ifdef RSS
50 #include <net/rss_config.h>
51 #endif
52 
53 #include <netinet/in_systm.h>
54 #include <netinet/in.h>
55 #include <netinet/ip.h>
56 #include <netinet/ip6.h>
57 #include <netinet6/ip6_var.h>
58 #include <netinet/udp.h>
59 #include <netinet/tcp.h>
60 
61 #include <machine/bus.h>
62 #include <machine/resource.h>
63 #include <sys/bus.h>
64 #include <sys/rman.h>
65 
66 #include <dev/pci/pcireg.h>
67 #include <dev/pci/pcivar.h>
68 
69 #include "ifdi_if.h"
70 
71 #include "if_vmxreg.h"
72 #include "if_vmxvar.h"
73 
74 #include "opt_inet.h"
75 #include "opt_inet6.h"
76 
77 #define VMXNET3_VMWARE_VENDOR_ID	0x15AD
78 #define VMXNET3_VMWARE_DEVICE_ID	0x07B0
79 
80 static pci_vendor_info_t vmxnet3_vendor_info_array[] =
81 {
82 	PVID(VMXNET3_VMWARE_VENDOR_ID, VMXNET3_VMWARE_DEVICE_ID, "VMware VMXNET3 Ethernet Adapter"),
83 	/* required last entry */
84 	PVID_END
85 };
86 
87 static void	*vmxnet3_register(device_t);
88 static int	vmxnet3_attach_pre(if_ctx_t);
89 static int	vmxnet3_msix_intr_assign(if_ctx_t, int);
90 static void	vmxnet3_free_irqs(struct vmxnet3_softc *);
91 static int	vmxnet3_attach_post(if_ctx_t);
92 static int	vmxnet3_detach(if_ctx_t);
93 static int	vmxnet3_shutdown(if_ctx_t);
94 static int	vmxnet3_suspend(if_ctx_t);
95 static int	vmxnet3_resume(if_ctx_t);
96 
97 static int	vmxnet3_alloc_resources(struct vmxnet3_softc *);
98 static void	vmxnet3_free_resources(struct vmxnet3_softc *);
99 static int	vmxnet3_check_version(struct vmxnet3_softc *);
100 static void	vmxnet3_set_interrupt_idx(struct vmxnet3_softc *);
101 
102 static int	vmxnet3_queues_shared_alloc(struct vmxnet3_softc *);
103 static void	vmxnet3_init_txq(struct vmxnet3_softc *, int);
104 static int	vmxnet3_tx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int, int);
105 static void	vmxnet3_init_rxq(struct vmxnet3_softc *, int, int);
106 static int	vmxnet3_rx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int, int);
107 static void	vmxnet3_queues_free(if_ctx_t);
108 
109 static int	vmxnet3_alloc_shared_data(struct vmxnet3_softc *);
110 static void	vmxnet3_free_shared_data(struct vmxnet3_softc *);
111 static int	vmxnet3_alloc_mcast_table(struct vmxnet3_softc *);
112 static void	vmxnet3_free_mcast_table(struct vmxnet3_softc *);
113 static void	vmxnet3_init_shared_data(struct vmxnet3_softc *);
114 static void	vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *);
115 static void	vmxnet3_reinit_shared_data(struct vmxnet3_softc *);
116 static int	vmxnet3_alloc_data(struct vmxnet3_softc *);
117 static void	vmxnet3_free_data(struct vmxnet3_softc *);
118 
119 static void	vmxnet3_evintr(struct vmxnet3_softc *);
120 static int	vmxnet3_isc_txd_encap(void *, if_pkt_info_t);
121 static void	vmxnet3_isc_txd_flush(void *, uint16_t, qidx_t);
122 static int	vmxnet3_isc_txd_credits_update(void *, uint16_t, bool);
123 static int	vmxnet3_isc_rxd_available(void *, uint16_t, qidx_t, qidx_t);
124 static int	vmxnet3_isc_rxd_pkt_get(void *, if_rxd_info_t);
125 static void	vmxnet3_isc_rxd_refill(void *, if_rxd_update_t);
126 static void	vmxnet3_isc_rxd_flush(void *, uint16_t, uint8_t, qidx_t);
127 static int	vmxnet3_legacy_intr(void *);
128 static int	vmxnet3_rxq_intr(void *);
129 static int	vmxnet3_event_intr(void *);
130 
131 static void	vmxnet3_stop(if_ctx_t);
132 
133 static void	vmxnet3_txinit(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
134 static void	vmxnet3_rxinit(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
135 static void	vmxnet3_reinit_queues(struct vmxnet3_softc *);
136 static int	vmxnet3_enable_device(struct vmxnet3_softc *);
137 static void	vmxnet3_reinit_rxfilters(struct vmxnet3_softc *);
138 static void	vmxnet3_init(if_ctx_t);
139 static void	vmxnet3_multi_set(if_ctx_t);
140 static int	vmxnet3_mtu_set(if_ctx_t, uint32_t);
141 static void	vmxnet3_media_status(if_ctx_t, struct ifmediareq *);
142 static int	vmxnet3_media_change(if_ctx_t);
143 static int	vmxnet3_promisc_set(if_ctx_t, int);
144 static uint64_t	vmxnet3_get_counter(if_ctx_t, ift_counter);
145 static void	vmxnet3_update_admin_status(if_ctx_t);
146 static void	vmxnet3_txq_timer(if_ctx_t, uint16_t);
147 
148 static void	vmxnet3_update_vlan_filter(struct vmxnet3_softc *, int,
149 		    uint16_t);
150 static void	vmxnet3_vlan_register(if_ctx_t, uint16_t);
151 static void	vmxnet3_vlan_unregister(if_ctx_t, uint16_t);
152 static void	vmxnet3_set_rxfilter(struct vmxnet3_softc *, int);
153 
154 static void	vmxnet3_refresh_host_stats(struct vmxnet3_softc *);
155 static int	vmxnet3_link_is_up(struct vmxnet3_softc *);
156 static void	vmxnet3_link_status(struct vmxnet3_softc *);
157 static void	vmxnet3_set_lladdr(struct vmxnet3_softc *);
158 static void	vmxnet3_get_lladdr(struct vmxnet3_softc *);
159 
160 static void	vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *,
161 		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
162 static void	vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *,
163 		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
164 static void	vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *,
165 		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
166 static void	vmxnet3_setup_sysctl(struct vmxnet3_softc *);
167 
168 static void	vmxnet3_write_bar0(struct vmxnet3_softc *, bus_size_t,
169 		    uint32_t);
170 static uint32_t	vmxnet3_read_bar1(struct vmxnet3_softc *, bus_size_t);
171 static void	vmxnet3_write_bar1(struct vmxnet3_softc *, bus_size_t,
172 		    uint32_t);
173 static void	vmxnet3_write_cmd(struct vmxnet3_softc *, uint32_t);
174 static uint32_t	vmxnet3_read_cmd(struct vmxnet3_softc *, uint32_t);
175 
176 static int	vmxnet3_tx_queue_intr_enable(if_ctx_t, uint16_t);
177 static int	vmxnet3_rx_queue_intr_enable(if_ctx_t, uint16_t);
178 static void	vmxnet3_link_intr_enable(if_ctx_t);
179 static void	vmxnet3_enable_intr(struct vmxnet3_softc *, int);
180 static void	vmxnet3_disable_intr(struct vmxnet3_softc *, int);
181 static void	vmxnet3_intr_enable_all(if_ctx_t);
182 static void	vmxnet3_intr_disable_all(if_ctx_t);
183 
184 typedef enum {
185 	VMXNET3_BARRIER_RD,
186 	VMXNET3_BARRIER_WR,
187 	VMXNET3_BARRIER_RDWR,
188 } vmxnet3_barrier_t;
189 
190 static void	vmxnet3_barrier(struct vmxnet3_softc *, vmxnet3_barrier_t);
191 
192 static device_method_t vmxnet3_methods[] = {
193 	/* Device interface */
194 	DEVMETHOD(device_register, vmxnet3_register),
195 	DEVMETHOD(device_probe, iflib_device_probe),
196 	DEVMETHOD(device_attach, iflib_device_attach),
197 	DEVMETHOD(device_detach, iflib_device_detach),
198 	DEVMETHOD(device_shutdown, iflib_device_shutdown),
199 	DEVMETHOD(device_suspend, iflib_device_suspend),
200 	DEVMETHOD(device_resume, iflib_device_resume),
201 	DEVMETHOD_END
202 };
203 
204 static driver_t vmxnet3_driver = {
205 	"vmx", vmxnet3_methods, sizeof(struct vmxnet3_softc)
206 };
207 
208 DRIVER_MODULE(vmx, pci, vmxnet3_driver, 0, 0);
209 IFLIB_PNP_INFO(pci, vmx, vmxnet3_vendor_info_array);
210 MODULE_VERSION(vmx, 2);
211 
212 MODULE_DEPEND(vmx, pci, 1, 1, 1);
213 MODULE_DEPEND(vmx, ether, 1, 1, 1);
214 MODULE_DEPEND(vmx, iflib, 1, 1, 1);
215 
216 static device_method_t vmxnet3_iflib_methods[] = {
217 	DEVMETHOD(ifdi_tx_queues_alloc, vmxnet3_tx_queues_alloc),
218 	DEVMETHOD(ifdi_rx_queues_alloc, vmxnet3_rx_queues_alloc),
219 	DEVMETHOD(ifdi_queues_free, vmxnet3_queues_free),
220 
221 	DEVMETHOD(ifdi_attach_pre, vmxnet3_attach_pre),
222 	DEVMETHOD(ifdi_attach_post, vmxnet3_attach_post),
223 	DEVMETHOD(ifdi_detach, vmxnet3_detach),
224 
225 	DEVMETHOD(ifdi_init, vmxnet3_init),
226 	DEVMETHOD(ifdi_stop, vmxnet3_stop),
227 	DEVMETHOD(ifdi_multi_set, vmxnet3_multi_set),
228 	DEVMETHOD(ifdi_mtu_set, vmxnet3_mtu_set),
229 	DEVMETHOD(ifdi_media_status, vmxnet3_media_status),
230 	DEVMETHOD(ifdi_media_change, vmxnet3_media_change),
231 	DEVMETHOD(ifdi_promisc_set, vmxnet3_promisc_set),
232 	DEVMETHOD(ifdi_get_counter, vmxnet3_get_counter),
233 	DEVMETHOD(ifdi_update_admin_status, vmxnet3_update_admin_status),
234 	DEVMETHOD(ifdi_timer, vmxnet3_txq_timer),
235 
236 	DEVMETHOD(ifdi_tx_queue_intr_enable, vmxnet3_tx_queue_intr_enable),
237 	DEVMETHOD(ifdi_rx_queue_intr_enable, vmxnet3_rx_queue_intr_enable),
238 	DEVMETHOD(ifdi_link_intr_enable, vmxnet3_link_intr_enable),
239 	DEVMETHOD(ifdi_intr_enable, vmxnet3_intr_enable_all),
240 	DEVMETHOD(ifdi_intr_disable, vmxnet3_intr_disable_all),
241 	DEVMETHOD(ifdi_msix_intr_assign, vmxnet3_msix_intr_assign),
242 
243 	DEVMETHOD(ifdi_vlan_register, vmxnet3_vlan_register),
244 	DEVMETHOD(ifdi_vlan_unregister, vmxnet3_vlan_unregister),
245 
246 	DEVMETHOD(ifdi_shutdown, vmxnet3_shutdown),
247 	DEVMETHOD(ifdi_suspend, vmxnet3_suspend),
248 	DEVMETHOD(ifdi_resume, vmxnet3_resume),
249 
250 	DEVMETHOD_END
251 };
252 
253 static driver_t vmxnet3_iflib_driver = {
254 	"vmx", vmxnet3_iflib_methods, sizeof(struct vmxnet3_softc)
255 };
256 
257 struct if_txrx vmxnet3_txrx = {
258 	.ift_txd_encap = vmxnet3_isc_txd_encap,
259 	.ift_txd_flush = vmxnet3_isc_txd_flush,
260 	.ift_txd_credits_update = vmxnet3_isc_txd_credits_update,
261 	.ift_rxd_available = vmxnet3_isc_rxd_available,
262 	.ift_rxd_pkt_get = vmxnet3_isc_rxd_pkt_get,
263 	.ift_rxd_refill = vmxnet3_isc_rxd_refill,
264 	.ift_rxd_flush = vmxnet3_isc_rxd_flush,
265 	.ift_legacy_intr = vmxnet3_legacy_intr
266 };
267 
268 static struct if_shared_ctx vmxnet3_sctx_init = {
269 	.isc_magic = IFLIB_MAGIC,
270 	.isc_q_align = 512,
271 
272 	.isc_tx_maxsize = VMXNET3_TX_MAXSIZE,
273 	.isc_tx_maxsegsize = VMXNET3_TX_MAXSEGSIZE,
274 	.isc_tso_maxsize = VMXNET3_TSO_MAXSIZE + sizeof(struct ether_vlan_header),
275 	.isc_tso_maxsegsize = VMXNET3_TX_MAXSEGSIZE,
276 
277 	/*
278 	 * These values are used to configure the busdma tag used for
279 	 * receive descriptors.  Each receive descriptor only points to one
280 	 * buffer.
281 	 */
282 	.isc_rx_maxsize = VMXNET3_RX_MAXSEGSIZE, /* One buf per descriptor */
283 	.isc_rx_nsegments = 1,  /* One mapping per descriptor */
284 	.isc_rx_maxsegsize = VMXNET3_RX_MAXSEGSIZE,
285 
286 	.isc_admin_intrcnt = 1,
287 	.isc_vendor_info = vmxnet3_vendor_info_array,
288 	.isc_driver_version = "2",
289 	.isc_driver = &vmxnet3_iflib_driver,
290 	.isc_flags = IFLIB_HAS_RXCQ | IFLIB_HAS_TXCQ | IFLIB_SINGLE_IRQ_RX_ONLY,
291 
292 	/*
293 	 * Number of receive queues per receive queue set, with associated
294 	 * descriptor settings for each.
295 	 */
296 	.isc_nrxqs = 3,
297 	.isc_nfl = 2, /* one free list for each receive command queue */
298 	.isc_nrxd_min = {VMXNET3_MIN_RX_NDESC, VMXNET3_MIN_RX_NDESC, VMXNET3_MIN_RX_NDESC},
299 	.isc_nrxd_max = {VMXNET3_MAX_RX_NDESC, VMXNET3_MAX_RX_NDESC, VMXNET3_MAX_RX_NDESC},
300 	.isc_nrxd_default = {VMXNET3_DEF_RX_NDESC, VMXNET3_DEF_RX_NDESC, VMXNET3_DEF_RX_NDESC},
301 
302 	/*
303 	 * Number of transmit queues per transmit queue set, with associated
304 	 * descriptor settings for each.
305 	 */
306 	.isc_ntxqs = 2,
307 	.isc_ntxd_min = {VMXNET3_MIN_TX_NDESC, VMXNET3_MIN_TX_NDESC},
308 	.isc_ntxd_max = {VMXNET3_MAX_TX_NDESC, VMXNET3_MAX_TX_NDESC},
309 	.isc_ntxd_default = {VMXNET3_DEF_TX_NDESC, VMXNET3_DEF_TX_NDESC},
310 };
311 
312 static void *
313 vmxnet3_register(device_t dev)
314 {
315 	return (&vmxnet3_sctx_init);
316 }
317 
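/*
 * Round a value down to the nearest power of two; e.g., trunc_powerof2(6)
 * returns 4.  Used below to constrain the rx/tx queue set counts.
 */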
318 static int
319 trunc_powerof2(int val)
320 {
321 
322 	return (1U << (fls(val) - 1));
323 }
324 
325 static int
326 vmxnet3_attach_pre(if_ctx_t ctx)
327 {
328 	device_t dev;
329 	if_softc_ctx_t scctx;
330 	struct vmxnet3_softc *sc;
331 	uint32_t intr_config;
332 	int error;
333 
334 	dev = iflib_get_dev(ctx);
335 	sc = iflib_get_softc(ctx);
336 	sc->vmx_dev = dev;
337 	sc->vmx_ctx = ctx;
338 	sc->vmx_sctx = iflib_get_sctx(ctx);
339 	sc->vmx_scctx = iflib_get_softc_ctx(ctx);
340 	sc->vmx_ifp = iflib_get_ifp(ctx);
341 	sc->vmx_media = iflib_get_media(ctx);
342 	scctx = sc->vmx_scctx;
343 
344 	scctx->isc_tx_nsegments = VMXNET3_TX_MAXSEGS;
345 	scctx->isc_tx_tso_segments_max = VMXNET3_TX_MAXSEGS;
346 	/* isc_tx_tso_size_max doesn't include possible vlan header */
347 	scctx->isc_tx_tso_size_max = VMXNET3_TSO_MAXSIZE;
348 	scctx->isc_tx_tso_segsize_max = VMXNET3_TX_MAXSEGSIZE;
349 	scctx->isc_txrx = &vmxnet3_txrx;
350 
351 	/* If 0, the iflib tunable was not set, so set to the default */
352 	if (scctx->isc_nrxqsets == 0)
353 		scctx->isc_nrxqsets = VMXNET3_DEF_RX_QUEUES;
354 	scctx->isc_nrxqsets = trunc_powerof2(scctx->isc_nrxqsets);
355 	scctx->isc_nrxqsets_max = min(VMXNET3_MAX_RX_QUEUES, mp_ncpus);
356 	scctx->isc_nrxqsets_max = trunc_powerof2(scctx->isc_nrxqsets_max);
357 
358 	/* If 0, the iflib tunable was not set, so set to the default */
359 	if (scctx->isc_ntxqsets == 0)
360 		scctx->isc_ntxqsets = VMXNET3_DEF_TX_QUEUES;
361 	scctx->isc_ntxqsets = trunc_powerof2(scctx->isc_ntxqsets);
362 	scctx->isc_ntxqsets_max = min(VMXNET3_MAX_TX_QUEUES, mp_ncpus);
363 	scctx->isc_ntxqsets_max = trunc_powerof2(scctx->isc_ntxqsets_max);
364 
365 	/*
366 	 * Enforce that the transmit completion queue descriptor count is
367 	 * the same as the transmit command queue descriptor count.
368 	 */
369 	scctx->isc_ntxd[0] = scctx->isc_ntxd[1];
370 	scctx->isc_txqsizes[0] =
371 	    sizeof(struct vmxnet3_txcompdesc) * scctx->isc_ntxd[0];
372 	scctx->isc_txqsizes[1] =
373 	    sizeof(struct vmxnet3_txdesc) * scctx->isc_ntxd[1];
374 
375 	/*
376 	 * Enforce that the receive completion queue descriptor count is the
377 	 * sum of the receive command queue descriptor counts, and that the
378 	 * second receive command queue descriptor count is the same as the
379 	 * first one.
380 	 */
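	/*
	 * For example, with 512 descriptors in each receive command ring,
	 * the completion ring is sized for 1024 descriptors.
	 */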
381 	scctx->isc_nrxd[2] = scctx->isc_nrxd[1];
382 	scctx->isc_nrxd[0] = scctx->isc_nrxd[1] + scctx->isc_nrxd[2];
383 	scctx->isc_rxqsizes[0] =
384 	    sizeof(struct vmxnet3_rxcompdesc) * scctx->isc_nrxd[0];
385 	scctx->isc_rxqsizes[1] =
386 	    sizeof(struct vmxnet3_rxdesc) * scctx->isc_nrxd[1];
387 	scctx->isc_rxqsizes[2] =
388 	    sizeof(struct vmxnet3_rxdesc) * scctx->isc_nrxd[2];
389 
390 	/*
391 	 * Initialize the max frame size and descriptor queue buffer
392 	 * sizes.
393 	 */
394 	vmxnet3_mtu_set(ctx, if_getmtu(sc->vmx_ifp));
395 
396 	scctx->isc_rss_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
397 
398 	/* Map PCI BARs */
399 	error = vmxnet3_alloc_resources(sc);
400 	if (error)
401 		goto fail;
402 
403 	/* Check device versions */
404 	error = vmxnet3_check_version(sc);
405 	if (error)
406 		goto fail;
407 
408 	/*
409 	 * The interrupt mode can be set in the hypervisor configuration via
410 	 * the parameter ethernet<N>.intrMode.
411 	 */
412 	intr_config = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_INTRCFG);
413 	sc->vmx_intr_mask_mode = (intr_config >> 2) & 0x03;
414 
415 	/*
416 	 * Configure the softc context so that iflib attempts the interrupt
417 	 * mode indicated by intr_config.  iflib will follow the usual
418 	 * fallback path MSI-X -> MSI -> LEGACY, starting from the mode
419 	 * indicated here.
420 	 */
421 	switch (intr_config & 0x03) {
422 	case VMXNET3_IT_AUTO:
423 	case VMXNET3_IT_MSIX:
424 		scctx->isc_msix_bar = pci_msix_table_bar(dev);
425 		break;
426 	case VMXNET3_IT_MSI:
427 		scctx->isc_msix_bar = -1;
428 		scctx->isc_disable_msix = 1;
429 		break;
430 	case VMXNET3_IT_LEGACY:
431 		scctx->isc_msix_bar = 0;
432 		break;
433 	}
434 
435 	scctx->isc_tx_csum_flags = VMXNET3_CSUM_ALL_OFFLOAD;
436 	scctx->isc_capabilities = scctx->isc_capenable =
437 	    IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6 |
438 	    IFCAP_TSO4 | IFCAP_TSO6 |
439 	    IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 |
440 	    IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING |
441 	    IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWTSO |
442 	    IFCAP_JUMBO_MTU;
443 
444 	/* These capabilities are not enabled by default. */
445 	scctx->isc_capabilities |= IFCAP_LRO | IFCAP_VLAN_HWFILTER;
446 
447 	vmxnet3_get_lladdr(sc);
448 	iflib_set_mac(ctx, sc->vmx_lladdr);
449 
450 	return (0);
451 fail:
452 	/*
453 	 * We must completely clean up anything allocated above as iflib
454 	 * will not invoke any other driver entry points as a result of this
455 	 * failure.
456 	 */
457 	vmxnet3_free_resources(sc);
458 
459 	return (error);
460 }
461 
462 static int
463 vmxnet3_msix_intr_assign(if_ctx_t ctx, int msix)
464 {
465 	struct vmxnet3_softc *sc;
466 	if_softc_ctx_t scctx;
467 	struct vmxnet3_rxqueue *rxq;
468 	int error;
469 	int i;
470 	char irq_name[16];
471 
472 	sc = iflib_get_softc(ctx);
473 	scctx = sc->vmx_scctx;
474 
475 	for (i = 0; i < scctx->isc_nrxqsets; i++) {
476 		snprintf(irq_name, sizeof(irq_name), "rxq%d", i);
477 
478 		rxq = &sc->vmx_rxq[i];
479 		error = iflib_irq_alloc_generic(ctx, &rxq->vxrxq_irq, i + 1,
480 		    IFLIB_INTR_RXTX, vmxnet3_rxq_intr, rxq, i, irq_name);
481 		if (error) {
482 			device_printf(iflib_get_dev(ctx),
483 			    "Failed to register rxq %d interrupt handler\n", i);
484 			return (error);
485 		}
486 	}
487 
488 	for (i = 0; i < scctx->isc_ntxqsets; i++) {
489 		snprintf(irq_name, sizeof(irq_name), "txq%d", i);
490 
491 		/*
492 		 * Don't provide the corresponding rxq irq for reference -
493 		 * we want the transmit task to be attached to a task queue
494 		 * that is different from the one used by the corresponding
495 		 * rxq irq.  That is because the TX doorbell writes are very
496 		 * expensive as virtualized MMIO operations, so we want to
497 		 * be able to defer them to another core when possible so
498 		 * that they don't steal receive processing cycles during
499 		 * stack turnarounds like TCP ACK generation.  The other
500 		 * piece to this approach is enabling the iflib abdicate
501 		 * option (currently via an interface-specific
502 		 * tunable/sysctl).
503 		 */
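		/*
		 * (On recent FreeBSD that abdicate behaviour is typically
		 * toggled with the per-interface iflib sysctl, e.g.
		 * dev.vmx.<unit>.iflib.tx_abdicate; the name is given here
		 * only as an example and may differ.)
		 */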
504 		iflib_softirq_alloc_generic(ctx, NULL, IFLIB_INTR_TX, NULL, i,
505 		    irq_name);
506 	}
507 
508 	error = iflib_irq_alloc_generic(ctx, &sc->vmx_event_intr_irq,
509 	    scctx->isc_nrxqsets + 1, IFLIB_INTR_ADMIN, vmxnet3_event_intr, sc, 0,
510 	    "event");
511 	if (error) {
512 		device_printf(iflib_get_dev(ctx),
513 		    "Failed to register event interrupt handler\n");
514 		return (error);
515 	}
516 
517 	return (0);
518 }
519 
520 static void
521 vmxnet3_free_irqs(struct vmxnet3_softc *sc)
522 {
523 	if_softc_ctx_t scctx;
524 	struct vmxnet3_rxqueue *rxq;
525 	int i;
526 
527 	scctx = sc->vmx_scctx;
528 
529 	for (i = 0; i < scctx->isc_nrxqsets; i++) {
530 		rxq = &sc->vmx_rxq[i];
531 		iflib_irq_free(sc->vmx_ctx, &rxq->vxrxq_irq);
532 	}
533 
534 	iflib_irq_free(sc->vmx_ctx, &sc->vmx_event_intr_irq);
535 }
536 
537 static int
538 vmxnet3_attach_post(if_ctx_t ctx)
539 {
540 	if_softc_ctx_t scctx;
541 	struct vmxnet3_softc *sc;
542 	int error;
543 
544 	scctx = iflib_get_softc_ctx(ctx);
545 	sc = iflib_get_softc(ctx);
546 
547 	if (scctx->isc_nrxqsets > 1)
548 		sc->vmx_flags |= VMXNET3_FLAG_RSS;
549 
550 	error = vmxnet3_alloc_data(sc);
551 	if (error)
552 		goto fail;
553 
554 	vmxnet3_set_interrupt_idx(sc);
555 	vmxnet3_setup_sysctl(sc);
556 
557 	ifmedia_add(sc->vmx_media, IFM_ETHER | IFM_AUTO, 0, NULL);
558 	ifmedia_set(sc->vmx_media, IFM_ETHER | IFM_AUTO);
559 
560 fail:
561 	return (error);
562 }
563 
564 static int
565 vmxnet3_detach(if_ctx_t ctx)
566 {
567 	struct vmxnet3_softc *sc;
568 
569 	sc = iflib_get_softc(ctx);
570 
571 	vmxnet3_free_irqs(sc);
572 	vmxnet3_free_data(sc);
573 	vmxnet3_free_resources(sc);
574 
575 	return (0);
576 }
577 
578 static int
579 vmxnet3_shutdown(if_ctx_t ctx)
580 {
581 
582 	return (0);
583 }
584 
585 static int
586 vmxnet3_suspend(if_ctx_t ctx)
587 {
588 
589 	return (0);
590 }
591 
592 static int
593 vmxnet3_resume(if_ctx_t ctx)
594 {
595 
596 	return (0);
597 }
598 
599 static int
600 vmxnet3_alloc_resources(struct vmxnet3_softc *sc)
601 {
602 	device_t dev;
603 	int rid;
604 
605 	dev = sc->vmx_dev;
606 
607 	rid = PCIR_BAR(0);
608 	sc->vmx_res0 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
609 	    RF_ACTIVE);
610 	if (sc->vmx_res0 == NULL) {
611 		device_printf(dev,
612 		    "could not map BAR0 memory\n");
613 		return (ENXIO);
614 	}
615 
616 	sc->vmx_iot0 = rman_get_bustag(sc->vmx_res0);
617 	sc->vmx_ioh0 = rman_get_bushandle(sc->vmx_res0);
618 
619 	rid = PCIR_BAR(1);
620 	sc->vmx_res1 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
621 	    RF_ACTIVE);
622 	if (sc->vmx_res1 == NULL) {
623 		device_printf(dev,
624 		    "could not map BAR1 memory\n");
625 		return (ENXIO);
626 	}
627 
628 	sc->vmx_iot1 = rman_get_bustag(sc->vmx_res1);
629 	sc->vmx_ioh1 = rman_get_bushandle(sc->vmx_res1);
630 
631 	return (0);
632 }
633 
634 static void
635 vmxnet3_free_resources(struct vmxnet3_softc *sc)
636 {
637 	device_t dev;
638 
639 	dev = sc->vmx_dev;
640 
641 	if (sc->vmx_res0 != NULL) {
642 		bus_release_resource(dev, SYS_RES_MEMORY,
643 		    rman_get_rid(sc->vmx_res0), sc->vmx_res0);
644 		sc->vmx_res0 = NULL;
645 	}
646 
647 	if (sc->vmx_res1 != NULL) {
648 		bus_release_resource(dev, SYS_RES_MEMORY,
649 		    rman_get_rid(sc->vmx_res1), sc->vmx_res1);
650 		sc->vmx_res1 = NULL;
651 	}
652 }
653 
654 static int
655 vmxnet3_check_version(struct vmxnet3_softc *sc)
656 {
657 	device_t dev;
658 	uint32_t version;
659 
660 	dev = sc->vmx_dev;
661 
662 	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_VRRS);
663 	if ((version & 0x01) == 0) {
664 		device_printf(dev, "unsupported hardware version %#x\n",
665 		    version);
666 		return (ENOTSUP);
667 	}
668 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_VRRS, 1);
669 
670 	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_UVRS);
671 	if ((version & 0x01) == 0) {
672 		device_printf(dev, "unsupported UPT version %#x\n", version);
673 		return (ENOTSUP);
674 	}
675 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_UVRS, 1);
676 
677 	return (0);
678 }
679 
680 static void
681 vmxnet3_set_interrupt_idx(struct vmxnet3_softc *sc)
682 {
683 	if_softc_ctx_t scctx;
684 	struct vmxnet3_txqueue *txq;
685 	struct vmxnet3_txq_shared *txs;
686 	struct vmxnet3_rxqueue *rxq;
687 	struct vmxnet3_rxq_shared *rxs;
688 	int intr_idx;
689 	int i;
690 
691 	scctx = sc->vmx_scctx;
692 
693 	/*
694 	 * There is always one interrupt per receive queue, assigned
695 	 * starting with the first interrupt.  When there is only one
696 	 * interrupt available, the event interrupt shares the receive queue
697 	 * interrupt; otherwise it uses the interrupt following the last
698 	 * receive queue interrupt.  Transmit queues are not assigned
699 	 * interrupts, so they are given indexes beyond the indexes that
700 	 * correspond to the real interrupts.
701 	 */
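	/*
	 * For example, with four receive queues under MSI-X: vectors 0-3
	 * service the receive queues, vector 4 is the event interrupt, and
	 * the transmit queues are given the unused indexes 5, 6, ...
	 * (illustrative layout; the actual assignment depends on the number
	 * of vectors iflib negotiated).
	 */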
702 
703 	/* The event interrupt is always the last vector. */
704 	sc->vmx_event_intr_idx = scctx->isc_vectors - 1;
705 
706 	intr_idx = 0;
707 	for (i = 0; i < scctx->isc_nrxqsets; i++, intr_idx++) {
708 		rxq = &sc->vmx_rxq[i];
709 		rxs = rxq->vxrxq_rs;
710 		rxq->vxrxq_intr_idx = intr_idx;
711 		rxs->intr_idx = rxq->vxrxq_intr_idx;
712 	}
713 
714 	/*
715 	 * Assign the tx queues' interrupt indexes above what we are actually
716 	 * using.  These interrupts will never be enabled.
717 	 */
718 	intr_idx = scctx->isc_vectors;
719 	for (i = 0; i < scctx->isc_ntxqsets; i++, intr_idx++) {
720 		txq = &sc->vmx_txq[i];
721 		txs = txq->vxtxq_ts;
722 		txq->vxtxq_intr_idx = intr_idx;
723 		txs->intr_idx = txq->vxtxq_intr_idx;
724 	}
725 }
726 
727 static int
728 vmxnet3_queues_shared_alloc(struct vmxnet3_softc *sc)
729 {
730 	if_softc_ctx_t scctx;
731 	int size;
732 	int error;
733 
734 	scctx = sc->vmx_scctx;
735 
736 	/*
737 	 * The txq and rxq shared data areas must be allocated contiguously
738 	 * as vmxnet3_driver_shared contains only a single address member
739 	 * for the shared queue data area.
740 	 */
741 	size = scctx->isc_ntxqsets * sizeof(struct vmxnet3_txq_shared) +
742 	    scctx->isc_nrxqsets * sizeof(struct vmxnet3_rxq_shared);
743 	error = iflib_dma_alloc_align(sc->vmx_ctx, size, 128, &sc->vmx_qs_dma, 0);
744 	if (error) {
745 		device_printf(sc->vmx_dev, "cannot alloc queue shared memory\n");
746 		return (error);
747 	}
748 
749 	return (0);
750 }
751 
752 static void
753 vmxnet3_init_txq(struct vmxnet3_softc *sc, int q)
754 {
755 	struct vmxnet3_txqueue *txq;
756 	struct vmxnet3_comp_ring *txc;
757 	struct vmxnet3_txring *txr;
758 	if_softc_ctx_t scctx;
759 
760 	txq = &sc->vmx_txq[q];
761 	txc = &txq->vxtxq_comp_ring;
762 	txr = &txq->vxtxq_cmd_ring;
763 	scctx = sc->vmx_scctx;
764 
765 	snprintf(txq->vxtxq_name, sizeof(txq->vxtxq_name), "%s-tx%d",
766 	    device_get_nameunit(sc->vmx_dev), q);
767 
768 	txq->vxtxq_sc = sc;
769 	txq->vxtxq_id = q;
770 	txc->vxcr_ndesc = scctx->isc_ntxd[0];
771 	txr->vxtxr_ndesc = scctx->isc_ntxd[1];
772 }
773 
774 static int
775 vmxnet3_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
776     int ntxqs, int ntxqsets)
777 {
778 	struct vmxnet3_softc *sc;
779 	int q;
780 	int error;
781 	caddr_t kva;
782 
783 	sc = iflib_get_softc(ctx);
784 
785 	/* Allocate the array of transmit queues */
786 	sc->vmx_txq = malloc(sizeof(struct vmxnet3_txqueue) *
787 	    ntxqsets, M_DEVBUF, M_NOWAIT | M_ZERO);
788 	if (sc->vmx_txq == NULL)
789 		return (ENOMEM);
790 
791 	/* Initialize driver state for each transmit queue */
792 	for (q = 0; q < ntxqsets; q++)
793 		vmxnet3_init_txq(sc, q);
794 
795 	/*
796 	 * Allocate queue state that is shared with the device.  This check
797 	 * and call are performed in both vmxnet3_tx_queues_alloc() and
798 	 * vmxnet3_rx_queues_alloc() so that we don't have to care which
799 	 * order iflib invokes those routines in.
800 	 */
801 	if (sc->vmx_qs_dma.idi_size == 0) {
802 		error = vmxnet3_queues_shared_alloc(sc);
803 		if (error)
804 			return (error);
805 	}
806 
807 	kva = sc->vmx_qs_dma.idi_vaddr;
808 	for (q = 0; q < ntxqsets; q++) {
809 		sc->vmx_txq[q].vxtxq_ts = (struct vmxnet3_txq_shared *) kva;
810 		kva += sizeof(struct vmxnet3_txq_shared);
811 	}
812 
813 	/* Record descriptor ring vaddrs and paddrs */
814 	for (q = 0; q < ntxqsets; q++) {
815 		struct vmxnet3_txqueue *txq;
816 		struct vmxnet3_txring *txr;
817 		struct vmxnet3_comp_ring *txc;
818 
819 		txq = &sc->vmx_txq[q];
820 		txc = &txq->vxtxq_comp_ring;
821 		txr = &txq->vxtxq_cmd_ring;
822 
823 		/* Completion ring */
824 		txc->vxcr_u.txcd =
825 		    (struct vmxnet3_txcompdesc *) vaddrs[q * ntxqs + 0];
826 		txc->vxcr_paddr = paddrs[q * ntxqs + 0];
827 
828 		/* Command ring */
829 		txr->vxtxr_txd =
830 		    (struct vmxnet3_txdesc *) vaddrs[q * ntxqs + 1];
831 		txr->vxtxr_paddr = paddrs[q * ntxqs + 1];
832 	}
833 
834 	return (0);
835 }
836 
837 static void
838 vmxnet3_init_rxq(struct vmxnet3_softc *sc, int q, int nrxqs)
839 {
840 	struct vmxnet3_rxqueue *rxq;
841 	struct vmxnet3_comp_ring *rxc;
842 	struct vmxnet3_rxring *rxr;
843 	if_softc_ctx_t scctx;
844 	int i;
845 
846 	rxq = &sc->vmx_rxq[q];
847 	rxc = &rxq->vxrxq_comp_ring;
848 	scctx = sc->vmx_scctx;
849 
850 	snprintf(rxq->vxrxq_name, sizeof(rxq->vxrxq_name), "%s-rx%d",
851 	    device_get_nameunit(sc->vmx_dev), q);
852 
853 	rxq->vxrxq_sc = sc;
854 	rxq->vxrxq_id = q;
855 
856 	/*
857 	 * First rxq is the completion queue, so there are nrxqs - 1 command
858 	 * rings starting at iflib queue id 1.
859 	 */
860 	rxc->vxcr_ndesc = scctx->isc_nrxd[0];
861 	for (i = 0; i < nrxqs - 1; i++) {
862 		rxr = &rxq->vxrxq_cmd_ring[i];
863 		rxr->vxrxr_ndesc = scctx->isc_nrxd[i + 1];
864 	}
865 }
866 
867 static int
868 vmxnet3_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
869     int nrxqs, int nrxqsets)
870 {
871 	struct vmxnet3_softc *sc;
872 	if_softc_ctx_t scctx;
873 	int q;
874 	int i;
875 	int error;
876 	caddr_t kva;
877 
878 	sc = iflib_get_softc(ctx);
879 	scctx = sc->vmx_scctx;
880 
881 	/* Allocate the array of receive queues */
882 	sc->vmx_rxq = malloc(sizeof(struct vmxnet3_rxqueue) *
883 	    nrxqsets, M_DEVBUF, M_NOWAIT | M_ZERO);
884 	if (sc->vmx_rxq == NULL)
885 		return (ENOMEM);
886 
887 	/* Initialize driver state for each receive queue */
888 	for (q = 0; q < nrxqsets; q++)
889 		vmxnet3_init_rxq(sc, q, nrxqs);
890 
891 	/*
892 	 * Allocate queue state that is shared with the device.  This check
893 	 * and call are performed in both vmxnet3_tx_queues_alloc() and
894 	 * vmxnet3_rx_queues_alloc() so that we don't have to care which
895 	 * order iflib invokes those routines in.
896 	 */
897 	if (sc->vmx_qs_dma.idi_size == 0) {
898 		error = vmxnet3_queues_shared_alloc(sc);
899 		if (error)
900 			return (error);
901 	}
902 
903 	kva = sc->vmx_qs_dma.idi_vaddr +
904 	    scctx->isc_ntxqsets * sizeof(struct vmxnet3_txq_shared);
905 	for (q = 0; q < nrxqsets; q++) {
906 		sc->vmx_rxq[q].vxrxq_rs = (struct vmxnet3_rxq_shared *) kva;
907 		kva += sizeof(struct vmxnet3_rxq_shared);
908 	}
909 
910 	/* Record descriptor ring vaddrs and paddrs */
911 	for (q = 0; q < nrxqsets; q++) {
912 		struct vmxnet3_rxqueue *rxq;
913 		struct vmxnet3_rxring *rxr;
914 		struct vmxnet3_comp_ring *rxc;
915 
916 		rxq = &sc->vmx_rxq[q];
917 		rxc = &rxq->vxrxq_comp_ring;
918 
919 		/* Completion ring */
920 		rxc->vxcr_u.rxcd =
921 		    (struct vmxnet3_rxcompdesc *) vaddrs[q * nrxqs + 0];
922 		rxc->vxcr_paddr = paddrs[q * nrxqs + 0];
923 
924 		/* Command ring(s) */
925 		for (i = 0; i < nrxqs - 1; i++) {
926 			rxr = &rxq->vxrxq_cmd_ring[i];
927 
928 			rxr->vxrxr_rxd =
929 			    (struct vmxnet3_rxdesc *) vaddrs[q * nrxqs + 1 + i];
930 			rxr->vxrxr_paddr = paddrs[q * nrxqs + 1 + i];
931 		}
932 	}
933 
934 	return (0);
935 }
936 
937 static void
938 vmxnet3_queues_free(if_ctx_t ctx)
939 {
940 	struct vmxnet3_softc *sc;
941 
942 	sc = iflib_get_softc(ctx);
943 
944 	/* Free queue state area that is shared with the device */
945 	if (sc->vmx_qs_dma.idi_size != 0) {
946 		iflib_dma_free(&sc->vmx_qs_dma);
947 		sc->vmx_qs_dma.idi_size = 0;
948 	}
949 
950 	/* Free array of receive queues */
951 	if (sc->vmx_rxq != NULL) {
952 		free(sc->vmx_rxq, M_DEVBUF);
953 		sc->vmx_rxq = NULL;
954 	}
955 
956 	/* Free array of transmit queues */
957 	if (sc->vmx_txq != NULL) {
958 		free(sc->vmx_txq, M_DEVBUF);
959 		sc->vmx_txq = NULL;
960 	}
961 }
962 
963 static int
964 vmxnet3_alloc_shared_data(struct vmxnet3_softc *sc)
965 {
966 	device_t dev;
967 	size_t size;
968 	int error;
969 
970 	dev = sc->vmx_dev;
971 
972 	/* Top level state structure shared with the device */
973 	size = sizeof(struct vmxnet3_driver_shared);
974 	error = iflib_dma_alloc_align(sc->vmx_ctx, size, 1, &sc->vmx_ds_dma, 0);
975 	if (error) {
976 		device_printf(dev, "cannot alloc shared memory\n");
977 		return (error);
978 	}
979 	sc->vmx_ds = (struct vmxnet3_driver_shared *) sc->vmx_ds_dma.idi_vaddr;
980 
981 	/* RSS table state shared with the device */
982 	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
983 		size = sizeof(struct vmxnet3_rss_shared);
984 		error = iflib_dma_alloc_align(sc->vmx_ctx, size, 128,
985 		    &sc->vmx_rss_dma, 0);
986 		if (error) {
987 			device_printf(dev, "cannot alloc rss shared memory\n");
988 			return (error);
989 		}
990 		sc->vmx_rss =
991 		    (struct vmxnet3_rss_shared *) sc->vmx_rss_dma.idi_vaddr;
992 	}
993 
994 	return (0);
995 }
996 
997 static void
998 vmxnet3_free_shared_data(struct vmxnet3_softc *sc)
999 {
1000 
1001 	/* Free RSS table state shared with the device */
1002 	if (sc->vmx_rss != NULL) {
1003 		iflib_dma_free(&sc->vmx_rss_dma);
1004 		sc->vmx_rss = NULL;
1005 	}
1006 
1007 	/* Free top level state structure shared with the device */
1008 	if (sc->vmx_ds != NULL) {
1009 		iflib_dma_free(&sc->vmx_ds_dma);
1010 		sc->vmx_ds = NULL;
1011 	}
1012 }
1013 
1014 static int
1015 vmxnet3_alloc_mcast_table(struct vmxnet3_softc *sc)
1016 {
1017 	int error;
1018 
1019 	/* Multicast table state shared with the device */
1020 	error = iflib_dma_alloc_align(sc->vmx_ctx,
1021 	    VMXNET3_MULTICAST_MAX * ETHER_ADDR_LEN, 32, &sc->vmx_mcast_dma, 0);
1022 	if (error)
1023 		device_printf(sc->vmx_dev, "unable to alloc multicast table\n");
1024 	else
1025 		sc->vmx_mcast = sc->vmx_mcast_dma.idi_vaddr;
1026 
1027 	return (error);
1028 }
1029 
1030 static void
1031 vmxnet3_free_mcast_table(struct vmxnet3_softc *sc)
1032 {
1033 
1034 	/* Free multicast table state shared with the device */
1035 	if (sc->vmx_mcast != NULL) {
1036 		iflib_dma_free(&sc->vmx_mcast_dma);
1037 		sc->vmx_mcast = NULL;
1038 	}
1039 }
1040 
1041 static void
1042 vmxnet3_init_shared_data(struct vmxnet3_softc *sc)
1043 {
1044 	struct vmxnet3_driver_shared *ds;
1045 	if_softc_ctx_t scctx;
1046 	struct vmxnet3_txqueue *txq;
1047 	struct vmxnet3_txq_shared *txs;
1048 	struct vmxnet3_rxqueue *rxq;
1049 	struct vmxnet3_rxq_shared *rxs;
1050 	int i;
1051 
1052 	ds = sc->vmx_ds;
1053 	scctx = sc->vmx_scctx;
1054 
1055 	/*
1056 	 * Initialize the fields of the shared data that remain the same
1057 	 * across reinits.  Note the shared data is zeroed when allocated.
1058 	 */
1059 
1060 	ds->magic = VMXNET3_REV1_MAGIC;
1061 
1062 	/* DriverInfo */
1063 	ds->version = VMXNET3_DRIVER_VERSION;
1064 	ds->guest = VMXNET3_GOS_FREEBSD |
1065 #ifdef __LP64__
1066 	    VMXNET3_GOS_64BIT;
1067 #else
1068 	    VMXNET3_GOS_32BIT;
1069 #endif
1070 	ds->vmxnet3_revision = 1;
1071 	ds->upt_version = 1;
1072 
1073 	/* Misc. conf */
1074 	ds->driver_data = vtophys(sc);
1075 	ds->driver_data_len = sizeof(struct vmxnet3_softc);
1076 	ds->queue_shared = sc->vmx_qs_dma.idi_paddr;
1077 	ds->queue_shared_len = sc->vmx_qs_dma.idi_size;
1078 	ds->nrxsg_max = IFLIB_MAX_RX_SEGS;
1079 
1080 	/* RSS conf */
1081 	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1082 		ds->rss.version = 1;
1083 		ds->rss.paddr = sc->vmx_rss_dma.idi_paddr;
1084 		ds->rss.len = sc->vmx_rss_dma.idi_size;
1085 	}
1086 
1087 	/* Interrupt control. */
1088 	ds->automask = sc->vmx_intr_mask_mode == VMXNET3_IMM_AUTO;
1089 	/*
1090 	 * Total number of interrupt indexes we are using in the shared
1091 	 * config data, even though we don't actually allocate interrupt
1092 	 * resources for the tx queues.  Some versions of the device will
1093 	 * fail to initialize successfully if interrupt indexes are used in
1094 	 * the shared config that exceed the number of interrupts configured
1095 	 * here.
1096 	 */
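	/*
	 * For example, with four rx and four tx queue sets under MSI-X this
	 * reports nintr = 9, even though only five vectors are allocated.
	 */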
1097 	ds->nintr = (scctx->isc_vectors == 1) ?
1098 	    2 : (scctx->isc_nrxqsets + scctx->isc_ntxqsets + 1);
1099 	ds->evintr = sc->vmx_event_intr_idx;
1100 	ds->ictrl = VMXNET3_ICTRL_DISABLE_ALL;
1101 
1102 	for (i = 0; i < ds->nintr; i++)
1103 		ds->modlevel[i] = UPT1_IMOD_ADAPTIVE;
1104 
1105 	/* Receive filter. */
1106 	ds->mcast_table = sc->vmx_mcast_dma.idi_paddr;
1107 	ds->mcast_tablelen = sc->vmx_mcast_dma.idi_size;
1108 
1109 	/* Tx queues */
1110 	for (i = 0; i < scctx->isc_ntxqsets; i++) {
1111 		txq = &sc->vmx_txq[i];
1112 		txs = txq->vxtxq_ts;
1113 
1114 		txs->cmd_ring = txq->vxtxq_cmd_ring.vxtxr_paddr;
1115 		txs->cmd_ring_len = txq->vxtxq_cmd_ring.vxtxr_ndesc;
1116 		txs->comp_ring = txq->vxtxq_comp_ring.vxcr_paddr;
1117 		txs->comp_ring_len = txq->vxtxq_comp_ring.vxcr_ndesc;
1118 		txs->driver_data = vtophys(txq);
1119 		txs->driver_data_len = sizeof(struct vmxnet3_txqueue);
1120 	}
1121 
1122 	/* Rx queues */
1123 	for (i = 0; i < scctx->isc_nrxqsets; i++) {
1124 		rxq = &sc->vmx_rxq[i];
1125 		rxs = rxq->vxrxq_rs;
1126 
1127 		rxs->cmd_ring[0] = rxq->vxrxq_cmd_ring[0].vxrxr_paddr;
1128 		rxs->cmd_ring_len[0] = rxq->vxrxq_cmd_ring[0].vxrxr_ndesc;
1129 		rxs->cmd_ring[1] = rxq->vxrxq_cmd_ring[1].vxrxr_paddr;
1130 		rxs->cmd_ring_len[1] = rxq->vxrxq_cmd_ring[1].vxrxr_ndesc;
1131 		rxs->comp_ring = rxq->vxrxq_comp_ring.vxcr_paddr;
1132 		rxs->comp_ring_len = rxq->vxrxq_comp_ring.vxcr_ndesc;
1133 		rxs->driver_data = vtophys(rxq);
1134 		rxs->driver_data_len = sizeof(struct vmxnet3_rxqueue);
1135 	}
1136 }
1137 
1138 static void
1139 vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *sc)
1140 {
1141 	/*
1142 	 * Use the same key as the Linux driver until FreeBSD can do
1143 	 * RSS (presumably Toeplitz) in software.
1144 	 */
1145 	static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = {
1146 	    0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac,
1147 	    0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28,
1148 	    0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70,
1149 	    0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3,
1150 	    0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9,
1151 	};
1152 
1153 	if_softc_ctx_t scctx;
1154 	struct vmxnet3_rss_shared *rss;
1155 #ifdef RSS
1156 	uint8_t rss_algo;
1157 #endif
1158 	int i;
1159 
1160 	scctx = sc->vmx_scctx;
1161 	rss = sc->vmx_rss;
1162 
1163 	rss->hash_type =
1164 	    UPT1_RSS_HASH_TYPE_IPV4 | UPT1_RSS_HASH_TYPE_TCP_IPV4 |
1165 	    UPT1_RSS_HASH_TYPE_IPV6 | UPT1_RSS_HASH_TYPE_TCP_IPV6;
1166 	rss->hash_func = UPT1_RSS_HASH_FUNC_TOEPLITZ;
1167 	rss->hash_key_size = UPT1_RSS_MAX_KEY_SIZE;
1168 	rss->ind_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
1169 #ifdef RSS
1170 	/*
1171 	 * If the software RSS is configured to anything other than
1172 	 * Toeplitz, then just do Toeplitz in "hardware" for the sake of
1173 	 * the packet distribution, but report the hash as opaque to
1174 	 * disengage from the software RSS.
1175 	 */
1176 	rss_algo = rss_gethashalgo();
1177 	if (rss_algo == RSS_HASH_TOEPLITZ) {
1178 		rss_getkey(rss->hash_key);
1179 		for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++) {
1180 			rss->ind_table[i] = rss_get_indirection_to_bucket(i) %
1181 			    scctx->isc_nrxqsets;
1182 		}
1183 		sc->vmx_flags |= VMXNET3_FLAG_SOFT_RSS;
1184 	} else
1185 #endif
1186 	{
1187 		memcpy(rss->hash_key, rss_key, UPT1_RSS_MAX_KEY_SIZE);
1188 		for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++)
1189 			rss->ind_table[i] = i % scctx->isc_nrxqsets;
1190 		sc->vmx_flags &= ~VMXNET3_FLAG_SOFT_RSS;
1191 	}
1192 }
1193 
1194 static void
1195 vmxnet3_reinit_shared_data(struct vmxnet3_softc *sc)
1196 {
1197 	if_t ifp;
1198 	struct vmxnet3_driver_shared *ds;
1199 	if_softc_ctx_t scctx;
1200 
1201 	ifp = sc->vmx_ifp;
1202 	ds = sc->vmx_ds;
1203 	scctx = sc->vmx_scctx;
1204 
1205 	ds->mtu = if_getmtu(ifp);
1206 	ds->ntxqueue = scctx->isc_ntxqsets;
1207 	ds->nrxqueue = scctx->isc_nrxqsets;
1208 
1209 	ds->upt_features = 0;
1210 	if (if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6))
1211 		ds->upt_features |= UPT1_F_CSUM;
1212 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING)
1213 		ds->upt_features |= UPT1_F_VLAN;
1214 	if (if_getcapenable(ifp) & IFCAP_LRO)
1215 		ds->upt_features |= UPT1_F_LRO;
1216 
1217 	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1218 		ds->upt_features |= UPT1_F_RSS;
1219 		vmxnet3_reinit_rss_shared_data(sc);
1220 	}
1221 
1222 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSL, sc->vmx_ds_dma.idi_paddr);
1223 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSH,
1224 	    (uint64_t) sc->vmx_ds_dma.idi_paddr >> 32);
1225 }
1226 
1227 static int
1228 vmxnet3_alloc_data(struct vmxnet3_softc *sc)
1229 {
1230 	int error;
1231 
1232 	error = vmxnet3_alloc_shared_data(sc);
1233 	if (error)
1234 		return (error);
1235 
1236 	error = vmxnet3_alloc_mcast_table(sc);
1237 	if (error)
1238 		return (error);
1239 
1240 	vmxnet3_init_shared_data(sc);
1241 
1242 	return (0);
1243 }
1244 
1245 static void
1246 vmxnet3_free_data(struct vmxnet3_softc *sc)
1247 {
1248 
1249 	vmxnet3_free_mcast_table(sc);
1250 	vmxnet3_free_shared_data(sc);
1251 }
1252 
1253 static void
1254 vmxnet3_evintr(struct vmxnet3_softc *sc)
1255 {
1256 	device_t dev;
1257 	struct vmxnet3_txq_shared *ts;
1258 	struct vmxnet3_rxq_shared *rs;
1259 	uint32_t event;
1260 
1261 	dev = sc->vmx_dev;
1262 
1263 	/* Clear events. */
1264 	event = sc->vmx_ds->event;
1265 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_EVENT, event);
1266 
1267 	if (event & VMXNET3_EVENT_LINK)
1268 		vmxnet3_link_status(sc);
1269 
1270 	if (event & (VMXNET3_EVENT_TQERROR | VMXNET3_EVENT_RQERROR)) {
1271 		vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_STATUS);
1272 		ts = sc->vmx_txq[0].vxtxq_ts;
1273 		if (ts->stopped != 0)
1274 			device_printf(dev, "Tx queue error %#x\n", ts->error);
1275 		rs = sc->vmx_rxq[0].vxrxq_rs;
1276 		if (rs->stopped != 0)
1277 			device_printf(dev, "Rx queue error %#x\n", rs->error);
1278 
1279 		/* XXX - rely on iflib watchdog to reset us? */
1280 		device_printf(dev, "Rx/Tx queue error event ... "
1281 		    "waiting for iflib watchdog reset\n");
1282 	}
1283 
1284 	if (event & VMXNET3_EVENT_DIC)
1285 		device_printf(dev, "device implementation change event\n");
1286 	if (event & VMXNET3_EVENT_DEBUG)
1287 		device_printf(dev, "debug event\n");
1288 }
1289 
1290 static int
1291 vmxnet3_isc_txd_encap(void *vsc, if_pkt_info_t pi)
1292 {
1293 	struct vmxnet3_softc *sc;
1294 	struct vmxnet3_txqueue *txq;
1295 	struct vmxnet3_txring *txr;
1296 	struct vmxnet3_txdesc *txd, *sop;
1297 	bus_dma_segment_t *segs;
1298 	int nsegs;
1299 	int pidx;
1300 	int hdrlen;
1301 	int i;
1302 	int gen;
1303 
1304 	sc = vsc;
1305 	txq = &sc->vmx_txq[pi->ipi_qsidx];
1306 	txr = &txq->vxtxq_cmd_ring;
1307 	segs = pi->ipi_segs;
1308 	nsegs = pi->ipi_nsegs;
1309 	pidx = pi->ipi_pidx;
1310 
1311 	KASSERT(nsegs <= VMXNET3_TX_MAXSEGS,
1312 	    ("%s: packet with too many segments %d", __func__, nsegs));
1313 
1314 	sop = &txr->vxtxr_txd[pidx];
1315 	gen = txr->vxtxr_gen ^ 1;	/* Still owned by the cpu */
1316 
1317 	for (i = 0; i < nsegs; i++) {
1318 		txd = &txr->vxtxr_txd[pidx];
1319 
1320 		txd->addr = segs[i].ds_addr;
1321 		txd->len = segs[i].ds_len;
1322 		txd->gen = gen;
1323 		txd->dtype = 0;
1324 		txd->offload_mode = VMXNET3_OM_NONE;
1325 		txd->offload_pos = 0;
1326 		txd->hlen = 0;
1327 		txd->eop = 0;
1328 		txd->compreq = 0;
1329 		txd->vtag_mode = 0;
1330 		txd->vtag = 0;
1331 
1332 		if (++pidx == txr->vxtxr_ndesc) {
1333 			pidx = 0;
1334 			txr->vxtxr_gen ^= 1;
1335 		}
1336 		gen = txr->vxtxr_gen;
1337 	}
1338 	txd->eop = 1;
1339 	txd->compreq = !!(pi->ipi_flags & IPI_TX_INTR);
1340 	pi->ipi_new_pidx = pidx;
1341 
1342 	/*
1343 	 * VLAN
1344 	 */
1345 	if (pi->ipi_mflags & M_VLANTAG) {
1346 		sop->vtag_mode = 1;
1347 		sop->vtag = pi->ipi_vtag;
1348 	}
1349 
1350 	/*
1351 	 * TSO and checksum offloads
1352 	 */
1353 	hdrlen = pi->ipi_ehdrlen + pi->ipi_ip_hlen;
1354 	if (pi->ipi_csum_flags & CSUM_TSO) {
1355 		sop->offload_mode = VMXNET3_OM_TSO;
1356 		sop->hlen = hdrlen + pi->ipi_tcp_hlen;
1357 		sop->offload_pos = pi->ipi_tso_segsz;
1358 	} else if (pi->ipi_csum_flags & (VMXNET3_CSUM_OFFLOAD |
1359 	    VMXNET3_CSUM_OFFLOAD_IPV6)) {
1360 		sop->offload_mode = VMXNET3_OM_CSUM;
1361 		sop->hlen = hdrlen;
1362 		sop->offload_pos = hdrlen +
1363 		    ((pi->ipi_ipproto == IPPROTO_TCP) ?
1364 			offsetof(struct tcphdr, th_sum) :
1365 			offsetof(struct udphdr, uh_sum));
1366 	}
1367 
1368 	/* Finally, change the ownership. */
1369 	vmxnet3_barrier(sc, VMXNET3_BARRIER_WR);
1370 	sop->gen ^= 1;
1371 
1372 	return (0);
1373 }
1374 
1375 static void
1376 vmxnet3_isc_txd_flush(void *vsc, uint16_t txqid, qidx_t pidx)
1377 {
1378 	struct vmxnet3_softc *sc;
1379 	struct vmxnet3_txqueue *txq;
1380 
1381 	sc = vsc;
1382 	txq = &sc->vmx_txq[txqid];
1383 
1384 	/*
1385 	 * pidx is what we last set ipi_new_pidx to in
1386 	 * vmxnet3_isc_txd_encap()
1387 	 */
1388 
1389 	/*
1390 	 * Avoid expensive register updates if the flush request is
1391 	 * redundant.
1392 	 */
1393 	if (txq->vxtxq_last_flush == pidx)
1394 		return;
1395 	txq->vxtxq_last_flush = pidx;
1396 	vmxnet3_write_bar0(sc, VMXNET3_BAR0_TXH(txq->vxtxq_id), pidx);
1397 }
1398 
1399 static int
1400 vmxnet3_isc_txd_credits_update(void *vsc, uint16_t txqid, bool clear)
1401 {
1402 	struct vmxnet3_softc *sc;
1403 	struct vmxnet3_txqueue *txq;
1404 	struct vmxnet3_comp_ring *txc;
1405 	struct vmxnet3_txcompdesc *txcd;
1406 	struct vmxnet3_txring *txr;
1407 	int processed;
1408 
1409 	sc = vsc;
1410 	txq = &sc->vmx_txq[txqid];
1411 	txc = &txq->vxtxq_comp_ring;
1412 	txr = &txq->vxtxq_cmd_ring;
1413 
1414 	/*
1415 	 * If clear is true, we need to report the number of TX command ring
1416 	 * descriptors that have been processed by the device.  If clear is
1417 	 * false, we just need to report whether or not at least one TX
1418 	 * command ring descriptor has been processed by the device.
1419 	 */
1420 	processed = 0;
1421 	for (;;) {
1422 		txcd = &txc->vxcr_u.txcd[txc->vxcr_next];
1423 		if (txcd->gen != txc->vxcr_gen)
1424 			break;
1425 		else if (!clear)
1426 			return (1);
1427 		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1428 
1429 		if (++txc->vxcr_next == txc->vxcr_ndesc) {
1430 			txc->vxcr_next = 0;
1431 			txc->vxcr_gen ^= 1;
1432 		}
1433 
1434 		if (txcd->eop_idx < txr->vxtxr_next)
1435 			processed += txr->vxtxr_ndesc -
1436 			    (txr->vxtxr_next - txcd->eop_idx) + 1;
1437 		else
1438 			processed += txcd->eop_idx - txr->vxtxr_next + 1;
1439 		txr->vxtxr_next = (txcd->eop_idx + 1) % txr->vxtxr_ndesc;
1440 	}
1441 
1442 	return (processed);
1443 }
1444 
1445 static int
1446 vmxnet3_isc_rxd_available(void *vsc, uint16_t rxqid, qidx_t idx, qidx_t budget)
1447 {
1448 	struct vmxnet3_softc *sc;
1449 	struct vmxnet3_rxqueue *rxq;
1450 	struct vmxnet3_comp_ring *rxc;
1451 	struct vmxnet3_rxcompdesc *rxcd;
1452 	int avail;
1453 	int completed_gen;
1454 #ifdef INVARIANTS
1455 	int expect_sop = 1;
1456 #endif
1457 	sc = vsc;
1458 	rxq = &sc->vmx_rxq[rxqid];
1459 	rxc = &rxq->vxrxq_comp_ring;
1460 
1461 	avail = 0;
1462 	completed_gen = rxc->vxcr_gen;
1463 	for (;;) {
1464 		rxcd = &rxc->vxcr_u.rxcd[idx];
1465 		if (rxcd->gen != completed_gen)
1466 			break;
1467 		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1468 
1469 #ifdef INVARIANTS
1470 		if (expect_sop)
1471 			KASSERT(rxcd->sop, ("%s: expected sop", __func__));
1472 		else
1473 			KASSERT(!rxcd->sop, ("%s: unexpected sop", __func__));
1474 		expect_sop = rxcd->eop;
1475 #endif
1476 		if (rxcd->eop && (rxcd->len != 0))
1477 			avail++;
1478 		if (avail > budget)
1479 			break;
1480 		if (++idx == rxc->vxcr_ndesc) {
1481 			idx = 0;
1482 			completed_gen ^= 1;
1483 		}
1484 	}
1485 
1486 	return (avail);
1487 }
1488 
1489 static int
1490 vmxnet3_isc_rxd_pkt_get(void *vsc, if_rxd_info_t ri)
1491 {
1492 	struct vmxnet3_softc *sc;
1493 	if_softc_ctx_t scctx;
1494 	struct vmxnet3_rxqueue *rxq;
1495 	struct vmxnet3_comp_ring *rxc;
1496 	struct vmxnet3_rxcompdesc *rxcd;
1497 	if_rxd_frag_t frag;
1498 	int cqidx;
1499 	uint16_t total_len;
1500 	uint8_t nfrags;
1501 	uint8_t i;
1502 	uint8_t flid;
1503 
1504 	sc = vsc;
1505 	scctx = sc->vmx_scctx;
1506 	rxq = &sc->vmx_rxq[ri->iri_qsidx];
1507 	rxc = &rxq->vxrxq_comp_ring;
1508 
1509 	/*
1510 	 * Get a single packet starting at the given index in the completion
1511 	 * queue.  The fact that we have been called indicates that
1512 	 * vmxnet3_isc_rxd_available() has already verified that either
1513 	 * there is a complete packet available starting at the given index,
1514 	 * or there are one or more zero length packets starting at the
1515 	 * given index followed by a complete packet, so no verification of
1516 	 * ownership of the descriptors (and no associated read barrier) is
1517 	 * required here.
1518 	 */
1519 	cqidx = ri->iri_cidx;
1520 	rxcd = &rxc->vxcr_u.rxcd[cqidx];
1521 	while (rxcd->len == 0) {
1522 		KASSERT(rxcd->sop && rxcd->eop,
1523 		    ("%s: zero-length packet without both sop and eop set",
1524 			__func__));
1525 		rxc->vxcr_zero_length++;
1526 		if (++cqidx == rxc->vxcr_ndesc) {
1527 			cqidx = 0;
1528 			rxc->vxcr_gen ^= 1;
1529 		}
1530 		rxcd = &rxc->vxcr_u.rxcd[cqidx];
1531 	}
1532 	KASSERT(rxcd->sop, ("%s: expected sop", __func__));
1533 
1534 	/*
1535 	 * RSS and flow ID.
1536 	 * Types other than M_HASHTYPE_NONE and M_HASHTYPE_OPAQUE_HASH should
1537 	 * be used only if the software RSS is enabled and it uses the same
1538 	 * algorithm and the hash key as the "hardware".  If the software RSS
1539 	 * is not enabled, then it's simply pointless to use those types.
1540 	 * If it's enabled but with different parameters, then hash values will
1541 	 * not match.
1542 	 */
1543 	ri->iri_flowid = rxcd->rss_hash;
1544 #ifdef RSS
1545 	if ((sc->vmx_flags & VMXNET3_FLAG_SOFT_RSS) != 0) {
1546 		switch (rxcd->rss_type) {
1547 		case VMXNET3_RCD_RSS_TYPE_NONE:
1548 			ri->iri_flowid = ri->iri_qsidx;
1549 			ri->iri_rsstype = M_HASHTYPE_NONE;
1550 			break;
1551 		case VMXNET3_RCD_RSS_TYPE_IPV4:
1552 			ri->iri_rsstype = M_HASHTYPE_RSS_IPV4;
1553 			break;
1554 		case VMXNET3_RCD_RSS_TYPE_TCPIPV4:
1555 			ri->iri_rsstype = M_HASHTYPE_RSS_TCP_IPV4;
1556 			break;
1557 		case VMXNET3_RCD_RSS_TYPE_IPV6:
1558 			ri->iri_rsstype = M_HASHTYPE_RSS_IPV6;
1559 			break;
1560 		case VMXNET3_RCD_RSS_TYPE_TCPIPV6:
1561 			ri->iri_rsstype = M_HASHTYPE_RSS_TCP_IPV6;
1562 			break;
1563 		default:
1564 			ri->iri_rsstype = M_HASHTYPE_OPAQUE_HASH;
1565 			break;
1566 		}
1567 	} else
1568 #endif
1569 	{
1570 		switch (rxcd->rss_type) {
1571 		case VMXNET3_RCD_RSS_TYPE_NONE:
1572 			ri->iri_flowid = ri->iri_qsidx;
1573 			ri->iri_rsstype = M_HASHTYPE_NONE;
1574 			break;
1575 		default:
1576 			ri->iri_rsstype = M_HASHTYPE_OPAQUE_HASH;
1577 			break;
1578 		}
1579 	}
1580 
1581 	/*
1582 	 * The queue numbering scheme used for rxcd->qid is as follows:
1583 	 *  - All of the command ring 0s are numbered [0, nrxqsets - 1]
1584 	 *  - All of the command ring 1s are numbered [nrxqsets, 2*nrxqsets - 1]
1585 	 *
1586 	 * Thus, rxcd->qid less than nrxqsets indicates command ring (and
1587 	 * flid) 0, and rxcd->qid greater than or equal to nrxqsets
1588 	 * indicates command ring (and flid) 1.
1589 	 */
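	/*
	 * For example, with nrxqsets == 4, rxcd->qid == 2 refers to command
	 * ring 0 of queue 2 (flid 0), while rxcd->qid == 6 refers to command
	 * ring 1 of the same queue (flid 1).
	 */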
1590 	nfrags = 0;
1591 	total_len = 0;
1592 	do {
1593 		rxcd = &rxc->vxcr_u.rxcd[cqidx];
1594 		KASSERT(rxcd->gen == rxc->vxcr_gen,
1595 		    ("%s: generation mismatch", __func__));
1596 		KASSERT(nfrags < IFLIB_MAX_RX_SEGS,
1597 		    ("%s: too many fragments", __func__));
1598 		if (__predict_true(rxcd->len != 0)) {
1599 			frag = &ri->iri_frags[nfrags];
1600 			flid = (rxcd->qid >= scctx->isc_nrxqsets) ? 1 : 0;
1601 			frag->irf_flid = flid;
1602 			frag->irf_idx = rxcd->rxd_idx;
1603 			frag->irf_len = rxcd->len;
1604 			total_len += rxcd->len;
1605 			nfrags++;
1606 		} else {
1607 			rxc->vcxr_zero_length_frag++;
1608 		}
1609 		if (++cqidx == rxc->vxcr_ndesc) {
1610 			cqidx = 0;
1611 			rxc->vxcr_gen ^= 1;
1612 		}
1613 	} while (!rxcd->eop);
1614 
1615 	ri->iri_cidx = cqidx;
1616 	ri->iri_nfrags = nfrags;
1617 	ri->iri_len = total_len;
1618 
1619 	/*
1620 	 * If there's an error, the last descriptor in the packet will
1621 	 * have the error indicator set.  In this case, set all
1622 	 * fragment lengths to zero.  This will cause iflib to discard
1623 	 * the packet, but process all associated descriptors through
1624 	 * the refill mechanism.
1625 	 */
1626 	if (__predict_false(rxcd->error)) {
1627 		rxc->vxcr_pkt_errors++;
1628 		for (i = 0; i < nfrags; i++) {
1629 			frag = &ri->iri_frags[i];
1630 			frag->irf_len = 0;
1631 		}
1632 	} else {
1633 		/* Checksum offload information is in the last descriptor. */
1634 		if (!rxcd->no_csum) {
1635 			uint32_t csum_flags = 0;
1636 
1637 			if (rxcd->ipv4) {
1638 				csum_flags |= CSUM_IP_CHECKED;
1639 				if (rxcd->ipcsum_ok)
1640 					csum_flags |= CSUM_IP_VALID;
1641 			}
1642 			if (!rxcd->fragment && (rxcd->tcp || rxcd->udp)) {
1643 				csum_flags |= CSUM_L4_CALC;
1644 				if (rxcd->csum_ok) {
1645 					csum_flags |= CSUM_L4_VALID;
1646 					ri->iri_csum_data = 0xffff;
1647 				}
1648 			}
1649 			ri->iri_csum_flags = csum_flags;
1650 		}
1651 
1652 		/* VLAN information is in the last descriptor. */
1653 		if (rxcd->vlan) {
1654 			ri->iri_flags |= M_VLANTAG;
1655 			ri->iri_vtag = rxcd->vtag;
1656 		}
1657 	}
1658 
1659 	return (0);
1660 }
1661 
1662 static void
1663 vmxnet3_isc_rxd_refill(void *vsc, if_rxd_update_t iru)
1664 {
1665 	struct vmxnet3_softc *sc;
1666 	struct vmxnet3_rxqueue *rxq;
1667 	struct vmxnet3_rxring *rxr;
1668 	struct vmxnet3_rxdesc *rxd;
1669 	uint64_t *paddrs;
1670 	int count;
1671 	int len;
1672 	int idx;
1673 	int i;
1674 	uint8_t flid;
1675 	uint8_t btype;
1676 
1677 	count = iru->iru_count;
1678 	len = iru->iru_buf_size;
1679 	flid = iru->iru_flidx;
1680 	paddrs = iru->iru_paddrs;
1681 
1682 	sc = vsc;
1683 	rxq = &sc->vmx_rxq[iru->iru_qsidx];
1684 	rxr = &rxq->vxrxq_cmd_ring[flid];
1685 	rxd = rxr->vxrxr_rxd;
1686 
1687 	/*
1688 	 * Command ring 0 is filled with BTYPE_HEAD descriptors, and
1689 	 * command ring 1 is filled with BTYPE_BODY descriptors.
1690 	 */
1691 	btype = (flid == 0) ? VMXNET3_BTYPE_HEAD : VMXNET3_BTYPE_BODY;
1692 	/*
1693 	 * The refill entries from iflib will advance monotonically,
1694 	 * but the refilled descriptors may not be contiguous due to
1695 	 * earlier skipping of descriptors by the device.  The refill
1696 	 * entries from iflib need an entire state update, while the
1697 	 * descriptors previously skipped by the device only need to
1698 	 * have their generation numbers updated.
1699 	 */
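	/*
	 * For example, if the device skipped descriptor 5, iflib may ask us
	 * to refill descriptors 4 and 6; descriptor 5 then only has its
	 * generation bit brought up to date as we pass it.
	 */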
1700 	idx = rxr->vxrxr_refill_start;
1701 	i = 0;
1702 	do {
1703 		if (idx == iru->iru_idxs[i]) {
1704 			rxd[idx].addr = paddrs[i];
1705 			rxd[idx].len = len;
1706 			rxd[idx].btype = btype;
1707 			i++;
1708 		} else
1709 			rxr->vxrxr_desc_skips++;
1710 		rxd[idx].gen = rxr->vxrxr_gen;
1711 
1712 		if (++idx == rxr->vxrxr_ndesc) {
1713 			idx = 0;
1714 			rxr->vxrxr_gen ^= 1;
1715 		}
1716 	} while (i != count);
1717 	rxr->vxrxr_refill_start = idx;
1718 }
1719 
1720 static void
1721 vmxnet3_isc_rxd_flush(void *vsc, uint16_t rxqid, uint8_t flid, qidx_t pidx)
1722 {
1723 	struct vmxnet3_softc *sc;
1724 	bus_size_t r;
1725 
1726 	sc = vsc;
1727 
1728 	if (flid == 0)
1729 		r = VMXNET3_BAR0_RXH1(rxqid);
1730 	else
1731 		r = VMXNET3_BAR0_RXH2(rxqid);
1732 
1733 	vmxnet3_write_bar0(sc, r, pidx);
1734 }
1735 
1736 static int
1737 vmxnet3_legacy_intr(void *xsc)
1738 {
1739 	struct vmxnet3_softc *sc;
1740 	if_softc_ctx_t scctx;
1741 	if_ctx_t ctx;
1742 
1743 	sc = xsc;
1744 	scctx = sc->vmx_scctx;
1745 	ctx = sc->vmx_ctx;
1746 
1747 	/*
1748 	 * When there is only a single interrupt configured, this routine
1749 	 * runs in fast interrupt context, following which the rxq 0 task
1750 	 * will be enqueued.
1751 	 */
1752 	if (scctx->isc_intr == IFLIB_INTR_LEGACY) {
1753 		if (vmxnet3_read_bar1(sc, VMXNET3_BAR1_INTR) == 0)
1754 			return (FILTER_HANDLED);
1755 	}
1756 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
1757 		vmxnet3_intr_disable_all(ctx);
1758 
1759 	if (sc->vmx_ds->event != 0)
1760 		iflib_admin_intr_deferred(ctx);
1761 
1762 	/*
1763 	 * XXX - When there is both rxq and event activity, do we care
1764 	 * whether the rxq 0 task or the admin task re-enables the interrupt
1765 	 * first?
1766 	 */
1767 	return (FILTER_SCHEDULE_THREAD);
1768 }
1769 
1770 static int
1771 vmxnet3_rxq_intr(void *vrxq)
1772 {
1773 	struct vmxnet3_softc *sc;
1774 	struct vmxnet3_rxqueue *rxq;
1775 
1776 	rxq = vrxq;
1777 	sc = rxq->vxrxq_sc;
1778 
1779 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
1780 		vmxnet3_disable_intr(sc, rxq->vxrxq_intr_idx);
1781 
1782 	return (FILTER_SCHEDULE_THREAD);
1783 }
1784 
1785 static int
1786 vmxnet3_event_intr(void *vsc)
1787 {
1788 	struct vmxnet3_softc *sc;
1789 
1790 	sc = vsc;
1791 
1792 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
1793 		vmxnet3_disable_intr(sc, sc->vmx_event_intr_idx);
1794 
1795 	/*
1796 	 * The work will be done via vmxnet3_update_admin_status(), and the
1797 	 * interrupt will be re-enabled in vmxnet3_link_intr_enable().
1798 	 */
1801 	return (FILTER_SCHEDULE_THREAD);
1802 }
1803 
1804 static void
1805 vmxnet3_stop(if_ctx_t ctx)
1806 {
1807 	struct vmxnet3_softc *sc;
1808 
1809 	sc = iflib_get_softc(ctx);
1810 
1811 	sc->vmx_link_active = 0;
1812 	vmxnet3_write_cmd(sc, VMXNET3_CMD_DISABLE);
1813 	vmxnet3_write_cmd(sc, VMXNET3_CMD_RESET);
1814 }
1815 
1816 static void
1817 vmxnet3_txinit(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
1818 {
1819 	struct vmxnet3_txring *txr;
1820 	struct vmxnet3_comp_ring *txc;
1821 
1822 	txq->vxtxq_last_flush = -1;
1823 
1824 	txr = &txq->vxtxq_cmd_ring;
1825 	txr->vxtxr_next = 0;
1826 	txr->vxtxr_gen = VMXNET3_INIT_GEN;
1827 	/*
1828 	 * iflib has zeroed out the descriptor array during the prior attach
1829 	 * or stop
1830 	 */
1831 
1832 	txc = &txq->vxtxq_comp_ring;
1833 	txc->vxcr_next = 0;
1834 	txc->vxcr_gen = VMXNET3_INIT_GEN;
1835 	/*
1836 	 * iflib has zeroed out the descriptor array during the prior attach
1837 	 * or stop
1838 	 */
1839 }
1840 
1841 static void
1842 vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
1843 {
1844 	struct vmxnet3_rxring *rxr;
1845 	struct vmxnet3_comp_ring *rxc;
1846 	int i;
1847 
1848 	/*
1849 	 * The descriptors will be populated with buffers during a
1850 	 * subsequent invocation of vmxnet3_isc_rxd_refill()
1851 	 */
1852 	for (i = 0; i < sc->vmx_sctx->isc_nrxqs - 1; i++) {
1853 		rxr = &rxq->vxrxq_cmd_ring[i];
1854 		rxr->vxrxr_gen = VMXNET3_INIT_GEN;
1855 		rxr->vxrxr_desc_skips = 0;
1856 		rxr->vxrxr_refill_start = 0;
1857 		/*
1858 		 * iflib has zeroed out the descriptor array during the
1859 		 * prior attach or stop
1860 		 */
1861 	}
1862 
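	/*
	 * Any remaining command rings beyond those managed by iflib are
	 * unused; clear them so the device never sees stale descriptors
	 * there.
	 */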
1863 	for (/**/; i < VMXNET3_RXRINGS_PERQ; i++) {
1864 		rxr = &rxq->vxrxq_cmd_ring[i];
1865 		rxr->vxrxr_gen = 0;
1866 		rxr->vxrxr_desc_skips = 0;
1867 		rxr->vxrxr_refill_start = 0;
1868 		bzero(rxr->vxrxr_rxd,
1869 		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
1870 	}
1871 
1872 	rxc = &rxq->vxrxq_comp_ring;
1873 	rxc->vxcr_next = 0;
1874 	rxc->vxcr_gen = VMXNET3_INIT_GEN;
1875 	rxc->vxcr_zero_length = 0;
1876 	rxc->vcxr_zero_length_frag = 0;
1877 	rxc->vxcr_pkt_errors = 0;
1878 	/*
1879 	 * iflib has zeroed out the descriptor array during the prior attach
1880 	 * or stop
1881 	 */
1882 }
1883 
1884 static void
1885 vmxnet3_reinit_queues(struct vmxnet3_softc *sc)
1886 {
1887 	if_softc_ctx_t scctx;
1888 	int q;
1889 
1890 	scctx = sc->vmx_scctx;
1891 
1892 	for (q = 0; q < scctx->isc_ntxqsets; q++)
1893 		vmxnet3_txinit(sc, &sc->vmx_txq[q]);
1894 
1895 	for (q = 0; q < scctx->isc_nrxqsets; q++)
1896 		vmxnet3_rxinit(sc, &sc->vmx_rxq[q]);
1897 }
1898 
1899 static int
1900 vmxnet3_enable_device(struct vmxnet3_softc *sc)
1901 {
1902 	if_softc_ctx_t scctx;
1903 	int q;
1904 
1905 	scctx = sc->vmx_scctx;
1906 
1907 	if (vmxnet3_read_cmd(sc, VMXNET3_CMD_ENABLE) != 0) {
1908 		device_printf(sc->vmx_dev, "device enable command failed!\n");
1909 		return (1);
1910 	}
1911 
1912 	/* Reset the Rx queue heads. */
1913 	for (q = 0; q < scctx->isc_nrxqsets; q++) {
1914 		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH1(q), 0);
1915 		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH2(q), 0);
1916 	}
1917 
1918 	return (0);
1919 }
1920 
1921 static void
1922 vmxnet3_reinit_rxfilters(struct vmxnet3_softc *sc)
1923 {
1924 	if_t ifp;
1925 
1926 	ifp = sc->vmx_ifp;
1927 
1928 	vmxnet3_set_rxfilter(sc, if_getflags(ifp));
1929 
1930 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
1931 		bcopy(sc->vmx_vlan_filter, sc->vmx_ds->vlan_filter,
1932 		    sizeof(sc->vmx_ds->vlan_filter));
1933 	else
1934 		bzero(sc->vmx_ds->vlan_filter,
1935 		    sizeof(sc->vmx_ds->vlan_filter));
1936 	vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER);
1937 }
1938 
1939 static void
1940 vmxnet3_init(if_ctx_t ctx)
1941 {
1942 	struct vmxnet3_softc *sc;
1943 
1944 	sc = iflib_get_softc(ctx);
1945 
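	/*
	 * Re-initialization order: program the MAC address, rebuild the
	 * shared data area and queue state, enable the device, then restore
	 * the receive filters and report the link state.
	 */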
1946 	/* Use the current MAC address. */
1947 	bcopy(if_getlladdr(sc->vmx_ifp), sc->vmx_lladdr, ETHER_ADDR_LEN);
1948 	vmxnet3_set_lladdr(sc);
1949 
1950 	vmxnet3_reinit_shared_data(sc);
1951 	vmxnet3_reinit_queues(sc);
1952 
1953 	vmxnet3_enable_device(sc);
1954 
1955 	vmxnet3_reinit_rxfilters(sc);
1956 	vmxnet3_link_status(sc);
1957 }
1958 
1959 static void
1960 vmxnet3_multi_set(if_ctx_t ctx)
1961 {
1962 
1963 	vmxnet3_set_rxfilter(iflib_get_softc(ctx),
1964 	    if_getflags(iflib_get_ifp(ctx)));
1965 }
1966 
1967 static int
1968 vmxnet3_mtu_set(if_ctx_t ctx, uint32_t mtu)
1969 {
1970 	struct vmxnet3_softc *sc;
1971 	if_softc_ctx_t scctx;
1972 
1973 	sc = iflib_get_softc(ctx);
1974 	scctx = sc->vmx_scctx;
1975 
1976 	if (mtu > VMXNET3_TX_MAXSIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN +
1977 		ETHER_CRC_LEN))
1978 		return (EINVAL);
1979 
1980 	/*
1981 	 * Update the max frame size so that the rx mbuf size is
1982 	 * chosen based on the new mtu during the interface init that
1983 	 * will occur after this routine returns.
1984 	 */
1985 	scctx->isc_max_frame_size = mtu +
1986 		ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN;
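	/*
	 * For example, the default 1500-byte MTU gives a max frame size of
	 * 1500 + 14 + 4 + 4 = 1522 bytes.
	 */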
1987 	/* RX completion queue - n/a */
1988 	scctx->isc_rxd_buf_size[0] = 0;
1989 	/*
1990 	 * For header-type descriptors (used for first segment of
1991 	 * packet), let iflib determine the buffer size based on the
1992 	 * max frame size.
1993 	 */
1994 	scctx->isc_rxd_buf_size[1] = 0;
1995 	/*
1996 	 * For body-type descriptors (used for jumbo frames and LRO),
1997 	 * always use page-sized buffers.
1998 	 */
1999 	scctx->isc_rxd_buf_size[2] = MJUMPAGESIZE;
2000 
2001 	return (0);
2002 }
2003 
2004 static void
2005 vmxnet3_media_status(if_ctx_t ctx, struct ifmediareq *ifmr)
2006 {
2007 	struct vmxnet3_softc *sc;
2008 
2009 	sc = iflib_get_softc(ctx);
2010 
2011 	ifmr->ifm_status = IFM_AVALID;
2012 	ifmr->ifm_active = IFM_ETHER;
2013 
2014 	if (vmxnet3_link_is_up(sc) != 0) {
2015 		ifmr->ifm_status |= IFM_ACTIVE;
2016 		ifmr->ifm_active |= IFM_AUTO;
2017 	} else
2018 		ifmr->ifm_active |= IFM_NONE;
2019 }
2020 
2021 static int
2022 vmxnet3_media_change(if_ctx_t ctx)
2023 {
2024 
2025 	/* Ignore. */
2026 	return (0);
2027 }
2028 
2029 static int
2030 vmxnet3_promisc_set(if_ctx_t ctx, int flags)
2031 {
2032 
2033 	vmxnet3_set_rxfilter(iflib_get_softc(ctx), flags);
2034 
2035 	return (0);
2036 }
2037 
2038 static uint64_t
2039 vmxnet3_get_counter(if_ctx_t ctx, ift_counter cnt)
2040 {
2041 	if_t ifp = iflib_get_ifp(ctx);
2042 
2043 	if (cnt < IFCOUNTERS)
2044 		return (if_get_counter_default(ifp, cnt));
2045 
2046 	return (0);
2047 }
2048 
2049 static void
2050 vmxnet3_update_admin_status(if_ctx_t ctx)
2051 {
2052 	struct vmxnet3_softc *sc;
2053 
2054 	sc = iflib_get_softc(ctx);
2055 	if (sc->vmx_ds->event != 0)
2056 		vmxnet3_evintr(sc);
2057 
2058 	vmxnet3_refresh_host_stats(sc);
2059 }
2060 
2061 static void
2062 vmxnet3_txq_timer(if_ctx_t ctx, uint16_t qid)
2063 {
2064 	/* Host stats refresh is global, so just trigger it on txq 0 */
2065 	if (qid == 0)
2066 		vmxnet3_refresh_host_stats(iflib_get_softc(ctx));
2067 }
2068 
2069 static void
2070 vmxnet3_update_vlan_filter(struct vmxnet3_softc *sc, int add, uint16_t tag)
2071 {
2072 	int idx, bit;
2073 
2074 	if (tag == 0 || tag > 4095)
2075 		return;
2076 
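	/*
	 * The VLAN filter is a 4096-bit bitmap stored as 128 32-bit words;
	 * idx selects the word and bit selects the bit within it.  For
	 * example, tag 100 maps to bit 4 of word 3.
	 */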
2077 	idx = (tag >> 5) & 0x7F;
2078 	bit = tag & 0x1F;
2079 
2080 	/* Update our private VLAN bitvector. */
2081 	if (add)
2082 		sc->vmx_vlan_filter[idx] |= (1 << bit);
2083 	else
2084 		sc->vmx_vlan_filter[idx] &= ~(1 << bit);
2085 }
2086 
2087 static void
2088 vmxnet3_vlan_register(if_ctx_t ctx, uint16_t tag)
2089 {
2090 
2091 	vmxnet3_update_vlan_filter(iflib_get_softc(ctx), 1, tag);
2092 }
2093 
2094 static void
2095 vmxnet3_vlan_unregister(if_ctx_t ctx, uint16_t tag)
2096 {
2097 
2098 	vmxnet3_update_vlan_filter(iflib_get_softc(ctx), 0, tag);
2099 }
2100 
2101 static u_int
2102 vmxnet3_hash_maddr(void *arg, struct sockaddr_dl *sdl, u_int count)
2103 {
2104 	struct vmxnet3_softc *sc = arg;
2105 
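	/*
	 * Copy the address only while the table has room, but always return
	 * 1 so that if_foreach_llmaddr() keeps counting; the caller compares
	 * the total against VMXNET3_MULTICAST_MAX to decide whether to fall
	 * back to ALLMULTI mode.
	 */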
2106 	if (count < VMXNET3_MULTICAST_MAX)
2107 		bcopy(LLADDR(sdl), &sc->vmx_mcast[count * ETHER_ADDR_LEN],
2108 		    ETHER_ADDR_LEN);
2109 
2110 	return (1);
2111 }
2112 
2113 static void
2114 vmxnet3_set_rxfilter(struct vmxnet3_softc *sc, int flags)
2115 {
2116 	if_t ifp;
2117 	struct vmxnet3_driver_shared *ds;
2118 	u_int mode;
2119 
2120 	ifp = sc->vmx_ifp;
2121 	ds = sc->vmx_ds;
2122 
2123 	mode = VMXNET3_RXMODE_UCAST | VMXNET3_RXMODE_BCAST;
2124 	if (flags & IFF_PROMISC)
2125 		mode |= VMXNET3_RXMODE_PROMISC;
2126 	if (flags & IFF_ALLMULTI)
2127 		mode |= VMXNET3_RXMODE_ALLMULTI;
2128 	else {
2129 		int cnt;
2130 
2131 		cnt = if_foreach_llmaddr(ifp, vmxnet3_hash_maddr, sc);
2132 		if (cnt >= VMXNET3_MULTICAST_MAX) {
2133 			cnt = 0;
2134 			mode |= VMXNET3_RXMODE_ALLMULTI;
2135 		} else if (cnt > 0)
2136 			mode |= VMXNET3_RXMODE_MCAST;
2137 		ds->mcast_tablelen = cnt * ETHER_ADDR_LEN;
2138 	}
2139 
2140 	ds->rxmode = mode;
2141 
2142 	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_FILTER);
2143 	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_RXMODE);
2144 }
2145 
2146 static void
2147 vmxnet3_refresh_host_stats(struct vmxnet3_softc *sc)
2148 {
2149 
2150 	vmxnet3_write_cmd(sc, VMXNET3_CMD_GET_STATS);
2151 }
2152 
2153 static int
2154 vmxnet3_link_is_up(struct vmxnet3_softc *sc)
2155 {
2156 	uint32_t status;
2157 
2158 	status = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_LINK);
2159 	return (!!(status & 0x1));
2160 }
2161 
2162 static void
2163 vmxnet3_link_status(struct vmxnet3_softc *sc)
2164 {
2165 	if_ctx_t ctx;
2166 	uint64_t speed;
2167 	int link;
2168 
2169 	ctx = sc->vmx_ctx;
2170 	link = vmxnet3_link_is_up(sc);
2171 	speed = IF_Gbps(10);
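	/*
	 * The paravirtual device does not negotiate a rate, so a nominal
	 * 10 Gbps link speed is reported to iflib.
	 */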
2172 
2173 	if (link != 0 && sc->vmx_link_active == 0) {
2174 		sc->vmx_link_active = 1;
2175 		iflib_link_state_change(ctx, LINK_STATE_UP, speed);
2176 	} else if (link == 0 && sc->vmx_link_active != 0) {
2177 		sc->vmx_link_active = 0;
2178 		iflib_link_state_change(ctx, LINK_STATE_DOWN, speed);
2179 	}
2180 }
2181 
2182 static void
2183 vmxnet3_set_lladdr(struct vmxnet3_softc *sc)
2184 {
2185 	uint32_t ml, mh;
2186 
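	/*
	 * The MAC address is split across two registers, least-significant
	 * byte first: bytes 0-3 go to MACL and bytes 4-5 to MACH.  For
	 * example, 00:0c:29:aa:bb:cc is written as MACL = 0xaa290c00 and
	 * MACH = 0x0000ccbb.
	 */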
2187 	ml  = sc->vmx_lladdr[0];
2188 	ml |= sc->vmx_lladdr[1] << 8;
2189 	ml |= sc->vmx_lladdr[2] << 16;
2190 	ml |= sc->vmx_lladdr[3] << 24;
2191 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACL, ml);
2192 
2193 	mh  = sc->vmx_lladdr[4];
2194 	mh |= sc->vmx_lladdr[5] << 8;
2195 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACH, mh);
2196 }
2197 
2198 static void
2199 vmxnet3_get_lladdr(struct vmxnet3_softc *sc)
2200 {
2201 	uint32_t ml, mh;
2202 
2203 	ml = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACL);
2204 	mh = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACH);
2205 
2206 	sc->vmx_lladdr[0] = ml;
2207 	sc->vmx_lladdr[1] = ml >> 8;
2208 	sc->vmx_lladdr[2] = ml >> 16;
2209 	sc->vmx_lladdr[3] = ml >> 24;
2210 	sc->vmx_lladdr[4] = mh;
2211 	sc->vmx_lladdr[5] = mh >> 8;
2212 }
2213 
2214 static void
2215 vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *txq,
2216     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2217 {
2218 	struct sysctl_oid *node, *txsnode;
2219 	struct sysctl_oid_list *list, *txslist;
2220 	struct UPT1_TxStats *txstats;
2221 	char namebuf[16];
2222 
2223 	txstats = &txq->vxtxq_ts->stats;
2224 
2225 	snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vxtxq_id);
2226 	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
2227 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Transmit Queue");
2228 	txq->vxtxq_sysctl = list = SYSCTL_CHILDREN(node);
2229 
2230 	/*
2231 	 * Add statistics reported by the host. These are updated by the
2232 	 * iflib txq timer on txq 0.
2233 	 */
2234 	txsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats",
2235 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Host Statistics");
2236 	txslist = SYSCTL_CHILDREN(txsnode);
2237 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_packets", CTLFLAG_RD,
2238 	    &txstats->TSO_packets, "TSO packets");
2239 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_bytes", CTLFLAG_RD,
2240 	    &txstats->TSO_bytes, "TSO bytes");
2241 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
2242 	    &txstats->ucast_packets, "Unicast packets");
2243 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
2244 	    &txstats->ucast_bytes, "Unicast bytes");
2245 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
2246 	    &txstats->mcast_packets, "Multicast packets");
2247 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
2248 	    &txstats->mcast_bytes, "Multicast bytes");
2249 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "error", CTLFLAG_RD,
2250 	    &txstats->error, "Errors");
2251 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "discard", CTLFLAG_RD,
2252 	    &txstats->discard, "Discards");
2253 }
2254 
2255 static void
2256 vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *rxq,
2257     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2258 {
2259 	struct sysctl_oid *node, *rxsnode;
2260 	struct sysctl_oid_list *list, *rxslist;
2261 	struct UPT1_RxStats *rxstats;
2262 	char namebuf[16];
2263 
2264 	rxstats = &rxq->vxrxq_rs->stats;
2265 
2266 	snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vxrxq_id);
2267 	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
2268 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Receive Queue");
2269 	rxq->vxrxq_sysctl = list = SYSCTL_CHILDREN(node);
2270 
2271 	/*
2272 	 * Add statistics reported by the host. These are updated by the
2273 	 * iflib txq timer on txq 0.
2274 	 */
2275 	rxsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats",
2276 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Host Statistics");
2277 	rxslist = SYSCTL_CHILDREN(rxsnode);
2278 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_packets", CTLFLAG_RD,
2279 	    &rxstats->LRO_packets, "LRO packets");
2280 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_bytes", CTLFLAG_RD,
2281 	    &rxstats->LRO_bytes, "LRO bytes");
2282 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
2283 	    &rxstats->ucast_packets, "Unicast packets");
2284 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
2285 	    &rxstats->ucast_bytes, "Unicast bytes");
2286 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
2287 	    &rxstats->mcast_packets, "Multicast packets");
2288 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
2289 	    &rxstats->mcast_bytes, "Multicast bytes");
2290 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_packets", CTLFLAG_RD,
2291 	    &rxstats->bcast_packets, "Broadcast packets");
2292 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_bytes", CTLFLAG_RD,
2293 	    &rxstats->bcast_bytes, "Broadcast bytes");
2294 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "nobuffer", CTLFLAG_RD,
2295 	    &rxstats->nobuffer, "No buffer");
2296 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "error", CTLFLAG_RD,
2297 	    &rxstats->error, "Errors");
2298 }
2299 
2300 static void
2301 vmxnet3_setup_debug_sysctl(struct vmxnet3_softc *sc,
2302     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2303 {
2304 	if_softc_ctx_t scctx;
2305 	struct sysctl_oid *node;
2306 	struct sysctl_oid_list *list;
2307 	int i;
2308 
2309 	scctx = sc->vmx_scctx;
2310 
2311 	for (i = 0; i < scctx->isc_ntxqsets; i++) {
2312 		struct vmxnet3_txqueue *txq = &sc->vmx_txq[i];
2313 
2314 		node = SYSCTL_ADD_NODE(ctx, txq->vxtxq_sysctl, OID_AUTO,
2315 		    "debug", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
2316 		list = SYSCTL_CHILDREN(node);
2317 
2318 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_next", CTLFLAG_RD,
2319 		    &txq->vxtxq_cmd_ring.vxtxr_next, 0, "");
2320 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_ndesc", CTLFLAG_RD,
2321 		    &txq->vxtxq_cmd_ring.vxtxr_ndesc, 0, "");
2322 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd_gen", CTLFLAG_RD,
2323 		    &txq->vxtxq_cmd_ring.vxtxr_gen, 0, "");
2324 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD,
2325 		    &txq->vxtxq_comp_ring.vxcr_next, 0, "");
2326 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
2327 		    &txq->vxtxq_comp_ring.vxcr_ndesc, 0, "");
2328 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
2329 		    &txq->vxtxq_comp_ring.vxcr_gen, 0, "");
2330 	}
2331 
2332 	for (i = 0; i < scctx->isc_nrxqsets; i++) {
2333 		struct vmxnet3_rxqueue *rxq = &sc->vmx_rxq[i];
2334 
2335 		node = SYSCTL_ADD_NODE(ctx, rxq->vxrxq_sysctl, OID_AUTO,
2336 		    "debug", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
2337 		list = SYSCTL_CHILDREN(node);
2338 
2339 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_ndesc", CTLFLAG_RD,
2340 		    &rxq->vxrxq_cmd_ring[0].vxrxr_ndesc, 0, "");
2341 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd0_gen", CTLFLAG_RD,
2342 		    &rxq->vxrxq_cmd_ring[0].vxrxr_gen, 0, "");
2343 		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "cmd0_desc_skips", CTLFLAG_RD,
2344 		    &rxq->vxrxq_cmd_ring[0].vxrxr_desc_skips, 0, "");
2345 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_ndesc", CTLFLAG_RD,
2346 		    &rxq->vxrxq_cmd_ring[1].vxrxr_ndesc, 0, "");
2347 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd1_gen", CTLFLAG_RD,
2348 		    &rxq->vxrxq_cmd_ring[1].vxrxr_gen, 0, "");
2349 		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "cmd1_desc_skips", CTLFLAG_RD,
2350 		    &rxq->vxrxq_cmd_ring[1].vxrxr_desc_skips, 0, "");
2351 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
2352 		    &rxq->vxrxq_comp_ring.vxcr_ndesc, 0, "");
2353 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
2354 		    &rxq->vxrxq_comp_ring.vxcr_gen, 0, "");
2355 		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "comp_zero_length", CTLFLAG_RD,
2356 		    &rxq->vxrxq_comp_ring.vxcr_zero_length, 0, "");
2357 		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "comp_zero_length_frag",
2358 		    CTLFLAG_RD, &rxq->vxrxq_comp_ring.vcxr_zero_length_frag,
2359 		    0, "");
2360 		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "comp_pkt_errors", CTLFLAG_RD,
2361 		    &rxq->vxrxq_comp_ring.vxcr_pkt_errors, 0, "");
2362 	}
2363 }
2364 
2365 static void
2366 vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *sc,
2367     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2368 {
2369 	if_softc_ctx_t scctx;
2370 	int i;
2371 
2372 	scctx = sc->vmx_scctx;
2373 
2374 	for (i = 0; i < scctx->isc_ntxqsets; i++)
2375 		vmxnet3_setup_txq_sysctl(&sc->vmx_txq[i], ctx, child);
2376 	for (i = 0; i < scctx->isc_nrxqsets; i++)
2377 		vmxnet3_setup_rxq_sysctl(&sc->vmx_rxq[i], ctx, child);
2378 
2379 	vmxnet3_setup_debug_sysctl(sc, ctx, child);
2380 }
2381 
2382 static void
2383 vmxnet3_setup_sysctl(struct vmxnet3_softc *sc)
2384 {
2385 	device_t dev;
2386 	struct sysctl_ctx_list *ctx;
2387 	struct sysctl_oid *tree;
2388 	struct sysctl_oid_list *child;
2389 
2390 	dev = sc->vmx_dev;
2391 	ctx = device_get_sysctl_ctx(dev);
2392 	tree = device_get_sysctl_tree(dev);
2393 	child = SYSCTL_CHILDREN(tree);
2394 
2395 	vmxnet3_setup_queue_sysctl(sc, ctx, child);
2396 }
2397 
2398 static void
2399 vmxnet3_write_bar0(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
2400 {
2401 
2402 	bus_space_write_4(sc->vmx_iot0, sc->vmx_ioh0, r, v);
2403 }
2404 
2405 static uint32_t
2406 vmxnet3_read_bar1(struct vmxnet3_softc *sc, bus_size_t r)
2407 {
2408 
2409 	return (bus_space_read_4(sc->vmx_iot1, sc->vmx_ioh1, r));
2410 }
2411 
2412 static void
2413 vmxnet3_write_bar1(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
2414 {
2415 
2416 	bus_space_write_4(sc->vmx_iot1, sc->vmx_ioh1, r, v);
2417 }
2418 
2419 static void
2420 vmxnet3_write_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
2421 {
2422 
2423 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_CMD, cmd);
2424 }
2425 
2426 static uint32_t
2427 vmxnet3_read_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
2428 {
2429 
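	/*
	 * The command register is write-to-execute: writing the command code
	 * runs it, and reading the same register back returns its result.
	 * The barrier keeps the write ordered ahead of the read.
	 */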
2430 	vmxnet3_write_cmd(sc, cmd);
2431 	bus_space_barrier(sc->vmx_iot1, sc->vmx_ioh1, 0, 0,
2432 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
2433 	return (vmxnet3_read_bar1(sc, VMXNET3_BAR1_CMD));
2434 }
2435 
2436 static void
2437 vmxnet3_enable_intr(struct vmxnet3_softc *sc, int irq)
2438 {
2439 
2440 	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 0);
2441 }
2442 
2443 static void
2444 vmxnet3_disable_intr(struct vmxnet3_softc *sc, int irq)
2445 {
2446 
2447 	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 1);
2448 }
2449 
2450 static int
2451 vmxnet3_tx_queue_intr_enable(if_ctx_t ctx, uint16_t qid)
2452 {
2453 	/* Not using interrupts for TX */
2454 	return (0);
2455 }
2456 
2457 static int
2458 vmxnet3_rx_queue_intr_enable(if_ctx_t ctx, uint16_t qid)
2459 {
2460 	struct vmxnet3_softc *sc;
2461 
2462 	sc = iflib_get_softc(ctx);
2463 	vmxnet3_enable_intr(sc, sc->vmx_rxq[qid].vxrxq_intr_idx);
2464 	return (0);
2465 }
2466 
2467 static void
2468 vmxnet3_link_intr_enable(if_ctx_t ctx)
2469 {
2470 	struct vmxnet3_softc *sc;
2471 
2472 	sc = iflib_get_softc(ctx);
2473 	vmxnet3_enable_intr(sc, sc->vmx_event_intr_idx);
2474 }
2475 
2476 static void
2477 vmxnet3_intr_enable_all(if_ctx_t ctx)
2478 {
2479 	struct vmxnet3_softc *sc;
2480 	if_softc_ctx_t scctx;
2481 	int i;
2482 
2483 	sc = iflib_get_softc(ctx);
2484 	scctx = sc->vmx_scctx;
2485 	sc->vmx_ds->ictrl &= ~VMXNET3_ICTRL_DISABLE_ALL;
2486 	for (i = 0; i < scctx->isc_vectors; i++)
2487 		vmxnet3_enable_intr(sc, i);
2488 }
2489 
2490 static void
2491 vmxnet3_intr_disable_all(if_ctx_t ctx)
2492 {
2493 	struct vmxnet3_softc *sc;
2494 	int i;
2495 
2496 	sc = iflib_get_softc(ctx);
2497 	/*
2498 	 * iflib may invoke this routine before vmxnet3_attach_post() has
2499 	 * run, which is before the top level shared data area is
2500 	 * initialized and the device made aware of it.
2501 	 */
2502 	if (sc->vmx_ds != NULL)
2503 		sc->vmx_ds->ictrl |= VMXNET3_ICTRL_DISABLE_ALL;
2504 	for (i = 0; i < VMXNET3_MAX_INTRS; i++)
2505 		vmxnet3_disable_intr(sc, i);
2506 }
2507 
2508 /*
2509  * Since this is a purely paravirtualized device, we do not have
2510  * to worry about DMA coherency. But at times, we must make sure
2511  * both the compiler and CPU do not reorder memory operations.
2512  */
2513 static inline void
2514 vmxnet3_barrier(struct vmxnet3_softc *sc, vmxnet3_barrier_t type)
2515 {
2516 
2517 	switch (type) {
2518 	case VMXNET3_BARRIER_RD:
2519 		rmb();
2520 		break;
2521 	case VMXNET3_BARRIER_WR:
2522 		wmb();
2523 		break;
2524 	case VMXNET3_BARRIER_RDWR:
2525 		mb();
2526 		break;
2527 	default:
2528 		panic("%s: bad barrier type %d", __func__, type);
2529 	}
2530 }
2531