1 /*	$OpenBSD: if_vmx.c,v 1.64 2020/07/10 13:26:38 patrick Exp $	*/
2 
3 /*
4  * Copyright (c) 2013 Tsubai Masanari
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include "bpfilter.h"
20 #include "kstat.h"
21 
22 #include <sys/param.h>
23 #include <sys/device.h>
24 #include <sys/mbuf.h>
25 #include <sys/socket.h>
26 #include <sys/sockio.h>
27 #include <sys/systm.h>
28 #include <sys/atomic.h>
29 #include <sys/intrmap.h>
30 #include <sys/kstat.h>
31 
32 #include <net/bpf.h>
33 #include <net/if.h>
34 #include <net/toeplitz.h>
35 #include <net/if_media.h>
36 
37 #include <netinet/in.h>
38 #include <netinet/if_ether.h>
39 #include <netinet/ip.h>
40 #include <netinet/tcp.h>
41 #include <netinet/udp.h>
42 
43 #include <machine/bus.h>
44 
45 #include <dev/pci/if_vmxreg.h>
46 #include <dev/pci/pcivar.h>
47 #include <dev/pci/pcidevs.h>
48 
49 #define VMX_MAX_QUEUES	MIN(VMXNET3_MAX_TX_QUEUES, VMXNET3_MAX_RX_QUEUES)
50 
51 #define NTXDESC 512 /* tx ring size */
52 #define NTXSEGS 8 /* tx descriptors per packet */
53 #define NRXDESC 512
54 #define NTXCOMPDESC NTXDESC
55 #define NRXCOMPDESC (NRXDESC * 2)	/* ring1 + ring2 */
56 
57 #define VMXNET3_DRIVER_VERSION 0x00010000
58 
59 #define VMX_TX_GEN	htole32(VMXNET3_TX_GEN_M << VMXNET3_TX_GEN_S)
60 #define VMX_TXC_GEN	htole32(VMXNET3_TXC_GEN_M << VMXNET3_TXC_GEN_S)
61 #define VMX_RX_GEN	htole32(VMXNET3_RX_GEN_M << VMXNET3_RX_GEN_S)
62 #define VMX_RXC_GEN	htole32(VMXNET3_RXC_GEN_M << VMXNET3_RXC_GEN_S)
63 
64 struct vmxnet3_softc;
65 
66 struct vmxnet3_txring {
67 	struct mbuf *m[NTXDESC];
68 	bus_dmamap_t dmap[NTXDESC];
69 	struct vmxnet3_txdesc *txd;
70 	u_int32_t gen;
71 	u_int prod;
72 	u_int cons;
73 };
74 
75 struct vmxnet3_rxring {
76 	struct vmxnet3_softc *sc;
77 	struct mbuf *m[NRXDESC];
78 	bus_dmamap_t dmap[NRXDESC];
79 	struct mutex mtx;
80 	struct if_rxring rxr;
81 	struct timeout refill;
82 	struct vmxnet3_rxdesc *rxd;
83 	u_int fill;
84 	u_int32_t gen;
85 	u_int8_t rid;
86 };
87 
88 struct vmxnet3_comp_ring {
89 	union {
90 		struct vmxnet3_txcompdesc *txcd;
91 		struct vmxnet3_rxcompdesc *rxcd;
92 	};
93 	u_int next;
94 	u_int32_t gen;
95 };
96 
97 struct vmxnet3_txqueue {
98 	struct vmxnet3_softc *sc; /* sigh */
99 	struct vmxnet3_txring cmd_ring;
100 	struct vmxnet3_comp_ring comp_ring;
101 	struct vmxnet3_txq_shared *ts;
102 	struct ifqueue *ifq;
103 	struct kstat *txkstat;
104 } __aligned(64);
105 
106 struct vmxnet3_rxqueue {
107 	struct vmxnet3_softc *sc; /* sigh */
108 	struct vmxnet3_rxring cmd_ring[2];
109 	struct vmxnet3_comp_ring comp_ring;
110 	struct vmxnet3_rxq_shared *rs;
111 	struct ifiqueue *ifiq;
112 	struct kstat *rxkstat;
113 } __aligned(64);
114 
115 struct vmxnet3_queue {
116 	struct vmxnet3_txqueue tx;
117 	struct vmxnet3_rxqueue rx;
118 	struct vmxnet3_softc *sc;
119 	char intrname[16];
120 	void *ih;
121 	int intr;
122 };
123 
124 struct vmxnet3_softc {
125 	struct device sc_dev;
126 	struct arpcom sc_arpcom;
127 	struct ifmedia sc_media;
128 
129 	bus_space_tag_t	sc_iot0;
130 	bus_space_tag_t	sc_iot1;
131 	bus_space_handle_t sc_ioh0;
132 	bus_space_handle_t sc_ioh1;
133 	bus_dma_tag_t sc_dmat;
134 	void *sc_ih;
135 
136 	int sc_nqueues;
137 	struct vmxnet3_queue *sc_q;
138 	struct intrmap *sc_intrmap;
139 
140 	struct vmxnet3_driver_shared *sc_ds;
141 	u_int8_t *sc_mcast;
142 	struct vmxnet3_upt1_rss_conf *sc_rss;
143 
144 #if NKSTAT > 0
145 	struct rwlock		sc_kstat_lock;
146 	struct timeval		sc_kstat_updated;
147 #endif
148 };
149 
150 #define JUMBO_LEN (1024 * 9)
151 #define DMAADDR(map) ((map)->dm_segs[0].ds_addr)
152 
153 #define READ_BAR0(sc, reg) bus_space_read_4((sc)->sc_iot0, (sc)->sc_ioh0, reg)
154 #define READ_BAR1(sc, reg) bus_space_read_4((sc)->sc_iot1, (sc)->sc_ioh1, reg)
155 #define WRITE_BAR0(sc, reg, val) \
156 	bus_space_write_4((sc)->sc_iot0, (sc)->sc_ioh0, reg, val)
157 #define WRITE_BAR1(sc, reg, val) \
158 	bus_space_write_4((sc)->sc_iot1, (sc)->sc_ioh1, reg, val)
159 #define WRITE_CMD(sc, cmd) WRITE_BAR1(sc, VMXNET3_BAR1_CMD, cmd)
160 #define vtophys(va) 0		/* XXX ok? */
161 
162 int vmxnet3_match(struct device *, void *, void *);
163 void vmxnet3_attach(struct device *, struct device *, void *);
164 int vmxnet3_dma_init(struct vmxnet3_softc *);
165 int vmxnet3_alloc_txring(struct vmxnet3_softc *, int, int);
166 int vmxnet3_alloc_rxring(struct vmxnet3_softc *, int, int);
167 void vmxnet3_txinit(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
168 void vmxnet3_rxinit(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
169 void vmxnet3_txstop(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
170 void vmxnet3_rxstop(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
171 void vmxnet3_link_state(struct vmxnet3_softc *);
172 void vmxnet3_enable_all_intrs(struct vmxnet3_softc *);
173 void vmxnet3_disable_all_intrs(struct vmxnet3_softc *);
174 int vmxnet3_intr(void *);
175 int vmxnet3_intr_intx(void *);
176 int vmxnet3_intr_event(void *);
177 int vmxnet3_intr_queue(void *);
178 void vmxnet3_evintr(struct vmxnet3_softc *);
179 void vmxnet3_txintr(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
180 void vmxnet3_rxintr(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
181 void vmxnet3_rxfill_tick(void *);
182 void vmxnet3_rxfill(struct vmxnet3_rxring *);
183 void vmxnet3_iff(struct vmxnet3_softc *);
184 void vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *, struct mbuf *);
185 void vmxnet3_stop(struct ifnet *);
186 void vmxnet3_reset(struct vmxnet3_softc *);
187 int vmxnet3_init(struct vmxnet3_softc *);
188 int vmxnet3_ioctl(struct ifnet *, u_long, caddr_t);
189 void vmxnet3_start(struct ifqueue *);
190 int vmxnet3_load_mbuf(struct vmxnet3_softc *, struct vmxnet3_txring *,
191     struct mbuf **);
192 void vmxnet3_watchdog(struct ifnet *);
193 void vmxnet3_media_status(struct ifnet *, struct ifmediareq *);
194 int vmxnet3_media_change(struct ifnet *);
195 void *vmxnet3_dma_allocmem(struct vmxnet3_softc *, u_int, u_int, bus_addr_t *);
196 
197 #if NKSTAT > 0
198 static void	vmx_kstat_init(struct vmxnet3_softc *);
199 static void	vmx_kstat_txstats(struct vmxnet3_softc *,
200 		    struct vmxnet3_txqueue *, int);
201 static void	vmx_kstat_rxstats(struct vmxnet3_softc *,
202 		    struct vmxnet3_rxqueue *, int);
203 #endif /* NKSTAT > 0 */
204 
205 const struct pci_matchid vmx_devices[] = {
206 	{ PCI_VENDOR_VMWARE, PCI_PRODUCT_VMWARE_NET_3 }
207 };
208 
209 struct cfattach vmx_ca = {
210 	sizeof(struct vmxnet3_softc), vmxnet3_match, vmxnet3_attach
211 };
212 
213 struct cfdriver vmx_cd = {
214 	NULL, "vmx", DV_IFNET
215 };
216 
217 int
218 vmxnet3_match(struct device *parent, void *match, void *aux)
219 {
220 	return (pci_matchbyid(aux, vmx_devices, nitems(vmx_devices)));
221 }
222 
223 void
224 vmxnet3_attach(struct device *parent, struct device *self, void *aux)
225 {
226 	struct vmxnet3_softc *sc = (void *)self;
227 	struct pci_attach_args *pa = aux;
228 	struct ifnet *ifp = &sc->sc_arpcom.ac_if;
229 	pci_intr_handle_t ih;
230 	const char *intrstr;
231 	u_int memtype, ver, macl, mach, intrcfg;
232 	u_char enaddr[ETHER_ADDR_LEN];
233 	int (*isr)(void *);
234 	int msix = 0;
235 	int i;
236 
237 	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, 0x10);
238 	if (pci_mapreg_map(pa, 0x10, memtype, 0, &sc->sc_iot0, &sc->sc_ioh0,
239 	    NULL, NULL, 0)) {
240 		printf(": failed to map BAR0\n");
241 		return;
242 	}
243 	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, 0x14);
244 	if (pci_mapreg_map(pa, 0x14, memtype, 0, &sc->sc_iot1, &sc->sc_ioh1,
245 	    NULL, NULL, 0)) {
246 		printf(": failed to map BAR1\n");
247 		return;
248 	}
249 
250 	ver = READ_BAR1(sc, VMXNET3_BAR1_VRRS);
251 	if ((ver & 0x1) == 0) {
252 		printf(": unsupported hardware version 0x%x\n", ver);
253 		return;
254 	}
255 	WRITE_BAR1(sc, VMXNET3_BAR1_VRRS, 1);
256 
257 	ver = READ_BAR1(sc, VMXNET3_BAR1_UVRS);
258 	if ((ver & 0x1) == 0) {
259 		printf(": incompatible UPT version 0x%x\n", ver);
260 		return;
261 	}
262 	WRITE_BAR1(sc, VMXNET3_BAR1_UVRS, 1);
263 
264 	sc->sc_dmat = pa->pa_dmat;
265 
266 	WRITE_CMD(sc, VMXNET3_CMD_GET_INTRCFG);
267 	intrcfg = READ_BAR1(sc, VMXNET3_BAR1_CMD);
268 	isr = vmxnet3_intr;
269 	sc->sc_nqueues = 1;
270 
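	/*
	 * Pick an interrupt scheme: prefer MSI-X with one vector for
	 * events plus one vector per queue pair; if only a single
	 * vector is available, fall back to MSI or INTx with one
	 * handler doing everything.
	 */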
271 	switch (intrcfg & VMXNET3_INTRCFG_TYPE_MASK) {
272 	case VMXNET3_INTRCFG_TYPE_AUTO:
273 	case VMXNET3_INTRCFG_TYPE_MSIX:
274 		msix = pci_intr_msix_count(pa->pa_pc, pa->pa_tag);
275 		if (msix > 0) {
276 			if (pci_intr_map_msix(pa, 0, &ih) == 0) {
277 				msix--; /* are there spares for tx/rx qs? */
278 				if (msix == 0)
279 					break;
280 
281 				isr = vmxnet3_intr_event;
282 				sc->sc_intrmap = intrmap_create(&sc->sc_dev,
283 				    msix, VMX_MAX_QUEUES, INTRMAP_POWEROF2);
284 				sc->sc_nqueues = intrmap_count(sc->sc_intrmap);
285 			}
286 			break;
287 		}
288 
289 		/* FALLTHROUGH */
290 	case VMXNET3_INTRCFG_TYPE_MSI:
291 		if (pci_intr_map_msi(pa, &ih) == 0)
292 			break;
293 
294 		/* FALLTHROUGH */
295 	case VMXNET3_INTRCFG_TYPE_INTX:
296 		isr = vmxnet3_intr_intx;
297 		if (pci_intr_map(pa, &ih) == 0)
298 			break;
299 
300 		printf(": failed to map interrupt\n");
301 		return;
302 	}
303 	sc->sc_ih = pci_intr_establish(pa->pa_pc, ih, IPL_NET | IPL_MPSAFE,
304 	    isr, sc, self->dv_xname);
305 	intrstr = pci_intr_string(pa->pa_pc, ih);
306 	if (intrstr)
307 		printf(": %s", intrstr);
308 
309 	sc->sc_q = mallocarray(sc->sc_nqueues, sizeof(*sc->sc_q),
310 	    M_DEVBUF, M_WAITOK|M_ZERO);
311 
312 	if (sc->sc_intrmap != NULL) {
313 		for (i = 0; i < sc->sc_nqueues; i++) {
314 			struct vmxnet3_queue *q;
315 			int vec;
316 
317 			q = &sc->sc_q[i];
318 			vec = i + 1;
319 			if (pci_intr_map_msix(pa, vec, &ih) != 0) {
320 				printf(", failed to map interrupt %d\n", vec);
321 				return;
322 			}
323 			snprintf(q->intrname, sizeof(q->intrname), "%s:%d",
324 			    self->dv_xname, i);
325 			q->ih = pci_intr_establish_cpu(pa->pa_pc, ih,
326 			    IPL_NET | IPL_MPSAFE,
327 			    intrmap_cpu(sc->sc_intrmap, i),
328 			    vmxnet3_intr_queue, q, q->intrname);
329 
330 			q->intr = vec;
331 			q->sc = sc;
332 		}
333 	}
334 
335 	if (vmxnet3_dma_init(sc)) {
336 		printf(": failed to setup DMA\n");
337 		return;
338 	}
339 
340 	printf(", %d queue%s", sc->sc_nqueues, sc->sc_nqueues > 1 ? "s" : "");
341 
342 	WRITE_CMD(sc, VMXNET3_CMD_GET_MACL);
343 	macl = READ_BAR1(sc, VMXNET3_BAR1_CMD);
344 	enaddr[0] = macl;
345 	enaddr[1] = macl >> 8;
346 	enaddr[2] = macl >> 16;
347 	enaddr[3] = macl >> 24;
348 	WRITE_CMD(sc, VMXNET3_CMD_GET_MACH);
349 	mach = READ_BAR1(sc, VMXNET3_BAR1_CMD);
350 	enaddr[4] = mach;
351 	enaddr[5] = mach >> 8;
352 
353 	WRITE_BAR1(sc, VMXNET3_BAR1_MACL, macl);
354 	WRITE_BAR1(sc, VMXNET3_BAR1_MACH, mach);
355 	printf(", address %s\n", ether_sprintf(enaddr));
356 
357 	bcopy(enaddr, sc->sc_arpcom.ac_enaddr, 6);
358 	strlcpy(ifp->if_xname, self->dv_xname, IFNAMSIZ);
359 	ifp->if_softc = sc;
360 	ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX;
361 	ifp->if_xflags = IFXF_MPSAFE;
362 	ifp->if_ioctl = vmxnet3_ioctl;
363 	ifp->if_qstart = vmxnet3_start;
364 	ifp->if_watchdog = vmxnet3_watchdog;
365 	ifp->if_hardmtu = VMXNET3_MAX_MTU;
366 	ifp->if_capabilities = IFCAP_VLAN_MTU;
367 #if 0
368 	if (sc->sc_ds->upt_features & UPT1_F_CSUM)
369 		ifp->if_capabilities |= IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4;
370 #endif
371 	if (sc->sc_ds->upt_features & UPT1_F_VLAN)
372 		ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
373 
374 	ifq_set_maxlen(&ifp->if_snd, NTXDESC);
375 
376 	ifmedia_init(&sc->sc_media, IFM_IMASK, vmxnet3_media_change,
377 	    vmxnet3_media_status);
378 	ifmedia_add(&sc->sc_media, IFM_ETHER|IFM_AUTO, 0, NULL);
379 	ifmedia_add(&sc->sc_media, IFM_ETHER|IFM_10G_T|IFM_FDX, 0, NULL);
380 	ifmedia_add(&sc->sc_media, IFM_ETHER|IFM_10G_T, 0, NULL);
381 	ifmedia_add(&sc->sc_media, IFM_ETHER|IFM_1000_T|IFM_FDX, 0, NULL);
382 	ifmedia_add(&sc->sc_media, IFM_ETHER|IFM_1000_T, 0, NULL);
383 	ifmedia_set(&sc->sc_media, IFM_ETHER|IFM_AUTO);
384 
385 	if_attach(ifp);
386 	ether_ifattach(ifp);
387 	vmxnet3_link_state(sc);
388 
389 	if_attach_queues(ifp, sc->sc_nqueues);
390 	if_attach_iqueues(ifp, sc->sc_nqueues);
391 
392 #if NKSTAT > 0
393 	vmx_kstat_init(sc);
394 #endif
395 
396 	for (i = 0; i < sc->sc_nqueues; i++) {
397 		ifp->if_ifqs[i]->ifq_softc = &sc->sc_q[i].tx;
398 		sc->sc_q[i].tx.ifq = ifp->if_ifqs[i];
399 		sc->sc_q[i].rx.ifiq = ifp->if_iqs[i];
400 
401 #if NKSTAT > 0
402 		vmx_kstat_txstats(sc, &sc->sc_q[i].tx, i);
403 		vmx_kstat_rxstats(sc, &sc->sc_q[i].rx, i);
404 #endif
405 	}
406 }
407 
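/*
 * Allocate the DMA memory shared with the device: the per-queue tx/rx
 * shared state (one contiguous block, tx entries followed by rx entries),
 * the descriptor and completion rings, the multicast filter table and the
 * driver_shared structure.  driver_shared is filled in with the driver/UPT
 * versions, MTU, queue counts and interrupt setup; with more than one
 * queue an UPT1 RSS configuration (Toeplitz hash, indirection table spread
 * over the queues) is attached as well.  Finally the physical address of
 * driver_shared is written to BAR1 so the device can find it.
 */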
408 int
409 vmxnet3_dma_init(struct vmxnet3_softc *sc)
410 {
411 	struct vmxnet3_driver_shared *ds;
412 	struct vmxnet3_txq_shared *ts;
413 	struct vmxnet3_rxq_shared *rs;
414 	bus_addr_t ds_pa, qs_pa, mcast_pa;
415 	int i, queue, qs_len, intr;
416 	u_int major, minor, release_code, rev;
417 
418 	qs_len = sc->sc_nqueues * (sizeof *ts + sizeof *rs);
419 	ts = vmxnet3_dma_allocmem(sc, qs_len, VMXNET3_DMADESC_ALIGN, &qs_pa);
420 	if (ts == NULL)
421 		return -1;
422 	for (queue = 0; queue < sc->sc_nqueues; queue++)
423 		sc->sc_q[queue].tx.ts = ts++;
424 	rs = (void *)ts;
425 	for (queue = 0; queue < sc->sc_nqueues; queue++)
426 		sc->sc_q[queue].rx.rs = rs++;
427 
428 	for (queue = 0; queue < sc->sc_nqueues; queue++) {
429 		intr = sc->sc_q[queue].intr;
430 
431 		if (vmxnet3_alloc_txring(sc, queue, intr))
432 			return -1;
433 		if (vmxnet3_alloc_rxring(sc, queue, intr))
434 			return -1;
435 	}
436 
437 	sc->sc_mcast = vmxnet3_dma_allocmem(sc, 682 * ETHER_ADDR_LEN, 32, &mcast_pa);
438 	if (sc->sc_mcast == NULL)
439 		return -1;
440 
441 	ds = vmxnet3_dma_allocmem(sc, sizeof *sc->sc_ds, 8, &ds_pa);
442 	if (ds == NULL)
443 		return -1;
444 	sc->sc_ds = ds;
445 	ds->magic = VMXNET3_REV1_MAGIC;
446 	ds->version = VMXNET3_DRIVER_VERSION;
447 
448 	/*
449 	 * XXX The FreeBSD version uses the following values:
450 	 * (Does the device behavior depend on them?)
451 	 *
452 	 * major = __FreeBSD_version / 100000;
453 	 * minor = (__FreeBSD_version / 1000) % 100;
454 	 * release_code = (__FreeBSD_version / 100) % 10;
455 	 * rev = __FreeBSD_version % 100;
456 	 */
457 	major = 0;
458 	minor = 0;
459 	release_code = 0;
460 	rev = 0;
461 #ifdef __LP64__
462 	ds->guest = release_code << 30 | rev << 22 | major << 14 | minor << 6
463 	    | VMXNET3_GOS_FREEBSD | VMXNET3_GOS_64BIT;
464 #else
465 	ds->guest = release_code << 30 | rev << 22 | major << 14 | minor << 6
466 	    | VMXNET3_GOS_FREEBSD | VMXNET3_GOS_32BIT;
467 #endif
468 	ds->vmxnet3_revision = 1;
469 	ds->upt_version = 1;
470 	ds->upt_features = UPT1_F_CSUM | UPT1_F_VLAN;
471 	ds->driver_data = vtophys(sc);
472 	ds->driver_data_len = sizeof(struct vmxnet3_softc);
473 	ds->queue_shared = qs_pa;
474 	ds->queue_shared_len = qs_len;
475 	ds->mtu = VMXNET3_MAX_MTU;
476 	ds->ntxqueue = sc->sc_nqueues;
477 	ds->nrxqueue = sc->sc_nqueues;
478 	ds->mcast_table = mcast_pa;
479 	ds->automask = 1;
480 	ds->nintr = 1 + (sc->sc_intrmap != NULL ? sc->sc_nqueues : 0);
481 	ds->evintr = 0;
482 	ds->ictrl = VMXNET3_ICTRL_DISABLE_ALL;
483 	for (i = 0; i < ds->nintr; i++)
484 		ds->modlevel[i] = UPT1_IMOD_ADAPTIVE;
485 
486 	if (sc->sc_nqueues > 1) {
487 		struct vmxnet3_upt1_rss_conf *rsscfg;
488 		bus_addr_t rss_pa;
489 
490 		rsscfg = vmxnet3_dma_allocmem(sc, sizeof(*rsscfg), 8, &rss_pa);
491 
492 		rsscfg->hash_type = UPT1_RSS_HASH_TYPE_TCP_IPV4 |
493 		    UPT1_RSS_HASH_TYPE_IPV4 |
494 		    UPT1_RSS_HASH_TYPE_TCP_IPV6 |
495 		    UPT1_RSS_HASH_TYPE_IPV6;
496 		rsscfg->hash_func = UPT1_RSS_HASH_FUNC_TOEPLITZ;
497 		rsscfg->hash_key_size = sizeof(rsscfg->hash_key);
498 		stoeplitz_to_key(rsscfg->hash_key, sizeof(rsscfg->hash_key));
499 
500 		rsscfg->ind_table_size = sizeof(rsscfg->ind_table);
501 		for (i = 0; i < sizeof(rsscfg->ind_table); i++)
502 			rsscfg->ind_table[i] = i % sc->sc_nqueues;
503 
504 		ds->upt_features |= UPT1_F_RSS;
505 		ds->rss.version = 1;
506 		ds->rss.len = sizeof(*rsscfg);
507 		ds->rss.paddr = rss_pa;
508 
509 		sc->sc_rss = rsscfg;
510 	}
511 
512 	WRITE_BAR1(sc, VMXNET3_BAR1_DSL, ds_pa);
513 	WRITE_BAR1(sc, VMXNET3_BAR1_DSH, (u_int64_t)ds_pa >> 32);
514 	return 0;
515 }
516 
517 int
518 vmxnet3_alloc_txring(struct vmxnet3_softc *sc, int queue, int intr)
519 {
520 	struct vmxnet3_txqueue *tq = &sc->sc_q[queue].tx;
521 	struct vmxnet3_txq_shared *ts;
522 	struct vmxnet3_txring *ring = &tq->cmd_ring;
523 	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
524 	bus_addr_t pa, comp_pa;
525 	int idx;
526 
527 	ring->txd = vmxnet3_dma_allocmem(sc, NTXDESC * sizeof ring->txd[0], 512, &pa);
528 	if (ring->txd == NULL)
529 		return -1;
530 	comp_ring->txcd = vmxnet3_dma_allocmem(sc,
531 	    NTXCOMPDESC * sizeof comp_ring->txcd[0], 512, &comp_pa);
532 	if (comp_ring->txcd == NULL)
533 		return -1;
534 
535 	for (idx = 0; idx < NTXDESC; idx++) {
536 		if (bus_dmamap_create(sc->sc_dmat, JUMBO_LEN, NTXSEGS,
537 		    VMXNET3_TX_LEN_M + 1, 0, BUS_DMA_NOWAIT, &ring->dmap[idx]))
538 			return -1;
539 	}
540 
541 	ts = tq->ts;
542 	bzero(ts, sizeof *ts);
543 	ts->npending = 0;
544 	ts->intr_threshold = 1;
545 	ts->cmd_ring = pa;
546 	ts->cmd_ring_len = NTXDESC;
547 	ts->comp_ring = comp_pa;
548 	ts->comp_ring_len = NTXCOMPDESC;
549 	ts->driver_data = vtophys(tq);
550 	ts->driver_data_len = sizeof *tq;
551 	ts->intr_idx = intr;
552 	ts->stopped = 1;
553 	ts->error = 0;
554 	return 0;
555 }
556 
557 int
558 vmxnet3_alloc_rxring(struct vmxnet3_softc *sc, int queue, int intr)
559 {
560 	struct vmxnet3_rxqueue *rq = &sc->sc_q[queue].rx;
561 	struct vmxnet3_rxq_shared *rs;
562 	struct vmxnet3_rxring *ring;
563 	struct vmxnet3_comp_ring *comp_ring;
564 	bus_addr_t pa[2], comp_pa;
565 	int i, idx;
566 
567 	for (i = 0; i < 2; i++) {
568 		ring = &rq->cmd_ring[i];
569 		ring->rxd = vmxnet3_dma_allocmem(sc, NRXDESC * sizeof ring->rxd[0],
570 		    512, &pa[i]);
571 		if (ring->rxd == NULL)
572 			return -1;
573 	}
574 	comp_ring = &rq->comp_ring;
575 	comp_ring->rxcd = vmxnet3_dma_allocmem(sc,
576 	    NRXCOMPDESC * sizeof comp_ring->rxcd[0], 512, &comp_pa);
577 	if (comp_ring->rxcd == NULL)
578 		return -1;
579 
580 	for (i = 0; i < 2; i++) {
581 		ring = &rq->cmd_ring[i];
582 		ring->sc = sc;
583 		ring->rid = i;
584 		mtx_init(&ring->mtx, IPL_NET);
585 		timeout_set(&ring->refill, vmxnet3_rxfill_tick, ring);
586 		for (idx = 0; idx < NRXDESC; idx++) {
587 			if (bus_dmamap_create(sc->sc_dmat, JUMBO_LEN, 1,
588 			    JUMBO_LEN, 0, BUS_DMA_NOWAIT, &ring->dmap[idx]))
589 				return -1;
590 		}
591 	}
592 
593 	rs = rq->rs;
594 	bzero(rs, sizeof *rs);
595 	rs->cmd_ring[0] = pa[0];
596 	rs->cmd_ring[1] = pa[1];
597 	rs->cmd_ring_len[0] = NRXDESC;
598 	rs->cmd_ring_len[1] = NRXDESC;
599 	rs->comp_ring = comp_pa;
600 	rs->comp_ring_len = NRXCOMPDESC;
601 	rs->driver_data = vtophys(rq);
602 	rs->driver_data_len = sizeof *rq;
603 	rs->intr_idx = intr;
604 	rs->stopped = 1;
605 	rs->error = 0;
606 	return 0;
607 }
608 
609 void
610 vmxnet3_txinit(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *tq)
611 {
612 	struct vmxnet3_txring *ring = &tq->cmd_ring;
613 	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
614 
615 	ring->cons = ring->prod = 0;
616 	ring->gen = VMX_TX_GEN;
617 	comp_ring->next = 0;
618 	comp_ring->gen = VMX_TXC_GEN;
619 	bzero(ring->txd, NTXDESC * sizeof ring->txd[0]);
620 	bzero(comp_ring->txcd, NTXCOMPDESC * sizeof comp_ring->txcd[0]);
621 }
622 
623 void
624 vmxnet3_rxfill_tick(void *arg)
625 {
626 	struct vmxnet3_rxring *ring = arg;
627 
628 	if (!mtx_enter_try(&ring->mtx))
629 		return;
630 
631 	vmxnet3_rxfill(ring);
632 	mtx_leave(&ring->mtx);
633 }
634 
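/*
 * Fill the rx ring with mbuf clusters.  Called with ring->mtx held.
 * Each descriptor is loaded with a freshly allocated cluster; the buffer
 * address is written first and membar_producer() is issued before the
 * length/type/generation word, so the device only sees a complete
 * descriptor once the generation bit matches.  If the ring is still
 * empty afterwards, the refill timeout is armed to retry later.
 */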
635 void
636 vmxnet3_rxfill(struct vmxnet3_rxring *ring)
637 {
638 	struct vmxnet3_softc *sc = ring->sc;
639 	struct vmxnet3_rxdesc *rxd;
640 	struct mbuf *m;
641 	bus_dmamap_t map;
642 	u_int slots;
643 	unsigned int prod;
644 	uint32_t rgen;
645 	uint32_t type = htole32(VMXNET3_BTYPE_HEAD << VMXNET3_RX_BTYPE_S);
646 
647 	MUTEX_ASSERT_LOCKED(&ring->mtx);
648 
649 	prod = ring->fill;
650 	rgen = ring->gen;
651 
652 	for (slots = if_rxr_get(&ring->rxr, NRXDESC); slots > 0; slots--) {
653 		KASSERT(ring->m[prod] == NULL);
654 
655 		m = MCLGETI(NULL, M_DONTWAIT, NULL, JUMBO_LEN);
656 		if (m == NULL)
657 			break;
658 
659 		m->m_pkthdr.len = m->m_len = JUMBO_LEN;
660 		m_adj(m, ETHER_ALIGN);
661 
662 		map = ring->dmap[prod];
663 		if (bus_dmamap_load_mbuf(sc->sc_dmat, map, m, BUS_DMA_NOWAIT))
664 			panic("load mbuf");
665 
666 		bus_dmamap_sync(sc->sc_dmat, map, 0, map->dm_mapsize,
667 		    BUS_DMASYNC_PREREAD);
668 
669 		ring->m[prod] = m;
670 
671 		rxd = &ring->rxd[prod];
672 		rxd->rx_addr = htole64(DMAADDR(map));
673 		membar_producer();
674 		rxd->rx_word2 = (htole32(m->m_pkthdr.len & VMXNET3_RX_LEN_M) <<
675 		    VMXNET3_RX_LEN_S) | type | rgen;
676 
677 		if (++prod == NRXDESC) {
678 			prod = 0;
679 			rgen ^= VMX_RX_GEN;
680 		}
681 	}
682 	if_rxr_put(&ring->rxr, slots);
683 
684 	ring->fill = prod;
685 	ring->gen = rgen;
686 
687 	if (if_rxr_inuse(&ring->rxr) == 0)
688 		timeout_add(&ring->refill, 1);
689 }
690 
691 void
692 vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rq)
693 {
694 	struct vmxnet3_rxring *ring;
695 	struct vmxnet3_comp_ring *comp_ring;
696 	int i;
697 
698 	for (i = 0; i < 2; i++) {
699 		ring = &rq->cmd_ring[i];
700 		ring->fill = 0;
701 		ring->gen = VMX_RX_GEN;
702 		bzero(ring->rxd, NRXDESC * sizeof ring->rxd[0]);
703 		if_rxr_init(&ring->rxr, 2, NRXDESC - 1);
704 	}
705 
706 	/* XXX only fill ring 0 */
707 	ring = &rq->cmd_ring[0];
708 	mtx_enter(&ring->mtx);
709 	vmxnet3_rxfill(ring);
710 	mtx_leave(&ring->mtx);
711 
712 	comp_ring = &rq->comp_ring;
713 	comp_ring->next = 0;
714 	comp_ring->gen = VMX_RXC_GEN;
715 	bzero(comp_ring->rxcd, NRXCOMPDESC * sizeof comp_ring->rxcd[0]);
716 }
717 
718 void
719 vmxnet3_txstop(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *tq)
720 {
721 	struct vmxnet3_txring *ring = &tq->cmd_ring;
722 	int idx;
723 
724 	for (idx = 0; idx < NTXDESC; idx++) {
725 		if (ring->m[idx]) {
726 			bus_dmamap_unload(sc->sc_dmat, ring->dmap[idx]);
727 			m_freem(ring->m[idx]);
728 			ring->m[idx] = NULL;
729 		}
730 	}
731 }
732 
733 void
734 vmxnet3_rxstop(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rq)
735 {
736 	struct vmxnet3_rxring *ring;
737 	int i, idx;
738 
739 	for (i = 0; i < 2; i++) {
740 		ring = &rq->cmd_ring[i];
741 		timeout_del(&ring->refill);
742 		for (idx = 0; idx < NRXDESC; idx++) {
743 			struct mbuf *m = ring->m[idx];
744 			if (m == NULL)
745 				continue;
746 
747 			ring->m[idx] = NULL;
748 			m_freem(m);
749 			bus_dmamap_unload(sc->sc_dmat, ring->dmap[idx]);
750 		}
751 	}
752 }
753 
754 void
755 vmxnet3_link_state(struct vmxnet3_softc *sc)
756 {
757 	struct ifnet *ifp = &sc->sc_arpcom.ac_if;
758 	u_int x, link, speed;
759 
760 	WRITE_CMD(sc, VMXNET3_CMD_GET_LINK);
761 	x = READ_BAR1(sc, VMXNET3_BAR1_CMD);
762 	speed = x >> 16;
763 	if (x & 1) {
764 		ifp->if_baudrate = IF_Mbps(speed);
765 		link = LINK_STATE_UP;
766 	} else
767 		link = LINK_STATE_DOWN;
768 
769 	if (ifp->if_link_state != link) {
770 		ifp->if_link_state = link;
771 		if_link_state_change(ifp);
772 	}
773 }
774 
775 static inline void
776 vmxnet3_enable_intr(struct vmxnet3_softc *sc, int irq)
777 {
778 	WRITE_BAR0(sc, VMXNET3_BAR0_IMASK(irq), 0);
779 }
780 
781 static inline void
782 vmxnet3_disable_intr(struct vmxnet3_softc *sc, int irq)
783 {
784 	WRITE_BAR0(sc, VMXNET3_BAR0_IMASK(irq), 1);
785 }
786 
787 void
788 vmxnet3_enable_all_intrs(struct vmxnet3_softc *sc)
789 {
790 	int i;
791 
792 	sc->sc_ds->ictrl &= ~VMXNET3_ICTRL_DISABLE_ALL;
793 	vmxnet3_enable_intr(sc, 0);
794 	if (sc->sc_intrmap) {
795 		for (i = 0; i < sc->sc_nqueues; i++)
796 			vmxnet3_enable_intr(sc, sc->sc_q[i].intr);
797 	}
798 }
799 
800 void
801 vmxnet3_disable_all_intrs(struct vmxnet3_softc *sc)
802 {
803 	int i;
804 
805 	sc->sc_ds->ictrl |= VMXNET3_ICTRL_DISABLE_ALL;
806 	vmxnet3_disable_intr(sc, 0);
807 	if (sc->sc_intrmap) {
808 		for (i = 0; i < sc->sc_nqueues; i++)
809 			vmxnet3_disable_intr(sc, sc->sc_q[i].intr);
810 	}
811 }
812 
813 int
814 vmxnet3_intr_intx(void *arg)
815 {
816 	struct vmxnet3_softc *sc = arg;
817 
818 	if (READ_BAR1(sc, VMXNET3_BAR1_INTR) == 0)
819 		return 0;
820 
821 	return (vmxnet3_intr(sc));
822 }
823 
824 int
825 vmxnet3_intr(void *arg)
826 {
827 	struct vmxnet3_softc *sc = arg;
828 	struct ifnet *ifp = &sc->sc_arpcom.ac_if;
829 
830 	if (sc->sc_ds->event) {
831 		KERNEL_LOCK();
832 		vmxnet3_evintr(sc);
833 		KERNEL_UNLOCK();
834 	}
835 
836 	if (ifp->if_flags & IFF_RUNNING) {
837 		vmxnet3_rxintr(sc, &sc->sc_q[0].rx);
838 		vmxnet3_txintr(sc, &sc->sc_q[0].tx);
839 		vmxnet3_enable_intr(sc, 0);
840 	}
841 
842 	return 1;
843 }
844 
845 int
846 vmxnet3_intr_event(void *arg)
847 {
848 	struct vmxnet3_softc *sc = arg;
849 
850 	if (sc->sc_ds->event) {
851 		KERNEL_LOCK();
852 		vmxnet3_evintr(sc);
853 		KERNEL_UNLOCK();
854 	}
855 
856 	vmxnet3_enable_intr(sc, 0);
857 	return 1;
858 }
859 
860 int
861 vmxnet3_intr_queue(void *arg)
862 {
863 	struct vmxnet3_queue *q = arg;
864 
865 	vmxnet3_rxintr(q->sc, &q->rx);
866 	vmxnet3_txintr(q->sc, &q->tx);
867 	vmxnet3_enable_intr(q->sc, q->intr);
868 
869 	return 1;
870 }
871 
872 void
873 vmxnet3_evintr(struct vmxnet3_softc *sc)
874 {
875 	struct ifnet *ifp = &sc->sc_arpcom.ac_if;
876 	u_int event = sc->sc_ds->event;
877 	struct vmxnet3_txq_shared *ts;
878 	struct vmxnet3_rxq_shared *rs;
879 
880 	/* Clear events. */
881 	WRITE_BAR1(sc, VMXNET3_BAR1_EVENT, event);
882 
883 	/* Link state change? */
884 	if (event & VMXNET3_EVENT_LINK)
885 		vmxnet3_link_state(sc);
886 
887 	/* Queue error? */
888 	if (event & (VMXNET3_EVENT_TQERROR | VMXNET3_EVENT_RQERROR)) {
889 		WRITE_CMD(sc, VMXNET3_CMD_GET_STATUS);
890 
891 		ts = sc->sc_q[0].tx.ts;
892 		if (ts->stopped)
893 			printf("%s: TX error 0x%x\n", ifp->if_xname, ts->error);
894 		rs = sc->sc_q[0].rx.rs;
895 		if (rs->stopped)
896 			printf("%s: RX error 0x%x\n", ifp->if_xname, rs->error);
897 		vmxnet3_init(sc);
898 	}
899 
900 	if (event & VMXNET3_EVENT_DIC)
901 		printf("%s: device implementation change event\n",
902 		    ifp->if_xname);
903 	if (event & VMXNET3_EVENT_DEBUG)
904 		printf("%s: debug event\n", ifp->if_xname);
905 }
906 
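/*
 * Transmit completion: walk the tx completion ring while the generation
 * bit matches, unload and free the mbuf for each completed packet, and
 * advance the consumer index to just past the end-of-packet index
 * reported by the device.  If the queue was marked oactive, restart it
 * now that descriptors have been reclaimed.
 */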
907 void
908 vmxnet3_txintr(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *tq)
909 {
910 	struct ifqueue *ifq = tq->ifq;
911 	struct vmxnet3_txring *ring = &tq->cmd_ring;
912 	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
913 	struct vmxnet3_txcompdesc *txcd;
914 	bus_dmamap_t map;
915 	struct mbuf *m;
916 	u_int cons, next;
917 	uint32_t rgen;
918 
919 	cons = ring->cons;
920 	if (cons == ring->prod)
921 		return;
922 
923 	next = comp_ring->next;
924 	rgen = comp_ring->gen;
925 
926 	/* postread */
927 	for (;;) {
928 		txcd = &comp_ring->txcd[next];
929 		if ((txcd->txc_word3 & VMX_TXC_GEN) != rgen)
930 			break;
931 
932 		if (++next == NTXCOMPDESC) {
933 			next = 0;
934 			rgen ^= VMX_TXC_GEN;
935 		}
936 
937 		m = ring->m[cons];
938 		ring->m[cons] = NULL;
939 
940 		KASSERT(m != NULL);
941 
942 		map = ring->dmap[cons];
943 		bus_dmamap_unload(sc->sc_dmat, map);
944 		m_freem(m);
945 
946 		cons = (letoh32(txcd->txc_word0) >> VMXNET3_TXC_EOPIDX_S) &
947 		    VMXNET3_TXC_EOPIDX_M;
948 		cons++;
949 		cons %= NTXDESC;
950 	}
951 	/* preread */
952 
953 	comp_ring->next = next;
954 	comp_ring->gen = rgen;
955 	ring->cons = cons;
956 
957 	if (ifq_is_oactive(ifq))
958 		ifq_restart(ifq);
959 }
960 
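/*
 * Receive completion: consume rx completion descriptors while the
 * generation bit matches.  The QID field selects which command ring
 * (0 or 1) the buffer came from.  Completed buffers are unloaded,
 * checked for errors and a sane length, given checksum/VLAN/RSS
 * metadata and queued for ifiq_input().  Afterwards the consumed slots
 * are returned to the rxr accounting and ring 0 is refilled.
 */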
961 void
962 vmxnet3_rxintr(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rq)
963 {
964 	struct ifnet *ifp = &sc->sc_arpcom.ac_if;
965 	struct vmxnet3_comp_ring *comp_ring = &rq->comp_ring;
966 	struct vmxnet3_rxring *ring;
967 	struct vmxnet3_rxcompdesc *rxcd;
968 	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
969 	struct mbuf *m;
970 	bus_dmamap_t map;
971 	unsigned int idx, len;
972 	unsigned int next, rgen;
973 	unsigned int done = 0;
974 
975 	next = comp_ring->next;
976 	rgen = comp_ring->gen;
977 
978 	for (;;) {
979 		rxcd = &comp_ring->rxcd[next];
980 		if ((rxcd->rxc_word3 & VMX_RXC_GEN) != rgen)
981 			break;
982 
983 		if (++next == NRXCOMPDESC) {
984 			next = 0;
985 			rgen ^= VMX_RXC_GEN;
986 		}
987 
988 		idx = letoh32((rxcd->rxc_word0 >> VMXNET3_RXC_IDX_S) &
989 		    VMXNET3_RXC_IDX_M);
990 		if (letoh32((rxcd->rxc_word0 >> VMXNET3_RXC_QID_S) &
991 		    VMXNET3_RXC_QID_M) < sc->sc_nqueues)
992 			ring = &rq->cmd_ring[0];
993 		else
994 			ring = &rq->cmd_ring[1];
995 
996 		m = ring->m[idx];
997 		KASSERT(m != NULL);
998 		ring->m[idx] = NULL;
999 
1000 		map = ring->dmap[idx];
1001 		bus_dmamap_sync(sc->sc_dmat, map, 0, map->dm_mapsize,
1002 		    BUS_DMASYNC_POSTREAD);
1003 		bus_dmamap_unload(sc->sc_dmat, map);
1004 
1005 		done++;
1006 
1007 		if (letoh32(rxcd->rxc_word2 & VMXNET3_RXC_ERROR)) {
1008 			ifp->if_ierrors++;
1009 			m_freem(m);
1010 			goto skip_buffer;
1011 		}
1012 
1013 		len = letoh32((rxcd->rxc_word2 >> VMXNET3_RXC_LEN_S) &
1014 		    VMXNET3_RXC_LEN_M);
1015 		if (len < VMXNET3_MIN_MTU) {
1016 			m_freem(m);
1017 			goto skip_buffer;
1018 		}
1019 		m->m_pkthdr.len = m->m_len = len;
1020 
1021 		vmxnet3_rx_csum(rxcd, m);
1022 		if (letoh32(rxcd->rxc_word2 & VMXNET3_RXC_VLAN)) {
1023 			m->m_flags |= M_VLANTAG;
1024 			m->m_pkthdr.ether_vtag = letoh32((rxcd->rxc_word2 >>
1025 			    VMXNET3_RXC_VLANTAG_S) & VMXNET3_RXC_VLANTAG_M);
1026 		}
1027 		if (((letoh32(rxcd->rxc_word0) >> VMXNET3_RXC_RSSTYPE_S) &
1028 		    VMXNET3_RXC_RSSTYPE_M) != VMXNET3_RXC_RSSTYPE_NONE) {
1029 			m->m_pkthdr.ph_flowid = letoh32(rxcd->rxc_word1);
1030 			SET(m->m_pkthdr.csum_flags, M_FLOWID);
1031 		}
1032 
1033 		ml_enqueue(&ml, m);
1034 
1035 skip_buffer:
1036 		if (rq->rs->update_rxhead) {
1037 			u_int qid = letoh32((rxcd->rxc_word0 >>
1038 			    VMXNET3_RXC_QID_S) & VMXNET3_RXC_QID_M);
1039 
1040 			idx = (idx + 1) % NRXDESC;
1041 			if (qid < sc->sc_nqueues) {
1042 				WRITE_BAR0(sc, VMXNET3_BAR0_RXH1(qid), idx);
1043 			} else {
1044 				qid -= sc->sc_nqueues;
1045 				WRITE_BAR0(sc, VMXNET3_BAR0_RXH2(qid), idx);
1046 			}
1047 		}
1048 	}
1049 
1050 	comp_ring->next = next;
1051 	comp_ring->gen = rgen;
1052 
1053 	if (done == 0)
1054 		return;
1055 
1056 	ring = &rq->cmd_ring[0];
1057 
1058 	if (ifiq_input(rq->ifiq, &ml))
1059 		if_rxr_livelocked(&ring->rxr);
1060 
1061 	/* XXX Should we (try to) allocate buffers for ring 2 too? */
1062 	mtx_enter(&ring->mtx);
1063 	if_rxr_put(&ring->rxr, done);
1064 	vmxnet3_rxfill(ring);
1065 	mtx_leave(&ring->mtx);
1066 }
1067 
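/*
 * Program the receive filter.  Unicast to our station address and
 * broadcast are always accepted.  If the interface is promiscuous, a
 * multicast range is configured, or more than 682 groups are joined
 * (the size of the shared multicast table), fall back to ALLMULTI;
 * otherwise copy the group addresses into the shared table and enable
 * multicast filtering.
 */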
1068 void
1069 vmxnet3_iff(struct vmxnet3_softc *sc)
1070 {
1071 	struct ifnet *ifp = &sc->sc_arpcom.ac_if;
1072 	struct arpcom *ac = &sc->sc_arpcom;
1073 	struct vmxnet3_driver_shared *ds = sc->sc_ds;
1074 	struct ether_multi *enm;
1075 	struct ether_multistep step;
1076 	u_int mode;
1077 	u_int8_t *p;
1078 
1079 	ds->mcast_tablelen = 0;
1080 	CLR(ifp->if_flags, IFF_ALLMULTI);
1081 
1082 	/*
1083 	 * Always accept broadcast frames.
1084 	 * Always accept frames destined to our station address.
1085 	 */
1086 	mode = VMXNET3_RXMODE_BCAST | VMXNET3_RXMODE_UCAST;
1087 
1088 	if (ISSET(ifp->if_flags, IFF_PROMISC) || ac->ac_multirangecnt > 0 ||
1089 	    ac->ac_multicnt > 682) {
1090 		SET(ifp->if_flags, IFF_ALLMULTI);
1091 		SET(mode, (VMXNET3_RXMODE_ALLMULTI | VMXNET3_RXMODE_MCAST));
1092 		if (ifp->if_flags & IFF_PROMISC)
1093 			SET(mode, VMXNET3_RXMODE_PROMISC);
1094 	} else {
1095 		p = sc->sc_mcast;
1096 		ETHER_FIRST_MULTI(step, ac, enm);
1097 		while (enm != NULL) {
1098 			bcopy(enm->enm_addrlo, p, ETHER_ADDR_LEN);
1099 
1100 			p += ETHER_ADDR_LEN;
1101 
1102 			ETHER_NEXT_MULTI(step, enm);
1103 		}
1104 
1105 		if (ac->ac_multicnt > 0) {
1106 			SET(mode, VMXNET3_RXMODE_MCAST);
1107 			ds->mcast_tablelen = p - sc->sc_mcast;
1108 		}
1109 	}
1110 
1111 	WRITE_CMD(sc, VMXNET3_CMD_SET_FILTER);
1112 	ds->rxmode = mode;
1113 	WRITE_CMD(sc, VMXNET3_CMD_SET_RXMODE);
1114 }
1115 
1116 
1117 void
1118 vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m)
1119 {
1120 	if (letoh32(rxcd->rxc_word0 & VMXNET3_RXC_NOCSUM))
1121 		return;
1122 
1123 	if ((rxcd->rxc_word3 & (VMXNET3_RXC_IPV4 | VMXNET3_RXC_IPSUM_OK)) ==
1124 	    (VMXNET3_RXC_IPV4 | VMXNET3_RXC_IPSUM_OK))
1125 		m->m_pkthdr.csum_flags |= M_IPV4_CSUM_IN_OK;
1126 
1127 	if (rxcd->rxc_word3 & VMXNET3_RXC_FRAGMENT)
1128 		return;
1129 
1130 	if (rxcd->rxc_word3 & (VMXNET3_RXC_TCP | VMXNET3_RXC_UDP)) {
1131 		if (rxcd->rxc_word3 & VMXNET3_RXC_CSUM_OK)
1132 			m->m_pkthdr.csum_flags |=
1133 			    M_TCP_CSUM_IN_OK | M_UDP_CSUM_IN_OK;
1134 	}
1135 }
1136 
1137 void
1138 vmxnet3_stop(struct ifnet *ifp)
1139 {
1140 	struct vmxnet3_softc *sc = ifp->if_softc;
1141 	int queue;
1142 
1143 	ifp->if_flags &= ~IFF_RUNNING;
1144 	ifq_clr_oactive(&ifp->if_snd);
1145 	ifp->if_timer = 0;
1146 
1147 	vmxnet3_disable_all_intrs(sc);
1148 
1149 	WRITE_CMD(sc, VMXNET3_CMD_DISABLE);
1150 
1151 	if (sc->sc_intrmap != NULL) {
1152 		for (queue = 0; queue < sc->sc_nqueues; queue++)
1153 			intr_barrier(sc->sc_q[queue].ih);
1154 	} else
1155 		intr_barrier(sc->sc_ih);
1156 
1157 	for (queue = 0; queue < sc->sc_nqueues; queue++)
1158 		vmxnet3_txstop(sc, &sc->sc_q[queue].tx);
1159 	for (queue = 0; queue < sc->sc_nqueues; queue++)
1160 		vmxnet3_rxstop(sc, &sc->sc_q[queue].rx);
1161 }
1162 
1163 void
1164 vmxnet3_reset(struct vmxnet3_softc *sc)
1165 {
1166 	WRITE_CMD(sc, VMXNET3_CMD_RESET);
1167 }
1168 
1169 int
1170 vmxnet3_init(struct vmxnet3_softc *sc)
1171 {
1172 	struct ifnet *ifp = &sc->sc_arpcom.ac_if;
1173 	int queue;
1174 
1175 	/*
1176 	 * Cancel pending I/O and free all RX/TX buffers.
1177 	 */
1178 	vmxnet3_stop(ifp);
1179 
1180 #if 0
1181 	/* Put controller into known state. */
1182 	vmxnet3_reset(sc);
1183 #endif
1184 
1185 	for (queue = 0; queue < sc->sc_nqueues; queue++)
1186 		vmxnet3_txinit(sc, &sc->sc_q[queue].tx);
1187 	for (queue = 0; queue < sc->sc_nqueues; queue++)
1188 		vmxnet3_rxinit(sc, &sc->sc_q[queue].rx);
1189 
1190 	for (queue = 0; queue < sc->sc_nqueues; queue++) {
1191 		WRITE_BAR0(sc, VMXNET3_BAR0_RXH1(queue), 0);
1192 		WRITE_BAR0(sc, VMXNET3_BAR0_RXH2(queue), 0);
1193 	}
1194 
1195 	WRITE_CMD(sc, VMXNET3_CMD_ENABLE);
1196 	if (READ_BAR1(sc, VMXNET3_BAR1_CMD)) {
1197 		printf("%s: failed to initialize\n", ifp->if_xname);
1198 		vmxnet3_stop(ifp);
1199 		return EIO;
1200 	}
1201 
1202 	/* Program promiscuous mode and multicast filters. */
1203 	vmxnet3_iff(sc);
1204 
1205 	vmxnet3_enable_all_intrs(sc);
1206 
1207 	vmxnet3_link_state(sc);
1208 
1209 	ifp->if_flags |= IFF_RUNNING;
1210 	ifq_clr_oactive(&ifp->if_snd);
1211 
1212 	return 0;
1213 }
1214 
1215 static int
1216 vmx_rxr_info(struct vmxnet3_softc *sc, struct if_rxrinfo *ifri)
1217 {
1218 	struct if_rxring_info *ifrs, *ifr;
1219 	int error;
1220 	unsigned int i;
1221 
1222 	ifrs = mallocarray(sc->sc_nqueues, sizeof(*ifrs),
1223 	    M_TEMP, M_WAITOK|M_ZERO|M_CANFAIL);
1224 	if (ifrs == NULL)
1225 		return (ENOMEM);
1226 
1227 	for (i = 0; i < sc->sc_nqueues; i++) {
1228 		struct if_rxring *rxr = &sc->sc_q[i].rx.cmd_ring[0].rxr;
1229 		ifr = &ifrs[i];
1230 
1231 		ifr->ifr_size = JUMBO_LEN;
1232 		snprintf(ifr->ifr_name, sizeof(ifr->ifr_name), "%u", i);
1233 		ifr->ifr_info = *rxr;
1234 	}
1235 
1236 	error = if_rxr_info_ioctl(ifri, i, ifrs);
1237 
1238 	free(ifrs, M_TEMP, i * sizeof(*ifrs));
1239 
1240 	return (error);
1241 }
1242 
1243 int
1244 vmxnet3_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1245 {
1246 	struct vmxnet3_softc *sc = ifp->if_softc;
1247 	struct ifreq *ifr = (struct ifreq *)data;
1248 	int error = 0, s;
1249 
1250 	s = splnet();
1251 
1252 	switch (cmd) {
1253 	case SIOCSIFADDR:
1254 		ifp->if_flags |= IFF_UP;
1255 		if ((ifp->if_flags & IFF_RUNNING) == 0)
1256 			error = vmxnet3_init(sc);
1257 		break;
1258 	case SIOCSIFFLAGS:
1259 		if (ifp->if_flags & IFF_UP) {
1260 			if (ifp->if_flags & IFF_RUNNING)
1261 				error = ENETRESET;
1262 			else
1263 				error = vmxnet3_init(sc);
1264 		} else {
1265 			if (ifp->if_flags & IFF_RUNNING)
1266 				vmxnet3_stop(ifp);
1267 		}
1268 		break;
1269 	case SIOCSIFMEDIA:
1270 	case SIOCGIFMEDIA:
1271 		error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
1272 		break;
1273 	case SIOCGIFRXR:
1274 		error = vmx_rxr_info(sc, (struct if_rxrinfo *)ifr->ifr_data);
1275 		break;
1276 	default:
1277 		error = ether_ioctl(ifp, &sc->sc_arpcom, cmd, data);
1278 	}
1279 
1280 	if (error == ENETRESET) {
1281 		if (ifp->if_flags & IFF_RUNNING)
1282 			vmxnet3_iff(sc);
1283 		error = 0;
1284 	}
1285 
1286 	splx(s);
1287 	return error;
1288 }
1289 
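/*
 * Load an outgoing mbuf chain into a tx DMA map.  If the chain needs
 * more segments than the map allows (EFBIG), defragment it into a
 * single cluster and retry the load once.
 */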
1290 static inline int
1291 vmx_load_mbuf(bus_dma_tag_t dmat, bus_dmamap_t map, struct mbuf *m)
1292 {
1293 	int error;
1294 
1295 	error = bus_dmamap_load_mbuf(dmat, map, m,
1296 	    BUS_DMA_STREAMING | BUS_DMA_NOWAIT);
1297 	if (error != EFBIG)
1298 		return (error);
1299 
1300 	error = m_defrag(m, M_DONTWAIT);
1301 	if (error != 0)
1302 		return (error);
1303 
1304 	return (bus_dmamap_load_mbuf(dmat, map, m,
1305 	    BUS_DMA_STREAMING | BUS_DMA_NOWAIT));
1306 }
1307 
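/*
 * MP-safe transmit start routine.  Each packet gets one tx descriptor
 * per DMA segment.  The first descriptor is written with the inverted
 * generation bit and is only flipped to the live generation (after
 * membar_producer()) once the rest of the packet's descriptors are in
 * place, handing the whole chain to the device in one step.  The new
 * producer index is then posted to the TXH doorbell register.
 */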
1308 void
1309 vmxnet3_start(struct ifqueue *ifq)
1310 {
1311 	struct ifnet *ifp = ifq->ifq_if;
1312 	struct vmxnet3_softc *sc = ifp->if_softc;
1313 	struct vmxnet3_txqueue *tq = ifq->ifq_softc;
1314 	struct vmxnet3_txring *ring = &tq->cmd_ring;
1315 	struct vmxnet3_txdesc *txd, *sop;
1316 	bus_dmamap_t map;
1317 	unsigned int prod, free, i;
1318 	unsigned int post = 0;
1319 	uint32_t rgen, gen;
1320 
1321 	struct mbuf *m;
1322 
1323 	free = ring->cons;
1324 	prod = ring->prod;
1325 	if (free <= prod)
1326 		free += NTXDESC;
1327 	free -= prod;
1328 
1329 	rgen = ring->gen;
1330 
1331 	for (;;) {
1332 		if (free <= NTXSEGS) {
1333 			ifq_set_oactive(ifq);
1334 			break;
1335 		}
1336 
1337 		m = ifq_dequeue(ifq);
1338 		if (m == NULL)
1339 			break;
1340 
1341 		map = ring->dmap[prod];
1342 
1343 		if (vmx_load_mbuf(sc->sc_dmat, map, m) != 0) {
1344 			ifq->ifq_errors++;
1345 			m_freem(m);
1346 			continue;
1347 		}
1348 
1349 #if NBPFILTER > 0
1350 		if (ifp->if_bpf)
1351 			bpf_mtap_ether(ifp->if_bpf, m, BPF_DIRECTION_OUT);
1352 #endif
1353 
1354 		ring->m[prod] = m;
1355 
1356 		bus_dmamap_sync(sc->sc_dmat, map, 0,
1357 		    map->dm_mapsize, BUS_DMASYNC_PREWRITE);
1358 
1359 		gen = rgen ^ VMX_TX_GEN;
1360 		sop = &ring->txd[prod];
1361 		for (i = 0; i < map->dm_nsegs; i++) {
1362 			txd = &ring->txd[prod];
1363 			txd->tx_addr = htole64(map->dm_segs[i].ds_addr);
1364 			txd->tx_word2 = htole32(map->dm_segs[i].ds_len <<
1365 			    VMXNET3_TX_LEN_S) | gen;
1366 			txd->tx_word3 = 0;
1367 
1368 			if (++prod == NTXDESC) {
1369 				prod = 0;
1370 				rgen ^= VMX_TX_GEN;
1371 			}
1372 
1373 			gen = rgen;
1374 		}
1375 		txd->tx_word3 = htole32(VMXNET3_TX_EOP | VMXNET3_TX_COMPREQ);
1376 
1377 		if (ISSET(m->m_flags, M_VLANTAG)) {
1378 			sop->tx_word3 |= htole32(VMXNET3_TX_VTAG_MODE);
1379 			sop->tx_word3 |= htole32((m->m_pkthdr.ether_vtag &
1380 			    VMXNET3_TX_VLANTAG_M) << VMXNET3_TX_VLANTAG_S);
1381 		}
1382 
1383 		/* Change the ownership by flipping the "generation" bit */
1384 		membar_producer();
1385 		sop->tx_word2 ^= VMX_TX_GEN;
1386 
1387 		free -= i;
1388 		post = 1;
1389 	}
1390 
1391 	if (!post)
1392 		return;
1393 
1394 	ring->prod = prod;
1395 	ring->gen = rgen;
1396 
1397 	WRITE_BAR0(sc, VMXNET3_BAR0_TXH(0), prod);
1398 }
1399 
1400 void
1401 vmxnet3_watchdog(struct ifnet *ifp)
1402 {
1403 	struct vmxnet3_softc *sc = ifp->if_softc;
1404 	int s;
1405 
1406 	printf("%s: device timeout\n", ifp->if_xname);
1407 	s = splnet();
1408 	vmxnet3_init(sc);
1409 	splx(s);
1410 }
1411 
1412 void
1413 vmxnet3_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1414 {
1415 	struct vmxnet3_softc *sc = ifp->if_softc;
1416 
1417 	vmxnet3_link_state(sc);
1418 
1419 	ifmr->ifm_status = IFM_AVALID;
1420 	ifmr->ifm_active = IFM_ETHER;
1421 
1422 	if (ifp->if_link_state != LINK_STATE_UP)
1423 		return;
1424 
1425 	ifmr->ifm_status |= IFM_ACTIVE;
1426 
1427 	if (ifp->if_baudrate >= IF_Gbps(10))
1428 		ifmr->ifm_active |= IFM_10G_T;
1429 }
1430 
1431 int
1432 vmxnet3_media_change(struct ifnet *ifp)
1433 {
1434 	return 0;
1435 }
1436 
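/*
 * Allocate "size" bytes of zeroed, physically contiguous DMA memory.
 * Returns the kernel virtual address and stores the bus address in *pa.
 * A temporary map is used only to learn the bus address and is unloaded
 * and destroyed before returning; the memory itself stays allocated for
 * the lifetime of the driver.
 */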
1437 void *
1438 vmxnet3_dma_allocmem(struct vmxnet3_softc *sc, u_int size, u_int align, bus_addr_t *pa)
1439 {
1440 	bus_dma_tag_t t = sc->sc_dmat;
1441 	bus_dma_segment_t segs[1];
1442 	bus_dmamap_t map;
1443 	caddr_t va;
1444 	int n;
1445 
1446 	if (bus_dmamem_alloc(t, size, align, 0, segs, 1, &n, BUS_DMA_NOWAIT))
1447 		return NULL;
1448 	if (bus_dmamem_map(t, segs, 1, size, &va, BUS_DMA_NOWAIT))
1449 		return NULL;
1450 	if (bus_dmamap_create(t, size, 1, size, 0, BUS_DMA_NOWAIT, &map))
1451 		return NULL;
1452 	if (bus_dmamap_load(t, map, va, size, NULL, BUS_DMA_NOWAIT))
1453 		return NULL;
1454 	bzero(va, size);
1455 	*pa = DMAADDR(map);
1456 	bus_dmamap_unload(t, map);
1457 	bus_dmamap_destroy(t, map);
1458 	return va;
1459 }
1460 
1461 #if NKSTAT > 0
1462 /*
1463  * "hardware" counters are exported as separate kstats for each tx
1464  * and rx ring, but the request for the hypervisor to update the
1465  * stats is done once at the controller level. we limit the number
1466  * of updates at the controller level to a rate of one per second to
1467  * debounce this a bit.
1468  */
1469 static const struct timeval vmx_kstat_rate = { 1, 0 };
1470 
1471 /*
1472  * all the vmx stats are 64 bit counters, we just need their name and units.
1473  */
1474 struct vmx_kstat_tpl {
1475 	const char		*name;
1476 	enum kstat_kv_unit	 unit;
1477 };
1478 
1479 static const struct vmx_kstat_tpl vmx_rx_kstat_tpl[UPT1_RxStats_count] = {
1480 	{ "LRO packets",	KSTAT_KV_U_PACKETS },
1481 	{ "LRO bytes",		KSTAT_KV_U_BYTES },
1482 	{ "ucast packets",	KSTAT_KV_U_PACKETS },
1483 	{ "ucast bytes",	KSTAT_KV_U_BYTES },
1484 	{ "mcast packets",	KSTAT_KV_U_PACKETS },
1485 	{ "mcast bytes",	KSTAT_KV_U_BYTES },
1486 	{ "bcast packets",	KSTAT_KV_U_PACKETS },
1487 	{ "bcast bytes",	KSTAT_KV_U_BYTES },
1488 	{ "no buffers",		KSTAT_KV_U_PACKETS },
1489 	{ "errors",		KSTAT_KV_U_PACKETS },
1490 };
1491 
1492 static const struct vmx_kstat_tpl vmx_tx_kstat_tpl[UPT1_TxStats_count] = {
1493 	{ "TSO packets",	KSTAT_KV_U_PACKETS },
1494 	{ "TSO bytes",		KSTAT_KV_U_BYTES },
1495 	{ "ucast packets",	KSTAT_KV_U_PACKETS },
1496 	{ "ucast bytes",	KSTAT_KV_U_BYTES },
1497 	{ "mcast packets",	KSTAT_KV_U_PACKETS },
1498 	{ "mcast bytes",	KSTAT_KV_U_BYTES },
1499 	{ "bcast packets",	KSTAT_KV_U_PACKETS },
1500 	{ "bcast bytes",	KSTAT_KV_U_BYTES },
1501 	{ "errors",		KSTAT_KV_U_PACKETS },
1502 	{ "discards",		KSTAT_KV_U_PACKETS },
1503 };
1504 
1505 static void
1506 vmx_kstat_init(struct vmxnet3_softc *sc)
1507 {
1508 	rw_init(&sc->sc_kstat_lock, "vmxkstat");
1509 }
1510 
1511 static int
1512 vmx_kstat_read(struct kstat *ks)
1513 {
1514 	struct vmxnet3_softc *sc = ks->ks_softc;
1515 	struct kstat_kv *kvs = ks->ks_data;
1516 	uint64_t *vs = ks->ks_ptr;
1517 	unsigned int n, i;
1518 
1519 	if (ratecheck(&sc->sc_kstat_updated, &vmx_kstat_rate)) {
1520 		WRITE_CMD(sc, VMXNET3_CMD_GET_STATS);
1521 		/* barrier? */
1522 	}
1523 
1524 	n = ks->ks_datalen / sizeof(*kvs);
1525 	for (i = 0; i < n; i++)
1526 		kstat_kv_u64(&kvs[i]) = lemtoh64(&vs[i]);
1527 
1528  	TIMEVAL_TO_TIMESPEC(&sc->sc_kstat_updated, &ks->ks_updated);
1529 
1530 	return (0);
1531 }
1532 
1533 static struct kstat *
1534 vmx_kstat_create(struct vmxnet3_softc *sc, const char *name, unsigned int unit,
1535     const struct vmx_kstat_tpl *tpls, unsigned int n, uint64_t *vs)
1536 {
1537 	struct kstat *ks;
1538 	struct kstat_kv *kvs;
1539 	unsigned int i;
1540 
1541 	ks = kstat_create(sc->sc_dev.dv_xname, 0, name, unit,
1542 	    KSTAT_T_KV, 0);
1543 	if (ks == NULL)
1544 		return (NULL);
1545 
1546 	kvs = mallocarray(n, sizeof(*kvs), M_DEVBUF, M_WAITOK|M_ZERO);
1547 	for (i = 0; i < n; i++) {
1548 		const struct vmx_kstat_tpl *tpl = &tpls[i];
1549 
1550 		kstat_kv_unit_init(&kvs[i], tpl->name,
1551 		    KSTAT_KV_T_COUNTER64, tpl->unit);
1552 	}
1553 
1554 	ks->ks_softc = sc;
1555 	kstat_set_wlock(ks, &sc->sc_kstat_lock);
1556 	ks->ks_ptr = vs;
1557 	ks->ks_data = kvs;
1558 	ks->ks_datalen = n * sizeof(*kvs);
1559 	ks->ks_read = vmx_kstat_read;
1560 	TIMEVAL_TO_TIMESPEC(&vmx_kstat_rate, &ks->ks_interval);
1561 
1562 	kstat_install(ks);
1563 
1564 	return (ks);
1565 }
1566 
1567 static void
1568 vmx_kstat_txstats(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *tq,
1569     int unit)
1570 {
1571 	tq->txkstat = vmx_kstat_create(sc, "vmx-txstats", unit,
1572 	    vmx_tx_kstat_tpl, nitems(vmx_tx_kstat_tpl), tq->ts->stats);
1573 }
1574 
1575 static void
1576 vmx_kstat_rxstats(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rq,
1577     int unit)
1578 {
1579 	rq->rxkstat = vmx_kstat_create(sc, "vmx-rxstats", unit,
1580 	    vmx_rx_kstat_tpl, nitems(vmx_rx_kstat_tpl), rq->rs->stats);
1581 }
1582 #endif /* NKSTAT > 0 */
1583