1 /*	$OpenBSD: if_vmx.c,v 1.90 2025/01/24 10:29:43 yasuoka Exp $	*/
2 
3 /*
4  * Copyright (c) 2013 Tsubai Masanari
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include "bpfilter.h"
20 #include "kstat.h"
21 #include "vlan.h"
22 
23 #include <sys/param.h>
24 #include <sys/device.h>
25 #include <sys/mbuf.h>
26 #include <sys/socket.h>
27 #include <sys/sockio.h>
28 #include <sys/systm.h>
29 #include <sys/atomic.h>
30 #include <sys/intrmap.h>
31 #include <sys/kstat.h>
32 
33 #include <net/bpf.h>
34 #include <net/if.h>
35 #include <net/toeplitz.h>
36 #include <net/if_media.h>
37 #include <net/route.h>
38 
39 #include <netinet/in.h>
40 #include <netinet/if_ether.h>
41 #include <netinet/ip.h>
42 #include <netinet/tcp.h>
43 #include <netinet/tcp_timer.h>
44 #include <netinet/tcp_var.h>
45 #include <netinet/udp.h>
46 
47 #include <machine/bus.h>
48 
49 #include <dev/pci/if_vmxreg.h>
50 #include <dev/pci/pcivar.h>
51 #include <dev/pci/pcidevs.h>
52 
53 #define VMX_MAX_QUEUES	MIN(VMXNET3_MAX_TX_QUEUES, VMXNET3_MAX_RX_QUEUES)
54 
55 #define NTXDESC 512 /* tx ring size */
56 #define NTXSEGS 8 /* tx descriptors per packet */
57 #define NRXDESC 512
58 #define NTXCOMPDESC NTXDESC
59 #define NRXCOMPDESC (NRXDESC * 2)	/* ring1 + ring2 */
60 
61 #define VMXNET3_DRIVER_VERSION 0x00010000
62 
63 #define VMX_TX_GEN	htole32(VMXNET3_TX_GEN_M << VMXNET3_TX_GEN_S)
64 #define VMX_TXC_GEN	htole32(VMXNET3_TXC_GEN_M << VMXNET3_TXC_GEN_S)
65 #define VMX_RX_GEN	htole32(VMXNET3_RX_GEN_M << VMXNET3_RX_GEN_S)
66 #define VMX_RXC_GEN	htole32(VMXNET3_RXC_GEN_M << VMXNET3_RXC_GEN_S)
67 
68 struct vmx_dmamem {
69 	bus_dmamap_t		vdm_map;
70 	bus_dma_segment_t	vdm_seg;
71 	int			vdm_nsegs;
72 	size_t			vdm_size;
73 	caddr_t			vdm_kva;
74 };
75 
76 #define VMX_DMA_MAP(_vdm)	((_vdm)->vdm_map)
77 #define VMX_DMA_DVA(_vdm)	((_vdm)->vdm_map->dm_segs[0].ds_addr)
78 #define VMX_DMA_KVA(_vdm)	((void *)(_vdm)->vdm_kva)
79 #define VMX_DMA_LEN(_vdm)	((_vdm)->vdm_size)
80 
81 struct vmxnet3_softc;
82 
83 struct vmxnet3_txring {
84 	struct vmx_dmamem dmamem;
85 	struct mbuf *m[NTXDESC];
86 	bus_dmamap_t dmap[NTXDESC];
87 	struct vmxnet3_txdesc *txd;
88 	u_int32_t gen;
89 	volatile u_int prod;
90 	volatile u_int cons;
91 };
92 
93 struct vmxnet3_rxring {
94 	struct vmxnet3_softc *sc;
95 	struct vmxnet3_rxq_shared *rs; /* copy of the rxqueue rs */
96 	struct vmx_dmamem dmamem;
97 	struct mbuf *m[NRXDESC];
98 	bus_dmamap_t dmap[NRXDESC];
99 	struct mutex mtx;
100 	struct if_rxring rxr;
101 	struct timeout refill;
102 	struct vmxnet3_rxdesc *rxd;
103 	bus_size_t rxh;
104 	u_int fill;
105 	u_int32_t gen;
106 	u_int8_t rid;
107 };
108 
109 struct vmxnet3_comp_ring {
110 	struct vmx_dmamem dmamem;
111 	union {
112 		struct vmxnet3_txcompdesc *txcd;
113 		struct vmxnet3_rxcompdesc *rxcd;
114 	};
115 	u_int next;
116 	u_int32_t gen;
117 	struct mbuf *sendmp;
118 	struct mbuf *lastmp;
119 };
120 
121 struct vmxnet3_txqueue {
122 	struct vmxnet3_softc *sc; /* sigh */
123 	struct vmxnet3_txring cmd_ring;
124 	struct vmxnet3_comp_ring comp_ring;
125 	struct vmxnet3_txq_shared *ts;
126 	struct ifqueue *ifq;
127 	struct kstat *txkstat;
128 	unsigned int queue;
129 } __aligned(64);
130 
131 struct vmxnet3_rxqueue {
132 	struct vmxnet3_softc *sc; /* sigh */
133 	struct vmxnet3_rxring cmd_ring[2];
134 	struct vmxnet3_comp_ring comp_ring;
135 	struct vmxnet3_rxq_shared *rs;
136 	struct ifiqueue *ifiq;
137 	struct kstat *rxkstat;
138 } __aligned(64);
139 
140 struct vmxnet3_queue {
141 	struct vmxnet3_txqueue tx;
142 	struct vmxnet3_rxqueue rx;
143 	struct vmxnet3_softc *sc;
144 	char intrname[16];
145 	void *ih;
146 	int intr;
147 };
148 
149 struct vmxnet3_softc {
150 	struct device sc_dev;
151 	struct arpcom sc_arpcom;
152 	struct ifmedia sc_media;
153 
154 	bus_space_tag_t	sc_iot0;
155 	bus_space_tag_t	sc_iot1;
156 	bus_space_handle_t sc_ioh0;
157 	bus_space_handle_t sc_ioh1;
158 	bus_dma_tag_t sc_dmat;
159 	void *sc_ih;
160 
161 	int sc_nqueues;
162 	struct vmxnet3_queue *sc_q;
163 	struct intrmap *sc_intrmap;
164 
165 	u_int sc_vrrs;
166 	struct vmxnet3_driver_shared *sc_ds;
167 	u_int8_t *sc_mcast;
168 	struct vmxnet3_upt1_rss_conf *sc_rss;
169 
170 #if NKSTAT > 0
171 	struct rwlock		sc_kstat_lock;
172 	struct timeval		sc_kstat_updated;
173 #endif
174 };
175 
176 #define JUMBO_LEN ((16 * 1024) - 1)
177 #define DMAADDR(map) ((map)->dm_segs[0].ds_addr)
178 
179 #define READ_BAR0(sc, reg) bus_space_read_4((sc)->sc_iot0, (sc)->sc_ioh0, reg)
180 #define READ_BAR1(sc, reg) bus_space_read_4((sc)->sc_iot1, (sc)->sc_ioh1, reg)
181 #define WRITE_BAR0(sc, reg, val) \
182 	bus_space_write_4((sc)->sc_iot0, (sc)->sc_ioh0, reg, val)
183 #define WRITE_BAR1(sc, reg, val) \
184 	bus_space_write_4((sc)->sc_iot1, (sc)->sc_ioh1, reg, val)
185 #define WRITE_CMD(sc, cmd) WRITE_BAR1(sc, VMXNET3_BAR1_CMD, cmd)
186 
187 int vmxnet3_match(struct device *, void *, void *);
188 void vmxnet3_attach(struct device *, struct device *, void *);
189 int vmxnet3_dma_init(struct vmxnet3_softc *);
190 int vmxnet3_alloc_txring(struct vmxnet3_softc *, int, int);
191 int vmxnet3_alloc_rxring(struct vmxnet3_softc *, int, int);
192 void vmxnet3_txinit(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
193 void vmxnet3_rxinit(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
194 void vmxnet3_txstop(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
195 void vmxnet3_rxstop(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
196 void vmxnet3_link_state(struct vmxnet3_softc *);
197 void vmxnet3_enable_all_intrs(struct vmxnet3_softc *);
198 void vmxnet3_disable_all_intrs(struct vmxnet3_softc *);
199 int vmxnet3_intr(void *);
200 int vmxnet3_intr_intx(void *);
201 int vmxnet3_intr_event(void *);
202 int vmxnet3_intr_queue(void *);
203 void vmxnet3_evintr(struct vmxnet3_softc *);
204 void vmxnet3_txintr(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
205 void vmxnet3_rxintr(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
206 void vmxnet3_rxfill_tick(void *);
207 void vmxnet3_rxfill(struct vmxnet3_rxring *);
208 void vmxnet3_iff(struct vmxnet3_softc *);
209 void vmxnet3_rx_offload(struct vmxnet3_rxcompdesc *, struct mbuf *);
210 void vmxnet3_stop(struct ifnet *);
211 void vmxnet3_reset(struct vmxnet3_softc *);
212 int vmxnet3_init(struct vmxnet3_softc *);
213 int vmxnet3_ioctl(struct ifnet *, u_long, caddr_t);
214 void vmxnet3_start(struct ifqueue *);
215 void vmxnet3_watchdog(struct ifnet *);
216 void vmxnet3_media_status(struct ifnet *, struct ifmediareq *);
217 int vmxnet3_media_change(struct ifnet *);
218 void *vmxnet3_dma_allocmem(struct vmxnet3_softc *, u_int, u_int, bus_addr_t *);
219 
220 static int	vmx_dmamem_alloc(struct vmxnet3_softc *, struct vmx_dmamem *,
221 		    bus_size_t, u_int);
222 #ifdef notyet
223 static void	vmx_dmamem_free(struct vmxnet3_softc *, struct vmx_dmamem *);
224 #endif
225 
226 #if NKSTAT > 0
227 static void	vmx_kstat_init(struct vmxnet3_softc *);
228 static void	vmx_kstat_txstats(struct vmxnet3_softc *,
229 		    struct vmxnet3_txqueue *, int);
230 static void	vmx_kstat_rxstats(struct vmxnet3_softc *,
231 		    struct vmxnet3_rxqueue *, int);
232 #endif /* NKSTAT > 0 */
233 
234 const struct pci_matchid vmx_devices[] = {
235 	{ PCI_VENDOR_VMWARE, PCI_PRODUCT_VMWARE_NET_3 }
236 };
237 
238 const struct cfattach vmx_ca = {
239 	sizeof(struct vmxnet3_softc), vmxnet3_match, vmxnet3_attach
240 };
241 
242 struct cfdriver vmx_cd = {
243 	NULL, "vmx", DV_IFNET
244 };
245 
246 int
247 vmxnet3_match(struct device *parent, void *match, void *aux)
248 {
249 	return (pci_matchbyid(aux, vmx_devices, nitems(vmx_devices)));
250 }
251 
252 void
253 vmxnet3_attach(struct device *parent, struct device *self, void *aux)
254 {
255 	struct vmxnet3_softc *sc = (void *)self;
256 	struct pci_attach_args *pa = aux;
257 	struct ifnet *ifp = &sc->sc_arpcom.ac_if;
258 	pci_intr_handle_t ih;
259 	const char *intrstr;
260 	u_int memtype, ver, macl, mach, intrcfg;
261 	u_char enaddr[ETHER_ADDR_LEN];
262 	int (*isr)(void *);
263 	int msix = 0;
264 	int i;
265 
266 	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, 0x10);
267 	if (pci_mapreg_map(pa, 0x10, memtype, 0, &sc->sc_iot0, &sc->sc_ioh0,
268 	    NULL, NULL, 0)) {
269 		printf(": failed to map BAR0\n");
270 		return;
271 	}
272 	memtype = pci_mapreg_type(pa->pa_pc, pa->pa_tag, 0x14);
273 	if (pci_mapreg_map(pa, 0x14, memtype, 0, &sc->sc_iot1, &sc->sc_ioh1,
274 	    NULL, NULL, 0)) {
275 		printf(": failed to map BAR1\n");
276 		return;
277 	}
278 
279 	/* Vmxnet3 Revision Report and Selection */
280 	ver = READ_BAR1(sc, VMXNET3_BAR1_VRRS);
281 	if (ISSET(ver, 0x2)) {
282 		sc->sc_vrrs = 2;
283 	} else if (ISSET(ver, 0x1)) {
284 		sc->sc_vrrs = 1;
285 	} else {
286 		printf(": unsupported hardware version 0x%x\n", ver);
287 		return;
288 	}
289 	WRITE_BAR1(sc, VMXNET3_BAR1_VRRS, sc->sc_vrrs);
290 
291 	/* UPT Version Report and Selection */
292 	ver = READ_BAR1(sc, VMXNET3_BAR1_UVRS);
293 	if (!ISSET(ver, 0x1)) {
294 		printf(": incompatible UPT version 0x%x\n", ver);
295 		return;
296 	}
297 	WRITE_BAR1(sc, VMXNET3_BAR1_UVRS, 1);
298 
299 	sc->sc_dmat = pa->pa_dmat;
300 
301 	WRITE_CMD(sc, VMXNET3_CMD_GET_INTRCFG);
302 	intrcfg = READ_BAR1(sc, VMXNET3_BAR1_CMD);
303 	isr = vmxnet3_intr;
304 	sc->sc_nqueues = 1;
305 
306 	switch (intrcfg & VMXNET3_INTRCFG_TYPE_MASK) {
307 	case VMXNET3_INTRCFG_TYPE_AUTO:
308 	case VMXNET3_INTRCFG_TYPE_MSIX:
309 		msix = pci_intr_msix_count(pa);
310 		if (msix > 0) {
311 			if (pci_intr_map_msix(pa, 0, &ih) == 0) {
312 				msix--; /* are there spares for tx/rx qs? */
313 				if (msix == 0)
314 					break;
315 
316 				isr = vmxnet3_intr_event;
317 				sc->sc_intrmap = intrmap_create(&sc->sc_dev,
318 				    msix, VMX_MAX_QUEUES, INTRMAP_POWEROF2);
319 				sc->sc_nqueues = intrmap_count(sc->sc_intrmap);
320 			}
321 			break;
322 		}
323 
324 		/* FALLTHROUGH */
325 	case VMXNET3_INTRCFG_TYPE_MSI:
326 		if (pci_intr_map_msi(pa, &ih) == 0)
327 			break;
328 
329 		/* FALLTHROUGH */
330 	case VMXNET3_INTRCFG_TYPE_INTX:
331 		isr = vmxnet3_intr_intx;
332 		if (pci_intr_map(pa, &ih) == 0)
333 			break;
334 
335 		printf(": failed to map interrupt\n");
336 		return;
337 	}
338 	intrstr = pci_intr_string(pa->pa_pc, ih);
339 	sc->sc_ih = pci_intr_establish(pa->pa_pc, ih, IPL_NET | IPL_MPSAFE,
340 	    isr, sc, self->dv_xname);
341 	if (sc->sc_ih == NULL) {
342 		printf(": unable to establish interrupt handler");
343 		if (intrstr != NULL)
344 			printf(" at %s", intrstr);
345 		printf("\n");
346 		return;
347 	}
348 	if (intrstr)
349 		printf(": %s", intrstr);
350 
351 	sc->sc_q = mallocarray(sc->sc_nqueues, sizeof(*sc->sc_q),
352 	    M_DEVBUF, M_WAITOK|M_ZERO);
353 
354 	if (sc->sc_intrmap != NULL) {
355 		for (i = 0; i < sc->sc_nqueues; i++) {
356 			struct vmxnet3_queue *q;
357 			int vec;
358 
359 			q = &sc->sc_q[i];
360 			vec = i + 1;
361 			if (pci_intr_map_msix(pa, vec, &ih) != 0) {
362 				printf(", failed to map interrupt %d\n", vec);
363 				return;
364 			}
365 			snprintf(q->intrname, sizeof(q->intrname), "%s:%d",
366 			    self->dv_xname, i);
367 			q->ih = pci_intr_establish_cpu(pa->pa_pc, ih,
368 			    IPL_NET | IPL_MPSAFE,
369 			    intrmap_cpu(sc->sc_intrmap, i),
370 			    vmxnet3_intr_queue, q, q->intrname);
371 			if (q->ih == NULL) {
372 				printf(": unable to establish interrupt %d\n",
373 				    vec);
374 				return;
375 			}
376 
377 			q->intr = vec;
378 			q->sc = sc;
379 		}
380 	}
381 
382 	if (vmxnet3_dma_init(sc)) {
383 		printf(": failed to setup DMA\n");
384 		return;
385 	}
386 
387 	printf(", %d queue%s", sc->sc_nqueues, sc->sc_nqueues > 1 ? "s" : "");
388 
389 	WRITE_CMD(sc, VMXNET3_CMD_GET_MACL);
390 	macl = READ_BAR1(sc, VMXNET3_BAR1_CMD);
391 	enaddr[0] = macl;
392 	enaddr[1] = macl >> 8;
393 	enaddr[2] = macl >> 16;
394 	enaddr[3] = macl >> 24;
395 	WRITE_CMD(sc, VMXNET3_CMD_GET_MACH);
396 	mach = READ_BAR1(sc, VMXNET3_BAR1_CMD);
397 	enaddr[4] = mach;
398 	enaddr[5] = mach >> 8;
399 
400 	WRITE_BAR1(sc, VMXNET3_BAR1_MACL, macl);
401 	WRITE_BAR1(sc, VMXNET3_BAR1_MACH, mach);
402 	printf(", address %s\n", ether_sprintf(enaddr));
403 
404 	bcopy(enaddr, sc->sc_arpcom.ac_enaddr, 6);
405 	strlcpy(ifp->if_xname, self->dv_xname, IFNAMSIZ);
406 	ifp->if_softc = sc;
407 	ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX;
408 	ifp->if_xflags = IFXF_MPSAFE;
409 	ifp->if_ioctl = vmxnet3_ioctl;
410 	ifp->if_qstart = vmxnet3_start;
411 	ifp->if_watchdog = vmxnet3_watchdog;
412 	ifp->if_hardmtu = VMXNET3_MAX_MTU;
413 	ifp->if_capabilities = IFCAP_VLAN_MTU;
414 
415 	if (sc->sc_ds->upt_features & UPT1_F_CSUM) {
416 		ifp->if_capabilities |= IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4;
417 		ifp->if_capabilities |= IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6;
418 	}
419 
420 	ifp->if_capabilities |= IFCAP_TSOv4 | IFCAP_TSOv6;
421 
422 	if (sc->sc_vrrs == 2) {
423 		ifp->if_xflags |= IFXF_LRO;
424 		ifp->if_capabilities |= IFCAP_LRO;
425 	}
426 
427 #if NVLAN > 0
428 	if (sc->sc_ds->upt_features & UPT1_F_VLAN)
429 		ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING;
430 #endif
431 
432 	ifq_init_maxlen(&ifp->if_snd, NTXDESC);
433 
434 	ifmedia_init(&sc->sc_media, IFM_IMASK, vmxnet3_media_change,
435 	    vmxnet3_media_status);
436 	ifmedia_add(&sc->sc_media, IFM_ETHER|IFM_AUTO, 0, NULL);
437 	ifmedia_add(&sc->sc_media, IFM_ETHER|IFM_10G_T|IFM_FDX, 0, NULL);
438 	ifmedia_add(&sc->sc_media, IFM_ETHER|IFM_10G_T, 0, NULL);
439 	ifmedia_add(&sc->sc_media, IFM_ETHER|IFM_1000_T|IFM_FDX, 0, NULL);
440 	ifmedia_add(&sc->sc_media, IFM_ETHER|IFM_1000_T, 0, NULL);
441 	ifmedia_set(&sc->sc_media, IFM_ETHER|IFM_AUTO);
442 
443 	if_attach(ifp);
444 	ether_ifattach(ifp);
445 	vmxnet3_link_state(sc);
446 
447 	if_attach_queues(ifp, sc->sc_nqueues);
448 	if_attach_iqueues(ifp, sc->sc_nqueues);
449 
450 #if NKSTAT > 0
451 	vmx_kstat_init(sc);
452 #endif
453 
454 	for (i = 0; i < sc->sc_nqueues; i++) {
455 		ifp->if_ifqs[i]->ifq_softc = &sc->sc_q[i].tx;
456 		sc->sc_q[i].tx.ifq = ifp->if_ifqs[i];
457 		sc->sc_q[i].rx.ifiq = ifp->if_iqs[i];
458 
459 #if NKSTAT > 0
460 		vmx_kstat_txstats(sc, &sc->sc_q[i].tx, i);
461 		vmx_kstat_rxstats(sc, &sc->sc_q[i].rx, i);
462 #endif
463 	}
464 }
465 
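/*
 * Allocate the tx/rx queue shared areas, the descriptor rings, the
 * multicast filter table and the driver shared area, then hand the
 * shared area address to the device through BAR1.
 */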
466 int
467 vmxnet3_dma_init(struct vmxnet3_softc *sc)
468 {
469 	struct vmxnet3_driver_shared *ds;
470 	struct vmxnet3_txq_shared *ts;
471 	struct vmxnet3_rxq_shared *rs;
472 	bus_addr_t ds_pa, qs_pa, mcast_pa;
473 	int i, queue, qs_len, intr;
474 	u_int major, minor, release_code, rev;
475 
476 	qs_len = sc->sc_nqueues * (sizeof *ts + sizeof *rs);
477 	ts = vmxnet3_dma_allocmem(sc, qs_len, VMXNET3_DMADESC_ALIGN, &qs_pa);
478 	if (ts == NULL)
479 		return -1;
480 	for (queue = 0; queue < sc->sc_nqueues; queue++)
481 		sc->sc_q[queue].tx.ts = ts++;
482 	rs = (void *)ts;
483 	for (queue = 0; queue < sc->sc_nqueues; queue++)
484 		sc->sc_q[queue].rx.rs = rs++;
485 
486 	for (queue = 0; queue < sc->sc_nqueues; queue++) {
487 		intr = sc->sc_q[queue].intr;
488 
489 		if (vmxnet3_alloc_txring(sc, queue, intr))
490 			return -1;
491 		if (vmxnet3_alloc_rxring(sc, queue, intr))
492 			return -1;
493 	}
494 
495 	sc->sc_mcast = vmxnet3_dma_allocmem(sc, 682 * ETHER_ADDR_LEN, 32, &mcast_pa);
496 	if (sc->sc_mcast == NULL)
497 		return -1;
498 
499 	ds = vmxnet3_dma_allocmem(sc, sizeof *sc->sc_ds, 8, &ds_pa);
500 	if (ds == NULL)
501 		return -1;
502 	sc->sc_ds = ds;
503 	ds->magic = VMXNET3_REV1_MAGIC;
504 	ds->version = VMXNET3_DRIVER_VERSION;
505 
506 	/*
507 	 * XXX FreeBSD version uses following values:
508 	 * (Does the device behavior depend on them?)
509 	 *
510 	 * major = __FreeBSD_version / 100000;
511 	 * minor = (__FreeBSD_version / 1000) % 100;
512 	 * release_code = (__FreeBSD_version / 100) % 10;
513 	 * rev = __FreeBSD_version % 100;
514 	 */
515 	major = 0;
516 	minor = 0;
517 	release_code = 0;
518 	rev = 0;
519 #ifdef __LP64__
520 	ds->guest = release_code << 30 | rev << 22 | major << 14 | minor << 6
521 	    | VMXNET3_GOS_FREEBSD | VMXNET3_GOS_64BIT;
522 #else
523 	ds->guest = release_code << 30 | rev << 22 | major << 14 | minor << 6
524 	    | VMXNET3_GOS_FREEBSD | VMXNET3_GOS_32BIT;
525 #endif
526 	ds->vmxnet3_revision = 1;
527 	ds->upt_version = 1;
528 	ds->upt_features = UPT1_F_CSUM;
529 #if NVLAN > 0
530 	ds->upt_features |= UPT1_F_VLAN;
531 #endif
532 	ds->driver_data = ~0ULL;
533 	ds->driver_data_len = 0;
534 	ds->queue_shared = qs_pa;
535 	ds->queue_shared_len = qs_len;
536 	ds->mtu = VMXNET3_MAX_MTU;
537 	ds->ntxqueue = sc->sc_nqueues;
538 	ds->nrxqueue = sc->sc_nqueues;
539 	ds->mcast_table = mcast_pa;
540 	ds->automask = 1;
541 	ds->nintr = 1 + (sc->sc_intrmap != NULL ? sc->sc_nqueues : 0);
542 	ds->evintr = 0;
543 	ds->ictrl = VMXNET3_ICTRL_DISABLE_ALL;
544 	for (i = 0; i < ds->nintr; i++)
545 		ds->modlevel[i] = UPT1_IMOD_ADAPTIVE;
546 
547 	if (sc->sc_nqueues > 1) {
548 		struct vmxnet3_upt1_rss_conf *rsscfg;
549 		bus_addr_t rss_pa;
550 
551 		rsscfg = vmxnet3_dma_allocmem(sc, sizeof(*rsscfg), 8, &rss_pa);
552 
553 		rsscfg->hash_type = UPT1_RSS_HASH_TYPE_TCP_IPV4 |
554 		    UPT1_RSS_HASH_TYPE_IPV4 |
555 		    UPT1_RSS_HASH_TYPE_TCP_IPV6 |
556 		    UPT1_RSS_HASH_TYPE_IPV6;
557 		rsscfg->hash_func = UPT1_RSS_HASH_FUNC_TOEPLITZ;
558 		rsscfg->hash_key_size = sizeof(rsscfg->hash_key);
559 		stoeplitz_to_key(rsscfg->hash_key, sizeof(rsscfg->hash_key));
560 
561 		rsscfg->ind_table_size = sizeof(rsscfg->ind_table);
562 		for (i = 0; i < sizeof(rsscfg->ind_table); i++)
563 			rsscfg->ind_table[i] = i % sc->sc_nqueues;
564 
565 		ds->upt_features |= UPT1_F_RSS;
566 		ds->rss.version = 1;
567 		ds->rss.len = sizeof(*rsscfg);
568 		ds->rss.paddr = rss_pa;
569 
570 		sc->sc_rss = rsscfg;
571 	}
572 
573 	WRITE_BAR1(sc, VMXNET3_BAR1_DSL, ds_pa);
574 	WRITE_BAR1(sc, VMXNET3_BAR1_DSH, (u_int64_t)ds_pa >> 32);
575 	return 0;
576 }
577 
578 int
579 vmxnet3_alloc_txring(struct vmxnet3_softc *sc, int queue, int intr)
580 {
581 	struct vmxnet3_txqueue *tq = &sc->sc_q[queue].tx;
582 	struct vmxnet3_txq_shared *ts;
583 	struct vmxnet3_txring *ring = &tq->cmd_ring;
584 	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
585 	int idx;
586 
587 	tq->queue = queue;
588 
589 	if (vmx_dmamem_alloc(sc, &ring->dmamem,
590 	    NTXDESC * sizeof(struct vmxnet3_txdesc), 512) != 0)
591 		return -1;
592 	ring->txd = VMX_DMA_KVA(&ring->dmamem);
593 	if (vmx_dmamem_alloc(sc, &comp_ring->dmamem,
594 	    NTXCOMPDESC * sizeof(comp_ring->txcd[0]), 512) != 0)
595 		return -1;
596 	comp_ring->txcd = VMX_DMA_KVA(&comp_ring->dmamem);
597 
598 	for (idx = 0; idx < NTXDESC; idx++) {
599 		if (bus_dmamap_create(sc->sc_dmat, MAXMCLBYTES, NTXSEGS,
600 		    VMXNET3_TX_LEN_M, 0, BUS_DMA_NOWAIT, &ring->dmap[idx]))
601 			return -1;
602 	}
603 
604 	ts = tq->ts;
605 	bzero(ts, sizeof *ts);
606 	ts->npending = 0;
607 	ts->intr_threshold = 1;
608 	ts->cmd_ring = VMX_DMA_DVA(&ring->dmamem);
609 	ts->cmd_ring_len = NTXDESC;
610 	ts->comp_ring = VMX_DMA_DVA(&comp_ring->dmamem);
611 	ts->comp_ring_len = NTXCOMPDESC;
612 	ts->driver_data = ~0ULL;
613 	ts->driver_data_len = 0;
614 	ts->intr_idx = intr;
615 	ts->stopped = 1;
616 	ts->error = 0;
617 	return 0;
618 }
619 
620 int
621 vmxnet3_alloc_rxring(struct vmxnet3_softc *sc, int queue, int intr)
622 {
623 	struct vmxnet3_rxqueue *rq = &sc->sc_q[queue].rx;
624 	struct vmxnet3_rxq_shared *rs;
625 	struct vmxnet3_rxring *ring;
626 	struct vmxnet3_comp_ring *comp_ring;
627 	int i, idx;
628 
629 	for (i = 0; i < 2; i++) {
630 		ring = &rq->cmd_ring[i];
631 		if (vmx_dmamem_alloc(sc, &ring->dmamem,
632 		    NRXDESC * sizeof(struct vmxnet3_rxdesc), 512) != 0)
633 			return -1;
634 		ring->rxd = VMX_DMA_KVA(&ring->dmamem);
635 	}
636 	comp_ring = &rq->comp_ring;
637 	if (vmx_dmamem_alloc(sc, &comp_ring->dmamem,
638 	    NRXCOMPDESC * sizeof(comp_ring->rxcd[0]), 512) != 0)
639 		return -1;
640 	comp_ring->rxcd = VMX_DMA_KVA(&comp_ring->dmamem);
641 
642 	for (i = 0; i < 2; i++) {
643 		ring = &rq->cmd_ring[i];
644 		ring->sc = sc;
645 		ring->rid = i;
646 		mtx_init(&ring->mtx, IPL_NET);
647 		timeout_set(&ring->refill, vmxnet3_rxfill_tick, ring);
648 		for (idx = 0; idx < NRXDESC; idx++) {
649 			if (bus_dmamap_create(sc->sc_dmat, JUMBO_LEN, 1,
650 			    JUMBO_LEN, 0, BUS_DMA_NOWAIT, &ring->dmap[idx]))
651 				return -1;
652 		}
653 
654 		ring->rs = rq->rs;
655 		ring->rxh = (i == 0) ?
656 		    VMXNET3_BAR0_RXH1(queue) : VMXNET3_BAR0_RXH2(queue);
657 	}
658 
659 	rs = rq->rs;
660 	bzero(rs, sizeof *rs);
661 	rs->cmd_ring[0] = VMX_DMA_DVA(&rq->cmd_ring[0].dmamem);
662 	rs->cmd_ring[1] = VMX_DMA_DVA(&rq->cmd_ring[1].dmamem);
663 	rs->cmd_ring_len[0] = NRXDESC;
664 	rs->cmd_ring_len[1] = NRXDESC;
665 	rs->comp_ring = VMX_DMA_DVA(&comp_ring->dmamem);
666 	rs->comp_ring_len = NRXCOMPDESC;
667 	rs->driver_data = ~0ULL;
668 	rs->driver_data_len = 0;
669 	rs->intr_idx = intr;
670 	rs->stopped = 1;
671 	rs->error = 0;
672 	return 0;
673 }
674 
675 void
676 vmxnet3_txinit(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *tq)
677 {
678 	struct vmxnet3_txring *ring = &tq->cmd_ring;
679 	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
680 
681 	ring->cons = ring->prod = 0;
682 	ring->gen = VMX_TX_GEN;
683 	comp_ring->next = 0;
684 	comp_ring->gen = VMX_TXC_GEN;
685 	memset(VMX_DMA_KVA(&ring->dmamem), 0,
686 	    VMX_DMA_LEN(&ring->dmamem));
687 	bus_dmamap_sync(sc->sc_dmat, VMX_DMA_MAP(&ring->dmamem),
688 	    0, VMX_DMA_LEN(&ring->dmamem), BUS_DMASYNC_PREWRITE);
689 	memset(VMX_DMA_KVA(&comp_ring->dmamem), 0,
690 	    VMX_DMA_LEN(&comp_ring->dmamem));
691 	bus_dmamap_sync(sc->sc_dmat, VMX_DMA_MAP(&comp_ring->dmamem),
692 	    0, VMX_DMA_LEN(&comp_ring->dmamem), BUS_DMASYNC_PREREAD);
693 
694 	ifq_clr_oactive(tq->ifq);
695 }
696 
697 void
698 vmxnet3_rxfill_tick(void *arg)
699 {
700 	struct vmxnet3_rxring *ring = arg;
701 
702 	if (!mtx_enter_try(&ring->mtx))
703 		return;
704 
705 	vmxnet3_rxfill(ring);
706 	mtx_leave(&ring->mtx);
707 }
708 
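/*
 * Fill free slots of an rx ring with freshly allocated mbuf clusters
 * and hand the descriptors to the device.  Called with the ring mutex
 * held; if no buffers could be posted, a retry is scheduled via the
 * refill timeout.
 */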
709 void
710 vmxnet3_rxfill(struct vmxnet3_rxring *ring)
711 {
712 	struct vmxnet3_softc *sc = ring->sc;
713 	struct vmxnet3_rxdesc *rxd;
714 	struct mbuf *m;
715 	bus_dmamap_t map;
716 	u_int slots;
717 	unsigned int prod;
718 	uint32_t rgen;
719 	uint32_t type = htole32(VMXNET3_BTYPE_HEAD << VMXNET3_RX_BTYPE_S);
720 
721 	/* Second ring just contains packet bodies. */
722 	if (ring->rid == 1)
723 		type = htole32(VMXNET3_BTYPE_BODY << VMXNET3_RX_BTYPE_S);
724 
725 	MUTEX_ASSERT_LOCKED(&ring->mtx);
726 
727 	slots = if_rxr_get(&ring->rxr, NRXDESC);
728 	if (slots == 0)
729 		return;
730 
731 	prod = ring->fill;
732 	rgen = ring->gen;
733 
734 	bus_dmamap_sync(sc->sc_dmat, VMX_DMA_MAP(&ring->dmamem),
735 	    0, VMX_DMA_LEN(&ring->dmamem), BUS_DMASYNC_POSTWRITE);
736 
737 	do {
738 		KASSERT(ring->m[prod] == NULL);
739 
740 		m = MCLGETL(NULL, M_DONTWAIT, JUMBO_LEN);
741 		if (m == NULL)
742 			break;
743 
744 		m->m_pkthdr.len = m->m_len = JUMBO_LEN;
745 		m_adj(m, ETHER_ALIGN);
746 
747 		map = ring->dmap[prod];
748 		if (bus_dmamap_load_mbuf(sc->sc_dmat, map, m, BUS_DMA_NOWAIT))
749 			panic("load mbuf");
750 
751 		bus_dmamap_sync(sc->sc_dmat, map, 0, map->dm_mapsize,
752 		    BUS_DMASYNC_PREREAD);
753 
754 		ring->m[prod] = m;
755 
756 		rxd = &ring->rxd[prod];
757 		rxd->rx_addr = htole64(DMAADDR(map));
758 		bus_dmamap_sync(sc->sc_dmat, VMX_DMA_MAP(&ring->dmamem),
759 		    0, VMX_DMA_LEN(&ring->dmamem),
760 		    BUS_DMASYNC_PREWRITE|BUS_DMASYNC_POSTWRITE);
761 		rxd->rx_word2 = (htole32(m->m_pkthdr.len & VMXNET3_RX_LEN_M) <<
762 		    VMXNET3_RX_LEN_S) | type | rgen;
763 
764 		if (++prod == NRXDESC) {
765 			prod = 0;
766 			rgen ^= VMX_RX_GEN;
767 		}
768 	} while (--slots > 0);
769 
770 	bus_dmamap_sync(sc->sc_dmat, VMX_DMA_MAP(&ring->dmamem),
771 	    0, VMX_DMA_LEN(&ring->dmamem), BUS_DMASYNC_PREWRITE);
772 
773 	if_rxr_put(&ring->rxr, slots);
774 
775 	ring->fill = prod;
776 	ring->gen = rgen;
777 
778 	if (if_rxr_inuse(&ring->rxr) == 0)
779 		timeout_add(&ring->refill, 1);
780 
781 	if (ring->rs->update_rxhead)
782 		WRITE_BAR0(sc, ring->rxh, prod);
783 }
784 
785 void
786 vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rq)
787 {
788 	struct vmxnet3_rxring *ring;
789 	struct vmxnet3_comp_ring *comp_ring;
790 	int i;
791 
792 	for (i = 0; i < 2; i++) {
793 		ring = &rq->cmd_ring[i];
794 		if_rxr_init(&ring->rxr, 2, NRXDESC - 1);
795 		ring->fill = 0;
796 		ring->gen = VMX_RX_GEN;
797 
798 		memset(VMX_DMA_KVA(&ring->dmamem), 0,
799 		    VMX_DMA_LEN(&ring->dmamem));
800 		bus_dmamap_sync(sc->sc_dmat, VMX_DMA_MAP(&ring->dmamem),
801 		    0, VMX_DMA_LEN(&ring->dmamem), BUS_DMASYNC_PREWRITE);
802 
803 		mtx_enter(&ring->mtx);
804 		vmxnet3_rxfill(ring);
805 		mtx_leave(&ring->mtx);
806 	}
807 
808 	comp_ring = &rq->comp_ring;
809 	comp_ring->next = 0;
810 	comp_ring->gen = VMX_RXC_GEN;
811 	comp_ring->sendmp = NULL;
812 	comp_ring->lastmp = NULL;
813 
814 	memset(VMX_DMA_KVA(&comp_ring->dmamem), 0,
815 	    VMX_DMA_LEN(&comp_ring->dmamem));
816 	bus_dmamap_sync(sc->sc_dmat, VMX_DMA_MAP(&comp_ring->dmamem),
817 	    0, VMX_DMA_LEN(&comp_ring->dmamem), BUS_DMASYNC_PREREAD);
818 }
819 
820 void
821 vmxnet3_txstop(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *tq)
822 {
823 	struct vmxnet3_txring *ring = &tq->cmd_ring;
824 	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
825 	struct ifqueue *ifq = tq->ifq;
826 	int idx;
827 
828 	bus_dmamap_sync(sc->sc_dmat, VMX_DMA_MAP(&comp_ring->dmamem),
829 	    0, VMX_DMA_LEN(&comp_ring->dmamem), BUS_DMASYNC_POSTREAD);
830 	bus_dmamap_sync(sc->sc_dmat, VMX_DMA_MAP(&ring->dmamem),
831 	    0, VMX_DMA_LEN(&ring->dmamem), BUS_DMASYNC_POSTWRITE);
832 
833 	for (idx = 0; idx < NTXDESC; idx++) {
834 		if (ring->m[idx]) {
835 			bus_dmamap_unload(sc->sc_dmat, ring->dmap[idx]);
836 			m_freem(ring->m[idx]);
837 			ring->m[idx] = NULL;
838 		}
839 	}
840 
841 	ifq_purge(ifq);
842 	ifq_clr_oactive(ifq);
843 }
844 
845 void
846 vmxnet3_rxstop(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rq)
847 {
848 	struct vmxnet3_rxring *ring;
849 	struct vmxnet3_comp_ring *comp_ring = &rq->comp_ring;
850 	int i, idx;
851 
852 	bus_dmamap_sync(sc->sc_dmat, VMX_DMA_MAP(&comp_ring->dmamem),
853 	    0, VMX_DMA_LEN(&comp_ring->dmamem), BUS_DMASYNC_POSTREAD);
854 
855 	for (i = 0; i < 2; i++) {
856 		ring = &rq->cmd_ring[i];
857 		bus_dmamap_sync(sc->sc_dmat, VMX_DMA_MAP(&ring->dmamem),
858 		    0, VMX_DMA_LEN(&ring->dmamem), BUS_DMASYNC_POSTWRITE);
859 		timeout_del(&ring->refill);
860 		for (idx = 0; idx < NRXDESC; idx++) {
861 			struct mbuf *m = ring->m[idx];
862 			if (m == NULL)
863 				continue;
864 
865 			ring->m[idx] = NULL;
866 			m_freem(m);
867 			bus_dmamap_unload(sc->sc_dmat, ring->dmap[idx]);
868 		}
869 	}
870 }
871 
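/*
 * Query the device for the current link state and speed and report
 * any change to the network stack.
 */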
872 void
873 vmxnet3_link_state(struct vmxnet3_softc *sc)
874 {
875 	struct ifnet *ifp = &sc->sc_arpcom.ac_if;
876 	u_int x, link, speed;
877 
878 	WRITE_CMD(sc, VMXNET3_CMD_GET_LINK);
879 	x = READ_BAR1(sc, VMXNET3_BAR1_CMD);
880 	speed = x >> 16;
881 	if (x & 1) {
882 		ifp->if_baudrate = IF_Mbps(speed);
883 		link = LINK_STATE_UP;
884 	} else
885 		link = LINK_STATE_DOWN;
886 
887 	if (ifp->if_link_state != link) {
888 		ifp->if_link_state = link;
889 		if_link_state_change(ifp);
890 	}
891 }
892 
893 static inline void
894 vmxnet3_enable_intr(struct vmxnet3_softc *sc, int irq)
895 {
896 	WRITE_BAR0(sc, VMXNET3_BAR0_IMASK(irq), 0);
897 }
898 
899 static inline void
900 vmxnet3_disable_intr(struct vmxnet3_softc *sc, int irq)
901 {
902 	WRITE_BAR0(sc, VMXNET3_BAR0_IMASK(irq), 1);
903 }
904 
905 void
906 vmxnet3_enable_all_intrs(struct vmxnet3_softc *sc)
907 {
908 	int i;
909 
910 	sc->sc_ds->ictrl &= ~VMXNET3_ICTRL_DISABLE_ALL;
911 	vmxnet3_enable_intr(sc, 0);
912 	if (sc->sc_intrmap) {
913 		for (i = 0; i < sc->sc_nqueues; i++)
914 			vmxnet3_enable_intr(sc, sc->sc_q[i].intr);
915 	}
916 }
917 
918 void
919 vmxnet3_disable_all_intrs(struct vmxnet3_softc *sc)
920 {
921 	int i;
922 
923 	sc->sc_ds->ictrl |= VMXNET3_ICTRL_DISABLE_ALL;
924 	vmxnet3_disable_intr(sc, 0);
925 	if (sc->sc_intrmap) {
926 		for (i = 0; i < sc->sc_nqueues; i++)
927 			vmxnet3_disable_intr(sc, sc->sc_q[i].intr);
928 	}
929 }
930 
931 int
932 vmxnet3_intr_intx(void *arg)
933 {
934 	struct vmxnet3_softc *sc = arg;
935 
936 	if (READ_BAR1(sc, VMXNET3_BAR1_INTR) == 0)
937 		return 0;
938 
939 	return (vmxnet3_intr(sc));
940 }
941 
942 int
943 vmxnet3_intr(void *arg)
944 {
945 	struct vmxnet3_softc *sc = arg;
946 	struct ifnet *ifp = &sc->sc_arpcom.ac_if;
947 
948 	if (sc->sc_ds->event) {
949 		KERNEL_LOCK();
950 		vmxnet3_evintr(sc);
951 		KERNEL_UNLOCK();
952 	}
953 
954 	if (ifp->if_flags & IFF_RUNNING) {
955 		vmxnet3_rxintr(sc, &sc->sc_q[0].rx);
956 		vmxnet3_txintr(sc, &sc->sc_q[0].tx);
957 		vmxnet3_enable_intr(sc, 0);
958 	}
959 
960 	return 1;
961 }
962 
963 int
964 vmxnet3_intr_event(void *arg)
965 {
966 	struct vmxnet3_softc *sc = arg;
967 
968 	if (sc->sc_ds->event) {
969 		KERNEL_LOCK();
970 		vmxnet3_evintr(sc);
971 		KERNEL_UNLOCK();
972 	}
973 
974 	vmxnet3_enable_intr(sc, 0);
975 	return 1;
976 }
977 
978 int
979 vmxnet3_intr_queue(void *arg)
980 {
981 	struct vmxnet3_queue *q = arg;
982 
983 	vmxnet3_rxintr(q->sc, &q->rx);
984 	vmxnet3_txintr(q->sc, &q->tx);
985 	vmxnet3_enable_intr(q->sc, q->intr);
986 
987 	return 1;
988 }
989 
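/*
 * Handle device events: link state changes, tx/rx queue errors (which
 * cause a reinit) and informational events.
 */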
990 void
991 vmxnet3_evintr(struct vmxnet3_softc *sc)
992 {
993 	struct ifnet *ifp = &sc->sc_arpcom.ac_if;
994 	u_int event = sc->sc_ds->event;
995 	struct vmxnet3_txq_shared *ts;
996 	struct vmxnet3_rxq_shared *rs;
997 
998 	/* Clear events. */
999 	WRITE_BAR1(sc, VMXNET3_BAR1_EVENT, event);
1000 
1001 	/* Link state change? */
1002 	if (event & VMXNET3_EVENT_LINK)
1003 		vmxnet3_link_state(sc);
1004 
1005 	/* Queue error? */
1006 	if (event & (VMXNET3_EVENT_TQERROR | VMXNET3_EVENT_RQERROR)) {
1007 		WRITE_CMD(sc, VMXNET3_CMD_GET_STATUS);
1008 
1009 		ts = sc->sc_q[0].tx.ts;
1010 		if (ts->stopped)
1011 			printf("%s: TX error 0x%x\n", ifp->if_xname, ts->error);
1012 		rs = sc->sc_q[0].rx.rs;
1013 		if (rs->stopped)
1014 			printf("%s: RX error 0x%x\n", ifp->if_xname, rs->error);
1015 		vmxnet3_init(sc);
1016 	}
1017 
1018 	if (event & VMXNET3_EVENT_DIC)
1019 		printf("%s: device implementation change event\n",
1020 		    ifp->if_xname);
1021 	if (event & VMXNET3_EVENT_DEBUG)
1022 		printf("%s: debug event\n", ifp->if_xname);
1023 }
1024 
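/*
 * Process the tx completion ring: unload and free mbufs for the
 * descriptors the device has finished with and restart the ifq if it
 * was marked oactive.
 */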
1025 void
1026 vmxnet3_txintr(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *tq)
1027 {
1028 	struct ifqueue *ifq = tq->ifq;
1029 	struct vmxnet3_txring *ring = &tq->cmd_ring;
1030 	struct vmxnet3_comp_ring *comp_ring = &tq->comp_ring;
1031 	struct vmxnet3_txcompdesc *txcd;
1032 	bus_dmamap_t map;
1033 	struct mbuf *m;
1034 	u_int prod, cons, next;
1035 	uint32_t rgen;
1036 
1037 	prod = ring->prod;
1038 	cons = ring->cons;
1039 
1040 	if (cons == prod)
1041 		return;
1042 
1043 	next = comp_ring->next;
1044 	rgen = comp_ring->gen;
1045 
1046 	bus_dmamap_sync(sc->sc_dmat, VMX_DMA_MAP(&comp_ring->dmamem),
1047 	    0, VMX_DMA_LEN(&comp_ring->dmamem), BUS_DMASYNC_POSTREAD);
1048 
1049 	do {
1050 		txcd = &comp_ring->txcd[next];
1051 		if ((txcd->txc_word3 & VMX_TXC_GEN) != rgen)
1052 			break;
1053 
1054 		if (++next == NTXCOMPDESC) {
1055 			next = 0;
1056 			rgen ^= VMX_TXC_GEN;
1057 		}
1058 
1059 		m = ring->m[cons];
1060 		ring->m[cons] = NULL;
1061 
1062 		KASSERT(m != NULL);
1063 
1064 		map = ring->dmap[cons];
1065 		bus_dmamap_unload(sc->sc_dmat, map);
1066 		m_freem(m);
1067 
1068 		cons = (letoh32(txcd->txc_word0) >> VMXNET3_TXC_EOPIDX_S) &
1069 		    VMXNET3_TXC_EOPIDX_M;
1070 		cons++;
1071 		cons %= NTXDESC;
1072 	} while (cons != prod);
1073 
1074 	bus_dmamap_sync(sc->sc_dmat, VMX_DMA_MAP(&comp_ring->dmamem),
1075 	    0, VMX_DMA_LEN(&comp_ring->dmamem), BUS_DMASYNC_PREREAD);
1076 
1077 	comp_ring->next = next;
1078 	comp_ring->gen = rgen;
1079 	ring->cons = cons;
1080 
1081 	if (ifq_is_oactive(ifq))
1082 		ifq_restart(ifq);
1083 }
1084 
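/*
 * Process the rx completion ring: reassemble multi-descriptor packets,
 * record offload information, pass the packets up the stack and refill
 * the command rings.
 */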
1085 void
1086 vmxnet3_rxintr(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rq)
1087 {
1088 	struct ifnet *ifp = &sc->sc_arpcom.ac_if;
1089 	struct vmxnet3_comp_ring *comp_ring = &rq->comp_ring;
1090 	struct vmxnet3_rxring *ring;
1091 	struct vmxnet3_rxcompdesc *rxcd;
1092 	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
1093 	struct mbuf *m;
1094 	bus_dmamap_t map;
1095 	unsigned int idx;
1096 	unsigned int next, rgen;
1097 	unsigned int rid, done[2] = {0, 0};
1098 
1099 	next = comp_ring->next;
1100 	rgen = comp_ring->gen;
1101 
1102 	bus_dmamap_sync(sc->sc_dmat, VMX_DMA_MAP(&comp_ring->dmamem),
1103 	    0, VMX_DMA_LEN(&comp_ring->dmamem), BUS_DMASYNC_POSTREAD);
1104 
1105 	for (;;) {
1106 		rxcd = &comp_ring->rxcd[next];
1107 		if ((rxcd->rxc_word3 & VMX_RXC_GEN) != rgen)
1108 			break;
1109 
1110 		if (++next == NRXCOMPDESC) {
1111 			next = 0;
1112 			rgen ^= VMX_RXC_GEN;
1113 		}
1114 
1115 		idx = letoh32((rxcd->rxc_word0 >> VMXNET3_RXC_IDX_S) &
1116 		    VMXNET3_RXC_IDX_M);
1117 
1118 		if (letoh32((rxcd->rxc_word0 >> VMXNET3_RXC_QID_S) &
1119 		    VMXNET3_RXC_QID_M) < sc->sc_nqueues)
1120 			rid = 0;
1121 		else
1122 			rid = 1;
1123 
1124 		ring = &rq->cmd_ring[rid];
1125 
1126 		m = ring->m[idx];
1127 		KASSERT(m != NULL);
1128 		ring->m[idx] = NULL;
1129 
1130 		map = ring->dmap[idx];
1131 		bus_dmamap_sync(sc->sc_dmat, map, 0, map->dm_mapsize,
1132 		    BUS_DMASYNC_POSTREAD);
1133 		bus_dmamap_unload(sc->sc_dmat, map);
1134 
1135 		done[rid]++;
1136 
1137 		/*
1138 		 * A receive descriptor of type 4 that is flagged as start of
1139 		 * packet contains the number of TCP segments of an LRO packet.
1140 		 */
1141 		if (letoh32((rxcd->rxc_word3 & VMXNET3_RXC_TYPE_M) >>
1142 		    VMXNET3_RXC_TYPE_S) == 4 &&
1143 		    ISSET(rxcd->rxc_word0, VMXNET3_RXC_SOP)) {
1144 			m->m_pkthdr.ph_mss = letoh32(rxcd->rxc_word1 &
1145 			    VMXNET3_RXC_SEG_CNT_M);
1146 		}
1147 
1148 		m->m_len = letoh32((rxcd->rxc_word2 >> VMXNET3_RXC_LEN_S) &
1149 		    VMXNET3_RXC_LEN_M);
1150 
1151 		if (comp_ring->sendmp == NULL) {
1152 			comp_ring->sendmp = comp_ring->lastmp = m;
1153 			comp_ring->sendmp->m_pkthdr.len = 0;
1154 		} else {
1155 			CLR(m->m_flags, M_PKTHDR);
1156 			comp_ring->lastmp->m_next = m;
1157 			comp_ring->lastmp = m;
1158 		}
1159 		comp_ring->sendmp->m_pkthdr.len += m->m_len;
1160 
1161 		if (!ISSET(rxcd->rxc_word0, VMXNET3_RXC_EOP))
1162 			continue;
1163 
1164 		/*
1165 		 * End of Packet
1166 		 */
1167 
1168 		if (letoh32(rxcd->rxc_word2 & VMXNET3_RXC_ERROR)) {
1169 			ifp->if_ierrors++;
1170 			m_freem(comp_ring->sendmp);
1171 			comp_ring->sendmp = comp_ring->lastmp = NULL;
1172 			continue;
1173 		}
1174 
1175 		if (comp_ring->sendmp->m_pkthdr.len < VMXNET3_MIN_MTU) {
1176 			m_freem(comp_ring->sendmp);
1177 			comp_ring->sendmp = comp_ring->lastmp = NULL;
1178 			continue;
1179 		}
1180 
1181 		if (((letoh32(rxcd->rxc_word0) >> VMXNET3_RXC_RSSTYPE_S) &
1182 		    VMXNET3_RXC_RSSTYPE_M) != VMXNET3_RXC_RSSTYPE_NONE) {
1183 			comp_ring->sendmp->m_pkthdr.ph_flowid =
1184 			    letoh32(rxcd->rxc_word1);
1185 			SET(comp_ring->sendmp->m_pkthdr.csum_flags, M_FLOWID);
1186 		}
1187 
1188 		vmxnet3_rx_offload(rxcd, comp_ring->sendmp);
1189 		ml_enqueue(&ml, comp_ring->sendmp);
1190 		comp_ring->sendmp = comp_ring->lastmp = NULL;
1191 	}
1192 
1193 	bus_dmamap_sync(sc->sc_dmat, VMX_DMA_MAP(&comp_ring->dmamem),
1194 	    0, VMX_DMA_LEN(&comp_ring->dmamem), BUS_DMASYNC_PREREAD);
1195 
1196 	comp_ring->next = next;
1197 	comp_ring->gen = rgen;
1198 
1199 	for (int i = 0; i < 2; i++) {
1200 		if (done[i] == 0)
1201 			continue;
1202 
1203 		ring = &rq->cmd_ring[i];
1204 
1205 		if (ifiq_input(rq->ifiq, &ml))
1206 			if_rxr_livelocked(&ring->rxr);
1207 
1208 		mtx_enter(&ring->mtx);
1209 		if_rxr_put(&ring->rxr, done[i]);
1210 		vmxnet3_rxfill(ring);
1211 		mtx_leave(&ring->mtx);
1212 	}
1213 }
1214 
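/*
 * Program the receive filter: the rx mode flags and the multicast
 * address table shared with the device.
 */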
1215 void
1216 vmxnet3_iff(struct vmxnet3_softc *sc)
1217 {
1218 	struct ifnet *ifp = &sc->sc_arpcom.ac_if;
1219 	struct arpcom *ac = &sc->sc_arpcom;
1220 	struct vmxnet3_driver_shared *ds = sc->sc_ds;
1221 	struct ether_multi *enm;
1222 	struct ether_multistep step;
1223 	u_int mode;
1224 	u_int8_t *p;
1225 
1226 	ds->mcast_tablelen = 0;
1227 	CLR(ifp->if_flags, IFF_ALLMULTI);
1228 
1229 	/*
1230 	 * Always accept broadcast frames.
1231 	 * Always accept frames destined to our station address.
1232 	 */
1233 	mode = VMXNET3_RXMODE_BCAST | VMXNET3_RXMODE_UCAST;
1234 
1235 	if (ISSET(ifp->if_flags, IFF_PROMISC) || ac->ac_multirangecnt > 0 ||
1236 	    ac->ac_multicnt > 682) {
1237 		SET(ifp->if_flags, IFF_ALLMULTI);
1238 		SET(mode, (VMXNET3_RXMODE_ALLMULTI | VMXNET3_RXMODE_MCAST));
1239 		if (ifp->if_flags & IFF_PROMISC)
1240 			SET(mode, VMXNET3_RXMODE_PROMISC);
1241 	} else {
1242 		p = sc->sc_mcast;
1243 		ETHER_FIRST_MULTI(step, ac, enm);
1244 		while (enm != NULL) {
1245 			bcopy(enm->enm_addrlo, p, ETHER_ADDR_LEN);
1246 
1247 			p += ETHER_ADDR_LEN;
1248 
1249 			ETHER_NEXT_MULTI(step, enm);
1250 		}
1251 
1252 		if (ac->ac_multicnt > 0) {
1253 			SET(mode, VMXNET3_RXMODE_MCAST);
1254 			ds->mcast_tablelen = p - sc->sc_mcast;
1255 		}
1256 	}
1257 
1258 	WRITE_CMD(sc, VMXNET3_CMD_SET_FILTER);
1259 	ds->rxmode = mode;
1260 	WRITE_CMD(sc, VMXNET3_CMD_SET_RXMODE);
1261 }
1262 
1263 
1264 void
1265 vmxnet3_rx_offload(struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m)
1266 {
1267 	uint32_t pkts;
1268 
1269 	/*
1270 	 * VLAN Offload
1271 	 */
1272 
1273 #if NVLAN > 0
1274 	if (ISSET(rxcd->rxc_word2, VMXNET3_RXC_VLAN)) {
1275 		SET(m->m_flags, M_VLANTAG);
1276 		m->m_pkthdr.ether_vtag = letoh32((rxcd->rxc_word2 >>
1277 		    VMXNET3_RXC_VLANTAG_S) & VMXNET3_RXC_VLANTAG_M);
1278 	}
1279 #endif
1280 
1281 	/*
1282 	 * Checksum Offload
1283 	 */
1284 
1285 	if (ISSET(rxcd->rxc_word0, VMXNET3_RXC_NOCSUM))
1286 		return;
1287 
1288 	if (ISSET(rxcd->rxc_word3, VMXNET3_RXC_IPV4) &&
1289 	    ISSET(rxcd->rxc_word3, VMXNET3_RXC_IPSUM_OK))
1290 		SET(m->m_pkthdr.csum_flags, M_IPV4_CSUM_IN_OK);
1291 
1292 	if (ISSET(rxcd->rxc_word3, VMXNET3_RXC_FRAGMENT))
1293 		return;
1294 
1295 	if (ISSET(rxcd->rxc_word3, VMXNET3_RXC_CSUM_OK)) {
1296 		if (ISSET(rxcd->rxc_word3, VMXNET3_RXC_TCP))
1297 			SET(m->m_pkthdr.csum_flags, M_TCP_CSUM_IN_OK);
1298 		else if (ISSET(rxcd->rxc_word3, VMXNET3_RXC_UDP))
1299 			SET(m->m_pkthdr.csum_flags, M_UDP_CSUM_IN_OK);
1300 	}
1301 
1302 	/*
1303 	 * TCP Large Receive Offload
1304 	 */
1305 
1306 	pkts = m->m_pkthdr.ph_mss;
1307 	m->m_pkthdr.ph_mss = 0;
1308 
1309 	if (pkts > 1) {
1310 		struct ether_extracted ext;
1311 		uint32_t paylen;
1312 
1313 		ether_extract_headers(m, &ext);
1314 
1315 		paylen = ext.iplen;
1316 		if (ext.ip4 || ext.ip6)
1317 			paylen -= ext.iphlen;
1318 
1319 		if (ext.tcp) {
1320 			paylen -= ext.tcphlen;
1321 			tcpstat_inc(tcps_inhwlro);
1322 			tcpstat_add(tcps_inpktlro, pkts);
1323 		} else {
1324 			tcpstat_inc(tcps_inbadlro);
1325 		}
1326 
1327 		/*
1328 		 * If we are going to forward this packet, we have to mark it
1329 		 * as TSO, set a correct mss, and recalculate the TCP checksum.
1330 		 */
1331 		if (ext.tcp && paylen >= pkts) {
1332 			SET(m->m_pkthdr.csum_flags, M_TCP_TSO);
1333 			m->m_pkthdr.ph_mss = paylen / pkts;
1334 		}
1335 		if (ext.tcp &&
1336 		    ISSET(m->m_pkthdr.csum_flags, M_TCP_CSUM_IN_OK)) {
1337 			SET(m->m_pkthdr.csum_flags, M_TCP_CSUM_OUT);
1338 		}
1339 	}
1340 }
1341 
1342 void
1343 vmxnet3_stop(struct ifnet *ifp)
1344 {
1345 	struct vmxnet3_softc *sc = ifp->if_softc;
1346 	int queue;
1347 
1348 	ifp->if_flags &= ~IFF_RUNNING;
1349 	ifp->if_timer = 0;
1350 
1351 	vmxnet3_disable_all_intrs(sc);
1352 
1353 	WRITE_CMD(sc, VMXNET3_CMD_DISABLE);
1354 
1355 	if (sc->sc_intrmap != NULL) {
1356 		for (queue = 0; queue < sc->sc_nqueues; queue++)
1357 			intr_barrier(sc->sc_q[queue].ih);
1358 	} else
1359 		intr_barrier(sc->sc_ih);
1360 
1361 	for (queue = 0; queue < sc->sc_nqueues; queue++)
1362 		vmxnet3_txstop(sc, &sc->sc_q[queue].tx);
1363 	for (queue = 0; queue < sc->sc_nqueues; queue++)
1364 		vmxnet3_rxstop(sc, &sc->sc_q[queue].rx);
1365 }
1366 
1367 void
1368 vmxnet3_reset(struct vmxnet3_softc *sc)
1369 {
1370 	WRITE_CMD(sc, VMXNET3_CMD_RESET);
1371 }
1372 
1373 int
1374 vmxnet3_init(struct vmxnet3_softc *sc)
1375 {
1376 	struct ifnet *ifp = &sc->sc_arpcom.ac_if;
1377 	int queue;
1378 
1379 	/*
1380 	 * Cancel pending I/O and free all RX/TX buffers.
1381 	 */
1382 	vmxnet3_stop(ifp);
1383 
1384 #if 0
1385 	/* Put controller into known state. */
1386 	vmxnet3_reset(sc);
1387 #endif
1388 
1389 	for (queue = 0; queue < sc->sc_nqueues; queue++)
1390 		vmxnet3_txinit(sc, &sc->sc_q[queue].tx);
1391 	for (queue = 0; queue < sc->sc_nqueues; queue++)
1392 		vmxnet3_rxinit(sc, &sc->sc_q[queue].rx);
1393 
1394 	for (queue = 0; queue < sc->sc_nqueues; queue++) {
1395 		WRITE_BAR0(sc, VMXNET3_BAR0_RXH1(queue), 0);
1396 		WRITE_BAR0(sc, VMXNET3_BAR0_RXH2(queue), 0);
1397 	}
1398 
1399 	WRITE_CMD(sc, VMXNET3_CMD_ENABLE);
1400 	if (READ_BAR1(sc, VMXNET3_BAR1_CMD)) {
1401 		printf("%s: failed to initialize\n", ifp->if_xname);
1402 		vmxnet3_stop(ifp);
1403 		return EIO;
1404 	}
1405 
1406 	/* TCP Large Receive Offload */
1407 	if (ISSET(ifp->if_xflags, IFXF_LRO))
1408 		SET(sc->sc_ds->upt_features, UPT1_F_LRO);
1409 	else
1410 		CLR(sc->sc_ds->upt_features, UPT1_F_LRO);
1411 	WRITE_CMD(sc, VMXNET3_CMD_SET_FEATURE);
1412 
1413 	/* Program promiscuous mode and multicast filters. */
1414 	vmxnet3_iff(sc);
1415 
1416 	vmxnet3_enable_all_intrs(sc);
1417 
1418 	vmxnet3_link_state(sc);
1419 
1420 	ifp->if_flags |= IFF_RUNNING;
1421 
1422 	return 0;
1423 }
1424 
1425 static int
1426 vmx_rxr_info(struct vmxnet3_softc *sc, struct if_rxrinfo *ifri)
1427 {
1428 	struct if_rxring_info *ifrs, *ifr;
1429 	int error;
1430 	unsigned int i;
1431 
1432 	ifrs = mallocarray(sc->sc_nqueues, sizeof(*ifrs),
1433 	    M_TEMP, M_WAITOK|M_ZERO|M_CANFAIL);
1434 	if (ifrs == NULL)
1435 		return (ENOMEM);
1436 
1437 	for (i = 0; i < sc->sc_nqueues; i++) {
1438 		struct if_rxring *rxr = &sc->sc_q[i].rx.cmd_ring[0].rxr;
1439 		ifr = &ifrs[i];
1440 
1441 		ifr->ifr_size = JUMBO_LEN;
1442 		snprintf(ifr->ifr_name, sizeof(ifr->ifr_name), "%u", i);
1443 		ifr->ifr_info = *rxr;
1444 	}
1445 
1446 	error = if_rxr_info_ioctl(ifri, i, ifrs);
1447 
1448 	free(ifrs, M_TEMP, i * sizeof(*ifrs));
1449 
1450 	return (error);
1451 }
1452 
1453 int
1454 vmxnet3_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1455 {
1456 	struct vmxnet3_softc *sc = ifp->if_softc;
1457 	struct ifreq *ifr = (struct ifreq *)data;
1458 	int error = 0, s;
1459 
1460 	s = splnet();
1461 
1462 	switch (cmd) {
1463 	case SIOCSIFADDR:
1464 		ifp->if_flags |= IFF_UP;
1465 		if ((ifp->if_flags & IFF_RUNNING) == 0)
1466 			error = vmxnet3_init(sc);
1467 		break;
1468 	case SIOCSIFFLAGS:
1469 		if (ifp->if_flags & IFF_UP) {
1470 			if (ifp->if_flags & IFF_RUNNING)
1471 				error = ENETRESET;
1472 			else
1473 				error = vmxnet3_init(sc);
1474 		} else {
1475 			if (ifp->if_flags & IFF_RUNNING)
1476 				vmxnet3_stop(ifp);
1477 		}
1478 		break;
1479 	case SIOCSIFMEDIA:
1480 	case SIOCGIFMEDIA:
1481 		error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
1482 		break;
1483 	case SIOCGIFRXR:
1484 		error = vmx_rxr_info(sc, (struct if_rxrinfo *)ifr->ifr_data);
1485 		break;
1486 	default:
1487 		error = ether_ioctl(ifp, &sc->sc_arpcom, cmd, data);
1488 	}
1489 
1490 	if (error == ENETRESET) {
1491 		if (ifp->if_flags & IFF_RUNNING)
1492 			vmxnet3_iff(sc);
1493 		error = 0;
1494 	}
1495 
1496 	splx(s);
1497 	return error;
1498 }
1499 
1500 static inline int
1501 vmx_load_mbuf(bus_dma_tag_t dmat, bus_dmamap_t map, struct mbuf *m)
1502 {
1503 	int error;
1504 
1505 	error = bus_dmamap_load_mbuf(dmat, map, m,
1506 	    BUS_DMA_STREAMING | BUS_DMA_NOWAIT);
1507 	if (error != EFBIG)
1508 		return (error);
1509 
1510 	error = m_defrag(m, M_DONTWAIT);
1511 	if (error != 0)
1512 		return (error);
1513 
1514 	return (bus_dmamap_load_mbuf(dmat, map, m,
1515 	    BUS_DMA_STREAMING | BUS_DMA_NOWAIT));
1516 }
1517 
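/*
 * Set up VLAN tagging, checksum offload or TSO in the first
 * descriptor of a transmitted packet.
 */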
1518 void
1519 vmxnet3_tx_offload(struct vmxnet3_txdesc *sop, struct mbuf *m)
1520 {
1521 	struct ether_extracted ext;
1522 	uint32_t offset = 0;
1523 	uint32_t hdrlen;
1524 
1525 	/*
1526 	 * VLAN Offload
1527 	 */
1528 
1529 #if NVLAN > 0
1530 	if (ISSET(m->m_flags, M_VLANTAG)) {
1531 		sop->tx_word3 |= htole32(VMXNET3_TX_VTAG_MODE);
1532 		sop->tx_word3 |= htole32((m->m_pkthdr.ether_vtag &
1533 		    VMXNET3_TX_VLANTAG_M) << VMXNET3_TX_VLANTAG_S);
1534 	}
1535 #endif
1536 
1537 	/*
1538 	 * Checksum Offload
1539 	 */
1540 
1541 	if (!ISSET(m->m_pkthdr.csum_flags, M_TCP_CSUM_OUT) &&
1542 	    !ISSET(m->m_pkthdr.csum_flags, M_UDP_CSUM_OUT))
1543 		return;
1544 
1545 	ether_extract_headers(m, &ext);
1546 
1547 	hdrlen = sizeof(*ext.eh);
1548 	if (ext.evh)
1549 		hdrlen = sizeof(*ext.evh);
1550 
1551 	if (ext.ip4 || ext.ip6)
1552 		hdrlen += ext.iphlen;
1553 
1554 	if (ext.tcp)
1555 		offset = hdrlen + offsetof(struct tcphdr, th_sum);
1556 	else if (ext.udp)
1557 		offset = hdrlen + offsetof(struct udphdr, uh_sum);
1558 	else
1559 		return;
1560 
1561 	if (!ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO)) {
1562 		hdrlen &= VMXNET3_TX_HLEN_M;
1563 		offset &= VMXNET3_TX_OP_M;
1564 
1565 		sop->tx_word3 |= htole32(VMXNET3_OM_CSUM << VMXNET3_TX_OM_S);
1566 		sop->tx_word3 |= htole32(hdrlen << VMXNET3_TX_HLEN_S);
1567 		sop->tx_word2 |= htole32(offset << VMXNET3_TX_OP_S);
1568 
1569 		return;
1570 	}
1571 
1572 	/*
1573 	 * TCP Segmentation Offload
1574 	 */
1575 
1576 	if (ext.tcp == NULL || m->m_pkthdr.ph_mss == 0) {
1577 		tcpstat_inc(tcps_outbadtso);
1578 		return;
1579 	}
1580 
1581 	if (ext.ip4)
1582 		ext.ip4->ip_sum = 0;
1583 
1584 	hdrlen += ext.tcphlen;
1585 	hdrlen &= VMXNET3_TX_HLEN_M;
1586 
1587 	sop->tx_word3 |= htole32(VMXNET3_OM_TSO << VMXNET3_TX_OM_S);
1588 	sop->tx_word3 |= htole32(hdrlen << VMXNET3_TX_HLEN_S);
1589 	sop->tx_word2 |= htole32(m->m_pkthdr.ph_mss << VMXNET3_TX_OP_S);
1590 
1591 	tcpstat_add(tcps_outpkttso, (m->m_pkthdr.len - hdrlen +
1592 	    m->m_pkthdr.ph_mss - 1) / m->m_pkthdr.ph_mss);
1593 }
1594 
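/*
 * Encapsulate packets from the ifq onto the tx ring and notify the
 * device by writing the new producer index to BAR0.
 */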
1595 void
1596 vmxnet3_start(struct ifqueue *ifq)
1597 {
1598 	struct ifnet *ifp = ifq->ifq_if;
1599 	struct vmxnet3_softc *sc = ifp->if_softc;
1600 	struct vmxnet3_txqueue *tq = ifq->ifq_softc;
1601 	struct vmxnet3_txring *ring = &tq->cmd_ring;
1602 	struct vmxnet3_txdesc *txd, *sop;
1603 	bus_dmamap_t map;
1604 	unsigned int prod, free, i;
1605 	unsigned int post = 0;
1606 	uint32_t rgen, gen;
1607 
1608 	struct mbuf *m;
1609 
1610 	free = ring->cons;
1611 	prod = ring->prod;
1612 	if (free <= prod)
1613 		free += NTXDESC;
1614 	free -= prod;
1615 
1616 	bus_dmamap_sync(sc->sc_dmat, VMX_DMA_MAP(&ring->dmamem),
1617 	    0, VMX_DMA_LEN(&ring->dmamem), BUS_DMASYNC_POSTWRITE);
1618 
1619 	rgen = ring->gen;
1620 
1621 	for (;;) {
1622 		int hdrlen;
1623 
1624 		if (free <= NTXSEGS)
1625 			break;
1626 
1627 		m = ifq_dequeue(ifq);
1628 		if (m == NULL)
1629 			break;
1630 
1631 		/*
1632 		 * The Ethernet, IP and TCP headers, including options, must
1633 		 * lie in the first mbuf to support TSO.  Usually our stack
1634 		 * gets that right.  To avoid packet parsing here, make a rough
1635 		 * estimate for simple IPv4.  Cases seen in the wild contain
1636 		 * only the ether header in a separate mbuf.  To support IPv6
1637 		 * with TCP options, move as much as possible into the first
1638 		 * mbuf.  Realloc the mbuf before the bus dma load.
1639 		 */
1640 		hdrlen = sizeof(struct ether_header) + sizeof(struct ip) +
1641 		    sizeof(struct tcphdr);
1642 		if (ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO) &&
1643 		    m->m_len < hdrlen && hdrlen <= m->m_pkthdr.len) {
1644 			hdrlen = MHLEN;
1645 			/* m_pullup preserves alignment, reserve space */
1646 			hdrlen -= mtod(m, unsigned long) & (sizeof(long) - 1);
1647 			if (hdrlen > m->m_pkthdr.len)
1648 				hdrlen = m->m_pkthdr.len;
1649 			if ((m = m_pullup(m, hdrlen)) == NULL) {
1650 				ifq->ifq_errors++;
1651 				continue;
1652 			}
1653 		}
1654 
1655 		map = ring->dmap[prod];
1656 
1657 		if (vmx_load_mbuf(sc->sc_dmat, map, m) != 0) {
1658 			ifq->ifq_errors++;
1659 			m_freem(m);
1660 			continue;
1661 		}
1662 
1663 #if NBPFILTER > 0
1664 		if (ifp->if_bpf)
1665 			bpf_mtap_ether(ifp->if_bpf, m, BPF_DIRECTION_OUT);
1666 #endif
1667 
1668 		ring->m[prod] = m;
1669 
1670 		bus_dmamap_sync(sc->sc_dmat, map, 0,
1671 		    map->dm_mapsize, BUS_DMASYNC_PREWRITE);
1672 
1673 		free -= map->dm_nsegs;
1674 		/* set oactive here since txintr may be triggered in parallel */
1675 		if (free <= NTXSEGS)
1676 			ifq_set_oactive(ifq);
1677 
1678 		gen = rgen ^ VMX_TX_GEN;
1679 		sop = &ring->txd[prod];
1680 		for (i = 0; i < map->dm_nsegs; i++) {
1681 			txd = &ring->txd[prod];
1682 			txd->tx_addr = htole64(map->dm_segs[i].ds_addr);
1683 			txd->tx_word2 = htole32(map->dm_segs[i].ds_len <<
1684 			    VMXNET3_TX_LEN_S) | gen;
1685 			txd->tx_word3 = 0;
1686 
1687 			if (++prod == NTXDESC) {
1688 				prod = 0;
1689 				rgen ^= VMX_TX_GEN;
1690 			}
1691 
1692 			gen = rgen;
1693 		}
1694 		txd->tx_word3 = htole32(VMXNET3_TX_EOP | VMXNET3_TX_COMPREQ);
1695 
1696 		vmxnet3_tx_offload(sop, m);
1697 
1698 		ring->prod = prod;
1699 		/* Change the ownership by flipping the "generation" bit */
1700 		bus_dmamap_sync(sc->sc_dmat, VMX_DMA_MAP(&ring->dmamem),
1701 		    0, VMX_DMA_LEN(&ring->dmamem),
1702 		    BUS_DMASYNC_PREWRITE|BUS_DMASYNC_POSTWRITE);
1703 		sop->tx_word2 ^= VMX_TX_GEN;
1704 
1705 		post = 1;
1706 	}
1707 
1708 	bus_dmamap_sync(sc->sc_dmat, VMX_DMA_MAP(&ring->dmamem),
1709 	    0, VMX_DMA_LEN(&ring->dmamem), BUS_DMASYNC_PREWRITE);
1710 
1711 	if (!post)
1712 		return;
1713 
1714 	ring->gen = rgen;
1715 
1716 	WRITE_BAR0(sc, VMXNET3_BAR0_TXH(tq->queue), prod);
1717 }
1718 
1719 void
1720 vmxnet3_watchdog(struct ifnet *ifp)
1721 {
1722 	struct vmxnet3_softc *sc = ifp->if_softc;
1723 	int s;
1724 
1725 	printf("%s: device timeout\n", ifp->if_xname);
1726 	s = splnet();
1727 	vmxnet3_init(sc);
1728 	splx(s);
1729 }
1730 
1731 void
1732 vmxnet3_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1733 {
1734 	struct vmxnet3_softc *sc = ifp->if_softc;
1735 
1736 	vmxnet3_link_state(sc);
1737 
1738 	ifmr->ifm_status = IFM_AVALID;
1739 	ifmr->ifm_active = IFM_ETHER;
1740 
1741 	if (ifp->if_link_state != LINK_STATE_UP)
1742 		return;
1743 
1744 	ifmr->ifm_status |= IFM_ACTIVE;
1745 
1746 	if (ifp->if_baudrate >= IF_Gbps(10))
1747 		ifmr->ifm_active |= IFM_10G_T;
1748 }
1749 
1750 int
1751 vmxnet3_media_change(struct ifnet *ifp)
1752 {
1753 	return 0;
1754 }
1755 
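/*
 * One-shot allocation of zeroed, physically contiguous DMA memory;
 * the temporary map is unloaded and destroyed once the physical
 * address has been recorded.
 */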
1756 void *
1757 vmxnet3_dma_allocmem(struct vmxnet3_softc *sc, u_int size, u_int align, bus_addr_t *pa)
1758 {
1759 	bus_dma_tag_t t = sc->sc_dmat;
1760 	bus_dma_segment_t segs[1];
1761 	bus_dmamap_t map;
1762 	caddr_t va;
1763 	int n;
1764 
1765 	if (bus_dmamem_alloc(t, size, align, 0, segs, 1, &n, BUS_DMA_NOWAIT))
1766 		return NULL;
1767 	if (bus_dmamem_map(t, segs, 1, size, &va, BUS_DMA_NOWAIT))
1768 		return NULL;
1769 	if (bus_dmamap_create(t, size, 1, size, 0, BUS_DMA_NOWAIT, &map))
1770 		return NULL;
1771 	if (bus_dmamap_load(t, map, va, size, NULL, BUS_DMA_NOWAIT))
1772 		return NULL;
1773 	bzero(va, size);
1774 	*pa = DMAADDR(map);
1775 	bus_dmamap_unload(t, map);
1776 	bus_dmamap_destroy(t, map);
1777 	return va;
1778 }
1779 
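/*
 * Allocate, map and load a contiguous chunk of DMA memory, keeping
 * the map and segment in vdm so vmx_dmamem_free() can tear it down.
 */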
1780 static int
1781 vmx_dmamem_alloc(struct vmxnet3_softc *sc, struct vmx_dmamem *vdm,
1782     bus_size_t size, u_int align)
1783 {
1784 	vdm->vdm_size = size;
1785 
1786 	if (bus_dmamap_create(sc->sc_dmat, vdm->vdm_size, 1,
1787 	    vdm->vdm_size, 0,
1788 	    BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW | BUS_DMA_64BIT,
1789 	    &vdm->vdm_map) != 0)
1790 		return (1);
1791 	if (bus_dmamem_alloc(sc->sc_dmat, vdm->vdm_size,
1792 	    align, 0, &vdm->vdm_seg, 1, &vdm->vdm_nsegs,
1793 	    BUS_DMA_WAITOK | BUS_DMA_ZERO) != 0)
1794 		goto destroy;
1795 	if (bus_dmamem_map(sc->sc_dmat, &vdm->vdm_seg, vdm->vdm_nsegs,
1796 	    vdm->vdm_size, &vdm->vdm_kva, BUS_DMA_WAITOK) != 0)
1797 		goto free;
1798 	if (bus_dmamap_load(sc->sc_dmat, vdm->vdm_map, vdm->vdm_kva,
1799 	    vdm->vdm_size, NULL, BUS_DMA_WAITOK) != 0)
1800 		goto unmap;
1801 
1802 	return (0);
1803 unmap:
1804 	bus_dmamem_unmap(sc->sc_dmat, vdm->vdm_kva, vdm->vdm_size);
1805 free:
1806 	bus_dmamem_free(sc->sc_dmat, &vdm->vdm_seg, 1);
1807 destroy:
1808 	bus_dmamap_destroy(sc->sc_dmat, vdm->vdm_map);
1809 	return (1);
1810 }
1811 
1812 #ifdef notyet
1813 static void
1814 vmx_dmamem_free(struct vmxnet3_softc *sc, struct vmx_dmamem *vdm)
1815 {
1816 	bus_dmamap_unload(sc->sc_dmat, vdm->vdm_map);
1817 	bus_dmamem_unmap(sc->sc_dmat, vdm->vdm_kva, vdm->vdm_size);
1818 	bus_dmamem_free(sc->sc_dmat, &vdm->vdm_seg, 1);
1819 	bus_dmamap_destroy(sc->sc_dmat, vdm->vdm_map);
1820 }
1821 #endif
1822 
1823 #if NKSTAT > 0
1824 /*
1825  * "hardware" counters are exported as separate kstats for each tx
1826  * and rx ring, but the request for the hypervisor to update the
1827  * stats is done once at the controller level. we limit the number
1828  * of updates at the controller level to a rate of one per second to
1829  * debounce this a bit.
1830  */
1831 static const struct timeval vmx_kstat_rate = { 1, 0 };
1832 
1833 /*
1834  * all the vmx stats are 64 bit counters, we just need their name and units.
1835  */
1836 struct vmx_kstat_tpl {
1837 	const char		*name;
1838 	enum kstat_kv_unit	 unit;
1839 };
1840 
1841 static const struct vmx_kstat_tpl vmx_rx_kstat_tpl[UPT1_RxStats_count] = {
1842 	{ "LRO packets",	KSTAT_KV_U_PACKETS },
1843 	{ "LRO bytes",		KSTAT_KV_U_BYTES },
1844 	{ "ucast packets",	KSTAT_KV_U_PACKETS },
1845 	{ "ucast bytes",	KSTAT_KV_U_BYTES },
1846 	{ "mcast packets",	KSTAT_KV_U_PACKETS },
1847 	{ "mcast bytes",	KSTAT_KV_U_BYTES },
1848 	{ "bcast packets",	KSTAT_KV_U_PACKETS },
1849 	{ "bcast bytes",	KSTAT_KV_U_BYTES },
1850 	{ "no buffers",		KSTAT_KV_U_PACKETS },
1851 	{ "errors",		KSTAT_KV_U_PACKETS },
1852 };
1853 
1854 static const struct vmx_kstat_tpl vmx_tx_kstat_tpl[UPT1_TxStats_count] = {
1855 	{ "TSO packets",	KSTAT_KV_U_PACKETS },
1856 	{ "TSO bytes",		KSTAT_KV_U_BYTES },
1857 	{ "ucast packets",	KSTAT_KV_U_PACKETS },
1858 	{ "ucast bytes",	KSTAT_KV_U_BYTES },
1859 	{ "mcast packets",	KSTAT_KV_U_PACKETS },
1860 	{ "mcast bytes",	KSTAT_KV_U_BYTES },
1861 	{ "bcast packets",	KSTAT_KV_U_PACKETS },
1862 	{ "bcast bytes",	KSTAT_KV_U_BYTES },
1863 	{ "errors",		KSTAT_KV_U_PACKETS },
1864 	{ "discards",		KSTAT_KV_U_PACKETS },
1865 };
1866 
1867 static void
1868 vmx_kstat_init(struct vmxnet3_softc *sc)
1869 {
1870 	rw_init(&sc->sc_kstat_lock, "vmxkstat");
1871 }
1872 
1873 static int
1874 vmx_kstat_read(struct kstat *ks)
1875 {
1876 	struct vmxnet3_softc *sc = ks->ks_softc;
1877 	struct kstat_kv *kvs = ks->ks_data;
1878 	uint64_t *vs = ks->ks_ptr;
1879 	unsigned int n, i;
1880 
1881 	if (ratecheck(&sc->sc_kstat_updated, &vmx_kstat_rate)) {
1882 		WRITE_CMD(sc, VMXNET3_CMD_GET_STATS);
1883 		/* barrier? */
1884 	}
1885 
1886 	n = ks->ks_datalen / sizeof(*kvs);
1887 	for (i = 0; i < n; i++)
1888 		kstat_kv_u64(&kvs[i]) = lemtoh64(&vs[i]);
1889 
1890 	TIMEVAL_TO_TIMESPEC(&sc->sc_kstat_updated, &ks->ks_updated);
1891 
1892 	return (0);
1893 }
1894 
1895 static struct kstat *
1896 vmx_kstat_create(struct vmxnet3_softc *sc, const char *name, unsigned int unit,
1897     const struct vmx_kstat_tpl *tpls, unsigned int n, uint64_t *vs)
1898 {
1899 	struct kstat *ks;
1900 	struct kstat_kv *kvs;
1901 	unsigned int i;
1902 
1903 	ks = kstat_create(sc->sc_dev.dv_xname, 0, name, unit,
1904 	    KSTAT_T_KV, 0);
1905 	if (ks == NULL)
1906 		return (NULL);
1907 
1908 	kvs = mallocarray(n, sizeof(*kvs), M_DEVBUF, M_WAITOK|M_ZERO);
1909 	for (i = 0; i < n; i++) {
1910 		const struct vmx_kstat_tpl *tpl = &tpls[i];
1911 
1912 		kstat_kv_unit_init(&kvs[i], tpl->name,
1913 		    KSTAT_KV_T_COUNTER64, tpl->unit);
1914 	}
1915 
1916 	ks->ks_softc = sc;
1917 	kstat_set_wlock(ks, &sc->sc_kstat_lock);
1918 	ks->ks_ptr = vs;
1919 	ks->ks_data = kvs;
1920 	ks->ks_datalen = n * sizeof(*kvs);
1921 	ks->ks_read = vmx_kstat_read;
1922 	TIMEVAL_TO_TIMESPEC(&vmx_kstat_rate, &ks->ks_interval);
1923 
1924 	kstat_install(ks);
1925 
1926 	return (ks);
1927 }
1928 
1929 static void
1930 vmx_kstat_txstats(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *tq,
1931     int unit)
1932 {
1933 	tq->txkstat = vmx_kstat_create(sc, "vmx-txstats", unit,
1934 	    vmx_tx_kstat_tpl, nitems(vmx_tx_kstat_tpl), tq->ts->stats);
1935 }
1936 
1937 static void
1938 vmx_kstat_rxstats(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rq,
1939     int unit)
1940 {
1941 	rq->rxkstat = vmx_kstat_create(sc, "vmx-rxstats", unit,
1942 	    vmx_rx_kstat_tpl, nitems(vmx_rx_kstat_tpl), rq->rs->stats);
1943 }
1944 #endif /* NKSTAT > 0 */
1945