/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2016 Nicole Graziano <nicole@nextbsd.org>
 * Copyright (c) 2017 Matthew Macy <mmacy@mattmacy.io>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/* $FreeBSD$ */
#include "if_em.h"

#ifdef RSS
#include <net/rss_config.h>
#include <netinet/in_rss.h>
#endif

#ifdef VERBOSE_DEBUG
#define DPRINTF device_printf
#else
#define DPRINTF(...)
#endif

/*********************************************************************
 *  Local Function prototypes
 *********************************************************************/
static int em_tso_setup(struct e1000_softc *sc, if_pkt_info_t pi,
    uint32_t *txd_upper, uint32_t *txd_lower);
static int em_transmit_checksum_setup(struct e1000_softc *sc,
    if_pkt_info_t pi, uint32_t *txd_upper, uint32_t *txd_lower);
static int em_isc_txd_encap(void *arg, if_pkt_info_t pi);
static void em_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx);
static int em_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear);
static void em_isc_rxd_refill(void *arg, if_rxd_update_t iru);
static void em_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused,
    qidx_t pidx);
static int em_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx,
    qidx_t budget);
static int em_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri);

static void lem_isc_rxd_refill(void *arg, if_rxd_update_t iru);

static int lem_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx,
   qidx_t budget);
static int lem_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri);

static void em_receive_checksum(uint16_t, uint8_t, if_rxd_info_t);
static int em_determine_rsstype(uint32_t pkt_info);
extern int em_intr(void *arg);

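/*
 * iflib TX/RX operation tables.  em_txrx is used by adapters that write
 * back extended RX descriptors; lem_txrx serves the older legacy ("lem")
 * adapters that still use the plain e1000_rx_desc format.  The TX path
 * is shared between the two.
 */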
struct if_txrx em_txrx = {
	.ift_txd_encap = em_isc_txd_encap,
	.ift_txd_flush = em_isc_txd_flush,
	.ift_txd_credits_update = em_isc_txd_credits_update,
	.ift_rxd_available = em_isc_rxd_available,
	.ift_rxd_pkt_get = em_isc_rxd_pkt_get,
	.ift_rxd_refill = em_isc_rxd_refill,
	.ift_rxd_flush = em_isc_rxd_flush,
	.ift_legacy_intr = em_intr
};

struct if_txrx lem_txrx = {
	.ift_txd_encap = em_isc_txd_encap,
	.ift_txd_flush = em_isc_txd_flush,
	.ift_txd_credits_update = em_isc_txd_credits_update,
	.ift_rxd_available = lem_isc_rxd_available,
	.ift_rxd_pkt_get = lem_isc_rxd_pkt_get,
	.ift_rxd_refill = lem_isc_rxd_refill,
	.ift_rxd_flush = em_isc_rxd_flush,
	.ift_legacy_intr = em_intr
};

extern if_shared_ctx_t em_sctx;

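/*
 * Debug helper: dump the report-status (RS) bookkeeping for every TX queue,
 * showing which entries of tx_rsq have the DD bit set and where the
 * producer/consumer indices currently sit.
 */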
void
em_dump_rs(struct e1000_softc *sc)
{
	if_softc_ctx_t scctx = sc->shared;
	struct em_tx_queue *que;
	struct tx_ring *txr;
	qidx_t i, ntxd, qid, cur;
	int16_t rs_cidx;
	uint8_t status;

	printf("\n");
	ntxd = scctx->isc_ntxd[0];
	for (qid = 0; qid < sc->tx_num_queues; qid++) {
		que = &sc->tx_queues[qid];
		txr =  &que->txr;
		rs_cidx = txr->tx_rs_cidx;
		if (rs_cidx != txr->tx_rs_pidx) {
			cur = txr->tx_rsq[rs_cidx];
			status = txr->tx_base[cur].upper.fields.status;
			if (!(status & E1000_TXD_STAT_DD))
				printf("qid[%d]->tx_rsq[%d]: %d clear ", qid, rs_cidx, cur);
		} else {
			rs_cidx = (rs_cidx-1)&(ntxd-1);
			cur = txr->tx_rsq[rs_cidx];
			printf("qid[%d]->tx_rsq[rs_cidx-1=%d]: %d  ", qid, rs_cidx, cur);
		}
		printf("cidx_prev=%d rs_pidx=%d ",txr->tx_cidx_processed,
		    txr->tx_rs_pidx);
		for (i = 0; i < ntxd; i++) {
			if (txr->tx_base[i].upper.fields.status & E1000_TXD_STAT_DD)
				printf("%d set ", i);
		}
		printf("\n");
	}
}

/**********************************************************************
 *
 *  Setup work for hardware segmentation offload (TSO) on
 *  adapters using advanced tx descriptors
 *
 **********************************************************************/
static int
em_tso_setup(struct e1000_softc *sc, if_pkt_info_t pi, uint32_t *txd_upper,
    uint32_t *txd_lower)
{
	if_softc_ctx_t scctx = sc->shared;
	struct em_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx];
	struct tx_ring *txr = &que->txr;
	struct e1000_context_desc *TXD;
	int cur, hdr_len;

	hdr_len = pi->ipi_ehdrlen + pi->ipi_ip_hlen + pi->ipi_tcp_hlen;
	*txd_lower = (E1000_TXD_CMD_DEXT |	/* Extended descr type */
		      E1000_TXD_DTYP_D |	/* Data descr type */
		      E1000_TXD_CMD_TSE);	/* Do TSE on this packet */

	/* IP and/or TCP header checksum calculation and insertion. */
	*txd_upper = (E1000_TXD_POPTS_IXSM | E1000_TXD_POPTS_TXSM) << 8;

	cur = pi->ipi_pidx;
	TXD = (struct e1000_context_desc *)&txr->tx_base[cur];

	/*
	 * Start offset for header checksum calculation.
	 * End offset for header checksum calculation.
	 * Offset of place to put the checksum.
	 */
	TXD->lower_setup.ip_fields.ipcss = pi->ipi_ehdrlen;
	TXD->lower_setup.ip_fields.ipcse =
	    htole16(pi->ipi_ehdrlen + pi->ipi_ip_hlen - 1);
	TXD->lower_setup.ip_fields.ipcso =
	    pi->ipi_ehdrlen + offsetof(struct ip, ip_sum);

	/*
	 * Start offset for payload checksum calculation.
	 * End offset for payload checksum calculation.
	 * Offset of place to put the checksum.
	 */
	TXD->upper_setup.tcp_fields.tucss = pi->ipi_ehdrlen + pi->ipi_ip_hlen;
	TXD->upper_setup.tcp_fields.tucse = 0;
	TXD->upper_setup.tcp_fields.tucso =
	    pi->ipi_ehdrlen + pi->ipi_ip_hlen + offsetof(struct tcphdr, th_sum);

	/*
	 * Payload size per packet w/o any headers.
	 * Length of all headers up to payload.
	 */
	TXD->tcp_seg_setup.fields.mss = htole16(pi->ipi_tso_segsz);
	TXD->tcp_seg_setup.fields.hdr_len = hdr_len;

	TXD->cmd_and_length = htole32(sc->txd_cmd |
				E1000_TXD_CMD_DEXT |	/* Extended descr */
				E1000_TXD_CMD_TSE |	/* TSE context */
				E1000_TXD_CMD_IP |	/* Do IP csum */
				E1000_TXD_CMD_TCP |	/* Do TCP checksum */
				      (pi->ipi_len - hdr_len)); /* Total len */
	txr->tx_tso = true;

	if (++cur == scctx->isc_ntxd[0]) {
		cur = 0;
	}
	DPRINTF(iflib_get_dev(sc->ctx), "%s: pidx: %d cur: %d\n", __FUNCTION__,
	    pi->ipi_pidx, cur);
	return (cur);
}

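/*
 * TSO_WORKAROUND is the number of bytes carved off the final segment of a
 * TSO burst into a small sentinel descriptor (see em_isc_txd_encap()).
 * DONT_FORCE_CTX lets em_transmit_checksum_setup() reuse the previously
 * programmed checksum offload context when the header layout is unchanged.
 */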
#define TSO_WORKAROUND 4
#define DONT_FORCE_CTX 1


/*********************************************************************
 *  The offload context is protocol specific (TCP/UDP) and thus
 *  only needs to be set when the protocol changes. A context
 *  change can be a performance detriment and might be better
 *  just disabled. The reason arises in the way in which the
 *  controller supports pipelined requests from the Tx data DMA.
 *  Up to four requests can be pipelined, and they may belong to
 *  the same packet or to multiple packets. However all requests
 *  for one packet are issued before a request is issued for a
 *  subsequent packet, and if a request for the next packet
 *  requires a context change, that request will be stalled
 *  until the previous request completes. This means setting up
 *  a new context effectively disables pipelined Tx data DMA,
 *  which in turn greatly slows down performance when sending
 *  small frames.
 **********************************************************************/

static int
em_transmit_checksum_setup(struct e1000_softc *sc, if_pkt_info_t pi,
    uint32_t *txd_upper, uint32_t *txd_lower)
{
	struct e1000_context_desc *TXD = NULL;
	if_softc_ctx_t scctx = sc->shared;
	struct em_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx];
	struct tx_ring *txr = &que->txr;
	int csum_flags = pi->ipi_csum_flags;
	int cur, hdr_len;
	uint32_t cmd;

	cur = pi->ipi_pidx;
	hdr_len = pi->ipi_ehdrlen + pi->ipi_ip_hlen;
	cmd = sc->txd_cmd;

	/*
	 * The 82574L can only remember the *last* context used
	 * regardless of the queue it was used for.  We cannot reuse
	 * contexts on this hardware platform and must generate a new
	 * context every time.  82574L hardware spec, section 7.2.6,
	 * second note.
	 */
	if (DONT_FORCE_CTX &&
	    sc->tx_num_queues == 1 &&
	    txr->csum_lhlen == pi->ipi_ehdrlen &&
	    txr->csum_iphlen == pi->ipi_ip_hlen &&
	    txr->csum_flags == csum_flags) {
		/*
		 * Same csum offload context as the previous packets;
		 * just return.
		 */
		*txd_upper = txr->csum_txd_upper;
		*txd_lower = txr->csum_txd_lower;
		return (cur);
	}

	TXD = (struct e1000_context_desc *)&txr->tx_base[cur];
	if (csum_flags & CSUM_IP) {
		*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
		/*
		 * Start offset for header checksum calculation.
		 * End offset for header checksum calculation.
		 * Offset of place to put the checksum.
		 */
		TXD->lower_setup.ip_fields.ipcss = pi->ipi_ehdrlen;
		TXD->lower_setup.ip_fields.ipcse = htole16(hdr_len);
		TXD->lower_setup.ip_fields.ipcso = pi->ipi_ehdrlen +
		    offsetof(struct ip, ip_sum);
		cmd |= E1000_TXD_CMD_IP;
	}

	if (csum_flags & (CSUM_TCP|CSUM_UDP)) {
		uint8_t tucso;

		*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
		*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;

		if (csum_flags & CSUM_TCP) {
			tucso = hdr_len + offsetof(struct tcphdr, th_sum);
			cmd |= E1000_TXD_CMD_TCP;
		} else
			tucso = hdr_len + offsetof(struct udphdr, uh_sum);
		TXD->upper_setup.tcp_fields.tucss = hdr_len;
		TXD->upper_setup.tcp_fields.tucse = htole16(0);
		TXD->upper_setup.tcp_fields.tucso = tucso;
	}

	txr->csum_lhlen = pi->ipi_ehdrlen;
	txr->csum_iphlen = pi->ipi_ip_hlen;
	txr->csum_flags = csum_flags;
	txr->csum_txd_upper = *txd_upper;
	txr->csum_txd_lower = *txd_lower;

	TXD->tcp_seg_setup.data = htole32(0);
	TXD->cmd_and_length =
		htole32(E1000_TXD_CMD_IFCS | E1000_TXD_CMD_DEXT | cmd);

	if (++cur == scctx->isc_ntxd[0]) {
		cur = 0;
	}
	DPRINTF(iflib_get_dev(sc->ctx),
	    "checksum_setup csum_flags=%x txd_upper=%x txd_lower=%x hdr_len=%d cmd=%x\n",
	    csum_flags, *txd_upper, *txd_lower, hdr_len, cmd);
	return (cur);
}

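/*
 * iflib txd_encap hook: translate the packet described by 'pi' into TX
 * (and, when offload is requested, context) descriptors starting at
 * pi->ipi_pidx.  On success, pi->ipi_new_pidx is set to the next free
 * descriptor index.
 */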
static int
em_isc_txd_encap(void *arg, if_pkt_info_t pi)
{
	struct e1000_softc *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_tx_queue *que = &sc->tx_queues[pi->ipi_qsidx];
	struct tx_ring *txr = &que->txr;
	bus_dma_segment_t *segs = pi->ipi_segs;
	int nsegs = pi->ipi_nsegs;
	int csum_flags = pi->ipi_csum_flags;
	int i, j, first, pidx_last;
	uint32_t txd_flags, txd_upper = 0, txd_lower = 0;

	struct e1000_tx_desc *ctxd = NULL;
	bool do_tso, tso_desc;
	qidx_t ntxd;

	txd_flags = pi->ipi_flags & IPI_TX_INTR ? E1000_TXD_CMD_RS : 0;
	i = first = pi->ipi_pidx;
	do_tso = (csum_flags & CSUM_TSO);
	tso_desc = false;
	ntxd = scctx->isc_ntxd[0];
	/*
	 * TSO Hardware workaround, if this packet is not
	 * TSO, and is only a single descriptor long, and
	 * it follows a TSO burst, then we need to add a
	 * sentinel descriptor to prevent premature writeback.
	 */
	if ((!do_tso) && (txr->tx_tso == true)) {
		if (nsegs == 1)
			tso_desc = true;
		txr->tx_tso = false;
	}

	/* Do hardware assists */
	if (do_tso) {
		i = em_tso_setup(sc, pi, &txd_upper, &txd_lower);
		tso_desc = true;
	} else if (csum_flags & EM_CSUM_OFFLOAD) {
		i = em_transmit_checksum_setup(sc, pi, &txd_upper, &txd_lower);
	}

	if (pi->ipi_mflags & M_VLANTAG) {
		/* Set the vlan id. */
		txd_upper |= htole16(pi->ipi_vtag) << 16;
		/* Tell hardware to add tag */
		txd_lower |= htole32(E1000_TXD_CMD_VLE);
	}

	DPRINTF(iflib_get_dev(sc->ctx),
	    "encap: set up tx: nsegs=%d first=%d i=%d\n", nsegs, first, i);
	/* XXX sc->pcix_82544 -- lem_fill_descriptors */

	/* Set up our transmit descriptors */
	for (j = 0; j < nsegs; j++) {
		bus_size_t seg_len;
		bus_addr_t seg_addr;
		uint32_t cmd;

		ctxd = &txr->tx_base[i];
		seg_addr = segs[j].ds_addr;
		seg_len = segs[j].ds_len;
		cmd = E1000_TXD_CMD_IFCS | sc->txd_cmd;

		/*
		 * TSO Workaround:
		 * If this is the last descriptor, we want to
		 * split it so we have a small final sentinel
		 */
		if (tso_desc && (j == (nsegs - 1)) && (seg_len > 8)) {
			seg_len -= TSO_WORKAROUND;
			ctxd->buffer_addr = htole64(seg_addr);
			ctxd->lower.data = htole32(cmd | txd_lower | seg_len);
			ctxd->upper.data = htole32(txd_upper);

			if (++i == scctx->isc_ntxd[0])
				i = 0;

			/* Now make the sentinel */
			ctxd = &txr->tx_base[i];
			ctxd->buffer_addr = htole64(seg_addr + seg_len);
			ctxd->lower.data = htole32(cmd | txd_lower | TSO_WORKAROUND);
			ctxd->upper.data = htole32(txd_upper);
			pidx_last = i;
			if (++i == scctx->isc_ntxd[0])
				i = 0;
			DPRINTF(iflib_get_dev(sc->ctx),
			    "TSO path pidx_last=%d i=%d ntxd[0]=%d\n",
			    pidx_last, i, scctx->isc_ntxd[0]);
		} else {
			ctxd->buffer_addr = htole64(seg_addr);
			ctxd->lower.data = htole32(cmd | txd_lower | seg_len);
			ctxd->upper.data = htole32(txd_upper);
			pidx_last = i;
			if (++i == scctx->isc_ntxd[0])
				i = 0;
			DPRINTF(iflib_get_dev(sc->ctx), "pidx_last=%d i=%d ntxd[0]=%d\n",
			    pidx_last, i, scctx->isc_ntxd[0]);
		}
	}

	/*
	 * Last Descriptor of Packet
	 * needs End Of Packet (EOP)
	 * and Report Status (RS)
	 */
	if (txd_flags && nsegs) {
		txr->tx_rsq[txr->tx_rs_pidx] = pidx_last;
		DPRINTF(iflib_get_dev(sc->ctx),
		    "setting to RS on %d rs_pidx %d first: %d\n",
		    pidx_last, txr->tx_rs_pidx, first);
		txr->tx_rs_pidx = (txr->tx_rs_pidx+1) & (ntxd-1);
		MPASS(txr->tx_rs_pidx != txr->tx_rs_cidx);
	}
	ctxd->lower.data |= htole32(E1000_TXD_CMD_EOP | txd_flags);
	DPRINTF(iflib_get_dev(sc->ctx),
	    "tx_buffers[%d]->eop = %d ipi_new_pidx=%d\n", first, pidx_last, i);
	pi->ipi_new_pidx = i;

	return (0);
}

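/*
 * iflib txd_flush hook: write the TX descriptor tail (TDT) register so the
 * hardware starts fetching the descriptors queued up to 'pidx'.
 */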
static void
em_isc_txd_flush(void *arg, uint16_t txqid, qidx_t pidx)
{
	struct e1000_softc *sc = arg;
	struct em_tx_queue *que = &sc->tx_queues[txqid];
	struct tx_ring *txr = &que->txr;

	E1000_WRITE_REG(&sc->hw, E1000_TDT(txr->me), pidx);
}

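/*
 * iflib txd_credits_update hook: walk the report-status queue (tx_rsq) and
 * count how many descriptors the hardware has completed (DD set).  With
 * clear == false this only reports whether any work is pending; with
 * clear == true it advances the bookkeeping and returns the credit count.
 */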
static int
em_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear)
{
	struct e1000_softc *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_tx_queue *que = &sc->tx_queues[txqid];
	struct tx_ring *txr = &que->txr;

	qidx_t processed = 0;
	int updated;
	qidx_t cur, prev, ntxd, rs_cidx;
	int32_t delta;
	uint8_t status;

	rs_cidx = txr->tx_rs_cidx;
	if (rs_cidx == txr->tx_rs_pidx)
		return (0);
	cur = txr->tx_rsq[rs_cidx];
	MPASS(cur != QIDX_INVALID);
	status = txr->tx_base[cur].upper.fields.status;
	updated = !!(status & E1000_TXD_STAT_DD);

	if (!updated)
		return (0);

	/*
	 * If clear is false, just let the caller know that there
	 * are descriptors to reclaim.
	 */
	if (!clear)
		return (1);

	prev = txr->tx_cidx_processed;
	ntxd = scctx->isc_ntxd[0];
	do {
		MPASS(prev != cur);
		delta = (int32_t)cur - (int32_t)prev;
		if (delta < 0)
			delta += ntxd;
		MPASS(delta > 0);
		DPRINTF(iflib_get_dev(sc->ctx),
			      "%s: cidx_processed=%u cur=%u clear=%d delta=%d\n",
			      __FUNCTION__, prev, cur, clear, delta);

		processed += delta;
		prev  = cur;
		rs_cidx = (rs_cidx + 1) & (ntxd-1);
		if (rs_cidx  == txr->tx_rs_pidx)
			break;
		cur = txr->tx_rsq[rs_cidx];
		MPASS(cur != QIDX_INVALID);
		status = txr->tx_base[cur].upper.fields.status;
	} while ((status & E1000_TXD_STAT_DD));

	txr->tx_rs_cidx = rs_cidx;
	txr->tx_cidx_processed = prev;
	return(processed);
}

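/*
 * iflib rxd_refill hooks: hand free receive buffers back to the hardware by
 * writing their physical addresses into the RX descriptor ring and clearing
 * the status bits.  The lem variant fills legacy descriptors, the em variant
 * fills extended descriptors.
 */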
static void
lem_isc_rxd_refill(void *arg, if_rxd_update_t iru)
{
	struct e1000_softc *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_rx_queue *que = &sc->rx_queues[iru->iru_qsidx];
	struct rx_ring *rxr = &que->rxr;
	struct e1000_rx_desc *rxd;
	uint64_t *paddrs;
	uint32_t next_pidx, pidx;
	uint16_t count;
	int i;

	paddrs = iru->iru_paddrs;
	pidx = iru->iru_pidx;
	count = iru->iru_count;

	for (i = 0, next_pidx = pidx; i < count; i++) {
		rxd = (struct e1000_rx_desc *)&rxr->rx_base[next_pidx];
		rxd->buffer_addr = htole64(paddrs[i]);
		/* status bits must be cleared */
		rxd->status = 0;

		if (++next_pidx == scctx->isc_nrxd[0])
			next_pidx = 0;
	}
}

static void
em_isc_rxd_refill(void *arg, if_rxd_update_t iru)
{
	struct e1000_softc *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	uint16_t rxqid = iru->iru_qsidx;
	struct em_rx_queue *que = &sc->rx_queues[rxqid];
	struct rx_ring *rxr = &que->rxr;
	union e1000_rx_desc_extended *rxd;
	uint64_t *paddrs;
	uint32_t next_pidx, pidx;
	uint16_t count;
	int i;

	paddrs = iru->iru_paddrs;
	pidx = iru->iru_pidx;
	count = iru->iru_count;

	for (i = 0, next_pidx = pidx; i < count; i++) {
		rxd = &rxr->rx_base[next_pidx];
		rxd->read.buffer_addr = htole64(paddrs[i]);
		/* DD bits must be cleared */
		rxd->wb.upper.status_error = 0;

		if (++next_pidx == scctx->isc_nrxd[0])
			next_pidx = 0;
	}
}

static void
em_isc_rxd_flush(void *arg, uint16_t rxqid, uint8_t flid __unused,
    qidx_t pidx)
{
	struct e1000_softc *sc = arg;
	struct em_rx_queue *que = &sc->rx_queues[rxqid];
	struct rx_ring *rxr = &que->rxr;

	E1000_WRITE_REG(&sc->hw, E1000_RDT(rxr->me), pidx);
}

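/*
 * iflib rxd_available hooks: starting at 'idx', count how many complete
 * packets (descriptors with DD set, terminated by EOP) the hardware has
 * written back, stopping once 'budget' packets have been seen.
 */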
static int
lem_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget)
{
	struct e1000_softc *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_rx_queue *que = &sc->rx_queues[rxqid];
	struct rx_ring *rxr = &que->rxr;
	struct e1000_rx_desc *rxd;
	uint32_t staterr = 0;
	int cnt, i;

	for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) {
		rxd = (struct e1000_rx_desc *)&rxr->rx_base[i];
		staterr = rxd->status;

		if ((staterr & E1000_RXD_STAT_DD) == 0)
			break;
		if (++i == scctx->isc_nrxd[0])
			i = 0;
		if (staterr & E1000_RXD_STAT_EOP)
			cnt++;
	}
	return (cnt);
}

static int
em_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget)
{
	struct e1000_softc *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_rx_queue *que = &sc->rx_queues[rxqid];
	struct rx_ring *rxr = &que->rxr;
	union e1000_rx_desc_extended *rxd;
	uint32_t staterr = 0;
	int cnt, i;

	for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) {
		rxd = &rxr->rx_base[i];
		staterr = le32toh(rxd->wb.upper.status_error);

		if ((staterr & E1000_RXD_STAT_DD) == 0)
			break;
		if (++i == scctx->isc_nrxd[0])
			i = 0;
		if (staterr & E1000_RXD_STAT_EOP)
			cnt++;
	}
	return (cnt);
}

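/*
 * iflib rxd_pkt_get hooks: gather the descriptors that make up one received
 * packet (fragments until EOP) into ri->iri_frags, extract the length, VLAN
 * tag and checksum status, and clear the descriptor status so the slot can
 * be refilled.
 */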
static int
lem_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri)
{
	struct e1000_softc *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_rx_queue *que = &sc->rx_queues[ri->iri_qsidx];
	struct rx_ring *rxr = &que->rxr;
	struct e1000_rx_desc *rxd;
	uint16_t len;
	uint32_t status, errors;
	bool eop;
	int i, cidx;

	status = errors = i = 0;
	cidx = ri->iri_cidx;

	do {
		rxd = (struct e1000_rx_desc *)&rxr->rx_base[cidx];
		status = rxd->status;
		errors = rxd->errors;

		/* Error Checking then decrement count */
		MPASS ((status & E1000_RXD_STAT_DD) != 0);

		len = le16toh(rxd->length);
		ri->iri_len += len;

		eop = (status & E1000_RXD_STAT_EOP) != 0;

		/* Make sure bad packets are discarded */
		if (errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
			sc->dropped_pkts++;
			/* XXX fixup if common */
			return (EBADMSG);
		}

		ri->iri_frags[i].irf_flid = 0;
		ri->iri_frags[i].irf_idx = cidx;
		ri->iri_frags[i].irf_len = len;
		/* Zero out the receive descriptor's status. */
		rxd->status = 0;

		if (++cidx == scctx->isc_nrxd[0])
			cidx = 0;
		i++;
	} while (!eop);

	/* XXX add a faster way to look this up */
	if (sc->hw.mac.type >= e1000_82543)
		em_receive_checksum(status, errors, ri);

	if (status & E1000_RXD_STAT_VP) {
		ri->iri_vtag = le16toh(rxd->special);
		ri->iri_flags |= M_VLANTAG;
	}

	ri->iri_nfrags = i;

	return (0);
}

static int
em_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri)
{
	struct e1000_softc *sc = arg;
	if_softc_ctx_t scctx = sc->shared;
	struct em_rx_queue *que = &sc->rx_queues[ri->iri_qsidx];
	struct rx_ring *rxr = &que->rxr;
	union e1000_rx_desc_extended *rxd;

	uint16_t len;
	uint32_t pkt_info;
	uint32_t staterr = 0;
	bool eop;
	int i, cidx;

	i = 0;
	cidx = ri->iri_cidx;

	do {
		rxd = &rxr->rx_base[cidx];
		staterr = le32toh(rxd->wb.upper.status_error);
		pkt_info = le32toh(rxd->wb.lower.mrq);

		/* Error Checking then decrement count */
		MPASS ((staterr & E1000_RXD_STAT_DD) != 0);

		len = le16toh(rxd->wb.upper.length);
		ri->iri_len += len;

		eop = (staterr & E1000_RXD_STAT_EOP) != 0;

		/* Make sure bad packets are discarded */
		if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
			sc->dropped_pkts++;
			return EBADMSG;
		}

		ri->iri_frags[i].irf_flid = 0;
		ri->iri_frags[i].irf_idx = cidx;
		ri->iri_frags[i].irf_len = len;
		/* Zero out the receive descriptor's status. */
		rxd->wb.upper.status_error &= htole32(~0xFF);

		if (++cidx == scctx->isc_nrxd[0])
			cidx = 0;
		i++;
	} while (!eop);

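	/*
	 * The extended descriptor's status_error word carries the error
	 * bits in its upper byte; shift them down to match the legacy
	 * (status, errors) interface of em_receive_checksum().
	 */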
	if (scctx->isc_capenable & IFCAP_RXCSUM)
		em_receive_checksum(staterr, staterr >> 24, ri);

	if (staterr & E1000_RXD_STAT_VP) {
		ri->iri_vtag = le16toh(rxd->wb.upper.vlan);
		ri->iri_flags |= M_VLANTAG;
	}

	ri->iri_flowid = le32toh(rxd->wb.lower.hi_dword.rss);
	ri->iri_rsstype = em_determine_rsstype(pkt_info);

	ri->iri_nfrags = i;
	return (0);
}

/*********************************************************************
 *
 *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack about the status of the checksum so that it
 *  doesn't spend time verifying the checksum.
 *
 *********************************************************************/
static void
em_receive_checksum(uint16_t status, uint8_t errors, if_rxd_info_t ri)
{
	if (__predict_false(status & E1000_RXD_STAT_IXSM))
		return;

	/* If there is a layer 3 or 4 error we are done */
	if (__predict_false(errors & (E1000_RXD_ERR_IPE | E1000_RXD_ERR_TCPE)))
		return;

	/* IP Checksum Good */
	if (status & E1000_RXD_STAT_IPCS)
		ri->iri_csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID);

	/* Valid L4 (TCP/UDP) checksum */
	if (__predict_true(status &
	    (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))) {
		ri->iri_csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
		ri->iri_csum_data = htons(0xffff);
	}
}

/********************************************************************
 *
 *  Parse the packet type to determine the appropriate hash
 *
 ******************************************************************/
static int
em_determine_rsstype(uint32_t pkt_info)
{
	switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) {
	case E1000_RXDADV_RSSTYPE_IPV4_TCP:
		return M_HASHTYPE_RSS_TCP_IPV4;
	case E1000_RXDADV_RSSTYPE_IPV4:
		return M_HASHTYPE_RSS_IPV4;
	case E1000_RXDADV_RSSTYPE_IPV6_TCP:
		return M_HASHTYPE_RSS_TCP_IPV6;
	case E1000_RXDADV_RSSTYPE_IPV6_EX:
		return M_HASHTYPE_RSS_IPV6_EX;
	case E1000_RXDADV_RSSTYPE_IPV6:
		return M_HASHTYPE_RSS_IPV6;
	case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX:
		return M_HASHTYPE_RSS_TCP_IPV6_EX;
	default:
		return M_HASHTYPE_OPAQUE;
	}
}
795