xref: /freebsd/sys/dev/ixgbe/ix_txrx.c (revision d6b92ffa)
1 /******************************************************************************
2 
3   Copyright (c) 2001-2017, Intel Corporation
4   All rights reserved.
5 
6   Redistribution and use in source and binary forms, with or without
7   modification, are permitted provided that the following conditions are met:
8 
9    1. Redistributions of source code must retain the above copyright notice,
10       this list of conditions and the following disclaimer.
11 
12    2. Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in the
14       documentation and/or other materials provided with the distribution.
15 
16    3. Neither the name of the Intel Corporation nor the names of its
17       contributors may be used to endorse or promote products derived from
18       this software without specific prior written permission.
19 
20   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30   POSSIBILITY OF SUCH DAMAGE.
31 
32 ******************************************************************************/
33 /*$FreeBSD$*/
34 
35 
36 #ifndef IXGBE_STANDALONE_BUILD
37 #include "opt_inet.h"
38 #include "opt_inet6.h"
39 #include "opt_rss.h"
40 #endif
41 
42 #include "ixgbe.h"
43 
44 /*
45  * HW RSC control:
46  *  This feature only works with
47  *  IPv4, and only on 82599 and later.
48  *  It also causes IP forwarding to
49  *  fail, and unlike LRO that cannot be
50  *  controlled by the stack. For all of
51  *  these reasons it is left off by default;
52  *  there is no tunable interface, so
53  *  enabling it requires changing this
54  *  flag and recompiling.
55  */
56 static bool ixgbe_rsc_enable = FALSE;
57 
58 /*
59  * For Flow Director: this is the
60  * number of TX packets we sample
61  * for the filter pool; with the default
62  * of 20, every 20th packet is probed.
63  *
64  * This feature can be disabled by
65  * setting this to 0.
66  */
67 static int atr_sample_rate = 20;
68 
69 /************************************************************************
70  *  Local Function prototypes
71  ************************************************************************/
72 static void          ixgbe_setup_transmit_ring(struct tx_ring *);
73 static void          ixgbe_free_transmit_buffers(struct tx_ring *);
74 static int           ixgbe_setup_receive_ring(struct rx_ring *);
75 static void          ixgbe_free_receive_buffers(struct rx_ring *);
76 static void          ixgbe_rx_checksum(u32, struct mbuf *, u32);
77 static void          ixgbe_refresh_mbufs(struct rx_ring *, int);
78 static int           ixgbe_xmit(struct tx_ring *, struct mbuf **);
79 static int           ixgbe_tx_ctx_setup(struct tx_ring *,
80                                         struct mbuf *, u32 *, u32 *);
81 static int           ixgbe_tso_setup(struct tx_ring *,
82                                      struct mbuf *, u32 *, u32 *);
83 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
84 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
85                                     struct mbuf *, u32);
86 static int           ixgbe_dma_malloc(struct adapter *, bus_size_t,
87                                       struct ixgbe_dma_alloc *, int);
88 static void          ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
89 
90 /************************************************************************
91  * ixgbe_legacy_start_locked - Transmit entry point
92  *
93  *   Called by the stack to initiate a transmit.
94  *   The driver will remain in this routine as long as there are
95  *   packets to transmit and transmit resources are available.
96  *   In case resources are not available, the stack is notified
97  *   and the packet is requeued.
98  ************************************************************************/
99 int
100 ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
101 {
102 	struct mbuf    *m_head;
103 	struct adapter *adapter = txr->adapter;
104 
105 	IXGBE_TX_LOCK_ASSERT(txr);
106 
107 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
108 		return (ENETDOWN);
109 	if (!adapter->link_active)
110 		return (ENETDOWN);
111 
112 	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
113 		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
114 			break;
115 
116 		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
117 		if (m_head == NULL)
118 			break;
119 
120 		if (ixgbe_xmit(txr, &m_head)) {
121 			if (m_head != NULL)
122 				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
123 			break;
124 		}
125 		/* Send a copy of the frame to the BPF listener */
126 		ETHER_BPF_MTAP(ifp, m_head);
127 	}
128 
129 	return IXGBE_SUCCESS;
130 } /* ixgbe_legacy_start_locked */
131 
132 /************************************************************************
133  * ixgbe_legacy_start
134  *
135  *   Called by the stack, this always uses the first tx ring,
136  *   and should not be used with multiqueue tx enabled.
137  ************************************************************************/
138 void
139 ixgbe_legacy_start(struct ifnet *ifp)
140 {
141 	struct adapter *adapter = ifp->if_softc;
142 	struct tx_ring *txr = adapter->tx_rings;
143 
144 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
145 		IXGBE_TX_LOCK(txr);
146 		ixgbe_legacy_start_locked(ifp, txr);
147 		IXGBE_TX_UNLOCK(txr);
148 	}
149 } /* ixgbe_legacy_start */
150 
151 /************************************************************************
152  * ixgbe_mq_start - Multiqueue Transmit Entry Point
153  *
154  *   (if_transmit function)
155  ************************************************************************/
156 int
157 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
158 {
159 	struct adapter  *adapter = ifp->if_softc;
160 	struct ix_queue *que;
161 	struct tx_ring  *txr;
162 	int             i, err = 0;
163 	uint32_t        bucket_id;
164 
165 	/*
166 	 * When doing RSS, map it to the same outbound queue
167 	 * as the incoming flow would be mapped to.
168 	 *
169 	 * If everything is set up correctly, it should be the
170 	 * same bucket as the one the current CPU maps to.
171 	 */
172 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
173 		if ((adapter->feat_en & IXGBE_FEATURE_RSS) &&
174 		    (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
175 		    &bucket_id) == 0)) {
176 			i = bucket_id % adapter->num_queues;
177 #ifdef IXGBE_DEBUG
178 			if (bucket_id >= adapter->num_queues)
179 				if_printf(ifp,
180 				    "bucket_id (%d) >= num_queues (%d)\n",
181 				    bucket_id, adapter->num_queues);
182 #endif
183 		} else
184 			i = m->m_pkthdr.flowid % adapter->num_queues;
185 	} else
186 		i = curcpu % adapter->num_queues;
187 
188 	/* Check for a hung queue and pick alternative */
189 	if (((1 << i) & adapter->active_queues) == 0)
190 		i = ffsl(adapter->active_queues);
191 
192 	txr = &adapter->tx_rings[i];
193 	que = &adapter->queues[i];
194 
195 	err = drbr_enqueue(ifp, txr->br, m);
196 	if (err)
197 		return (err);
198 	if (IXGBE_TX_TRYLOCK(txr)) {
199 		ixgbe_mq_start_locked(ifp, txr);
200 		IXGBE_TX_UNLOCK(txr);
201 	} else
202 		taskqueue_enqueue(que->tq, &txr->txq_task);
203 
204 	return (0);
205 } /* ixgbe_mq_start */
206 
207 /************************************************************************
208  * ixgbe_mq_start_locked
209  ************************************************************************/
210 int
211 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
212 {
213 	struct mbuf    *next;
214 	int            enqueued = 0, err = 0;
215 
216 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
217 		return (ENETDOWN);
218 	if (txr->adapter->link_active == 0)
219 		return (ENETDOWN);
220 
221 	/* Process the queue */
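	/*
	 * On newer FreeBSD (>= 901504) we peek/advance/putback so a
	 * packet is not lost when ixgbe_xmit() fails; older versions
	 * dequeue up front and re-enqueue on failure instead.
	 */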
222 #if __FreeBSD_version < 901504
223 	next = drbr_dequeue(ifp, txr->br);
224 	while (next != NULL) {
225 		if ((err = ixgbe_xmit(txr, &next)) != 0) {
226 			if (next != NULL)
227 				err = drbr_enqueue(ifp, txr->br, next);
228 #else
229 	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
230 		err = ixgbe_xmit(txr, &next);
231 		if (err != 0) {
232 			if (next == NULL)
233 				drbr_advance(ifp, txr->br);
234 			else
235 				drbr_putback(ifp, txr->br, next);
236 #endif
237 			break;
238 		}
239 #if __FreeBSD_version >= 901504
240 		drbr_advance(ifp, txr->br);
241 #endif
242 		enqueued++;
243 #if __FreeBSD_version >= 1100036
244 		/*
245 		 * Since we're looking at the tx ring, we can check
246 		 * to see if we're a VF by examining our tail register
247 		 * address.
248 		 */
249 		if ((txr->adapter->feat_en & IXGBE_FEATURE_VF) &&
250 		    (next->m_flags & M_MCAST))
251 			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
252 #endif
253 		/* Send a copy of the frame to the BPF listener */
254 		ETHER_BPF_MTAP(ifp, next);
255 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
256 			break;
257 #if __FreeBSD_version < 901504
258 		next = drbr_dequeue(ifp, txr->br);
259 #endif
260 	}
261 
262 	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->adapter))
263 		ixgbe_txeof(txr);
264 
265 	return (err);
266 } /* ixgbe_mq_start_locked */
267 
268 /************************************************************************
269  * ixgbe_deferred_mq_start
270  *
271  *   Called from a taskqueue to drain queued transmit packets.
272  ************************************************************************/
273 void
274 ixgbe_deferred_mq_start(void *arg, int pending)
275 {
276 	struct tx_ring *txr = arg;
277 	struct adapter *adapter = txr->adapter;
278 	struct ifnet   *ifp = adapter->ifp;
279 
280 	IXGBE_TX_LOCK(txr);
281 	if (!drbr_empty(ifp, txr->br))
282 		ixgbe_mq_start_locked(ifp, txr);
283 	IXGBE_TX_UNLOCK(txr);
284 } /* ixgbe_deferred_mq_start */
285 
286 /************************************************************************
287  * ixgbe_qflush - Flush all ring buffers
288  ************************************************************************/
289 void
290 ixgbe_qflush(struct ifnet *ifp)
291 {
292 	struct adapter *adapter = ifp->if_softc;
293 	struct tx_ring *txr = adapter->tx_rings;
294 	struct mbuf    *m;
295 
296 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
297 		IXGBE_TX_LOCK(txr);
298 		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
299 			m_freem(m);
300 		IXGBE_TX_UNLOCK(txr);
301 	}
302 	if_qflush(ifp);
303 } /* ixgbe_qflush */
304 
305 
306 /************************************************************************
307  * ixgbe_xmit
308  *
309  *   Maps the mbufs to tx descriptors, allowing the
310  *   TX engine to transmit the packets.
311  *
312  *   Return 0 on success, positive on failure
313  ************************************************************************/
314 static int
315 ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
316 {
317 	struct adapter          *adapter = txr->adapter;
318 	struct ixgbe_tx_buf     *txbuf;
319 	union ixgbe_adv_tx_desc *txd = NULL;
320 	struct mbuf             *m_head;
321 	int                     i, j, error, nsegs;
322 	int                     first;
323 	u32                     olinfo_status = 0, cmd_type_len;
324 	bool                    remap = TRUE;
325 	bus_dma_segment_t       segs[adapter->num_segs];
326 	bus_dmamap_t            map;
327 
328 	m_head = *m_headp;
329 
330 	/* Basic descriptor defines */
331 	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
332 	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
333 
334 	if (m_head->m_flags & M_VLANTAG)
335 		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
336 
337 	/*
338 	 * It is important to capture the first descriptor
339 	 * used, because its buffer will later hold the EOP
340 	 * descriptor we tell the hardware to report back on.
341 	 */
342 	first = txr->next_avail_desc;
343 	txbuf = &txr->tx_buffers[first];
344 	map = txbuf->map;
345 
346 	/*
347 	 * Map the packet for DMA.
348 	 */
349 retry:
350 	error = bus_dmamap_load_mbuf_sg(txr->txtag, map, *m_headp, segs,
351 	    &nsegs, BUS_DMA_NOWAIT);
352 
353 	if (__predict_false(error)) {
354 		struct mbuf *m;
355 
356 		switch (error) {
357 		case EFBIG:
358 			/* Try it again? - one try */
359 			if (remap == TRUE) {
360 				remap = FALSE;
361 				/*
362 				 * XXX: m_defrag will choke on
363 				 * non-MCLBYTES-sized clusters
364 				 */
365 				m = m_defrag(*m_headp, M_NOWAIT);
366 				if (m == NULL) {
367 					adapter->mbuf_defrag_failed++;
368 					m_freem(*m_headp);
369 					*m_headp = NULL;
370 					return (ENOBUFS);
371 				}
372 				*m_headp = m;
373 				goto retry;
374 			} else
375 				return (error);
376 		case ENOMEM:
377 			txr->no_tx_dma_setup++;
378 			return (error);
379 		default:
380 			txr->no_tx_dma_setup++;
381 			m_freem(*m_headp);
382 			*m_headp = NULL;
383 			return (error);
384 		}
385 	}
386 
387 	/* Make certain there are enough descriptors */
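	/* (nsegs + 2 leaves room for the offload context descriptor and, presumably, one spare slot) */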
388 	if (txr->tx_avail < (nsegs + 2)) {
389 		txr->no_desc_avail++;
390 		bus_dmamap_unload(txr->txtag, map);
391 		return (ENOBUFS);
392 	}
393 	m_head = *m_headp;
394 
395 	/*
396 	 * Set up the appropriate offload context;
397 	 * this may consume the first descriptor.
398 	 */
399 	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
400 	if (__predict_false(error)) {
401 		if (error == ENOBUFS)
402 			*m_headp = NULL;
403 		return (error);
404 	}
405 
406 	/* Do the flow director magic */
407 	if ((adapter->feat_en & IXGBE_FEATURE_FDIR) &&
408 	    (txr->atr_sample) && (!adapter->fdir_reinit)) {
409 		++txr->atr_count;
410 		if (txr->atr_count >= atr_sample_rate) {
411 			ixgbe_atr(txr, m_head);
412 			txr->atr_count = 0;
413 		}
414 	}
415 
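	/* CC (check context): associate this packet with the offload context programmed above */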
416 	olinfo_status |= IXGBE_ADVTXD_CC;
417 	i = txr->next_avail_desc;
418 	for (j = 0; j < nsegs; j++) {
419 		bus_size_t seglen;
420 		bus_addr_t segaddr;
421 
422 		txbuf = &txr->tx_buffers[i];
423 		txd = &txr->tx_base[i];
424 		seglen = segs[j].ds_len;
425 		segaddr = htole64(segs[j].ds_addr);
426 
427 		txd->read.buffer_addr = segaddr;
428 		txd->read.cmd_type_len = htole32(txr->txd_cmd |
429 		    cmd_type_len | seglen);
430 		txd->read.olinfo_status = htole32(olinfo_status);
431 
432 		if (++i == txr->num_desc)
433 			i = 0;
434 	}
435 
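	/* Final segment: mark End Of Packet and request a status (DD) writeback */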
436 	txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
437 	txr->tx_avail -= nsegs;
438 	txr->next_avail_desc = i;
439 
440 	txbuf->m_head = m_head;
441 	/*
442 	 * Here we swap the maps so the last descriptor,
443 	 * which gets the completion interrupt, holds the
444 	 * real (loaded) map, and the first descriptor gets
445 	 * the previously unused map from this last buffer.
446 	 */
447 	txr->tx_buffers[first].map = txbuf->map;
448 	txbuf->map = map;
449 	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
450 
451 	/* Set the EOP descriptor that will be marked done */
452 	txbuf = &txr->tx_buffers[first];
453 	txbuf->eop = txd;
454 
455 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
456 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
457 	/*
458 	 * Advance the Transmit Descriptor Tail (TDT); this tells the
459 	 * hardware that this frame is available to transmit.
460 	 */
461 	++txr->total_packets;
462 	IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
463 
464 	/* Mark queue as having work */
465 	if (txr->busy == 0)
466 		txr->busy = 1;
467 
468 	return (0);
469 } /* ixgbe_xmit */
470 
471 
472 /************************************************************************
473  * ixgbe_allocate_transmit_buffers
474  *
475  *   Allocate memory for tx_buffer structures. The tx_buffer stores all
476  *   the information needed to transmit a packet on the wire. This is
477  *   called only once at attach; setup is done on every reset.
478  ************************************************************************/
479 static int
480 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
481 {
482 	struct adapter      *adapter = txr->adapter;
483 	device_t            dev = adapter->dev;
484 	struct ixgbe_tx_buf *txbuf;
485 	int                 error, i;
486 
487 	/*
488 	 * Setup DMA descriptor areas.
489 	 */
490 	error = bus_dma_tag_create(
491 	         /*      parent */ bus_get_dma_tag(adapter->dev),
492 	         /*   alignment */ 1,
493 	         /*      bounds */ 0,
494 	         /*     lowaddr */ BUS_SPACE_MAXADDR,
495 	         /*    highaddr */ BUS_SPACE_MAXADDR,
496 	         /*      filter */ NULL,
497 	         /*   filterarg */ NULL,
498 	         /*     maxsize */ IXGBE_TSO_SIZE,
499 	         /*   nsegments */ adapter->num_segs,
500 	         /*  maxsegsize */ PAGE_SIZE,
501 	         /*       flags */ 0,
502 	         /*    lockfunc */ NULL,
503 	         /* lockfuncarg */ NULL,
504 	                           &txr->txtag);
505 	if (error != 0) {
506 		device_printf(dev, "Unable to allocate TX DMA tag\n");
507 		goto fail;
508 	}
509 
510 	txr->tx_buffers =
511 	    (struct ixgbe_tx_buf *)malloc(sizeof(struct ixgbe_tx_buf) *
512 	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
513 	if (txr->tx_buffers == NULL) {
514 		device_printf(dev, "Unable to allocate tx_buffer memory\n");
515 		error = ENOMEM;
516 		goto fail;
517 	}
518 
519 	/* Create the descriptor buffer dma maps */
520 	txbuf = txr->tx_buffers;
521 	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
522 		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
523 		if (error != 0) {
524 			device_printf(dev, "Unable to create TX DMA map\n");
525 			goto fail;
526 		}
527 	}
528 
529 	return 0;
530 fail:
531 	/* Free everything; this handles the case where we failed partway through */
532 	ixgbe_free_transmit_structures(adapter);
533 
534 	return (error);
535 } /* ixgbe_allocate_transmit_buffers */
536 
537 /************************************************************************
538  * ixgbe_setup_transmit_ring - Initialize a transmit ring.
539  ************************************************************************/
540 static void
541 ixgbe_setup_transmit_ring(struct tx_ring *txr)
542 {
543 	struct adapter        *adapter = txr->adapter;
544 	struct ixgbe_tx_buf   *txbuf;
545 #ifdef DEV_NETMAP
546 	struct netmap_adapter *na = NA(adapter->ifp);
547 	struct netmap_slot    *slot;
548 #endif /* DEV_NETMAP */
549 
550 	/* Clear the old ring contents */
551 	IXGBE_TX_LOCK(txr);
552 
553 #ifdef DEV_NETMAP
554 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
555 		/*
556 		 * (under lock): if in netmap mode, do some consistency
557 		 * checks and set slot to entry 0 of the netmap ring.
558 		 */
559 		slot = netmap_reset(na, NR_TX, txr->me, 0);
560 	}
561 #endif /* DEV_NETMAP */
562 
563 	bzero((void *)txr->tx_base,
564 	    (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
565 	/* Reset indices */
566 	txr->next_avail_desc = 0;
567 	txr->next_to_clean = 0;
568 
569 	/* Free any existing tx buffers. */
570 	txbuf = txr->tx_buffers;
571 	for (int i = 0; i < txr->num_desc; i++, txbuf++) {
572 		if (txbuf->m_head != NULL) {
573 			bus_dmamap_sync(txr->txtag, txbuf->map,
574 			    BUS_DMASYNC_POSTWRITE);
575 			bus_dmamap_unload(txr->txtag, txbuf->map);
576 			m_freem(txbuf->m_head);
577 			txbuf->m_head = NULL;
578 		}
579 
580 #ifdef DEV_NETMAP
581 		/*
582 		 * In netmap mode, set the map for the packet buffer.
583 		 * NOTE: Some drivers (not this one) also need to set
584 		 * the physical buffer address in the NIC ring.
585 		 * Slots in the netmap ring (indexed by "si") are
586 		 * kring->nkr_hwofs positions "ahead" wrt the
587 		 * corresponding slot in the NIC ring. In some drivers
588 		 * (not here) nkr_hwofs can be negative. Function
589 		 * netmap_idx_n2k() handles wraparounds properly.
590 		 */
591 		if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
592 			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
593 			netmap_load_map(na, txr->txtag,
594 			    txbuf->map, NMB(na, slot + si));
595 		}
596 #endif /* DEV_NETMAP */
597 
598 		/* Clear the EOP descriptor pointer */
599 		txbuf->eop = NULL;
600 	}
601 
602 	/* Set the rate at which we sample packets */
603 	if (adapter->feat_en & IXGBE_FEATURE_FDIR)
604 		txr->atr_sample = atr_sample_rate;
605 
606 	/* Set number of descriptors available */
607 	txr->tx_avail = adapter->num_tx_desc;
608 
609 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
610 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
611 	IXGBE_TX_UNLOCK(txr);
612 } /* ixgbe_setup_transmit_ring */
613 
614 /************************************************************************
615  * ixgbe_setup_transmit_structures - Initialize all transmit rings.
616  ************************************************************************/
617 int
618 ixgbe_setup_transmit_structures(struct adapter *adapter)
619 {
620 	struct tx_ring *txr = adapter->tx_rings;
621 
622 	for (int i = 0; i < adapter->num_queues; i++, txr++)
623 		ixgbe_setup_transmit_ring(txr);
624 
625 	return (0);
626 } /* ixgbe_setup_transmit_structures */
627 
628 /************************************************************************
629  * ixgbe_free_transmit_structures - Free all transmit rings.
630  ************************************************************************/
631 void
632 ixgbe_free_transmit_structures(struct adapter *adapter)
633 {
634 	struct tx_ring *txr = adapter->tx_rings;
635 
636 	for (int i = 0; i < adapter->num_queues; i++, txr++) {
637 		IXGBE_TX_LOCK(txr);
638 		ixgbe_free_transmit_buffers(txr);
639 		ixgbe_dma_free(adapter, &txr->txdma);
640 		IXGBE_TX_UNLOCK(txr);
641 		IXGBE_TX_LOCK_DESTROY(txr);
642 	}
643 	free(adapter->tx_rings, M_DEVBUF);
644 } /* ixgbe_free_transmit_structures */
645 
646 /************************************************************************
647  * ixgbe_free_transmit_buffers
648  *
649  *   Free transmit ring related data structures.
650  ************************************************************************/
651 static void
652 ixgbe_free_transmit_buffers(struct tx_ring *txr)
653 {
654 	struct adapter      *adapter = txr->adapter;
655 	struct ixgbe_tx_buf *tx_buffer;
656 	int                 i;
657 
658 	INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
659 
660 	if (txr->tx_buffers == NULL)
661 		return;
662 
663 	tx_buffer = txr->tx_buffers;
664 	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
665 		if (tx_buffer->m_head != NULL) {
666 			bus_dmamap_sync(txr->txtag, tx_buffer->map,
667 			    BUS_DMASYNC_POSTWRITE);
668 			bus_dmamap_unload(txr->txtag, tx_buffer->map);
669 			m_freem(tx_buffer->m_head);
670 			tx_buffer->m_head = NULL;
671 			if (tx_buffer->map != NULL) {
672 				bus_dmamap_destroy(txr->txtag, tx_buffer->map);
673 				tx_buffer->map = NULL;
674 			}
675 		} else if (tx_buffer->map != NULL) {
676 			bus_dmamap_unload(txr->txtag, tx_buffer->map);
677 			bus_dmamap_destroy(txr->txtag, tx_buffer->map);
678 			tx_buffer->map = NULL;
679 		}
680 	}
681 	if (txr->br != NULL)
682 		buf_ring_free(txr->br, M_DEVBUF);
683 	if (txr->tx_buffers != NULL) {
684 		free(txr->tx_buffers, M_DEVBUF);
685 		txr->tx_buffers = NULL;
686 	}
687 	if (txr->txtag != NULL) {
688 		bus_dma_tag_destroy(txr->txtag);
689 		txr->txtag = NULL;
690 	}
691 } /* ixgbe_free_transmit_buffers */
692 
693 /************************************************************************
694  * ixgbe_tx_ctx_setup
695  *
696  *   Advanced Context Descriptor setup for VLAN, CSUM or TSO
697  ************************************************************************/
698 static int
699 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
700     u32 *cmd_type_len, u32 *olinfo_status)
701 {
702 	struct ixgbe_adv_tx_context_desc *TXD;
703 	struct ether_vlan_header         *eh;
704 #ifdef INET
705 	struct ip                        *ip;
706 #endif
707 #ifdef INET6
708 	struct ip6_hdr                   *ip6;
709 #endif
710 	int                              ehdrlen, ip_hlen = 0;
711 	int                              offload = TRUE;
712 	int                              ctxd = txr->next_avail_desc;
713 	u32                              vlan_macip_lens = 0;
714 	u32                              type_tucmd_mlhl = 0;
715 	u16                              vtag = 0;
716 	u16                              etype;
717 	u8                               ipproto = 0;
718 	caddr_t                          l3d;
719 
720 
721 	/* First check if TSO is to be used */
722 	if (mp->m_pkthdr.csum_flags & (CSUM_IP_TSO | CSUM_IP6_TSO))
723 		return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
724 
725 	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
726 		offload = FALSE;
727 
728 	/* Indicate the whole packet as payload when not doing TSO */
729 	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
730 
731 	/* Now ready a context descriptor */
732 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
733 
734 	/*
735 	 * In advanced descriptors the vlan tag must
736 	 * be placed into the context descriptor. Hence
737 	 * we need to make one even if not doing offloads.
738 	 */
739 	if (mp->m_flags & M_VLANTAG) {
740 		vtag = htole16(mp->m_pkthdr.ether_vtag);
741 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
742 	} else if (!(txr->adapter->feat_en & IXGBE_FEATURE_NEEDS_CTXD) &&
743 	           (offload == FALSE))
744 		return (0);
745 
746 	/*
747 	 * Determine where frame payload starts.
748 	 * Jump over vlan headers if already present,
749 	 * helpful for QinQ too.
750 	 */
751 	eh = mtod(mp, struct ether_vlan_header *);
752 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
753 		etype = ntohs(eh->evl_proto);
754 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
755 	} else {
756 		etype = ntohs(eh->evl_encap_proto);
757 		ehdrlen = ETHER_HDR_LEN;
758 	}
759 
760 	/* Set the ether header length */
761 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
762 
763 	if (offload == FALSE)
764 		goto no_offloads;
765 
766 	/*
767 	 * If the first mbuf only includes the ethernet header,
768 	 * jump to the next one
769 	 * XXX: This assumes the stack splits mbufs containing headers
770 	 *      on header boundaries
771 	 * XXX: And assumes the entire IP header is contained in one mbuf
772 	 */
773 	if (mp->m_len == ehdrlen && mp->m_next)
774 		l3d = mtod(mp->m_next, caddr_t);
775 	else
776 		l3d = mtod(mp, caddr_t) + ehdrlen;
777 
778 	switch (etype) {
779 #ifdef INET
780 		case ETHERTYPE_IP:
781 			ip = (struct ip *)(l3d);
782 			ip_hlen = ip->ip_hl << 2;
783 			ipproto = ip->ip_p;
784 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
785 			/* Insert IPv4 checksum into data descriptors */
786 			if (mp->m_pkthdr.csum_flags & CSUM_IP) {
787 				ip->ip_sum = 0;
788 				*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
789 			}
790 			break;
791 #endif
792 #ifdef INET6
793 		case ETHERTYPE_IPV6:
794 			ip6 = (struct ip6_hdr *)(l3d);
795 			ip_hlen = sizeof(struct ip6_hdr);
796 			ipproto = ip6->ip6_nxt;
797 			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
798 			break;
799 #endif
800 		default:
801 			offload = FALSE;
802 			break;
803 	}
804 
805 	vlan_macip_lens |= ip_hlen;
806 
807 	/* No support for offloads for non-L4 next headers */
808 	switch (ipproto) {
809 		case IPPROTO_TCP:
810 			if (mp->m_pkthdr.csum_flags &
811 			    (CSUM_IP_TCP | CSUM_IP6_TCP))
812 				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
813 			else
814 				offload = false;
815 			break;
816 		case IPPROTO_UDP:
817 			if (mp->m_pkthdr.csum_flags &
818 			    (CSUM_IP_UDP | CSUM_IP6_UDP))
819 				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
820 			else
821 				offload = false;
822 			break;
823 		case IPPROTO_SCTP:
824 			if (mp->m_pkthdr.csum_flags &
825 			    (CSUM_IP_SCTP | CSUM_IP6_SCTP))
826 				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
827 			else
828 				offload = false;
829 			break;
830 		default:
831 			offload = false;
832 			break;
833 	}
834 
835 	if (offload) /* Insert L4 checksum into data descriptors */
836 		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
837 
838 no_offloads:
839 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
840 
841 	/* Now copy bits into descriptor */
842 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
843 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
844 	TXD->seqnum_seed = htole32(0);
845 	TXD->mss_l4len_idx = htole32(0);
846 
847 	/* We've consumed the first desc, adjust counters */
848 	if (++ctxd == txr->num_desc)
849 		ctxd = 0;
850 	txr->next_avail_desc = ctxd;
851 	--txr->tx_avail;
852 
853 	return (0);
854 } /* ixgbe_tx_ctx_setup */
855 
856 /************************************************************************
857  * ixgbe_tso_setup
858  *
859  *   Setup work for hardware segmentation offload (TSO) on
860  *   adapters using advanced tx descriptors
861  ************************************************************************/
862 static int
863 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
864     u32 *olinfo_status)
865 {
866 	struct ixgbe_adv_tx_context_desc *TXD;
867 	struct ether_vlan_header         *eh;
868 #ifdef INET6
869 	struct ip6_hdr                   *ip6;
870 #endif
871 #ifdef INET
872 	struct ip                        *ip;
873 #endif
874 	struct tcphdr                    *th;
875 	int                              ctxd, ehdrlen, ip_hlen, tcp_hlen;
876 	u32                              vlan_macip_lens = 0;
877 	u32                              type_tucmd_mlhl = 0;
878 	u32                              mss_l4len_idx = 0, paylen;
879 	u16                              vtag = 0, eh_type;
880 
881 	/*
882 	 * Determine where frame payload starts.
883 	 * Jump over vlan headers if already present
884 	 */
885 	eh = mtod(mp, struct ether_vlan_header *);
886 	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
887 		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
888 		eh_type = eh->evl_proto;
889 	} else {
890 		ehdrlen = ETHER_HDR_LEN;
891 		eh_type = eh->evl_encap_proto;
892 	}
893 
894 	switch (ntohs(eh_type)) {
895 #ifdef INET
896 	case ETHERTYPE_IP:
897 		ip = (struct ip *)(mp->m_data + ehdrlen);
898 		if (ip->ip_p != IPPROTO_TCP)
899 			return (ENXIO);
900 		ip->ip_sum = 0;
901 		ip_hlen = ip->ip_hl << 2;
902 		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
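		/* Pre-seed the TCP checksum with the pseudo-header sum; the hardware fills in the remainder for each TSO segment */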
903 		th->th_sum = in_pseudo(ip->ip_src.s_addr,
904 		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
905 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
906 		/* Tell transmit desc to also do IPv4 checksum. */
907 		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
908 		break;
909 #endif
910 #ifdef INET6
911 	case ETHERTYPE_IPV6:
912 		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
913 		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
914 		if (ip6->ip6_nxt != IPPROTO_TCP)
915 			return (ENXIO);
916 		ip_hlen = sizeof(struct ip6_hdr);
917 		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
918 		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
919 		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
920 		break;
921 #endif
922 	default:
923 		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
924 		    __func__, ntohs(eh_type));
925 		break;
926 	}
927 
928 	ctxd = txr->next_avail_desc;
929 	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
930 
931 	tcp_hlen = th->th_off << 2;
932 
933 	/* Payload length, excluding headers; passed to the hardware via olinfo_status */
934 	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
935 
936 	/* VLAN MACLEN IPLEN */
937 	if (mp->m_flags & M_VLANTAG) {
938 		vtag = htole16(mp->m_pkthdr.ether_vtag);
939 		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
940 	}
941 
942 	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
943 	vlan_macip_lens |= ip_hlen;
944 	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
945 
946 	/* ADV DTYPE TUCMD */
947 	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
948 	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
949 	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
950 
951 	/* MSS L4LEN IDX */
952 	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
953 	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
954 	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
955 
956 	TXD->seqnum_seed = htole32(0);
957 
958 	if (++ctxd == txr->num_desc)
959 		ctxd = 0;
960 
961 	txr->tx_avail--;
962 	txr->next_avail_desc = ctxd;
963 	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
964 	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
965 	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
966 	++txr->tso_tx;
967 
968 	return (0);
969 } /* ixgbe_tso_setup */
970 
971 
972 /************************************************************************
973  * ixgbe_txeof
974  *
975  *   Examine each tx_buffer in the used queue. If the hardware is done
976  *   processing the packet then free associated resources. The
977  *   tx_buffer is put back on the free queue.
978  ************************************************************************/
979 void
980 ixgbe_txeof(struct tx_ring *txr)
981 {
982 	struct adapter          *adapter = txr->adapter;
983 	struct ixgbe_tx_buf     *buf;
984 	union ixgbe_adv_tx_desc *txd;
985 	u32                     work, processed = 0;
986 	u32                     limit = adapter->tx_process_limit;
987 
988 	mtx_assert(&txr->tx_mtx, MA_OWNED);
989 
990 #ifdef DEV_NETMAP
991 	if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) &&
992 	    (adapter->ifp->if_capenable & IFCAP_NETMAP)) {
993 		struct netmap_adapter *na = NA(adapter->ifp);
994 		struct netmap_kring *kring = &na->tx_rings[txr->me];
995 		txd = txr->tx_base;
996 		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
997 		    BUS_DMASYNC_POSTREAD);
998 		/*
999 		 * In netmap mode, all the work is done in the context
1000 		 * of the client thread. Interrupt handlers only wake up
1001 		 * clients, which may be sleeping on individual rings
1002 		 * or on a global resource for all rings.
1003 		 * To implement tx interrupt mitigation, we wake up the client
1004 		 * thread roughly every half ring, even if the NIC interrupts
1005 		 * more frequently. This is implemented as follows:
1006 		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
1007 		 *   the slot that should wake up the thread (nkr_num_slots
1008 		 *   means the user thread should not be woken up);
1009 		 * - the driver ignores tx interrupts unless netmap_mitigate=0
1010 		 *   or the slot has the DD bit set.
1011 		 */
1012 		if (!netmap_mitigate ||
1013 		    (kring->nr_kflags < kring->nkr_num_slots &&
1014 		     txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
1015 			netmap_tx_irq(adapter->ifp, txr->me);
1016 		}
1017 		return;
1018 	}
1019 #endif /* DEV_NETMAP */
1020 
1021 	if (txr->tx_avail == txr->num_desc) {
1022 		txr->busy = 0;
1023 		return;
1024 	}
1025 
1026 	/* Get work starting point */
1027 	work = txr->next_to_clean;
1028 	buf = &txr->tx_buffers[work];
1029 	txd = &txr->tx_base[work];
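	/* Track position as a negative offset from the ring end; it reaches zero exactly at the wrap point */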
1030 	work -= txr->num_desc; /* The distance to ring end */
1031 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1032 	    BUS_DMASYNC_POSTREAD);
1033 
1034 	do {
1035 		union ixgbe_adv_tx_desc *eop = buf->eop;
1036 		if (eop == NULL) /* No work */
1037 			break;
1038 
1039 		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1040 			break;	/* I/O not complete */
1041 
1042 		if (buf->m_head) {
1043 			txr->bytes += buf->m_head->m_pkthdr.len;
1044 			bus_dmamap_sync(txr->txtag, buf->map,
1045 			    BUS_DMASYNC_POSTWRITE);
1046 			bus_dmamap_unload(txr->txtag, buf->map);
1047 			m_freem(buf->m_head);
1048 			buf->m_head = NULL;
1049 		}
1050 		buf->eop = NULL;
1051 		++txr->tx_avail;
1052 
1053 		/* For a multi-segment packet, clean all descriptors up to the EOP */
1054 		while (txd != eop) {
1055 			++txd;
1056 			++buf;
1057 			++work;
1058 			/* wrap the ring? */
1059 			if (__predict_false(!work)) {
1060 				work -= txr->num_desc;
1061 				buf = txr->tx_buffers;
1062 				txd = txr->tx_base;
1063 			}
1064 			if (buf->m_head) {
1065 				txr->bytes += buf->m_head->m_pkthdr.len;
1066 				bus_dmamap_sync(txr->txtag, buf->map,
1067 				    BUS_DMASYNC_POSTWRITE);
1068 				bus_dmamap_unload(txr->txtag, buf->map);
1069 				m_freem(buf->m_head);
1070 				buf->m_head = NULL;
1071 			}
1072 			++txr->tx_avail;
1073 			buf->eop = NULL;
1074 
1075 		}
1076 		++txr->packets;
1077 		++processed;
1078 
1079 		/* Try the next packet */
1080 		++txd;
1081 		++buf;
1082 		++work;
1083 		/* reset with a wrap */
1084 		if (__predict_false(!work)) {
1085 			work -= txr->num_desc;
1086 			buf = txr->tx_buffers;
1087 			txd = txr->tx_base;
1088 		}
1089 		prefetch(txd);
1090 	} while (__predict_true(--limit));
1091 
1092 	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1093 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1094 
1095 	work += txr->num_desc;
1096 	txr->next_to_clean = work;
1097 
1098 	/*
1099 	 * Queue hang detection: we know there is
1100 	 * work outstanding, or the early return above
1101 	 * would have been taken, so increment busy
1102 	 * if nothing managed to get cleaned; the
1103 	 * local timer then checks this count and
1104 	 * marks the queue HUNG if it exceeds a maximum.
1105 	 */
1106 	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1107 		++txr->busy;
1108 	/*
1109 	 * If anything was cleaned we reset state to 1;
1110 	 * note this will turn off HUNG if it was set.
1111 	 */
1112 	if (processed)
1113 		txr->busy = 1;
1114 
1115 	if (txr->tx_avail == txr->num_desc)
1116 		txr->busy = 0;
1117 
1118 	return;
1119 } /* ixgbe_txeof */
1120 
1121 /************************************************************************
1122  * ixgbe_rsc_count
1123  *
1124  *   Used to detect a descriptor that has been merged by Hardware RSC.
1125  ************************************************************************/
1126 static inline u32
1127 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1128 {
1129 	return (le32toh(rx->wb.lower.lo_dword.data) &
1130 	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1131 } /* ixgbe_rsc_count */
1132 
1133 /************************************************************************
1134  * ixgbe_setup_hw_rsc
1135  *
1136  *   Initialize Hardware RSC (LRO) feature on 82599
1137  *   for an RX ring; this is toggled by the LRO capability
1138  *   even though it is transparent to the stack.
1139  *
1140  *   NOTE: Since this HW feature only works with IPv4 and
1141  *         testing has shown soft LRO to be as effective,
1142  *         this feature will be disabled by default.
1143  ************************************************************************/
1144 static void
1145 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1146 {
1147 	struct adapter  *adapter = rxr->adapter;
1148 	struct ixgbe_hw *hw = &adapter->hw;
1149 	u32             rscctrl, rdrxctl;
1150 
1151 	/* If turning LRO/RSC off we need to disable it */
1152 	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1153 		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1154 		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
		/* Write the cleared enable bit back, otherwise RSC stays enabled */
		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1155 		return;
1156 	}
1157 
1158 	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1159 	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1160 #ifdef DEV_NETMAP
1161 	/* Always strip CRC unless Netmap disabled it */
1162 	if (!(adapter->feat_en & IXGBE_FEATURE_NETMAP) ||
1163 	    !(adapter->ifp->if_capenable & IFCAP_NETMAP) ||
1164 	    ix_crcstrip)
1165 #endif /* DEV_NETMAP */
1166 		rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1167 	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1168 	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1169 
1170 	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1171 	rscctrl |= IXGBE_RSCCTL_RSCEN;
1172 	/*
1173 	 * Limit the total number of descriptors that
1174 	 * can be combined, so it does not exceed 64K
1175 	 */
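	/* e.g. 16 descriptors x 2KB clusters = 32KB, well under the 64K cap */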
1176 	if (rxr->mbuf_sz == MCLBYTES)
1177 		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1178 	else if (rxr->mbuf_sz == MJUMPAGESIZE)
1179 		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1180 	else if (rxr->mbuf_sz == MJUM9BYTES)
1181 		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1182 	else  /* Using 16K cluster */
1183 		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1184 
1185 	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1186 
1187 	/* Enable TCP header recognition */
1188 	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1189 	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
1190 
1191 	/* Disable RSC for ACK packets */
1192 	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1193 	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1194 
1195 	rxr->hw_rsc = TRUE;
1196 } /* ixgbe_setup_hw_rsc */
1197 
1198 /************************************************************************
1199  * ixgbe_refresh_mbufs
1200  *
1201  *   Refresh mbuf buffers for RX descriptor rings
1202  *    - now keeps its own state so discards due to resource
1203  *      exhaustion are unnecessary; if an mbuf cannot be obtained
1204  *      it just returns, keeping its placeholder, so it can simply
1205  *      be called again later to retry.
1206  ************************************************************************/
1207 static void
1208 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1209 {
1210 	struct adapter      *adapter = rxr->adapter;
1211 	struct ixgbe_rx_buf *rxbuf;
1212 	struct mbuf         *mp;
1213 	bus_dma_segment_t   seg[1];
1214 	int                 i, j, nsegs, error;
1215 	bool                refreshed = FALSE;
1216 
1217 	i = j = rxr->next_to_refresh;
1218 	/* j runs one slot ahead of i and terminates the loop at 'limit' */
1219 	if (++j == rxr->num_desc)
1220 		j = 0;
1221 
1222 	while (j != limit) {
1223 		rxbuf = &rxr->rx_buffers[i];
1224 		if (rxbuf->buf == NULL) {
1225 			mp = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR,
1226 			    rxr->mbuf_sz);
1227 			if (mp == NULL)
1228 				goto update;
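			/* If the frame still fits after a 2-byte shift, align the IP header on a 4-byte boundary */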
1229 			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1230 				m_adj(mp, ETHER_ALIGN);
1231 		} else
1232 			mp = rxbuf->buf;
1233 
1234 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1235 
1236 		/* If we're dealing with an mbuf that was copied rather
1237 		 * than replaced, there's no need to go through busdma.
1238 		 */
1239 		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1240 			/* Get the memory mapping */
1241 			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1242 			error = bus_dmamap_load_mbuf_sg(rxr->ptag, rxbuf->pmap,
1243 			    mp, seg, &nsegs, BUS_DMA_NOWAIT);
1244 			if (error != 0) {
1245 				printf("Refresh mbufs: payload dmamap load failure - %d\n", error);
1246 				m_free(mp);
1247 				rxbuf->buf = NULL;
1248 				goto update;
1249 			}
1250 			rxbuf->buf = mp;
1251 			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1252 			    BUS_DMASYNC_PREREAD);
1253 			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1254 			    htole64(seg[0].ds_addr);
1255 		} else {
1256 			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1257 			rxbuf->flags &= ~IXGBE_RX_COPY;
1258 		}
1259 
1260 		refreshed = TRUE;
1261 		/* Next is precalculated */
1262 		i = j;
1263 		rxr->next_to_refresh = i;
1264 		if (++j == rxr->num_desc)
1265 			j = 0;
1266 	}
1267 
1268 update:
1269 	if (refreshed) /* Update hardware tail index */
1270 		IXGBE_WRITE_REG(&adapter->hw, rxr->tail, rxr->next_to_refresh);
1271 
1272 	return;
1273 } /* ixgbe_refresh_mbufs */
1274 
1275 /************************************************************************
1276  * ixgbe_allocate_receive_buffers
1277  *
1278  *   Allocate memory for rx_buffer structures. Since we use one
1279  *   rx_buffer per received packet, the maximum number of rx_buffer's
1280  *   rx_buffer per received packet, the maximum number of rx_buffers
1281  *   that we've allocated.
1282  ************************************************************************/
1283 static int
1284 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1285 {
1286 	struct adapter      *adapter = rxr->adapter;
1287 	device_t            dev = adapter->dev;
1288 	struct ixgbe_rx_buf *rxbuf;
1289 	int                 bsize, error;
1290 
1291 	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1292 	rxr->rx_buffers = (struct ixgbe_rx_buf *)malloc(bsize, M_DEVBUF,
1293 	    M_NOWAIT | M_ZERO);
1294 	if (rxr->rx_buffers == NULL) {
1295 		device_printf(dev, "Unable to allocate rx_buffer memory\n");
1296 		error = ENOMEM;
1297 		goto fail;
1298 	}
1299 
1300 	error = bus_dma_tag_create(
1301 	         /*      parent */ bus_get_dma_tag(dev),
1302 	         /*   alignment */ 1,
1303 	         /*      bounds */ 0,
1304 	         /*     lowaddr */ BUS_SPACE_MAXADDR,
1305 	         /*    highaddr */ BUS_SPACE_MAXADDR,
1306 	         /*      filter */ NULL,
1307 	         /*   filterarg */ NULL,
1308 	         /*     maxsize */ MJUM16BYTES,
1309 	         /*   nsegments */ 1,
1310 	         /*  maxsegsize */ MJUM16BYTES,
1311 	         /*       flags */ 0,
1312 	         /*    lockfunc */ NULL,
1313 	         /* lockfuncarg */ NULL,
1314 	                           &rxr->ptag);
1315 	if (error != 0) {
1316 		device_printf(dev, "Unable to create RX DMA tag\n");
1317 		goto fail;
1318 	}
1319 
1320 	for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1321 		rxbuf = &rxr->rx_buffers[i];
1322 		error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1323 		if (error) {
1324 			device_printf(dev, "Unable to create RX dma map\n");
1325 			goto fail;
1326 		}
1327 	}
1328 
1329 	return (0);
1330 
1331 fail:
1332 	/* Frees all, but can handle partial completion */
1333 	ixgbe_free_receive_structures(adapter);
1334 
1335 	return (error);
1336 } /* ixgbe_allocate_receive_buffers */
1337 
1338 /************************************************************************
1339  * ixgbe_free_receive_ring
1340  ************************************************************************/
1341 static void
1342 ixgbe_free_receive_ring(struct rx_ring *rxr)
1343 {
1344 	for (int i = 0; i < rxr->num_desc; i++) {
1345 		ixgbe_rx_discard(rxr, i);
1346 	}
1347 } /* ixgbe_free_receive_ring */
1348 
1349 /************************************************************************
1350  * ixgbe_setup_receive_ring
1351  *
1352  *   Initialize a receive ring and its buffers.
1353  ************************************************************************/
1354 static int
1355 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1356 {
1357 	struct adapter        *adapter;
1358 	struct ifnet          *ifp;
1359 	device_t              dev;
1360 	struct ixgbe_rx_buf   *rxbuf;
1361 	struct lro_ctrl       *lro = &rxr->lro;
1362 #ifdef DEV_NETMAP
1363 	struct netmap_adapter *na = NA(rxr->adapter->ifp);
1364 	struct netmap_slot    *slot;
1365 #endif /* DEV_NETMAP */
1366 	bus_dma_segment_t     seg[1];
1367 	int                   rsize, nsegs, error = 0;
1368 
1369 	adapter = rxr->adapter;
1370 	ifp = adapter->ifp;
1371 	dev = adapter->dev;
1372 
1373 	/* Clear the ring contents */
1374 	IXGBE_RX_LOCK(rxr);
1375 
1376 #ifdef DEV_NETMAP
1377 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP)
1378 		slot = netmap_reset(na, NR_RX, rxr->me, 0);
1379 #endif /* DEV_NETMAP */
1380 
1381 	rsize = roundup2(adapter->num_rx_desc *
1382 	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1383 	bzero((void *)rxr->rx_base, rsize);
1384 	/* Cache the size */
1385 	rxr->mbuf_sz = adapter->rx_mbuf_sz;
1386 
1387 	/* Free current RX buffer structs and their mbufs */
1388 	ixgbe_free_receive_ring(rxr);
1389 
1390 	/* Now replenish the mbufs */
1391 	for (int j = 0; j != rxr->num_desc; ++j) {
1392 		struct mbuf *mp;
1393 
1394 		rxbuf = &rxr->rx_buffers[j];
1395 
1396 #ifdef DEV_NETMAP
1397 		/*
1398 		 * In netmap mode, fill the map and set the buffer
1399 		 * address in the NIC ring, considering the offset
1400 		 * between the netmap and NIC rings (see comment in
1401 		 * ixgbe_setup_transmit_ring() ). No need to allocate
1402 		 * an mbuf, so end the block with a continue;
1403 		 */
1404 		if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
1405 			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1406 			uint64_t paddr;
1407 			void *addr;
1408 
1409 			addr = PNMB(na, slot + sj, &paddr);
1410 			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1411 			/* Update descriptor and the cached value */
1412 			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1413 			rxbuf->addr = htole64(paddr);
1414 			continue;
1415 		}
1416 #endif /* DEV_NETMAP */
1417 
1418 		rxbuf->flags = 0;
1419 		rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR,
1420 		    adapter->rx_mbuf_sz);
1421 		if (rxbuf->buf == NULL) {
1422 			error = ENOBUFS;
1423 			goto fail;
1424 		}
1425 		mp = rxbuf->buf;
1426 		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1427 		/* Get the memory mapping */
1428 		error = bus_dmamap_load_mbuf_sg(rxr->ptag, rxbuf->pmap, mp, seg,
1429 		    &nsegs, BUS_DMA_NOWAIT);
1430 		if (error != 0)
1431 			goto fail;
1432 		bus_dmamap_sync(rxr->ptag, rxbuf->pmap, BUS_DMASYNC_PREREAD);
1433 		/* Update the descriptor and the cached value */
1434 		rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
1435 		rxbuf->addr = htole64(seg[0].ds_addr);
1436 	}
1437 
1438 
1439 	/* Setup our descriptor indices */
1440 	rxr->next_to_check = 0;
1441 	rxr->next_to_refresh = 0;
1442 	rxr->lro_enabled = FALSE;
1443 	rxr->rx_copies = 0;
1444 	rxr->rx_bytes = 0;
1445 	rxr->vtag_strip = FALSE;
1446 
1447 	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1448 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1449 
1450 	/*
1451 	 * Now set up the LRO interface
1452 	 */
1453 	if (ixgbe_rsc_enable)
1454 		ixgbe_setup_hw_rsc(rxr);
1455 	else if (ifp->if_capenable & IFCAP_LRO) {
1456 		int err = tcp_lro_init(lro);
1457 		if (err) {
1458 			device_printf(dev, "LRO Initialization failed!\n");
1459 			goto fail;
1460 		}
1461 		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1462 		rxr->lro_enabled = TRUE;
1463 		lro->ifp = adapter->ifp;
1464 	}
1465 
1466 	IXGBE_RX_UNLOCK(rxr);
1467 
1468 	return (0);
1469 
1470 fail:
1471 	ixgbe_free_receive_ring(rxr);
1472 	IXGBE_RX_UNLOCK(rxr);
1473 
1474 	return (error);
1475 } /* ixgbe_setup_receive_ring */
1476 
1477 /************************************************************************
1478  * ixgbe_setup_receive_structures - Initialize all receive rings.
1479  ************************************************************************/
1480 int
1481 ixgbe_setup_receive_structures(struct adapter *adapter)
1482 {
1483 	struct rx_ring *rxr = adapter->rx_rings;
1484 	int            j;
1485 
1486 	for (j = 0; j < adapter->num_queues; j++, rxr++)
1487 		if (ixgbe_setup_receive_ring(rxr))
1488 			goto fail;
1489 
1490 	return (0);
1491 fail:
1492 	/*
1493 	 * Free RX buffers allocated so far; we only handle
1494 	 * the rings that completed, since the failing case will
1495 	 * have cleaned up after itself. 'j' failed, so it is the terminus.
1496 	 */
1497 	for (int i = 0; i < j; ++i) {
1498 		rxr = &adapter->rx_rings[i];
1499 		IXGBE_RX_LOCK(rxr);
1500 		ixgbe_free_receive_ring(rxr);
1501 		IXGBE_RX_UNLOCK(rxr);
1502 	}
1503 
1504 	return (ENOBUFS);
1505 } /* ixgbe_setup_receive_structures */
1506 
1507 
1508 /************************************************************************
1509  * ixgbe_free_receive_structures - Free all receive rings.
1510  ************************************************************************/
1511 void
1512 ixgbe_free_receive_structures(struct adapter *adapter)
1513 {
1514 	struct rx_ring *rxr = adapter->rx_rings;
1515 
1516 	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1517 
1518 	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1519 		ixgbe_free_receive_buffers(rxr);
1520 		/* Free LRO memory */
1521 		tcp_lro_free(&rxr->lro);
1522 		/* Free the ring memory as well */
1523 		ixgbe_dma_free(adapter, &rxr->rxdma);
1524 	}
1525 
1526 	free(adapter->rx_rings, M_DEVBUF);
1527 } /* ixgbe_free_receive_structures */
1528 
1529 
1530 /************************************************************************
1531  * ixgbe_free_receive_buffers - Free receive ring data structures
1532  ************************************************************************/
1533 static void
1534 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1535 {
1536 	struct adapter      *adapter = rxr->adapter;
1537 	struct ixgbe_rx_buf *rxbuf;
1538 
1539 	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1540 
1541 	/* Cleanup any existing buffers */
1542 	if (rxr->rx_buffers != NULL) {
1543 		for (int i = 0; i < adapter->num_rx_desc; i++) {
1544 			rxbuf = &rxr->rx_buffers[i];
1545 			ixgbe_rx_discard(rxr, i);
1546 			if (rxbuf->pmap != NULL) {
1547 				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1548 				rxbuf->pmap = NULL;
1549 			}
1550 		}
1551 		if (rxr->rx_buffers != NULL) {
1552 			free(rxr->rx_buffers, M_DEVBUF);
1553 			rxr->rx_buffers = NULL;
1554 		}
1555 	}
1556 
1557 	if (rxr->ptag != NULL) {
1558 		bus_dma_tag_destroy(rxr->ptag);
1559 		rxr->ptag = NULL;
1560 	}
1561 
1562 	return;
1563 } /* ixgbe_free_receive_buffers */
1564 
1565 /************************************************************************
1566  * ixgbe_rx_input
1567  ************************************************************************/
1568 static __inline void
1569 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
1570     u32 ptype)
1571 {
1572 	/*
1573 	 * At the moment LRO is only for IP/TCP packets, and the TCP checksum
1574 	 * of the packet must have been computed by hardware. Also it must not
1575 	 * have a VLAN tag in its ethernet header.  For IPv6 we do not yet support ext. hdrs.
1576 	 */
1577 	if (rxr->lro_enabled &&
1578 	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1579 	    (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1580 	    ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1581 	     (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1582 	     (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1583 	     (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1584 	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1585 	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1586 		/*
1587 		 * Send to the stack if:
1588 		 *  - LRO not enabled, or
1589 		 *  - no LRO resources, or
1590 		 *  - lro enqueue fails
1591 		 */
1592 		if (rxr->lro.lro_cnt != 0)
1593 			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1594 				return;
1595 	}
1596 	IXGBE_RX_UNLOCK(rxr);
1597 	(*ifp->if_input)(ifp, m);
1598 	IXGBE_RX_LOCK(rxr);
1599 } /* ixgbe_rx_input */
1600 
1601 /************************************************************************
1602  * ixgbe_rx_discard
1603  ************************************************************************/
1604 static __inline void
1605 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1606 {
1607 	struct ixgbe_rx_buf *rbuf;
1608 
1609 	rbuf = &rxr->rx_buffers[i];
1610 
1611 	/*
1612 	 * With advanced descriptors the writeback
1613 	 * clobbers the buffer addrs, so it's easier
1614 	 * to just free the existing mbufs and take
1615 	 * the normal refresh path to get new buffers
1616 	 * and mapping.
1617 	 */
1618 
1619 	if (rbuf->fmp != NULL) {/* Partial chain ? */
1620 		bus_dmamap_sync(rxr->ptag, rbuf->pmap, BUS_DMASYNC_POSTREAD);
1621 		m_freem(rbuf->fmp);
1622 		rbuf->fmp = NULL;
1623 		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1624 	} else if (rbuf->buf) {
1625 		bus_dmamap_sync(rxr->ptag, rbuf->pmap, BUS_DMASYNC_POSTREAD);
1626 		m_free(rbuf->buf);
1627 		rbuf->buf = NULL;
1628 	}
1629 	bus_dmamap_unload(rxr->ptag, rbuf->pmap);
1630 
1631 	rbuf->flags = 0;
1632 
1633 	return;
1634 } /* ixgbe_rx_discard */
1635 
1636 
1637 /************************************************************************
1638  * ixgbe_rxeof
1639  *
1640  *   Executes in interrupt context. It replenishes the
1641  *   mbufs in the descriptor ring and sends data which has
1642  *   been DMA'ed into host memory to the upper layer.
1643  *
1644  *   Return TRUE for more work, FALSE for all clean.
1645  ************************************************************************/
1646 bool
1647 ixgbe_rxeof(struct ix_queue *que)
1648 {
1649 	struct adapter          *adapter = que->adapter;
1650 	struct rx_ring          *rxr = que->rxr;
1651 	struct ifnet            *ifp = adapter->ifp;
1652 	struct lro_ctrl         *lro = &rxr->lro;
1653 	union ixgbe_adv_rx_desc *cur;
1654 	struct ixgbe_rx_buf     *rbuf, *nbuf;
1655 	int                     i, nextp, processed = 0;
1656 	u32                     staterr = 0;
1657 	u32                     count = adapter->rx_process_limit;
1658 	u16                     pkt_info;
1659 
1660 	IXGBE_RX_LOCK(rxr);
1661 
1662 #ifdef DEV_NETMAP
1663 	if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
1664 		/* Same as the txeof routine: wakeup clients on intr. */
1665 		if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1666 			IXGBE_RX_UNLOCK(rxr);
1667 			return (FALSE);
1668 		}
1669 	}
1670 #endif /* DEV_NETMAP */
1671 
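	/*
	 * Main cleanup loop: walk the ring starting at next_to_check until
	 * either the budget (rx_process_limit) is exhausted or a descriptor
	 * is reached whose DD (descriptor done) bit is not yet set.
	 */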
1672 	for (i = rxr->next_to_check; count != 0;) {
1673 		struct mbuf *sendmp, *mp;
1674 		u32         rsc, ptype;
1675 		u16         len;
1676 		u16         vtag = 0;
1677 		bool        eop;
1678 
1679 		/* Sync the ring. */
1680 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1681 		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1682 
1683 		cur = &rxr->rx_base[i];
1684 		staterr = le32toh(cur->wb.upper.status_error);
1685 		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1686 
1687 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1688 			break;
1689 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1690 			break;
1691 
1692 		count--;
1693 		sendmp = NULL;
1694 		nbuf = NULL;
1695 		rsc = 0;
1696 		cur->wb.upper.status_error = 0;
1697 		rbuf = &rxr->rx_buffers[i];
1698 		mp = rbuf->buf;
1699 
1700 		len = le16toh(cur->wb.upper.length);
1701 		ptype = le32toh(cur->wb.lower.lo_dword.data) &
1702 		    IXGBE_RXDADV_PKTTYPE_MASK;
1703 		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1704 
1705 		/* Make sure bad packets are discarded */
1706 		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1707 #if __FreeBSD_version >= 1100036
1708 			if (adapter->feat_en & IXGBE_FEATURE_VF)
1709 				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1710 #endif
1711 			rxr->rx_discarded++;
1712 			ixgbe_rx_discard(rxr, i);
1713 			goto next_desc;
1714 		}
1715 
1716 		bus_dmamap_sync(rxr->ptag, rbuf->pmap, BUS_DMASYNC_POSTREAD);
1717 
1718 		/*
1719 		 * On 82599, which supports a hardware LRO
1720 		 * (called HW RSC), packets need not be
1721 		 * fragmented across sequential descriptors;
1722 		 * instead the index of the next descriptor
1723 		 * in the chain is carried in bits of the
1724 		 * current descriptor.  This also means we
1725 		 * may process more than one packet at a
1726 		 * time, something never true before; it
1727 		 * required eliminating global chain pointers
1728 		 * in favor of what we are doing here.  -jfv
1729 		 */
1730 		if (!eop) {
1731 			/*
1732 			 * Figure out the next descriptor
1733 			 * of this frame.
1734 			 */
1735 			if (rxr->hw_rsc == TRUE) {
1736 				rsc = ixgbe_rsc_count(cur);
1737 				rxr->rsc_num += (rsc - 1);
1738 			}
1739 			if (rsc) { /* Get hardware index */
1740 				nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
1741 				    IXGBE_RXDADV_NEXTP_SHIFT);
1742 			} else { /* Just sequential */
1743 				nextp = i + 1;
1744 				if (nextp == adapter->num_rx_desc)
1745 					nextp = 0;
1746 			}
1747 			nbuf = &rxr->rx_buffers[nextp];
1748 			prefetch(nbuf);
1749 		}
1750 		/*
1751 		 * Rather than using the fmp/lmp global pointers
1752 		 * we now keep the head of a packet chain in the
1753 		 * buffer struct and pass this along from one
1754 		 * descriptor to the next, until we get EOP.
1755 		 */
1756 		mp->m_len = len;
1757 		/*
1758 		 * See if there is a stored head from a previous
1759 		 * descriptor; if so, this buffer continues that chain.
1760 		 */
1761 		sendmp = rbuf->fmp;
1762 		if (sendmp != NULL) {  /* secondary frag */
1763 			rbuf->buf = rbuf->fmp = NULL;
1764 			mp->m_flags &= ~M_PKTHDR;
1765 			sendmp->m_pkthdr.len += mp->m_len;
1766 		} else {
1767 			/*
1768 			 * Optimize.  This might be a small packet,
1769 			 * maybe just a TCP ACK.  Do a fast copy that
1770 			 * is cache aligned into a new mbuf, and
1771 			 * leave the old mbuf+cluster for re-use.
1772 			 */
1773 			if (eop && len <= IXGBE_RX_COPY_LEN) {
1774 				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1775 				if (sendmp != NULL) {
1776 					sendmp->m_data += IXGBE_RX_COPY_ALIGN;
1777 					ixgbe_bcopy(mp->m_data, sendmp->m_data,
1778 					    len);
1779 					sendmp->m_len = len;
1780 					rxr->rx_copies++;
1781 					rbuf->flags |= IXGBE_RX_COPY;
1782 				}
1783 			}
1784 			if (sendmp == NULL) {
1785 				rbuf->buf = rbuf->fmp = NULL;
1786 				sendmp = mp;
1787 			}
1788 
1789 			/* First descriptor of a non-packet-split chain */
1790 			sendmp->m_flags |= M_PKTHDR;
1791 			sendmp->m_pkthdr.len = mp->m_len;
1792 		}
1793 		++processed;
1794 
1795 		/* Pass the head pointer on */
1796 		if (eop == 0) {
1797 			nbuf->fmp = sendmp;
1798 			sendmp = NULL;
1799 			mp->m_next = nbuf->buf;
1800 		} else { /* Sending this frame */
1801 			sendmp->m_pkthdr.rcvif = ifp;
1802 			rxr->rx_packets++;
1803 			/* capture data for AIM */
1804 			rxr->bytes += sendmp->m_pkthdr.len;
1805 			rxr->rx_bytes += sendmp->m_pkthdr.len;
1806 			/* Process vlan info */
1807 			if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
1808 				vtag = le16toh(cur->wb.upper.vlan);
1809 			if (vtag) {
1810 				sendmp->m_pkthdr.ether_vtag = vtag;
1811 				sendmp->m_flags |= M_VLANTAG;
1812 			}
1813 			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1814 				ixgbe_rx_checksum(staterr, sendmp, ptype);
1815 
1816 			/*
1817 			 * In case of multiqueue, we have RXCSUM.PCSD bit set
1818 			 * and never cleared. This means we have RSS hash
1819 			 * available to be used.
1820 			 */
1821 			if (adapter->num_queues > 1) {
1822 				sendmp->m_pkthdr.flowid =
1823 				    le32toh(cur->wb.lower.hi_dword.rss);
1824 				switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
1825 				case IXGBE_RXDADV_RSSTYPE_IPV4:
1826 					M_HASHTYPE_SET(sendmp,
1827 					    M_HASHTYPE_RSS_IPV4);
1828 					break;
1829 				case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
1830 					M_HASHTYPE_SET(sendmp,
1831 					    M_HASHTYPE_RSS_TCP_IPV4);
1832 					break;
1833 				case IXGBE_RXDADV_RSSTYPE_IPV6:
1834 					M_HASHTYPE_SET(sendmp,
1835 					    M_HASHTYPE_RSS_IPV6);
1836 					break;
1837 				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
1838 					M_HASHTYPE_SET(sendmp,
1839 					    M_HASHTYPE_RSS_TCP_IPV6);
1840 					break;
1841 				case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
1842 					M_HASHTYPE_SET(sendmp,
1843 					    M_HASHTYPE_RSS_IPV6_EX);
1844 					break;
1845 				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
1846 					M_HASHTYPE_SET(sendmp,
1847 					    M_HASHTYPE_RSS_TCP_IPV6_EX);
1848 					break;
1849 #if __FreeBSD_version > 1100000
1850 				case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
1851 					M_HASHTYPE_SET(sendmp,
1852 					    M_HASHTYPE_RSS_UDP_IPV4);
1853 					break;
1854 				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
1855 					M_HASHTYPE_SET(sendmp,
1856 					    M_HASHTYPE_RSS_UDP_IPV6);
1857 					break;
1858 				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
1859 					M_HASHTYPE_SET(sendmp,
1860 					    M_HASHTYPE_RSS_UDP_IPV6_EX);
1861 					break;
1862 #endif
1863 				default:
1864 					M_HASHTYPE_SET(sendmp,
1865 					    M_HASHTYPE_OPAQUE_HASH);
1866 				}
1867 			} else {
1868 				sendmp->m_pkthdr.flowid = que->msix;
1869 				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1870 			}
1871 		}
1872 next_desc:
1873 		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1874 		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1875 
1876 		/* Advance our pointers to the next descriptor. */
1877 		if (++i == rxr->num_desc)
1878 			i = 0;
1879 
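		/*
		 * ixgbe_rx_input() drops and retakes the RX lock around
		 * if_input(), so the ring position is published in
		 * next_to_check before the call and the local index is
		 * re-read afterwards.
		 */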
1880 		/* Now send to the stack or do LRO */
1881 		if (sendmp != NULL) {
1882 			rxr->next_to_check = i;
1883 			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
1884 			i = rxr->next_to_check;
1885 		}
1886 
1887 		/* Refresh mbufs after every 8 descriptors processed */
1888 		if (processed == 8) {
1889 			ixgbe_refresh_mbufs(rxr, i);
1890 			processed = 0;
1891 		}
1892 	}
1893 
1894 	/* Refresh any remaining buf structs */
1895 	if (ixgbe_rx_unrefreshed(rxr))
1896 		ixgbe_refresh_mbufs(rxr, i);
1897 
1898 	rxr->next_to_check = i;
1899 
1900 	/*
1901 	 * Flush any outstanding LRO work
1902 	 */
1903 	tcp_lro_flush_all(lro);
1904 
1905 	IXGBE_RX_UNLOCK(rxr);
1906 
1907 	/*
1908 	 * Still have cleaning to do?
1909 	 */
1910 	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
1911 		return (TRUE);
1912 
1913 	return (FALSE);
1914 } /* ixgbe_rxeof */
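/*
 * Note on the return value (an illustrative sketch, not code from this
 * file): callers typically reschedule themselves while ixgbe_rxeof()
 * reports TRUE, roughly:
 *
 *	if (ixgbe_rxeof(que))
 *		taskqueue_enqueue(que->tq, &que->que_task);
 *
 * The que->tq / que->que_task names are assumed from the driver's
 * queue-task pattern; the sketch only illustrates the TRUE/FALSE
 * contract documented above.
 */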
1915 
1916 
1917 /************************************************************************
1918  * ixgbe_rx_checksum
1919  *
1920  *   Verify that the hardware indicated that the checksum is valid.
1921  *   Inform the stack about the status of checksum so that stack
1922  *   doesn't spend time verifying the checksum.
1923  ************************************************************************/
1924 static void
1925 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
1926 {
1927 	u16  status = (u16)staterr;
1928 	u8   errors = (u8)(staterr >> 24);
1929 	bool sctp = false;
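	/*
	 * The combined status_error word carries the status bits in its
	 * low 16 bits and the error bits tested below in its top byte,
	 * which is why staterr is split with the two casts above.
	 */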
1930 
1931 	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1932 	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
1933 		sctp = true;
1934 
1935 	/* IPv4 checksum */
1936 	if (status & IXGBE_RXD_STAT_IPCS) {
1937 		mp->m_pkthdr.csum_flags |= CSUM_L3_CALC;
1938 		/* IP Checksum Good */
1939 		if (!(errors & IXGBE_RXD_ERR_IPE))
1940 			mp->m_pkthdr.csum_flags |= CSUM_L3_VALID;
1941 	}
1942 	/* TCP/UDP/SCTP checksum */
1943 	if (status & IXGBE_RXD_STAT_L4CS) {
1944 		mp->m_pkthdr.csum_flags |= CSUM_L4_CALC;
1945 		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
1946 			mp->m_pkthdr.csum_flags |= CSUM_L4_VALID;
1947 			if (!sctp)
1948 				mp->m_pkthdr.csum_data = htons(0xffff);
1949 		}
1950 	}
1951 } /* ixgbe_rx_checksum */
1952 
1953 /************************************************************************
1954  * ixgbe_dmamap_cb - Callback for bus_dmamap_load(); saves the mapping's bus address.
1955  ************************************************************************/
1956 static void
1957 ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
1958 {
1959 	if (error)
1960 		return;
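	/*
	 * The tag created in ixgbe_dma_malloc() allows only one segment,
	 * so recording the first segment's bus address is sufficient.
	 */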
1961 	*(bus_addr_t *)arg = segs->ds_addr;
1962 
1963 	return;
1964 } /* ixgbe_dmamap_cb */
1965 
1966 /************************************************************************
1967  * ixgbe_dma_malloc
1968  ************************************************************************/
1969 static int
1970 ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
1971                  struct ixgbe_dma_alloc *dma, int mapflags)
1972 {
1973 	device_t dev = adapter->dev;
1974 	int      r;
1975 
1976 	r = bus_dma_tag_create(
1977 	     /*      parent */ bus_get_dma_tag(adapter->dev),
1978 	     /*   alignment */ DBA_ALIGN,
1979 	     /*      bounds */ 0,
1980 	     /*     lowaddr */ BUS_SPACE_MAXADDR,
1981 	     /*    highaddr */ BUS_SPACE_MAXADDR,
1982 	     /*      filter */ NULL,
1983 	     /*   filterarg */ NULL,
1984 	     /*     maxsize */ size,
1985 	     /*   nsegments */ 1,
1986 	     /*  maxsegsize */ size,
1987 	     /*       flags */ BUS_DMA_ALLOCNOW,
1988 	     /*    lockfunc */ NULL,
1989 	     /* lockfuncarg */ NULL,
1990 	                       &dma->dma_tag);
1991 	if (r != 0) {
1992 		device_printf(dev,
1993 		    "ixgbe_dma_malloc: bus_dma_tag_create failed; error %u\n",
1994 		    r);
1995 		goto fail_0;
1996 	}
1997 	r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
1998 	    BUS_DMA_NOWAIT, &dma->dma_map);
1999 	if (r != 0) {
2000 		device_printf(dev,
2001 		    "ixgbe_dma_malloc: bus_dmamem_alloc failed; error %u\n", r);
2002 		goto fail_1;
2003 	}
2004 	r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, size,
2005 	    ixgbe_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2006 	if (r != 0) {
2007 		device_printf(dev,
2008 		    "ixgbe_dma_malloc: bus_dmamap_load failed; error %u\n", r);
2009 		goto fail_2;
2010 	}
2011 	dma->dma_size = size;
2012 
2013 	return (0);
2014 fail_2:
2015 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2016 fail_1:
2017 	bus_dma_tag_destroy(dma->dma_tag);
2018 fail_0:
2019 	dma->dma_tag = NULL;
2020 
2021 	return (r);
2022 } /* ixgbe_dma_malloc */
2023 
2024 /************************************************************************
2025  * ixgbe_dma_free
2026  ************************************************************************/
2027 static void
2028 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2029 {
2030 	bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2031 	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2032 	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2033 	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2034 	bus_dma_tag_destroy(dma->dma_tag);
2035 } /* ixgbe_dma_free */
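/*
 * Usage sketch (illustrative only, not code from this file):
 * ixgbe_dma_malloc() and ixgbe_dma_free() are used as a pair around a
 * descriptor area, e.g.:
 *
 *	struct ixgbe_dma_alloc dma;
 *
 *	if (ixgbe_dma_malloc(adapter, tsize, &dma, BUS_DMA_NOWAIT) == 0) {
 *		... use dma.dma_vaddr (KVA) and dma.dma_paddr (bus addr) ...
 *		ixgbe_dma_free(adapter, &dma);
 *	}
 *
 * ixgbe_allocate_queues() below is the real caller; it also relies on
 * ixgbe_dma_free() in its error unwind.
 */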
2036 
2037 
2038 /************************************************************************
2039  * ixgbe_allocate_queues
2040  *
2041  *   Allocate memory for the transmit and receive rings, and then
2042  *   the descriptors associated with each, called only once at attach.
2043  ************************************************************************/
2044 int
2045 ixgbe_allocate_queues(struct adapter *adapter)
2046 {
2047 	device_t        dev = adapter->dev;
2048 	struct ix_queue *que;
2049 	struct tx_ring  *txr;
2050 	struct rx_ring  *rxr;
2051 	int             rsize, tsize, error = IXGBE_SUCCESS;
2052 	int             txconf = 0, rxconf = 0;
2053 
2054 	/* First, allocate the top level queue structs */
2055 	adapter->queues = (struct ix_queue *)malloc(sizeof(struct ix_queue) *
2056 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
2057 	if (adapter->queues == NULL) {
2058 		device_printf(dev, "Unable to allocate queue memory\n");
2059 		error = ENOMEM;
2060 		goto fail;
2061 	}
2062 
2063 	/* Second, allocate the TX ring struct memory */
2064 	adapter->tx_rings = (struct tx_ring *)malloc(sizeof(struct tx_ring) *
2065 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
2066 	if (adapter->tx_rings == NULL) {
2067 		device_printf(dev, "Unable to allocate TX ring memory\n");
2068 		error = ENOMEM;
2069 		goto tx_fail;
2070 	}
2071 
2072 	/* Third, allocate the RX ring struct memory */
2073 	adapter->rx_rings = (struct rx_ring *)malloc(sizeof(struct rx_ring) *
2074 	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
2075 	if (adapter->rx_rings == NULL) {
2076 		device_printf(dev, "Unable to allocate RX ring memory\n");
2077 		error = ENOMEM;
2078 		goto rx_fail;
2079 	}
2080 
2081 	/* Size of the descriptor area for each TX ring */
2082 	tsize = roundup2(adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc),
2083 	    DBA_ALIGN);
2084 
2085 	/*
2086 	 * Now set up the TX queues; txconf is needed to handle the
2087 	 * possibility that things fail midcourse so we can unwind
2088 	 * the memory allocations gracefully.
2089 	 */
2090 	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2091 		/* Set up some basics */
2092 		txr = &adapter->tx_rings[i];
2093 		txr->adapter = adapter;
2094 		txr->br = NULL;
2095 		/* In case SR-IOV is enabled, align the index properly */
2096 		txr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
2097 		    i);
2098 		txr->num_desc = adapter->num_tx_desc;
2099 
2100 		/* Initialize the TX side lock */
2101 		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2102 		    device_get_nameunit(dev), txr->me);
2103 		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2104 
2105 		if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma,
2106 		    BUS_DMA_NOWAIT)) {
2107 			device_printf(dev,
2108 			    "Unable to allocate TX Descriptor memory\n");
2109 			error = ENOMEM;
2110 			goto err_tx_desc;
2111 		}
2112 		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2113 		bzero((void *)txr->tx_base, tsize);
2114 
2115 		/* Now allocate transmit buffers for the ring */
2116 		if (ixgbe_allocate_transmit_buffers(txr)) {
2117 			device_printf(dev,
2118 			    "Critical Failure setting up transmit buffers\n");
2119 			error = ENOMEM;
2120 			goto err_tx_desc;
2121 		}
2122 		if (!(adapter->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
2123 			/* Allocate a buf ring */
2124 			txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
2125 			    M_WAITOK, &txr->tx_mtx);
2126 			if (txr->br == NULL) {
2127 				device_printf(dev,
2128 				    "Critical Failure setting up buf ring\n");
2129 				error = ENOMEM;
2130 				goto err_tx_desc;
2131 			}
2132 		}
2133 	}
2134 
2135 	/*
2136 	 * Next the RX queues...
2137 	 */
2138 	rsize = roundup2(adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc),
2139 	    DBA_ALIGN);
2140 	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2141 		rxr = &adapter->rx_rings[i];
2142 		/* Set up some basics */
2143 		rxr->adapter = adapter;
2144 		/* In case SR-IOV is enabled, align the index properly */
2145 		rxr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
2146 		    i);
2147 		rxr->num_desc = adapter->num_rx_desc;
2148 
2149 		/* Initialize the RX side lock */
2150 		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2151 		    device_get_nameunit(dev), rxr->me);
2152 		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2153 
2154 		if (ixgbe_dma_malloc(adapter, rsize, &rxr->rxdma,
2155 		    BUS_DMA_NOWAIT)) {
2156 			device_printf(dev,
2157 			    "Unable to allocate RxDescriptor memory\n");
2158 			error = ENOMEM;
2159 			goto err_rx_desc;
2160 		}
2161 		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2162 		bzero((void *)rxr->rx_base, rsize);
2163 
2164 		/* Allocate receive buffers for the ring */
2165 		if (ixgbe_allocate_receive_buffers(rxr)) {
2166 			device_printf(dev,
2167 			    "Critical Failure setting up receive buffers\n");
2168 			error = ENOMEM;
2169 			goto err_rx_desc;
2170 		}
2171 	}
2172 
2173 	/*
2174 	 * Finally set up the queue holding structs
2175 	 */
2176 	for (int i = 0; i < adapter->num_queues; i++) {
2177 		que = &adapter->queues[i];
2178 		que->adapter = adapter;
2179 		que->me = i;
2180 		que->txr = &adapter->tx_rings[i];
2181 		que->rxr = &adapter->rx_rings[i];
2182 	}
2183 
2184 	return (0);
2185 
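	/*
	 * Error unwind: free the descriptor DMA areas of the rings that
	 * were set up so far (tracked by rxconf/txconf), then release the
	 * ring and queue arrays in reverse order of allocation.
	 */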
2186 err_rx_desc:
2187 	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2188 		ixgbe_dma_free(adapter, &rxr->rxdma);
2189 err_tx_desc:
2190 	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2191 		ixgbe_dma_free(adapter, &txr->txdma);
2192 	free(adapter->rx_rings, M_DEVBUF);
2193 rx_fail:
2194 	free(adapter->tx_rings, M_DEVBUF);
2195 tx_fail:
2196 	free(adapter->queues, M_DEVBUF);
2197 fail:
2198 	return (error);
2199 } /* ixgbe_allocate_queues */
2200