xref: /illumos-gate/usr/src/uts/common/io/ixgbe/ixgbe_tx.c (revision 57c40785)
1 /*
2  * CDDL HEADER START
3  *
4  * Copyright(c) 2007-2008 Intel Corporation. All rights reserved.
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License (the "License").
7  * You may not use this file except in compliance with the License.
8  *
9  * You can obtain a copy of the license at:
10  *      http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When using or redistributing this file, you may do so under the
15  * License only. No other modification of this header is permitted.
16  *
17  * If applicable, add the following below this CDDL HEADER, with the
18  * fields enclosed by brackets "[]" replaced with your own identifying
19  * information: Portions Copyright [yyyy] [name of copyright owner]
20  *
21  * CDDL HEADER END
22  */
23 
24 /*
25  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
26  * Use is subject to license terms of the CDDL.
27  */
28 
29 #pragma ident	"%Z%%M%	%I%	%E% SMI"
30 
31 #include "ixgbe_sw.h"
32 
33 static boolean_t ixgbe_tx(ixgbe_tx_ring_t *, mblk_t *);
34 static int ixgbe_tx_copy(ixgbe_tx_ring_t *, tx_control_block_t *, mblk_t *,
35     uint32_t, boolean_t);
36 static int ixgbe_tx_bind(ixgbe_tx_ring_t *, tx_control_block_t *, mblk_t *,
37     uint32_t);
38 static int ixgbe_tx_fill_ring(ixgbe_tx_ring_t *, link_list_t *,
39     ixgbe_tx_context_t *, size_t);
40 static void ixgbe_save_desc(tx_control_block_t *, uint64_t, size_t);
41 static tx_control_block_t *ixgbe_get_free_list(ixgbe_tx_ring_t *);
42 
43 static int ixgbe_get_context(mblk_t *, ixgbe_tx_context_t *);
44 static boolean_t ixgbe_check_context(ixgbe_tx_ring_t *,
45     ixgbe_tx_context_t *);
46 static void ixgbe_fill_context(struct ixgbe_adv_tx_context_desc *,
47     ixgbe_tx_context_t *);
48 
49 #ifndef IXGBE_DEBUG
50 #pragma inline(ixgbe_save_desc)
51 #pragma inline(ixgbe_get_context)
52 #pragma inline(ixgbe_check_context)
53 #pragma inline(ixgbe_fill_context)
54 #endif
55 
56 /*
57  * ixgbe_m_tx
58  *
59  * The GLDv3 interface to call driver's tx routine to transmit
60  * the mblks.
61  */
62 mblk_t *
63 ixgbe_m_tx(void *arg, mblk_t *mp)
64 {
65 	ixgbe_t *ixgbe = (ixgbe_t *)arg;
66 	mblk_t *next;
67 	ixgbe_tx_ring_t *tx_ring;
68 
69 	/*
70 	 * If the adapter is suspended, or it is not started, or the link
71 	 * is not up, the mblks are simply dropped.
72 	 */
73 	if (((ixgbe->ixgbe_state & IXGBE_SUSPENDED) != 0) ||
74 	    ((ixgbe->ixgbe_state & IXGBE_STARTED) == 0) ||
75 	    (ixgbe->link_state != LINK_STATE_UP)) {
76 		/* Free the mblk chain */
77 		while (mp != NULL) {
78 			next = mp->b_next;
79 			mp->b_next = NULL;
80 
81 			freemsg(mp);
82 			mp = next;
83 		}
84 
85 		return (NULL);
86 	}
87 
88 	/*
89 	 * Decide which tx ring is used to transmit the packets.
90 	 * This needs to be updated later to fit the new interface
91 	 * of the multiple rings support.
92 	 */
93 	tx_ring = &ixgbe->tx_rings[0];
94 
95 	while (mp != NULL) {
96 		next = mp->b_next;
97 		mp->b_next = NULL;
98 
99 		if (!ixgbe_tx(tx_ring, mp)) {
100 			mp->b_next = next;
101 			break;
102 		}
103 
104 		mp = next;
105 	}
106 
107 	return (mp);
108 }
109 
110 /*
111  * ixgbe_tx - Main transmit processing
112  *
113  * Called from ixgbe_m_tx with an mblk ready to transmit. this
114  * routine sets up the transmit descriptors and sends data to
115  * the wire.
116  *
117  * One mblk can consist of several fragments, each fragment
118  * will be processed with different methods based on the size.
119  * For the fragments with size less than the bcopy threshold,
120  * they will be processed by using bcopy; otherwise, they will
121  * be processed by using DMA binding.
122  *
123  * To process the mblk, a tx control block is got from the
124  * free list. One tx control block contains one tx buffer, which
125  * is used to copy mblk fragments' data; and one tx DMA handle,
126  * which is used to bind a mblk fragment with DMA resource.
127  *
128  * Several small mblk fragments can be copied into one tx control
129  * block's buffer, and then the buffer will be transmitted with
130  * one tx descriptor.
131  *
132  * A large fragment only binds with one tx control block's DMA
133  * handle, and it can span several tx descriptors for transmitting.
134  *
135  * So to transmit a packet (mblk), several tx control blocks can
136  * be used. After the processing, those tx control blocks will
137  * be put to the work list.
138  */
139 static boolean_t
140 ixgbe_tx(ixgbe_tx_ring_t *tx_ring, mblk_t *mp)
141 {
142 	ixgbe_t *ixgbe = tx_ring->ixgbe;
143 	tx_type_t current_flag, next_flag;
144 	uint32_t current_len, next_len;
145 	uint32_t desc_total;
146 	size_t mbsize;
147 	int desc_num;
148 	boolean_t copy_done, eop;
149 	mblk_t *current_mp, *next_mp, *nmp;
150 	tx_control_block_t *tcb;
151 	ixgbe_tx_context_t tx_context, *ctx;
152 	link_list_t pending_list;
153 
154 	/* Get the mblk size */
155 	mbsize = 0;
156 	for (nmp = mp; nmp != NULL; nmp = nmp->b_cont) {
157 		mbsize += MBLK_LEN(nmp);
158 	}
159 
160 	if (ixgbe->tx_hcksum_enable) {
161 		/*
162 		 * Retrieve checksum context information from the mblk
163 		 * that will be used to decide whether/how to fill the
164 		 * context descriptor.
165 		 */
166 		ctx = &tx_context;
167 		if (ixgbe_get_context(mp, ctx) < 0) {
168 			freemsg(mp);
169 			return (B_TRUE);
170 		}
171 
172 		/*
173 		 * If the mblk size exceeds the max size ixgbe could
174 		 * process, then discard this mblk, and return B_TRUE
175 		 */
176 		if ((ctx->lso_flag && ((mbsize - ctx->mac_hdr_len)
177 		    > IXGBE_LSO_MAXLEN)) || (!ctx->lso_flag &&
178 		    (mbsize > (ixgbe->max_frame_size - ETHERFCSL)))) {
179 			freemsg(mp);
180 			IXGBE_DEBUGLOG_0(ixgbe, "ixgbe_tx: packet oversize");
181 			return (B_TRUE);
182 		}
183 	} else {
184 		ctx = NULL;
185 	}
186 
187 
188 	/*
189 	 * Check and recycle tx descriptors.
190 	 * The recycle threshold here should be selected carefully
191 	 */
192 	if (tx_ring->tbd_free < tx_ring->recycle_thresh)
193 		tx_ring->tx_recycle(tx_ring);
194 
195 	/*
196 	 * After the recycling, if the tbd_free is less than the
197 	 * overload_threshold, assert overload, return B_FALSE;
198 	 * and we need to re-schedule the tx again.
199 	 */
200 	if (tx_ring->tbd_free < tx_ring->overload_thresh) {
201 		tx_ring->reschedule = B_TRUE;
202 		IXGBE_DEBUG_STAT(tx_ring->stat_overload);
203 		return (B_FALSE);
204 	}
205 
206 	/*
207 	 * The pending_list is a linked list that is used to save
208 	 * the tx control blocks that have packet data processed
209 	 * but have not put the data to the tx descriptor ring.
210 	 * It is used to reduce the lock contention of the tx_lock.
211 	 */
212 	LINK_LIST_INIT(&pending_list);
213 	desc_num = 0;
214 	desc_total = 0;
215 
216 	current_mp = mp;
217 	current_len = MBLK_LEN(current_mp);
218 	/*
219 	 * Decide which method to use for the first fragment
220 	 */
221 	current_flag = (current_len <= tx_ring->copy_thresh) ?
222 	    USE_COPY : USE_DMA;
223 	/*
224 	 * If the mblk includes several contiguous small fragments,
225 	 * they may be copied into one buffer. This flag is used to
226 	 * indicate whether there are pending fragments that need to
227 	 * be copied to the current tx buffer.
228 	 *
229 	 * If this flag is B_TRUE, it indicates that a new tx control
230 	 * block is needed to process the next fragment using either
231 	 * copy or DMA binding.
232 	 *
233 	 * Otherwise, it indicates that the next fragment will be
234 	 * copied to the current tx buffer that is maintained by the
235 	 * current tx control block. No new tx control block is needed.
236 	 */
237 	copy_done = B_TRUE;
238 	while (current_mp) {
239 		next_mp = current_mp->b_cont;
240 		eop = (next_mp == NULL); /* Last fragment of the packet? */
241 		next_len = eop ? 0: MBLK_LEN(next_mp);
242 
243 		/*
244 		 * When the current fragment is an empty fragment, if
245 		 * the next fragment will still be copied to the current
246 		 * tx buffer, we cannot skip this fragment here. Because
247 		 * the copy processing is pending for completion. We have
248 		 * to process this empty fragment in the tx_copy routine.
249 		 *
250 		 * If the copy processing is completed or a DMA binding
251 		 * processing is just completed, we can just skip this
252 		 * empty fragment.
253 		 */
254 		if ((current_len == 0) && (copy_done)) {
255 			current_mp = next_mp;
256 			current_len = next_len;
257 			current_flag = (current_len <= tx_ring->copy_thresh) ?
258 			    USE_COPY : USE_DMA;
259 			continue;
260 		}
261 
262 		if (copy_done) {
263 			/*
264 			 * Get a new tx control block from the free list
265 			 */
266 			tcb = ixgbe_get_free_list(tx_ring);
267 
268 			if (tcb == NULL) {
269 				IXGBE_DEBUG_STAT(tx_ring->stat_fail_no_tcb);
270 				goto tx_failure;
271 			}
272 
273 			/*
274 			 * Push the tx control block to the pending list
275 			 * to avoid using lock too early
276 			 */
277 			LIST_PUSH_TAIL(&pending_list, &tcb->link);
278 		}
279 
280 		if (current_flag == USE_COPY) {
281 			/*
282 			 * Check whether to use bcopy or DMA binding to process
283 			 * the next fragment, and if using bcopy, whether we
284 			 * need to continue copying the next fragment into the
285 			 * current tx buffer.
286 			 */
287 			ASSERT((tcb->tx_buf.len + current_len) <=
288 			    tcb->tx_buf.size);
289 
290 			if (eop) {
291 				/*
292 				 * This is the last fragment of the packet, so
293 				 * the copy processing will be completed with
294 				 * this fragment.
295 				 */
296 				next_flag = USE_NONE;
297 				copy_done = B_TRUE;
298 			} else if ((tcb->tx_buf.len + current_len + next_len) >
299 			    tcb->tx_buf.size) {
300 				/*
301 				 * If the next fragment is too large to be
302 				 * copied to the current tx buffer, we need
303 				 * to complete the current copy processing.
304 				 */
305 				next_flag = (next_len > tx_ring->copy_thresh) ?
306 				    USE_DMA: USE_COPY;
307 				copy_done = B_TRUE;
308 			} else if (next_len > tx_ring->copy_thresh) {
309 				/*
310 				 * The next fragment needs to be processed with
311 				 * DMA binding. So the copy prcessing will be
312 				 * completed with the current fragment.
313 				 */
314 				next_flag = USE_DMA;
315 				copy_done = B_TRUE;
316 			} else {
317 				/*
318 				 * Continue to copy the next fragment to the
319 				 * current tx buffer.
320 				 */
321 				next_flag = USE_COPY;
322 				copy_done = B_FALSE;
323 			}
324 
325 			desc_num = ixgbe_tx_copy(tx_ring, tcb, current_mp,
326 			    current_len, copy_done);
327 		} else {
328 			/*
329 			 * Check whether to use bcopy or DMA binding to process
330 			 * the next fragment.
331 			 */
332 			next_flag = (next_len > tx_ring->copy_thresh) ?
333 			    USE_DMA: USE_COPY;
334 			ASSERT(copy_done == B_TRUE);
335 
336 			desc_num = ixgbe_tx_bind(tx_ring, tcb, current_mp,
337 			    current_len);
338 		}
339 
340 		if (desc_num > 0)
341 			desc_total += desc_num;
342 		else if (desc_num < 0)
343 			goto tx_failure;
344 
345 		current_mp = next_mp;
346 		current_len = next_len;
347 		current_flag = next_flag;
348 	}
349 
350 	/*
351 	 * Attach the mblk to the last tx control block
352 	 */
353 	ASSERT(tcb);
354 	ASSERT(tcb->mp == NULL);
355 	tcb->mp = mp;
356 
357 	/*
358 	 * Before fill the tx descriptor ring with the data, we need to
359 	 * ensure there are adequate free descriptors for transmit
360 	 * (including one context descriptor).
361 	 */
362 	if (tx_ring->tbd_free < (desc_total + 1)) {
363 		tx_ring->tx_recycle(tx_ring);
364 	}
365 
366 	mutex_enter(&tx_ring->tx_lock);
367 
368 	/*
369 	 * If the number of free tx descriptors is not enough for transmit
370 	 * then return failure.
371 	 *
372 	 * Note: we must put this check under the mutex protection to
373 	 * ensure the correctness when multiple threads access it in
374 	 * parallel.
375 	 */
376 	if (tx_ring->tbd_free < (desc_total + 1)) {
377 		IXGBE_DEBUG_STAT(tx_ring->stat_fail_no_tbd);
378 		mutex_exit(&tx_ring->tx_lock);
379 		goto tx_failure;
380 	}
381 
382 	desc_num = ixgbe_tx_fill_ring(tx_ring, &pending_list, ctx,
383 	    mbsize);
384 
385 	ASSERT((desc_num == desc_total) || (desc_num == (desc_total + 1)));
386 
387 	mutex_exit(&tx_ring->tx_lock);
388 
389 	return (B_TRUE);
390 
391 tx_failure:
392 	/*
393 	 * Discard the mblk and free the used resources
394 	 */
395 	tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
396 	while (tcb) {
397 		tcb->mp = NULL;
398 
399 		ixgbe_free_tcb(tcb);
400 
401 		tcb = (tx_control_block_t *)
402 		    LIST_GET_NEXT(&pending_list, &tcb->link);
403 	}
404 
405 	/*
406 	 * Return the tx control blocks in the pending list to the free list.
407 	 */
408 	ixgbe_put_free_list(tx_ring, &pending_list);
409 
410 	/* Transmit failed, do not drop the mblk, rechedule the transmit */
411 	tx_ring->reschedule = B_TRUE;
412 
413 	return (B_FALSE);
414 }
415 
416 /*
417  * ixgbe_tx_copy
418  *
419  * Copy the mblk fragment to the pre-allocated tx buffer
420  */
421 static int
422 ixgbe_tx_copy(ixgbe_tx_ring_t *tx_ring, tx_control_block_t *tcb, mblk_t *mp,
423     uint32_t len, boolean_t copy_done)
424 {
425 	dma_buffer_t *tx_buf;
426 	uint32_t desc_num;
427 	_NOTE(ARGUNUSED(tx_ring));
428 
429 	tx_buf = &tcb->tx_buf;
430 
431 	/*
432 	 * Copy the packet data of the mblk fragment into the
433 	 * pre-allocated tx buffer, which is maintained by the
434 	 * tx control block.
435 	 *
436 	 * Several mblk fragments can be copied into one tx buffer.
437 	 * The destination address of the current copied fragment in
438 	 * the tx buffer is next to the end of the previous copied
439 	 * fragment.
440 	 */
441 	if (len > 0) {
442 		bcopy(mp->b_rptr, tx_buf->address + tx_buf->len, len);
443 
444 		tx_buf->len += len;
445 		tcb->frag_num++;
446 	}
447 
448 	desc_num = 0;
449 
450 	/*
451 	 * If it is the last fragment copied to the current tx buffer,
452 	 * in other words, if there's no remaining fragment or the remaining
453 	 * fragment requires a new tx control block to process, we need to
454 	 * complete the current copy processing by syncing up the current
455 	 * DMA buffer and saving the descriptor data.
456 	 */
457 	if (copy_done) {
458 		/*
459 		 * Sync the DMA buffer of the packet data
460 		 */
461 		DMA_SYNC(tx_buf, DDI_DMA_SYNC_FORDEV);
462 
463 		tcb->tx_type = USE_COPY;
464 
465 		/*
466 		 * Save the address and length to the private data structure
467 		 * of the tx control block, which will be used to fill the
468 		 * tx descriptor ring after all the fragments are processed.
469 		 */
470 		ixgbe_save_desc(tcb, tx_buf->dma_address, tx_buf->len);
471 		desc_num++;
472 	}
473 
474 	return (desc_num);
475 }
476 
477 /*
478  * ixgbe_tx_bind
479  *
480  * Bind the mblk fragment with DMA
481  */
482 static int
483 ixgbe_tx_bind(ixgbe_tx_ring_t *tx_ring, tx_control_block_t *tcb, mblk_t *mp,
484     uint32_t len)
485 {
486 	int status, i;
487 	ddi_dma_cookie_t dma_cookie;
488 	uint_t ncookies;
489 	int desc_num;
490 
491 	/*
492 	 * Use DMA binding to process the mblk fragment
493 	 */
494 	status = ddi_dma_addr_bind_handle(tcb->tx_dma_handle, NULL,
495 	    (caddr_t)mp->b_rptr, len,
496 	    DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT,
497 	    0, &dma_cookie, &ncookies);
498 
499 	if (status != DDI_DMA_MAPPED) {
500 		IXGBE_DEBUG_STAT(tx_ring->stat_fail_dma_bind);
501 		return (-1);
502 	}
503 
504 	tcb->frag_num++;
505 	tcb->tx_type = USE_DMA;
506 	/*
507 	 * Each fragment can span several cookies. One cookie will have
508 	 * one tx descriptor to transmit.
509 	 */
510 	desc_num = 0;
511 	for (i = ncookies; i > 0; i--) {
512 		/*
513 		 * Save the address and length to the private data structure
514 		 * of the tx control block, which will be used to fill the
515 		 * tx descriptor ring after all the fragments are processed.
516 		 */
517 		ixgbe_save_desc(tcb,
518 		    dma_cookie.dmac_laddress,
519 		    dma_cookie.dmac_size);
520 
521 		desc_num++;
522 
523 		if (i > 1)
524 			ddi_dma_nextcookie(tcb->tx_dma_handle, &dma_cookie);
525 	}
526 
527 	return (desc_num);
528 }
529 
530 /*
531  * ixgbe_get_context
532  *
533  * Get the context information from the mblk
534  */
535 static int
536 ixgbe_get_context(mblk_t *mp, ixgbe_tx_context_t *ctx)
537 {
538 	uint32_t start;
539 	uint32_t flags;
540 	uint32_t len;
541 	uint32_t size;
542 	uint32_t offset;
543 	unsigned char *pos;
544 	ushort_t etype;
545 	uint32_t mac_hdr_len;
546 	uint32_t l4_proto;
547 	uint32_t l4_hdr_len;
548 
549 	ASSERT(mp != NULL);
550 
551 	hcksum_retrieve(mp, NULL, NULL, &start, NULL, NULL, NULL, &flags);
552 	bzero(ctx, sizeof (ixgbe_tx_context_t));
553 	ctx->hcksum_flags = flags;
554 
555 	if (flags == 0)
556 		return (0);
557 
558 	ctx->mss = DB_LSOMSS(mp);
559 	ctx->lso_flag = (ctx->hcksum_flags & HW_LSO) &&
560 	    (ctx->mss != 0);
561 
562 	/*
563 	 * LSO relies on tx h/w checksum, so here will drop the package
564 	 * if h/w checksum flag is not declared.
565 	 */
566 	if (ctx->lso_flag) {
567 		if (!((ctx->hcksum_flags & HCK_PARTIALCKSUM) &&
568 		    (ctx->hcksum_flags & HCK_IPV4_HDRCKSUM))) {
569 			IXGBE_DEBUGLOG_0(NULL, "ixgbe_tx: h/w "
570 			    "checksum flags are not specified when doing LSO");
571 			return (-1);
572 		}
573 	}
574 
575 	etype = 0;
576 	mac_hdr_len = 0;
577 	l4_proto = 0;
578 
579 	/*
580 	 * Firstly get the position of the ether_type/ether_tpid.
581 	 * Here we don't assume the ether (VLAN) header is fully included
582 	 * in one mblk fragment, so we go thourgh the fragments to parse
583 	 * the ether type.
584 	 */
585 	size = len = MBLK_LEN(mp);
586 	offset = offsetof(struct ether_header, ether_type);
587 	while (size <= offset) {
588 		mp = mp->b_cont;
589 		ASSERT(mp != NULL);
590 		len = MBLK_LEN(mp);
591 		size += len;
592 	}
593 	pos = mp->b_rptr + offset + len - size;
594 
595 	etype = ntohs(*(ushort_t *)(uintptr_t)pos);
596 	if (etype == ETHERTYPE_VLAN) {
597 		/*
598 		 * Get the position of the ether_type in VLAN header
599 		 */
600 		offset = offsetof(struct ether_vlan_header, ether_type);
601 		while (size <= offset) {
602 			mp = mp->b_cont;
603 			ASSERT(mp != NULL);
604 			len = MBLK_LEN(mp);
605 			size += len;
606 		}
607 		pos = mp->b_rptr + offset + len - size;
608 
609 		etype = ntohs(*(ushort_t *)(uintptr_t)pos);
610 		mac_hdr_len = sizeof (struct ether_vlan_header);
611 	} else {
612 		mac_hdr_len = sizeof (struct ether_header);
613 	}
614 
615 	/*
616 	 * Here we assume the IP(V6) header is fully included in
617 	 * one mblk fragment.
618 	 */
619 	switch (etype) {
620 	case ETHERTYPE_IP:
621 		offset = mac_hdr_len;
622 		while (size <= offset) {
623 			mp = mp->b_cont;
624 			ASSERT(mp != NULL);
625 			len = MBLK_LEN(mp);
626 			size += len;
627 		}
628 		pos = mp->b_rptr + offset + len - size;
629 
630 		if (ctx->lso_flag) {
631 			*((uint16_t *)(uintptr_t)(pos + offsetof(ipha_t,
632 			    ipha_length))) = 0;
633 			*((uint16_t *)(uintptr_t)(pos + offsetof(ipha_t,
634 			    ipha_hdr_checksum))) = 0;
635 
636 			/*
637 			 * To perform ixgbe LSO, here also need to fill
638 			 * the tcp checksum field of the packet with the
639 			 * following pseudo-header checksum:
640 			 * (ip_source_addr, ip_destination_addr, l4_proto)
641 			 * Currently the tcp/ip stack has done it.
642 			 */
643 		}
644 
645 		l4_proto = *(uint8_t *)(pos + offsetof(ipha_t, ipha_protocol));
646 		break;
647 	case ETHERTYPE_IPV6:
648 		offset = offsetof(ip6_t, ip6_nxt) + mac_hdr_len;
649 		while (size <= offset) {
650 			mp = mp->b_cont;
651 			ASSERT(mp != NULL);
652 			len = MBLK_LEN(mp);
653 			size += len;
654 		}
655 		pos = mp->b_rptr + offset + len - size;
656 
657 		l4_proto = *(uint8_t *)pos;
658 		break;
659 	default:
660 		/* Unrecoverable error */
661 		IXGBE_DEBUGLOG_0(NULL, "Ether type error with tx hcksum");
662 		return (-2);
663 	}
664 
665 	if (ctx->lso_flag) {
666 		offset = mac_hdr_len + start;
667 		while (size <= offset) {
668 			mp = mp->b_cont;
669 			ASSERT(mp != NULL);
670 			len = MBLK_LEN(mp);
671 			size += len;
672 		}
673 		pos = mp->b_rptr + offset + len - size;
674 
675 		l4_hdr_len = TCP_HDR_LENGTH((tcph_t *)pos);
676 	} else {
677 		/*
678 		 * l4 header length is only required for LSO
679 		 */
680 		l4_hdr_len = 0;
681 	}
682 
683 	ctx->mac_hdr_len = mac_hdr_len;
684 	ctx->ip_hdr_len = start;
685 	ctx->l4_proto = l4_proto;
686 	ctx->l4_hdr_len = l4_hdr_len;
687 
688 	return (0);
689 }
690 
691 /*
692  * ixgbe_check_context
693  *
694  * Check if a new context descriptor is needed
695  */
696 static boolean_t
697 ixgbe_check_context(ixgbe_tx_ring_t *tx_ring, ixgbe_tx_context_t *ctx)
698 {
699 	ixgbe_tx_context_t *last;
700 
701 	if (ctx == NULL)
702 		return (B_FALSE);
703 
704 	/*
705 	 * Compare the checksum data retrieved from the mblk and the
706 	 * stored checksum data of the last context descriptor. The data
707 	 * need to be checked are:
708 	 *	hcksum_flags
709 	 *	l4_proto
710 	 *	mac_hdr_len
711 	 *	ip_hdr_len
712 	 *	mss (only checked for LSO)
713 	 *	l4_hr_len (only checked for LSO)
714 	 * Either one of the above data is changed, a new context descriptor
715 	 * will be needed.
716 	 */
717 	last = &tx_ring->tx_context;
718 
719 	if (ctx->hcksum_flags != 0) {
720 		if ((ctx->hcksum_flags != last->hcksum_flags) ||
721 		    (ctx->l4_proto != last->l4_proto) ||
722 		    (ctx->mac_hdr_len != last->mac_hdr_len) ||
723 		    (ctx->ip_hdr_len != last->ip_hdr_len) ||
724 		    (ctx->lso_flag && ((ctx->mss != last->mss) ||
725 		    (ctx->l4_hdr_len != last->l4_hdr_len)))) {
726 
727 			return (B_TRUE);
728 		}
729 	}
730 
731 	return (B_FALSE);
732 }
733 
734 /*
735  * ixgbe_fill_context
736  *
737  * Fill the context descriptor with hardware checksum informations
738  */
739 static void
740 ixgbe_fill_context(struct ixgbe_adv_tx_context_desc *ctx_tbd,
741     ixgbe_tx_context_t *ctx)
742 {
743 	/*
744 	 * Fill the context descriptor with the checksum
745 	 * context information we've got
746 	 */
747 	ctx_tbd->vlan_macip_lens = ctx->ip_hdr_len;
748 	ctx_tbd->vlan_macip_lens |= ctx->mac_hdr_len <<
749 	    IXGBE_ADVTXD_MACLEN_SHIFT;
750 
751 	ctx_tbd->type_tucmd_mlhl =
752 	    IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
753 
754 	if (ctx->hcksum_flags & HCK_IPV4_HDRCKSUM)
755 		ctx_tbd->type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
756 
757 	if (ctx->hcksum_flags & HCK_PARTIALCKSUM) {
758 		switch (ctx->l4_proto) {
759 		case IPPROTO_TCP:
760 			ctx_tbd->type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
761 			break;
762 		case IPPROTO_UDP:
763 			/*
764 			 * We don't have to explicitly set:
765 			 *	ctx_tbd->type_tucmd_mlhl |=
766 			 *	    IXGBE_ADVTXD_TUCMD_L4T_UDP;
767 			 * Because IXGBE_ADVTXD_TUCMD_L4T_UDP == 0b
768 			 */
769 			break;
770 		default:
771 			/* Unrecoverable error */
772 			IXGBE_DEBUGLOG_0(NULL, "L4 type error with tx hcksum");
773 			break;
774 		}
775 	}
776 
777 	ctx_tbd->seqnum_seed = 0;
778 	if (ctx->lso_flag) {
779 		ctx_tbd->mss_l4len_idx =
780 		    (ctx->l4_hdr_len << IXGBE_ADVTXD_L4LEN_SHIFT) |
781 		    (ctx->mss << IXGBE_ADVTXD_MSS_SHIFT);
782 	} else {
783 		ctx_tbd->mss_l4len_idx = 0;
784 	}
785 }
786 
787 /*
788  * ixgbe_tx_fill_ring
789  *
790  * Fill the tx descriptor ring with the data
791  */
792 static int
793 ixgbe_tx_fill_ring(ixgbe_tx_ring_t *tx_ring, link_list_t *pending_list,
794     ixgbe_tx_context_t *ctx, size_t mbsize)
795 {
796 	struct ixgbe_hw *hw = &tx_ring->ixgbe->hw;
797 	boolean_t load_context;
798 	uint32_t index, tcb_index, desc_num;
799 	union ixgbe_adv_tx_desc *tbd, *first_tbd;
800 	tx_control_block_t *tcb, *first_tcb;
801 	uint32_t hcksum_flags;
802 	int i;
803 
804 	ASSERT(mutex_owned(&tx_ring->tx_lock));
805 
806 	tbd = NULL;
807 	first_tbd = NULL;
808 	first_tcb = NULL;
809 	desc_num = 0;
810 	hcksum_flags = 0;
811 	load_context = B_FALSE;
812 
813 	/*
814 	 * Get the index of the first tx descriptor that will be filled,
815 	 * and the index of the first work list item that will be attached
816 	 * with the first used tx control block in the pending list.
817 	 * Note: the two indexes are the same.
818 	 */
819 	index = tx_ring->tbd_tail;
820 	tcb_index = tx_ring->tbd_tail;
821 
822 	if (ctx != NULL) {
823 		hcksum_flags = ctx->hcksum_flags;
824 
825 		/*
826 		 * Check if a new context descriptor is needed for this packet
827 		 */
828 		load_context = ixgbe_check_context(tx_ring, ctx);
829 
830 		if (load_context) {
831 			first_tcb = (tx_control_block_t *)
832 			    LIST_GET_HEAD(pending_list);
833 			tbd = &tx_ring->tbd_ring[index];
834 
835 			/*
836 			 * Fill the context descriptor with the
837 			 * hardware checksum offload informations.
838 			 */
839 			ixgbe_fill_context(
840 			    (struct ixgbe_adv_tx_context_desc *)tbd,
841 			    ctx);
842 
843 			index = NEXT_INDEX(index, 1, tx_ring->ring_size);
844 			desc_num++;
845 
846 			/*
847 			 * Store the checksum context data if
848 			 * a new context descriptor is added
849 			 */
850 			tx_ring->tx_context = *ctx;
851 		}
852 	}
853 
854 	first_tbd = &tx_ring->tbd_ring[index];
855 
856 	/*
857 	 * Fill tx data descriptors with the data saved in the pending list.
858 	 * The tx control blocks in the pending list are added to the work list
859 	 * at the same time.
860 	 *
861 	 * The work list is strictly 1:1 corresponding to the descriptor ring.
862 	 * One item of the work list corresponds to one tx descriptor. Because
863 	 * one tx control block can span multiple tx descriptors, the tx
864 	 * control block will be added to the first work list item that
865 	 * corresponds to the first tx descriptor generated from that tx
866 	 * control block.
867 	 */
868 	tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
869 	while (tcb != NULL) {
870 
871 		for (i = 0; i < tcb->desc_num; i++) {
872 			tbd = &tx_ring->tbd_ring[index];
873 
874 			tbd->read.buffer_addr = tcb->desc[i].address;
875 			tbd->read.cmd_type_len = tcb->desc[i].length;
876 
877 			tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_RS |
878 			    IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_DATA;
879 
880 			tbd->read.olinfo_status = 0;
881 
882 			index = NEXT_INDEX(index, 1, tx_ring->ring_size);
883 			desc_num++;
884 		}
885 
886 		if (first_tcb != NULL) {
887 			/*
888 			 * Count the checksum context descriptor for
889 			 * the first tx control block.
890 			 */
891 			first_tcb->desc_num++;
892 			first_tcb = NULL;
893 		}
894 
895 		/*
896 		 * Add the tx control block to the work list
897 		 */
898 		ASSERT(tx_ring->work_list[tcb_index] == NULL);
899 		tx_ring->work_list[tcb_index] = tcb;
900 
901 		tcb_index = index;
902 		tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
903 	}
904 
905 	/*
906 	 * The Insert Ethernet CRC (IFCS) bit and the checksum fields are only
907 	 * valid in the first descriptor of the packet.
908 	 */
909 	ASSERT(first_tbd != NULL);
910 	first_tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_IFCS;
911 
912 	if (ctx != NULL && ctx->lso_flag) {
913 		first_tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
914 		first_tbd->read.olinfo_status |=
915 		    (mbsize - ctx->mac_hdr_len - ctx->ip_hdr_len
916 		    - ctx->l4_hdr_len) << IXGBE_ADVTXD_PAYLEN_SHIFT;
917 	}
918 
919 	/* Set hardware checksum bits */
920 	if (hcksum_flags != 0) {
921 		if (hcksum_flags & HCK_IPV4_HDRCKSUM)
922 			first_tbd->read.olinfo_status |=
923 			    IXGBE_ADVTXD_POPTS_IXSM;
924 		if (hcksum_flags & HCK_PARTIALCKSUM)
925 			first_tbd->read.olinfo_status |=
926 			    IXGBE_ADVTXD_POPTS_TXSM;
927 	}
928 
929 	/*
930 	 * The last descriptor of packet needs End Of Packet (EOP),
931 	 * and Report Status (RS) bits set
932 	 */
933 	ASSERT(tbd != NULL);
934 	tbd->read.cmd_type_len |=
935 	    IXGBE_ADVTXD_DCMD_EOP | IXGBE_ADVTXD_DCMD_RS;
936 
937 	/*
938 	 * Sync the DMA buffer of the tx descriptor ring
939 	 */
940 	DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORDEV);
941 
942 	if (ixgbe_check_dma_handle(tx_ring->tbd_area.dma_handle) != DDI_FM_OK) {
943 		ddi_fm_service_impact(tx_ring->ixgbe->dip,
944 		    DDI_SERVICE_DEGRADED);
945 	}
946 
947 	/*
948 	 * Update the number of the free tx descriptors.
949 	 * The mutual exclusion between the transmission and the recycling
950 	 * (for the tx descriptor ring and the work list) is implemented
951 	 * with the atomic operation on the number of the free tx descriptors.
952 	 *
953 	 * Note: we should always decrement the counter tbd_free before
954 	 * advancing the hardware TDT pointer to avoid the race condition -
955 	 * before the counter tbd_free is decremented, the transmit of the
956 	 * tx descriptors has done and the counter tbd_free is increased by
957 	 * the tx recycling.
958 	 */
959 	i = ixgbe_atomic_reserve(&tx_ring->tbd_free, desc_num);
960 	ASSERT(i >= 0);
961 
962 	tx_ring->tbd_tail = index;
963 
964 	/*
965 	 * Advance the hardware TDT pointer of the tx descriptor ring
966 	 */
967 	IXGBE_WRITE_REG(hw, IXGBE_TDT(tx_ring->index), index);
968 
969 	if (ixgbe_check_acc_handle(tx_ring->ixgbe->osdep.reg_handle) !=
970 	    DDI_FM_OK) {
971 		ddi_fm_service_impact(tx_ring->ixgbe->dip,
972 		    DDI_SERVICE_DEGRADED);
973 	}
974 
975 	return (desc_num);
976 }
977 
978 /*
979  * ixgbe_save_desc
980  *
981  * Save the address/length pair to the private array
982  * of the tx control block. The address/length pairs
983  * will be filled into the tx descriptor ring later.
984  */
985 static void
986 ixgbe_save_desc(tx_control_block_t *tcb, uint64_t address, size_t length)
987 {
988 	sw_desc_t *desc;
989 
990 	desc = &tcb->desc[tcb->desc_num];
991 	desc->address = address;
992 	desc->length = length;
993 
994 	tcb->desc_num++;
995 }
996 
997 /*
998  * ixgbe_tx_recycle_legacy
999  *
1000  * Recycle the tx descriptors and tx control blocks.
1001  *
1002  * The work list is traversed to check if the corresponding
1003  * tx descriptors have been transmitted. If so, the resources
1004  * bound to the tx control blocks will be freed, and those
1005  * tx control blocks will be returned to the free list.
1006  */
1007 uint32_t
1008 ixgbe_tx_recycle_legacy(ixgbe_tx_ring_t *tx_ring)
1009 {
1010 	uint32_t index, last_index;
1011 	int desc_num;
1012 	boolean_t desc_done;
1013 	tx_control_block_t *tcb;
1014 	link_list_t pending_list;
1015 
1016 	/*
1017 	 * The mutex_tryenter() is used to avoid unnecessary
1018 	 * lock contention.
1019 	 */
1020 	if (mutex_tryenter(&tx_ring->recycle_lock) == 0)
1021 		return (0);
1022 
1023 	ASSERT(tx_ring->tbd_free <= tx_ring->ring_size);
1024 
1025 	if (tx_ring->tbd_free == tx_ring->ring_size) {
1026 		tx_ring->recycle_fail = 0;
1027 		tx_ring->stall_watchdog = 0;
1028 		mutex_exit(&tx_ring->recycle_lock);
1029 		return (0);
1030 	}
1031 
1032 	/*
1033 	 * Sync the DMA buffer of the tx descriptor ring
1034 	 */
1035 	DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORKERNEL);
1036 
1037 	if (ixgbe_check_dma_handle(tx_ring->tbd_area.dma_handle) != DDI_FM_OK) {
1038 		ddi_fm_service_impact(tx_ring->ixgbe->dip,
1039 		    DDI_SERVICE_DEGRADED);
1040 	}
1041 
1042 	LINK_LIST_INIT(&pending_list);
1043 	desc_num = 0;
1044 	index = tx_ring->tbd_head;	/* Index of next tbd/tcb to recycle */
1045 
1046 	tcb = tx_ring->work_list[index];
1047 	ASSERT(tcb != NULL);
1048 
1049 	desc_done = B_TRUE;
1050 	while (desc_done && (tcb != NULL)) {
1051 
1052 		/*
1053 		 * Get the last tx descriptor of the tx control block.
1054 		 * If the last tx descriptor is done, it is done with
1055 		 * all the tx descriptors of the tx control block.
1056 		 * Then the tx control block and all the corresponding
1057 		 * tx descriptors can be recycled.
1058 		 */
1059 		last_index = NEXT_INDEX(index, tcb->desc_num - 1,
1060 		    tx_ring->ring_size);
1061 
1062 		/*
1063 		 * Check if the Descriptor Done bit is set
1064 		 */
1065 		desc_done = tx_ring->tbd_ring[last_index].wb.status &
1066 		    IXGBE_TXD_STAT_DD;
1067 		if (desc_done) {
1068 			/*
1069 			 * Strip off the tx control block from the work list,
1070 			 * and add it to the pending list.
1071 			 */
1072 			tx_ring->work_list[index] = NULL;
1073 			LIST_PUSH_TAIL(&pending_list, &tcb->link);
1074 
1075 			/*
1076 			 * Count the total number of the tx descriptors recycled
1077 			 */
1078 			desc_num += tcb->desc_num;
1079 
1080 			/*
1081 			 * Advance the index of the tx descriptor ring
1082 			 */
1083 			index = NEXT_INDEX(last_index, 1, tx_ring->ring_size);
1084 
1085 			tcb = tx_ring->work_list[index];
1086 		}
1087 	}
1088 
1089 	/*
1090 	 * If no tx descriptors are recycled, no need to do more processing
1091 	 */
1092 	if (desc_num == 0) {
1093 		tx_ring->recycle_fail++;
1094 		mutex_exit(&tx_ring->recycle_lock);
1095 		return (0);
1096 	}
1097 
1098 	tx_ring->recycle_fail = 0;
1099 	tx_ring->stall_watchdog = 0;
1100 
1101 	/*
1102 	 * Update the head index of the tx descriptor ring
1103 	 */
1104 	tx_ring->tbd_head = index;
1105 
1106 	/*
1107 	 * Update the number of the free tx descriptors with atomic operations
1108 	 */
1109 	atomic_add_32(&tx_ring->tbd_free, desc_num);
1110 
1111 	mutex_exit(&tx_ring->recycle_lock);
1112 
1113 	/*
1114 	 * Free the resources used by the tx control blocks
1115 	 * in the pending list
1116 	 */
1117 	tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
1118 	while (tcb != NULL) {
1119 		/*
1120 		 * Release the resources occupied by the tx control block
1121 		 */
1122 		ixgbe_free_tcb(tcb);
1123 
1124 		tcb = (tx_control_block_t *)
1125 		    LIST_GET_NEXT(&pending_list, &tcb->link);
1126 	}
1127 
1128 	/*
1129 	 * Add the tx control blocks in the pending list to the free list.
1130 	 */
1131 	ixgbe_put_free_list(tx_ring, &pending_list);
1132 
1133 	return (desc_num);
1134 }
1135 
1136 /*
1137  * ixgbe_tx_recycle_head_wb
1138  *
1139  * Check the head write-back, and recycle all the transmitted
1140  * tx descriptors and tx control blocks.
1141  */
1142 uint32_t
1143 ixgbe_tx_recycle_head_wb(ixgbe_tx_ring_t *tx_ring)
1144 {
1145 	uint32_t index;
1146 	uint32_t head_wb;
1147 	int desc_num;
1148 	tx_control_block_t *tcb;
1149 	link_list_t pending_list;
1150 
1151 	/*
1152 	 * The mutex_tryenter() is used to avoid unnecessary
1153 	 * lock contention.
1154 	 */
1155 	if (mutex_tryenter(&tx_ring->recycle_lock) == 0)
1156 		return (0);
1157 
1158 	ASSERT(tx_ring->tbd_free <= tx_ring->ring_size);
1159 
1160 	if (tx_ring->tbd_free == tx_ring->ring_size) {
1161 		tx_ring->recycle_fail = 0;
1162 		tx_ring->stall_watchdog = 0;
1163 		mutex_exit(&tx_ring->recycle_lock);
1164 		return (0);
1165 	}
1166 
1167 	/*
1168 	 * Sync the DMA buffer of the tx descriptor ring
1169 	 *
1170 	 * Note: For head write-back mode, the tx descriptors will not
1171 	 * be written back, but the head write-back value is stored at
1172 	 * the last extra tbd at the end of the DMA area, we still need
1173 	 * to sync the head write-back value for kernel.
1174 	 *
1175 	 * DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORKERNEL);
1176 	 */
1177 	(void) ddi_dma_sync(tx_ring->tbd_area.dma_handle,
1178 	    sizeof (union ixgbe_adv_tx_desc) * tx_ring->ring_size,
1179 	    sizeof (uint32_t),
1180 	    DDI_DMA_SYNC_FORKERNEL);
1181 
1182 	if (ixgbe_check_dma_handle(tx_ring->tbd_area.dma_handle) != DDI_FM_OK) {
1183 		ddi_fm_service_impact(tx_ring->ixgbe->dip,
1184 		    DDI_SERVICE_DEGRADED);
1185 	}
1186 
1187 	LINK_LIST_INIT(&pending_list);
1188 	desc_num = 0;
1189 	index = tx_ring->tbd_head;	/* Next index to clean */
1190 
1191 	/*
1192 	 * Get the value of head write-back
1193 	 */
1194 	head_wb = *tx_ring->tbd_head_wb;
1195 	while (index != head_wb) {
1196 		tcb = tx_ring->work_list[index];
1197 		ASSERT(tcb != NULL);
1198 
1199 		if (OFFSET(index, head_wb, tx_ring->ring_size) <
1200 		    tcb->desc_num) {
1201 			/*
1202 			 * The current tx control block is not
1203 			 * completely transmitted, stop recycling
1204 			 */
1205 			break;
1206 		}
1207 
1208 		/*
1209 		 * Strip off the tx control block from the work list,
1210 		 * and add it to the pending list.
1211 		 */
1212 		tx_ring->work_list[index] = NULL;
1213 		LIST_PUSH_TAIL(&pending_list, &tcb->link);
1214 
1215 		/*
1216 		 * Advance the index of the tx descriptor ring
1217 		 */
1218 		index = NEXT_INDEX(index, tcb->desc_num, tx_ring->ring_size);
1219 
1220 		/*
1221 		 * Count the total number of the tx descriptors recycled
1222 		 */
1223 		desc_num += tcb->desc_num;
1224 	}
1225 
1226 	/*
1227 	 * If no tx descriptors are recycled, no need to do more processing
1228 	 */
1229 	if (desc_num == 0) {
1230 		tx_ring->recycle_fail++;
1231 		mutex_exit(&tx_ring->recycle_lock);
1232 		return (0);
1233 	}
1234 
1235 	tx_ring->recycle_fail = 0;
1236 	tx_ring->stall_watchdog = 0;
1237 
1238 	/*
1239 	 * Update the head index of the tx descriptor ring
1240 	 */
1241 	tx_ring->tbd_head = index;
1242 
1243 	/*
1244 	 * Update the number of the free tx descriptors with atomic operations
1245 	 */
1246 	atomic_add_32(&tx_ring->tbd_free, desc_num);
1247 
1248 	mutex_exit(&tx_ring->recycle_lock);
1249 
1250 	/*
1251 	 * Free the resources used by the tx control blocks
1252 	 * in the pending list
1253 	 */
1254 	tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
1255 	while (tcb) {
1256 		/*
1257 		 * Release the resources occupied by the tx control block
1258 		 */
1259 		ixgbe_free_tcb(tcb);
1260 
1261 		tcb = (tx_control_block_t *)
1262 		    LIST_GET_NEXT(&pending_list, &tcb->link);
1263 	}
1264 
1265 	/*
1266 	 * Add the tx control blocks in the pending list to the free list.
1267 	 */
1268 	ixgbe_put_free_list(tx_ring, &pending_list);
1269 
1270 	return (desc_num);
1271 }
1272 
1273 /*
1274  * ixgbe_free_tcb - free up the tx control block
1275  *
1276  * Free the resources of the tx control block, including
1277  * unbind the previously bound DMA handle, and reset other
1278  * control fields.
1279  */
1280 void
1281 ixgbe_free_tcb(tx_control_block_t *tcb)
1282 {
1283 	switch (tcb->tx_type) {
1284 	case USE_COPY:
1285 		/*
1286 		 * Reset the buffer length that is used for copy
1287 		 */
1288 		tcb->tx_buf.len = 0;
1289 		break;
1290 	case USE_DMA:
1291 		/*
1292 		 * Release the DMA resource that is used for
1293 		 * DMA binding.
1294 		 */
1295 		(void) ddi_dma_unbind_handle(tcb->tx_dma_handle);
1296 		break;
1297 	default:
1298 		break;
1299 	}
1300 
1301 	/*
1302 	 * Free the mblk
1303 	 */
1304 	if (tcb->mp != NULL) {
1305 		freemsg(tcb->mp);
1306 		tcb->mp = NULL;
1307 	}
1308 
1309 	tcb->tx_type = USE_NONE;
1310 	tcb->frag_num = 0;
1311 	tcb->desc_num = 0;
1312 }
1313 
1314 /*
1315  * ixgbe_get_free_list - Get a free tx control block from the free list
1316  *
1317  * The atomic operation on the number of the available tx control block
1318  * in the free list is used to keep this routine mutual exclusive with
1319  * the routine ixgbe_put_check_list.
1320  */
1321 static tx_control_block_t *
1322 ixgbe_get_free_list(ixgbe_tx_ring_t *tx_ring)
1323 {
1324 	tx_control_block_t *tcb;
1325 
1326 	/*
1327 	 * Check and update the number of the free tx control block
1328 	 * in the free list.
1329 	 */
1330 	if (ixgbe_atomic_reserve(&tx_ring->tcb_free, 1) < 0)
1331 		return (NULL);
1332 
1333 	mutex_enter(&tx_ring->tcb_head_lock);
1334 
1335 	tcb = tx_ring->free_list[tx_ring->tcb_head];
1336 	ASSERT(tcb != NULL);
1337 	tx_ring->free_list[tx_ring->tcb_head] = NULL;
1338 	tx_ring->tcb_head = NEXT_INDEX(tx_ring->tcb_head, 1,
1339 	    tx_ring->free_list_size);
1340 
1341 	mutex_exit(&tx_ring->tcb_head_lock);
1342 
1343 	return (tcb);
1344 }
1345 
1346 /*
1347  * ixgbe_put_free_list
1348  *
1349  * Put a list of used tx control blocks back to the free list
1350  *
1351  * A mutex is used here to ensure the serialization. The mutual exclusion
1352  * between ixgbe_get_free_list and ixgbe_put_free_list is implemented with
1353  * the atomic operation on the counter tcb_free.
1354  */
1355 void
1356 ixgbe_put_free_list(ixgbe_tx_ring_t *tx_ring, link_list_t *pending_list)
1357 {
1358 	uint32_t index;
1359 	int tcb_num;
1360 	tx_control_block_t *tcb;
1361 
1362 	mutex_enter(&tx_ring->tcb_tail_lock);
1363 
1364 	index = tx_ring->tcb_tail;
1365 
1366 	tcb_num = 0;
1367 	tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
1368 	while (tcb != NULL) {
1369 		ASSERT(tx_ring->free_list[index] == NULL);
1370 		tx_ring->free_list[index] = tcb;
1371 
1372 		tcb_num++;
1373 
1374 		index = NEXT_INDEX(index, 1, tx_ring->free_list_size);
1375 
1376 		tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
1377 	}
1378 
1379 	tx_ring->tcb_tail = index;
1380 
1381 	/*
1382 	 * Update the number of the free tx control block
1383 	 * in the free list. This operation must be placed
1384 	 * under the protection of the lock.
1385 	 */
1386 	atomic_add_32(&tx_ring->tcb_free, tcb_num);
1387 
1388 	mutex_exit(&tx_ring->tcb_tail_lock);
1389 }
1390