xref: /freebsd/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c (revision 4d846d26)
1 /*-
2  * Copyright (c) 2015-2021 Mellanox Technologies. All rights reserved.
3  * Copyright (c) 2022 NVIDIA corporation & affiliates.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28 
29 #include "opt_kern_tls.h"
30 #include "opt_rss.h"
31 #include "opt_ratelimit.h"
32 
33 #include <dev/mlx5/mlx5_en/en.h>
34 #include <machine/atomic.h>
35 
36 static inline bool
37 mlx5e_do_send_cqe_inline(struct mlx5e_sq *sq)
38 {
39 	sq->cev_counter++;
40 	/* interleave the CQEs */
41 	if (sq->cev_counter >= sq->cev_factor) {
42 		sq->cev_counter = 0;
43 		return (true);
44 	}
45 	return (false);
46 }
47 
48 bool
49 mlx5e_do_send_cqe(struct mlx5e_sq *sq)
50 {
51 
52 	return (mlx5e_do_send_cqe_inline(sq));
53 }
54 
/*
 * Post a NOP work request of "ds_cnt" data segments at the current
 * producer index.  Used to pad the send queue, e.g. to avoid WQE
 * wrap-around.  A completion event is requested periodically, as
 * decided by mlx5e_do_send_cqe_inline().  The doorbell data is
 * recorded, but ringing the doorbell is left to the caller.
 */
void
mlx5e_send_nop(struct mlx5e_sq *sq, u32 ds_cnt)
{
	u16 pi = sq->pc & sq->wq.sz_m1;
	struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);

	memset(&wqe->ctrl, 0, sizeof(wqe->ctrl));

	wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_NOP);
	wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
	/* check if a completion event should be generated */
	if (mlx5e_do_send_cqe_inline(sq))
		wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
	else
		wqe->ctrl.fm_ce_se = 0;

	/* Copy data for doorbell */
	memcpy(sq->doorbell.d32, &wqe->ctrl, sizeof(sq->doorbell.d32));

	/* no mbuf is associated with a NOP WQE */
	sq->mbuf[pi].mbuf = NULL;
	sq->mbuf[pi].num_bytes = 0;
	sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
	sq->pc += sq->mbuf[pi].num_wqebbs;
}
78 
/* Seed for the software packet-header hash used by mlx5e_select_queue(). */
static uint32_t mlx5e_hash_value;

static void
mlx5e_hash_init(void *arg)
{
	/* Initialize the hash seed once randomness is available. */
	mlx5e_hash_value = m_ether_tcpip_hash_init();
}

/* Make kernel call mlx5e_hash_init after the random stack finished initializing */
SYSINIT(mlx5e_hash_init, SI_SUB_RANDOM, SI_ORDER_ANY, &mlx5e_hash_init, NULL);
89 
90 static struct mlx5e_sq *
91 mlx5e_select_queue_by_send_tag(if_t ifp, struct mbuf *mb)
92 {
93 	struct m_snd_tag *mb_tag;
94 	struct mlx5e_sq *sq;
95 
96 	mb_tag = mb->m_pkthdr.snd_tag;
97 
98 #ifdef KERN_TLS
99 top:
100 #endif
101 	/* get pointer to sendqueue */
102 	switch (mb_tag->sw->type) {
103 #ifdef RATELIMIT
104 	case IF_SND_TAG_TYPE_RATE_LIMIT:
105 		sq = container_of(mb_tag,
106 		    struct mlx5e_rl_channel, tag)->sq;
107 		break;
108 #ifdef KERN_TLS
109 	case IF_SND_TAG_TYPE_TLS_RATE_LIMIT:
110 		mb_tag = container_of(mb_tag, struct mlx5e_tls_tag, tag)->rl_tag;
111 		goto top;
112 #endif
113 #endif
114 	case IF_SND_TAG_TYPE_UNLIMITED:
115 		sq = &container_of(mb_tag,
116 		    struct mlx5e_channel, tag)->sq[0];
117 		KASSERT((mb_tag->refcount > 0),
118 		    ("mlx5e_select_queue: Channel refs are zero for unlimited tag"));
119 		break;
120 #ifdef KERN_TLS
121 	case IF_SND_TAG_TYPE_TLS:
122 		mb_tag = container_of(mb_tag, struct mlx5e_tls_tag, tag)->rl_tag;
123 		goto top;
124 #endif
125 	default:
126 		sq = NULL;
127 		break;
128 	}
129 
130 	/* check if valid */
131 	if (sq != NULL && READ_ONCE(sq->running) != 0)
132 		return (sq);
133 
134 	return (NULL);
135 }
136 
/*
 * Select a send queue for an ordinary (untagged) mbuf.  The traffic
 * class is derived from the VLAN priority bits, when present.  The
 * channel is derived from the mbuf's flow identifier or, failing
 * that, from a software hash over the L3/L4 headers.  Returns NULL
 * when the chosen send queue is not running.
 */
static struct mlx5e_sq *
mlx5e_select_queue(if_t ifp, struct mbuf *mb)
{
	struct mlx5e_priv *priv = if_getsoftc(ifp);
	struct mlx5e_sq *sq;
	u32 ch;
	u32 tc;

	/* obtain VLAN information if present */
	if (mb->m_flags & M_VLANTAG) {
		/* the PCP field occupies the top three bits of the tag */
		tc = (mb->m_pkthdr.ether_vtag >> 13);
		if (tc >= priv->num_tc)
			tc = priv->default_vlan_prio;
	} else {
		tc = priv->default_vlan_prio;
	}

	ch = priv->params.num_channels;

	/* check if flowid is set */
	if (M_HASHTYPE_GET(mb) != M_HASHTYPE_NONE) {
#ifdef RSS
		u32 temp;

		if (rss_hash2bucket(mb->m_pkthdr.flowid,
		    M_HASHTYPE_GET(mb), &temp) == 0)
			ch = temp % ch;
		else
#endif
			ch = (mb->m_pkthdr.flowid % 128) % ch;
	} else {
		/* no flow ID - hash the packet headers in software */
		ch = m_ether_tcpip_hash(MBUF_HASHFLAG_L3 |
		    MBUF_HASHFLAG_L4, mb, mlx5e_hash_value) % ch;
	}

	/* check if send queue is running */
	sq = &priv->channel[ch].sq[tc];
	if (likely(READ_ONCE(sq->running) != 0))
		return (sq);
	return (NULL);
}
178 
/*
 * Compute how many bytes of this packet to inline when only L2
 * inlining is required.  For IP and IPv6 packets the first four bytes
 * of the L3 header are included as well.  On any parse failure the
 * function falls back to inlining up to "max_inline" bytes.
 */
static inline u16
mlx5e_get_l2_header_size(struct mlx5e_sq *sq, struct mbuf *mb)
{
	struct ether_vlan_header *eh;
	uint16_t eth_type;
	int min_inline;

	eh = mtod(mb, struct ether_vlan_header *);
	if (unlikely(mb->m_len < ETHER_HDR_LEN)) {
		goto max_inline;
	} else if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		if (unlikely(mb->m_len < (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN)))
			goto max_inline;
		eth_type = ntohs(eh->evl_proto);
		min_inline = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		eth_type = ntohs(eh->evl_encap_proto);
		min_inline = ETHER_HDR_LEN;
	}

	switch (eth_type) {
	case ETHERTYPE_IP:
	case ETHERTYPE_IPV6:
		/*
		 * Make sure the TOS(IPv4) or traffic class(IPv6)
		 * field gets inlined. Else the SQ may stall.
		 */
		min_inline += 4;
		break;
	default:
		goto max_inline;
	}

	/*
	 * m_copydata() will be used on the remaining header which
	 * does not need to reside within the first m_len bytes of
	 * data:
	 */
	if (mb->m_pkthdr.len < min_inline)
		goto max_inline;
	return (min_inline);

max_inline:
	return (MIN(mb->m_pkthdr.len, sq->max_inline));
}
224 
225 /*
226  * This function parse IPv4 and IPv6 packets looking for TCP and UDP
227  * headers.
228  *
229  * Upon return the pointer at which the "ppth" argument points, is set
230  * to the location of the TCP header. NULL is used if no TCP header is
231  * present.
232  *
233  * The return value indicates the number of bytes from the beginning
234  * of the packet until the first byte after the TCP or UDP header. If
235  * this function returns zero, the parsing failed.
236  */
int
mlx5e_get_full_header_size(const struct mbuf *mb, const struct tcphdr **ppth)
{
	const struct ether_vlan_header *eh;
	const struct tcphdr *th;
	const struct ip *ip;
	int ip_hlen, tcp_hlen;
	const struct ip6_hdr *ip6;
	uint16_t eth_type;
	int eth_hdr_len;

	/* parse the ethernet header, including an optional VLAN tag */
	eh = mtod(mb, const struct ether_vlan_header *);
	if (unlikely(mb->m_len < ETHER_HDR_LEN))
		goto failure;
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		if (unlikely(mb->m_len < ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN))
			goto failure;
		eth_type = ntohs(eh->evl_proto);
		eth_hdr_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		eth_type = ntohs(eh->evl_encap_proto);
		eth_hdr_len = ETHER_HDR_LEN;
	}

	switch (eth_type) {
	case ETHERTYPE_IP:
		ip = (const struct ip *)(mb->m_data + eth_hdr_len);
		if (unlikely(mb->m_len < eth_hdr_len + sizeof(*ip)))
			goto failure;
		switch (ip->ip_p) {
		case IPPROTO_TCP:
			ip_hlen = ip->ip_hl << 2;
			eth_hdr_len += ip_hlen;
			goto tcp_packet;
		case IPPROTO_UDP:
			ip_hlen = ip->ip_hl << 2;
			eth_hdr_len += ip_hlen + sizeof(struct udphdr);
			th = NULL;
			goto udp_packet;
		default:
			goto failure;
		}
		break;
	case ETHERTYPE_IPV6:
		ip6 = (const struct ip6_hdr *)(mb->m_data + eth_hdr_len);
		if (unlikely(mb->m_len < eth_hdr_len + sizeof(*ip6)))
			goto failure;
		switch (ip6->ip6_nxt) {
		case IPPROTO_TCP:
			eth_hdr_len += sizeof(*ip6);
			goto tcp_packet;
		case IPPROTO_UDP:
			eth_hdr_len += sizeof(*ip6) + sizeof(struct udphdr);
			th = NULL;
			goto udp_packet;
		default:
			goto failure;
		}
		break;
	default:
		goto failure;
	}
tcp_packet:
	if (unlikely(mb->m_len < eth_hdr_len + sizeof(*th))) {
		/*
		 * The TCP header is only accepted when it starts
		 * exactly at the beginning of the next mbuf:
		 */
		const struct mbuf *m_th = mb->m_next;
		if (unlikely(mb->m_len != eth_hdr_len ||
		    m_th == NULL || m_th->m_len < sizeof(*th)))
			goto failure;
		th = (const struct tcphdr *)(m_th->m_data);
	} else {
		th = (const struct tcphdr *)(mb->m_data + eth_hdr_len);
	}
	tcp_hlen = th->th_off << 2;
	eth_hdr_len += tcp_hlen;
udp_packet:
	/*
	 * m_copydata() will be used on the remaining header which
	 * does not need to reside within the first m_len bytes of
	 * data:
	 */
	if (unlikely(mb->m_pkthdr.len < eth_hdr_len))
		goto failure;
	if (ppth != NULL)
		*ppth = th;
	return (eth_hdr_len);
failure:
	if (ppth != NULL)
		*ppth = NULL;
	return (0);
}
327 
/*
 * Locate a pointer inside a mbuf chain. Returns NULL upon failure.
 *
 * "mb" and "poffset" are in/out arguments: the current mbuf may be
 * advanced to the next one in the chain, with "poffset" recording the
 * number of bytes consumed before it.  At least "min_len" bytes at
 * absolute offset "eth_hdr_len" must be contiguous within the
 * resulting mbuf.
 */
static inline void *
mlx5e_parse_mbuf_chain(const struct mbuf **mb, int *poffset, int eth_hdr_len,
    int min_len)
{
	/* advance when the requested offset is exactly at the mbuf's end */
	if (unlikely(mb[0]->m_len == eth_hdr_len)) {
		poffset[0] = eth_hdr_len;
		if (unlikely((mb[0] = mb[0]->m_next) == NULL))
			return (NULL);
	}
	if (unlikely(mb[0]->m_len < eth_hdr_len - poffset[0] + min_len))
		return (NULL);
	return (mb[0]->m_data + eth_hdr_len - poffset[0]);
}
344 
/*
 * This function parses IPv4 and IPv6 packets looking for UDP, VXLAN
 * and inner TCP/UDP headers, filling in the software parser (SWP)
 * offsets and checksum flags of the given WQE along the way.
 *
 * The return value indicates the number of bytes from the beginning
 * of the packet until the first byte after the innermost parsed
 * header. If this function returns zero, the parsing failed.
 */
static int
mlx5e_get_vxlan_header_size(const struct mbuf *mb, struct mlx5e_tx_wqe *wqe,
    uint8_t cs_mask, uint8_t opcode)
{
	const struct ether_vlan_header *eh;
	struct ip *ip4;
	struct ip6_hdr *ip6;
	struct tcphdr *th;
	struct udphdr *udp;
	bool has_outer_vlan_tag;
	uint16_t eth_type;
	uint8_t ip_type;
	int pkt_hdr_len;
	int eth_hdr_len;
	int tcp_hlen;
	int ip_hlen;
	int offset;

	pkt_hdr_len = mb->m_pkthdr.len;
	has_outer_vlan_tag = (mb->m_flags & M_VLANTAG) != 0;
	offset = 0;

	/* parse the outer ethernet header, including an optional VLAN tag */
	eh = mtod(mb, const struct ether_vlan_header *);
	if (unlikely(mb->m_len < ETHER_HDR_LEN))
		return (0);

	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		if (unlikely(mb->m_len < ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN))
			return (0);
		eth_type = eh->evl_proto;
		eth_hdr_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		eth_type = eh->evl_encap_proto;
		eth_hdr_len = ETHER_HDR_LEN;
	}

	/* NOTE: all SWP offsets are stored in units of two bytes */
	switch (eth_type) {
	case htons(ETHERTYPE_IP):
		ip4 = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
		    sizeof(*ip4));
		if (unlikely(ip4 == NULL))
			return (0);
		ip_type = ip4->ip_p;
		if (unlikely(ip_type != IPPROTO_UDP))
			return (0);
		wqe->eth.swp_outer_l3_offset = eth_hdr_len / 2;
		wqe->eth.cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM;
		ip_hlen = ip4->ip_hl << 2;
		eth_hdr_len += ip_hlen;
		udp = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
		    sizeof(*udp));
		if (unlikely(udp == NULL))
			return (0);
		wqe->eth.swp_outer_l4_offset = eth_hdr_len / 2;
		wqe->eth.swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L4_TYPE;
		eth_hdr_len += sizeof(*udp);
		break;
	case htons(ETHERTYPE_IPV6):
		ip6 = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
		    sizeof(*ip6));
		if (unlikely(ip6 == NULL))
			return (0);
		ip_type = ip6->ip6_nxt;
		if (unlikely(ip_type != IPPROTO_UDP))
			return (0);
		wqe->eth.swp_outer_l3_offset = eth_hdr_len / 2;
		wqe->eth.cs_flags = MLX5_ETH_WQE_L4_CSUM;
		eth_hdr_len += sizeof(*ip6);
		udp = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
		    sizeof(*udp));
		if (unlikely(udp == NULL))
			return (0);
		wqe->eth.swp_outer_l4_offset = eth_hdr_len / 2;
		wqe->eth.swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L4_TYPE |
		    MLX5_ETH_WQE_SWP_OUTER_L3_TYPE;
		eth_hdr_len += sizeof(*udp);
		break;
	default:
		return (0);
	}

	/*
	 * If the hardware is not computing the inner IP checksum, then
	 * skip inlining beyond the outer UDP and VXLAN headers:
	 */
	if (unlikely((cs_mask & MLX5_ETH_WQE_L3_INNER_CSUM) == 0))
		goto done;
	if (unlikely(mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
	    8) == NULL))
		return (0);
	eth_hdr_len += 8;	/* skip the 8-byte VXLAN header */

	/* Check for ethernet header again. */
	eh = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len, ETHER_HDR_LEN);
	if (unlikely(eh == NULL))
		return (0);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		if (unlikely(mb->m_len < eth_hdr_len - offset + ETHER_HDR_LEN +
		    ETHER_VLAN_ENCAP_LEN))
			return (0);
		eth_type = eh->evl_proto;
		eth_hdr_len += ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		eth_type = eh->evl_encap_proto;
		eth_hdr_len += ETHER_HDR_LEN;
	}

	/* Check for IP header again. */
	switch (eth_type) {
	case htons(ETHERTYPE_IP):
		ip4 = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
		    sizeof(*ip4));
		if (unlikely(ip4 == NULL))
			return (0);
		wqe->eth.swp_inner_l3_offset = eth_hdr_len / 2;
		wqe->eth.cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM;
		ip_type = ip4->ip_p;
		ip_hlen = ip4->ip_hl << 2;
		eth_hdr_len += ip_hlen;
		break;
	case htons(ETHERTYPE_IPV6):
		ip6 = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
		    sizeof(*ip6));
		if (unlikely(ip6 == NULL))
			return (0);
		wqe->eth.swp_inner_l3_offset = eth_hdr_len / 2;
		wqe->eth.swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_TYPE;
		ip_type = ip6->ip6_nxt;
		eth_hdr_len += sizeof(*ip6);
		break;
	default:
		return (0);
	}

	/*
	 * If the hardware is not computing inner UDP/TCP checksum,
	 * then skip inlining the inner UDP/TCP header:
	 */
	if (unlikely((cs_mask & MLX5_ETH_WQE_L4_INNER_CSUM) == 0))
		goto done;

	switch (ip_type) {
	case IPPROTO_UDP:
		udp = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
		    sizeof(*udp));
		if (unlikely(udp == NULL))
			return (0);
		wqe->eth.swp_inner_l4_offset = (eth_hdr_len / 2);
		wqe->eth.cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM;
		wqe->eth.swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_TYPE;
		eth_hdr_len += sizeof(*udp);
		break;
	case IPPROTO_TCP:
		th = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len,
		    sizeof(*th));
		if (unlikely(th == NULL))
			return (0);
		wqe->eth.swp_inner_l4_offset = eth_hdr_len / 2;
		wqe->eth.cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM;
		tcp_hlen = th->th_off << 2;
		eth_hdr_len += tcp_hlen;
		break;
	default:
		return (0);
	}
done:
	if (unlikely(pkt_hdr_len < eth_hdr_len))
		return (0);

	/* Account for software inserted VLAN tag, if any. */
	if (unlikely(has_outer_vlan_tag)) {
		wqe->eth.swp_outer_l3_offset += ETHER_VLAN_ENCAP_LEN / 2;
		wqe->eth.swp_outer_l4_offset += ETHER_VLAN_ENCAP_LEN / 2;
		wqe->eth.swp_inner_l3_offset += ETHER_VLAN_ENCAP_LEN / 2;
		wqe->eth.swp_inner_l4_offset += ETHER_VLAN_ENCAP_LEN / 2;
	}

	/*
	 * When inner checksums are set, outer L4 checksum flag must
	 * be disabled.
	 */
	if (wqe->eth.cs_flags & (MLX5_ETH_WQE_L3_INNER_CSUM |
	    MLX5_ETH_WQE_L4_INNER_CSUM))
		wqe->eth.cs_flags &= ~MLX5_ETH_WQE_L4_CSUM;

	return (eth_hdr_len);
}
540 
/* Layout of a single DUMP work request: control plus one data segment. */
struct mlx5_wqe_dump_seg {
	struct mlx5_wqe_ctrl_seg ctrl;
	struct mlx5_wqe_data_seg data;
} __aligned(MLX5_SEND_WQE_BB);

/* A two data-segment DUMP WQE must occupy exactly one building block. */
CTASSERT(DIV_ROUND_UP(2, MLX5_SEND_WQEBB_NUM_DS) == 1);
547 
548 int
549 mlx5e_sq_dump_xmit(struct mlx5e_sq *sq, struct mlx5e_xmit_args *parg, struct mbuf **mbp)
550 {
551 	bus_dma_segment_t segs[MLX5E_MAX_TX_MBUF_FRAGS];
552 	struct mlx5_wqe_dump_seg *wqe;
553 	struct mlx5_wqe_dump_seg *wqe_last;
554 	int nsegs;
555 	int xsegs;
556 	u32 off;
557 	u32 msb;
558 	int err;
559 	int x;
560 	struct mbuf *mb;
561 	const u32 ds_cnt = 2;
562 	u16 pi;
563 	const u8 opcode = MLX5_OPCODE_DUMP;
564 
565 	/* get pointer to mbuf */
566 	mb = *mbp;
567 
568 	/* get producer index */
569 	pi = sq->pc & sq->wq.sz_m1;
570 
571 	sq->mbuf[pi].num_bytes = mb->m_pkthdr.len;
572 	sq->mbuf[pi].num_wqebbs = 0;
573 
574 	/* check number of segments in mbuf */
575 	err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
576 	    mb, segs, &nsegs, BUS_DMA_NOWAIT);
577 	if (err == EFBIG) {
578 		/* update statistics */
579 		sq->stats.defragged++;
580 		/* too many mbuf fragments */
581 		mb = m_defrag(*mbp, M_NOWAIT);
582 		if (mb == NULL) {
583 			mb = *mbp;
584 			goto tx_drop;
585 		}
586 		/* try again */
587 		err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
588 		    mb, segs, &nsegs, BUS_DMA_NOWAIT);
589 	}
590 
591 	if (err != 0)
592 		goto tx_drop;
593 
594 	/* make sure all mbuf data, if any, is visible to the bus */
595 	bus_dmamap_sync(sq->dma_tag, sq->mbuf[pi].dma_map,
596 	    BUS_DMASYNC_PREWRITE);
597 
598 	/* compute number of real DUMP segments */
599 	msb = sq->priv->params_ethtool.hw_mtu_msb;
600 	for (x = xsegs = 0; x != nsegs; x++)
601 		xsegs += howmany((u32)segs[x].ds_len, msb);
602 
603 	/* check if there are no segments */
604 	if (unlikely(xsegs == 0)) {
605 		bus_dmamap_unload(sq->dma_tag, sq->mbuf[pi].dma_map);
606 		m_freem(mb);
607 		*mbp = NULL;	/* safety clear */
608 		return (0);
609 	}
610 
611 	/* return ENOBUFS if the queue is full */
612 	if (unlikely(!mlx5e_sq_has_room_for(sq, xsegs))) {
613 		sq->stats.enobuf++;
614 		bus_dmamap_unload(sq->dma_tag, sq->mbuf[pi].dma_map);
615 		m_freem(mb);
616 		*mbp = NULL;	/* safety clear */
617 		return (ENOBUFS);
618 	}
619 
620 	wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
621 	wqe_last = mlx5_wq_cyc_get_wqe(&sq->wq, sq->wq.sz_m1);
622 
623 	for (x = 0; x != nsegs; x++) {
624 		for (off = 0; off < segs[x].ds_len; off += msb) {
625 			u32 len = segs[x].ds_len - off;
626 
627 			/* limit length */
628 			if (likely(len > msb))
629 				len = msb;
630 
631 			memset(&wqe->ctrl, 0, sizeof(wqe->ctrl));
632 
633 			/* fill control segment */
634 			wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode);
635 			wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
636 			wqe->ctrl.imm = cpu_to_be32(parg->tisn << 8);
637 
638 			/* fill data segment */
639 			wqe->data.addr = cpu_to_be64((uint64_t)segs[x].ds_addr + off);
640 			wqe->data.lkey = sq->mkey_be;
641 			wqe->data.byte_count = cpu_to_be32(len);
642 
643 			/* advance to next building block */
644 			if (unlikely(wqe == wqe_last))
645 				wqe = mlx5_wq_cyc_get_wqe(&sq->wq, 0);
646 			else
647 				wqe++;
648 
649 			sq->mbuf[pi].num_wqebbs++;
650 			sq->pc++;
651 		}
652 	}
653 
654 	wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
655 	wqe_last = mlx5_wq_cyc_get_wqe(&sq->wq, (sq->pc - 1) & sq->wq.sz_m1);
656 
657 	/* put in place data fence */
658 	wqe->ctrl.fm_ce_se |= MLX5_FENCE_MODE_INITIATOR_SMALL;
659 
660 	/* check if we should generate a completion event */
661 	if (mlx5e_do_send_cqe_inline(sq))
662 		wqe_last->ctrl.fm_ce_se |= MLX5_WQE_CTRL_CQ_UPDATE;
663 
664 	/* copy data for doorbell */
665 	memcpy(sq->doorbell.d32, wqe_last, sizeof(sq->doorbell.d32));
666 
667 	/* store pointer to mbuf */
668 	sq->mbuf[pi].mbuf = mb;
669 	sq->mbuf[pi].mst = m_snd_tag_ref(parg->mst);
670 
671 	/* count all traffic going out */
672 	sq->stats.packets++;
673 	sq->stats.bytes += sq->mbuf[pi].num_bytes;
674 
675 	*mbp = NULL;	/* safety clear */
676 	return (0);
677 
678 tx_drop:
679 	sq->stats.dropped++;
680 	*mbp = NULL;
681 	m_freem(mb);
682 	return err;
683 }
684 
685 int
686 mlx5e_sq_xmit(struct mlx5e_sq *sq, struct mbuf **mbp)
687 {
688 	bus_dma_segment_t segs[MLX5E_MAX_TX_MBUF_FRAGS];
689 	struct mlx5e_xmit_args args = {};
690 	struct mlx5_wqe_data_seg *dseg;
691 	struct mlx5e_tx_wqe *wqe;
692 	if_t ifp;
693 	int nsegs;
694 	int err;
695 	int x;
696 	struct mbuf *mb;
697 	u16 ds_cnt;
698 	u16 pi;
699 	u8 opcode;
700 
701 #ifdef KERN_TLS
702 top:
703 #endif
704 	/* Return ENOBUFS if the queue is full */
705 	if (unlikely(!mlx5e_sq_has_room_for(sq, 2 * MLX5_SEND_WQE_MAX_WQEBBS))) {
706 		sq->stats.enobuf++;
707 		return (ENOBUFS);
708 	}
709 
710 	/* Align SQ edge with NOPs to avoid WQE wrap around */
711 	pi = ((~sq->pc) & sq->wq.sz_m1);
712 	if (pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)) {
713 		/* Send one multi NOP message instead of many */
714 		mlx5e_send_nop(sq, (pi + 1) * MLX5_SEND_WQEBB_NUM_DS);
715 		pi = ((~sq->pc) & sq->wq.sz_m1);
716 		if (pi < (MLX5_SEND_WQE_MAX_WQEBBS - 1)) {
717 			sq->stats.enobuf++;
718 			return (ENOMEM);
719 		}
720 	}
721 
722 #ifdef KERN_TLS
723 	/* Special handling for TLS packets, if any */
724 	switch (mlx5e_sq_tls_xmit(sq, &args, mbp)) {
725 	case MLX5E_TLS_LOOP:
726 		goto top;
727 	case MLX5E_TLS_FAILURE:
728 		mb = *mbp;
729 		err = ENOMEM;
730 		goto tx_drop;
731 	case MLX5E_TLS_DEFERRED:
732 		return (0);
733 	case MLX5E_TLS_CONTINUE:
734 	default:
735 		break;
736 	}
737 #endif
738 
739 	/* Setup local variables */
740 	pi = sq->pc & sq->wq.sz_m1;
741 	wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
742 	ifp = sq->ifp;
743 
744 	memset(wqe, 0, sizeof(*wqe));
745 
746 	/* get pointer to mbuf */
747 	mb = *mbp;
748 
749 	/* Send a copy of the frame to the BPF listener, if any */
750 	if (ifp != NULL && if_getbpf(ifp) != NULL)
751 		ETHER_BPF_MTAP(ifp, mb);
752 
753 	if (mb->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO)) {
754 		wqe->eth.cs_flags |= MLX5_ETH_WQE_L3_CSUM;
755 	}
756 	if (mb->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO)) {
757 		wqe->eth.cs_flags |= MLX5_ETH_WQE_L4_CSUM;
758 	}
759 	if (wqe->eth.cs_flags == 0) {
760 		sq->stats.csum_offload_none++;
761 	}
762 	if (mb->m_pkthdr.csum_flags & CSUM_TSO) {
763 		u32 payload_len;
764 		u32 mss = mb->m_pkthdr.tso_segsz;
765 		u32 num_pkts;
766 
767 		wqe->eth.mss = cpu_to_be16(mss);
768 		opcode = MLX5_OPCODE_LSO;
769 		if (args.ihs == 0)
770 			args.ihs = mlx5e_get_full_header_size(mb, NULL);
771 		if (unlikely(args.ihs == 0)) {
772 			err = EINVAL;
773 			goto tx_drop;
774 		}
775 		payload_len = mb->m_pkthdr.len - args.ihs;
776 		if (payload_len == 0)
777 			num_pkts = 1;
778 		else
779 			num_pkts = DIV_ROUND_UP(payload_len, mss);
780 		sq->mbuf[pi].num_bytes = payload_len + (num_pkts * args.ihs);
781 
782 
783 		sq->stats.tso_packets++;
784 		sq->stats.tso_bytes += payload_len;
785 	} else if (mb->m_pkthdr.csum_flags & CSUM_ENCAP_VXLAN) {
786 		/* check for inner TCP TSO first */
787 		if (mb->m_pkthdr.csum_flags & (CSUM_INNER_IP_TSO |
788 		    CSUM_INNER_IP6_TSO)) {
789 			u32 payload_len;
790 			u32 mss = mb->m_pkthdr.tso_segsz;
791 			u32 num_pkts;
792 
793 			wqe->eth.mss = cpu_to_be16(mss);
794 			opcode = MLX5_OPCODE_LSO;
795 
796 			if (likely(args.ihs == 0)) {
797 				args.ihs = mlx5e_get_vxlan_header_size(mb, wqe,
798 				       MLX5_ETH_WQE_L3_INNER_CSUM |
799 				       MLX5_ETH_WQE_L4_INNER_CSUM |
800 				       MLX5_ETH_WQE_L4_CSUM |
801 				       MLX5_ETH_WQE_L3_CSUM,
802 				       opcode);
803 				if (unlikely(args.ihs == 0)) {
804 					err = EINVAL;
805 					goto tx_drop;
806 				}
807 			}
808 
809 			payload_len = mb->m_pkthdr.len - args.ihs;
810 			if (payload_len == 0)
811 				num_pkts = 1;
812 			else
813 				num_pkts = DIV_ROUND_UP(payload_len, mss);
814 			sq->mbuf[pi].num_bytes = payload_len +
815 			    num_pkts * args.ihs;
816 
817 			sq->stats.tso_packets++;
818 			sq->stats.tso_bytes += payload_len;
819 		} else {
820 			opcode = MLX5_OPCODE_SEND;
821 
822 			if (likely(args.ihs == 0)) {
823 				uint8_t cs_mask;
824 
825 				if (mb->m_pkthdr.csum_flags &
826 				    (CSUM_INNER_IP_TCP | CSUM_INNER_IP_UDP |
827 				     CSUM_INNER_IP6_TCP | CSUM_INNER_IP6_UDP)) {
828 					cs_mask =
829 					    MLX5_ETH_WQE_L3_INNER_CSUM |
830 					    MLX5_ETH_WQE_L4_INNER_CSUM |
831 					    MLX5_ETH_WQE_L4_CSUM |
832 					    MLX5_ETH_WQE_L3_CSUM;
833 				} else if (mb->m_pkthdr.csum_flags & CSUM_INNER_IP) {
834 					cs_mask =
835 					    MLX5_ETH_WQE_L3_INNER_CSUM |
836 					    MLX5_ETH_WQE_L4_CSUM |
837 					    MLX5_ETH_WQE_L3_CSUM;
838 				} else {
839 					cs_mask =
840 					    MLX5_ETH_WQE_L4_CSUM |
841 					    MLX5_ETH_WQE_L3_CSUM;
842 				}
843 				args.ihs = mlx5e_get_vxlan_header_size(mb, wqe,
844 				    cs_mask, opcode);
845 				if (unlikely(args.ihs == 0)) {
846 					err = EINVAL;
847 					goto tx_drop;
848 				}
849 			}
850 
851 			sq->mbuf[pi].num_bytes = max_t (unsigned int,
852 			    mb->m_pkthdr.len, ETHER_MIN_LEN - ETHER_CRC_LEN);
853 		}
854 	} else {
855 		opcode = MLX5_OPCODE_SEND;
856 
857 		if (args.ihs == 0) {
858 			switch (sq->min_inline_mode) {
859 			case MLX5_INLINE_MODE_IP:
860 			case MLX5_INLINE_MODE_TCP_UDP:
861 				args.ihs = mlx5e_get_full_header_size(mb, NULL);
862 				if (unlikely(args.ihs == 0))
863 					args.ihs = mlx5e_get_l2_header_size(sq, mb);
864 				break;
865 			case MLX5_INLINE_MODE_L2:
866 				args.ihs = mlx5e_get_l2_header_size(sq, mb);
867 				break;
868 			case MLX5_INLINE_MODE_NONE:
869 				/* FALLTHROUGH */
870 			default:
871 				if ((mb->m_flags & M_VLANTAG) != 0 &&
872 				    (sq->min_insert_caps & MLX5E_INSERT_VLAN) != 0) {
873 					/* inlining VLAN data is not required */
874 					wqe->eth.vlan_cmd = htons(0x8000); /* bit 0 CVLAN */
875 					wqe->eth.vlan_hdr = htons(mb->m_pkthdr.ether_vtag);
876 					args.ihs = 0;
877 				} else if ((mb->m_flags & M_VLANTAG) == 0 &&
878 				    (sq->min_insert_caps & MLX5E_INSERT_NON_VLAN) != 0) {
879 					/* inlining non-VLAN data is not required */
880 					args.ihs = 0;
881 				} else {
882 					/* we are forced to inlining L2 header, if any */
883 					args.ihs = mlx5e_get_l2_header_size(sq, mb);
884 				}
885 				break;
886 			}
887 		}
888 		sq->mbuf[pi].num_bytes = max_t (unsigned int,
889 		    mb->m_pkthdr.len, ETHER_MIN_LEN - ETHER_CRC_LEN);
890 	}
891 
892 	if (likely(args.ihs == 0)) {
893 		/* nothing to inline */
894 	} else if ((mb->m_flags & M_VLANTAG) != 0) {
895 		struct ether_vlan_header *eh = (struct ether_vlan_header *)
896 		    wqe->eth.inline_hdr_start;
897 
898 		/* Range checks */
899 		if (unlikely(args.ihs > (sq->max_inline - ETHER_VLAN_ENCAP_LEN))) {
900 			if (mb->m_pkthdr.csum_flags & (CSUM_TSO | CSUM_ENCAP_VXLAN)) {
901 				err = EINVAL;
902 				goto tx_drop;
903 			}
904 			args.ihs = (sq->max_inline - ETHER_VLAN_ENCAP_LEN);
905 		} else if (unlikely(args.ihs < ETHER_HDR_LEN)) {
906 			err = EINVAL;
907 			goto tx_drop;
908 		}
909 		m_copydata(mb, 0, ETHER_HDR_LEN, (caddr_t)eh);
910 		m_adj(mb, ETHER_HDR_LEN);
911 		/* Insert 4 bytes VLAN tag into data stream */
912 		eh->evl_proto = eh->evl_encap_proto;
913 		eh->evl_encap_proto = htons(ETHERTYPE_VLAN);
914 		eh->evl_tag = htons(mb->m_pkthdr.ether_vtag);
915 		/* Copy rest of header data, if any */
916 		m_copydata(mb, 0, args.ihs - ETHER_HDR_LEN, (caddr_t)(eh + 1));
917 		m_adj(mb, args.ihs - ETHER_HDR_LEN);
918 		/* Extend header by 4 bytes */
919 		args.ihs += ETHER_VLAN_ENCAP_LEN;
920 		wqe->eth.inline_hdr_sz = cpu_to_be16(args.ihs);
921 	} else {
922 		/* check if inline header size is too big */
923 		if (unlikely(args.ihs > sq->max_inline)) {
924 			if (unlikely(mb->m_pkthdr.csum_flags & (CSUM_TSO |
925 			    CSUM_ENCAP_VXLAN))) {
926 				err = EINVAL;
927 				goto tx_drop;
928 			}
929 			args.ihs = sq->max_inline;
930 		}
931 		m_copydata(mb, 0, args.ihs, wqe->eth.inline_hdr_start);
932 		m_adj(mb, args.ihs);
933 		wqe->eth.inline_hdr_sz = cpu_to_be16(args.ihs);
934 	}
935 
936 	ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS;
937 	if (args.ihs > sizeof(wqe->eth.inline_hdr_start)) {
938 		ds_cnt += DIV_ROUND_UP(args.ihs - sizeof(wqe->eth.inline_hdr_start),
939 		    MLX5_SEND_WQE_DS);
940 	}
941 	dseg = ((struct mlx5_wqe_data_seg *)&wqe->ctrl) + ds_cnt;
942 
943 	err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
944 	    mb, segs, &nsegs, BUS_DMA_NOWAIT);
945 	if (err == EFBIG) {
946 		/* Update statistics */
947 		sq->stats.defragged++;
948 		/* Too many mbuf fragments */
949 		mb = m_defrag(*mbp, M_NOWAIT);
950 		if (mb == NULL) {
951 			mb = *mbp;
952 			goto tx_drop;
953 		}
954 		/* Try again */
955 		err = bus_dmamap_load_mbuf_sg(sq->dma_tag, sq->mbuf[pi].dma_map,
956 		    mb, segs, &nsegs, BUS_DMA_NOWAIT);
957 	}
958 	/* Catch errors */
959 	if (err != 0)
960 		goto tx_drop;
961 
962 	/* Make sure all mbuf data, if any, is visible to the bus */
963 	if (nsegs != 0) {
964 		bus_dmamap_sync(sq->dma_tag, sq->mbuf[pi].dma_map,
965 		    BUS_DMASYNC_PREWRITE);
966 	} else {
967 		/* All data was inlined, free the mbuf. */
968 		bus_dmamap_unload(sq->dma_tag, sq->mbuf[pi].dma_map);
969 		m_freem(mb);
970 		mb = NULL;
971 	}
972 
973 	for (x = 0; x != nsegs; x++) {
974 		if (segs[x].ds_len == 0)
975 			continue;
976 		dseg->addr = cpu_to_be64((uint64_t)segs[x].ds_addr);
977 		dseg->lkey = sq->mkey_be;
978 		dseg->byte_count = cpu_to_be32((uint32_t)segs[x].ds_len);
979 		dseg++;
980 	}
981 
982 	ds_cnt = (dseg - ((struct mlx5_wqe_data_seg *)&wqe->ctrl));
983 
984 	wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode);
985 	wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
986 	wqe->ctrl.imm = cpu_to_be32(args.tisn << 8);
987 
988 	if (mlx5e_do_send_cqe_inline(sq))
989 		wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
990 	else
991 		wqe->ctrl.fm_ce_se = 0;
992 
993 	/* Copy data for doorbell */
994 	memcpy(sq->doorbell.d32, &wqe->ctrl, sizeof(sq->doorbell.d32));
995 
996 	/* Store pointer to mbuf */
997 	sq->mbuf[pi].mbuf = mb;
998 	sq->mbuf[pi].num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
999 	if (unlikely(args.mst != NULL))
1000 		sq->mbuf[pi].mst = m_snd_tag_ref(args.mst);
1001 	else
1002 		MPASS(sq->mbuf[pi].mst == NULL);
1003 
1004 	sq->pc += sq->mbuf[pi].num_wqebbs;
1005 
1006 	/* Count all traffic going out */
1007 	sq->stats.packets++;
1008 	sq->stats.bytes += sq->mbuf[pi].num_bytes;
1009 
1010 	*mbp = NULL;	/* safety clear */
1011 	return (0);
1012 
1013 tx_drop:
1014 	sq->stats.dropped++;
1015 	*mbp = NULL;
1016 	m_freem(mb);
1017 	return err;
1018 }
1019 
/*
 * Reap up to "budget" transmit completions from the send queue's
 * completion queue, unmapping and freeing the mbufs and releasing the
 * send tags of all work requests covered by each CQE.
 */
static void
mlx5e_poll_tx_cq(struct mlx5e_sq *sq, int budget)
{
	u16 sqcc;

	/*
	 * sq->cc must be updated only after mlx5_cqwq_update_db_record(),
	 * otherwise a cq overrun may occur
	 */
	sqcc = sq->cc;

	while (budget > 0) {
		struct mlx5_cqe64 *cqe;
		struct m_snd_tag *mst;
		struct mbuf *mb;
		bool match;
		u16 sqcc_this;
		u16 delta;
		u16 x;
		u16 ci;

		cqe = mlx5e_get_cqe(&sq->cq);
		if (!cqe)
			break;

		mlx5_cqwq_pop(&sq->cq.wq);

		/* check if the completion event indicates an error */
		if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
			mlx5e_dump_err_cqe(&sq->cq, sq->sqn, (const void *)cqe);
			sq->stats.cqe_err++;
		}

		/* setup local variables */
		sqcc_this = be16toh(cqe->wqe_counter);
		match = false;

		/* update budget according to the event factor */
		budget -= sq->cev_factor;

		/*
		 * Advance the consumer counter entry by entry until
		 * the WQE reported by the CQE has been reclaimed:
		 */
		for (x = 0;; x++) {
			if (unlikely(match != false)) {
				break;
			} else if (unlikely(x == sq->cev_factor)) {
				/* WQE counter match not found */
				sq->stats.cqe_err++;
				break;
			}
			ci = sqcc & sq->wq.sz_m1;
			delta = sqcc_this - sqcc;
			/* does this entry contain the reported WQE counter? */
			match = (delta < sq->mbuf[ci].num_wqebbs);
			mb = sq->mbuf[ci].mbuf;
			sq->mbuf[ci].mbuf = NULL;
			mst = sq->mbuf[ci].mst;
			sq->mbuf[ci].mst = NULL;

			if (unlikely(mb == NULL)) {
				/* entries without an mbuf and no bytes are NOPs */
				if (unlikely(sq->mbuf[ci].num_bytes == 0))
					sq->stats.nop++;
			} else {
				bus_dmamap_sync(sq->dma_tag, sq->mbuf[ci].dma_map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(sq->dma_tag, sq->mbuf[ci].dma_map);

				/* Free transmitted mbuf */
				m_freem(mb);
			}

			if (unlikely(mst != NULL))
				m_snd_tag_rele(mst);

			sqcc += sq->mbuf[ci].num_wqebbs;
		}
	}

	mlx5_cqwq_update_db_record(&sq->cq.wq);

	/* Ensure cq space is freed before enabling more cqes */
	atomic_thread_fence_rel();

	sq->cc = sqcc;
}
1102 
/*
 * Transmit one mbuf on the given send queue.  The send queue lock
 * must be held by the caller.  The mbuf is consumed in all cases.
 * Rings the doorbell, if needed, and manages the completion event
 * timer which flushes the transmit ring on timeout.
 */
static int
mlx5e_xmit_locked(if_t ifp, struct mlx5e_sq *sq, struct mbuf *mb)
{
	int err = 0;

	/* refuse to transmit when the interface or queue is down */
	if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0 ||
	    READ_ONCE(sq->running) == 0)) {
		m_freem(mb);
		return (ENETDOWN);
	}

	/* Do transmit */
	if (mlx5e_sq_xmit(sq, &mb) != 0) {
		/* NOTE: m_freem() is NULL safe */
		m_freem(mb);
		err = ENOBUFS;
	}

	/* Write the doorbell record, if any. */
	mlx5e_tx_notify_hw(sq, false);

	/*
	 * Check if we need to start the event timer which flushes the
	 * transmit ring on timeout:
	 */
	if (unlikely(sq->cev_next_state == MLX5E_CEV_STATE_INITIAL &&
	    sq->cev_factor != 1)) {
		/* start the timer */
		mlx5e_sq_cev_timeout(sq);
	} else {
		/* don't send NOPs yet */
		sq->cev_next_state = MLX5E_CEV_STATE_HOLD_NOPS;
	}
	return (err);
}
1138 
/*
 * Network stack transmit entry point.  Packets carrying a send tag
 * are steered to the queue resolved from the tag; all other packets
 * go through regular hash-based queue selection.
 */
int
mlx5e_xmit(if_t ifp, struct mbuf *mb)
{
	struct mlx5e_sq *sq;
	int ret;

	if (mb->m_pkthdr.csum_flags & CSUM_SND_TAG) {
		MPASS(mb->m_pkthdr.snd_tag->ifp == ifp);
		sq = mlx5e_select_queue_by_send_tag(ifp, mb);
		if (unlikely(sq == NULL)) {
			/* fall back to regular queue selection */
			goto select_queue;
		}
	} else {
select_queue:
		sq = mlx5e_select_queue(ifp, mb);
		if (unlikely(sq == NULL)) {
			/* Free mbuf */
			m_freem(mb);

			/* Invalid send queue */
			return (ENXIO);
		}
	}

	mtx_lock(&sq->lock);
	ret = mlx5e_xmit_locked(ifp, sq, mb);
	mtx_unlock(&sq->lock);

	return (ret);
}
1169 
/*
 * Send queue completion event handler.  Polls the completion queue
 * under the completion lock and then re-arms the CQ for the next
 * event.
 */
void
mlx5e_tx_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe __unused)
{
	struct mlx5e_sq *sq = container_of(mcq, struct mlx5e_sq, cq.mcq);

	mtx_lock(&sq->comp_lock);
	mlx5e_poll_tx_cq(sq, MLX5E_BUDGET_MAX);
	mlx5e_cq_arm(&sq->cq, MLX5_GET_DOORBELL_LOCK(&sq->priv->doorbell_lock));
	mtx_unlock(&sq->comp_lock);
}
1180