1 // SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
2 
3 #include <linux/dma-mapping.h>
4 #include <linux/ip.h>
5 #include <linux/pci.h>
6 #include <linux/skbuff.h>
7 #include <linux/tcp.h>
8 #include <uapi/linux/udp.h>
9 #include "funeth.h"
10 #include "funeth_ktls.h"
11 #include "funeth_txrx.h"
12 #include "funeth_trace.h"
13 #include "fun_queue.h"
14 
/* fun_xdp_tx() cleans an XDP Tx queue before queuing a frame when fewer than
 * this many descriptors are available.
 */
#define FUN_XDP_CLEAN_THRES 32
/* Maximum number of packets reclaimed by one such cleaning pass. */
#define FUN_XDP_CLEAN_BATCH 16
17 
18 /* DMA-map a packet and return the (length, DMA_address) pairs for its
19  * segments. If a mapping error occurs -ENOMEM is returned. The packet
20  * consists of an skb_shared_info and one additional address/length pair.
21  */
22 static int fun_map_pkt(struct device *dev, const struct skb_shared_info *si,
23 		       void *data, unsigned int data_len,
24 		       dma_addr_t *addr, unsigned int *len)
25 {
26 	const skb_frag_t *fp, *end;
27 
28 	*len = data_len;
29 	*addr = dma_map_single(dev, data, *len, DMA_TO_DEVICE);
30 	if (dma_mapping_error(dev, *addr))
31 		return -ENOMEM;
32 
33 	if (!si)
34 		return 0;
35 
36 	for (fp = si->frags, end = fp + si->nr_frags; fp < end; fp++) {
37 		*++len = skb_frag_size(fp);
38 		*++addr = skb_frag_dma_map(dev, fp, 0, *len, DMA_TO_DEVICE);
39 		if (dma_mapping_error(dev, *addr))
40 			goto unwind;
41 	}
42 	return 0;
43 
44 unwind:
45 	while (fp-- > si->frags)
46 		dma_unmap_page(dev, *--addr, skb_frag_size(fp), DMA_TO_DEVICE);
47 
48 	dma_unmap_single(dev, addr[-1], data_len, DMA_TO_DEVICE);
49 	return -ENOMEM;
50 }
51 
52 /* Return the address just past the end of a Tx queue's descriptor ring.
53  * It exploits the fact that the HW writeback area is just after the end
54  * of the descriptor ring.
55  */
56 static void *txq_end(const struct funeth_txq *q)
57 {
58 	return (void *)q->hw_wb;
59 }
60 
/* Return how much room is left in a Tx ring between address @p and the end
 * of the ring. The result is 0 when @p is exactly at the end.
 */
static unsigned int txq_to_end(const struct funeth_txq *q, void *p)
{
	void *ring_end = txq_end(q);

	return ring_end - p;
}
68 
69 /* Return the number of Tx descriptors occupied by a Tx request. */
70 static unsigned int tx_req_ndesc(const struct fun_eth_tx_req *req)
71 {
72 	return DIV_ROUND_UP(req->len8, FUNETH_SQE_SIZE / 8);
73 }
74 
75 /* Write a gather list to the Tx descriptor at @req from @ngle address/length
76  * pairs.
77  */
78 static struct fun_dataop_gl *fun_write_gl(const struct funeth_txq *q,
79 					  struct fun_eth_tx_req *req,
80 					  const dma_addr_t *addrs,
81 					  const unsigned int *lens,
82 					  unsigned int ngle)
83 {
84 	struct fun_dataop_gl *gle;
85 	unsigned int i;
86 
87 	req->len8 = (sizeof(*req) + ngle * sizeof(*gle)) / 8;
88 
89 	for (i = 0, gle = (struct fun_dataop_gl *)req->dataop.imm;
90 	     i < ngle && txq_to_end(q, gle); i++, gle++)
91 		fun_dataop_gl_init(gle, 0, 0, lens[i], addrs[i]);
92 
93 	if (txq_to_end(q, gle) == 0) {
94 		gle = (struct fun_dataop_gl *)q->desc;
95 		for ( ; i < ngle; i++, gle++)
96 			fun_dataop_gl_init(gle, 0, 0, lens[i], addrs[i]);
97 	}
98 
99 	return gle;
100 }
101 
102 static __be16 tcp_hdr_doff_flags(const struct tcphdr *th)
103 {
104 	return *(__be16 *)&tcp_flag_word(th);
105 }
106 
107 static struct sk_buff *fun_tls_tx(struct sk_buff *skb, struct funeth_txq *q,
108 				  unsigned int *tls_len)
109 {
110 #if IS_ENABLED(CONFIG_TLS_DEVICE)
111 	const struct fun_ktls_tx_ctx *tls_ctx;
112 	u32 datalen, seq;
113 
114 	datalen = skb->len - skb_tcp_all_headers(skb);
115 	if (!datalen)
116 		return skb;
117 
118 	if (likely(!tls_offload_tx_resync_pending(skb->sk))) {
119 		seq = ntohl(tcp_hdr(skb)->seq);
120 		tls_ctx = tls_driver_ctx(skb->sk, TLS_OFFLOAD_CTX_DIR_TX);
121 
122 		if (likely(tls_ctx->next_seq == seq)) {
123 			*tls_len = datalen;
124 			return skb;
125 		}
126 		if (seq - tls_ctx->next_seq < U32_MAX / 4) {
127 			tls_offload_tx_resync_request(skb->sk, seq,
128 						      tls_ctx->next_seq);
129 		}
130 	}
131 
132 	FUN_QSTAT_INC(q, tx_tls_fallback);
133 	skb = tls_encrypt_skb(skb);
134 	if (!skb)
135 		FUN_QSTAT_INC(q, tx_tls_drops);
136 
137 	return skb;
138 #else
139 	return NULL;
140 #endif
141 }
142 
/* Write as many descriptors as needed for the supplied skb starting at the
 * current producer location. The caller has made certain enough descriptors
 * are available.
 *
 * @tls_len is non-zero when the packet is to be sent with the TLS flag set;
 * it is added to the TLS context's next sequence number and to the TLS byte
 * statistics.
 *
 * The producer counter is not advanced here; the caller does that using the
 * return value.
 *
 * Returns the number of descriptors written, 0 on error.
 */
static unsigned int write_pkt_desc(struct sk_buff *skb, struct funeth_txq *q,
				   unsigned int tls_len)
{
	unsigned int extra_bytes = 0, extra_pkts = 0;
	unsigned int idx = q->prod_cnt & q->mask;
	const struct skb_shared_info *shinfo;
	unsigned int lens[MAX_SKB_FRAGS + 1];
	dma_addr_t addrs[MAX_SKB_FRAGS + 1];
	struct fun_eth_tx_req *req;
	struct fun_dataop_gl *gle;
	const struct tcphdr *th;
	unsigned int l4_hlen;
	unsigned int ngle;
	u16 flags;

	/* DMA-map the linear data and all fragments before the descriptor is
	 * touched; a mapping failure is the only error exit.
	 */
	shinfo = skb_shinfo(skb);
	if (unlikely(fun_map_pkt(q->dma_dev, shinfo, skb->data,
				 skb_headlen(skb), addrs, lens))) {
		FUN_QSTAT_INC(q, tx_map_err);
		return 0;
	}

	/* Initialize the fixed part of the Tx request at the producer index. */
	req = fun_tx_desc_addr(q, idx);
	req->op = FUN_ETH_OP_TX;
	req->len8 = 0;
	req->flags = 0;
	req->suboff8 = offsetof(struct fun_eth_tx_req, dataop);
	req->repr_idn = 0;
	req->encap_proto = 0;

	/* Select the offload settings: segmentation offload (encapsulated TCP,
	 * UDP, or plain TCP), checksum offload only, or no offload.
	 */
	if (likely(shinfo->gso_size)) {
		if (skb->encapsulation) {
			u16 ol4_ofst;

			flags = FUN_ETH_OUTER_EN | FUN_ETH_INNER_LSO |
				FUN_ETH_UPDATE_INNER_L4_CKSUM |
				FUN_ETH_UPDATE_OUTER_L3_LEN;
			if (shinfo->gso_type & (SKB_GSO_UDP_TUNNEL |
						SKB_GSO_UDP_TUNNEL_CSUM)) {
				flags |= FUN_ETH_UPDATE_OUTER_L4_LEN |
					 FUN_ETH_OUTER_UDP;
				if (shinfo->gso_type & SKB_GSO_UDP_TUNNEL_CSUM)
					flags |= FUN_ETH_UPDATE_OUTER_L4_CKSUM;
				ol4_ofst = skb_transport_offset(skb);
			} else {
				ol4_ofst = skb_inner_network_offset(skb);
			}

			/* outer IP version */
			if (ip_hdr(skb)->version == 4)
				flags |= FUN_ETH_UPDATE_OUTER_L3_CKSUM;
			else
				flags |= FUN_ETH_OUTER_IPV6;

			/* inner IP version, when an inner network header is set */
			if (skb->inner_network_header) {
				if (inner_ip_hdr(skb)->version == 4)
					flags |= FUN_ETH_UPDATE_INNER_L3_CKSUM |
						 FUN_ETH_UPDATE_INNER_L3_LEN;
				else
					flags |= FUN_ETH_INNER_IPV6 |
						 FUN_ETH_UPDATE_INNER_L3_LEN;
			}
			th = inner_tcp_hdr(skb);
			l4_hlen = __tcp_hdrlen(th);
			fun_eth_offload_init(&req->offload, flags,
					     shinfo->gso_size,
					     tcp_hdr_doff_flags(th), 0,
					     skb_inner_network_offset(skb),
					     skb_inner_transport_offset(skb),
					     skb_network_offset(skb), ol4_ofst);
			FUN_QSTAT_INC(q, tx_encap_tso);
		} else if (shinfo->gso_type & SKB_GSO_UDP_L4) {
			flags = FUN_ETH_INNER_LSO | FUN_ETH_INNER_UDP |
				FUN_ETH_UPDATE_INNER_L4_CKSUM |
				FUN_ETH_UPDATE_INNER_L4_LEN |
				FUN_ETH_UPDATE_INNER_L3_LEN;

			if (ip_hdr(skb)->version == 4)
				flags |= FUN_ETH_UPDATE_INNER_L3_CKSUM;
			else
				flags |= FUN_ETH_INNER_IPV6;

			l4_hlen = sizeof(struct udphdr);
			fun_eth_offload_init(&req->offload, flags,
					     shinfo->gso_size,
					     cpu_to_be16(l4_hlen << 10), 0,
					     skb_network_offset(skb),
					     skb_transport_offset(skb), 0, 0);
			FUN_QSTAT_INC(q, tx_uso);
		} else {
			/* HW considers one set of headers as inner */
			flags = FUN_ETH_INNER_LSO |
				FUN_ETH_UPDATE_INNER_L4_CKSUM |
				FUN_ETH_UPDATE_INNER_L3_LEN;
			if (shinfo->gso_type & SKB_GSO_TCPV6)
				flags |= FUN_ETH_INNER_IPV6;
			else
				flags |= FUN_ETH_UPDATE_INNER_L3_CKSUM;
			th = tcp_hdr(skb);
			l4_hlen = __tcp_hdrlen(th);
			fun_eth_offload_init(&req->offload, flags,
					     shinfo->gso_size,
					     tcp_hdr_doff_flags(th), 0,
					     skb_network_offset(skb),
					     skb_transport_offset(skb), 0, 0);
			FUN_QSTAT_INC(q, tx_tso);
		}

		/* every segment counts towards the checksum-offload statistic */
		u64_stats_update_begin(&q->syncp);
		q->stats.tx_cso += shinfo->gso_segs;
		u64_stats_update_end(&q->syncp);

		/* Each segment beyond the first adds one packet and one more
		 * copy of the headers (inner L4 offset + L4 header length) to
		 * the Tx statistics.
		 */
		extra_pkts = shinfo->gso_segs - 1;
		extra_bytes = (be16_to_cpu(req->offload.inner_l4_off) +
			       l4_hlen) * extra_pkts;
	} else if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
		flags = FUN_ETH_UPDATE_INNER_L4_CKSUM;
		/* a checksum at the UDP header's offset identifies UDP */
		if (skb->csum_offset == offsetof(struct udphdr, check))
			flags |= FUN_ETH_INNER_UDP;
		fun_eth_offload_init(&req->offload, flags, 0, 0, 0, 0,
				     skb_checksum_start_offset(skb), 0, 0);
		FUN_QSTAT_INC(q, tx_cso);
	} else {
		fun_eth_offload_init(&req->offload, 0, 0, 0, 0, 0, 0, 0, 0);
	}

	/* one gather entry for the linear data plus one per fragment */
	ngle = shinfo->nr_frags + 1;
	req->dataop = FUN_DATAOP_HDR_INIT(ngle, 0, ngle, 0, skb->len);

	gle = fun_write_gl(q, req, addrs, lens, ngle);

	if (IS_ENABLED(CONFIG_TLS_DEVICE) && unlikely(tls_len)) {
		/* the TLS section goes where the gather list ended */
		struct fun_eth_tls *tls = (struct fun_eth_tls *)gle;
		struct fun_ktls_tx_ctx *tls_ctx;

		req->len8 += FUNETH_TLS_SZ / 8;
		req->flags = cpu_to_be16(FUN_ETH_TX_TLS);

		tls_ctx = tls_driver_ctx(skb->sk, TLS_OFFLOAD_CTX_DIR_TX);
		tls->tlsid = tls_ctx->tlsid;
		/* advance the sequence number expected for the next packet */
		tls_ctx->next_seq += tls_len;

		u64_stats_update_begin(&q->syncp);
		q->stats.tx_tls_bytes += tls_len;
		q->stats.tx_tls_pkts += 1 + extra_pkts;
		u64_stats_update_end(&q->syncp);
	}

	u64_stats_update_begin(&q->syncp);
	q->stats.tx_bytes += skb->len + extra_bytes;
	q->stats.tx_pkts += 1 + extra_pkts;
	u64_stats_update_end(&q->syncp);

	/* remember the skb at the packet's first descriptor for reclaim */
	q->info[idx].skb = skb;

	trace_funeth_tx(q, skb->len, idx, req->dataop.ngather);
	return tx_req_ndesc(req);
}
306 
307 /* Return the number of available descriptors of a Tx queue.
308  * HW assumes head==tail means the ring is empty so we need to keep one
309  * descriptor unused.
310  */
311 static unsigned int fun_txq_avail(const struct funeth_txq *q)
312 {
313 	return q->mask - q->prod_cnt + q->cons_cnt;
314 }
315 
316 /* Stop a queue if it can't handle another worst-case packet. */
317 static void fun_tx_check_stop(struct funeth_txq *q)
318 {
319 	if (likely(fun_txq_avail(q) >= FUNETH_MAX_PKT_DESC))
320 		return;
321 
322 	netif_tx_stop_queue(q->ndq);
323 
324 	/* NAPI reclaim is freeing packets in parallel with us and we may race.
325 	 * We have stopped the queue but check again after synchronizing with
326 	 * reclaim.
327 	 */
328 	smp_mb();
329 	if (likely(fun_txq_avail(q) < FUNETH_MAX_PKT_DESC))
330 		FUN_QSTAT_INC(q, tx_nstops);
331 	else
332 		netif_tx_start_queue(q->ndq);
333 }
334 
335 /* Return true if a queue has enough space to restart. Current condition is
336  * that the queue must be >= 1/4 empty.
337  */
338 static bool fun_txq_may_restart(struct funeth_txq *q)
339 {
340 	return fun_txq_avail(q) >= q->mask / 4;
341 }
342 
343 netdev_tx_t fun_start_xmit(struct sk_buff *skb, struct net_device *netdev)
344 {
345 	struct funeth_priv *fp = netdev_priv(netdev);
346 	unsigned int qid = skb_get_queue_mapping(skb);
347 	struct funeth_txq *q = fp->txqs[qid];
348 	unsigned int tls_len = 0;
349 	unsigned int ndesc;
350 
351 	if (IS_ENABLED(CONFIG_TLS_DEVICE) && skb->sk &&
352 	    tls_is_sk_tx_device_offloaded(skb->sk)) {
353 		skb = fun_tls_tx(skb, q, &tls_len);
354 		if (unlikely(!skb))
355 			goto dropped;
356 	}
357 
358 	ndesc = write_pkt_desc(skb, q, tls_len);
359 	if (unlikely(!ndesc)) {
360 		dev_kfree_skb_any(skb);
361 		goto dropped;
362 	}
363 
364 	q->prod_cnt += ndesc;
365 	fun_tx_check_stop(q);
366 
367 	skb_tx_timestamp(skb);
368 
369 	if (__netdev_tx_sent_queue(q->ndq, skb->len, netdev_xmit_more()))
370 		fun_txq_wr_db(q);
371 	else
372 		FUN_QSTAT_INC(q, tx_more);
373 
374 	return NETDEV_TX_OK;
375 
376 dropped:
377 	/* A dropped packet may be the last one in a xmit_more train,
378 	 * ring the doorbell just in case.
379 	 */
380 	if (!netdev_xmit_more())
381 		fun_txq_wr_db(q);
382 	return NETDEV_TX_OK;
383 }
384 
385 /* Return a Tx queue's HW head index written back to host memory. */
386 static u16 txq_hw_head(const struct funeth_txq *q)
387 {
388 	return (u16)be64_to_cpu(*q->hw_wb);
389 }
390 
391 /* Unmap the Tx packet starting at the given descriptor index and
392  * return the number of Tx descriptors it occupied.
393  */
394 static unsigned int fun_unmap_pkt(const struct funeth_txq *q, unsigned int idx)
395 {
396 	const struct fun_eth_tx_req *req = fun_tx_desc_addr(q, idx);
397 	unsigned int ngle = req->dataop.ngather;
398 	struct fun_dataop_gl *gle;
399 
400 	if (ngle) {
401 		gle = (struct fun_dataop_gl *)req->dataop.imm;
402 		dma_unmap_single(q->dma_dev, be64_to_cpu(gle->sgl_data),
403 				 be32_to_cpu(gle->sgl_len), DMA_TO_DEVICE);
404 
405 		for (gle++; --ngle && txq_to_end(q, gle); gle++)
406 			dma_unmap_page(q->dma_dev, be64_to_cpu(gle->sgl_data),
407 				       be32_to_cpu(gle->sgl_len),
408 				       DMA_TO_DEVICE);
409 
410 		for (gle = (struct fun_dataop_gl *)q->desc; ngle; ngle--, gle++)
411 			dma_unmap_page(q->dma_dev, be64_to_cpu(gle->sgl_data),
412 				       be32_to_cpu(gle->sgl_len),
413 				       DMA_TO_DEVICE);
414 	}
415 
416 	return tx_req_ndesc(req);
417 }
418 
419 /* Reclaim completed Tx descriptors and free their packets. Restart a stopped
420  * queue if we freed enough descriptors.
421  *
422  * Return true if we exhausted the budget while there is more work to be done.
423  */
424 static bool fun_txq_reclaim(struct funeth_txq *q, int budget)
425 {
426 	unsigned int npkts = 0, nbytes = 0, ndesc = 0;
427 	unsigned int head, limit, reclaim_idx;
428 
429 	/* budget may be 0, e.g., netpoll */
430 	limit = budget ? budget : UINT_MAX;
431 
432 	for (head = txq_hw_head(q), reclaim_idx = q->cons_cnt & q->mask;
433 	     head != reclaim_idx && npkts < limit; head = txq_hw_head(q)) {
434 		/* The HW head is continually updated, ensure we don't read
435 		 * descriptor state before the head tells us to reclaim it.
436 		 * On the enqueue side the doorbell is an implicit write
437 		 * barrier.
438 		 */
439 		rmb();
440 
441 		do {
442 			unsigned int pkt_desc = fun_unmap_pkt(q, reclaim_idx);
443 			struct sk_buff *skb = q->info[reclaim_idx].skb;
444 
445 			trace_funeth_tx_free(q, reclaim_idx, pkt_desc, head);
446 
447 			nbytes += skb->len;
448 			napi_consume_skb(skb, budget);
449 			ndesc += pkt_desc;
450 			reclaim_idx = (reclaim_idx + pkt_desc) & q->mask;
451 			npkts++;
452 		} while (reclaim_idx != head && npkts < limit);
453 	}
454 
455 	q->cons_cnt += ndesc;
456 	netdev_tx_completed_queue(q->ndq, npkts, nbytes);
457 	smp_mb(); /* pairs with the one in fun_tx_check_stop() */
458 
459 	if (unlikely(netif_tx_queue_stopped(q->ndq) &&
460 		     fun_txq_may_restart(q))) {
461 		netif_tx_wake_queue(q->ndq);
462 		FUN_QSTAT_INC(q, tx_nrestarts);
463 	}
464 
465 	return reclaim_idx != head;
466 }
467 
468 /* The NAPI handler for Tx queues. */
469 int fun_txq_napi_poll(struct napi_struct *napi, int budget)
470 {
471 	struct fun_irq *irq = container_of(napi, struct fun_irq, napi);
472 	struct funeth_txq *q = irq->txq;
473 	unsigned int db_val;
474 
475 	if (fun_txq_reclaim(q, budget))
476 		return budget;               /* exhausted budget */
477 
478 	napi_complete(napi);                 /* exhausted pending work */
479 	db_val = READ_ONCE(q->irq_db_val) | (q->cons_cnt & q->mask);
480 	writel(db_val, q->db);
481 	return 0;
482 }
483 
484 /* Reclaim up to @budget completed Tx packets from a TX XDP queue. */
485 static unsigned int fun_xdpq_clean(struct funeth_txq *q, unsigned int budget)
486 {
487 	unsigned int npkts = 0, ndesc = 0, head, reclaim_idx;
488 
489 	for (head = txq_hw_head(q), reclaim_idx = q->cons_cnt & q->mask;
490 	     head != reclaim_idx && npkts < budget; head = txq_hw_head(q)) {
491 		/* The HW head is continually updated, ensure we don't read
492 		 * descriptor state before the head tells us to reclaim it.
493 		 * On the enqueue side the doorbell is an implicit write
494 		 * barrier.
495 		 */
496 		rmb();
497 
498 		do {
499 			unsigned int pkt_desc = fun_unmap_pkt(q, reclaim_idx);
500 
501 			xdp_return_frame(q->info[reclaim_idx].xdpf);
502 
503 			trace_funeth_tx_free(q, reclaim_idx, pkt_desc, head);
504 
505 			reclaim_idx = (reclaim_idx + pkt_desc) & q->mask;
506 			ndesc += pkt_desc;
507 			npkts++;
508 		} while (reclaim_idx != head && npkts < budget);
509 	}
510 
511 	q->cons_cnt += ndesc;
512 	return npkts;
513 }
514 
515 bool fun_xdp_tx(struct funeth_txq *q, struct xdp_frame *xdpf)
516 {
517 	unsigned int idx, nfrags = 1, ndesc = 1, tot_len = xdpf->len;
518 	const struct skb_shared_info *si = NULL;
519 	unsigned int lens[MAX_SKB_FRAGS + 1];
520 	dma_addr_t dma[MAX_SKB_FRAGS + 1];
521 	struct fun_eth_tx_req *req;
522 
523 	if (fun_txq_avail(q) < FUN_XDP_CLEAN_THRES)
524 		fun_xdpq_clean(q, FUN_XDP_CLEAN_BATCH);
525 
526 	if (unlikely(xdp_frame_has_frags(xdpf))) {
527 		si = xdp_get_shared_info_from_frame(xdpf);
528 		tot_len = xdp_get_frame_len(xdpf);
529 		nfrags += si->nr_frags;
530 		ndesc = DIV_ROUND_UP((sizeof(*req) + nfrags *
531 				      sizeof(struct fun_dataop_gl)),
532 				     FUNETH_SQE_SIZE);
533 	}
534 
535 	if (unlikely(fun_txq_avail(q) < ndesc)) {
536 		FUN_QSTAT_INC(q, tx_xdp_full);
537 		return false;
538 	}
539 
540 	if (unlikely(fun_map_pkt(q->dma_dev, si, xdpf->data, xdpf->len, dma,
541 				 lens))) {
542 		FUN_QSTAT_INC(q, tx_map_err);
543 		return false;
544 	}
545 
546 	idx = q->prod_cnt & q->mask;
547 	req = fun_tx_desc_addr(q, idx);
548 	req->op = FUN_ETH_OP_TX;
549 	req->len8 = 0;
550 	req->flags = 0;
551 	req->suboff8 = offsetof(struct fun_eth_tx_req, dataop);
552 	req->repr_idn = 0;
553 	req->encap_proto = 0;
554 	fun_eth_offload_init(&req->offload, 0, 0, 0, 0, 0, 0, 0, 0);
555 	req->dataop = FUN_DATAOP_HDR_INIT(nfrags, 0, nfrags, 0, tot_len);
556 
557 	fun_write_gl(q, req, dma, lens, nfrags);
558 
559 	q->info[idx].xdpf = xdpf;
560 
561 	u64_stats_update_begin(&q->syncp);
562 	q->stats.tx_bytes += tot_len;
563 	q->stats.tx_pkts++;
564 	u64_stats_update_end(&q->syncp);
565 
566 	trace_funeth_tx(q, tot_len, idx, nfrags);
567 	q->prod_cnt += ndesc;
568 
569 	return true;
570 }
571 
572 int fun_xdp_xmit_frames(struct net_device *dev, int n,
573 			struct xdp_frame **frames, u32 flags)
574 {
575 	struct funeth_priv *fp = netdev_priv(dev);
576 	struct funeth_txq *q, **xdpqs;
577 	int i, q_idx;
578 
579 	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
580 		return -EINVAL;
581 
582 	xdpqs = rcu_dereference_bh(fp->xdpqs);
583 	if (unlikely(!xdpqs))
584 		return -ENETDOWN;
585 
586 	q_idx = smp_processor_id();
587 	if (unlikely(q_idx >= fp->num_xdpqs))
588 		return -ENXIO;
589 
590 	for (q = xdpqs[q_idx], i = 0; i < n; i++)
591 		if (!fun_xdp_tx(q, frames[i]))
592 			break;
593 
594 	if (unlikely(flags & XDP_XMIT_FLUSH))
595 		fun_txq_wr_db(q);
596 	return i;
597 }
598 
599 /* Purge a Tx queue of any queued packets. Should be called once HW access
600  * to the packets has been revoked, e.g., after the queue has been disabled.
601  */
602 static void fun_txq_purge(struct funeth_txq *q)
603 {
604 	while (q->cons_cnt != q->prod_cnt) {
605 		unsigned int idx = q->cons_cnt & q->mask;
606 
607 		q->cons_cnt += fun_unmap_pkt(q, idx);
608 		dev_kfree_skb_any(q->info[idx].skb);
609 	}
610 	netdev_tx_reset_queue(q->ndq);
611 }
612 
613 static void fun_xdpq_purge(struct funeth_txq *q)
614 {
615 	while (q->cons_cnt != q->prod_cnt) {
616 		unsigned int idx = q->cons_cnt & q->mask;
617 
618 		q->cons_cnt += fun_unmap_pkt(q, idx);
619 		xdp_return_frame(q->info[idx].xdpf);
620 	}
621 }
622 
623 /* Create a Tx queue, allocating all the host resources needed. */
624 static struct funeth_txq *fun_txq_create_sw(struct net_device *dev,
625 					    unsigned int qidx,
626 					    unsigned int ndesc,
627 					    struct fun_irq *irq)
628 {
629 	struct funeth_priv *fp = netdev_priv(dev);
630 	struct funeth_txq *q;
631 	int numa_node;
632 
633 	if (irq)
634 		numa_node = fun_irq_node(irq); /* skb Tx queue */
635 	else
636 		numa_node = cpu_to_node(qidx); /* XDP Tx queue */
637 
638 	q = kzalloc_node(sizeof(*q), GFP_KERNEL, numa_node);
639 	if (!q)
640 		goto err;
641 
642 	q->dma_dev = &fp->pdev->dev;
643 	q->desc = fun_alloc_ring_mem(q->dma_dev, ndesc, FUNETH_SQE_SIZE,
644 				     sizeof(*q->info), true, numa_node,
645 				     &q->dma_addr, (void **)&q->info,
646 				     &q->hw_wb);
647 	if (!q->desc)
648 		goto free_q;
649 
650 	q->netdev = dev;
651 	q->mask = ndesc - 1;
652 	q->qidx = qidx;
653 	q->numa_node = numa_node;
654 	u64_stats_init(&q->syncp);
655 	q->init_state = FUN_QSTATE_INIT_SW;
656 	return q;
657 
658 free_q:
659 	kfree(q);
660 err:
661 	netdev_err(dev, "Can't allocate memory for %s queue %u\n",
662 		   irq ? "Tx" : "XDP", qidx);
663 	return NULL;
664 }
665 
666 static void fun_txq_free_sw(struct funeth_txq *q)
667 {
668 	struct funeth_priv *fp = netdev_priv(q->netdev);
669 
670 	fun_free_ring_mem(q->dma_dev, q->mask + 1, FUNETH_SQE_SIZE, true,
671 			  q->desc, q->dma_addr, q->info);
672 
673 	fp->tx_packets += q->stats.tx_pkts;
674 	fp->tx_bytes   += q->stats.tx_bytes;
675 	fp->tx_dropped += q->stats.tx_map_err;
676 
677 	kfree(q);
678 }
679 
/* Allocate the device portion of a Tx queue.
 *
 * Resets the queue's writeback location and producer/consumer counters,
 * creates the device SQ, and creates and binds the Tx resource whose ID is
 * kept in q->ethid. For a queue with an @irq (an skb Tx queue) it also
 * links the queue to the IRQ and the netdev Tx queue and writes the
 * interrupt doorbell value. @irq is NULL for XDP Tx queues.
 *
 * On success the queue is in state FUN_QSTATE_INIT_FULL and 0 is returned.
 * On failure the error of the failing step is returned; an SQ that was
 * already created is destroyed again.
 */
int fun_txq_create_dev(struct funeth_txq *q, struct fun_irq *irq)
{
	struct funeth_priv *fp = netdev_priv(q->netdev);
	unsigned int irq_idx, ndesc = q->mask + 1;
	int err;

	q->irq = irq;
	/* start with an empty ring: cleared head writeback and counters */
	*q->hw_wb = 0;
	q->prod_cnt = 0;
	q->cons_cnt = 0;
	/* queues without an IRQ pass index 0 */
	irq_idx = irq ? irq->irq_idx : 0;

	err = fun_sq_create(fp->fdev,
			    FUN_ADMIN_EPSQ_CREATE_FLAG_HEAD_WB_ADDRESS |
			    FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR, 0,
			    FUN_HCI_ID_INVALID, ilog2(FUNETH_SQE_SIZE), ndesc,
			    q->dma_addr, fp->tx_coal_count, fp->tx_coal_usec,
			    irq_idx, 0, fp->fdev->kern_end_qid, 0,
			    &q->hw_qid, &q->db);
	if (err)
		goto out;

	/* a non-negative return value is the ID stored in q->ethid */
	err = fun_create_and_bind_tx(fp, q->hw_qid);
	if (err < 0)
		goto free_devq;
	q->ethid = err;

	if (irq) {
		irq->txq = q;
		q->ndq = netdev_get_tx_queue(q->netdev, q->qidx);
		q->irq_db_val = FUN_IRQ_SQ_DB(fp->tx_coal_usec,
					      fp->tx_coal_count);
		writel(q->irq_db_val, q->db);
	}

	q->init_state = FUN_QSTATE_INIT_FULL;
	netif_info(fp, ifup, q->netdev,
		   "%s queue %u, depth %u, HW qid %u, IRQ idx %u, eth id %u, node %d\n",
		   irq ? "Tx" : "XDP", q->qidx, ndesc, q->hw_qid, irq_idx,
		   q->ethid, q->numa_node);
	return 0;

free_devq:
	fun_destroy_sq(fp->fdev, q->hw_qid);
out:
	netdev_err(q->netdev,
		   "Failed to create %s queue %u on device, error %d\n",
		   irq ? "Tx" : "XDP", q->qidx, err);
	return err;
}
731 
732 static void fun_txq_free_dev(struct funeth_txq *q)
733 {
734 	struct funeth_priv *fp = netdev_priv(q->netdev);
735 
736 	if (q->init_state < FUN_QSTATE_INIT_FULL)
737 		return;
738 
739 	netif_info(fp, ifdown, q->netdev,
740 		   "Freeing %s queue %u (id %u), IRQ %u, ethid %u\n",
741 		   q->irq ? "Tx" : "XDP", q->qidx, q->hw_qid,
742 		   q->irq ? q->irq->irq_idx : 0, q->ethid);
743 
744 	fun_destroy_sq(fp->fdev, q->hw_qid);
745 	fun_res_destroy(fp->fdev, FUN_ADMIN_OP_ETH, 0, q->ethid);
746 
747 	if (q->irq) {
748 		q->irq->txq = NULL;
749 		fun_txq_purge(q);
750 	} else {
751 		fun_xdpq_purge(q);
752 	}
753 
754 	q->init_state = FUN_QSTATE_INIT_SW;
755 }
756 
757 /* Create or advance a Tx queue, allocating all the host and device resources
758  * needed to reach the target state.
759  */
760 int funeth_txq_create(struct net_device *dev, unsigned int qidx,
761 		      unsigned int ndesc, struct fun_irq *irq, int state,
762 		      struct funeth_txq **qp)
763 {
764 	struct funeth_txq *q = *qp;
765 	int err;
766 
767 	if (!q)
768 		q = fun_txq_create_sw(dev, qidx, ndesc, irq);
769 	if (!q)
770 		return -ENOMEM;
771 
772 	if (q->init_state >= state)
773 		goto out;
774 
775 	err = fun_txq_create_dev(q, irq);
776 	if (err) {
777 		if (!*qp)
778 			fun_txq_free_sw(q);
779 		return err;
780 	}
781 
782 out:
783 	*qp = q;
784 	return 0;
785 }
786 
787 /* Free Tx queue resources until it reaches the target state.
788  * The queue must be already disconnected from the stack.
789  */
790 struct funeth_txq *funeth_txq_free(struct funeth_txq *q, int state)
791 {
792 	if (state < FUN_QSTATE_INIT_FULL)
793 		fun_txq_free_dev(q);
794 
795 	if (state == FUN_QSTATE_DESTROYED) {
796 		fun_txq_free_sw(q);
797 		q = NULL;
798 	}
799 
800 	return q;
801 }
802