// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
/* Copyright (C) 2015-2019 Netronome Systems, Inc. */

#include <linux/bpf_trace.h>
#include <linux/netdevice.h>
#include <linux/overflow.h>
#include <linux/sizes.h>
#include <linux/bitfield.h>

#include "../nfp_app.h"
#include "../nfp_net.h"
#include "../nfp_net_dp.h"
#include "../crypto/crypto.h"
#include "../crypto/fw.h"
#include "nfdk.h"

static int nfp_nfdk_tx_ring_should_wake(struct nfp_net_tx_ring *tx_ring)
{
	return !nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT * 2);
}

static int nfp_nfdk_tx_ring_should_stop(struct nfp_net_tx_ring *tx_ring)
{
	return nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT);
}

static void nfp_nfdk_tx_ring_stop(struct netdev_queue *nd_q,
				  struct nfp_net_tx_ring *tx_ring)
{
	netif_tx_stop_queue(nd_q);

	/* We can race with the TX completion out of NAPI so recheck */
	smp_mb();
	if (unlikely(nfp_nfdk_tx_ring_should_wake(tx_ring)))
		netif_tx_start_queue(nd_q);
}

static __le64
nfp_nfdk_tx_tso(struct nfp_net_r_vector *r_vec, struct nfp_nfdk_tx_buf *txbuf,
		struct sk_buff *skb)
{
	u32 segs, hdrlen, l3_offset, l4_offset;
	struct nfp_nfdk_tx_desc txd;
	u16 mss;

	if (!skb->encapsulation) {
		l3_offset = skb_network_offset(skb);
		l4_offset = skb_transport_offset(skb);
		hdrlen = skb_transport_offset(skb) + tcp_hdrlen(skb);
	} else {
		l3_offset = skb_inner_network_offset(skb);
		l4_offset = skb_inner_transport_offset(skb);
		hdrlen = skb_inner_transport_header(skb) - skb->data +
			inner_tcp_hdrlen(skb);
	}

	segs = skb_shinfo(skb)->gso_segs;
	mss = skb_shinfo(skb)->gso_size & NFDK_DESC_TX_MSS_MASK;

	/* Note: TSO of a packet with metadata prepended to the skb is not
	 * supported yet; in that case l3/l4_offset and lso_hdrlen would need
	 * to be handled correctly here.
	 * Concern:
	 * The driver doesn't have md_bytes easily available at this point.
	 * The PCI.IN PD ME won't have md_bytes bytes to add to lso_hdrlen,
	 * so it needs the full length there.  The app MEs might prefer
	 * l3_offset and l4_offset relative to the start of packet data,
	 * but could probably cope with it being relative to the CTM buf
	 * data offset.
	 */
	txd.l3_offset = l3_offset;
	txd.l4_offset = l4_offset;
	txd.lso_meta_res = 0;
	txd.mss = cpu_to_le16(mss);
	txd.lso_hdrlen = hdrlen;
	txd.lso_totsegs = segs;

	txbuf->pkt_cnt = segs;
	txbuf->real_len = skb->len + hdrlen * (txbuf->pkt_cnt - 1);

	u64_stats_update_begin(&r_vec->tx_sync);
	r_vec->tx_lso++;
	u64_stats_update_end(&r_vec->tx_sync);

	return txd.raw;
}

static u64
nfp_nfdk_tx_csum(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
		 unsigned int pkt_cnt, struct sk_buff *skb, u64 flags)
{
	struct ipv6hdr *ipv6h;
	struct iphdr *iph;

	if (!(dp->ctrl & NFP_NET_CFG_CTRL_TXCSUM))
		return flags;

	if (skb->ip_summed != CHECKSUM_PARTIAL)
		return flags;

	flags |= NFDK_DESC_TX_L4_CSUM;

	iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb);
	ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb);

	/* L3 checksum offloading flag is not required for ipv6 */
	if (iph->version == 4) {
		flags |= NFDK_DESC_TX_L3_CSUM;
	} else if (ipv6h->version != 6) {
		nn_dp_warn(dp, "partial checksum but ipv=%x!\n", iph->version);
		return flags;
	}

	u64_stats_update_begin(&r_vec->tx_sync);
	if (!skb->encapsulation) {
		r_vec->hw_csum_tx += pkt_cnt;
	} else {
		flags |= NFDK_DESC_TX_ENCAP;
		r_vec->hw_csum_tx_inner += pkt_cnt;
	}
	u64_stats_update_end(&r_vec->tx_sync);

	return flags;
}
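
/* NFDK TX descriptors are laid out in blocks of NFDK_TX_DESC_BLOCK_CNT
 * entries.  The firmware requires that one packet's descriptor chain never
 * crosses a block boundary and that a single block carries no more than
 * NFDK_TX_MAX_DATA_PER_BLOCK bytes of packet data.  The helper below
 * enforces both rules: whenever the packet wouldn't fit, the rest of the
 * current block is padded with zeroed "nop" descriptors so the packet
 * starts on a fresh block.
 */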
static int
nfp_nfdk_tx_maybe_close_block(struct nfp_net_tx_ring *tx_ring,
			      struct sk_buff *skb)
{
	unsigned int n_descs, wr_p, nop_slots;
	const skb_frag_t *frag, *fend;
	struct nfp_nfdk_tx_desc *txd;
	unsigned int nr_frags;
	unsigned int wr_idx;
	int err;

recount_descs:
	n_descs = nfp_nfdk_headlen_to_segs(skb_headlen(skb));
	nr_frags = skb_shinfo(skb)->nr_frags;
	frag = skb_shinfo(skb)->frags;
	fend = frag + nr_frags;
	for (; frag < fend; frag++)
		n_descs += DIV_ROUND_UP(skb_frag_size(frag),
					NFDK_TX_MAX_DATA_PER_DESC);

	if (unlikely(n_descs > NFDK_TX_DESC_GATHER_MAX)) {
		if (skb_is_nonlinear(skb)) {
			err = skb_linearize(skb);
			if (err)
				return err;
			goto recount_descs;
		}
		return -EINVAL;
	}

	/* Under count by 1 (don't count meta) for the round down to work out */
	n_descs += !!skb_is_gso(skb);

	if (round_down(tx_ring->wr_p, NFDK_TX_DESC_BLOCK_CNT) !=
	    round_down(tx_ring->wr_p + n_descs, NFDK_TX_DESC_BLOCK_CNT))
		goto close_block;

	if ((u32)tx_ring->data_pending + skb->len > NFDK_TX_MAX_DATA_PER_BLOCK)
		goto close_block;

	return 0;

close_block:
	wr_p = tx_ring->wr_p;
	nop_slots = D_BLOCK_CPL(wr_p);

	wr_idx = D_IDX(tx_ring, wr_p);
	tx_ring->ktxbufs[wr_idx].skb = NULL;
	txd = &tx_ring->ktxds[wr_idx];

	memset(txd, 0, array_size(nop_slots, sizeof(struct nfp_nfdk_tx_desc)));

	tx_ring->data_pending = 0;
	tx_ring->wr_p += nop_slots;
	tx_ring->wr_ptr_add += nop_slots;

	return 0;
}

static int nfp_nfdk_prep_port_id(struct sk_buff *skb)
{
	struct metadata_dst *md_dst = skb_metadata_dst(skb);
	unsigned char *data;

	if (likely(!md_dst))
		return 0;
	if (unlikely(md_dst->type != METADATA_HW_PORT_MUX))
		return 0;

	/* Note: TSO of an skb with metadata prepended is not supported.
	 * See the comments in nfp_nfdk_tx_tso() for details.
	 */
	if (unlikely(md_dst && skb_is_gso(skb)))
		return -EOPNOTSUPP;

	if (unlikely(skb_cow_head(skb, sizeof(md_dst->u.port_info.port_id))))
		return -ENOMEM;

	data = skb_push(skb, sizeof(md_dst->u.port_info.port_id));
	put_unaligned_be32(md_dst->u.port_info.port_id, data);

	return sizeof(md_dst->u.port_info.port_id);
}
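
/* Layout of the TX metadata prepend built below, from the start of the
 * buffer: one big-endian "meta header" word carrying the total prepend
 * length (NFDK_META_LEN) and the field type (NFDK_META_FIELDS), followed
 * by the 4-byte port id value pushed by nfp_nfdk_prep_port_id(), then the
 * packet data itself.
 */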
static int
nfp_nfdk_prep_tx_meta(struct nfp_app *app, struct sk_buff *skb,
		      struct nfp_net_r_vector *r_vec)
{
	unsigned char *data;
	int res, md_bytes;
	u32 meta_id = 0;

	res = nfp_nfdk_prep_port_id(skb);
	if (unlikely(res <= 0))
		return res;

	md_bytes = res;
	meta_id = NFP_NET_META_PORTID;

	if (unlikely(skb_cow_head(skb, sizeof(meta_id))))
		return -ENOMEM;

	md_bytes += sizeof(meta_id);

	meta_id = FIELD_PREP(NFDK_META_LEN, md_bytes) |
		  FIELD_PREP(NFDK_META_FIELDS, meta_id);

	data = skb_push(skb, sizeof(meta_id));
	put_unaligned_be32(meta_id, data);

	return NFDK_DESC_TX_CHAIN_META;
}

/**
 * nfp_nfdk_tx() - Main transmit entry point
 * @skb:    SKB to transmit
 * @netdev: netdev structure
 *
 * Return: NETDEV_TX_OK on success.
 */
netdev_tx_t nfp_nfdk_tx(struct sk_buff *skb, struct net_device *netdev)
{
	struct nfp_net *nn = netdev_priv(netdev);
	struct nfp_nfdk_tx_buf *txbuf, *etxbuf;
	u32 cnt, tmp_dlen, dlen_type = 0;
	struct nfp_net_tx_ring *tx_ring;
	struct nfp_net_r_vector *r_vec;
	const skb_frag_t *frag, *fend;
	struct nfp_nfdk_tx_desc *txd;
	unsigned int real_len, qidx;
	unsigned int dma_len, type;
	struct netdev_queue *nd_q;
	struct nfp_net_dp *dp;
	int nr_frags, wr_idx;
	dma_addr_t dma_addr;
	u64 metadata;

	dp = &nn->dp;
	qidx = skb_get_queue_mapping(skb);
	tx_ring = &dp->tx_rings[qidx];
	r_vec = tx_ring->r_vec;
	nd_q = netdev_get_tx_queue(dp->netdev, qidx);

	/* Don't bother counting frags, assume the worst */
	if (unlikely(nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT))) {
		nn_dp_warn(dp, "TX ring %d busy. wrp=%u rdp=%u\n",
			   qidx, tx_ring->wr_p, tx_ring->rd_p);
		netif_tx_stop_queue(nd_q);
		nfp_net_tx_xmit_more_flush(tx_ring);
		u64_stats_update_begin(&r_vec->tx_sync);
		r_vec->tx_busy++;
		u64_stats_update_end(&r_vec->tx_sync);
		return NETDEV_TX_BUSY;
	}
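
	/* Order matters below: the metadata prepend is pushed onto the skb
	 * first, since it grows the head and therefore changes the
	 * descriptor count that nfp_nfdk_tx_maybe_close_block() checks.
	 */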
	metadata = nfp_nfdk_prep_tx_meta(nn->app, skb, r_vec);
	if (unlikely((int)metadata < 0))
		goto err_flush;

	if (nfp_nfdk_tx_maybe_close_block(tx_ring, skb))
		goto err_flush;

	/* nr_frags will change after skb_linearize so we get nr_frags after
	 * nfp_nfdk_tx_maybe_close_block function
	 */
	nr_frags = skb_shinfo(skb)->nr_frags;
	/* DMA map all */
	wr_idx = D_IDX(tx_ring, tx_ring->wr_p);
	txd = &tx_ring->ktxds[wr_idx];
	txbuf = &tx_ring->ktxbufs[wr_idx];

	dma_len = skb_headlen(skb);
	if (skb_is_gso(skb))
		type = NFDK_DESC_TX_TYPE_TSO;
	else if (!nr_frags && dma_len < NFDK_TX_MAX_DATA_PER_HEAD)
		type = NFDK_DESC_TX_TYPE_SIMPLE;
	else
		type = NFDK_DESC_TX_TYPE_GATHER;

	dma_addr = dma_map_single(dp->dev, skb->data, dma_len, DMA_TO_DEVICE);
	if (dma_mapping_error(dp->dev, dma_addr))
		goto err_warn_dma;

	txbuf->skb = skb;
	txbuf++;

	txbuf->dma_addr = dma_addr;
	txbuf++;

	/* FIELD_PREP() implicitly truncates to chunk */
	dma_len -= 1;

	/* We will do our best to pass as much data as we can in the
	 * descriptor, and we need to make sure the first descriptor includes
	 * the whole head since there is a limitation on the firmware side.
	 * Sometimes the value of 'dma_len & NFDK_DESC_TX_DMA_LEN_HEAD' will
	 * be less than headlen.
	 */
	dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD,
			       dma_len > NFDK_DESC_TX_DMA_LEN_HEAD ?
			       NFDK_DESC_TX_DMA_LEN_HEAD : dma_len) |
		    FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type);

	txd->dma_len_type = cpu_to_le16(dlen_type);
	nfp_nfdk_tx_desc_set_dma_addr(txd, dma_addr);

	/* starts at bit 0 */
	BUILD_BUG_ON(!(NFDK_DESC_TX_DMA_LEN_HEAD & 1));

	/* Preserve the original dlen_type, this way below the EOP logic
	 * can use dlen_type.
	 */
	tmp_dlen = dlen_type & NFDK_DESC_TX_DMA_LEN_HEAD;
	dma_len -= tmp_dlen;
	dma_addr += tmp_dlen + 1;
	txd++;

	/* The rest of the data (if any) will be in larger DMA descriptors
	 * and is handled with the fragment loop.
	 */
	frag = skb_shinfo(skb)->frags;
	fend = frag + nr_frags;
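
	/* Descriptor length fields store "length - 1", so after the
	 * "dma_len -= 1" above each FIELD_PREP() truncation yields a chunk
	 * of at most mask + 1 bytes.  The loop below emits one gather
	 * descriptor per chunk, advancing dma_addr by the encoded length
	 * plus one each round until the mapping is fully described.
	 */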
	while (true) {
		while (dma_len > 0) {
			dma_len -= 1;
			dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN, dma_len);

			txd->dma_len_type = cpu_to_le16(dlen_type);
			nfp_nfdk_tx_desc_set_dma_addr(txd, dma_addr);

			dma_len -= dlen_type;
			dma_addr += dlen_type + 1;
			txd++;
		}

		if (frag >= fend)
			break;

		dma_len = skb_frag_size(frag);
		dma_addr = skb_frag_dma_map(dp->dev, frag, 0, dma_len,
					    DMA_TO_DEVICE);
		if (dma_mapping_error(dp->dev, dma_addr))
			goto err_unmap;

		txbuf->dma_addr = dma_addr;
		txbuf++;

		frag++;
	}

	(txd - 1)->dma_len_type = cpu_to_le16(dlen_type | NFDK_DESC_TX_EOP);

	if (!skb_is_gso(skb)) {
		real_len = skb->len;
		/* Metadata desc */
		metadata = nfp_nfdk_tx_csum(dp, r_vec, 1, skb, metadata);
		txd->raw = cpu_to_le64(metadata);
		txd++;
	} else {
		/* lso desc should be placed after metadata desc */
		(txd + 1)->raw = nfp_nfdk_tx_tso(r_vec, txbuf, skb);
		real_len = txbuf->real_len;
		/* Metadata desc */
		metadata = nfp_nfdk_tx_csum(dp, r_vec, txbuf->pkt_cnt, skb, metadata);
		txd->raw = cpu_to_le64(metadata);
		txd += 2;
		txbuf++;
	}

	cnt = txd - tx_ring->ktxds - wr_idx;
	if (unlikely(round_down(wr_idx, NFDK_TX_DESC_BLOCK_CNT) !=
		     round_down(wr_idx + cnt - 1, NFDK_TX_DESC_BLOCK_CNT)))
		goto err_warn_overflow;

	skb_tx_timestamp(skb);

	tx_ring->wr_p += cnt;
	if (tx_ring->wr_p % NFDK_TX_DESC_BLOCK_CNT)
		tx_ring->data_pending += skb->len;
	else
		tx_ring->data_pending = 0;

	if (nfp_nfdk_tx_ring_should_stop(tx_ring))
		nfp_nfdk_tx_ring_stop(nd_q, tx_ring);

	tx_ring->wr_ptr_add += cnt;
	if (__netdev_tx_sent_queue(nd_q, real_len, netdev_xmit_more()))
		nfp_net_tx_xmit_more_flush(tx_ring);

	return NETDEV_TX_OK;

err_warn_overflow:
	WARN_ONCE(1, "unable to fit packet into a descriptor wr_idx:%d head:%d frags:%d cnt:%d",
		  wr_idx, skb_headlen(skb), nr_frags, cnt);
	if (skb_is_gso(skb))
		txbuf--;
err_unmap:
	/* txbuf pointed to the next-to-use */
	etxbuf = txbuf;
	/* first txbuf holds the skb */
	txbuf = &tx_ring->ktxbufs[wr_idx + 1];
	if (txbuf < etxbuf) {
		dma_unmap_single(dp->dev, txbuf->dma_addr,
				 skb_headlen(skb), DMA_TO_DEVICE);
		txbuf->raw = 0;
		txbuf++;
	}
	frag = skb_shinfo(skb)->frags;
	while (txbuf < etxbuf) {
		dma_unmap_page(dp->dev, txbuf->dma_addr,
			       skb_frag_size(frag), DMA_TO_DEVICE);
		txbuf->raw = 0;
		frag++;
		txbuf++;
	}
err_warn_dma:
	nn_dp_warn(dp, "Failed to map DMA TX buffer\n");
err_flush:
	nfp_net_tx_xmit_more_flush(tx_ring);
	u64_stats_update_begin(&r_vec->tx_sync);
	r_vec->tx_errors++;
	u64_stats_update_end(&r_vec->tx_sync);
	dev_kfree_skb_any(skb);
	return NETDEV_TX_OK;
}
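
/* ktxbufs entries for one packet, in order: the skb pointer, the head DMA
 * address, one DMA address per fragment, and for TSO packets a final entry
 * holding real_len/pkt_cnt.  The completion handler below walks exactly
 * this layout to unmap the buffers and account the statistics.
 */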
/**
 * nfp_nfdk_tx_complete() - Handle completed TX packets
 * @tx_ring: TX ring structure
 * @budget:  NAPI budget (only used as bool to determine if in NAPI context)
 */
static void nfp_nfdk_tx_complete(struct nfp_net_tx_ring *tx_ring, int budget)
{
	struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
	struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
	u32 done_pkts = 0, done_bytes = 0;
	struct nfp_nfdk_tx_buf *ktxbufs;
	struct device *dev = dp->dev;
	struct netdev_queue *nd_q;
	u32 rd_p, qcp_rd_p;
	int todo;

	rd_p = tx_ring->rd_p;
	if (tx_ring->wr_p == rd_p)
		return;

	/* Work out how many descriptors have been transmitted */
	qcp_rd_p = nfp_net_read_tx_cmpl(tx_ring, dp);

	if (qcp_rd_p == tx_ring->qcp_rd_p)
		return;

	todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p);
	ktxbufs = tx_ring->ktxbufs;

	while (todo > 0) {
		const skb_frag_t *frag, *fend;
		unsigned int size, n_descs = 1;
		struct nfp_nfdk_tx_buf *txbuf;
		struct sk_buff *skb;

		txbuf = &ktxbufs[D_IDX(tx_ring, rd_p)];
		skb = txbuf->skb;
		txbuf++;

		/* Closed block */
		if (!skb) {
			n_descs = D_BLOCK_CPL(rd_p);
			goto next;
		}

		/* Unmap head */
		size = skb_headlen(skb);
		n_descs += nfp_nfdk_headlen_to_segs(size);
		dma_unmap_single(dev, txbuf->dma_addr, size, DMA_TO_DEVICE);
		txbuf++;

		/* Unmap frags */
		frag = skb_shinfo(skb)->frags;
		fend = frag + skb_shinfo(skb)->nr_frags;
		for (; frag < fend; frag++) {
			size = skb_frag_size(frag);
			n_descs += DIV_ROUND_UP(size,
						NFDK_TX_MAX_DATA_PER_DESC);
			dma_unmap_page(dev, txbuf->dma_addr,
				       skb_frag_size(frag), DMA_TO_DEVICE);
			txbuf++;
		}

		if (!skb_is_gso(skb)) {
			done_bytes += skb->len;
			done_pkts++;
		} else {
			done_bytes += txbuf->real_len;
			done_pkts += txbuf->pkt_cnt;
			n_descs++;
		}

		napi_consume_skb(skb, budget);
next:
		rd_p += n_descs;
		todo -= n_descs;
	}

	tx_ring->rd_p = rd_p;
	tx_ring->qcp_rd_p = qcp_rd_p;

	u64_stats_update_begin(&r_vec->tx_sync);
	r_vec->tx_bytes += done_bytes;
	r_vec->tx_pkts += done_pkts;
	u64_stats_update_end(&r_vec->tx_sync);

	if (!dp->netdev)
		return;

	nd_q = netdev_get_tx_queue(dp->netdev, tx_ring->idx);
	netdev_tx_completed_queue(nd_q, done_pkts, done_bytes);
	if (nfp_nfdk_tx_ring_should_wake(tx_ring)) {
		/* Make sure TX thread will see updated tx_ring->rd_p */
		smp_mb();

		if (unlikely(netif_tx_queue_stopped(nd_q)))
			netif_tx_wake_queue(nd_q);
	}

	WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt,
		  "TX ring corruption rd_p=%u wr_p=%u cnt=%u\n",
		  tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt);
}

/* Receive processing */
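
/* RX buffer allocation strategy: without XDP a page fragment of fl_bufsz
 * bytes from the per-CPU NAPI cache is used; with an XDP program attached
 * every buffer is a full page, so the program is free to move the packet
 * start or grow the tail within the page.
 */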
static void *
nfp_nfdk_napi_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr)
{
	void *frag;

	if (!dp->xdp_prog) {
		frag = napi_alloc_frag(dp->fl_bufsz);
		if (unlikely(!frag))
			return NULL;
	} else {
		struct page *page;

		page = dev_alloc_page();
		if (unlikely(!page))
			return NULL;
		frag = page_address(page);
	}

	*dma_addr = nfp_net_dma_map_rx(dp, frag);
	if (dma_mapping_error(dp->dev, *dma_addr)) {
		nfp_net_free_frag(frag, dp->xdp_prog);
		nn_dp_warn(dp, "Failed to map DMA RX buffer\n");
		return NULL;
	}

	return frag;
}

/**
 * nfp_nfdk_rx_give_one() - Put mapped skb on the software and hardware rings
 * @dp:       NFP Net data path struct
 * @rx_ring:  RX ring structure
 * @frag:     page fragment buffer
 * @dma_addr: DMA address of skb mapping
 */
static void
nfp_nfdk_rx_give_one(const struct nfp_net_dp *dp,
		     struct nfp_net_rx_ring *rx_ring,
		     void *frag, dma_addr_t dma_addr)
{
	unsigned int wr_idx;

	wr_idx = D_IDX(rx_ring, rx_ring->wr_p);

	nfp_net_dma_sync_dev_rx(dp, dma_addr);

	/* Stash SKB and DMA address away */
	rx_ring->rxbufs[wr_idx].frag = frag;
	rx_ring->rxbufs[wr_idx].dma_addr = dma_addr;

	/* Fill freelist descriptor */
	rx_ring->rxds[wr_idx].fld.reserved = 0;
	rx_ring->rxds[wr_idx].fld.meta_len_dd = 0;
	nfp_desc_set_dma_addr(&rx_ring->rxds[wr_idx].fld,
			      dma_addr + dp->rx_dma_off);

	rx_ring->wr_p++;
	if (!(rx_ring->wr_p % NFP_NET_FL_BATCH)) {
		/* Update write pointer of the freelist queue. Make
		 * sure all writes are flushed before telling the hardware.
		 */
		wmb();
		nfp_qcp_wr_ptr_add(rx_ring->qcp_fl, NFP_NET_FL_BATCH);
	}
}

/**
 * nfp_nfdk_rx_ring_fill_freelist() - Give buffers from the ring to FW
 * @dp:	     NFP Net data path struct
 * @rx_ring: RX ring to fill
 */
void nfp_nfdk_rx_ring_fill_freelist(struct nfp_net_dp *dp,
				    struct nfp_net_rx_ring *rx_ring)
{
	unsigned int i;

	for (i = 0; i < rx_ring->cnt - 1; i++)
		nfp_nfdk_rx_give_one(dp, rx_ring, rx_ring->rxbufs[i].frag,
				     rx_ring->rxbufs[i].dma_addr);
}

/**
 * nfp_nfdk_rx_csum_has_errors() - group check if rxd has any csum errors
 * @flags: RX descriptor flags field in CPU byte order
 */
static int nfp_nfdk_rx_csum_has_errors(u16 flags)
{
	u16 csum_all_checked, csum_all_ok;

	csum_all_checked = flags & __PCIE_DESC_RX_CSUM_ALL;
	csum_all_ok = flags & __PCIE_DESC_RX_CSUM_ALL_OK;

	return csum_all_checked != (csum_all_ok << PCIE_DESC_RX_CSUM_OK_SHIFT);
}

/**
 * nfp_nfdk_rx_csum() - set SKB checksum field based on RX descriptor flags
 * @dp:    NFP Net data path struct
 * @r_vec: per-ring structure
 * @rxd:   Pointer to RX descriptor
 * @meta:  Parsed metadata prepend
 * @skb:   Pointer to SKB
 */
static void
nfp_nfdk_rx_csum(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
		 struct nfp_net_rx_desc *rxd, struct nfp_meta_parsed *meta,
		 struct sk_buff *skb)
{
	skb_checksum_none_assert(skb);

	if (!(dp->netdev->features & NETIF_F_RXCSUM))
		return;

	if (meta->csum_type) {
		skb->ip_summed = meta->csum_type;
		skb->csum = meta->csum;
		u64_stats_update_begin(&r_vec->rx_sync);
		r_vec->hw_csum_rx_complete++;
		u64_stats_update_end(&r_vec->rx_sync);
		return;
	}

	if (nfp_nfdk_rx_csum_has_errors(le16_to_cpu(rxd->rxd.flags))) {
		u64_stats_update_begin(&r_vec->rx_sync);
		r_vec->hw_csum_rx_error++;
		u64_stats_update_end(&r_vec->rx_sync);
		return;
	}

	/* Assume that the firmware will never report inner CSUM_OK unless outer
	 * L4 headers were successfully parsed. FW will always report zero UDP
	 * checksum as CSUM_OK.
	 */
	if (rxd->rxd.flags & PCIE_DESC_RX_TCP_CSUM_OK ||
	    rxd->rxd.flags & PCIE_DESC_RX_UDP_CSUM_OK) {
		__skb_incr_checksum_unnecessary(skb);
		u64_stats_update_begin(&r_vec->rx_sync);
		r_vec->hw_csum_rx_ok++;
		u64_stats_update_end(&r_vec->rx_sync);
	}

	if (rxd->rxd.flags & PCIE_DESC_RX_I_TCP_CSUM_OK ||
	    rxd->rxd.flags & PCIE_DESC_RX_I_UDP_CSUM_OK) {
		__skb_incr_checksum_unnecessary(skb);
		u64_stats_update_begin(&r_vec->rx_sync);
		r_vec->hw_csum_rx_inner_ok++;
		u64_stats_update_end(&r_vec->rx_sync);
	}
}

static void
nfp_nfdk_set_hash(struct net_device *netdev, struct nfp_meta_parsed *meta,
		  unsigned int type, __be32 *hash)
{
	if (!(netdev->features & NETIF_F_RXHASH))
		return;

	switch (type) {
	case NFP_NET_RSS_IPV4:
	case NFP_NET_RSS_IPV6:
	case NFP_NET_RSS_IPV6_EX:
		meta->hash_type = PKT_HASH_TYPE_L3;
		break;
	default:
		meta->hash_type = PKT_HASH_TYPE_L4;
		break;
	}

	meta->hash = get_unaligned_be32(hash);
}
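
/* RX metadata is parsed as a small type-value stream: the first big-endian
 * word packs up to eight 4-bit field type tags, consumed from the least
 * significant bits up, and each tag is followed by its 4-byte value (or a
 * resync request structure).  The parse must consume exactly the prepend;
 * leftover bytes cause the metadata to be rejected.
 */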
static bool
nfp_nfdk_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta,
		    void *data, void *pkt, unsigned int pkt_len, int meta_len)
{
	u32 meta_info;

	meta_info = get_unaligned_be32(data);
	data += 4;

	while (meta_info) {
		switch (meta_info & NFP_NET_META_FIELD_MASK) {
		case NFP_NET_META_HASH:
			meta_info >>= NFP_NET_META_FIELD_SIZE;
			nfp_nfdk_set_hash(netdev, meta,
					  meta_info & NFP_NET_META_FIELD_MASK,
					  (__be32 *)data);
			data += 4;
			break;
		case NFP_NET_META_MARK:
			meta->mark = get_unaligned_be32(data);
			data += 4;
			break;
		case NFP_NET_META_PORTID:
			meta->portid = get_unaligned_be32(data);
			data += 4;
			break;
		case NFP_NET_META_CSUM:
			meta->csum_type = CHECKSUM_COMPLETE;
			meta->csum =
				(__force __wsum)__get_unaligned_cpu32(data);
			data += 4;
			break;
		case NFP_NET_META_RESYNC_INFO:
			if (nfp_net_tls_rx_resync_req(netdev, data, pkt,
						      pkt_len))
				return false;
			data += sizeof(struct nfp_net_tls_resync_req);
			break;
		default:
			return true;
		}

		meta_info >>= NFP_NET_META_FIELD_SIZE;
	}

	return data != pkt;
}

static void
nfp_nfdk_rx_drop(const struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
		 struct nfp_net_rx_ring *rx_ring, struct nfp_net_rx_buf *rxbuf,
		 struct sk_buff *skb)
{
	u64_stats_update_begin(&r_vec->rx_sync);
	r_vec->rx_drops++;
	/* If we have both skb and rxbuf the replacement buffer allocation
	 * must have failed, count this as an alloc failure.
	 */
	if (skb && rxbuf)
		r_vec->rx_replace_buf_alloc_fail++;
	u64_stats_update_end(&r_vec->rx_sync);

	/* The skb is built around the frag; freeing the skb would free the
	 * frag, so to be able to reuse the frag we need an extra ref.
	 */
	if (skb && rxbuf && skb->head == rxbuf->frag)
		page_ref_inc(virt_to_head_page(rxbuf->frag));
	if (rxbuf)
		nfp_nfdk_rx_give_one(dp, rx_ring, rxbuf->frag, rxbuf->dma_addr);
	if (skb)
		dev_kfree_skb_any(skb);
}
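
/* Completion handling for the XDP TX ring: there are no skbs here, SOP
 * txbuf entries carry the frag pointer and the following entry its DMA
 * address, and completed buffers are recycled straight back onto the RX
 * freelist.  Only packet counts are accumulated; bytes are not tracked.
 */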
static bool nfp_nfdk_xdp_complete(struct nfp_net_tx_ring *tx_ring)
{
	struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
	struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
	struct nfp_net_rx_ring *rx_ring;
	u32 qcp_rd_p, done = 0;
	bool done_all;
	int todo;

	/* Work out how many descriptors have been transmitted */
	qcp_rd_p = nfp_net_read_tx_cmpl(tx_ring, dp);
	if (qcp_rd_p == tx_ring->qcp_rd_p)
		return true;

	todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p);

	done_all = todo <= NFP_NET_XDP_MAX_COMPLETE;
	todo = min(todo, NFP_NET_XDP_MAX_COMPLETE);

	rx_ring = r_vec->rx_ring;
	while (todo > 0) {
		int idx = D_IDX(tx_ring, tx_ring->rd_p + done);
		struct nfp_nfdk_tx_buf *txbuf;
		unsigned int step = 1;

		txbuf = &tx_ring->ktxbufs[idx];
		if (!txbuf->raw)
			goto next;

		if (NFDK_TX_BUF_INFO(txbuf->val) != NFDK_TX_BUF_INFO_SOP) {
			WARN_ONCE(1, "Unexpected TX buffer in XDP TX ring\n");
			goto next;
		}

		/* Two successive txbufs are used to stash virtual and dma
		 * address respectively, recycle and clean them here.
		 */
		nfp_nfdk_rx_give_one(dp, rx_ring,
				     (void *)NFDK_TX_BUF_PTR(txbuf[0].val),
				     txbuf[1].dma_addr);
		txbuf[0].raw = 0;
		txbuf[1].raw = 0;
		step = 2;

		u64_stats_update_begin(&r_vec->tx_sync);
		/* Note: tx_bytes not accumulated. */
		r_vec->tx_pkts++;
		u64_stats_update_end(&r_vec->tx_sync);
next:
		todo -= step;
		done += step;
	}

	tx_ring->qcp_rd_p = D_IDX(tx_ring, tx_ring->qcp_rd_p + done);
	tx_ring->rd_p += done;

	WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt,
		  "XDP TX ring corruption rd_p=%u wr_p=%u cnt=%u\n",
		  tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt);

	return done_all;
}
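
/* The XDP_TX transmit path below mirrors the main TX path: the packet is
 * nop-padded onto a fresh block if its descriptors would cross a block
 * boundary or overflow the block's NFDK_TX_MAX_DATA_PER_BLOCK data budget,
 * and the buffer stays mapped (synced bidirectionally) since it will be
 * recycled back to the RX ring on completion.
 */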
static bool
nfp_nfdk_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring,
		    struct nfp_net_tx_ring *tx_ring,
		    struct nfp_net_rx_buf *rxbuf, unsigned int dma_off,
		    unsigned int pkt_len, bool *completed)
{
	unsigned int dma_map_sz = dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA;
	unsigned int dma_len, type, cnt, dlen_type, tmp_dlen;
	struct nfp_nfdk_tx_buf *txbuf;
	struct nfp_nfdk_tx_desc *txd;
	unsigned int n_descs;
	dma_addr_t dma_addr;
	int wr_idx;

	/* Reject if xdp_adjust_tail grew packet beyond DMA area */
	if (pkt_len + dma_off > dma_map_sz)
		return false;

	/* Make sure there's still at least one block available after
	 * aligning to block boundary, so that the txds used below
	 * won't wrap around the tx_ring.
	 */
	if (unlikely(nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT))) {
		if (!*completed) {
			nfp_nfdk_xdp_complete(tx_ring);
			*completed = true;
		}

		if (unlikely(nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT))) {
			nfp_nfdk_rx_drop(dp, rx_ring->r_vec, rx_ring, rxbuf,
					 NULL);
			return false;
		}
	}

	/* Check if crossing a block boundary */
	n_descs = nfp_nfdk_headlen_to_segs(pkt_len);
	if ((round_down(tx_ring->wr_p, NFDK_TX_DESC_BLOCK_CNT) !=
	     round_down(tx_ring->wr_p + n_descs, NFDK_TX_DESC_BLOCK_CNT)) ||
	    ((u32)tx_ring->data_pending + pkt_len >
	     NFDK_TX_MAX_DATA_PER_BLOCK)) {
		unsigned int nop_slots = D_BLOCK_CPL(tx_ring->wr_p);

		wr_idx = D_IDX(tx_ring, tx_ring->wr_p);
		txd = &tx_ring->ktxds[wr_idx];
		memset(txd, 0,
		       array_size(nop_slots, sizeof(struct nfp_nfdk_tx_desc)));

		tx_ring->data_pending = 0;
		tx_ring->wr_p += nop_slots;
		tx_ring->wr_ptr_add += nop_slots;
	}

	wr_idx = D_IDX(tx_ring, tx_ring->wr_p);

	txbuf = &tx_ring->ktxbufs[wr_idx];

	txbuf[0].val = (unsigned long)rxbuf->frag | NFDK_TX_BUF_INFO_SOP;
	txbuf[1].dma_addr = rxbuf->dma_addr;
	/* Note: pkt len not stored */

	dma_sync_single_for_device(dp->dev, rxbuf->dma_addr + dma_off,
				   pkt_len, DMA_BIDIRECTIONAL);

	/* Build TX descriptor */
	txd = &tx_ring->ktxds[wr_idx];
	dma_len = pkt_len;
	dma_addr = rxbuf->dma_addr + dma_off;

	if (dma_len < NFDK_TX_MAX_DATA_PER_HEAD)
		type = NFDK_DESC_TX_TYPE_SIMPLE;
	else
		type = NFDK_DESC_TX_TYPE_GATHER;

	/* FIELD_PREP() implicitly truncates to chunk */
	dma_len -= 1;
	dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD,
			       dma_len > NFDK_DESC_TX_DMA_LEN_HEAD ?
			       NFDK_DESC_TX_DMA_LEN_HEAD : dma_len) |
		    FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type);

	txd->dma_len_type = cpu_to_le16(dlen_type);
	nfp_nfdk_tx_desc_set_dma_addr(txd, dma_addr);

	tmp_dlen = dlen_type & NFDK_DESC_TX_DMA_LEN_HEAD;
	dma_len -= tmp_dlen;
	dma_addr += tmp_dlen + 1;
	txd++;

	while (dma_len > 0) {
		dma_len -= 1;
		dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN, dma_len);
		txd->dma_len_type = cpu_to_le16(dlen_type);
		nfp_nfdk_tx_desc_set_dma_addr(txd, dma_addr);

		dlen_type &= NFDK_DESC_TX_DMA_LEN;
		dma_len -= dlen_type;
		dma_addr += dlen_type + 1;
		txd++;
	}

	(txd - 1)->dma_len_type = cpu_to_le16(dlen_type | NFDK_DESC_TX_EOP);

	/* Metadata desc */
	txd->raw = 0;
	txd++;

	cnt = txd - tx_ring->ktxds - wr_idx;
	tx_ring->wr_p += cnt;
	if (tx_ring->wr_p % NFDK_TX_DESC_BLOCK_CNT)
		tx_ring->data_pending += pkt_len;
	else
		tx_ring->data_pending = 0;

	tx_ring->wr_ptr_add += cnt;
	return true;
}

/**
 * nfp_nfdk_rx() - receive up to @budget packets on @rx_ring
 * @rx_ring: RX ring to receive from
 * @budget:  NAPI budget
 *
 * Note, this function is separated out from the napi poll function to
 * more cleanly separate packet receive code from other bookkeeping
 * functions performed in the napi poll function.
 *
 * Return: Number of packets received.
 */
static int nfp_nfdk_rx(struct nfp_net_rx_ring *rx_ring, int budget)
{
	struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
	struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
	struct nfp_net_tx_ring *tx_ring;
	struct bpf_prog *xdp_prog;
	bool xdp_tx_cmpl = false;
	unsigned int true_bufsz;
	struct sk_buff *skb;
	int pkts_polled = 0;
	struct xdp_buff xdp;
	int idx;

	xdp_prog = READ_ONCE(dp->xdp_prog);
	true_bufsz = xdp_prog ? PAGE_SIZE : dp->fl_bufsz;
	xdp_init_buff(&xdp, PAGE_SIZE - NFP_NET_RX_BUF_HEADROOM,
		      &rx_ring->xdp_rxq);
	tx_ring = r_vec->xdp_ring;

	while (pkts_polled < budget) {
		unsigned int meta_len, data_len, meta_off, pkt_len, pkt_off;
		struct nfp_net_rx_buf *rxbuf;
		struct nfp_net_rx_desc *rxd;
		struct nfp_meta_parsed meta;
		bool redir_egress = false;
		struct net_device *netdev;
		dma_addr_t new_dma_addr;
		u32 meta_len_xdp = 0;
		void *new_frag;

		idx = D_IDX(rx_ring, rx_ring->rd_p);

		rxd = &rx_ring->rxds[idx];
		if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD))
			break;

		/* Memory barrier to ensure that we won't do other reads
		 * before the DD bit.
		 */
		dma_rmb();

		memset(&meta, 0, sizeof(meta));

		rx_ring->rd_p++;
		pkts_polled++;

		rxbuf = &rx_ring->rxbufs[idx];
		/*         < meta_len >
		 *  <-- [rx_offset] -->
		 *  ---------------------------------------------------------
		 * | [XX] |  metadata  |             packet           | XXXX |
		 *  ---------------------------------------------------------
		 *         <---------------- data_len --------------->
		 *
		 * The rx_offset is fixed for all packets, the meta_len can vary
		 * on a packet by packet basis. If rx_offset is set to zero
		 * (_RX_OFFSET_DYNAMIC) metadata starts at the beginning of the
		 * buffer and is immediately followed by the packet (no [XX]).
		 */
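		/* Worked example with hypothetical numbers: for meta_len of 8
		 * and a fixed rx_offset of 32, the packet starts at
		 * NFP_NET_RX_BUF_HEADROOM + rx_dma_off + 32 and the metadata
		 * at pkt_off - 8.  With _RX_OFFSET_DYNAMIC, meta_len itself
		 * takes the place of rx_offset.
		 */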
		meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK;
		data_len = le16_to_cpu(rxd->rxd.data_len);
		pkt_len = data_len - meta_len;

		pkt_off = NFP_NET_RX_BUF_HEADROOM + dp->rx_dma_off;
		if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
			pkt_off += meta_len;
		else
			pkt_off += dp->rx_offset;
		meta_off = pkt_off - meta_len;

		/* Stats update */
		u64_stats_update_begin(&r_vec->rx_sync);
		r_vec->rx_pkts++;
		r_vec->rx_bytes += pkt_len;
		u64_stats_update_end(&r_vec->rx_sync);

		if (unlikely(meta_len > NFP_NET_MAX_PREPEND ||
			     (dp->rx_offset && meta_len > dp->rx_offset))) {
			nn_dp_warn(dp, "oversized RX packet metadata %u\n",
				   meta_len);
			nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
			continue;
		}

		nfp_net_dma_sync_cpu_rx(dp, rxbuf->dma_addr + meta_off,
					data_len);

		if (meta_len) {
			if (unlikely(nfp_nfdk_parse_meta(dp->netdev, &meta,
							 rxbuf->frag + meta_off,
							 rxbuf->frag + pkt_off,
							 pkt_len, meta_len))) {
				nn_dp_warn(dp, "invalid RX packet metadata\n");
				nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf,
						 NULL);
				continue;
			}
		}

		if (xdp_prog && !meta.portid) {
			void *orig_data = rxbuf->frag + pkt_off;
			unsigned int dma_off;
			int act;

			xdp_prepare_buff(&xdp,
					 rxbuf->frag + NFP_NET_RX_BUF_HEADROOM,
					 pkt_off - NFP_NET_RX_BUF_HEADROOM,
					 pkt_len, true);

			act = bpf_prog_run_xdp(xdp_prog, &xdp);

			pkt_len = xdp.data_end - xdp.data;
			pkt_off += xdp.data - orig_data;

			switch (act) {
			case XDP_PASS:
				meta_len_xdp = xdp.data - xdp.data_meta;
				break;
			case XDP_TX:
				dma_off = pkt_off - NFP_NET_RX_BUF_HEADROOM;
				if (unlikely(!nfp_nfdk_tx_xdp_buf(dp, rx_ring,
								  tx_ring,
								  rxbuf,
								  dma_off,
								  pkt_len,
								  &xdp_tx_cmpl)))
					trace_xdp_exception(dp->netdev,
							    xdp_prog, act);
				continue;
			default:
				bpf_warn_invalid_xdp_action(dp->netdev, xdp_prog, act);
				fallthrough;
			case XDP_ABORTED:
				trace_xdp_exception(dp->netdev, xdp_prog, act);
				fallthrough;
			case XDP_DROP:
				nfp_nfdk_rx_give_one(dp, rx_ring, rxbuf->frag,
						     rxbuf->dma_addr);
				continue;
			}
		}

		if (likely(!meta.portid)) {
			netdev = dp->netdev;
		} else if (meta.portid == NFP_META_PORT_ID_CTRL) {
			struct nfp_net *nn = netdev_priv(dp->netdev);

			nfp_app_ctrl_rx_raw(nn->app, rxbuf->frag + pkt_off,
					    pkt_len);
			nfp_nfdk_rx_give_one(dp, rx_ring, rxbuf->frag,
					     rxbuf->dma_addr);
			continue;
		} else {
			struct nfp_net *nn;

			nn = netdev_priv(dp->netdev);
			netdev = nfp_app_dev_get(nn->app, meta.portid,
						 &redir_egress);
			if (unlikely(!netdev)) {
				nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf,
						 NULL);
				continue;
			}

			if (nfp_netdev_is_nfp_repr(netdev))
				nfp_repr_inc_rx_stats(netdev, pkt_len);
		}

		skb = build_skb(rxbuf->frag, true_bufsz);
		if (unlikely(!skb)) {
			nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
			continue;
		}
		new_frag = nfp_nfdk_napi_alloc_one(dp, &new_dma_addr);
		if (unlikely(!new_frag)) {
			nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, skb);
			continue;
		}

		nfp_net_dma_unmap_rx(dp, rxbuf->dma_addr);

		nfp_nfdk_rx_give_one(dp, rx_ring, new_frag, new_dma_addr);

		skb_reserve(skb, pkt_off);
		skb_put(skb, pkt_len);

		skb->mark = meta.mark;
		skb_set_hash(skb, meta.hash, meta.hash_type);

		skb_record_rx_queue(skb, rx_ring->idx);
		skb->protocol = eth_type_trans(skb, netdev);

		nfp_nfdk_rx_csum(dp, r_vec, rxd, &meta, skb);

		if (rxd->rxd.flags & PCIE_DESC_RX_VLAN)
			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
					       le16_to_cpu(rxd->rxd.vlan));
		if (meta_len_xdp)
			skb_metadata_set(skb, meta_len_xdp);

		if (likely(!redir_egress)) {
			napi_gro_receive(&rx_ring->r_vec->napi, skb);
		} else {
			skb->dev = netdev;
			skb_reset_network_header(skb);
			__skb_push(skb, ETH_HLEN);
			dev_queue_xmit(skb);
		}
	}

	if (xdp_prog) {
		if (tx_ring->wr_ptr_add)
			nfp_net_tx_xmit_more_flush(tx_ring);
		else if (unlikely(tx_ring->wr_p != tx_ring->rd_p) &&
			 !xdp_tx_cmpl)
			if (!nfp_nfdk_xdp_complete(tx_ring))
				pkts_polled = budget;
	}

	return pkts_polled;
}

/**
 * nfp_nfdk_poll() - napi poll function
 * @napi:   NAPI structure
 * @budget: NAPI budget
 *
 * Return: number of packets polled.
 */
int nfp_nfdk_poll(struct napi_struct *napi, int budget)
{
	struct nfp_net_r_vector *r_vec =
		container_of(napi, struct nfp_net_r_vector, napi);
	unsigned int pkts_polled = 0;

	if (r_vec->tx_ring)
		nfp_nfdk_tx_complete(r_vec->tx_ring, budget);
	if (r_vec->rx_ring)
		pkts_polled = nfp_nfdk_rx(r_vec->rx_ring, budget);

	if (pkts_polled < budget)
		if (napi_complete_done(napi, pkts_polled))
			nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry);

	if (r_vec->nfp_net->rx_coalesce_adapt_on && r_vec->rx_ring) {
		struct dim_sample dim_sample = {};
		unsigned int start;
		u64 pkts, bytes;

		do {
			start = u64_stats_fetch_begin(&r_vec->rx_sync);
			pkts = r_vec->rx_pkts;
			bytes = r_vec->rx_bytes;
		} while (u64_stats_fetch_retry(&r_vec->rx_sync, start));

		dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample);
		net_dim(&r_vec->rx_dim, dim_sample);
	}

	if (r_vec->nfp_net->tx_coalesce_adapt_on && r_vec->tx_ring) {
		struct dim_sample dim_sample = {};
		unsigned int start;
		u64 pkts, bytes;

		do {
			start = u64_stats_fetch_begin(&r_vec->tx_sync);
			pkts = r_vec->tx_pkts;
			bytes = r_vec->tx_bytes;
		} while (u64_stats_fetch_retry(&r_vec->tx_sync, start));

		dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample);
		net_dim(&r_vec->tx_dim, dim_sample);
	}

	return pkts_polled;
}

/* Control device data path
 */
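
/* Control messages run over a dedicated vector outside NAPI: TX is driven
 * under r_vec->lock from the nfp_nfdk_ctrl_poll() tasklet, messages that
 * don't fit the ring are parked on r_vec->queue and retried later, and RX
 * is polled with a fixed local budget rather than NAPI's.
 */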
bool
nfp_nfdk_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
		     struct sk_buff *skb, bool old)
{
	u32 cnt, tmp_dlen, dlen_type = 0;
	struct nfp_net_tx_ring *tx_ring;
	struct nfp_nfdk_tx_buf *txbuf;
	struct nfp_nfdk_tx_desc *txd;
	unsigned int dma_len, type;
	struct nfp_net_dp *dp;
	dma_addr_t dma_addr;
	u64 metadata = 0;
	int wr_idx;

	dp = &r_vec->nfp_net->dp;
	tx_ring = r_vec->tx_ring;

	if (WARN_ON_ONCE(skb_shinfo(skb)->nr_frags)) {
		nn_dp_warn(dp, "Driver's CTRL TX does not implement gather\n");
		goto err_free;
	}

	/* Don't bother counting frags, assume the worst */
	if (unlikely(nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT))) {
		u64_stats_update_begin(&r_vec->tx_sync);
		r_vec->tx_busy++;
		u64_stats_update_end(&r_vec->tx_sync);
		if (!old)
			__skb_queue_tail(&r_vec->queue, skb);
		else
			__skb_queue_head(&r_vec->queue, skb);
		return NETDEV_TX_BUSY;
	}

	if (nfp_app_ctrl_has_meta(nn->app)) {
		if (unlikely(skb_headroom(skb) < 8)) {
			nn_dp_warn(dp, "CTRL TX on skb without headroom\n");
			goto err_free;
		}
		metadata = NFDK_DESC_TX_CHAIN_META;
		put_unaligned_be32(NFP_META_PORT_ID_CTRL, skb_push(skb, 4));
		put_unaligned_be32(FIELD_PREP(NFDK_META_LEN, 8) |
				   FIELD_PREP(NFDK_META_FIELDS,
					      NFP_NET_META_PORTID),
				   skb_push(skb, 4));
	}

	if (nfp_nfdk_tx_maybe_close_block(tx_ring, skb))
		goto err_free;

	/* DMA map all */
	wr_idx = D_IDX(tx_ring, tx_ring->wr_p);
	txd = &tx_ring->ktxds[wr_idx];
	txbuf = &tx_ring->ktxbufs[wr_idx];

	dma_len = skb_headlen(skb);
	if (dma_len < NFDK_TX_MAX_DATA_PER_HEAD)
		type = NFDK_DESC_TX_TYPE_SIMPLE;
	else
		type = NFDK_DESC_TX_TYPE_GATHER;

	dma_addr = dma_map_single(dp->dev, skb->data, dma_len, DMA_TO_DEVICE);
	if (dma_mapping_error(dp->dev, dma_addr))
		goto err_warn_dma;

	txbuf->skb = skb;
	txbuf++;

	txbuf->dma_addr = dma_addr;
	txbuf++;

	dma_len -= 1;
	dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD,
			       dma_len > NFDK_DESC_TX_DMA_LEN_HEAD ?
			       NFDK_DESC_TX_DMA_LEN_HEAD : dma_len) |
		    FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type);

	txd->dma_len_type = cpu_to_le16(dlen_type);
	nfp_nfdk_tx_desc_set_dma_addr(txd, dma_addr);

	tmp_dlen = dlen_type & NFDK_DESC_TX_DMA_LEN_HEAD;
	dma_len -= tmp_dlen;
	dma_addr += tmp_dlen + 1;
	txd++;

	while (dma_len > 0) {
		dma_len -= 1;
		dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN, dma_len);
		txd->dma_len_type = cpu_to_le16(dlen_type);
		nfp_nfdk_tx_desc_set_dma_addr(txd, dma_addr);

		dlen_type &= NFDK_DESC_TX_DMA_LEN;
		dma_len -= dlen_type;
		dma_addr += dlen_type + 1;
		txd++;
	}

	(txd - 1)->dma_len_type = cpu_to_le16(dlen_type | NFDK_DESC_TX_EOP);

	/* Metadata desc */
	txd->raw = cpu_to_le64(metadata);
	txd++;

	cnt = txd - tx_ring->ktxds - wr_idx;
	if (unlikely(round_down(wr_idx, NFDK_TX_DESC_BLOCK_CNT) !=
		     round_down(wr_idx + cnt - 1, NFDK_TX_DESC_BLOCK_CNT)))
		goto err_warn_overflow;

	tx_ring->wr_p += cnt;
	if (tx_ring->wr_p % NFDK_TX_DESC_BLOCK_CNT)
		tx_ring->data_pending += skb->len;
	else
		tx_ring->data_pending = 0;

	tx_ring->wr_ptr_add += cnt;
	nfp_net_tx_xmit_more_flush(tx_ring);

	return NETDEV_TX_OK;

err_warn_overflow:
	WARN_ONCE(1, "unable to fit packet into a descriptor wr_idx:%d head:%d frags:%d cnt:%d",
		  wr_idx, skb_headlen(skb), 0, cnt);
	txbuf--;
	dma_unmap_single(dp->dev, txbuf->dma_addr,
			 skb_headlen(skb), DMA_TO_DEVICE);
	txbuf->raw = 0;
err_warn_dma:
	nn_dp_warn(dp, "Failed to map DMA TX buffer\n");
err_free:
	u64_stats_update_begin(&r_vec->tx_sync);
	r_vec->tx_errors++;
	u64_stats_update_end(&r_vec->tx_sync);
	dev_kfree_skb_any(skb);
	return NETDEV_TX_OK;
}
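
/* Drain the control message backlog: skbs queued while the ring was full
 * are retried oldest-first; a message that still doesn't fit is put back
 * at the head of the queue (old == true in nfp_nfdk_ctrl_tx_one()) and
 * draining stops until the next completion.
 */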
static void __nfp_ctrl_tx_queued(struct nfp_net_r_vector *r_vec)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue(&r_vec->queue)))
		if (nfp_nfdk_ctrl_tx_one(r_vec->nfp_net, r_vec, skb, true))
			return;
}

static bool
nfp_ctrl_meta_ok(struct nfp_net *nn, void *data, unsigned int meta_len)
{
	u32 meta_type, meta_tag;

	if (!nfp_app_ctrl_has_meta(nn->app))
		return !meta_len;

	if (meta_len != 8)
		return false;

	meta_type = get_unaligned_be32(data);
	meta_tag = get_unaligned_be32(data + 4);

	return (meta_type == NFP_NET_META_PORTID &&
		meta_tag == NFP_META_PORT_ID_CTRL);
}

static bool
nfp_ctrl_rx_one(struct nfp_net *nn, struct nfp_net_dp *dp,
		struct nfp_net_r_vector *r_vec, struct nfp_net_rx_ring *rx_ring)
{
	unsigned int meta_len, data_len, meta_off, pkt_len, pkt_off;
	struct nfp_net_rx_buf *rxbuf;
	struct nfp_net_rx_desc *rxd;
	dma_addr_t new_dma_addr;
	struct sk_buff *skb;
	void *new_frag;
	int idx;

	idx = D_IDX(rx_ring, rx_ring->rd_p);

	rxd = &rx_ring->rxds[idx];
	if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD))
		return false;

	/* Memory barrier to ensure that we won't do other reads
	 * before the DD bit.
	 */
	dma_rmb();

	rx_ring->rd_p++;

	rxbuf = &rx_ring->rxbufs[idx];
	meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK;
	data_len = le16_to_cpu(rxd->rxd.data_len);
	pkt_len = data_len - meta_len;

	pkt_off = NFP_NET_RX_BUF_HEADROOM + dp->rx_dma_off;
	if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
		pkt_off += meta_len;
	else
		pkt_off += dp->rx_offset;
	meta_off = pkt_off - meta_len;

	/* Stats update */
	u64_stats_update_begin(&r_vec->rx_sync);
	r_vec->rx_pkts++;
	r_vec->rx_bytes += pkt_len;
	u64_stats_update_end(&r_vec->rx_sync);

	nfp_net_dma_sync_cpu_rx(dp, rxbuf->dma_addr + meta_off, data_len);

	if (unlikely(!nfp_ctrl_meta_ok(nn, rxbuf->frag + meta_off, meta_len))) {
		nn_dp_warn(dp, "incorrect metadata for ctrl packet (%d)\n",
			   meta_len);
		nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
		return true;
	}

	skb = build_skb(rxbuf->frag, dp->fl_bufsz);
	if (unlikely(!skb)) {
		nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
		return true;
	}
	new_frag = nfp_nfdk_napi_alloc_one(dp, &new_dma_addr);
	if (unlikely(!new_frag)) {
		nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, skb);
		return true;
	}

	nfp_net_dma_unmap_rx(dp, rxbuf->dma_addr);

	nfp_nfdk_rx_give_one(dp, rx_ring, new_frag, new_dma_addr);

	skb_reserve(skb, pkt_off);
	skb_put(skb, pkt_len);

	nfp_app_ctrl_rx(nn->app, skb);

	return true;
}

static bool nfp_ctrl_rx(struct nfp_net_r_vector *r_vec)
{
	struct nfp_net_rx_ring *rx_ring = r_vec->rx_ring;
	struct nfp_net *nn = r_vec->nfp_net;
	struct nfp_net_dp *dp = &nn->dp;
	unsigned int budget = 512;

	while (nfp_ctrl_rx_one(nn, dp, r_vec, rx_ring) && budget--)
		continue;

	return budget;
}

void nfp_nfdk_ctrl_poll(struct tasklet_struct *t)
{
	struct nfp_net_r_vector *r_vec = from_tasklet(r_vec, t, tasklet);

	spin_lock(&r_vec->lock);
	nfp_nfdk_tx_complete(r_vec->tx_ring, 0);
	__nfp_ctrl_tx_queued(r_vec);
	spin_unlock(&r_vec->lock);

	if (nfp_ctrl_rx(r_vec)) {
		nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry);
	} else {
		tasklet_schedule(&r_vec->tasklet);
		nn_dp_warn(&r_vec->nfp_net->dp,
			   "control message budget exceeded!\n");
	}
}