/*
 * Copyright (c) 2015-2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials
 *   provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/tcp.h>
#include <linux/if_vlan.h>
#include <net/geneve.h>
#include <net/dsfield.h>
#include "en.h"
#include "en/txrx.h"
#include "ipoib/ipoib.h"
#include "en_accel/en_accel.h"
#include "en_accel/ipsec_rxtx.h"
#include "en_accel/macsec.h"
#include "en/ptp.h"
#include <net/ipv6.h>

static void mlx5e_dma_unmap_wqe_err(struct mlx5e_txqsq *sq, u8 num_dma)
{
	int i;

	for (i = 0; i < num_dma; i++) {
		struct mlx5e_sq_dma *last_pushed_dma =
			mlx5e_dma_get(sq, --sq->dma_fifo_pc);

		mlx5e_tx_dma_unmap(sq->pdev, last_pushed_dma);
	}
}

static inline int mlx5e_skb_l2_header_offset(struct sk_buff *skb)
{
#define MLX5E_MIN_INLINE (ETH_HLEN + VLAN_HLEN)

	return max(skb_network_offset(skb), MLX5E_MIN_INLINE);
}

static inline int mlx5e_skb_l3_header_offset(struct sk_buff *skb)
{
	if (skb_transport_header_was_set(skb))
		return skb_transport_offset(skb);
	else
		return mlx5e_skb_l2_header_offset(skb);
}

static inline u16 mlx5e_calc_min_inline(enum mlx5_inline_modes mode,
					struct sk_buff *skb)
{
	u16 hlen;

	switch (mode) {
	case MLX5_INLINE_MODE_NONE:
		return 0;
	case MLX5_INLINE_MODE_TCP_UDP:
		hlen = eth_get_headlen(skb->dev, skb->data, skb_headlen(skb));
		if (hlen == ETH_HLEN && !skb_vlan_tag_present(skb))
			hlen += VLAN_HLEN;
		break;
	case MLX5_INLINE_MODE_IP:
		hlen = mlx5e_skb_l3_header_offset(skb);
		break;
	case MLX5_INLINE_MODE_L2:
	default:
		hlen = mlx5e_skb_l2_header_offset(skb);
	}
	return min_t(u16, hlen, skb_headlen(skb));
}

#define MLX5_UNSAFE_MEMCPY_DISCLAIMER				\
	"This copy has been bounds-checked earlier in "		\
	"mlx5i_sq_calc_wqe_attr() and intentionally "		\
	"crosses a flex array boundary. Since it is "		\
	"performance sensitive, splitting the copy is "		\
	"undesirable."

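/* Copy the first ihs bytes of the packet headers into the WQE inline header,
 * inserting a VLAN tag right after the Ethernet addresses. The caller
 * accounts for the extra VLAN_HLEN bytes of inline data.
 */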
static inline void mlx5e_insert_vlan(void *start, struct sk_buff *skb, u16 ihs)
{
	struct vlan_ethhdr *vhdr = (struct vlan_ethhdr *)start;
	int cpy1_sz = 2 * ETH_ALEN;
	int cpy2_sz = ihs - cpy1_sz;

	memcpy(&vhdr->addrs, skb->data, cpy1_sz);
	vhdr->h_vlan_proto = skb->vlan_proto;
	vhdr->h_vlan_TCI = cpu_to_be16(skb_vlan_tag_get(skb));
	unsafe_memcpy(&vhdr->h_vlan_encapsulated_proto,
		      skb->data + cpy1_sz,
		      cpy2_sz,
		      MLX5_UNSAFE_MEMCPY_DISCLAIMER);
}

static inline void
mlx5e_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb,
			    struct mlx5e_accel_tx_state *accel,
			    struct mlx5_wqe_eth_seg *eseg)
{
	if (unlikely(mlx5e_ipsec_txwqe_build_eseg_csum(sq, skb, eseg)))
		return;

	if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
		eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM;
		if (skb->encapsulation) {
			eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM |
					  MLX5_ETH_WQE_L4_INNER_CSUM;
			sq->stats->csum_partial_inner++;
		} else {
			eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM;
			sq->stats->csum_partial++;
		}
#ifdef CONFIG_MLX5_EN_TLS
	} else if (unlikely(accel && accel->tls.tls_tisn)) {
		eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM;
		sq->stats->csum_partial++;
#endif
	} else
		sq->stats->csum_none++;
}

/* Returns the number of header bytes that we plan
 * to inline later in the transmit descriptor
 */
static inline u16
mlx5e_tx_get_gso_ihs(struct mlx5e_txqsq *sq, struct sk_buff *skb, int *hopbyhop)
{
	struct mlx5e_sq_stats *stats = sq->stats;
	u16 ihs;

	*hopbyhop = 0;
	if (skb->encapsulation) {
		ihs = skb_inner_tcp_all_headers(skb);
		stats->tso_inner_packets++;
		stats->tso_inner_bytes += skb->len - ihs;
	} else {
		if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) {
			ihs = skb_transport_offset(skb) + sizeof(struct udphdr);
		} else {
			ihs = skb_tcp_all_headers(skb);
			if (ipv6_has_hopopt_jumbo(skb)) {
				*hopbyhop = sizeof(struct hop_jumbo_hdr);
				ihs -= sizeof(struct hop_jumbo_hdr);
			}
		}
		stats->tso_packets++;
		stats->tso_bytes += skb->len - ihs - *hopbyhop;
	}

	return ihs;
}

static inline int
mlx5e_txwqe_build_dsegs(struct mlx5e_txqsq *sq, struct sk_buff *skb,
			unsigned char *skb_data, u16 headlen,
			struct mlx5_wqe_data_seg *dseg)
{
	dma_addr_t dma_addr = 0;
	u8 num_dma = 0;
	int i;

	if (headlen) {
		dma_addr = dma_map_single(sq->pdev, skb_data, headlen,
					  DMA_TO_DEVICE);
		if (unlikely(dma_mapping_error(sq->pdev, dma_addr)))
			goto dma_unmap_wqe_err;

		dseg->addr = cpu_to_be64(dma_addr);
		dseg->lkey = sq->mkey_be;
		dseg->byte_count = cpu_to_be32(headlen);

		mlx5e_dma_push(sq, dma_addr, headlen, MLX5E_DMA_MAP_SINGLE);
		num_dma++;
		dseg++;
	}

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
		int fsz = skb_frag_size(frag);

		dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz,
					    DMA_TO_DEVICE);
		if (unlikely(dma_mapping_error(sq->pdev, dma_addr)))
			goto dma_unmap_wqe_err;

		dseg->addr = cpu_to_be64(dma_addr);
		dseg->lkey = sq->mkey_be;
		dseg->byte_count = cpu_to_be32(fsz);

		mlx5e_dma_push(sq, dma_addr, fsz, MLX5E_DMA_MAP_PAGE);
		num_dma++;
		dseg++;
	}

	return num_dma;

dma_unmap_wqe_err:
	mlx5e_dma_unmap_wqe_err(sq, num_dma);
	return -ENOMEM;
}

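/* Per-SKB transmit attributes, filled in by mlx5e_sq_xmit_prepare() and
 * translated into the WQE layout by mlx5e_sq_calc_wqe_attr().
 */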
struct mlx5e_tx_attr {
	u32 num_bytes;
	u16 headlen;
	u16 ihs;
	__be16 mss;
	u16 insz;
	u8 opcode;
	u8 hopbyhop;
};

struct mlx5e_tx_wqe_attr {
	u16 ds_cnt;
	u16 ds_cnt_inl;
	u16 ds_cnt_ids;
	u8 num_wqebbs;
};

static u8
mlx5e_tx_wqe_inline_mode(struct mlx5e_txqsq *sq, struct sk_buff *skb,
			 struct mlx5e_accel_tx_state *accel)
{
	u8 mode;

#ifdef CONFIG_MLX5_EN_TLS
	if (accel && accel->tls.tls_tisn)
		return MLX5_INLINE_MODE_TCP_UDP;
#endif

	mode = sq->min_inline_mode;

	if (skb_vlan_tag_present(skb) &&
	    test_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state))
		mode = max_t(u8, MLX5_INLINE_MODE_L2, mode);

	return mode;
}

static void mlx5e_sq_xmit_prepare(struct mlx5e_txqsq *sq, struct sk_buff *skb,
				  struct mlx5e_accel_tx_state *accel,
				  struct mlx5e_tx_attr *attr)
{
	struct mlx5e_sq_stats *stats = sq->stats;

	if (skb_is_gso(skb)) {
		int hopbyhop;
		u16 ihs = mlx5e_tx_get_gso_ihs(sq, skb, &hopbyhop);

		*attr = (struct mlx5e_tx_attr) {
			.opcode = MLX5_OPCODE_LSO,
			.mss = cpu_to_be16(skb_shinfo(skb)->gso_size),
			.ihs = ihs,
			.num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs,
			.headlen = skb_headlen(skb) - ihs - hopbyhop,
			.hopbyhop = hopbyhop,
		};

		stats->packets += skb_shinfo(skb)->gso_segs;
	} else {
		u8 mode = mlx5e_tx_wqe_inline_mode(sq, skb, accel);
		u16 ihs = mlx5e_calc_min_inline(mode, skb);

		*attr = (struct mlx5e_tx_attr) {
			.opcode = MLX5_OPCODE_SEND,
			.mss = cpu_to_be16(0),
			.ihs = ihs,
			.num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN),
			.headlen = skb_headlen(skb) - ihs,
		};

		stats->packets++;
	}

	attr->insz = mlx5e_accel_tx_ids_len(sq, accel);
	stats->bytes += attr->num_bytes;
}

static void mlx5e_sq_calc_wqe_attr(struct sk_buff *skb, const struct mlx5e_tx_attr *attr,
				   struct mlx5e_tx_wqe_attr *wqe_attr)
{
	u16 ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT;
	u16 ds_cnt_inl = 0;
	u16 ds_cnt_ids = 0;

	if (attr->insz)
		ds_cnt_ids = DIV_ROUND_UP(sizeof(struct mlx5_wqe_inline_seg) + attr->insz,
					  MLX5_SEND_WQE_DS);

	ds_cnt += !!attr->headlen + skb_shinfo(skb)->nr_frags + ds_cnt_ids;
	if (attr->ihs) {
		u16 inl = attr->ihs - INL_HDR_START_SZ;

		if (skb_vlan_tag_present(skb))
			inl += VLAN_HLEN;

		ds_cnt_inl = DIV_ROUND_UP(inl, MLX5_SEND_WQE_DS);
		ds_cnt += ds_cnt_inl;
	}

	*wqe_attr = (struct mlx5e_tx_wqe_attr) {
		.ds_cnt = ds_cnt,
		.ds_cnt_inl = ds_cnt_inl,
		.ds_cnt_ids = ds_cnt_ids,
		.num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS),
	};
}

static void mlx5e_tx_skb_update_hwts_flags(struct sk_buff *skb)
{
	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
}

static void mlx5e_tx_check_stop(struct mlx5e_txqsq *sq)
{
	if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room))) {
		netif_tx_stop_queue(sq->txq);
		sq->stats->stopped++;
	}
}

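/* Drop-path helper: close any open MPWQE session, then post a NOP WQE and
 * ring the doorbell. Used after an SKB is dropped on the xmit path.
 */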
static void mlx5e_tx_flush(struct mlx5e_txqsq *sq)
{
	struct mlx5e_tx_wqe_info *wi;
	struct mlx5e_tx_wqe *wqe;
	u16 pi;

	/* Must not be called when a MPWQE session is active but empty. */
	mlx5e_tx_mpwqe_ensure_complete(sq);

	pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
	wi = &sq->db.wqe_info[pi];

	*wi = (struct mlx5e_tx_wqe_info) {
		.num_wqebbs = 1,
	};

	wqe = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc);
	mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &wqe->ctrl);
}

static inline void
mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
		     const struct mlx5e_tx_attr *attr,
		     const struct mlx5e_tx_wqe_attr *wqe_attr, u8 num_dma,
		     struct mlx5e_tx_wqe_info *wi, struct mlx5_wqe_ctrl_seg *cseg,
		     bool xmit_more)
{
	struct mlx5_wq_cyc *wq = &sq->wq;
	bool send_doorbell;

	*wi = (struct mlx5e_tx_wqe_info) {
		.skb = skb,
		.num_bytes = attr->num_bytes,
		.num_dma = num_dma,
		.num_wqebbs = wqe_attr->num_wqebbs,
		.num_fifo_pkts = 0,
	};

	cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | attr->opcode);
	cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | wqe_attr->ds_cnt);

	mlx5e_tx_skb_update_hwts_flags(skb);

	sq->pc += wi->num_wqebbs;

	mlx5e_tx_check_stop(sq);

	if (unlikely(sq->ptpsq)) {
		mlx5e_skb_cb_hwtstamp_init(skb);
		mlx5e_skb_fifo_push(&sq->ptpsq->skb_fifo, skb);
		skb_get(skb);
	}

	send_doorbell = __netdev_tx_sent_queue(sq->txq, attr->num_bytes, xmit_more);
	if (send_doorbell)
		mlx5e_notify_hw(wq, sq->pc, sq->uar_map, cseg);
}

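/* Build and post a single SEND/LSO WQE for one SKB: inline the headers
 * (inserting a VLAN tag and stripping the IPv6 Hop-by-Hop jumbo option when
 * needed), DMA-map the rest of the packet and fill the control segment.
 */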
static void
mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
		  const struct mlx5e_tx_attr *attr, const struct mlx5e_tx_wqe_attr *wqe_attr,
		  struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more)
{
	struct mlx5_wqe_ctrl_seg *cseg;
	struct mlx5_wqe_eth_seg *eseg;
	struct mlx5_wqe_data_seg *dseg;
	struct mlx5e_tx_wqe_info *wi;
	u16 ihs = attr->ihs;
	struct ipv6hdr *h6;
	struct mlx5e_sq_stats *stats = sq->stats;
	int num_dma;

	stats->xmit_more += xmit_more;

	/* fill wqe */
	wi = &sq->db.wqe_info[pi];
	cseg = &wqe->ctrl;
	eseg = &wqe->eth;
	dseg = wqe->data;

	eseg->mss = attr->mss;

	if (ihs) {
		u8 *start = eseg->inline_hdr.start;

		if (unlikely(attr->hopbyhop)) {
			/* remove the HBH header.
			 * Layout: [Ethernet header][IPv6 header][HBH][TCP header]
			 */
			if (skb_vlan_tag_present(skb)) {
				mlx5e_insert_vlan(start, skb, ETH_HLEN + sizeof(*h6));
				ihs += VLAN_HLEN;
				h6 = (struct ipv6hdr *)(start + sizeof(struct vlan_ethhdr));
			} else {
				unsafe_memcpy(start, skb->data,
					      ETH_HLEN + sizeof(*h6),
					      MLX5_UNSAFE_MEMCPY_DISCLAIMER);
				h6 = (struct ipv6hdr *)(start + ETH_HLEN);
			}
			h6->nexthdr = IPPROTO_TCP;
			/* Copy the TCP header after the IPv6 one */
			memcpy(h6 + 1,
			       skb->data + ETH_HLEN + sizeof(*h6) +
					   sizeof(struct hop_jumbo_hdr),
			       tcp_hdrlen(skb));
			/* Leave ipv6 payload_len set to 0, as LSO v2 specs request. */
		} else if (skb_vlan_tag_present(skb)) {
			mlx5e_insert_vlan(start, skb, ihs);
			ihs += VLAN_HLEN;
			stats->added_vlan_packets++;
		} else {
			unsafe_memcpy(eseg->inline_hdr.start, skb->data,
				      attr->ihs,
				      MLX5_UNSAFE_MEMCPY_DISCLAIMER);
		}
		eseg->inline_hdr.sz |= cpu_to_be16(ihs);
		dseg += wqe_attr->ds_cnt_inl;
	} else if (skb_vlan_tag_present(skb)) {
		eseg->insert.type = cpu_to_be16(MLX5_ETH_WQE_INSERT_VLAN);
		if (skb->vlan_proto == cpu_to_be16(ETH_P_8021AD))
			eseg->insert.type |= cpu_to_be16(MLX5_ETH_WQE_SVLAN);
		eseg->insert.vlan_tci = cpu_to_be16(skb_vlan_tag_get(skb));
		stats->added_vlan_packets++;
	}

	dseg += wqe_attr->ds_cnt_ids;
	num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + attr->ihs + attr->hopbyhop,
					  attr->headlen, dseg);
	if (unlikely(num_dma < 0))
		goto err_drop;

	mlx5e_txwqe_complete(sq, skb, attr, wqe_attr, num_dma, wi, cseg, xmit_more);

	return;

err_drop:
	stats->dropped++;
	dev_kfree_skb_any(skb);
	mlx5e_tx_flush(sq);
}

static bool mlx5e_tx_skb_supports_mpwqe(struct sk_buff *skb, struct mlx5e_tx_attr *attr)
{
	return !skb_is_nonlinear(skb) && !skb_vlan_tag_present(skb) && !attr->ihs &&
	       !attr->insz && !mlx5e_macsec_skb_is_offload(skb);
}

static bool mlx5e_tx_mpwqe_same_eseg(struct mlx5e_txqsq *sq, struct mlx5_wqe_eth_seg *eseg)
{
	struct mlx5e_tx_mpwqe *session = &sq->mpwqe;

	/* Assumes the session is already running and has at least one packet. */
	return !memcmp(&session->wqe->eth, eseg, MLX5E_ACCEL_ESEG_LEN);
}

static void mlx5e_tx_mpwqe_session_start(struct mlx5e_txqsq *sq,
					 struct mlx5_wqe_eth_seg *eseg)
{
	struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
	struct mlx5e_tx_wqe *wqe;
	u16 pi;

	pi = mlx5e_txqsq_get_next_pi(sq, sq->max_sq_mpw_wqebbs);
	wqe = MLX5E_TX_FETCH_WQE(sq, pi);
	net_prefetchw(wqe->data);

	*session = (struct mlx5e_tx_mpwqe) {
		.wqe = wqe,
		.bytes_count = 0,
		.ds_count = MLX5E_TX_WQE_EMPTY_DS_COUNT,
		.pkt_count = 0,
		.inline_on = 0,
	};

	memcpy(&session->wqe->eth, eseg, MLX5E_ACCEL_ESEG_LEN);

	sq->stats->mpwqe_blks++;
}

static bool mlx5e_tx_mpwqe_session_is_active(struct mlx5e_txqsq *sq)
{
	return sq->mpwqe.wqe;
}

static void mlx5e_tx_mpwqe_add_dseg(struct mlx5e_txqsq *sq, struct mlx5e_xmit_data *txd)
{
	struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
	struct mlx5_wqe_data_seg *dseg;

	dseg = (struct mlx5_wqe_data_seg *)session->wqe + session->ds_count;

	session->pkt_count++;
	session->bytes_count += txd->len;

	dseg->addr = cpu_to_be64(txd->dma_addr);
	dseg->byte_count = cpu_to_be32(txd->len);
	dseg->lkey = sq->mkey_be;
	session->ds_count++;

	sq->stats->mpwqe_pkts++;
}

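/* Close the current MPWQE session: fill the control segment and the wqe_info
 * entry used at completion time, advance the producer counter and return the
 * control segment so the caller can ring the doorbell.
 */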
static struct mlx5_wqe_ctrl_seg *mlx5e_tx_mpwqe_session_complete(struct mlx5e_txqsq *sq)
{
	struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
	u8 ds_count = session->ds_count;
	struct mlx5_wqe_ctrl_seg *cseg;
	struct mlx5e_tx_wqe_info *wi;
	u16 pi;

	cseg = &session->wqe->ctrl;
	cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_ENHANCED_MPSW);
	cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_count);

	pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
	wi = &sq->db.wqe_info[pi];
	*wi = (struct mlx5e_tx_wqe_info) {
		.skb = NULL,
		.num_bytes = session->bytes_count,
		.num_wqebbs = DIV_ROUND_UP(ds_count, MLX5_SEND_WQEBB_NUM_DS),
		.num_dma = session->pkt_count,
		.num_fifo_pkts = session->pkt_count,
	};

	sq->pc += wi->num_wqebbs;

	session->wqe = NULL;

	mlx5e_tx_check_stop(sq);

	return cseg;
}

static void
mlx5e_sq_xmit_mpwqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
		    struct mlx5_wqe_eth_seg *eseg, bool xmit_more)
{
	struct mlx5_wqe_ctrl_seg *cseg;
	struct mlx5e_xmit_data txd;

	txd.data = skb->data;
	txd.len = skb->len;

	txd.dma_addr = dma_map_single(sq->pdev, txd.data, txd.len, DMA_TO_DEVICE);
	if (unlikely(dma_mapping_error(sq->pdev, txd.dma_addr)))
		goto err_unmap;

	if (!mlx5e_tx_mpwqe_session_is_active(sq)) {
		mlx5e_tx_mpwqe_session_start(sq, eseg);
	} else if (!mlx5e_tx_mpwqe_same_eseg(sq, eseg)) {
		mlx5e_tx_mpwqe_session_complete(sq);
		mlx5e_tx_mpwqe_session_start(sq, eseg);
	}

	sq->stats->xmit_more += xmit_more;

	mlx5e_dma_push(sq, txd.dma_addr, txd.len, MLX5E_DMA_MAP_SINGLE);
	mlx5e_skb_fifo_push(&sq->db.skb_fifo, skb);
	mlx5e_tx_mpwqe_add_dseg(sq, &txd);
	mlx5e_tx_skb_update_hwts_flags(skb);

	if (unlikely(mlx5e_tx_mpwqe_is_full(&sq->mpwqe, sq->max_sq_mpw_wqebbs))) {
		/* Might stop the queue and affect the retval of __netdev_tx_sent_queue. */
		cseg = mlx5e_tx_mpwqe_session_complete(sq);

		if (__netdev_tx_sent_queue(sq->txq, txd.len, xmit_more))
			mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg);
	} else if (__netdev_tx_sent_queue(sq->txq, txd.len, xmit_more)) {
		/* Might stop the queue, but we were asked to ring the doorbell anyway. */
		cseg = mlx5e_tx_mpwqe_session_complete(sq);

		mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg);
	}

	return;

err_unmap:
	mlx5e_dma_unmap_wqe_err(sq, 1);
	sq->stats->dropped++;
	dev_kfree_skb_any(skb);
	mlx5e_tx_flush(sq);
}

void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq)
{
	/* Unlikely in non-MPWQE workloads; not important in MPWQE workloads. */
	if (unlikely(mlx5e_tx_mpwqe_session_is_active(sq)))
		mlx5e_tx_mpwqe_session_complete(sq);
}

static void mlx5e_cqe_ts_id_eseg(struct mlx5e_ptpsq *ptpsq, struct sk_buff *skb,
				 struct mlx5_wqe_eth_seg *eseg)
{
	if (ptpsq->ts_cqe_ctr_mask && unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
		eseg->flow_table_metadata = cpu_to_be32(ptpsq->skb_fifo_pc &
							ptpsq->ts_cqe_ctr_mask);
}

static void mlx5e_txwqe_build_eseg(struct mlx5e_priv *priv, struct mlx5e_txqsq *sq,
				   struct sk_buff *skb, struct mlx5e_accel_tx_state *accel,
				   struct mlx5_wqe_eth_seg *eseg, u16 ihs)
{
	mlx5e_accel_tx_eseg(priv, skb, eseg, ihs);
	mlx5e_txwqe_build_eseg_csum(sq, skb, accel, eseg);
	if (unlikely(sq->ptpsq))
		mlx5e_cqe_ts_id_eseg(sq->ptpsq, skb, eseg);
}

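/* Main transmit entry point (ndo_start_xmit): resolve the SQ from the queue
 * mapping, run the accel TX hooks and dispatch the SKB either to the MPWQE
 * datapath or to a regular SEND/LSO WQE.
 */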
netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct mlx5e_priv *priv = netdev_priv(dev);
	struct mlx5e_accel_tx_state accel = {};
	struct mlx5e_tx_wqe_attr wqe_attr;
	struct mlx5e_tx_attr attr;
	struct mlx5e_tx_wqe *wqe;
	struct mlx5e_txqsq *sq;
	u16 pi;

	/* All changes to txq2sq are performed in sync with mlx5e_xmit, when the
	 * queue being changed is disabled, and smp_wmb guarantees that the
	 * changes are visible before mlx5e_xmit tries to read from txq2sq. It
	 * guarantees that the value of txq2sq[qid] doesn't change while
	 * mlx5e_xmit is running on queue number qid. smp_wmb is paired with
	 * HARD_TX_LOCK around ndo_start_xmit, which serves as an ACQUIRE.
	 */
	sq = priv->txq2sq[skb_get_queue_mapping(skb)];
	if (unlikely(!sq)) {
		/* Two cases when sq can be NULL:
		 * 1. The HTB node is registered, and mlx5e_select_queue
		 * selected its queue ID, but the SQ itself is not yet created.
		 * 2. HTB SQ creation failed. Similar to the previous case, but
		 * the SQ won't be created.
		 */
		dev_kfree_skb_any(skb);
		return NETDEV_TX_OK;
	}

	/* May send SKBs and WQEs. */
	if (unlikely(!mlx5e_accel_tx_begin(dev, sq, skb, &accel)))
		return NETDEV_TX_OK;

	mlx5e_sq_xmit_prepare(sq, skb, &accel, &attr);

	if (test_bit(MLX5E_SQ_STATE_MPWQE, &sq->state)) {
		if (mlx5e_tx_skb_supports_mpwqe(skb, &attr)) {
			struct mlx5_wqe_eth_seg eseg = {};

			mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &eseg, attr.ihs);
			mlx5e_sq_xmit_mpwqe(sq, skb, &eseg, netdev_xmit_more());
			return NETDEV_TX_OK;
		}

		mlx5e_tx_mpwqe_ensure_complete(sq);
	}

	mlx5e_sq_calc_wqe_attr(skb, &attr, &wqe_attr);
	pi = mlx5e_txqsq_get_next_pi(sq, wqe_attr.num_wqebbs);
	wqe = MLX5E_TX_FETCH_WQE(sq, pi);

	/* May update the WQE, but may not post other WQEs. */
	mlx5e_accel_tx_finish(sq, wqe, &accel,
			      (struct mlx5_wqe_inline_seg *)(wqe->data + wqe_attr.ds_cnt_inl));
	mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &wqe->eth, attr.ihs);
	mlx5e_sq_xmit_wqe(sq, skb, &attr, &wqe_attr, wqe, pi, netdev_xmit_more());

	return NETDEV_TX_OK;
}

void mlx5e_sq_xmit_simple(struct mlx5e_txqsq *sq, struct sk_buff *skb, bool xmit_more)
{
	struct mlx5e_tx_wqe_attr wqe_attr;
	struct mlx5e_tx_attr attr;
	struct mlx5e_tx_wqe *wqe;
	u16 pi;

	mlx5e_sq_xmit_prepare(sq, skb, NULL, &attr);
	mlx5e_sq_calc_wqe_attr(skb, &attr, &wqe_attr);
	pi = mlx5e_txqsq_get_next_pi(sq, wqe_attr.num_wqebbs);
	wqe = MLX5E_TX_FETCH_WQE(sq, pi);
	mlx5e_txwqe_build_eseg_csum(sq, skb, NULL, &wqe->eth);
	mlx5e_sq_xmit_wqe(sq, skb, &attr, &wqe_attr, wqe, pi, xmit_more);
}

static void mlx5e_tx_wi_dma_unmap(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi,
				  u32 *dma_fifo_cc)
{
	int i;

	for (i = 0; i < wi->num_dma; i++) {
		struct mlx5e_sq_dma *dma = mlx5e_dma_get(sq, (*dma_fifo_cc)++);

		mlx5e_tx_dma_unmap(sq->pdev, dma);
	}
}

static void mlx5e_consume_skb(struct mlx5e_txqsq *sq, struct sk_buff *skb,
			      struct mlx5_cqe64 *cqe, int napi_budget)
{
	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
		struct skb_shared_hwtstamps hwts = {};
		u64 ts = get_cqe_ts(cqe);

		hwts.hwtstamp = mlx5e_cqe_ts_to_ns(sq->ptp_cyc2time, sq->clock, ts);
		if (sq->ptpsq)
			mlx5e_skb_cb_hwtstamp_handler(skb, MLX5E_SKB_CB_CQE_HWTSTAMP,
						      hwts.hwtstamp, sq->ptpsq->cq_stats);
		else
			skb_tstamp_tx(skb, &hwts);
	}

	napi_consume_skb(skb, napi_budget);
}

static void mlx5e_tx_wi_consume_fifo_skbs(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi,
					  struct mlx5_cqe64 *cqe, int napi_budget)
{
	int i;

	for (i = 0; i < wi->num_fifo_pkts; i++) {
		struct sk_buff *skb = mlx5e_skb_fifo_pop(&sq->db.skb_fifo);

		mlx5e_consume_skb(sq, skb, cqe, napi_budget);
	}
}

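/* TX completion handler: process up to MLX5E_TX_CQ_POLL_BUDGET CQEs, unmap
 * DMA, free or timestamp the completed SKBs and wake the queue when room is
 * available again. Returns true if the polling budget was exhausted.
 */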
bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
{
	struct mlx5e_sq_stats *stats;
	struct mlx5e_txqsq *sq;
	struct mlx5_cqe64 *cqe;
	u32 dma_fifo_cc;
	u32 nbytes;
	u16 npkts;
	u16 sqcc;
	int i;

	sq = container_of(cq, struct mlx5e_txqsq, cq);

	if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
		return false;

	cqe = mlx5_cqwq_get_cqe(&cq->wq);
	if (!cqe)
		return false;

	stats = sq->stats;

	npkts = 0;
	nbytes = 0;

	/* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
	 * otherwise a cq overrun may occur
	 */
	sqcc = sq->cc;

	/* avoid dirtying sq cache line every cqe */
	dma_fifo_cc = sq->dma_fifo_cc;

	i = 0;
	do {
		struct mlx5e_tx_wqe_info *wi;
		u16 wqe_counter;
		bool last_wqe;
		u16 ci;

		mlx5_cqwq_pop(&cq->wq);

		wqe_counter = be16_to_cpu(cqe->wqe_counter);

		do {
			last_wqe = (sqcc == wqe_counter);

			ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
			wi = &sq->db.wqe_info[ci];

			sqcc += wi->num_wqebbs;

			if (likely(wi->skb)) {
				mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
				mlx5e_consume_skb(sq, wi->skb, cqe, napi_budget);

				npkts++;
				nbytes += wi->num_bytes;
				continue;
			}

			if (unlikely(mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi,
									       &dma_fifo_cc)))
				continue;

			if (wi->num_fifo_pkts) {
				mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
				mlx5e_tx_wi_consume_fifo_skbs(sq, wi, cqe, napi_budget);

				npkts += wi->num_fifo_pkts;
				nbytes += wi->num_bytes;
			}
		} while (!last_wqe);

		if (unlikely(get_cqe_opcode(cqe) == MLX5_CQE_REQ_ERR)) {
			if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING,
					      &sq->state)) {
				mlx5e_dump_error_cqe(&sq->cq, sq->sqn,
						     (struct mlx5_err_cqe *)cqe);
				mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs);
				queue_work(cq->priv->wq, &sq->recover_work);
			}
			stats->cqe_err++;
		}

	} while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));

	stats->cqes += i;

	mlx5_cqwq_update_db_record(&cq->wq);

	/* ensure cq space is freed before enabling more cqes */
	wmb();

	sq->dma_fifo_cc = dma_fifo_cc;
	sq->cc = sqcc;

	netdev_tx_completed_queue(sq->txq, npkts, nbytes);

	if (netif_tx_queue_stopped(sq->txq) &&
	    mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room) &&
	    !test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) {
		netif_tx_wake_queue(sq->txq);
		stats->wake++;
	}

	return (i == MLX5E_TX_CQ_POLL_BUDGET);
}

static void mlx5e_tx_wi_kfree_fifo_skbs(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi)
{
	int i;

	for (i = 0; i < wi->num_fifo_pkts; i++)
		dev_kfree_skb_any(mlx5e_skb_fifo_pop(&sq->db.skb_fifo));
}

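/* Drop all descriptors still outstanding between the consumer and producer
 * counters: unmap their DMA, free the SKBs and update the BQL accounting.
 */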
void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq)
{
	struct mlx5e_tx_wqe_info *wi;
	u32 dma_fifo_cc, nbytes = 0;
	u16 ci, sqcc, npkts = 0;

	sqcc = sq->cc;
	dma_fifo_cc = sq->dma_fifo_cc;

	while (sqcc != sq->pc) {
		ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
		wi = &sq->db.wqe_info[ci];

		sqcc += wi->num_wqebbs;

		if (likely(wi->skb)) {
			mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
			dev_kfree_skb_any(wi->skb);

			npkts++;
			nbytes += wi->num_bytes;
			continue;
		}

		if (unlikely(mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi, &dma_fifo_cc)))
			continue;

		if (wi->num_fifo_pkts) {
			mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
			mlx5e_tx_wi_kfree_fifo_skbs(sq, wi);

			npkts += wi->num_fifo_pkts;
			nbytes += wi->num_bytes;
		}
	}

	sq->dma_fifo_cc = dma_fifo_cc;
	sq->cc = sqcc;

	netdev_tx_completed_queue(sq->txq, npkts, nbytes);
}

#ifdef CONFIG_MLX5_CORE_IPOIB
static inline void
mlx5i_txwqe_build_datagram(struct mlx5_av *av, u32 dqpn, u32 dqkey,
			   struct mlx5_wqe_datagram_seg *dseg)
{
	memcpy(&dseg->av, av, sizeof(struct mlx5_av));
	dseg->av.dqp_dct = cpu_to_be32(dqpn | MLX5_EXTENDED_UD_AV);
	dseg->av.key.qkey.qkey = cpu_to_be32(dqkey);
}

static void mlx5i_sq_calc_wqe_attr(struct sk_buff *skb,
				   const struct mlx5e_tx_attr *attr,
				   struct mlx5e_tx_wqe_attr *wqe_attr)
{
	u16 ds_cnt = sizeof(struct mlx5i_tx_wqe) / MLX5_SEND_WQE_DS;
	u16 ds_cnt_inl = 0;

	ds_cnt += !!attr->headlen + skb_shinfo(skb)->nr_frags;

	if (attr->ihs) {
		u16 inl = attr->ihs - INL_HDR_START_SZ;

		ds_cnt_inl = DIV_ROUND_UP(inl, MLX5_SEND_WQE_DS);
		ds_cnt += ds_cnt_inl;
	}

	*wqe_attr = (struct mlx5e_tx_wqe_attr) {
		.ds_cnt = ds_cnt,
		.ds_cnt_inl = ds_cnt_inl,
		.num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS),
	};
}

void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
		   struct mlx5_av *av, u32 dqpn, u32 dqkey, bool xmit_more)
{
	struct mlx5e_tx_wqe_attr wqe_attr;
	struct mlx5e_tx_attr attr;
	struct mlx5i_tx_wqe *wqe;

	struct mlx5_wqe_datagram_seg *datagram;
	struct mlx5_wqe_ctrl_seg *cseg;
	struct mlx5_wqe_eth_seg *eseg;
	struct mlx5_wqe_data_seg *dseg;
	struct mlx5e_tx_wqe_info *wi;

	struct mlx5e_sq_stats *stats = sq->stats;
	int num_dma;
	u16 pi;

	mlx5e_sq_xmit_prepare(sq, skb, NULL, &attr);
	mlx5i_sq_calc_wqe_attr(skb, &attr, &wqe_attr);

	pi = mlx5e_txqsq_get_next_pi(sq, wqe_attr.num_wqebbs);
	wqe = MLX5I_SQ_FETCH_WQE(sq, pi);

	stats->xmit_more += xmit_more;

	/* fill wqe */
	wi = &sq->db.wqe_info[pi];
	cseg = &wqe->ctrl;
	datagram = &wqe->datagram;
	eseg = &wqe->eth;
	dseg = wqe->data;

	mlx5i_txwqe_build_datagram(av, dqpn, dqkey, datagram);

	mlx5e_txwqe_build_eseg_csum(sq, skb, NULL, eseg);

	eseg->mss = attr.mss;

	if (attr.ihs) {
		if (unlikely(attr.hopbyhop)) {
			struct ipv6hdr *h6;

			/* remove the HBH header.
			 * Layout: [Ethernet header][IPv6 header][HBH][TCP header]
			 */
			unsafe_memcpy(eseg->inline_hdr.start, skb->data,
				      ETH_HLEN + sizeof(*h6),
				      MLX5_UNSAFE_MEMCPY_DISCLAIMER);
			h6 = (struct ipv6hdr *)((char *)eseg->inline_hdr.start + ETH_HLEN);
			h6->nexthdr = IPPROTO_TCP;
			/* Copy the TCP header after the IPv6 one */
			unsafe_memcpy(h6 + 1,
				      skb->data + ETH_HLEN + sizeof(*h6) +
						  sizeof(struct hop_jumbo_hdr),
				      tcp_hdrlen(skb),
				      MLX5_UNSAFE_MEMCPY_DISCLAIMER);
			/* Leave ipv6 payload_len set to 0, as LSO v2 specs request. */
		} else {
			unsafe_memcpy(eseg->inline_hdr.start, skb->data,
				      attr.ihs,
				      MLX5_UNSAFE_MEMCPY_DISCLAIMER);
		}
		eseg->inline_hdr.sz = cpu_to_be16(attr.ihs);
		dseg += wqe_attr.ds_cnt_inl;
	}

	num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + attr.ihs + attr.hopbyhop,
					  attr.headlen, dseg);
	if (unlikely(num_dma < 0))
		goto err_drop;

	mlx5e_txwqe_complete(sq, skb, &attr, &wqe_attr, num_dma, wi, cseg, xmit_more);

	return;

err_drop:
	stats->dropped++;
	dev_kfree_skb_any(skb);
	mlx5e_tx_flush(sq);
}
#endif