// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include "rx.h"
#include "en/xdp.h"
#include <net/xdp_sock_drv.h>
#include <linux/filter.h>

/* RX data path */

static struct mlx5e_xdp_buff *xsk_buff_to_mxbuf(struct xdp_buff *xdp)
{
	/* mlx5e_xdp_buff shares its layout with xdp_buff_xsk
	 * and private mlx5e_xdp_buff fields fall into xdp_buff_xsk->cb
	 */
	return (struct mlx5e_xdp_buff *)xdp;
}

int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
{
	struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix);
	struct mlx5e_icosq *icosq = rq->icosq;
	struct mlx5_wq_cyc *wq = &icosq->wq;
	struct mlx5e_umr_wqe *umr_wqe;
	struct xdp_buff **xsk_buffs;
	int batch, i;
	u32 offset; /* 17-bit value with MTT. */
	u16 pi;

	if (unlikely(!xsk_buff_can_alloc(rq->xsk_pool, rq->mpwqe.pages_per_wqe)))
		goto err;

	XSK_CHECK_PRIV_TYPE(struct mlx5e_xdp_buff);
	xsk_buffs = (struct xdp_buff **)wi->alloc_units.xsk_buffs;
	batch = xsk_buff_alloc_batch(rq->xsk_pool, xsk_buffs,
				     rq->mpwqe.pages_per_wqe);

	/* If batch < pages_per_wqe, either:
	 * 1. Some (or all) descriptors were invalid.
	 * 2. dma_need_sync is true, and it fell back to allocating one frame.
	 * In either case, try to continue allocating frames one by one, until
	 * the first error, which will mean there are no more valid descriptors.
	 */
	for (; batch < rq->mpwqe.pages_per_wqe; batch++) {
		xsk_buffs[batch] = xsk_buff_alloc(rq->xsk_pool);
		if (unlikely(!xsk_buffs[batch]))
			goto err_reuse_batch;
	}

	pi = mlx5e_icosq_get_next_pi(icosq, rq->mpwqe.umr_wqebbs);
	umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi);
	memcpy(umr_wqe, &rq->mpwqe.umr_wqe, sizeof(struct mlx5e_umr_wqe));

	if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED)) {
		for (i = 0; i < batch; i++) {
			struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(xsk_buffs[i]);
			dma_addr_t addr = xsk_buff_xdp_get_frame_dma(xsk_buffs[i]);

			umr_wqe->inline_mtts[i] = (struct mlx5_mtt) {
				.ptag = cpu_to_be64(addr | MLX5_EN_WR),
			};
			mxbuf->rq = rq;
		}
	} else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_UNALIGNED)) {
		for (i = 0; i < batch; i++) {
			struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(xsk_buffs[i]);
			dma_addr_t addr = xsk_buff_xdp_get_frame_dma(xsk_buffs[i]);

			umr_wqe->inline_ksms[i] = (struct mlx5_ksm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(addr),
			};
			mxbuf->rq = rq;
		}
	} else if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE)) {
		u32 mapping_size = 1 << (rq->mpwqe.page_shift - 2);

		for (i = 0; i < batch; i++) {
			struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(xsk_buffs[i]);
			dma_addr_t addr = xsk_buff_xdp_get_frame_dma(xsk_buffs[i]);

			umr_wqe->inline_ksms[i << 2] = (struct mlx5_ksm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(addr),
			};
			umr_wqe->inline_ksms[(i << 2) + 1] = (struct mlx5_ksm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(addr + mapping_size),
			};
			umr_wqe->inline_ksms[(i << 2) + 2] = (struct mlx5_ksm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(addr + mapping_size * 2),
			};
			umr_wqe->inline_ksms[(i << 2) + 3] = (struct mlx5_ksm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(rq->wqe_overflow.addr),
			};
			mxbuf->rq = rq;
		}
	} else {
		__be32 pad_size = cpu_to_be32((1 << rq->mpwqe.page_shift) -
					      rq->xsk_pool->chunk_size);
		__be32 frame_size = cpu_to_be32(rq->xsk_pool->chunk_size);

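		/* Remaining case: MLX5E_MPWRQ_UMR_MODE_OVERSIZED. Each frame is
		 * described by a pair of KLMs: the first covers the XSK chunk
		 * itself (frame_size bytes), and the second maps the shared
		 * wqe_overflow page to pad the mapping out to 1 << page_shift,
		 * so a stride cannot overrun into the next frame.
		 */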
		for (i = 0; i < batch; i++) {
			struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(xsk_buffs[i]);
			dma_addr_t addr = xsk_buff_xdp_get_frame_dma(xsk_buffs[i]);

			umr_wqe->inline_klms[i << 1] = (struct mlx5_klm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(addr),
				.bcount = frame_size,
			};
			umr_wqe->inline_klms[(i << 1) + 1] = (struct mlx5_klm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(rq->wqe_overflow.addr),
				.bcount = pad_size,
			};
			mxbuf->rq = rq;
		}
	}

	bitmap_zero(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe);
	wi->consumed_strides = 0;

	umr_wqe->ctrl.opmod_idx_opcode =
		cpu_to_be32((icosq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | MLX5_OPCODE_UMR);

	/* Optimized for speed: keep in sync with mlx5e_mpwrq_umr_entry_size. */
	offset = ix * rq->mpwqe.mtts_per_wqe;
	if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED))
		offset = offset * sizeof(struct mlx5_mtt) / MLX5_OCTWORD;
	else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_OVERSIZED))
		offset = offset * sizeof(struct mlx5_klm) * 2 / MLX5_OCTWORD;
	else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE))
		offset = offset * sizeof(struct mlx5_ksm) * 4 / MLX5_OCTWORD;
	umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset);

	icosq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
		.wqe_type = MLX5E_ICOSQ_WQE_UMR_RX,
		.num_wqebbs = rq->mpwqe.umr_wqebbs,
		.umr.rq = rq,
	};

	icosq->pc += rq->mpwqe.umr_wqebbs;

	icosq->doorbell_cseg = &umr_wqe->ctrl;

	return 0;

err_reuse_batch:
	while (--batch >= 0)
		xsk_buff_free(xsk_buffs[batch]);

err:
	rq->stats->buff_alloc_err++;
	return -ENOMEM;
}

int mlx5e_xsk_alloc_rx_wqes_batched(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
{
	struct mlx5_wq_cyc *wq = &rq->wqe.wq;
	struct xdp_buff **buffs;
	u32 contig, alloc;
	int i;

	/* Each rq->wqe.frags->xskp is 1:1 mapped to an element inside the
	 * rq->wqe.alloc_units->xsk_buffs array allocated here.
	 */
	buffs = rq->wqe.alloc_units->xsk_buffs;
	contig = mlx5_wq_cyc_get_size(wq) - ix;
	if (wqe_bulk <= contig) {
		alloc = xsk_buff_alloc_batch(rq->xsk_pool, buffs + ix, wqe_bulk);
	} else {
		alloc = xsk_buff_alloc_batch(rq->xsk_pool, buffs + ix, contig);
		if (likely(alloc == contig))
			alloc += xsk_buff_alloc_batch(rq->xsk_pool, buffs, wqe_bulk - contig);
	}

	for (i = 0; i < alloc; i++) {
		int j = mlx5_wq_cyc_ctr2ix(wq, ix + i);
		struct mlx5e_wqe_frag_info *frag;
		struct mlx5e_rx_wqe_cyc *wqe;
		dma_addr_t addr;

		wqe = mlx5_wq_cyc_get_wqe(wq, j);
		/* Assumes log_num_frags == 0. */
		frag = &rq->wqe.frags[j];

		addr = xsk_buff_xdp_get_frame_dma(*frag->xskp);
		wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom);
		frag->flags &= ~BIT(MLX5E_WQE_FRAG_SKIP_RELEASE);
	}

	return alloc;
}

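/* Non-batched fallback: allocate one XSK frame per WQE until wqe_bulk WQEs are
 * filled or the pool runs out of frames, and return the number of WQEs that
 * were actually populated.
 */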
int mlx5e_xsk_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
{
	struct mlx5_wq_cyc *wq = &rq->wqe.wq;
	int i;

	for (i = 0; i < wqe_bulk; i++) {
		int j = mlx5_wq_cyc_ctr2ix(wq, ix + i);
		struct mlx5e_wqe_frag_info *frag;
		struct mlx5e_rx_wqe_cyc *wqe;
		dma_addr_t addr;

		wqe = mlx5_wq_cyc_get_wqe(wq, j);
		/* Assumes log_num_frags == 0. */
		frag = &rq->wqe.frags[j];

		*frag->xskp = xsk_buff_alloc(rq->xsk_pool);
		if (unlikely(!*frag->xskp))
			return i;

		addr = xsk_buff_xdp_get_frame_dma(*frag->xskp);
		wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom);
		frag->flags &= ~BIT(MLX5E_WQE_FRAG_SKIP_RELEASE);
	}

	return wqe_bulk;
}

static struct sk_buff *mlx5e_xsk_construct_skb(struct mlx5e_rq *rq, struct xdp_buff *xdp)
{
	u32 totallen = xdp->data_end - xdp->data_meta;
	u32 metalen = xdp->data - xdp->data_meta;
	struct sk_buff *skb;

	skb = napi_alloc_skb(rq->cq.napi, totallen);
	if (unlikely(!skb)) {
		rq->stats->buff_alloc_err++;
		return NULL;
	}

	skb_put_data(skb, xdp->data_meta, totallen);

	if (metalen) {
		skb_metadata_set(skb, metalen);
		__skb_pull(skb, metalen);
	}

	return skb;
}

struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
						    struct mlx5e_mpw_info *wi,
						    struct mlx5_cqe64 *cqe,
						    u16 cqe_bcnt,
						    u32 head_offset,
						    u32 page_idx)
{
	struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units.xsk_buffs[page_idx]);
	struct bpf_prog *prog;

	/* Check packet size. Note LRO doesn't use linear SKB */
	if (unlikely(cqe_bcnt > rq->hw_mtu)) {
		rq->stats->oversize_pkts_sw_drop++;
		return NULL;
	}

	/* head_offset is not used in this function, because xdp->data and the
	 * DMA address point directly to the necessary place. Furthermore, in
	 * the current implementation, UMR pages are mapped to XSK frames, so
	 * head_offset should always be 0.
	 */
	WARN_ON_ONCE(head_offset);

	/* mxbuf->rq is set on allocation, but cqe is per-packet so set it here */
	mxbuf->cqe = cqe;
	xsk_buff_set_size(&mxbuf->xdp, cqe_bcnt);
	xsk_buff_dma_sync_for_cpu(&mxbuf->xdp, rq->xsk_pool);
	net_prefetch(mxbuf->xdp.data);

	/* Possible flows:
	 * - XDP_REDIRECT to XSKMAP:
	 *   The page is owned by the userspace from now.
	 * - XDP_TX and other XDP_REDIRECTs:
	 *   The page was returned by ZCA and recycled.
	 * - XDP_DROP:
	 *   Recycle the page.
	 * - XDP_PASS:
	 *   Allocate an SKB, copy the data and recycle the page.
	 *
	 * Pages to be recycled go to the Reuse Ring on MPWQE deallocation. Its
	 * size is the same as the Driver RX Ring's size, and pages for WQEs are
	 * allocated first from the Reuse Ring, so it has enough space.
	 */

	prog = rcu_dereference(rq->xdp_prog);
	if (likely(prog && mlx5e_xdp_handle(rq, prog, mxbuf))) {
		if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)))
			__set_bit(page_idx, wi->skip_release_bitmap); /* non-atomic */
		return NULL; /* page/packet was consumed by XDP */
	}

	/* XDP_PASS: copy the data from the UMEM to a new SKB and reuse the
	 * frame. On SKB allocation failure, NULL is returned.
	 */
	return mlx5e_xsk_construct_skb(rq, &mxbuf->xdp);
}

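/* Legacy (cyclic) RQ variant: run XDP on the single XSK frame of this WQE and,
 * on XDP_PASS, copy the payload into a newly allocated SKB. The frame itself
 * is released later by mlx5e_free_rx_wqe.
 */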
struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
					      struct mlx5e_wqe_frag_info *wi,
					      struct mlx5_cqe64 *cqe,
					      u32 cqe_bcnt)
{
	struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(*wi->xskp);
	struct bpf_prog *prog;

	/* wi->offset is not used in this function, because xdp->data and the
	 * DMA address point directly to the necessary place. Furthermore, the
	 * XSK allocator allocates frames per packet, instead of pages, so
	 * wi->offset should always be 0.
	 */
	WARN_ON_ONCE(wi->offset);

	/* mxbuf->rq is set on allocation, but cqe is per-packet so set it here */
	mxbuf->cqe = cqe;
	xsk_buff_set_size(&mxbuf->xdp, cqe_bcnt);
	xsk_buff_dma_sync_for_cpu(&mxbuf->xdp, rq->xsk_pool);
	net_prefetch(mxbuf->xdp.data);

	prog = rcu_dereference(rq->xdp_prog);
	if (likely(prog && mlx5e_xdp_handle(rq, prog, mxbuf)))
		return NULL; /* page/packet was consumed by XDP */

	/* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse
	 * will be handled by mlx5e_free_rx_wqe.
	 * On SKB allocation failure, NULL is returned.
	 */
	return mlx5e_xsk_construct_skb(rq, &mxbuf->xdp);
}