// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include "dr_types.h"

#define QUEUE_SIZE 128
#define SIGNAL_PER_DIV_QUEUE 16
#define TH_NUMS_TO_DRAIN 2

enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 };

struct dr_data_seg {
	u64 addr;
	u32 length;
	u32 lkey;
	unsigned int send_flags;
};

struct postsend_info {
	struct dr_data_seg write;
	struct dr_data_seg read;
	u64 remote_addr;
	u32 rkey;
};

struct dr_qp_rtr_attr {
	struct mlx5dr_cmd_gid_attr dgid_attr;
	enum ib_mtu mtu;
	u32 qp_num;
	u16 port_num;
	u8 min_rnr_timer;
	u8 sgid_index;
	u16 udp_src_port;
};

struct dr_qp_rts_attr {
	u8 timeout;
	u8 retry_cnt;
	u8 rnr_retry;
};

struct dr_qp_init_attr {
	u32 cqn;
	u32 pdn;
	u32 max_send_wr;
	struct mlx5_uars_page *uar;
};

static int dr_parse_cqe(struct mlx5dr_cq *dr_cq, struct mlx5_cqe64 *cqe64)
{
	unsigned int idx;
	u8 opcode;

	opcode = get_cqe_opcode(cqe64);
	if (opcode == MLX5_CQE_REQ_ERR) {
		idx = be16_to_cpu(cqe64->wqe_counter) &
			(dr_cq->qp->sq.wqe_cnt - 1);
		dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1;
	} else if (opcode == MLX5_CQE_RESP_ERR) {
		++dr_cq->qp->sq.cc;
	} else {
		idx = be16_to_cpu(cqe64->wqe_counter) &
			(dr_cq->qp->sq.wqe_cnt - 1);
		dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1;

		return CQ_OK;
	}

	return CQ_POLL_ERR;
}

static int dr_cq_poll_one(struct mlx5dr_cq *dr_cq)
{
	struct mlx5_cqe64 *cqe64;
	int err;

	cqe64 = mlx5_cqwq_get_cqe(&dr_cq->wq);
	if (!cqe64)
		return CQ_EMPTY;

	mlx5_cqwq_pop(&dr_cq->wq);
	err = dr_parse_cqe(dr_cq, cqe64);
	mlx5_cqwq_update_db_record(&dr_cq->wq);

	return err;
}

static int dr_poll_cq(struct mlx5dr_cq *dr_cq, int ne)
{
	int npolled;
	int err = 0;

	for (npolled = 0; npolled < ne; ++npolled) {
		err = dr_cq_poll_one(dr_cq);
		if (err != CQ_OK)
			break;
	}

	return err == CQ_POLL_ERR ? err : npolled;
}

static void dr_qp_event(struct mlx5_core_qp *mqp, int event)
{
	pr_info("DR QP event %u on QP #%u\n", event, mqp->qpn);
}

static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
					 struct dr_qp_init_attr *attr)
{
	u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {};
	struct mlx5_wq_param wqp;
	struct mlx5dr_qp *dr_qp;
	int inlen;
	void *qpc;
	void *in;
	int err;

	dr_qp = kzalloc(sizeof(*dr_qp), GFP_KERNEL);
	if (!dr_qp)
		return NULL;

	wqp.buf_numa_node = mdev->priv.numa_node;
	wqp.db_numa_node = mdev->priv.numa_node;

	dr_qp->rq.pc = 0;
	dr_qp->rq.cc = 0;
	dr_qp->rq.wqe_cnt = 4;
	dr_qp->sq.pc = 0;
	dr_qp->sq.cc = 0;
	dr_qp->sq.wqe_cnt = roundup_pow_of_two(attr->max_send_wr);

	MLX5_SET(qpc, temp_qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
	MLX5_SET(qpc, temp_qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt));
	MLX5_SET(qpc, temp_qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt));
	err = mlx5_wq_qp_create(mdev, &wqp, temp_qpc, &dr_qp->wq,
				&dr_qp->wq_ctrl);
	if (err) {
		mlx5_core_info(mdev, "Can't create QP WQ\n");
		goto err_wq;
	}

	dr_qp->sq.wqe_head = kcalloc(dr_qp->sq.wqe_cnt,
				     sizeof(dr_qp->sq.wqe_head[0]),
				     GFP_KERNEL);

	if (!dr_qp->sq.wqe_head) {
		mlx5_core_warn(mdev, "Can't allocate wqe head\n");
		goto err_wqe_head;
	}

	inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
		MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) *
		dr_qp->wq_ctrl.buf.npages;
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_in;
	}

	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
	MLX5_SET(qpc, qpc, pd, attr->pdn);
	MLX5_SET(qpc, qpc, uar_page, attr->uar->index);
	MLX5_SET(qpc, qpc, log_page_size,
		 dr_qp->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET(qpc, qpc, fre, 1);
	MLX5_SET(qpc, qpc, rlky, 1);
	MLX5_SET(qpc, qpc, cqn_snd, attr->cqn);
	MLX5_SET(qpc, qpc, cqn_rcv, attr->cqn);
	MLX5_SET(qpc, qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
	MLX5_SET(qpc, qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt));
	MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
	MLX5_SET(qpc, qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt));
	MLX5_SET64(qpc, qpc, dbr_addr, dr_qp->wq_ctrl.db.dma);
	if (MLX5_CAP_GEN(mdev, cqe_version) == 1)
		MLX5_SET(qpc, qpc, user_index, 0xFFFFFF);
	mlx5_fill_page_frag_array(&dr_qp->wq_ctrl.buf,
				  (__be64 *)MLX5_ADDR_OF(create_qp_in,
							 in, pas));

	err = mlx5_core_create_qp(mdev, &dr_qp->mqp, in, inlen);
	kvfree(in); /* allocated with kvzalloc(), must be freed with kvfree() */

	if (err) {
		mlx5_core_warn(mdev, "Can't create QP\n");
		goto err_in;
	}
	dr_qp->mqp.event = dr_qp_event;
	dr_qp->uar = attr->uar;

	return dr_qp;

err_in:
	kfree(dr_qp->sq.wqe_head);
err_wqe_head:
	mlx5_wq_destroy(&dr_qp->wq_ctrl);
err_wq:
	kfree(dr_qp);
	return NULL;
}

static void dr_destroy_qp(struct mlx5_core_dev *mdev,
			  struct mlx5dr_qp *dr_qp)
{
	mlx5_core_destroy_qp(mdev, &dr_qp->mqp);
	kfree(dr_qp->sq.wqe_head);
	mlx5_wq_destroy(&dr_qp->wq_ctrl);
	kfree(dr_qp);
}

static void dr_cmd_notify_hw(struct mlx5dr_qp *dr_qp, void *ctrl)
{
	dma_wmb();
	*dr_qp->wq.sq.db = cpu_to_be32(dr_qp->sq.pc & 0xfffff);

	/* After wmb() the hw is aware of the new work */
	wmb();

	mlx5_write64(ctrl, dr_qp->uar->map + MLX5_BF_OFFSET);
}

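/* Illustrative note (not part of the original file): dr_rdma_segments()
 * below builds a single RDMA WRITE/READ WQE out of a control segment, a
 * remote-address segment and one data segment.  Assuming the usual 16-byte
 * mlx5 segment sizes, the "size" packed into qpn_ds is counted in 16-byte
 * units:
 *
 *	sizeof(struct mlx5_wqe_ctrl_seg)  / 16 = 1
 *	sizeof(struct mlx5_wqe_raddr_seg) / 16 = 1
 *	sizeof(struct mlx5_wqe_data_seg)  / 16 = 1
 *	----------------------------------------
 *	size = 3  (a 48-byte WQE)
 */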
static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr,
			     u32 rkey, struct dr_data_seg *data_seg,
			     u32 opcode, int nreq)
{
	struct mlx5_wqe_raddr_seg *wq_raddr;
	struct mlx5_wqe_ctrl_seg *wq_ctrl;
	struct mlx5_wqe_data_seg *wq_dseg;
	unsigned int size;
	unsigned int idx;

	size = sizeof(*wq_ctrl) / 16 + sizeof(*wq_dseg) / 16 +
		sizeof(*wq_raddr) / 16;

	idx = dr_qp->sq.pc & (dr_qp->sq.wqe_cnt - 1);

	wq_ctrl = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, idx);
	wq_ctrl->imm = 0;
	wq_ctrl->fm_ce_se = (data_seg->send_flags) ?
		MLX5_WQE_CTRL_CQ_UPDATE : 0;
	wq_ctrl->opmod_idx_opcode = cpu_to_be32(((dr_qp->sq.pc & 0xffff) << 8) |
						opcode);
	wq_ctrl->qpn_ds = cpu_to_be32(size | dr_qp->mqp.qpn << 8);
	wq_raddr = (void *)(wq_ctrl + 1);
	wq_raddr->raddr = cpu_to_be64(remote_addr);
	wq_raddr->rkey = cpu_to_be32(rkey);
	wq_raddr->reserved = 0;

	wq_dseg = (void *)(wq_raddr + 1);
	wq_dseg->byte_count = cpu_to_be32(data_seg->length);
	wq_dseg->lkey = cpu_to_be32(data_seg->lkey);
	wq_dseg->addr = cpu_to_be64(data_seg->addr);

	dr_qp->sq.wqe_head[idx] = dr_qp->sq.pc++;

	if (nreq)
		dr_cmd_notify_hw(dr_qp, wq_ctrl);
}

static void dr_post_send(struct mlx5dr_qp *dr_qp, struct postsend_info *send_info)
{
	dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
			 &send_info->write, MLX5_OPCODE_RDMA_WRITE, 0);
	dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
			 &send_info->read, MLX5_OPCODE_RDMA_READ, 1);
}

/**
 * mlx5dr_send_fill_and_append_ste_send_info: Add data to be sent
 * with send_list parameters:
 *
 * @ste:       The STE to which this data is attached
 * @size:      Size of the data to write
 * @offset:    Offset of the data from the start of the hw_ste entry
 * @data:      The data to write
 * @ste_info:  STE info to be sent with the send_list
 * @send_list: The list to append to
 * @copy_data: If true, copy the data, since it is not backed up
 *             anywhere else (e.g. during re-hash).
 *             If false, the data may still be updated after it
 *             was added to the list.
 */
void mlx5dr_send_fill_and_append_ste_send_info(struct mlx5dr_ste *ste, u16 size,
					       u16 offset, u8 *data,
					       struct mlx5dr_ste_send_info *ste_info,
					       struct list_head *send_list,
					       bool copy_data)
{
	ste_info->size = size;
	ste_info->ste = ste;
	ste_info->offset = offset;

	if (copy_data) {
		memcpy(ste_info->data_cont, data, size);
		ste_info->data = ste_info->data_cont;
	} else {
		ste_info->data = data;
	}

	list_add_tail(&ste_info->send_list, send_list);
}

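/* Illustrative numbers (a sketch, assuming the defaults at the top of this
 * file are unchanged): with QUEUE_SIZE = 128 and SIGNAL_PER_DIV_QUEUE = 16,
 * the send ring is created with signal_th = 128 / 16 = 8, so every 8th WQE
 * is signaled and produces a CQE.  With TH_NUMS_TO_DRAIN = 2, the function
 * below polls for a completion once pending_wqe reaches 8, and keeps
 * draining until the CQ is empty once pending_wqe reaches 2 * 8 = 16.
 */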
/* The function tries to consume one wc each time, unless the queue is full.
 * In that case, which means that the hw is behind the sw by a full queue
 * length, the function will drain the cq until it is empty.
 */
static int dr_handle_pending_wc(struct mlx5dr_domain *dmn,
				struct mlx5dr_send_ring *send_ring)
{
	bool is_drain = false;
	int ne;

	if (send_ring->pending_wqe < send_ring->signal_th)
		return 0;

	/* Queue is full, start draining it */
	if (send_ring->pending_wqe >=
	    dmn->send_ring->signal_th * TH_NUMS_TO_DRAIN)
		is_drain = true;

	do {
		ne = dr_poll_cq(send_ring->cq, 1);
		if (ne < 0)
			return ne;
		else if (ne == 1)
			send_ring->pending_wqe -= send_ring->signal_th;
	} while (is_drain && send_ring->pending_wqe);

	return 0;
}

static void dr_fill_data_segs(struct mlx5dr_send_ring *send_ring,
			      struct postsend_info *send_info)
{
	send_ring->pending_wqe++;

	if (send_ring->pending_wqe % send_ring->signal_th == 0)
		send_info->write.send_flags |= IB_SEND_SIGNALED;

	send_ring->pending_wqe++;
	send_info->read.length = send_info->write.length;
	/* Read into the same write area */
	send_info->read.addr = (uintptr_t)send_info->write.addr;
	send_info->read.lkey = send_ring->mr->mkey.key;

	if (send_ring->pending_wqe % send_ring->signal_th == 0)
		send_info->read.send_flags = IB_SEND_SIGNALED;
	else
		send_info->read.send_flags = 0;
}

static int dr_postsend_icm_data(struct mlx5dr_domain *dmn,
				struct postsend_info *send_info)
{
	struct mlx5dr_send_ring *send_ring = dmn->send_ring;
	u32 buff_offset;
	int ret;

	ret = dr_handle_pending_wc(dmn, send_ring);
	if (ret)
		return ret;

	if (send_info->write.length > dmn->info.max_inline_size) {
		buff_offset = (send_ring->tx_head &
			       (dmn->send_ring->signal_th - 1)) *
			send_ring->max_post_send_size;
		/* Copy to ring mr */
		memcpy(send_ring->buf + buff_offset,
		       (void *)(uintptr_t)send_info->write.addr,
		       send_info->write.length);
		send_info->write.addr = (uintptr_t)send_ring->mr->dma_addr + buff_offset;
		send_info->write.lkey = send_ring->mr->mkey.key;
	}

	send_ring->tx_head++;
	dr_fill_data_segs(send_ring, send_info);
	dr_post_send(send_ring->qp, send_info);

	return 0;
}

static int dr_get_tbl_copy_details(struct mlx5dr_domain *dmn,
				   struct mlx5dr_ste_htbl *htbl,
				   u8 **data,
				   u32 *byte_size,
				   int *iterations,
				   int *num_stes)
{
	int alloc_size;

	if (htbl->chunk->byte_size > dmn->send_ring->max_post_send_size) {
		*iterations = htbl->chunk->byte_size /
			dmn->send_ring->max_post_send_size;
		*byte_size = dmn->send_ring->max_post_send_size;
		alloc_size = *byte_size;
		*num_stes = *byte_size / DR_STE_SIZE;
	} else {
		*iterations = 1;
		*num_stes = htbl->chunk->num_of_entries;
		alloc_size = *num_stes * DR_STE_SIZE;
	}

	*data = kzalloc(alloc_size, GFP_KERNEL);
	if (!*data)
		return -ENOMEM;

	return 0;
}

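/* Illustrative example for dr_get_tbl_copy_details() above (a sketch, not
 * taken from the driver, assuming a 64-byte STE and a 64KB
 * max_post_send_size): a 128KB hash table chunk is split into
 * iterations = 128KB / 64KB = 2 transfers of byte_size = 64KB, each carrying
 * num_stes = 64KB / 64 = 1024 STEs, while a chunk smaller than 64KB is sent
 * in a single iteration covering all of its entries.
 */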
/**
 * mlx5dr_send_postsend_ste: write size bytes into offset from the hw icm.
 *
 * @dmn:    Domain
 * @ste:    The ste struct that contains the data (at least part of it)
 * @data:   The real data to send
 * @size:   Number of bytes to write
 * @offset: Offset from the start of the icm mapped data; used to write
 *          only part of the buffer
 *
 * Return: 0 on success.
 */
int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn, struct mlx5dr_ste *ste,
			     u8 *data, u16 size, u16 offset)
{
	struct postsend_info send_info = {};

	send_info.write.addr = (uintptr_t)data;
	send_info.write.length = size;
	send_info.write.lkey = 0;
	send_info.remote_addr = mlx5dr_ste_get_mr_addr(ste) + offset;
	send_info.rkey = ste->htbl->chunk->rkey;

	return dr_postsend_icm_data(dmn, &send_info);
}

int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn,
			      struct mlx5dr_ste_htbl *htbl,
			      u8 *formatted_ste, u8 *mask)
{
	u32 byte_size = htbl->chunk->byte_size;
	int num_stes_per_iter;
	int iterations;
	u8 *data;
	int ret;
	int i;
	int j;

	ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size,
				      &iterations, &num_stes_per_iter);
	if (ret)
		return ret;

	/* Send the data 'iterations' times */
	for (i = 0; i < iterations; i++) {
		u32 ste_index = i * (byte_size / DR_STE_SIZE);
		struct postsend_info send_info = {};

		/* Copy all the STEs into the data buffer,
		 * the bit_mask needs to be added as well
		 */
		for (j = 0; j < num_stes_per_iter; j++) {
			u8 *hw_ste = htbl->ste_arr[ste_index + j].hw_ste;
			u32 ste_off = j * DR_STE_SIZE;

			if (mlx5dr_ste_is_not_valid_entry(hw_ste)) {
				memcpy(data + ste_off,
				       formatted_ste, DR_STE_SIZE);
			} else {
				/* Copy data */
				memcpy(data + ste_off,
				       htbl->ste_arr[ste_index + j].hw_ste,
				       DR_STE_SIZE_REDUCED);
				/* Copy bit_mask */
				memcpy(data + ste_off + DR_STE_SIZE_REDUCED,
				       mask, DR_STE_SIZE_MASK);
			}
		}

		send_info.write.addr = (uintptr_t)data;
		send_info.write.length = byte_size;
		send_info.write.lkey = 0;
		send_info.remote_addr =
			mlx5dr_ste_get_mr_addr(htbl->ste_arr + ste_index);
		send_info.rkey = htbl->chunk->rkey;

		ret = dr_postsend_icm_data(dmn, &send_info);
		if (ret)
			goto out_free;
	}

out_free:
	kfree(data);
	return ret;
}

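/* Layout note (illustrative, based on the DR_STE_SIZE_* definitions in
 * dr_types.h and assuming DR_STE_SIZE = DR_STE_SIZE_REDUCED + DR_STE_SIZE_MASK):
 * a full STE written to ICM is the reduced STE kept in ste_arr/hw_ste_arr
 * followed by the per-matcher bit_mask, which is why the function above
 * copies DR_STE_SIZE_REDUCED bytes of STE and then DR_STE_SIZE_MASK bytes
 * of mask for every valid entry.
 */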
/* Initialize htbl with default STEs */
int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn,
					struct mlx5dr_ste_htbl *htbl,
					u8 *ste_init_data,
					bool update_hw_ste)
{
	u32 byte_size = htbl->chunk->byte_size;
	int iterations;
	int num_stes;
	u8 *data;
	int ret;
	int i;

	ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size,
				      &iterations, &num_stes);
	if (ret)
		return ret;

	for (i = 0; i < num_stes; i++) {
		u8 *copy_dst;

		/* Copy the same STE into the data buffer */
		copy_dst = data + i * DR_STE_SIZE;
		memcpy(copy_dst, ste_init_data, DR_STE_SIZE);

		if (update_hw_ste) {
			/* Copy the reduced ste to hash table ste_arr */
			copy_dst = htbl->hw_ste_arr + i * DR_STE_SIZE_REDUCED;
			memcpy(copy_dst, ste_init_data, DR_STE_SIZE_REDUCED);
		}
	}

	/* Send the data 'iterations' times */
	for (i = 0; i < iterations; i++) {
		u32 ste_index = i * (byte_size / DR_STE_SIZE);
		struct postsend_info send_info = {};

		send_info.write.addr = (uintptr_t)data;
		send_info.write.length = byte_size;
		send_info.write.lkey = 0;
		send_info.remote_addr =
			mlx5dr_ste_get_mr_addr(htbl->ste_arr + ste_index);
		send_info.rkey = htbl->chunk->rkey;

		ret = dr_postsend_icm_data(dmn, &send_info);
		if (ret)
			goto out_free;
	}

out_free:
	kfree(data);
	return ret;
}

int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn,
				struct mlx5dr_action *action)
{
	struct postsend_info send_info = {};
	int ret;

	send_info.write.addr = (uintptr_t)action->rewrite.data;
	send_info.write.length = action->rewrite.chunk->byte_size;
	send_info.write.lkey = 0;
	send_info.remote_addr = action->rewrite.chunk->mr_addr;
	send_info.rkey = action->rewrite.chunk->rkey;

	mutex_lock(&dmn->mutex);
	ret = dr_postsend_icm_data(dmn, &send_info);
	mutex_unlock(&dmn->mutex);

	return ret;
}

static int dr_modify_qp_rst2init(struct mlx5_core_dev *mdev,
				 struct mlx5dr_qp *dr_qp,
				 int port)
{
	u32 in[MLX5_ST_SZ_DW(rst2init_qp_in)] = {};
	void *qpc;

	qpc = MLX5_ADDR_OF(rst2init_qp_in, in, qpc);

	MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, port);
	MLX5_SET(qpc, qpc, pm_state, MLX5_QPC_PM_STATE_MIGRATED);
	MLX5_SET(qpc, qpc, rre, 1);
	MLX5_SET(qpc, qpc, rwe, 1);

	return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RST2INIT_QP, 0, qpc,
				   &dr_qp->mqp);
}

static int dr_cmd_modify_qp_rtr2rts(struct mlx5_core_dev *mdev,
				    struct mlx5dr_qp *dr_qp,
				    struct dr_qp_rts_attr *attr)
{
	u32 in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {};
	void *qpc;

	qpc = MLX5_ADDR_OF(rtr2rts_qp_in, in, qpc);

	MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->mqp.qpn);

	MLX5_SET(qpc, qpc, log_ack_req_freq, 0);
	MLX5_SET(qpc, qpc, retry_count, attr->retry_cnt);
	MLX5_SET(qpc, qpc, rnr_retry, attr->rnr_retry);

	return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RTR2RTS_QP, 0, qpc,
				   &dr_qp->mqp);
}

static int dr_cmd_modify_qp_init2rtr(struct mlx5_core_dev *mdev,
				     struct mlx5dr_qp *dr_qp,
				     struct dr_qp_rtr_attr *attr)
{
	u32 in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {};
	void *qpc;

	qpc = MLX5_ADDR_OF(init2rtr_qp_in, in, qpc);

	MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->mqp.qpn);

	MLX5_SET(qpc, qpc, mtu, attr->mtu);
	MLX5_SET(qpc, qpc, log_msg_max, DR_CHUNK_SIZE_MAX - 1);
	MLX5_SET(qpc, qpc, remote_qpn, attr->qp_num);
	memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rmac_47_32),
	       attr->dgid_attr.mac, sizeof(attr->dgid_attr.mac));
	memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rgid_rip),
	       attr->dgid_attr.gid, sizeof(attr->dgid_attr.gid));
	MLX5_SET(qpc, qpc, primary_address_path.src_addr_index,
		 attr->sgid_index);

	if (attr->dgid_attr.roce_ver == MLX5_ROCE_VERSION_2)
		MLX5_SET(qpc, qpc, primary_address_path.udp_sport,
			 attr->udp_src_port);

	MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, attr->port_num);
	MLX5_SET(qpc, qpc, min_rnr_nak, 1);

	return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_INIT2RTR_QP, 0, qpc,
				   &dr_qp->mqp);
}

static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn)
{
	struct mlx5dr_qp *dr_qp = dmn->send_ring->qp;
	struct dr_qp_rts_attr rts_attr = {};
	struct dr_qp_rtr_attr rtr_attr = {};
	enum ib_mtu mtu = IB_MTU_1024;
	u16 gid_index = 0;
	int port = 1;
	int ret;

	/* Init */
	ret = dr_modify_qp_rst2init(dmn->mdev, dr_qp, port);
	if (ret)
		return ret;

	/* RTR */
	ret = mlx5dr_cmd_query_gid(dmn->mdev, port, gid_index, &rtr_attr.dgid_attr);
	if (ret)
		return ret;

	rtr_attr.mtu = mtu;
	rtr_attr.qp_num = dr_qp->mqp.qpn;
	rtr_attr.min_rnr_timer = 12;
	rtr_attr.port_num = port;
	rtr_attr.sgid_index = gid_index;
	rtr_attr.udp_src_port = dmn->info.caps.roce_min_src_udp;

	ret = dr_cmd_modify_qp_init2rtr(dmn->mdev, dr_qp, &rtr_attr);
	if (ret)
		return ret;

	/* RTS */
	rts_attr.timeout = 14;
	rts_attr.retry_cnt = 7;
	rts_attr.rnr_retry = 7;

	ret = dr_cmd_modify_qp_rtr2rts(dmn->mdev, dr_qp, &rts_attr);
	if (ret)
		return ret;

	return 0;
}

static void dr_cq_event(struct mlx5_core_cq *mcq,
			enum mlx5_event event)
{
	pr_info("CQ event %u on CQ #%u\n", event, mcq->cqn);
}

static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev,
				      struct mlx5_uars_page *uar,
				      size_t ncqe)
{
	u32 temp_cqc[MLX5_ST_SZ_DW(cqc)] = {};
	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
	struct mlx5_wq_param wqp;
	struct mlx5_cqe64 *cqe;
	struct mlx5dr_cq *cq;
	int inlen, err, eqn;
	unsigned int irqn;
	void *cqc, *in;
	__be64 *pas;
	u32 i;

	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
	if (!cq)
		return NULL;

	ncqe = roundup_pow_of_two(ncqe);
	MLX5_SET(cqc, temp_cqc, log_cq_size, ilog2(ncqe));

	wqp.buf_numa_node = mdev->priv.numa_node;
	wqp.db_numa_node = mdev->priv.numa_node;

	err = mlx5_cqwq_create(mdev, &wqp, temp_cqc, &cq->wq,
			       &cq->wq_ctrl);
	if (err)
		goto out;

	for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
		cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
		cqe->op_own = MLX5_CQE_INVALID << 4 | MLX5_CQE_OWNER_MASK;
	}

	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
		sizeof(u64) * cq->wq_ctrl.buf.npages;
	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in)
		goto err_cqwq;

	err = mlx5_vector2eqn(mdev, smp_processor_id(), &eqn, &irqn);
	if (err) {
		kvfree(in);
		goto err_cqwq;
	}

	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
	MLX5_SET(cqc, cqc, log_cq_size, ilog2(ncqe));
	MLX5_SET(cqc, cqc, c_eqn, eqn);
	MLX5_SET(cqc, cqc, uar_page, uar->index);
	MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
		 MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);

	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
	mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, pas);

	cq->mcq.event = dr_cq_event;

	err = mlx5_core_create_cq(mdev, &cq->mcq, in, inlen, out, sizeof(out));
	kvfree(in);

	if (err)
		goto err_cqwq;

	cq->mcq.cqe_sz = 64;
	cq->mcq.set_ci_db = cq->wq_ctrl.db.db;
	cq->mcq.arm_db = cq->wq_ctrl.db.db + 1;
	*cq->mcq.set_ci_db = 0;
	*cq->mcq.arm_db = 0;
	cq->mcq.vector = 0;
	cq->mcq.irqn = irqn;
	cq->mcq.uar = uar;

	return cq;

err_cqwq:
	mlx5_wq_destroy(&cq->wq_ctrl);
out:
	kfree(cq);
	return NULL;
}

static void dr_destroy_cq(struct mlx5_core_dev *mdev, struct mlx5dr_cq *cq)
{
	mlx5_core_destroy_cq(mdev, &cq->mcq);
	mlx5_wq_destroy(&cq->wq_ctrl);
	kfree(cq);
}

static int
dr_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, struct mlx5_core_mkey *mkey)
{
	u32 in[MLX5_ST_SZ_DW(create_mkey_in)] = {};
	void *mkc;

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
	MLX5_SET(mkc, mkc, a, 1);
	MLX5_SET(mkc, mkc, rw, 1);
	MLX5_SET(mkc, mkc, rr, 1);
	MLX5_SET(mkc, mkc, lw, 1);
	MLX5_SET(mkc, mkc, lr, 1);

	MLX5_SET(mkc, mkc, pd, pdn);
	MLX5_SET(mkc, mkc, length64, 1);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);

	return mlx5_core_create_mkey(mdev, mkey, in, sizeof(in));
}

static struct mlx5dr_mr *dr_reg_mr(struct mlx5_core_dev *mdev,
				   u32 pdn, void *buf, size_t size)
{
	struct mlx5dr_mr *mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	struct device *dma_device;
	dma_addr_t dma_addr;
	int err;

	if (!mr)
		return NULL;

	dma_device = &mdev->pdev->dev;
	dma_addr = dma_map_single(dma_device, buf, size,
				  DMA_BIDIRECTIONAL);
	err = dma_mapping_error(dma_device, dma_addr);
	if (err) {
		mlx5_core_warn(mdev, "Can't dma buf\n");
		kfree(mr);
		return NULL;
	}

	err = dr_create_mkey(mdev, pdn, &mr->mkey);
	if (err) {
		mlx5_core_warn(mdev, "Can't create mkey\n");
		dma_unmap_single(dma_device, dma_addr, size,
				 DMA_BIDIRECTIONAL);
		kfree(mr);
		return NULL;
	}

	mr->dma_addr = dma_addr;
	mr->size = size;
	mr->addr = buf;

	return mr;
}

static void dr_dereg_mr(struct mlx5_core_dev *mdev, struct mlx5dr_mr *mr)
{
	mlx5_core_destroy_mkey(mdev, &mr->mkey);
	dma_unmap_single(&mdev->pdev->dev, mr->dma_addr, mr->size,
			 DMA_BIDIRECTIONAL);
	kfree(mr);
}

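/* Sizing sketch for the send ring allocated below (illustrative, assuming a
 * 64-byte STE and the defaults at the top of this file): the copy buffer is
 * sized signal_th * max_post_send_size.  With QUEUE_SIZE = 128 and
 * SIGNAL_PER_DIV_QUEUE = 16, signal_th = 8; max_post_send_size for a
 * DR_CHUNK_SIZE_1K STE chunk is 1024 * 64 = 64KB, giving an 8 * 64KB = 512KB
 * buffer, i.e. one max_post_send_size slot per in-flight signal interval.
 */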
int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn)
{
	struct dr_qp_init_attr init_attr = {};
	int cq_size;
	int size;
	int ret;

	dmn->send_ring = kzalloc(sizeof(*dmn->send_ring), GFP_KERNEL);
	if (!dmn->send_ring)
		return -ENOMEM;

	cq_size = QUEUE_SIZE + 1;
	dmn->send_ring->cq = dr_create_cq(dmn->mdev, dmn->uar, cq_size);
	if (!dmn->send_ring->cq) {
		ret = -ENOMEM;
		goto free_send_ring;
	}

	init_attr.cqn = dmn->send_ring->cq->mcq.cqn;
	init_attr.pdn = dmn->pdn;
	init_attr.uar = dmn->uar;
	init_attr.max_send_wr = QUEUE_SIZE;

	dmn->send_ring->qp = dr_create_rc_qp(dmn->mdev, &init_attr);
	if (!dmn->send_ring->qp) {
		ret = -ENOMEM;
		goto clean_cq;
	}

	dmn->send_ring->cq->qp = dmn->send_ring->qp;

	dmn->info.max_send_wr = QUEUE_SIZE;
	dmn->info.max_inline_size = min(dmn->send_ring->qp->max_inline_data,
					DR_STE_SIZE);

	dmn->send_ring->signal_th = dmn->info.max_send_wr /
		SIGNAL_PER_DIV_QUEUE;

	/* Prepare qp to be used */
	ret = dr_prepare_qp_to_rts(dmn);
	if (ret)
		goto clean_qp;

	dmn->send_ring->max_post_send_size =
		mlx5dr_icm_pool_chunk_size_to_byte(DR_CHUNK_SIZE_1K,
						   DR_ICM_TYPE_STE);

	/* Allocating the max size as a buffer for writing */
	size = dmn->send_ring->signal_th * dmn->send_ring->max_post_send_size;
	dmn->send_ring->buf = kzalloc(size, GFP_KERNEL);
	if (!dmn->send_ring->buf) {
		ret = -ENOMEM;
		goto clean_qp;
	}

	dmn->send_ring->buf_size = size;

	dmn->send_ring->mr = dr_reg_mr(dmn->mdev,
				       dmn->pdn, dmn->send_ring->buf, size);
	if (!dmn->send_ring->mr) {
		ret = -ENOMEM;
		goto free_mem;
	}

	dmn->send_ring->sync_mr = dr_reg_mr(dmn->mdev,
					    dmn->pdn, dmn->send_ring->sync_buff,
					    MIN_READ_SYNC);
	if (!dmn->send_ring->sync_mr) {
		ret = -ENOMEM;
		goto clean_mr;
	}

	return 0;

clean_mr:
	dr_dereg_mr(dmn->mdev, dmn->send_ring->mr);
free_mem:
	kfree(dmn->send_ring->buf);
clean_qp:
	dr_destroy_qp(dmn->mdev, dmn->send_ring->qp);
clean_cq:
	dr_destroy_cq(dmn->mdev, dmn->send_ring->cq);
free_send_ring:
	kfree(dmn->send_ring);

	return ret;
}

void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn,
			   struct mlx5dr_send_ring *send_ring)
{
	dr_destroy_qp(dmn->mdev, send_ring->qp);
	dr_destroy_cq(dmn->mdev, send_ring->cq);
	dr_dereg_mr(dmn->mdev, send_ring->sync_mr);
	dr_dereg_mr(dmn->mdev, send_ring->mr);
	kfree(send_ring->buf);
	kfree(send_ring);
}

int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn)
{
	struct mlx5dr_send_ring *send_ring = dmn->send_ring;
	struct postsend_info send_info = {};
	u8 data[DR_STE_SIZE];
	int num_of_sends_req;
	int ret;
	int i;

	/* Sending this number of requests makes sure the drain threshold
	 * is reached
	 */
	num_of_sends_req = send_ring->signal_th * TH_NUMS_TO_DRAIN / 2;

	/* Send fake requests, forcing the last one to be signaled */
	send_info.write.addr = (uintptr_t)data;
	send_info.write.length = DR_STE_SIZE;
	send_info.write.lkey = 0;
	/* Using the sync_mr in order to write/read */
	send_info.remote_addr = (uintptr_t)send_ring->sync_mr->addr;
	send_info.rkey = send_ring->sync_mr->mkey.key;

	for (i = 0; i < num_of_sends_req; i++) {
		ret = dr_postsend_icm_data(dmn, &send_info);
		if (ret)
			return ret;
	}

	ret = dr_handle_pending_wc(dmn, send_ring);

	return ret;
}