1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * hermon_wr.c 29 * Hermon Work Request Processing Routines 30 * 31 * Implements all the routines necessary to provide the PostSend(), 32 * PostRecv() and PostSRQ() verbs. Also contains all the code 33 * necessary to implement the Hermon WRID tracking mechanism. 
34 */ 35 36 #include <sys/types.h> 37 #include <sys/conf.h> 38 #include <sys/ddi.h> 39 #include <sys/sunddi.h> 40 #include <sys/modctl.h> 41 #include <sys/avl.h> 42 43 #include <sys/ib/adapters/hermon/hermon.h> 44 45 static uint32_t hermon_wr_get_immediate(ibt_send_wr_t *wr); 46 static int hermon_wr_bind_check(hermon_state_t *state, ibt_send_wr_t *wr); 47 static int hermon_wqe_send_build(hermon_state_t *state, hermon_qphdl_t qp, 48 ibt_send_wr_t *wr, uint64_t *desc, uint_t *size); 49 static int hermon_wqe_mlx_build(hermon_state_t *state, hermon_qphdl_t qp, 50 ibt_send_wr_t *wr, uint64_t *desc, uint_t *size); 51 static void hermon_wqe_headroom(uint_t from, hermon_qphdl_t qp); 52 static int hermon_wqe_recv_build(hermon_state_t *state, hermon_qphdl_t qp, 53 ibt_recv_wr_t *wr, uint64_t *desc); 54 static int hermon_wqe_srq_build(hermon_state_t *state, hermon_srqhdl_t srq, 55 ibt_recv_wr_t *wr, uint64_t *desc); 56 static void hermon_wqe_sync(void *hdl, uint_t sync_from, 57 uint_t sync_to, uint_t sync_type, uint_t flag); 58 static hermon_workq_avl_t *hermon_wrid_wqavl_find(hermon_cqhdl_t cq, uint_t qpn, 59 uint_t send_or_recv); 60 static void hermon_cq_workq_add(hermon_cqhdl_t cq, hermon_workq_avl_t *wqavl); 61 static void hermon_cq_workq_remove(hermon_cqhdl_t cq, 62 hermon_workq_avl_t *wqavl); 63 64 static ibt_wr_ds_t null_sgl = { 0, 0x00000100, 0 }; 65 66 /* 67 * Add ability to try to debug RDMA_READ/RDMA_WRITE failures. 
 *
 *    0x1 - print rkey used during post_send
 *    0x2 - print sgls used during post_send
 *    0x4 - print FMR comings and goings
 */
int hermon_rdma_debug = 0x0;

/*
 * hermon_post_send_ud()
 *    Optimized posting path for UD QPs.  Builds one Send (or Send-LSO)
 *    WQE per work request directly on the send queue, sets the HW
 *    ownership bit per WQE, and rings the doorbell once at the end.
 *    Context: Can be called from interrupt or base context.
 *    Called with qp_sq_lock held; drops it before returning.
 */
static int
hermon_post_send_ud(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint_t num_wr, uint_t *num_posted)
{
	hermon_hw_snd_wqe_ud_t		*ud;
	hermon_workq_hdr_t		*wq;
	hermon_ahhdl_t			ah;
	ibt_ud_dest_t			*dest;
	uint64_t			*desc;
	uint32_t			desc_sz;
	uint32_t			signaled_dbd, solicited;
	uint32_t			head, tail, next_tail, qsize_msk;
	uint32_t			hdrmwqes;
	uint32_t			nopcode, fence, immed_data = 0;
	hermon_hw_wqe_sgl_t		*ds, *old_ds;
	ibt_wr_ds_t			*sgl;
	uint32_t			nds, dnds;
	int				i, j, last_ds, num_ds, status;
	uint32_t			*wqe_start;
	int				sectperwqe;
	uint_t				posted_cnt = 0;

	/* initialize the FMA retry loop */
	hermon_pio_init(fm_loop_cnt, fm_status, fm_test_num);

	ASSERT(MUTEX_HELD(&qp->qp_sq_lock));
	_NOTE(LOCK_RELEASED_AS_SIDE_EFFECT(&qp->qp_sq_lock))

	/* Grab the lock for the WRID list */
	membar_consumer();

	/* Save away some initial QP state */
	wq = qp->qp_sq_wqhdr;
	qsize_msk = wq->wq_mask;
	hdrmwqes = qp->qp_sq_hdrmwqes;		/* in WQEs */
	sectperwqe = 1 << (qp->qp_sq_log_wqesz - 2);

	tail = wq->wq_tail;
	head = wq->wq_head;
	status = DDI_SUCCESS;

post_next:
	/*
	 * Check for "queue full" condition.  If the queue
	 * is already full, then no more WQEs can be posted.
	 * So break out, ring a doorbell (if necessary) and
	 * return an error
	 */
	if (wq->wq_full != 0) {
		status = IBT_QP_FULL;
		goto done;
	}

	next_tail = (tail + 1) & qsize_msk;
	if (((tail + hdrmwqes) & qsize_msk) == head) {
		wq->wq_full = 1;
	}

	desc = HERMON_QP_SQ_ENTRY(qp, tail);

	/* UD address-vector segment follows the control segment */
	ud = (hermon_hw_snd_wqe_ud_t *)((uintptr_t)desc +
	    sizeof (hermon_hw_snd_wqe_ctrl_t));
	ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ud +
	    sizeof (hermon_hw_snd_wqe_ud_t));
	nds = wr->wr_nds;
	sgl = wr->wr_sgl;
	num_ds = 0;

	/* need to know the count of destination nds for backward loop */
	for (dnds = 0, i = 0; i < nds; i++) {
		if (sgl[i].ds_len != 0)
			dnds++;
	}

	/*
	 * Build a Send or Send_LSO WQE
	 */
	if (wr->wr_opcode == IBT_WRC_SEND_LSO) {
		int total_len;

		nopcode = HERMON_WQE_SEND_NOPCODE_LSO;
		if (wr->wr.ud_lso.lso_hdr_sz > 60) {
			nopcode |= (1 << 6);	/* ReRead bit must be set */
		}
		dest = wr->wr.ud_lso.lso_ud_dest;
		ah = (hermon_ahhdl_t)dest->ud_ah;
		if (ah == NULL) {
			status = IBT_AH_HDL_INVALID;
			goto done;
		}
		HERMON_WQE_BUILD_UD(qp, ud, ah, dest);

		/* LSO header is copied inline, 16-byte aligned */
		total_len = (4 + 0xf + wr->wr.ud_lso.lso_hdr_sz) & ~0xf;
		if ((uintptr_t)ds + total_len + (nds * 16) >
		    (uintptr_t)desc + (1 << qp->qp_sq_log_wqesz)) {
			status = IBT_QP_SGL_LEN_INVALID;
			goto done;
		}
		old_ds = ds;
		bcopy(wr->wr.ud_lso.lso_hdr, (uint32_t *)old_ds + 1,
		    wr->wr.ud_lso.lso_hdr_sz);
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ds + total_len);
		i = 0;
	} else if (wr->wr_opcode == IBT_WRC_SEND) {
		if (wr->wr_flags & IBT_WR_SEND_IMMED) {
			nopcode = HERMON_WQE_SEND_NOPCODE_SENDI;
			immed_data = wr->wr.ud.udwr_immed;
		} else {
			nopcode = HERMON_WQE_SEND_NOPCODE_SEND;
		}
		dest = wr->wr.ud.udwr_dest;
		ah = (hermon_ahhdl_t)dest->ud_ah;
		if (ah == NULL) {
			status = IBT_AH_HDL_INVALID;
			goto done;
		}
		HERMON_WQE_BUILD_UD(qp, ud, ah, dest);
		i = 0;
	} else {
		status = IBT_QP_OP_TYPE_INVALID;
		goto done;
	}

	if (nds > qp->qp_sq_sgl) {
		status = IBT_QP_SGL_LEN_INVALID;
		goto done;
	}
	for (last_ds = num_ds, j = i; j < nds; j++) {
		if (sgl[j].ds_len != 0)
			last_ds++;	/* real last ds of wqe to fill */
	}
	desc_sz = ((uintptr_t)&ds[last_ds] - (uintptr_t)desc) >> 0x4;
	/* fill the data segments backward, skipping zero-length entries */
	for (j = nds; --j >= i; ) {
		if (sgl[j].ds_len == 0) {
			continue;
		}

		/*
		 * Fill in the Data Segment(s) for the current WQE, using the
		 * information contained in the scatter-gather list of the
		 * work request.
		 */
		last_ds--;
		HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[last_ds], &sgl[j]);
	}

	membar_producer();

	if (wr->wr_opcode == IBT_WRC_SEND_LSO) {
		HERMON_WQE_BUILD_LSO(qp, old_ds, wr->wr.ud_lso.lso_mss,
		    wr->wr.ud_lso.lso_hdr_sz);
	}

	fence = (wr->wr_flags & IBT_WR_SEND_FENCE) ? 1 : 0;

	signaled_dbd = ((qp->qp_sq_sigtype == HERMON_QP_SQ_ALL_SIGNALED) ||
	    (wr->wr_flags & IBT_WR_SEND_SIGNAL)) ? 1 : 0;

	solicited = (wr->wr_flags & IBT_WR_SEND_SOLICIT) ? 1 : 0;

	HERMON_WQE_SET_CTRL_SEGMENT(desc, desc_sz, fence, immed_data,
	    solicited, signaled_dbd, wr->wr_flags & IBT_WR_SEND_CKSUM, qp);

	wq->wq_wrid[tail] = wr->wr_id;

	tail = next_tail;

	/* Update some of the state in the QP */
	wq->wq_tail = tail;

	membar_producer();

	/* Now set the ownership bit and opcode (first dword). */
	HERMON_SET_SEND_WQE_OWNER(qp, (uint32_t *)desc, nopcode);

	posted_cnt++;
	if (--num_wr > 0) {
		/* do the invalidate of the headroom */
		wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp,
		    (tail + hdrmwqes) & qsize_msk);
		for (i = 16; i < sectperwqe; i += 16) {
			wqe_start[i] = 0xFFFFFFFF;
		}

		wr++;
		goto post_next;
	}
done:
	if (posted_cnt != 0) {
		ddi_acc_handle_t uarhdl = hermon_get_uarhdl(state);

		membar_producer();

		/* the FMA retry loop starts for Hermon doorbell register. */
		hermon_pio_start(state, uarhdl, pio_error, fm_loop_cnt,
		    fm_status, fm_test_num);

		HERMON_UAR_DOORBELL(state, uarhdl,
		    (uint64_t *)(void *)&state->hs_uar->send,
		    (uint64_t)qp->qp_ring);

		/* the FMA retry loop ends. */
		hermon_pio_end(state, uarhdl, pio_error, fm_loop_cnt,
		    fm_status, fm_test_num);

		/* do the invalidate of the headroom */
		wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp,
		    (tail + hdrmwqes) & qsize_msk);
		for (i = 16; i < sectperwqe; i += 16) {
			wqe_start[i] = 0xFFFFFFFF;
		}
	}
	if (num_posted != NULL)
		*num_posted = posted_cnt;

	mutex_exit(&qp->qp_sq_lock);

	return (status);

pio_error:
	mutex_exit(&qp->qp_sq_lock);
	hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
	return (ibc_get_ci_failure(0));
}

/*
 * hermon_post_send_rc()
 *    Optimized posting path for RC QPs.  Supports Send, RDMA Read/Write,
 *    Atomic (Compare-Swap / Fetch-Add), and memory window Bind requests.
 *    Context: Can be called from interrupt or base context.
 *    Called with qp_sq_lock held; drops it before returning.
 */
static int
hermon_post_send_rc(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint_t num_wr, uint_t *num_posted)
{
	uint64_t			*desc;
	hermon_workq_hdr_t		*wq;
	uint32_t			desc_sz;
	uint32_t			signaled_dbd, solicited;
	uint32_t			head, tail, next_tail, qsize_msk;
	uint32_t			hdrmwqes;
	int				status;
	uint32_t			nopcode, fence, immed_data = 0;
	hermon_hw_snd_wqe_remaddr_t	*rc;
	hermon_hw_snd_wqe_atomic_t	*at;
	hermon_hw_snd_wqe_bind_t	*bn;
	hermon_hw_wqe_sgl_t		*ds;
	ibt_wr_ds_t			*sgl;
	uint32_t			nds;
	int				i, last_ds, num_ds;
	uint32_t			*wqe_start;
	int				sectperwqe;
	uint_t				posted_cnt = 0;
	int				print_rdma;
	int				rlen;
	uint32_t			rkey;
	uint64_t			raddr;

	/* initialize the FMA retry loop */
	hermon_pio_init(fm_loop_cnt, fm_status, fm_test_num);

	ASSERT(MUTEX_HELD(&qp->qp_sq_lock));
	_NOTE(LOCK_RELEASED_AS_SIDE_EFFECT(&qp->qp_sq_lock))

	/* Save away some initial QP state */
	wq = qp->qp_sq_wqhdr;
	qsize_msk = wq->wq_mask;
	hdrmwqes = qp->qp_sq_hdrmwqes;		/* in WQEs */
	sectperwqe = 1 << (qp->qp_sq_log_wqesz - 2);

	tail = wq->wq_tail;
	head = wq->wq_head;
	status = DDI_SUCCESS;

post_next:
	print_rdma = 0;
	rlen = 0;

	/*
	 * Check for "queue full" condition.  If the queue
	 * is already full, then no more WQEs can be posted.
	 * So break out, ring a doorbell (if necessary) and
	 * return an error
	 */
	if (wq->wq_full != 0) {
		status = IBT_QP_FULL;
		goto done;
	}
	next_tail = (tail + 1) & qsize_msk;
	if (((tail + hdrmwqes) & qsize_msk) == head) {
		wq->wq_full = 1;
	}

	desc = HERMON_QP_SQ_ENTRY(qp, tail);

	ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)desc +
	    sizeof (hermon_hw_snd_wqe_ctrl_t));
	nds = wr->wr_nds;
	sgl = wr->wr_sgl;
	num_ds = 0;

	/*
	 * Validate the operation type.  For RC requests, we allow
	 * "Send", "RDMA Read", "RDMA Write", various "Atomic"
	 * operations, and memory window "Bind"
	 */
	switch (wr->wr_opcode) {
	default:
		status = IBT_QP_OP_TYPE_INVALID;
		goto done;

	case IBT_WRC_SEND:
		if (wr->wr_flags & IBT_WR_SEND_IMMED) {
			nopcode = HERMON_WQE_SEND_NOPCODE_SENDI;
			immed_data = wr->wr.rc.rcwr.send_immed;
		} else {
			nopcode = HERMON_WQE_SEND_NOPCODE_SEND;
		}
		break;

	/*
	 * If this is an RDMA Read or RDMA Write request, then fill
	 * in the "Remote Address" header fields.
	 */
	case IBT_WRC_RDMAW:
		if (wr->wr_flags & IBT_WR_SEND_IMMED) {
			nopcode = HERMON_WQE_SEND_NOPCODE_RDMAWI;
			immed_data = wr->wr.rc.rcwr.rdma.rdma_immed;
		} else {
			nopcode = HERMON_WQE_SEND_NOPCODE_RDMAW;
		}
		/* FALLTHROUGH */
	case IBT_WRC_RDMAR:
		if (wr->wr_opcode == IBT_WRC_RDMAR)
			nopcode = HERMON_WQE_SEND_NOPCODE_RDMAR;
		rc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
		    sizeof (hermon_hw_snd_wqe_ctrl_t));

		/*
		 * Build the Remote Address Segment for the WQE, using
		 * the information from the RC work request.
		 */
		HERMON_WQE_BUILD_REMADDR(qp, rc, &wr->wr.rc.rcwr.rdma);

		if (hermon_rdma_debug) {
			print_rdma = hermon_rdma_debug;
			rkey = wr->wr.rc.rcwr.rdma.rdma_rkey;
			raddr = wr->wr.rc.rcwr.rdma.rdma_raddr;
		}

		/* Update "ds" for filling in Data Segments (below) */
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)rc +
		    sizeof (hermon_hw_snd_wqe_remaddr_t));
		break;

	/*
	 * If this is one of the Atomic type operations (i.e
	 * Compare-Swap or Fetch-Add), then fill in both the "Remote
	 * Address" header fields and the "Atomic" header fields.
	 */
	case IBT_WRC_CSWAP:
		nopcode = HERMON_WQE_SEND_NOPCODE_ATMCS;
		/* FALLTHROUGH */
	case IBT_WRC_FADD:
		if (wr->wr_opcode == IBT_WRC_FADD)
			nopcode = HERMON_WQE_SEND_NOPCODE_ATMFA;
		rc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
		    sizeof (hermon_hw_snd_wqe_ctrl_t));
		at = (hermon_hw_snd_wqe_atomic_t *)((uintptr_t)rc +
		    sizeof (hermon_hw_snd_wqe_remaddr_t));

		/*
		 * Build the Remote Address and Atomic Segments for
		 * the WQE, using the information from the RC Atomic
		 * work request.
		 */
		HERMON_WQE_BUILD_RC_ATOMIC_REMADDR(qp, rc, wr);
		HERMON_WQE_BUILD_ATOMIC(qp, at, wr->wr.rc.rcwr.atomic);

		/* Update "ds" for filling in Data Segments (below) */
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)at +
		    sizeof (hermon_hw_snd_wqe_atomic_t));

		/*
		 * Update "nds" and "sgl" because Atomic requests have
		 * only a single Data Segment.
		 */
		nds = 1;
		sgl = wr->wr_sgl;
		break;

	/*
	 * If this is memory window Bind operation, then we call the
	 * hermon_wr_bind_check() routine to validate the request and
	 * to generate the updated RKey.  If this is successful, then
	 * we fill in the WQE's "Bind" header fields.
	 */
	case IBT_WRC_BIND:
		nopcode = HERMON_WQE_SEND_NOPCODE_BIND;
		status = hermon_wr_bind_check(state, wr);
		if (status != DDI_SUCCESS)
			goto done;

		bn = (hermon_hw_snd_wqe_bind_t *)((uintptr_t)desc +
		    sizeof (hermon_hw_snd_wqe_ctrl_t));

		/*
		 * Build the Bind Memory Window Segments for the WQE,
		 * using the information from the RC Bind memory
		 * window work request.
		 */
		HERMON_WQE_BUILD_BIND(qp, bn, wr->wr.rc.rcwr.bind);

		/*
		 * Update the "ds" pointer.  Even though the "bind"
		 * operation requires no SGLs, this is necessary to
		 * facilitate the correct descriptor size calculations
		 * (below).
		 */
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)bn +
		    sizeof (hermon_hw_snd_wqe_bind_t));
		nds = 0;
	}

	/*
	 * Now fill in the Data Segments (SGL) for the Send WQE based
	 * on the values setup above (i.e. "sgl", "nds", and the "ds"
	 * pointer.  Start by checking for a valid number of SGL entries
	 */
	if (nds > qp->qp_sq_sgl) {
		status = IBT_QP_SGL_LEN_INVALID;
		goto done;
	}

	for (last_ds = num_ds, i = 0; i < nds; i++) {
		if (sgl[i].ds_len != 0)
			last_ds++;	/* real last ds of wqe to fill */
	}
	desc_sz = ((uintptr_t)&ds[last_ds] - (uintptr_t)desc) >> 0x4;
	/* fill the data segments backward, skipping zero-length entries */
	for (i = nds; --i >= 0; ) {
		if (sgl[i].ds_len == 0) {
			continue;
		}
		rlen += sgl[i].ds_len;
		if (print_rdma & 0x2)
			IBTF_DPRINTF_L2("rdma", "post: [%d]: laddr %llx "
			    "llen %x", i, sgl[i].ds_va, sgl[i].ds_len);

		/*
		 * Fill in the Data Segment(s) for the current WQE, using the
		 * information contained in the scatter-gather list of the
		 * work request.
		 */
		last_ds--;
		HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[last_ds], &sgl[i]);
	}

	if (print_rdma & 0x1) {
		IBTF_DPRINTF_L2("rdma", "post: indx %x rkey %x raddr %llx "
		    "total len %x", tail, rkey, raddr, rlen);
	}

	fence = (wr->wr_flags & IBT_WR_SEND_FENCE) ? 1 : 0;

	signaled_dbd = ((qp->qp_sq_sigtype == HERMON_QP_SQ_ALL_SIGNALED) ||
	    (wr->wr_flags & IBT_WR_SEND_SIGNAL)) ? 1 : 0;

	solicited = (wr->wr_flags & IBT_WR_SEND_SOLICIT) ? 1 : 0;

	HERMON_WQE_SET_CTRL_SEGMENT(desc, desc_sz, fence, immed_data, solicited,
	    signaled_dbd, wr->wr_flags & IBT_WR_SEND_CKSUM, qp);

	wq->wq_wrid[tail] = wr->wr_id;

	tail = next_tail;

	/* Update some of the state in the QP */
	wq->wq_tail = tail;

	membar_producer();

	/* Now set the ownership bit of the first one in the chain. */
	HERMON_SET_SEND_WQE_OWNER(qp, (uint32_t *)desc, nopcode);

	posted_cnt++;
	if (--num_wr > 0) {
		/* do the invalidate of the headroom */
		wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp,
		    (tail + hdrmwqes) & qsize_msk);
		for (i = 16; i < sectperwqe; i += 16) {
			wqe_start[i] = 0xFFFFFFFF;
		}

		wr++;
		goto post_next;
	}
done:

	if (posted_cnt != 0) {
		ddi_acc_handle_t uarhdl = hermon_get_uarhdl(state);

		membar_producer();

		/* the FMA retry loop starts for Hermon doorbell register. */
		hermon_pio_start(state, uarhdl, pio_error, fm_loop_cnt,
		    fm_status, fm_test_num);

		/* Ring the doorbell */
		HERMON_UAR_DOORBELL(state, uarhdl,
		    (uint64_t *)(void *)&state->hs_uar->send,
		    (uint64_t)qp->qp_ring);

		/* the FMA retry loop ends. */
		hermon_pio_end(state, uarhdl, pio_error, fm_loop_cnt,
		    fm_status, fm_test_num);

		/* do the invalidate of the headroom */
		wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp,
		    (tail + hdrmwqes) & qsize_msk);
		for (i = 16; i < sectperwqe; i += 16) {
			wqe_start[i] = 0xFFFFFFFF;
		}
	}
	/*
	 * Update the "num_posted" return value (if necessary).
	 * Then drop the locks and return success.
	 */
	if (num_posted != NULL) {
		*num_posted = posted_cnt;
	}

	mutex_exit(&qp->qp_sq_lock);
	return (status);

pio_error:
	mutex_exit(&qp->qp_sq_lock);
	hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
	return (ibc_get_ci_failure(0));
}

/*
 * hermon_post_send()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_post_send(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint_t num_wr, uint_t *num_posted)
{
	ibt_send_wr_t			*curr_wr;
	hermon_workq_hdr_t		*wq;
	hermon_ahhdl_t			ah;
	uint64_t			*desc, *prev;
	uint32_t			desc_sz;
	uint32_t			signaled_dbd, solicited;
	uint32_t			head, tail, next_tail, qsize_msk;
	uint32_t			sync_from, sync_to;
	uint32_t			hdrmwqes;
	uint_t				currindx, wrindx, numremain;
	uint_t				chainlen;
	uint_t				posted_cnt, maxstat;
	uint_t				total_posted;
	int				status;
	uint32_t			nopcode, fence, immed_data = 0;
	uint32_t			prev_nopcode;

	/* initialize the FMA retry loop */
	hermon_pio_init(fm_loop_cnt, fm_status, fm_test);

	/*
	 * Check for user-mappable QP memory.  Note:  We do not allow kernel
	 * clients to post to QP memory that is accessible directly by the
	 * user.  If the QP memory is user accessible, then return an error.
	 */
	if (qp->qp_is_umap) {
		return (IBT_QP_HDL_INVALID);
	}

	mutex_enter(&qp->qp_lock);

	/*
	 * Check QP state.  Can not post Send requests from the "Reset",
	 * "Init", or "RTR" states
	 */
	if ((qp->qp_state == HERMON_QP_RESET) ||
	    (qp->qp_state == HERMON_QP_INIT) ||
	    (qp->qp_state == HERMON_QP_RTR)) {
		mutex_exit(&qp->qp_lock);
		return (IBT_QP_STATE_INVALID);
	}
	mutex_exit(&qp->qp_lock);
	mutex_enter(&qp->qp_sq_lock);

	if (qp->qp_is_special)
		goto post_many;

	/* Use these optimized functions most of the time */
	/* NOTE: the optimized paths below drop qp_sq_lock before returning */
	if (qp->qp_serv_type == HERMON_QP_UD) {
		if (wr->wr_trans != IBT_UD_SRV) {
			mutex_exit(&qp->qp_sq_lock);
			return (IBT_QP_SRV_TYPE_INVALID);
		}
		return (hermon_post_send_ud(state, qp, wr, num_wr, num_posted));
	}

	if (qp->qp_serv_type == HERMON_QP_RC) {
		if (wr->wr_trans != IBT_RC_SRV) {
			mutex_exit(&qp->qp_sq_lock);
			return (IBT_QP_SRV_TYPE_INVALID);
		}
		return (hermon_post_send_rc(state, qp, wr, num_wr, num_posted));
	}

	if (qp->qp_serv_type == HERMON_QP_UC)
		goto post_many;

	mutex_exit(&qp->qp_sq_lock);
	return (IBT_QP_SRV_TYPE_INVALID);

post_many:
	/* general loop for non-optimized posting */

	/* Save away some initial QP state */
	wq = qp->qp_sq_wqhdr;
	qsize_msk = wq->wq_mask;
	tail = wq->wq_tail;
	head = wq->wq_head;
	hdrmwqes = qp->qp_sq_hdrmwqes;		/* in WQEs */

	/* Initialize posted_cnt */
	posted_cnt = 0;
	total_posted = 0;

	/*
	 * For each ibt_send_wr_t in the wr[] list passed in, parse the
	 * request and build a Send WQE.  NOTE:  Because we are potentially
	 * building a chain of WQEs to post, we want to build them all first,
	 * and set the valid (HW Ownership) bit on all but the first.
	 * However, we do not want to validate the first one until the
	 * entire chain of WQEs has been built.  Then in the final
	 * we set the valid bit in the first, flush if needed, and as a last
	 * step ring the appropriate doorbell.  NOTE: the doorbell ring may
	 * NOT be needed if the HCA is already processing, but the doorbell
	 * ring will be done regardless.  NOTE ALSO:  It is possible for
	 * more Work Requests to be posted than the HW will support at one
	 * shot.  If this happens, we need to be able to post and ring
	 * several chains here until the the entire request is complete.
	 * NOTE ALSO:  the term "chain" is used to differentiate it from
	 * Work Request List passed in; and because that's the terminology
	 * from the previous generations of HCA - but the WQEs are not, in
	 * fact, chained together for Hermon
	 */

	wrindx = 0;
	numremain = num_wr;
	status = DDI_SUCCESS;
	while ((wrindx < num_wr) && (status == DDI_SUCCESS)) {
		/*
		 * For the first WQE on a new chain we need "prev" to point
		 * to the current descriptor.
		 */
		prev = HERMON_QP_SQ_ENTRY(qp, tail);

		/*
		 * unlike Tavor & Arbel, tail will maintain the number of the
		 * next (this) WQE to be posted.  Since there is no backward
		 * linking in Hermon, we can always just look ahead
		 */
		/*
		 * Before we begin, save the current "tail index" for later
		 * DMA sync
		 */
		/* NOTE: don't need to go back one like arbel/tavor */
		sync_from = tail;

		/*
		 * Break the request up into lists that are less than or
		 * equal to the maximum number of WQEs that can be posted
		 * per doorbell ring - 256 currently
		 */
		chainlen = (numremain > HERMON_QP_MAXDESC_PER_DB) ?
		    HERMON_QP_MAXDESC_PER_DB : numremain;
		numremain -= chainlen;

		for (currindx = 0; currindx < chainlen; currindx++, wrindx++) {
			/*
			 * Check for "queue full" condition.  If the queue
			 * is already full, then no more WQEs can be posted.
			 * So break out, ring a doorbell (if necessary) and
			 * return an error
			 */
			if (wq->wq_full != 0) {
				status = IBT_QP_FULL;
				break;
			}

			/*
			 * Increment the "tail index".  Check for "queue
			 * full" condition incl. headroom.  If we detect that
			 * the current work request is going to fill the work
			 * queue, then we mark this condition and continue.
			 * Don't need >=, because going one-by-one we have to
			 * hit it exactly sooner or later
			 */

			next_tail = (tail + 1) & qsize_msk;
			if (((tail + hdrmwqes) & qsize_msk) == head) {
				wq->wq_full = 1;
			}

			/*
			 * Get the address of the location where the next
			 * Send WQE should be built
			 */
			desc = HERMON_QP_SQ_ENTRY(qp, tail);
			/*
			 * Call hermon_wqe_send_build() to build the WQE
			 * at the given address.  This routine uses the
			 * information in the ibt_send_wr_t list (wr[]) and
			 * returns the size of the WQE when it returns.
			 */
			status = hermon_wqe_send_build(state, qp,
			    &wr[wrindx], desc, &desc_sz);
			if (status != DDI_SUCCESS) {
				break;
			}

			/*
			 * Now, build the Ctrl Segment based on
			 * what was just done
			 */
			curr_wr = &wr[wrindx];

			switch (curr_wr->wr_opcode) {
			case IBT_WRC_RDMAW:
				if (curr_wr->wr_flags & IBT_WR_SEND_IMMED) {
					nopcode =
					    HERMON_WQE_SEND_NOPCODE_RDMAWI;
					immed_data =
					    hermon_wr_get_immediate(curr_wr);
				} else {
					nopcode = HERMON_WQE_SEND_NOPCODE_RDMAW;
				}
				break;

			case IBT_WRC_SEND:
				if (curr_wr->wr_flags & IBT_WR_SEND_IMMED) {
					nopcode = HERMON_WQE_SEND_NOPCODE_SENDI;
					immed_data =
					    hermon_wr_get_immediate(curr_wr);
				} else {
					nopcode = HERMON_WQE_SEND_NOPCODE_SEND;
				}
				break;

			case IBT_WRC_SEND_LSO:
				nopcode = HERMON_WQE_SEND_NOPCODE_LSO;
				break;

			case IBT_WRC_RDMAR:
				nopcode = HERMON_WQE_SEND_NOPCODE_RDMAR;
				break;

			case IBT_WRC_CSWAP:
				nopcode = HERMON_WQE_SEND_NOPCODE_ATMCS;
				break;

			case IBT_WRC_FADD:
				nopcode = HERMON_WQE_SEND_NOPCODE_ATMFA;
				break;

			case IBT_WRC_BIND:
				nopcode = HERMON_WQE_SEND_NOPCODE_BIND;
				break;
			}

			fence = (curr_wr->wr_flags & IBT_WR_SEND_FENCE) ? 1 : 0;

			/*
			 * now, build up the control segment, leaving the
			 * owner bit as it is
			 */

			if ((qp->qp_sq_sigtype == HERMON_QP_SQ_ALL_SIGNALED) ||
			    (curr_wr->wr_flags & IBT_WR_SEND_SIGNAL)) {
				signaled_dbd = 1;
			} else {
				signaled_dbd = 0;
			}
			if (curr_wr->wr_flags & IBT_WR_SEND_SOLICIT)
				solicited = 1;
			else
				solicited = 0;

			if (qp->qp_is_special) {
				/* Ensure correctness, set the ReRead bit */
				nopcode |= (1 << 6);
				ah = (hermon_ahhdl_t)
				    curr_wr->wr.ud.udwr_dest->ud_ah;
				mutex_enter(&ah->ah_lock);
				maxstat = ah->ah_udav->max_stat_rate;
				HERMON_WQE_SET_MLX_CTRL_SEGMENT(desc, desc_sz,
				    signaled_dbd, maxstat, ah->ah_udav->rlid,
				    qp, ah->ah_udav->sl);
				mutex_exit(&ah->ah_lock);
			} else {
				HERMON_WQE_SET_CTRL_SEGMENT(desc, desc_sz,
				    fence, immed_data, solicited,
				    signaled_dbd, curr_wr->wr_flags &
				    IBT_WR_SEND_CKSUM, qp);
			}
			wq->wq_wrid[tail] = curr_wr->wr_id;

			/*
			 * If this is not the first descriptor on the current
			 * chain, then set the ownership bit.
			 */
			if (currindx != 0) {		/* not the first */
				membar_producer();
				HERMON_SET_SEND_WQE_OWNER(qp,
				    (uint32_t *)desc, nopcode);
			} else
				prev_nopcode = nopcode;

			/*
			 * Update the current "tail index" and increment
			 * "posted_cnt"
			 */
			tail = next_tail;
			posted_cnt++;
		}

		/*
		 * If we reach here and there are one or more WQEs which have
		 * been successfully built as a chain, we have to finish up
		 * and prepare them for writing to the HW
		 * The steps are:
		 *	1. do the headroom fixup
		 *	2. add in the size of the headroom for the sync
		 *	3. write the owner bit for the first WQE
		 *	4. sync them
		 *	5. fix up the structures
		 *	6. hit the doorbell in UAR
		 */
		if (posted_cnt != 0) {
			ddi_acc_handle_t uarhdl = hermon_get_uarhdl(state);

			/*
			 * Save away updated "tail index" for the DMA sync
			 * including the headroom that will be needed
			 */
			sync_to = (tail + hdrmwqes) & qsize_msk;

			/* do the invalidate of the headroom */

			hermon_wqe_headroom(tail, qp);

			/* Do a DMA sync for current send WQE(s) */
			hermon_wqe_sync(qp, sync_from, sync_to, HERMON_WR_SEND,
			    DDI_DMA_SYNC_FORDEV);

			/* Update some of the state in the QP */
			wq->wq_tail = tail;
			total_posted += posted_cnt;
			posted_cnt = 0;

			membar_producer();

			/*
			 * Now set the ownership bit of the first
			 * one in the chain
			 */
			HERMON_SET_SEND_WQE_OWNER(qp, (uint32_t *)prev,
			    prev_nopcode);

			/* the FMA retry loop starts for Hermon doorbell. */
			hermon_pio_start(state, uarhdl, pio_error, fm_loop_cnt,
			    fm_status, fm_test);

			HERMON_UAR_DOORBELL(state, uarhdl,
			    (uint64_t *)(void *)&state->hs_uar->send,
			    (uint64_t)qp->qp_ring);

			/* the FMA retry loop ends. */
			hermon_pio_end(state, uarhdl, pio_error, fm_loop_cnt,
			    fm_status, fm_test);
		}
	}

	/*
	 * Update the "num_posted" return value (if necessary).
	 * Then drop the locks and return success.
	 */
	if (num_posted != NULL) {
		*num_posted = total_posted;
	}
	mutex_exit(&qp->qp_sq_lock);
	return (status);

pio_error:
	mutex_exit(&qp->qp_sq_lock);
	hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
	return (ibc_get_ci_failure(0));
}


/*
 * hermon_post_recv()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_post_recv(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_recv_wr_t *wr, uint_t num_wr, uint_t *num_posted)
{
	uint64_t			*desc;
	hermon_workq_hdr_t		*wq;
	uint32_t			head, tail, next_tail, qsize_msk;
	uint32_t			sync_from, sync_to;
	uint_t				wrindx;
	uint_t				posted_cnt;
	int				status;

	/*
	 * Check for user-mappable QP memory.  Note:  We do not allow kernel
	 * clients to post to QP memory that is accessible directly by the
	 * user.  If the QP memory is user accessible, then return an error.
	 */
	if (qp->qp_is_umap) {
		return (IBT_QP_HDL_INVALID);
	}

	/* Initialize posted_cnt */
	posted_cnt = 0;

	mutex_enter(&qp->qp_lock);

	/*
	 * Check if QP is associated with an SRQ
	 */
	if (qp->qp_srq_en == HERMON_QP_SRQ_ENABLED) {
		mutex_exit(&qp->qp_lock);
		return (IBT_SRQ_IN_USE);
	}

	/*
	 * Check QP state.  Can not post Recv requests from the "Reset" state
	 */
	if (qp->qp_state == HERMON_QP_RESET) {
		mutex_exit(&qp->qp_lock);
		return (IBT_QP_STATE_INVALID);
	}

	/* Check that work request transport type is valid */
	if ((qp->qp_serv_type != HERMON_QP_UD) &&
	    (qp->qp_serv_type != HERMON_QP_RC) &&
	    (qp->qp_serv_type != HERMON_QP_UC)) {
		mutex_exit(&qp->qp_lock);
		return (IBT_QP_SRV_TYPE_INVALID);
	}

	mutex_exit(&qp->qp_lock);
	mutex_enter(&qp->qp_rq_lock);

	/*
	 * Grab the lock for the WRID list, i.e., membar_consumer().
	 * This is not needed because the mutex_enter() above has
	 * the same effect.
	 */

	/* Save away some initial QP state */
	wq = qp->qp_rq_wqhdr;
	qsize_msk = wq->wq_mask;
	tail = wq->wq_tail;
	head = wq->wq_head;

	wrindx = 0;
	status = DDI_SUCCESS;
	/*
	 * Before we begin, save the current "tail index" for later
	 * DMA sync
	 */
	sync_from = tail;

	for (wrindx = 0; wrindx < num_wr; wrindx++) {
		if (wq->wq_full != 0) {
			status = IBT_QP_FULL;
			break;
		}
		next_tail = (tail + 1) & qsize_msk;
		if (next_tail == head) {
			wq->wq_full = 1;
		}
		desc = HERMON_QP_RQ_ENTRY(qp, tail);
		status = hermon_wqe_recv_build(state, qp, &wr[wrindx], desc);
		if (status != DDI_SUCCESS) {
			break;
		}

		wq->wq_wrid[tail] = wr[wrindx].wr_id;
		qp->qp_rq_wqecntr++;

		tail = next_tail;
		posted_cnt++;
	}

	if (posted_cnt != 0) {
		/* Save away updated "tail index" for the DMA sync */
		sync_to = tail;

		hermon_wqe_sync(qp, sync_from, sync_to, HERMON_WR_RECV,
		    DDI_DMA_SYNC_FORDEV);

		wq->wq_tail = tail;

		membar_producer();	/* ensure wrids are visible */

		/* Update the doorbell record w/ wqecntr */
		HERMON_UAR_DB_RECORD_WRITE(qp->qp_rq_vdbr,
		    qp->qp_rq_wqecntr & 0xFFFF);
	}

	if (num_posted != NULL) {
		*num_posted = posted_cnt;
	}


	mutex_exit(&qp->qp_rq_lock);
	return (status);
}

/*
 * hermon_post_srq()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_post_srq(hermon_state_t *state, hermon_srqhdl_t srq,
    ibt_recv_wr_t *wr, uint_t num_wr, uint_t *num_posted)
{
	uint64_t			*desc;
	hermon_workq_hdr_t		*wq;
	uint_t				indx, wrindx;
	uint_t				posted_cnt;
	int				status;

	mutex_enter(&srq->srq_lock);

	/*
	 * Check for user-mappable QP memory.  Note:  We do not allow kernel
	 * clients to post to QP memory that is accessible directly by the
	 * user.  If the QP memory is user accessible, then return an error.
	 */
	if (srq->srq_is_umap) {
		mutex_exit(&srq->srq_lock);
		return (IBT_SRQ_HDL_INVALID);
	}

	/*
	 * Check SRQ state.  Can not post Recv requests when SRQ is in error
	 */
	if (srq->srq_state == HERMON_SRQ_STATE_ERROR) {
		mutex_exit(&srq->srq_lock);
		return (IBT_QP_STATE_INVALID);
	}

	status = DDI_SUCCESS;
	posted_cnt = 0;
	wq = srq->srq_wq_wqhdr;
	indx = wq->wq_head;

	for (wrindx = 0; wrindx < num_wr; wrindx++) {

		if (indx == wq->wq_tail) {
			status = IBT_QP_FULL;
			break;
		}
		desc = HERMON_SRQ_WQE_ADDR(srq, indx);

		wq->wq_wrid[indx] = wr[wrindx].wr_id;

		status = hermon_wqe_srq_build(state, srq, &wr[wrindx], desc);
		if (status != DDI_SUCCESS) {
			break;
		}

		hermon_wqe_sync(srq, indx, indx + 1,
		    HERMON_WR_SRQ, DDI_DMA_SYNC_FORDEV);
		posted_cnt++;
		/*
		 * Advance to the next free SRQ entry: the 16-bit "next"
		 * index is stored (big-endian) in the WQE just built.
		 */
		indx = htons(((uint16_t *)desc)[1]);
		wq->wq_head = indx;
	}

	if (posted_cnt != 0) {

		srq->srq_wq_wqecntr += posted_cnt;

		membar_producer();	/* ensure wrids are visible */

		/* Ring the doorbell w/ wqecntr */
		HERMON_UAR_DB_RECORD_WRITE(srq->srq_wq_vdbr,
		    srq->srq_wq_wqecntr & 0xFFFF);
	}

	if (num_posted != NULL) {
		*num_posted = posted_cnt;
	}

	mutex_exit(&srq->srq_lock);
	return (status);
}


/*
 * hermon_wqe_send_build()
 *    Context: Can be called from interrupt or base context.
 */
static int
hermon_wqe_send_build(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint64_t *desc, uint_t *size)
{
	hermon_hw_snd_wqe_ud_t		*ud;
	hermon_hw_snd_wqe_remaddr_t	*rc;
	hermon_hw_snd_wqe_atomic_t	*at;
	hermon_hw_snd_wqe_remaddr_t	*uc;
	hermon_hw_snd_wqe_bind_t	*bn;
	hermon_hw_wqe_sgl_t		*ds, *old_ds;
	ibt_ud_dest_t			*dest;
	ibt_wr_ds_t			*sgl;
	hermon_ahhdl_t			ah;
	uint32_t			nds;
	int				i, j, last_ds, num_ds, status;
	int				tmpsize;

	ASSERT(MUTEX_HELD(&qp->qp_sq_lock));

	/* Initialize the information for the Data Segments */
	ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)desc +
	    sizeof (hermon_hw_snd_wqe_ctrl_t));
	nds = wr->wr_nds;
	sgl = wr->wr_sgl;
	num_ds = 0;
	i = 0;

	/*
	 * Building a Send WQE depends first and foremost on the transport
	 * type of Work Request (i.e. UD, RC, or UC)
	 */
	switch (wr->wr_trans) {
	case IBT_UD_SRV:
		/* Ensure that work request transport type matches QP type */
		if (qp->qp_serv_type != HERMON_QP_UD) {
			return (IBT_QP_SRV_TYPE_INVALID);
		}

		/*
		 * Validate the operation type.  For UD requests, only the
		 * "Send" and "Send LSO" operations are valid.
		 */
		if (wr->wr_opcode != IBT_WRC_SEND &&
		    wr->wr_opcode != IBT_WRC_SEND_LSO) {
			return (IBT_QP_OP_TYPE_INVALID);
		}

		/*
		 * If this is a Special QP (QP0 or QP1), then we need to
		 * build MLX WQEs instead.  So jump to hermon_wqe_mlx_build()
		 * and return whatever status it returns.  (LSO is never
		 * valid on a special QP.)
		 */
		if (qp->qp_is_special) {
			if (wr->wr_opcode == IBT_WRC_SEND_LSO) {
				return (IBT_QP_OP_TYPE_INVALID);
			}
			status = hermon_wqe_mlx_build(state, qp,
			    wr, desc, size);
			return (status);
		}

		/*
		 * Otherwise, if this is a normal UD Send request, then fill
		 * all the fields in the Hermon UD header for the WQE.  Note:
		 * to do this we'll need to extract some information from the
		 * Address Handle passed with the work request.
		 */
		ud = (hermon_hw_snd_wqe_ud_t *)((uintptr_t)desc +
		    sizeof (hermon_hw_snd_wqe_ctrl_t));
		if (wr->wr_opcode == IBT_WRC_SEND) {
			dest = wr->wr.ud.udwr_dest;
		} else {
			dest = wr->wr.ud_lso.lso_ud_dest;
		}
		ah = (hermon_ahhdl_t)dest->ud_ah;
		if (ah == NULL) {
			return (IBT_AH_HDL_INVALID);
		}

		/*
		 * Build the Unreliable Datagram Segment for the WQE, using
		 * the information from the address handle and the work
		 * request.
		 */
		/* mutex_enter(&ah->ah_lock); */
		if (wr->wr_opcode == IBT_WRC_SEND) {
			HERMON_WQE_BUILD_UD(qp, ud, ah, wr->wr.ud.udwr_dest);
		} else {	/* IBT_WRC_SEND_LSO */
			HERMON_WQE_BUILD_UD(qp, ud, ah,
			    wr->wr.ud_lso.lso_ud_dest);
		}
		/* mutex_exit(&ah->ah_lock); */

		/* Update "ds" for filling in Data Segments (below) */
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ud +
		    sizeof (hermon_hw_snd_wqe_ud_t));

		if (wr->wr_opcode == IBT_WRC_SEND_LSO) {
			int total_len;

			/*
			 * The LSO segment is the 4-byte LSO header info
			 * plus the inlined packet headers, rounded up to
			 * a 16-byte boundary.  Make sure it and the data
			 * segments all fit within one WQE stride.
			 */
			total_len = (4 + 0xf + wr->wr.ud_lso.lso_hdr_sz) & ~0xf;
			if ((uintptr_t)ds + total_len + (nds * 16) >
			    (uintptr_t)desc + (1 << qp->qp_sq_log_wqesz))
				return (IBT_QP_SGL_LEN_INVALID);

			bcopy(wr->wr.ud_lso.lso_hdr, (uint32_t *)ds + 1,
			    wr->wr.ud_lso.lso_hdr_sz);
			old_ds = ds;
			ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ds + total_len);
			/*
			 * Build only the FIRST non-empty data segment here;
			 * the remaining SGL entries are handled by the
			 * common fill loop below (note "i" is advanced past
			 * the consumed entry before the break).
			 */
			for (; i < nds; i++) {
				if (sgl[i].ds_len == 0)
					continue;
				HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[num_ds],
				    &sgl[i]);
				num_ds++;
				i++;
				break;
			}
			/*
			 * Make the copied headers and first data segment
			 * visible before the LSO descriptor word is written.
			 */
			membar_producer();
			HERMON_WQE_BUILD_LSO(qp, old_ds, wr->wr.ud_lso.lso_mss,
			    wr->wr.ud_lso.lso_hdr_sz);
		}

		break;

	case IBT_RC_SRV:
		/* Ensure that work request transport type matches QP type */
		if (qp->qp_serv_type != HERMON_QP_RC) {
			return (IBT_QP_SRV_TYPE_INVALID);
		}

		/*
		 * Validate the operation type.  For RC requests, we allow
		 * "Send", "RDMA Read", "RDMA Write", various "Atomic"
		 * operations, and memory window "Bind"
		 */
		if ((wr->wr_opcode != IBT_WRC_SEND) &&
		    (wr->wr_opcode != IBT_WRC_RDMAR) &&
		    (wr->wr_opcode != IBT_WRC_RDMAW) &&
		    (wr->wr_opcode != IBT_WRC_CSWAP) &&
		    (wr->wr_opcode != IBT_WRC_FADD) &&
		    (wr->wr_opcode != IBT_WRC_BIND)) {
			return (IBT_QP_OP_TYPE_INVALID);
		}

		/*
		 * If this is a Send request, then all we need to do is break
		 * out here and begin the Data Segment processing below
		 */
		if (wr->wr_opcode == IBT_WRC_SEND) {
			break;
		}

		/*
		 * If this is an RDMA Read or RDMA Write request, then fill
		 * in the "Remote Address" header fields.
		 */
		if ((wr->wr_opcode == IBT_WRC_RDMAR) ||
		    (wr->wr_opcode == IBT_WRC_RDMAW)) {
			rc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
			    sizeof (hermon_hw_snd_wqe_ctrl_t));

			/*
			 * Build the Remote Address Segment for the WQE, using
			 * the information from the RC work request.
			 */
			HERMON_WQE_BUILD_REMADDR(qp, rc, &wr->wr.rc.rcwr.rdma);

			/* Update "ds" for filling in Data Segments (below) */
			ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)rc +
			    sizeof (hermon_hw_snd_wqe_remaddr_t));
			break;
		}

		/*
		 * If this is one of the Atomic type operations (i.e
		 * Compare-Swap or Fetch-Add), then fill in both the "Remote
		 * Address" header fields and the "Atomic" header fields.
		 */
		if ((wr->wr_opcode == IBT_WRC_CSWAP) ||
		    (wr->wr_opcode == IBT_WRC_FADD)) {
			rc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
			    sizeof (hermon_hw_snd_wqe_ctrl_t));
			at = (hermon_hw_snd_wqe_atomic_t *)((uintptr_t)rc +
			    sizeof (hermon_hw_snd_wqe_remaddr_t));

			/*
			 * Build the Remote Address and Atomic Segments for
			 * the WQE, using the information from the RC Atomic
			 * work request.
			 */
			HERMON_WQE_BUILD_RC_ATOMIC_REMADDR(qp, rc, wr);
			HERMON_WQE_BUILD_ATOMIC(qp, at, wr->wr.rc.rcwr.atomic);

			/* Update "ds" for filling in Data Segments (below) */
			ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)at +
			    sizeof (hermon_hw_snd_wqe_atomic_t));

			/*
			 * Update "nds" and "sgl" because Atomic requests have
			 * only a single Data Segment (and they are encoded
			 * somewhat differently in the work request.)
			 */
			nds = 1;
			sgl = wr->wr_sgl;
			break;
		}

		/*
		 * If this is memory window Bind operation, then we call the
		 * hermon_wr_bind_check() routine to validate the request and
		 * to generate the updated RKey.  If this is successful, then
		 * we fill in the WQE's "Bind" header fields.
		 */
		if (wr->wr_opcode == IBT_WRC_BIND) {
			status = hermon_wr_bind_check(state, wr);
			if (status != DDI_SUCCESS) {
				return (status);
			}

			bn = (hermon_hw_snd_wqe_bind_t *)((uintptr_t)desc +
			    sizeof (hermon_hw_snd_wqe_ctrl_t));

			/*
			 * Build the Bind Memory Window Segments for the WQE,
			 * using the information from the RC Bind memory
			 * window work request.
			 */
			HERMON_WQE_BUILD_BIND(qp, bn, wr->wr.rc.rcwr.bind);

			/*
			 * Update the "ds" pointer.  Even though the "bind"
			 * operation requires no SGLs, this is necessary to
			 * facilitate the correct descriptor size calculations
			 * (below).
			 */
			ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)bn +
			    sizeof (hermon_hw_snd_wqe_bind_t));
			nds = 0;
		}
		break;

	case IBT_UC_SRV:
		/* Ensure that work request transport type matches QP type */
		if (qp->qp_serv_type != HERMON_QP_UC) {
			return (IBT_QP_SRV_TYPE_INVALID);
		}

		/*
		 * Validate the operation type.  For UC requests, we only
		 * allow "Send", "RDMA Write", and memory window "Bind".
		 * Note:  Unlike RC, UC does not allow "RDMA Read" or "Atomic"
		 * operations
		 */
		if ((wr->wr_opcode != IBT_WRC_SEND) &&
		    (wr->wr_opcode != IBT_WRC_RDMAW) &&
		    (wr->wr_opcode != IBT_WRC_BIND)) {
			return (IBT_QP_OP_TYPE_INVALID);
		}

		/*
		 * If this is a Send request, then all we need to do is break
		 * out here and begin the Data Segment processing below
		 */
		if (wr->wr_opcode == IBT_WRC_SEND) {
			break;
		}

		/*
		 * If this is an RDMA Write request, then fill in the "Remote
		 * Address" header fields.
		 */
		if (wr->wr_opcode == IBT_WRC_RDMAW) {
			uc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
			    sizeof (hermon_hw_snd_wqe_ctrl_t));

			/*
			 * Build the Remote Address Segment for the WQE, using
			 * the information from the UC work request.
			 */
			HERMON_WQE_BUILD_REMADDR(qp, uc, &wr->wr.uc.ucwr.rdma);

			/* Update "ds" for filling in Data Segments (below) */
			ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)uc +
			    sizeof (hermon_hw_snd_wqe_remaddr_t));
			break;
		}

		/*
		 * If this is memory window Bind operation, then we call the
		 * hermon_wr_bind_check() routine to validate the request and
		 * to generate the updated RKey.  If this is successful, then
		 * we fill in the WQE's "Bind" header fields.
		 */
		if (wr->wr_opcode == IBT_WRC_BIND) {
			status = hermon_wr_bind_check(state, wr);
			if (status != DDI_SUCCESS) {
				return (status);
			}

			bn = (hermon_hw_snd_wqe_bind_t *)((uintptr_t)desc +
			    sizeof (hermon_hw_snd_wqe_ctrl_t));

			/*
			 * Build the Bind Memory Window Segments for the WQE,
			 * using the information from the UC Bind memory
			 * window work request.
			 */
			HERMON_WQE_BUILD_BIND(qp, bn, wr->wr.uc.ucwr.bind);

			/*
			 * Update the "ds" pointer.  Even though the "bind"
			 * operation requires no SGLs, this is necessary to
			 * facilitate the correct descriptor size calculations
			 * (below).
			 */
			ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)bn +
			    sizeof (hermon_hw_snd_wqe_bind_t));
			nds = 0;
		}
		break;

	default:
		return (IBT_QP_SRV_TYPE_INVALID);
	}

	/*
	 * Now fill in the Data Segments (SGL) for the Send WQE based on
	 * the values setup above (i.e. "sgl", "nds", and the "ds" pointer.
	 * Start by checking for a valid number of SGL entries
	 */
	if (nds > qp->qp_sq_sgl) {
		return (IBT_QP_SGL_LEN_INVALID);
	}

	/*
	 * For each SGL in the Send Work Request, fill in the Send WQE's data
	 * segments.  Note: We skip any SGL with zero size because Hermon
	 * hardware cannot handle a zero for "byte_cnt" in the WQE.  Actually
	 * the encoding for zero means a 2GB transfer.
	 *
	 * First pass: count the non-empty segments so the descriptor size
	 * can be published before the segments themselves are written.
	 */
	for (last_ds = num_ds, j = i; j < nds; j++) {
		if (sgl[j].ds_len != 0)
			last_ds++;	/* real last ds of wqe to fill */
	}

	/*
	 * Return the size of descriptor (in 16-byte chunks)
	 * For Hermon, we want them (for now) to be on stride size
	 * boundaries, which was implicit in Tavor/Arbel
	 */
	tmpsize = ((uintptr_t)&ds[last_ds] - (uintptr_t)desc);

	*size = tmpsize >> 0x4;

	/*
	 * Second pass: fill the data segments in REVERSE order, so the
	 * first segment (the one hardware reads first) is written last.
	 */
	for (j = nds; --j >= i; ) {
		if (sgl[j].ds_len == 0) {
			continue;
		}

		/*
		 * Fill in the Data Segment(s) for the current WQE, using the
		 * information contained in the scatter-gather list of the
		 * work request.
		 */
		last_ds--;
		HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[last_ds], &sgl[j]);
	}

	return (DDI_SUCCESS);
}



/*
 * hermon_wqe_mlx_build()
 *    Context: Can be called from interrupt or base context.
 */
static int
hermon_wqe_mlx_build(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint64_t *desc, uint_t *size)
{
	hermon_ahhdl_t		ah;
	hermon_hw_udav_t	*udav;
	ib_lrh_hdr_t		*lrh;
	ib_grh_t		*grh;
	ib_bth_hdr_t		*bth;
	ib_deth_hdr_t		*deth;
	hermon_hw_wqe_sgl_t	*ds;
	ibt_wr_ds_t		*sgl;
	uint8_t			*mgmtclass, *hpoint, *hcount;
	uint32_t		nds, offset, pktlen;
	uint32_t		desc_sz;
	int			i, num_ds;
	int			tmpsize;

	ASSERT(MUTEX_HELD(&qp->qp_sq_lock));

	/* Initialize the information for the Data Segments */
	ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)desc +
	    sizeof (hermon_hw_mlx_wqe_nextctrl_t));

	/*
	 * Pull the address handle from the work request.  The UDAV will
	 * be used to answer some questions about the request.
	 */
	ah = (hermon_ahhdl_t)wr->wr.ud.udwr_dest->ud_ah;
	if (ah == NULL) {
		return (IBT_AH_HDL_INVALID);
	}
	mutex_enter(&ah->ah_lock);
	udav = ah->ah_udav;

	/*
	 * If the request is for QP1 and the destination LID is equal to
	 * the Permissive LID, then return an error.  This combination is
	 * not allowed
	 */
	if ((udav->rlid == IB_LID_PERMISSIVE) &&
	    (qp->qp_is_special == HERMON_QP_GSI)) {
		mutex_exit(&ah->ah_lock);
		return (IBT_AH_HDL_INVALID);
	}

	/*
	 * Calculate the size of the packet headers, including the GRH
	 * (if necessary)
	 */
	desc_sz = sizeof (ib_lrh_hdr_t) + sizeof (ib_bth_hdr_t) +
	    sizeof (ib_deth_hdr_t);
	if (udav->grh) {
		desc_sz += sizeof (ib_grh_t);
	}

	/*
	 * Begin to build the first "inline" data segment for the packet
	 * headers.  Note:  By specifying "inline" we can build the contents
	 * of the MAD packet headers directly into the work queue (as part
	 * descriptor).  This has the advantage of both speeding things up
	 * and of not requiring the driver to allocate/register any additional
	 * memory for the packet headers.
	 */
	HERMON_WQE_BUILD_INLINE(qp, &ds[0], desc_sz);
	desc_sz += 4;	/* account for the 4-byte inline segment header */

	/*
	 * Build Local Route Header (LRH)
	 *    We start here by building the LRH into a temporary location.
	 *    When we have finished we copy the LRH data into the descriptor.
	 *
	 *    Notice that the VL values are hardcoded.  This is not a problem
	 *    because VL15 is decided later based on the value in the MLX
	 *    transport "next/ctrl" header (see the "vl15" bit below), and it
	 *    is otherwise (meaning for QP1) chosen from the SL-to-VL table
	 *    values.  This rule does not hold for loopback packets however
	 *    (all of which bypass the SL-to-VL tables) and it is the reason
	 *    that non-QP0 MADs are setup with VL hardcoded to zero below.
	 *
	 *    Notice also that Source LID is hardcoded to the Permissive LID
	 *    (0xFFFF).  This is also not a problem because if the Destination
	 *    LID is not the Permissive LID, then the "slr" value in the MLX
	 *    transport "next/ctrl" header will be set to zero and the hardware
	 *    will pull the LID from value in the port.
	 */
	lrh = (ib_lrh_hdr_t *)((uintptr_t)&ds[0] + 4);
	pktlen = (desc_sz + 0x100) >> 2;
	HERMON_WQE_BUILD_MLX_LRH(lrh, qp, udav, pktlen);

	/*
	 * Build Global Route Header (GRH)
	 *    This is only built if necessary as defined by the "grh" bit in
	 *    the address vector.  Note:  We also calculate the offset to the
	 *    next header (BTH) based on whether or not the "grh" bit is set.
	 */
	if (udav->grh) {
		/*
		 * If the request is for QP0, then return an error.  The
		 * combination of global routing (GRH) and QP0 is not allowed.
		 */
		if (qp->qp_is_special == HERMON_QP_SMI) {
			mutex_exit(&ah->ah_lock);
			return (IBT_AH_HDL_INVALID);
		}
		grh = (ib_grh_t *)((uintptr_t)lrh + sizeof (ib_lrh_hdr_t));
		HERMON_WQE_BUILD_MLX_GRH(state, grh, qp, udav, pktlen);

		bth = (ib_bth_hdr_t *)((uintptr_t)grh + sizeof (ib_grh_t));
	} else {
		bth = (ib_bth_hdr_t *)((uintptr_t)lrh + sizeof (ib_lrh_hdr_t));
	}
	mutex_exit(&ah->ah_lock);


	/*
	 * Build Base Transport Header (BTH)
	 *    Notice that the M, PadCnt, and TVer fields are all set
	 *    to zero implicitly.  This is true for all Management Datagrams
	 *    MADs whether GSI or SMI.
	 */
	HERMON_WQE_BUILD_MLX_BTH(state, bth, qp, wr);

	/*
	 * Build Datagram Extended Transport Header (DETH)
	 */
	deth = (ib_deth_hdr_t *)((uintptr_t)bth + sizeof (ib_bth_hdr_t));
	HERMON_WQE_BUILD_MLX_DETH(deth, qp);

	/* Ensure that the Data Segment is aligned on a 16-byte boundary */
	ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)deth + sizeof (ib_deth_hdr_t));
	ds = (hermon_hw_wqe_sgl_t *)(((uintptr_t)ds + 0xF) & ~0xF);
	nds = wr->wr_nds;
	sgl = wr->wr_sgl;
	num_ds = 0;

	/*
	 * Now fill in the Data Segments (SGL) for the MLX WQE based on the
	 * values set up above (i.e. "sgl", "nds", and the "ds" pointer.
	 * Start by checking for a valid number of SGL entries
	 */
	if (nds > qp->qp_sq_sgl) {
		return (IBT_QP_SGL_LEN_INVALID);
	}

	/*
	 * For each SGL in the Send Work Request, fill in the MLX WQE's data
	 * segments.  Note: We skip any SGL with zero size because Hermon
	 * hardware cannot handle a zero for "byte_cnt" in the WQE.  Actually
	 * the encoding for zero means a 2GB transfer.  Because of this special
	 * encoding in the hardware, we mask the requested length with
	 * HERMON_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as
	 * zero.)
	 */
	mgmtclass = hpoint = hcount = NULL;
	offset = 0;
	for (i = 0; i < nds; i++) {
		if (sgl[i].ds_len == 0) {
			continue;
		}

		/*
		 * Fill in the Data Segment(s) for the MLX send WQE, using
		 * the information contained in the scatter-gather list of
		 * the work request.
		 */
		HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[num_ds], &sgl[i]);

		/*
		 * Search through the contents of all MADs posted to QP0 to
		 * initialize pointers to the places where Directed Route "hop
		 * pointer", "hop count", and "mgmtclass" would be.  Hermon
		 * needs these updated (i.e. incremented or decremented, as
		 * necessary) by software.
		 */
		if (qp->qp_is_special == HERMON_QP_SMI) {

			HERMON_SPECIAL_QP_DRMAD_GET_MGMTCLASS(mgmtclass,
			    offset, sgl[i].ds_va, sgl[i].ds_len);

			HERMON_SPECIAL_QP_DRMAD_GET_HOPPOINTER(hpoint,
			    offset, sgl[i].ds_va, sgl[i].ds_len);

			HERMON_SPECIAL_QP_DRMAD_GET_HOPCOUNT(hcount,
			    offset, sgl[i].ds_va, sgl[i].ds_len);

			offset += sgl[i].ds_len;
		}
		num_ds++;
	}

	/*
	 * Hermon's Directed Route MADs need to have the "hop pointer"
	 * incremented/decremented (as necessary) depending on whether it is
	 * currently less than or greater than the "hop count" (i.e. whether
	 * the MAD is a request or a response.)
	 *
	 * NOTE(review): this assumes every QP0 MAD is large enough that the
	 * GET_* macros above located all three fields — if they did not,
	 * mgmtclass/hpoint/hcount would still be NULL here; confirm callers
	 * guarantee a full MAD header.
	 */
	if (qp->qp_is_special == HERMON_QP_SMI) {
		HERMON_SPECIAL_QP_DRMAD_DO_HOPPOINTER_MODIFY(*mgmtclass,
		    *hpoint, *hcount);
	}

	/*
	 * Now fill in the ICRC Data Segment.  This data segment is inlined
	 * just like the packets headers above, but it is only four bytes and
	 * set to zero (to indicate that we wish the hardware to generate ICRC.
	 */
	HERMON_WQE_BUILD_INLINE_ICRC(qp, &ds[num_ds], 4, 0);
	num_ds++;

	/*
	 * Return the size of descriptor (in 16-byte chunks)
	 * For Hermon, we want them (for now) to be on stride size
	 * boundaries, which was implicit in Tavor/Arbel
	 */
	tmpsize = ((uintptr_t)&ds[num_ds] - (uintptr_t)desc);

	*size = tmpsize >> 0x04;

	return (DDI_SUCCESS);
}



/*
 * hermon_wqe_recv_build()
 *    Context: Can be called from interrupt or base context.
1789 */ 1790 /* ARGSUSED */ 1791 static int 1792 hermon_wqe_recv_build(hermon_state_t *state, hermon_qphdl_t qp, 1793 ibt_recv_wr_t *wr, uint64_t *desc) 1794 { 1795 hermon_hw_wqe_sgl_t *ds; 1796 int i, num_ds; 1797 1798 ASSERT(MUTEX_HELD(&qp->qp_rq_lock)); 1799 1800 /* 1801 * Fill in the Data Segments (SGL) for the Recv WQE - don't 1802 * need to have a reserved for the ctrl, there is none on the 1803 * recv queue for hermon, but will need to put an invalid 1804 * (null) scatter pointer per PRM 1805 */ 1806 ds = (hermon_hw_wqe_sgl_t *)(uintptr_t)desc; 1807 num_ds = 0; 1808 1809 /* Check for valid number of SGL entries */ 1810 if (wr->wr_nds > qp->qp_rq_sgl) { 1811 return (IBT_QP_SGL_LEN_INVALID); 1812 } 1813 1814 /* 1815 * For each SGL in the Recv Work Request, fill in the Recv WQE's data 1816 * segments. Note: We skip any SGL with zero size because Hermon 1817 * hardware cannot handle a zero for "byte_cnt" in the WQE. Actually 1818 * the encoding for zero means a 2GB transfer. Because of this special 1819 * encoding in the hardware, we mask the requested length with 1820 * HERMON_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as 1821 * zero.) 1822 */ 1823 for (i = 0; i < wr->wr_nds; i++) { 1824 if (wr->wr_sgl[i].ds_len == 0) { 1825 continue; 1826 } 1827 1828 /* 1829 * Fill in the Data Segment(s) for the receive WQE, using the 1830 * information contained in the scatter-gather list of the 1831 * work request. 1832 */ 1833 HERMON_WQE_BUILD_DATA_SEG_RECV(&ds[num_ds], &wr->wr_sgl[i]); 1834 num_ds++; 1835 } 1836 1837 /* put the null sgl pointer as well if needed */ 1838 if (num_ds < qp->qp_rq_sgl) { 1839 HERMON_WQE_BUILD_DATA_SEG_RECV(&ds[num_ds], &null_sgl); 1840 } 1841 1842 return (DDI_SUCCESS); 1843 } 1844 1845 1846 1847 /* 1848 * hermon_wqe_srq_build() 1849 * Context: Can be called from interrupt or base context. 
1850 */ 1851 /* ARGSUSED */ 1852 static int 1853 hermon_wqe_srq_build(hermon_state_t *state, hermon_srqhdl_t srq, 1854 ibt_recv_wr_t *wr, uint64_t *desc) 1855 { 1856 hermon_hw_wqe_sgl_t *ds; 1857 int i, num_ds; 1858 1859 ASSERT(MUTEX_HELD(&srq->srq_lock)); 1860 1861 /* Fill in the Data Segments (SGL) for the Recv WQE */ 1862 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)desc + 1863 sizeof (hermon_hw_srq_wqe_next_t)); 1864 num_ds = 0; 1865 1866 /* Check for valid number of SGL entries */ 1867 if (wr->wr_nds > srq->srq_wq_sgl) { 1868 return (IBT_QP_SGL_LEN_INVALID); 1869 } 1870 1871 /* 1872 * For each SGL in the Recv Work Request, fill in the Recv WQE's data 1873 * segments. Note: We skip any SGL with zero size because Hermon 1874 * hardware cannot handle a zero for "byte_cnt" in the WQE. Actually 1875 * the encoding for zero means a 2GB transfer. Because of this special 1876 * encoding in the hardware, we mask the requested length with 1877 * HERMON_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as 1878 * zero.) 1879 */ 1880 for (i = 0; i < wr->wr_nds; i++) { 1881 if (wr->wr_sgl[i].ds_len == 0) { 1882 continue; 1883 } 1884 1885 /* 1886 * Fill in the Data Segment(s) for the receive WQE, using the 1887 * information contained in the scatter-gather list of the 1888 * work request. 1889 */ 1890 HERMON_WQE_BUILD_DATA_SEG_RECV(&ds[num_ds], &wr->wr_sgl[i]); 1891 num_ds++; 1892 } 1893 1894 /* 1895 * put in the null sgl pointer as well, if needed 1896 */ 1897 if (num_ds < srq->srq_wq_sgl) { 1898 HERMON_WQE_BUILD_DATA_SEG_RECV(&ds[num_ds], &null_sgl); 1899 } 1900 1901 return (DDI_SUCCESS); 1902 } 1903 1904 1905 /* 1906 * hermon_wr_get_immediate() 1907 * Context: Can be called from interrupt or base context. 1908 */ 1909 static uint32_t 1910 hermon_wr_get_immediate(ibt_send_wr_t *wr) 1911 { 1912 /* 1913 * This routine extracts the "immediate data" from the appropriate 1914 * location in the IBTF work request. 
Because of the way the 1915 * work request structure is defined, the location for this data 1916 * depends on the actual work request operation type. 1917 */ 1918 1919 /* For RDMA Write, test if RC or UC */ 1920 if (wr->wr_opcode == IBT_WRC_RDMAW) { 1921 if (wr->wr_trans == IBT_RC_SRV) { 1922 return (wr->wr.rc.rcwr.rdma.rdma_immed); 1923 } else { /* IBT_UC_SRV */ 1924 return (wr->wr.uc.ucwr.rdma.rdma_immed); 1925 } 1926 } 1927 1928 /* For Send, test if RC, UD, or UC */ 1929 if (wr->wr_opcode == IBT_WRC_SEND) { 1930 if (wr->wr_trans == IBT_RC_SRV) { 1931 return (wr->wr.rc.rcwr.send_immed); 1932 } else if (wr->wr_trans == IBT_UD_SRV) { 1933 return (wr->wr.ud.udwr_immed); 1934 } else { /* IBT_UC_SRV */ 1935 return (wr->wr.uc.ucwr.send_immed); 1936 } 1937 } 1938 1939 /* 1940 * If any other type of request, then immediate is undefined 1941 */ 1942 return (0); 1943 } 1944 1945 /* 1946 * hermon_wqe_headroom() 1947 * Context: can be called from interrupt or base, currently only from 1948 * base context. 
 *    Routine that fills in the headroom for the Send Queue, invalidating
 *    the next several WQEs (in 64-byte sections) starting at "from" so the
 *    hardware will not treat stale descriptor contents as valid work.
 */

static void
hermon_wqe_headroom(uint_t from, hermon_qphdl_t qp)
{
	uint32_t	*wqe_start, *wqe_top, *wqe_base, qsize;
	int		hdrmwqes, wqesizebytes, sectperwqe;
	uint32_t	invalue;
	int		i, j;

	qsize = qp->qp_sq_bufsz;
	wqesizebytes = 1 << qp->qp_sq_log_wqesz;
	sectperwqe = wqesizebytes >> 6;	/* 64 bytes/section */
	hdrmwqes = qp->qp_sq_hdrmwqes;
	wqe_base = (uint32_t *)HERMON_QP_SQ_ENTRY(qp, 0);
	wqe_top = (uint32_t *)HERMON_QP_SQ_ENTRY(qp, qsize);
	wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp, from);

	for (i = 0; i < hdrmwqes; i++) {
		for (j = 0; j < sectperwqe; j++) {
			if (j == 0) {	/* 1st section of wqe */
				/*
				 * Preserve the ownership bit (bit 31) while
				 * forcing all other bits on, invalidating
				 * the rest of the first word.
				 */
				invalue = ddi_get32(qp->qp_wqinfo.qa_acchdl,
				    wqe_start) | 0x7FFFFFFF;
			} else {
				/* or just invalidate it */
				invalue = 0xFFFFFFFF;
			}
			ddi_put32(qp->qp_wqinfo.qa_acchdl, wqe_start, invalue);
			wqe_start += 16;	/* move 64 bytes */
		}
		if (wqe_start == wqe_top)	/* hit the end of the queue */
			wqe_start = wqe_base;	/* wrap to start */
	}
}

/*
 * hermon_wqe_sync()
 *    Context: Can be called from interrupt or base context.
 */
static void
hermon_wqe_sync(void *hdl, uint_t sync_from, uint_t sync_to,
    uint_t sync_type, uint_t flag)
{
	hermon_qphdl_t		qp;
	hermon_srqhdl_t		srq;
	uint64_t		*wqe_from, *wqe_to;
	uint64_t		*wq_base, *wq_top, *qp_base;
	ddi_dma_handle_t	dmahdl;
	off_t			offset;
	size_t			length;
	uint32_t		qsize;
	int			status;

	if (sync_type == HERMON_WR_SRQ) {
		srq = (hermon_srqhdl_t)hdl;
		/* Get the DMA handle from SRQ context */
		dmahdl = srq->srq_mrhdl->mr_bindinfo.bi_dmahdl;
		/* get base addr of the buffer */
		qp_base = (uint64_t *)(void *)srq->srq_wq_buf;
	} else {
		qp = (hermon_qphdl_t)hdl;
		/* Get the DMA handle from QP context */
		dmahdl = qp->qp_mrhdl->mr_bindinfo.bi_dmahdl;
		/*
		 * Determine the base address of the QP buffer: whichever
		 * of the send/receive queues sits at the start of the
		 * allocation (the one whose base offset is zero).
		 */
		if (qp->qp_sq_baseaddr == 0) {
			qp_base = (uint64_t *)(void *)(qp->qp_sq_buf);
		} else {
			qp_base = (uint64_t *)(void *)(qp->qp_rq_buf);
		}
	}

	/*
	 * Depending on the type of the work queue, we grab information
	 * about the address ranges we need to DMA sync.
	 */

	if (sync_type == HERMON_WR_SEND) {
		wqe_from = HERMON_QP_SQ_ENTRY(qp, sync_from);
		wqe_to   = HERMON_QP_SQ_ENTRY(qp, sync_to);
		qsize	 = qp->qp_sq_bufsz;

		wq_base = HERMON_QP_SQ_ENTRY(qp, 0);
		wq_top	= HERMON_QP_SQ_ENTRY(qp, qsize);
	} else if (sync_type == HERMON_WR_RECV) {
		wqe_from = HERMON_QP_RQ_ENTRY(qp, sync_from);
		wqe_to   = HERMON_QP_RQ_ENTRY(qp, sync_to);
		qsize	 = qp->qp_rq_bufsz;

		wq_base = HERMON_QP_RQ_ENTRY(qp, 0);
		wq_top	= HERMON_QP_RQ_ENTRY(qp, qsize);
	} else {
		wqe_from = HERMON_SRQ_WQ_ENTRY(srq, sync_from);
		wqe_to   = HERMON_SRQ_WQ_ENTRY(srq, sync_to);
		qsize	 = srq->srq_wq_bufsz;

		wq_base = HERMON_SRQ_WQ_ENTRY(srq, 0);
		wq_top	= HERMON_SRQ_WQ_ENTRY(srq, qsize);
	}

	/*
	 * There are two possible cases for the beginning and end of the WQE
	 * chain we are trying to sync.  Either this is the simple case, where
	 * the end of the chain is below the beginning of the chain, or it is
	 * the "wrap-around" case, where the end of the chain has wrapped over
	 * the end of the queue.  In the former case, we simply need to
	 * calculate the span from beginning to end and sync it.  In the latter
	 * case, however, we need to calculate the span from the top of the
	 * work queue to the end of the chain and sync that, and then we need
	 * to find the other portion (from beginning of chain to end of queue)
	 * and sync that as well.  Note: if the "top to end" span is actually
	 * zero length, then we don't do a DMA sync because a zero length DMA
	 * sync unnecessarily syncs the entire work queue.
	 *
	 * NOTE(review): ddi_dma_sync() failures are silently swallowed here
	 * (the function is void); there is nothing a caller could usefully
	 * do with the error at this point.
	 */
	if (wqe_to > wqe_from) {
		/* "From Beginning to End" */

		offset = (off_t)((uintptr_t)wqe_from - (uintptr_t)qp_base);
		length = (size_t)((uintptr_t)wqe_to - (uintptr_t)wqe_from);

		status = ddi_dma_sync(dmahdl, offset, length, flag);
		if (status != DDI_SUCCESS) {
			return;
		}
	} else {
		/* "From Top to End" */

		offset = (off_t)((uintptr_t)wq_base - (uintptr_t)qp_base);
		length = (size_t)((uintptr_t)wqe_to - (uintptr_t)wq_base);
		if (length) {
			status = ddi_dma_sync(dmahdl, offset, length, flag);
			if (status != DDI_SUCCESS) {
				return;
			}
		}

		/* "From Beginning to Bottom" */

		offset = (off_t)((uintptr_t)wqe_from - (uintptr_t)qp_base);
		length = (size_t)((uintptr_t)wq_top - (uintptr_t)wqe_from);
		status = ddi_dma_sync(dmahdl, offset, length, flag);
		if (status != DDI_SUCCESS) {
			return;
		}
	}
}


/*
 * hermon_wr_bind_check()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
static int
hermon_wr_bind_check(hermon_state_t *state, ibt_send_wr_t *wr)
{
	ibt_bind_flags_t	bind_flags;
	uint64_t		vaddr, len;
	uint64_t		reg_start_addr, reg_end_addr;
	hermon_mwhdl_t		mw;
	hermon_mrhdl_t		mr;
	hermon_rsrc_t		*mpt;
	uint32_t		new_rkey;

	/* Check for a valid Memory Window handle in the WR */
	mw = (hermon_mwhdl_t)wr->wr.rc.rcwr.bind->bind_ibt_mw_hdl;
	if (mw == NULL) {
		return (IBT_MW_HDL_INVALID);
	}

	/* Check for a valid Memory Region handle in the WR */
	mr = (hermon_mrhdl_t)wr->wr.rc.rcwr.bind->bind_ibt_mr_hdl;
	if (mr == NULL) {
		return (IBT_MR_HDL_INVALID);
	}

	/* MR lock first, then MW lock — keep this ordering on all paths */
	mutex_enter(&mr->mr_lock);
	mutex_enter(&mw->mr_lock);

	/*
	 * Check here to see if the memory region has already been partially
	 * deregistered as a result of a hermon_umap_umemlock_cb() callback.
	 * If so, this is an error, return failure.
	 */
	if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
		mutex_exit(&mr->mr_lock);
		mutex_exit(&mw->mr_lock);
		return (IBT_MR_HDL_INVALID);
	}

	/* Check for a valid Memory Window RKey (i.e. a matching RKey) */
	if (mw->mr_rkey != wr->wr.rc.rcwr.bind->bind_rkey) {
		mutex_exit(&mr->mr_lock);
		mutex_exit(&mw->mr_lock);
		return (IBT_MR_RKEY_INVALID);
	}

	/* Check for a valid Memory Region LKey (i.e. a matching LKey) */
	if (mr->mr_lkey != wr->wr.rc.rcwr.bind->bind_lkey) {
		mutex_exit(&mr->mr_lock);
		mutex_exit(&mw->mr_lock);
		return (IBT_MR_LKEY_INVALID);
	}

	/*
	 * Now check for valid "vaddr" and "len".  Note:  We don't check the
	 * "vaddr" range when "len == 0" (i.e. on unbind operations)
	 */
	len = wr->wr.rc.rcwr.bind->bind_len;
	if (len != 0) {
		vaddr = wr->wr.rc.rcwr.bind->bind_va;
		reg_start_addr = mr->mr_bindinfo.bi_addr;
		reg_end_addr   = mr->mr_bindinfo.bi_addr +
		    (mr->mr_bindinfo.bi_len - 1);
		if ((vaddr < reg_start_addr) || (vaddr > reg_end_addr)) {
			mutex_exit(&mr->mr_lock);
			mutex_exit(&mw->mr_lock);
			return (IBT_MR_VA_INVALID);
		}
		vaddr = (vaddr + len) - 1;
		if (vaddr > reg_end_addr) {
			mutex_exit(&mr->mr_lock);
			mutex_exit(&mw->mr_lock);
			return (IBT_MR_LEN_INVALID);
		}
	}

	/*
	 * Validate the bind access flags.  Remote Write and Atomic access for
	 * the Memory Window require that Local Write access be set in the
	 * corresponding Memory Region.
	 */
	bind_flags = wr->wr.rc.rcwr.bind->bind_flags;
	if (((bind_flags & IBT_WR_BIND_WRITE) ||
	    (bind_flags & IBT_WR_BIND_ATOMIC)) &&
	    !(mr->mr_accflag & IBT_MR_LOCAL_WRITE)) {
		mutex_exit(&mr->mr_lock);
		mutex_exit(&mw->mr_lock);
		return (IBT_MR_ACCESS_REQ_INVALID);
	}

	/* Calculate the new RKey for the Memory Window */
	mpt = mw->mr_mptrsrcp;
	new_rkey = hermon_mr_keycalc(mpt->hr_indx);
	new_rkey = hermon_mr_key_swap(new_rkey);

	/* Hand the new RKey back to the caller and update the MW */
	wr->wr.rc.rcwr.bind->bind_rkey_out = new_rkey;
	mw->mr_rkey = new_rkey;

	mutex_exit(&mr->mr_lock);
	mutex_exit(&mw->mr_lock);
	return (DDI_SUCCESS);
}


/*
 * hermon_wrid_from_reset_handling()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
hermon_wrid_from_reset_handling(hermon_state_t *state, hermon_qphdl_t qp)
{
	hermon_workq_hdr_t	*swq, *rwq;
	uint_t			qp_srq_en;

	if (qp->qp_is_umap)
		return (DDI_SUCCESS);

	/* grab the cq lock(s) to modify the wqavl tree */
	mutex_enter(&qp->qp_rq_cqhdl->cq_lock);
#ifdef __lock_lint
	mutex_enter(&qp->qp_sq_cqhdl->cq_lock);
#else
	if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl)
		mutex_enter(&qp->qp_sq_cqhdl->cq_lock);
#endif

	/* Chain the newly allocated work queue header to the CQ's list */
	hermon_cq_workq_add(qp->qp_sq_cqhdl, &qp->qp_sq_wqavl);

	swq = qp->qp_sq_wqhdr;
	swq->wq_head = 0;
	swq->wq_tail = 0;
	swq->wq_full = 0;

	/*
	 * Now we repeat all the above operations for the receive work queue,
	 * or shared receive work queue.
	 *
	 * Note: We still use the 'qp_rq_cqhdl' even in the SRQ case.
	 */
	qp_srq_en = qp->qp_srq_en;

#ifdef __lock_lint
	mutex_enter(&qp->qp_srqhdl->srq_lock);
#else
	if (qp_srq_en == HERMON_QP_SRQ_ENABLED) {
		mutex_enter(&qp->qp_srqhdl->srq_lock);
	} else {
		/* non-SRQ case: reset the software receive queue state */
		rwq = qp->qp_rq_wqhdr;
		rwq->wq_head = 0;
		rwq->wq_tail = 0;
		rwq->wq_full = 0;
		qp->qp_rq_wqecntr = 0;
	}
#endif
	hermon_cq_workq_add(qp->qp_rq_cqhdl, &qp->qp_rq_wqavl);

#ifdef __lock_lint
	mutex_exit(&qp->qp_srqhdl->srq_lock);
#else
	if (qp_srq_en == HERMON_QP_SRQ_ENABLED) {
		mutex_exit(&qp->qp_srqhdl->srq_lock);
	}
#endif

#ifdef __lock_lint
	mutex_exit(&qp->qp_sq_cqhdl->cq_lock);
#else
	if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl)
		mutex_exit(&qp->qp_sq_cqhdl->cq_lock);
#endif
	mutex_exit(&qp->qp_rq_cqhdl->cq_lock);
	return (DDI_SUCCESS);
}


/*
 * hermon_wrid_to_reset_handling()
 *    Context: Can be called from interrupt or base context.
 *
 * WRID-tracking teardown performed when a QP transitions back to the
 * Reset state: flushes any outstanding CQEs for this QP's QPN and then
 * removes the QP's send and receive wqavl entries from the owning CQs'
 * work queue lists.  No-op (DDI_SUCCESS) for user-mapped QPs.
 */
int
hermon_wrid_to_reset_handling(hermon_state_t *state, hermon_qphdl_t qp)
{
	uint_t qp_srq_en;

	/* user-mapped QPs track WRIDs in userland, nothing to do here */
	if (qp->qp_is_umap)
		return (DDI_SUCCESS);

	/*
	 * If there are unpolled entries in these CQs, they are
	 * polled/flushed.
	 * Grab the CQ lock(s) before manipulating the lists.
	 */
	mutex_enter(&qp->qp_rq_cqhdl->cq_lock);
#ifdef __lock_lint
	mutex_enter(&qp->qp_sq_cqhdl->cq_lock);
#else
	/* send and recv CQ may be the same object; don't lock it twice */
	if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl)
		mutex_enter(&qp->qp_sq_cqhdl->cq_lock);
#endif

	qp_srq_en = qp->qp_srq_en;
#ifdef __lock_lint
	mutex_enter(&qp->qp_srqhdl->srq_lock);
#else
	if (qp_srq_en == HERMON_QP_SRQ_ENABLED) {
		mutex_enter(&qp->qp_srqhdl->srq_lock);
	}
#endif
	/*
	 * Flush the entries on the CQ for this QP's QPN.
	 */
	hermon_cq_entries_flush(state, qp);

#ifdef __lock_lint
	mutex_exit(&qp->qp_srqhdl->srq_lock);
#else
	if (qp_srq_en == HERMON_QP_SRQ_ENABLED) {
		mutex_exit(&qp->qp_srqhdl->srq_lock);
	}
#endif

	/* Detach this QP's work queues from the CQs' wqavl trees */
	hermon_cq_workq_remove(qp->qp_rq_cqhdl, &qp->qp_rq_wqavl);
	hermon_cq_workq_remove(qp->qp_sq_cqhdl, &qp->qp_sq_wqavl);

#ifdef __lock_lint
	mutex_exit(&qp->qp_sq_cqhdl->cq_lock);
#else
	if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl)
		mutex_exit(&qp->qp_sq_cqhdl->cq_lock);
#endif
	mutex_exit(&qp->qp_rq_cqhdl->cq_lock);

	return (IBT_SUCCESS);
}


/*
 * hermon_wrid_get_entry()
 *    Context: Can be called from interrupt or base context.
 *
 * Given a CQE, looks up the work queue it completed against and
 * returns the 64-bit WRID that was recorded when the WR was posted.
 * As a side effect, advances the software queue state: for SRQs the
 * completed WQE is linked back onto the SRQ free list; otherwise the
 * work queue head is advanced past the completed entry.
 */
uint64_t
hermon_wrid_get_entry(hermon_cqhdl_t cq, hermon_hw_cqe_t *cqe)
{
	hermon_workq_avl_t	*wqa;
	hermon_workq_hdr_t	*wq;
	uint64_t		wrid;
	uint_t			send_or_recv, qpnum;
	uint32_t		indx;

	/*
	 * Determine whether this CQE is a send or receive completion.
	 */
	send_or_recv = HERMON_CQE_SENDRECV_GET(cq, cqe);

	/* Find the work queue for this QP number (send or receive side) */
	qpnum = HERMON_CQE_QPNUM_GET(cq, cqe);
	wqa = hermon_wrid_wqavl_find(cq, qpnum, send_or_recv);
	/* NOTE(review): wqa is dereferenced unchecked — the wqavl entry is
	 * presumably guaranteed to exist for any CQE we are handed; confirm
	 * against the CQ polling path. */
	wq = wqa->wqa_wq;

	/*
	 * Regardless of whether the completion is the result of a "success"
	 * or a "failure", we lock the list of "containers" and attempt to
	 * search for the the first matching completion (i.e. the first WR
	 * with a matching WQE addr and size).  Once we find it, we pull out
	 * the "wrid" field and return it (see below).  XXX Note: One possible
	 * future enhancement would be to enable this routine to skip over
	 * any "unsignaled" completions to go directly to the next "signaled"
	 * entry on success.
	 */
	indx = HERMON_CQE_WQEADDRSZ_GET(cq, cqe) & wq->wq_mask;
	wrid = wq->wq_wrid[indx];
	if (wqa->wqa_srq_en) {
		struct hermon_sw_srq_s	*srq;
		uint64_t		*desc;

		/* put wqe back on the srq free list */
		srq = wqa->wqa_srq;
		mutex_enter(&srq->srq_lock);
		desc = HERMON_SRQ_WQE_ADDR(srq, wq->wq_tail);
		/*
		 * Store the freed index into the tail descriptor's next
		 * field (second 16-bit word, big-endian per htons).
		 */
		((uint16_t *)desc)[1] = htons(indx);
		wq->wq_tail = indx;
		mutex_exit(&srq->srq_lock);
	} else {
		/* ordinary WQ: consume the entry and clear the full flag */
		wq->wq_head = (indx + 1) & wq->wq_mask;
		wq->wq_full = 0;
	}

	return (wrid);
}


/*
 * hermon_wrid_workq_compare()
 *    AVL comparison routine for the CQ's wqavl tree.  Orders entries
 *    by QP number first, then by work queue type; returns -1/0/+1 as
 *    required by avl_create().
 */
int
hermon_wrid_workq_compare(const void *p1, const void *p2)
{
	hermon_workq_compare_t	*cmpp;
	hermon_workq_avl_t	*curr;

	cmpp = (hermon_workq_compare_t *)p1;
	curr = (hermon_workq_avl_t *)p2;

	if (cmpp->cmp_qpn < curr->wqa_qpn)
		return (-1);
	else if (cmpp->cmp_qpn > curr->wqa_qpn)
		return (+1);
	else if (cmpp->cmp_type < curr->wqa_type)
		return (-1);
	else if (cmpp->cmp_type > curr->wqa_type)
		return (+1);
	else
		return (0);
}


/*
 * hermon_wrid_workq_find()
 *    Context: Can be called from interrupt or base context.
 *
 * Looks up the workq AVL entry for the given QP number and queue type
 * (send or recv) in the CQ's wqavl tree.  Returns NULL if no match.
 */
static hermon_workq_avl_t *
hermon_wrid_wqavl_find(hermon_cqhdl_t cq, uint_t qpn, uint_t wq_type)
{
	hermon_workq_avl_t	*curr;
	hermon_workq_compare_t	cmp;

	/*
	 * Walk the CQ's work queue list, trying to find a send or recv queue
	 * with the same QP number.  We do this even if we are going to later
	 * create a new entry because it helps us easily find the end of the
	 * list.
	 */
	cmp.cmp_qpn = qpn;
	cmp.cmp_type = wq_type;
#ifdef __lock_lint
	hermon_wrid_workq_compare(NULL, NULL);
#endif
	curr = avl_find(&cq->cq_wrid_wqhdr_avl_tree, &cmp, NULL);

	return (curr);
}


/*
 * hermon_wrid_wqhdr_create()
 *    Context: Can be called from base context.
 *
 * Allocates and initializes a software work queue header along with a
 * bufsz-entry array of 64-bit WRIDs.  Returns NULL on allocation
 * failure (KM_NOSLEEP).  NOTE(review): wq_mask = bufsz - 1 implies
 * bufsz is expected to be a power of two — callers must ensure this.
 */
/* ARGSUSED */
hermon_workq_hdr_t *
hermon_wrid_wqhdr_create(int bufsz)
{
	hermon_workq_hdr_t	*wqhdr;

	/*
	 * Allocate space for the wqhdr, and an array to record all the wrids.
	 */
	wqhdr = (hermon_workq_hdr_t *)kmem_zalloc(sizeof (*wqhdr), KM_NOSLEEP);
	if (wqhdr == NULL) {
		return (NULL);
	}
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*wqhdr))
	wqhdr->wq_wrid = kmem_zalloc(bufsz * sizeof (uint64_t), KM_NOSLEEP);
	if (wqhdr->wq_wrid == NULL) {
		/* wrid array allocation failed; undo the wqhdr allocation */
		kmem_free(wqhdr, sizeof (*wqhdr));
		return (NULL);
	}
	wqhdr->wq_size = bufsz;
	wqhdr->wq_mask = bufsz - 1;

	return (wqhdr);
}

/*
 * hermon_wrid_wqhdr_destroy()
 *    Frees a work queue header and its WRID array (counterpart to
 *    hermon_wrid_wqhdr_create() above).
 */
void
hermon_wrid_wqhdr_destroy(hermon_workq_hdr_t *wqhdr)
{
	kmem_free(wqhdr->wq_wrid, wqhdr->wq_size * sizeof (uint64_t));
	kmem_free(wqhdr, sizeof (*wqhdr));
}


/*
 * hermon_cq_workq_add()
 *    Context: Can be called from interrupt or base context.
 *
 * Inserts a workq AVL entry into the CQ's wqavl tree at the position
 * determined by avl_find() (the entry itself is expected not to be
 * present yet — the find is only used to compute the insertion point).
 */
static void
hermon_cq_workq_add(hermon_cqhdl_t cq, hermon_workq_avl_t *wqavl)
{
	hermon_workq_compare_t	cmp;
	avl_index_t		where;

	cmp.cmp_qpn = wqavl->wqa_qpn;
	cmp.cmp_type = wqavl->wqa_type;
#ifdef __lock_lint
	hermon_wrid_workq_compare(NULL, NULL);
#endif
	(void) avl_find(&cq->cq_wrid_wqhdr_avl_tree, &cmp, &where);
	avl_insert(&cq->cq_wrid_wqhdr_avl_tree, wqavl, where);
}


/*
 * hermon_cq_workq_remove()
 *    Context: Can be called from interrupt or base context.
 *
 * Removes a workq AVL entry from the CQ's wqavl tree (counterpart to
 * hermon_cq_workq_add() above).
 */
static void
hermon_cq_workq_remove(hermon_cqhdl_t cq, hermon_workq_avl_t *wqavl)
{
#ifdef __lock_lint
	hermon_wrid_workq_compare(NULL, NULL);
#endif
	avl_remove(&cq->cq_wrid_wqhdr_avl_tree, wqavl);
}