1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * hermon_wr.c 29 * Hermon Work Request Processing Routines 30 * 31 * Implements all the routines necessary to provide the PostSend(), 32 * PostRecv() and PostSRQ() verbs. Also contains all the code 33 * necessary to implement the Hermon WRID tracking mechanism. 
 */

#include <sys/types.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/avl.h>

#include <sys/ib/adapters/hermon/hermon.h>

static uint32_t hermon_wr_get_immediate(ibt_send_wr_t *wr);
static int hermon_wr_bind_check(hermon_state_t *state, ibt_send_wr_t *wr);
static int hermon_wqe_send_build(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint64_t *desc, uint_t *size);
static int hermon_wqe_mlx_build(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint64_t *desc, uint_t *size);
static void hermon_wqe_headroom(uint_t from, hermon_qphdl_t qp);
static int hermon_wqe_recv_build(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_recv_wr_t *wr, uint64_t *desc);
static int hermon_wqe_srq_build(hermon_state_t *state, hermon_srqhdl_t srq,
    ibt_recv_wr_t *wr, uint64_t *desc);
static void hermon_wqe_sync(void *hdl, uint_t sync_from,
    uint_t sync_to, uint_t sync_type, uint_t flag);
static hermon_workq_avl_t *hermon_wrid_wqavl_find(hermon_cqhdl_t cq, uint_t qpn,
    uint_t send_or_recv);
static void hermon_cq_workq_add(hermon_cqhdl_t cq, hermon_workq_avl_t *wqavl);
static void hermon_cq_workq_remove(hermon_cqhdl_t cq,
    hermon_workq_avl_t *wqavl);

/* A placeholder scatter/gather entry (zero length, "null" L_Key pattern). */
static ibt_wr_ds_t null_sgl = { 0, 0x00000100, 0 };

/*
 * hermon_post_send_ud()
 *    Context: Can be called from interrupt or base context.
 *
 *    Optimized post-send path for UD QPs.  Builds one Send or Send_LSO
 *    WQE per work request in wr[], sets the HW ownership bit on each,
 *    and rings the doorbell once at the end for all WQEs posted.
 *
 *    Entered with qp->qp_sq_lock held; the lock is always dropped before
 *    returning (see the _NOTE side-effect annotation below).  On success
 *    returns DDI_SUCCESS; otherwise an IBT_* error code, with *num_posted
 *    (if non-NULL) reporting how many WQEs were actually posted.
 */
static int
hermon_post_send_ud(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint_t num_wr, uint_t *num_posted)
{
	hermon_hw_snd_wqe_ud_t	*ud;
	hermon_workq_hdr_t	*wq;
	hermon_ahhdl_t		ah;
	ibt_ud_dest_t		*dest;
	uint64_t		*desc;
	uint32_t		desc_sz;
	uint32_t		signaled_dbd, solicited;
	uint32_t		head, tail, next_tail, qsize_msk;
	uint32_t		hdrmwqes;
	uint32_t		nopcode, fence, immed_data = 0;
	hermon_hw_wqe_sgl_t	*ds;
	ibt_wr_ds_t		*sgl;
	uint32_t		nds, dnds;
	int			i, j, last_ds, num_ds, status;
	uint32_t		*wqe_start;
	int			sectperwqe;
	uint_t			posted_cnt = 0;

	/* initialize the FMA retry loop */
	hermon_pio_init(fm_loop_cnt, fm_status, fm_test_num);

	ASSERT(MUTEX_HELD(&qp->qp_sq_lock));
	_NOTE(LOCK_RELEASED_AS_SIDE_EFFECT(&qp->qp_sq_lock))

	/*
	 * Grab the lock for the WRID list, i.e. make sure we observe any
	 * completion-side update of wq_head before reading it below.
	 */
	membar_consumer();

	/* Save away some initial QP state */
	wq = qp->qp_sq_wqhdr;
	qsize_msk = wq->wq_mask;
	hdrmwqes  = qp->qp_sq_hdrmwqes;		/* in WQEs */
	/* 16-byte sections per WQE: wqesz / 64 * 16 == 1 << (log_wqesz - 2) */
	sectperwqe = 1 << (qp->qp_sq_log_wqesz - 2);

	tail	  = wq->wq_tail;
	head	  = wq->wq_head;
	status	  = DDI_SUCCESS;

post_next:
	/*
	 * Check for "queue full" condition.  If the queue
	 * is already full, then no more WQEs can be posted.
	 * So break out, ring a doorbell (if necessary) and
	 * return an error
	 */
	if (wq->wq_full != 0) {
		status = IBT_QP_FULL;
		goto done;
	}

	next_tail = (tail + 1) & qsize_msk;
	if (((tail + hdrmwqes) & qsize_msk) == head) {
		wq->wq_full = 1;
	}

	desc = HERMON_QP_SQ_ENTRY(qp, tail);

	/* UD segment follows the control segment; SGLs follow the UD seg */
	ud = (hermon_hw_snd_wqe_ud_t *)((uintptr_t)desc +
	    sizeof (hermon_hw_snd_wqe_ctrl_t));
	ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ud +
	    sizeof (hermon_hw_snd_wqe_ud_t));
	nds = wr->wr_nds;
	sgl = wr->wr_sgl;
	num_ds = 0;

	/* need to know the count of destination nds for backward loop */
	for (dnds = 0, i = 0; i < nds; i++) {
		if (sgl[i].ds_len != 0)
			dnds++;
	}

	/*
	 * Build a Send or Send_LSO WQE
	 */
	if (wr->wr_opcode == IBT_WRC_SEND_LSO) {
		int total_len;
		hermon_hw_wqe_sgl_t *old_ds;

		nopcode = HERMON_WQE_SEND_NOPCODE_LSO;
		dest = wr->wr.ud_lso.lso_ud_dest;
		ah = (hermon_ahhdl_t)dest->ud_ah;
		if (ah == NULL) {
			status = IBT_AH_HDL_INVALID;
			goto done;
		}
		HERMON_WQE_BUILD_UD(qp, ud, ah, dest);

		/* LSO header is padded to a 16-byte boundary (+4 for len) */
		total_len = (4 + 0xf + wr->wr.ud_lso.lso_hdr_sz) & ~0xf;
		if ((uintptr_t)ds + total_len + (nds * 16) >
		    (uintptr_t)desc + (1 << qp->qp_sq_log_wqesz)) {
			status = IBT_QP_SGL_LEN_INVALID;
			goto done;
		}
		bcopy(wr->wr.ud_lso.lso_hdr, (uint32_t *)ds + 1,
		    wr->wr.ud_lso.lso_hdr_sz);
		old_ds = ds;
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ds + total_len);
		/*
		 * NOTE: only the FIRST non-empty SGL entry is built here
		 * (the loop breaks after one); the remaining entries are
		 * filled in by the backward loop below, which starts at
		 * index "i".
		 */
		for (i = 0; i < nds; i++) {
			if (sgl[i].ds_len == 0)
				continue;
			HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[num_ds], &sgl[i]);
			num_ds++;
			i++;
			break;
		}
		/* ensure the copied header is visible before the LSO seg */
		membar_producer();
		HERMON_WQE_BUILD_LSO(qp, old_ds, wr->wr.ud_lso.lso_mss,
		    wr->wr.ud_lso.lso_hdr_sz);
	} else if (wr->wr_opcode == IBT_WRC_SEND) {
		if (wr->wr_flags & IBT_WR_SEND_IMMED) {
			nopcode = HERMON_WQE_SEND_NOPCODE_SENDI;
			immed_data = wr->wr.ud.udwr_immed;
		} else {
			nopcode = HERMON_WQE_SEND_NOPCODE_SEND;
		}
		dest = wr->wr.ud.udwr_dest;
		ah = (hermon_ahhdl_t)dest->ud_ah;
		if (ah == NULL) {
			status = IBT_AH_HDL_INVALID;
			goto done;
		}
		HERMON_WQE_BUILD_UD(qp, ud, ah, dest);
		i = 0;
	} else {
		status = IBT_QP_OP_TYPE_INVALID;
		goto done;
	}

	if (nds > qp->qp_sq_sgl) {
		status = IBT_QP_SGL_LEN_INVALID;
		goto done;
	}
	/* count the remaining non-empty SGL entries still to be filled */
	for (last_ds = num_ds, j = i; j < nds; j++) {
		if (sgl[j].ds_len != 0)
			last_ds++;	/* real last ds of wqe to fill */
	}
	/* descriptor size in 16-byte units */
	desc_sz = ((uintptr_t)&ds[last_ds] - (uintptr_t)desc) >> 0x4;
	for (j = nds; --j >= i; ) {
		if (sgl[j].ds_len == 0) {
			continue;
		}

		/*
		 * Fill in the Data Segment(s) for the current WQE, using the
		 * information contained in the scatter-gather list of the
		 * work request.  Walked backward so each segment's byte_count
		 * is written before the segment becomes reachable.
		 */
		last_ds--;
		HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[last_ds], &sgl[j]);
	}

	fence = (wr->wr_flags & IBT_WR_SEND_FENCE) ? 1 : 0;

	signaled_dbd = ((qp->qp_sq_sigtype == HERMON_QP_SQ_ALL_SIGNALED) ||
	    (wr->wr_flags & IBT_WR_SEND_SIGNAL)) ? 1 : 0;

	solicited = (wr->wr_flags & IBT_WR_SEND_SOLICIT) ? 1 : 0;

	HERMON_WQE_SET_CTRL_SEGMENT(desc, desc_sz, fence, immed_data,
	    solicited, signaled_dbd, wr->wr_flags & IBT_WR_SEND_CKSUM, qp);

	wq->wq_wrid[tail] = wr->wr_id;

	tail = next_tail;

	/* Update some of the state in the QP */
	wq->wq_tail = tail;

	/* WQE contents must be globally visible before HW ownership is set */
	membar_producer();

	/* Now set the ownership bit and opcode (first dword). */
	HERMON_SET_SEND_WQE_OWNER(qp, (uint32_t *)desc, nopcode);

	posted_cnt++;
	if (--num_wr > 0) {
		/* do the invalidate of the headroom */
		wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp,
		    (tail + hdrmwqes) & qsize_msk);
		for (i = 16; i < sectperwqe; i += 16) {
			wqe_start[i] = 0xFFFFFFFF;
		}

		wr++;
		goto post_next;
	}
done:
	if (posted_cnt != 0) {
		ddi_acc_handle_t uarhdl = hermon_get_uarhdl(state);

		membar_producer();

		/* the FMA retry loop starts for Hermon doorbell register. */
		hermon_pio_start(state, uarhdl, pio_error, fm_loop_cnt,
		    fm_status, fm_test_num);

		HERMON_UAR_DOORBELL(state, uarhdl,
		    (uint64_t *)(void *)&state->hs_uar->send,
		    (uint64_t)qp->qp_ring);

		/* the FMA retry loop ends. */
		hermon_pio_end(state, uarhdl, pio_error, fm_loop_cnt,
		    fm_status, fm_test_num);

		/* do the invalidate of the headroom */
		wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp,
		    (tail + hdrmwqes) & qsize_msk);
		for (i = 16; i < sectperwqe; i += 16) {
			wqe_start[i] = 0xFFFFFFFF;
		}
	}
	if (num_posted != NULL)
		*num_posted = posted_cnt;

	mutex_exit(&qp->qp_sq_lock);

	return (status);

pio_error:
	/* label referenced by the hermon_pio_start/end retry macros above */
	mutex_exit(&qp->qp_sq_lock);
	hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
	return (ibc_get_ci_failure(0));
}

/*
 * hermon_post_send_rc()
 *    Context: Can be called from interrupt or base context.
 *
 *    Optimized post-send path for RC QPs.  Supports Send, RDMA Read/Write,
 *    Atomic (Compare-Swap / Fetch-Add), and memory window Bind operations.
 *    Entered with qp->qp_sq_lock held; the lock is always dropped before
 *    returning.  Same posting/doorbell protocol as hermon_post_send_ud().
 */
static int
hermon_post_send_rc(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint_t num_wr, uint_t *num_posted)
{
	uint64_t			*desc;
	hermon_workq_hdr_t		*wq;
	uint32_t			desc_sz;
	uint32_t			signaled_dbd, solicited;
	uint32_t			head, tail, next_tail, qsize_msk;
	uint32_t			hdrmwqes;
	int				status;
	uint32_t			nopcode, fence, immed_data = 0;
	hermon_hw_snd_wqe_remaddr_t	*rc;
	hermon_hw_snd_wqe_atomic_t	*at;
	hermon_hw_snd_wqe_bind_t	*bn;
	hermon_hw_wqe_sgl_t		*ds;
	ibt_wr_ds_t			*sgl;
	uint32_t			nds;
	int				i, last_ds, num_ds;
	uint32_t			*wqe_start;
	int				sectperwqe;
	uint_t				posted_cnt = 0;

	/* initialize the FMA retry loop */
	hermon_pio_init(fm_loop_cnt, fm_status, fm_test_num);

	ASSERT(MUTEX_HELD(&qp->qp_sq_lock));
	_NOTE(LOCK_RELEASED_AS_SIDE_EFFECT(&qp->qp_sq_lock))

	/* make sure we see any update of wq_head */
	membar_consumer();

	/* Save away some initial QP state */
	wq = qp->qp_sq_wqhdr;
	qsize_msk = wq->wq_mask;
	hdrmwqes  = qp->qp_sq_hdrmwqes;		/* in WQEs */
	sectperwqe = 1 << (qp->qp_sq_log_wqesz - 2);

	tail	  = wq->wq_tail;
	head	  = wq->wq_head;
	status	  = DDI_SUCCESS;

post_next:
	/*
	 * Check for "queue full" condition.  If the queue
	 * is already full, then no more WQEs can be posted.
	 * So break out, ring a doorbell (if necessary) and
	 * return an error
	 */
	if (wq->wq_full != 0) {
		status = IBT_QP_FULL;
		goto done;
	}
	next_tail = (tail + 1) & qsize_msk;
	if (((tail + hdrmwqes) & qsize_msk) == head) {
		wq->wq_full = 1;
	}

	desc = HERMON_QP_SQ_ENTRY(qp, tail);

	ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)desc +
	    sizeof (hermon_hw_snd_wqe_ctrl_t));
	nds = wr->wr_nds;
	sgl = wr->wr_sgl;
	num_ds = 0;

	/*
	 * Validate the operation type.  For RC requests, we allow
	 * "Send", "RDMA Read", "RDMA Write", various "Atomic"
	 * operations, and memory window "Bind"
	 */
	switch (wr->wr_opcode) {
	default:
		status = IBT_QP_OP_TYPE_INVALID;
		goto done;

	case IBT_WRC_SEND:
		if (wr->wr_flags & IBT_WR_SEND_IMMED) {
			nopcode = HERMON_WQE_SEND_NOPCODE_SENDI;
			immed_data = wr->wr.rc.rcwr.send_immed;
		} else {
			nopcode = HERMON_WQE_SEND_NOPCODE_SEND;
		}
		break;

	/*
	 * If this is an RDMA Read or RDMA Write request, then fill
	 * in the "Remote Address" header fields.
	 */
	case IBT_WRC_RDMAW:
		if (wr->wr_flags & IBT_WR_SEND_IMMED) {
			nopcode = HERMON_WQE_SEND_NOPCODE_RDMAWI;
			immed_data = wr->wr.rc.rcwr.rdma.rdma_immed;
		} else {
			nopcode = HERMON_WQE_SEND_NOPCODE_RDMAW;
		}
		/* FALLTHROUGH */
	case IBT_WRC_RDMAR:
		if (wr->wr_opcode == IBT_WRC_RDMAR)
			nopcode = HERMON_WQE_SEND_NOPCODE_RDMAR;
		rc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
		    sizeof (hermon_hw_snd_wqe_ctrl_t));

		/*
		 * Build the Remote Address Segment for the WQE, using
		 * the information from the RC work request.
		 */
		HERMON_WQE_BUILD_REMADDR(qp, rc, &wr->wr.rc.rcwr.rdma);

		/* Update "ds" for filling in Data Segments (below) */
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)rc +
		    sizeof (hermon_hw_snd_wqe_remaddr_t));
		break;

	/*
	 * If this is one of the Atomic type operations (i.e
	 * Compare-Swap or Fetch-Add), then fill in both the "Remote
	 * Address" header fields and the "Atomic" header fields.
	 */
	case IBT_WRC_CSWAP:
		nopcode = HERMON_WQE_SEND_NOPCODE_ATMCS;
		/* FALLTHROUGH */
	case IBT_WRC_FADD:
		if (wr->wr_opcode == IBT_WRC_FADD)
			nopcode = HERMON_WQE_SEND_NOPCODE_ATMFA;
		rc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
		    sizeof (hermon_hw_snd_wqe_ctrl_t));
		at = (hermon_hw_snd_wqe_atomic_t *)((uintptr_t)rc +
		    sizeof (hermon_hw_snd_wqe_remaddr_t));

		/*
		 * Build the Remote Address and Atomic Segments for
		 * the WQE, using the information from the RC Atomic
		 * work request.
		 */
		HERMON_WQE_BUILD_RC_ATOMIC_REMADDR(qp, rc, wr);
		HERMON_WQE_BUILD_ATOMIC(qp, at, wr->wr.rc.rcwr.atomic);

		/* Update "ds" for filling in Data Segments (below) */
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)at +
		    sizeof (hermon_hw_snd_wqe_atomic_t));

		/*
		 * Update "nds" and "sgl" because Atomic requests have
		 * only a single Data Segment.
		 */
		nds = 1;
		sgl = wr->wr_sgl;
		break;

	/*
	 * If this is memory window Bind operation, then we call the
	 * hermon_wr_bind_check() routine to validate the request and
	 * to generate the updated RKey.  If this is successful, then
	 * we fill in the WQE's "Bind" header fields.
	 */
	case IBT_WRC_BIND:
		nopcode = HERMON_WQE_SEND_NOPCODE_BIND;
		status = hermon_wr_bind_check(state, wr);
		if (status != DDI_SUCCESS)
			goto done;

		bn = (hermon_hw_snd_wqe_bind_t *)((uintptr_t)desc +
		    sizeof (hermon_hw_snd_wqe_ctrl_t));

		/*
		 * Build the Bind Memory Window Segments for the WQE,
		 * using the information from the RC Bind memory
		 * window work request.
		 */
		HERMON_WQE_BUILD_BIND(qp, bn, wr->wr.rc.rcwr.bind);

		/*
		 * Update the "ds" pointer.  Even though the "bind"
		 * operation requires no SGLs, this is necessary to
		 * facilitate the correct descriptor size calculations
		 * (below).
		 */
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)bn +
		    sizeof (hermon_hw_snd_wqe_bind_t));
		nds = 0;
	}

	/*
	 * Now fill in the Data Segments (SGL) for the Send WQE based
	 * on the values setup above (i.e. "sgl", "nds", and the "ds"
	 * pointer.  Start by checking for a valid number of SGL entries
	 */
	if (nds > qp->qp_sq_sgl) {
		status = IBT_QP_SGL_LEN_INVALID;
		goto done;
	}

	for (last_ds = num_ds, i = 0; i < nds; i++) {
		if (sgl[i].ds_len != 0)
			last_ds++;	/* real last ds of wqe to fill */
	}
	/* descriptor size in 16-byte units */
	desc_sz = ((uintptr_t)&ds[last_ds] - (uintptr_t)desc) >> 0x4;
	for (i = nds; --i >= 0; ) {
		if (sgl[i].ds_len == 0) {
			continue;
		}

		/*
		 * Fill in the Data Segment(s) for the current WQE, using the
		 * information contained in the scatter-gather list of the
		 * work request.  Walked backward (see UD path note).
		 */
		last_ds--;
		HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[last_ds], &sgl[i]);
	}

	fence = (wr->wr_flags & IBT_WR_SEND_FENCE) ? 1 : 0;

	signaled_dbd = ((qp->qp_sq_sigtype == HERMON_QP_SQ_ALL_SIGNALED) ||
	    (wr->wr_flags & IBT_WR_SEND_SIGNAL)) ? 1 : 0;

	solicited = (wr->wr_flags & IBT_WR_SEND_SOLICIT) ? 1 : 0;

	HERMON_WQE_SET_CTRL_SEGMENT(desc, desc_sz, fence, immed_data, solicited,
	    signaled_dbd, wr->wr_flags & IBT_WR_SEND_CKSUM, qp);

	wq->wq_wrid[tail] = wr->wr_id;

	tail = next_tail;

	/* Update some of the state in the QP */
	wq->wq_tail = tail;

	/* WQE contents must be globally visible before HW ownership is set */
	membar_producer();

	/* Now set the ownership bit of the first one in the chain. */
	HERMON_SET_SEND_WQE_OWNER(qp, (uint32_t *)desc, nopcode);

	posted_cnt++;
	if (--num_wr > 0) {
		/* do the invalidate of the headroom */
		wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp,
		    (tail + hdrmwqes) & qsize_msk);
		for (i = 16; i < sectperwqe; i += 16) {
			wqe_start[i] = 0xFFFFFFFF;
		}

		wr++;
		goto post_next;
	}
done:

	if (posted_cnt != 0) {
		ddi_acc_handle_t uarhdl = hermon_get_uarhdl(state);

		membar_producer();

		/* the FMA retry loop starts for Hermon doorbell register. */
		hermon_pio_start(state, uarhdl, pio_error, fm_loop_cnt,
		    fm_status, fm_test_num);

		/* Ring the doorbell */
		HERMON_UAR_DOORBELL(state, uarhdl,
		    (uint64_t *)(void *)&state->hs_uar->send,
		    (uint64_t)qp->qp_ring);

		/* the FMA retry loop ends. */
		hermon_pio_end(state, uarhdl, pio_error, fm_loop_cnt,
		    fm_status, fm_test_num);

		/* do the invalidate of the headroom */
		wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp,
		    (tail + hdrmwqes) & qsize_msk);
		for (i = 16; i < sectperwqe; i += 16) {
			wqe_start[i] = 0xFFFFFFFF;
		}
	}
	/*
	 * Update the "num_posted" return value (if necessary).
	 * Then drop the locks and return success.
	 */
	if (num_posted != NULL) {
		*num_posted = posted_cnt;
	}

	mutex_exit(&qp->qp_sq_lock);
	return (status);

pio_error:
	/* label referenced by the hermon_pio_start/end retry macros above */
	mutex_exit(&qp->qp_sq_lock);
	hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
	return (ibc_get_ci_failure(0));
}

/*
 * hermon_post_send()
 *    Context: Can be called from interrupt or base context.
 *
 *    Generic post-send entry point.  Dispatches to the optimized UD/RC
 *    paths above for normal QPs; Special QPs (QP0/QP1) and UC QPs take
 *    the general chain-building loop below.
 */
int
hermon_post_send(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint_t num_wr, uint_t *num_posted)
{
	ibt_send_wr_t			*curr_wr;
	hermon_workq_hdr_t		*wq;
	hermon_ahhdl_t			ah;
	uint64_t			*desc, *prev;
	uint32_t			desc_sz;
	uint32_t			signaled_dbd, solicited;
	uint32_t			head, tail, next_tail, qsize_msk;
	uint32_t			sync_from, sync_to;
	uint32_t			hdrmwqes;
	uint_t				currindx, wrindx, numremain;
	uint_t				chainlen;
	uint_t				posted_cnt, maxstat;
	uint_t				total_posted;
	int				status;
	uint32_t			nopcode, fence, immed_data = 0;
	uint32_t			prev_nopcode;

	/* initialize the FMA retry loop */
	hermon_pio_init(fm_loop_cnt, fm_status, fm_test);

	/*
	 * Check for user-mappable QP memory.  Note:  We do not allow kernel
	 * clients to post to QP memory that is accessible directly by the
	 * user.  If the QP memory is user accessible, then return an error.
	 */
	if (qp->qp_is_umap) {
		return (IBT_QP_HDL_INVALID);
	}

	mutex_enter(&qp->qp_lock);

	/*
	 * Check QP state.  Can not post Send requests from the "Reset",
	 * "Init", or "RTR" states
	 */
	if ((qp->qp_state == HERMON_QP_RESET) ||
	    (qp->qp_state == HERMON_QP_INIT) ||
	    (qp->qp_state == HERMON_QP_RTR)) {
		mutex_exit(&qp->qp_lock);
		return (IBT_QP_STATE_INVALID);
	}
	mutex_exit(&qp->qp_lock);
	mutex_enter(&qp->qp_sq_lock);

	if (qp->qp_is_special)
		goto post_many;

	/*
	 * Use these optimized functions most of the time.  Note that they
	 * release qp_sq_lock themselves before returning.
	 */
	if (qp->qp_serv_type == HERMON_QP_UD)
		return (hermon_post_send_ud(state, qp, wr, num_wr, num_posted));

	if (qp->qp_serv_type == HERMON_QP_RC)
		return (hermon_post_send_rc(state, qp, wr, num_wr, num_posted));

	if (qp->qp_serv_type == HERMON_QP_UC)
		goto post_many;

	mutex_exit(&qp->qp_sq_lock);
	return (IBT_QP_SRV_TYPE_INVALID);

post_many:
	/* general loop for non-optimized posting */

	/* Grab the lock for the WRID list */
	membar_consumer();

	/* Save away some initial QP state */
	wq = qp->qp_sq_wqhdr;
	qsize_msk = wq->wq_mask;
	tail	  = wq->wq_tail;
	head	  = wq->wq_head;
	hdrmwqes  = qp->qp_sq_hdrmwqes;		/* in WQEs */

	/* Initialize posted_cnt */
	posted_cnt = 0;
	total_posted = 0;

	/*
	 * For each ibt_send_wr_t in the wr[] list passed in, parse the
	 * request and build a Send WQE.  NOTE:  Because we are potentially
	 * building a chain of WQEs to post, we want to build them all first,
	 * and set the valid (HW Ownership) bit on all but the first.
	 * However, we do not want to validate the first one until the
	 * entire chain of WQEs has been built.  Then in the final
	 * we set the valid bit in the first, flush if needed, and as a last
	 * step ring the appropriate doorbell.  NOTE: the doorbell ring may
	 * NOT be needed if the HCA is already processing, but the doorbell
	 * ring will be done regardless.  NOTE ALSO:  It is possible for
	 * more Work Requests to be posted than the HW will support at one
	 * shot.  If this happens, we need to be able to post and ring
	 * several chains here until the the entire request is complete.
	 * NOTE ALSO:  the term "chain" is used to differentiate it from
	 * Work Request List passed in; and because that's the terminology
	 * from the previous generations of HCA - but the WQEs are not, in fact,
	 * chained together for Hermon
	 */

	wrindx = 0;
	numremain = num_wr;
	status	  = DDI_SUCCESS;
	while ((wrindx < num_wr) && (status == DDI_SUCCESS)) {
		/*
		 * For the first WQE on a new chain we need "prev" to point
		 * to the current descriptor.
		 */
		prev = HERMON_QP_SQ_ENTRY(qp, tail);

		/*
		 * unlike Tavor & Arbel, tail will maintain the number of the
		 * next (this) WQE to be posted.  Since there is no backward
		 * linking in Hermon, we can always just look ahead
		 */
		/*
		 * Before we begin, save the current "tail index" for later
		 * DMA sync
		 */
		/* NOTE: don't need to go back one like arbel/tavor */
		sync_from = tail;

		/*
		 * Break the request up into lists that are less than or
		 * equal to the maximum number of WQEs that can be posted
		 * per doorbell ring - 256 currently
		 */
		chainlen = (numremain > HERMON_QP_MAXDESC_PER_DB) ?
		    HERMON_QP_MAXDESC_PER_DB : numremain;
		numremain -= chainlen;

		for (currindx = 0; currindx < chainlen; currindx++, wrindx++) {
			/*
			 * Check for "queue full" condition.  If the queue
			 * is already full, then no more WQEs can be posted.
			 * So break out, ring a doorbell (if necessary) and
			 * return an error
			 */
			if (wq->wq_full != 0) {
				status = IBT_QP_FULL;
				break;
			}

			/*
			 * Increment the "tail index".  Check for "queue
			 * full" condition incl. headroom.  If we detect that
			 * the current work request is going to fill the work
			 * queue, then we mark this condition and continue.
			 * Don't need >=, because going one-by-one we have to
			 * hit it exactly sooner or later
			 */

			next_tail = (tail + 1) & qsize_msk;
			if (((tail + hdrmwqes) & qsize_msk) == head) {
				wq->wq_full = 1;
			}

			/*
			 * Get the address of the location where the next
			 * Send WQE should be built
			 */
			desc = HERMON_QP_SQ_ENTRY(qp, tail);
			/*
			 * Call hermon_wqe_send_build() to build the WQE
			 * at the given address.  This routine uses the
			 * information in the ibt_send_wr_t list (wr[]) and
			 * returns the size of the WQE when it returns.
			 */
			status = hermon_wqe_send_build(state, qp,
			    &wr[wrindx], desc, &desc_sz);
			if (status != DDI_SUCCESS) {
				break;
			}

			/*
			 * Now, build the Ctrl Segment based on
			 * what was just done
			 */
			curr_wr = &wr[wrindx];

			/*
			 * No default case needed: hermon_wqe_send_build()
			 * has already rejected any invalid opcode above.
			 */
			switch (curr_wr->wr_opcode) {
			case IBT_WRC_RDMAW:
				if (curr_wr->wr_flags & IBT_WR_SEND_IMMED) {
					nopcode =
					    HERMON_WQE_SEND_NOPCODE_RDMAWI;
					immed_data =
					    hermon_wr_get_immediate(curr_wr);
				} else {
					nopcode = HERMON_WQE_SEND_NOPCODE_RDMAW;
				}
				break;

			case IBT_WRC_SEND:
				if (curr_wr->wr_flags & IBT_WR_SEND_IMMED) {
					nopcode = HERMON_WQE_SEND_NOPCODE_SENDI;
					immed_data =
					    hermon_wr_get_immediate(curr_wr);
				} else {
					nopcode = HERMON_WQE_SEND_NOPCODE_SEND;
				}
				break;

			case IBT_WRC_SEND_LSO:
				nopcode = HERMON_WQE_SEND_NOPCODE_LSO;
				break;

			case IBT_WRC_RDMAR:
				nopcode = HERMON_WQE_SEND_NOPCODE_RDMAR;
				break;

			case IBT_WRC_CSWAP:
				nopcode = HERMON_WQE_SEND_NOPCODE_ATMCS;
				break;

			case IBT_WRC_FADD:
				nopcode = HERMON_WQE_SEND_NOPCODE_ATMFA;
				break;

			case IBT_WRC_BIND:
				nopcode = HERMON_WQE_SEND_NOPCODE_BIND;
				break;
			}

			fence = (curr_wr->wr_flags & IBT_WR_SEND_FENCE) ? 1 : 0;

			/*
			 * now, build up the control segment, leaving the
			 * owner bit as it is
			 */

			if ((qp->qp_sq_sigtype == HERMON_QP_SQ_ALL_SIGNALED) ||
			    (curr_wr->wr_flags & IBT_WR_SEND_SIGNAL)) {
				signaled_dbd = 1;
			} else {
				signaled_dbd = 0;
			}
			if (curr_wr->wr_flags & IBT_WR_SEND_SOLICIT)
				solicited = 1;
			else
				solicited = 0;

			if (qp->qp_is_special) {
				/* Special QPs use the MLX control segment */
				ah = (hermon_ahhdl_t)
				    curr_wr->wr.ud.udwr_dest->ud_ah;
				mutex_enter(&ah->ah_lock);
				maxstat = ah->ah_udav->max_stat_rate;
				HERMON_WQE_SET_MLX_CTRL_SEGMENT(desc, desc_sz,
				    signaled_dbd, maxstat, ah->ah_udav->rlid,
				    qp, ah->ah_udav->sl);
				mutex_exit(&ah->ah_lock);
			} else {
				HERMON_WQE_SET_CTRL_SEGMENT(desc, desc_sz,
				    fence, immed_data, solicited,
				    signaled_dbd, curr_wr->wr_flags &
				    IBT_WR_SEND_CKSUM, qp);
			}
			wq->wq_wrid[tail] = curr_wr->wr_id;

			/*
			 * If this is not the first descriptor on the current
			 * chain, then set the ownership bit.
			 */
			if (currindx != 0) {		/* not the first */
				membar_producer();
				HERMON_SET_SEND_WQE_OWNER(qp,
				    (uint32_t *)desc, nopcode);
			} else
				prev_nopcode = nopcode;

			/*
			 * Update the current "tail index" and increment
			 * "posted_cnt"
			 */
			tail = next_tail;
			posted_cnt++;
		}

		/*
		 * If we reach here and there are one or more WQEs which have
		 * been successfully built as a chain, we have to finish up
		 * and prepare them for writing to the HW
		 * The steps are:
		 * 	1. do the headroom fixup
		 *	2. add in the size of the headroom for the sync
		 *	3. write the owner bit for the first WQE
		 *	4. sync them
		 *	5. fix up the structures
		 *	6. hit the doorbell in UAR
		 */
		if (posted_cnt != 0) {
			ddi_acc_handle_t uarhdl = hermon_get_uarhdl(state);

			/*
			 * Save away updated "tail index" for the DMA sync
			 * including the headroom that will be needed
			 */
			sync_to = (tail + hdrmwqes) & qsize_msk;

			/* do the invalidate of the headroom */

			hermon_wqe_headroom(tail, qp);

			/* Do a DMA sync for current send WQE(s) */
			hermon_wqe_sync(qp, sync_from, sync_to, HERMON_WR_SEND,
			    DDI_DMA_SYNC_FORDEV);

			/* Update some of the state in the QP */
			wq->wq_tail = tail;
			total_posted += posted_cnt;
			posted_cnt = 0;

			membar_producer();

			/*
			 * Now set the ownership bit of the first
			 * one in the chain
			 */
			HERMON_SET_SEND_WQE_OWNER(qp, (uint32_t *)prev,
			    prev_nopcode);

			/* the FMA retry loop starts for Hermon doorbell. */
			hermon_pio_start(state, uarhdl, pio_error, fm_loop_cnt,
			    fm_status, fm_test);

			HERMON_UAR_DOORBELL(state, uarhdl,
			    (uint64_t *)(void *)&state->hs_uar->send,
			    (uint64_t)qp->qp_ring);

			/* the FMA retry loop ends. */
			hermon_pio_end(state, uarhdl, pio_error, fm_loop_cnt,
			    fm_status, fm_test);
		}
	}

	/*
	 * Update the "num_posted" return value (if necessary).
	 * Then drop the locks and return success.
	 */
	if (num_posted != NULL) {
		*num_posted = total_posted;
	}
	mutex_exit(&qp->qp_sq_lock);
	return (status);

pio_error:
	/* label referenced by the hermon_pio_start/end retry macros above */
	mutex_exit(&qp->qp_sq_lock);
	hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
	return (ibc_get_ci_failure(0));
}


/*
 * hermon_post_recv()
 *    Context: Can be called from interrupt or base context.
 *
 *    Posts num_wr receive work requests to the QP's receive queue and
 *    updates the doorbell record with the running WQE counter.  Fails
 *    with IBT_SRQ_IN_USE if the QP is attached to an SRQ.
 */
int
hermon_post_recv(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_recv_wr_t *wr, uint_t num_wr, uint_t *num_posted)
{
	uint64_t			*desc;
	hermon_workq_hdr_t		*wq;
	uint32_t			head, tail, next_tail, qsize_msk;
	uint32_t			sync_from, sync_to;
	uint_t				wrindx;
	uint_t				posted_cnt;
	int				status;

	/*
	 * Check for user-mappable QP memory.  Note:  We do not allow kernel
	 * clients to post to QP memory that is accessible directly by the
	 * user.  If the QP memory is user accessible, then return an error.
	 */
	if (qp->qp_is_umap) {
		return (IBT_QP_HDL_INVALID);
	}

	/* Initialize posted_cnt */
	posted_cnt = 0;

	mutex_enter(&qp->qp_lock);

	/*
	 * Check if QP is associated with an SRQ
	 */
	if (qp->qp_srq_en == HERMON_QP_SRQ_ENABLED) {
		mutex_exit(&qp->qp_lock);
		return (IBT_SRQ_IN_USE);
	}

	/*
	 * Check QP state.  Can not post Recv requests from the "Reset" state
	 */
	if (qp->qp_state == HERMON_QP_RESET) {
		mutex_exit(&qp->qp_lock);
		return (IBT_QP_STATE_INVALID);
	}

	/* Check that work request transport type is valid */
	if ((qp->qp_serv_type != HERMON_QP_UD) &&
	    (qp->qp_serv_type != HERMON_QP_RC) &&
	    (qp->qp_serv_type != HERMON_QP_UC)) {
		mutex_exit(&qp->qp_lock);
		return (IBT_QP_SRV_TYPE_INVALID);
	}

	mutex_exit(&qp->qp_lock);
	mutex_enter(&qp->qp_rq_lock);

	/*
	 * Grab the lock for the WRID list, i.e., membar_consumer().
	 * This is not needed because the mutex_enter() above has
	 * the same effect.
	 */

	/* Save away some initial QP state */
	wq = qp->qp_rq_wqhdr;
	qsize_msk = wq->wq_mask;
	tail = wq->wq_tail;
	head = wq->wq_head;

	wrindx = 0;
	status = DDI_SUCCESS;
	/*
	 * Before we begin, save the current "tail index" for later
	 * DMA sync
	 */
	sync_from = tail;

	for (wrindx = 0; wrindx < num_wr; wrindx++) {
		if (wq->wq_full != 0) {
			status = IBT_QP_FULL;
			break;
		}
		next_tail = (tail + 1) & qsize_msk;
		if (next_tail == head) {
			wq->wq_full = 1;
		}
		desc = HERMON_QP_RQ_ENTRY(qp, tail);
		status = hermon_wqe_recv_build(state, qp, &wr[wrindx], desc);
		if (status != DDI_SUCCESS) {
			break;
		}

		wq->wq_wrid[tail] = wr[wrindx].wr_id;
		qp->qp_rq_wqecntr++;

		tail = next_tail;
		posted_cnt++;
	}

	if (posted_cnt != 0) {
		/* Save away updated "tail index" for the DMA sync */
		sync_to = tail;

		hermon_wqe_sync(qp, sync_from, sync_to, HERMON_WR_RECV,
		    DDI_DMA_SYNC_FORDEV);

		wq->wq_tail = tail;

		membar_producer();	/* ensure wrids are visible */

		/* Update the doorbell record w/ wqecntr */
		HERMON_UAR_DB_RECORD_WRITE(qp->qp_rq_vdbr,
		    qp->qp_rq_wqecntr & 0xFFFF);
	}

	if (num_posted != NULL) {
		*num_posted = posted_cnt;
	}


	mutex_exit(&qp->qp_rq_lock);
	return (status);
}

/*
 * hermon_post_srq()
 *    Context: Can be called from interrupt or base context.
 *
 *    Posts num_wr receive work requests to a Shared Receive Queue.  The
 *    SRQ free list is threaded through the WQEs themselves: the next free
 *    index is read back out of the just-built descriptor (see below).
 */
int
hermon_post_srq(hermon_state_t *state, hermon_srqhdl_t srq,
    ibt_recv_wr_t *wr, uint_t num_wr, uint_t *num_posted)
{
	uint64_t			*desc;
	hermon_workq_hdr_t		*wq;
	uint_t				indx, wrindx;
	uint_t				posted_cnt;
	int				status;

	mutex_enter(&srq->srq_lock);

	/*
	 * Check for user-mappable QP memory.  Note:  We do not allow kernel
	 * clients to post to QP memory that is accessible directly by the
	 * user.  If the QP memory is user accessible, then return an error.
	 */
	if (srq->srq_is_umap) {
		mutex_exit(&srq->srq_lock);
		return (IBT_SRQ_HDL_INVALID);
	}

	/*
	 * Check SRQ state.  Can not post Recv requests when SRQ is in error
	 */
	if (srq->srq_state == HERMON_SRQ_STATE_ERROR) {
		mutex_exit(&srq->srq_lock);
		return (IBT_QP_STATE_INVALID);
	}

	status = DDI_SUCCESS;
	posted_cnt = 0;
	wq = srq->srq_wq_wqhdr;
	indx = wq->wq_head;

	for (wrindx = 0; wrindx < num_wr; wrindx++) {

		if (indx == wq->wq_tail) {
			status = IBT_QP_FULL;
			break;
		}
		desc = HERMON_SRQ_WQE_ADDR(srq, indx);

		wq->wq_wrid[indx] = wr[wrindx].wr_id;

		status = hermon_wqe_srq_build(state, srq, &wr[wrindx], desc);
		if (status != DDI_SUCCESS) {
			break;
		}

		hermon_wqe_sync(srq, indx, indx + 1,
		    HERMON_WR_SRQ, DDI_DMA_SYNC_FORDEV);
		posted_cnt++;
		/*
		 * The "next" free index lives in the second 16-bit word of
		 * the descriptor (big-endian), linking the SRQ free list.
		 */
		indx = htons(((uint16_t *)desc)[1]);
		wq->wq_head = indx;
	}

	if (posted_cnt != 0) {

		srq->srq_wq_wqecntr += posted_cnt;

		membar_producer();	/* ensure wrids are visible */

		/* Ring the doorbell w/ wqecntr */
		HERMON_UAR_DB_RECORD_WRITE(srq->srq_wq_vdbr,
		    srq->srq_wq_wqecntr & 0xFFFF);
	}

	if (num_posted != NULL) {
		*num_posted = posted_cnt;
	}

	mutex_exit(&srq->srq_lock);
	return (status);
}


/*
 * hermon_wqe_send_build()
 *    Context: Can be called from interrupt or base context.
 */
static int
hermon_wqe_send_build(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint64_t *desc, uint_t *size)
{
	hermon_hw_snd_wqe_ud_t		*ud;
	hermon_hw_snd_wqe_remaddr_t	*rc;
	hermon_hw_snd_wqe_atomic_t	*at;
	hermon_hw_snd_wqe_remaddr_t	*uc;
	hermon_hw_snd_wqe_bind_t	*bn;
	hermon_hw_wqe_sgl_t		*ds, *old_ds;
	ibt_ud_dest_t			*dest;
	ibt_wr_ds_t			*sgl;
	hermon_ahhdl_t			ah;
	uint32_t			nds;
	int				i, j, last_ds, num_ds, status;
	int				tmpsize;

	ASSERT(MUTEX_HELD(&qp->qp_sq_lock));

	/* Initialize the information for the Data Segments */
	ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)desc +
	    sizeof (hermon_hw_snd_wqe_ctrl_t));
	nds = wr->wr_nds;
	sgl = wr->wr_sgl;
	num_ds = 0;
	i = 0;

	/*
	 * Building a Send WQE depends first and foremost on the transport
	 * type of Work Request (i.e. UD, RC, or UC)
	 */
	switch (wr->wr_trans) {
	case IBT_UD_SRV:
		/* Ensure that work request transport type matches QP type */
		if (qp->qp_serv_type != HERMON_QP_UD) {
			return (IBT_QP_SRV_TYPE_INVALID);
		}

		/*
		 * Validate the operation type.  For UD requests, only the
		 * "Send" and "Send LSO" operations are valid.
		 */
		if (wr->wr_opcode != IBT_WRC_SEND &&
		    wr->wr_opcode != IBT_WRC_SEND_LSO) {
			return (IBT_QP_OP_TYPE_INVALID);
		}

		/*
		 * If this is a Special QP (QP0 or QP1), then we need to
		 * build MLX WQEs instead.  So jump to hermon_wqe_mlx_build()
		 * and return whatever status it returns.  (LSO is never
		 * valid on a special QP.)
		 */
		if (qp->qp_is_special) {
			if (wr->wr_opcode == IBT_WRC_SEND_LSO) {
				return (IBT_QP_OP_TYPE_INVALID);
			}
			status = hermon_wqe_mlx_build(state, qp,
			    wr, desc, size);
			return (status);
		}

		/*
		 * Otherwise, if this is a normal UD Send request, then fill
		 * all the fields in the Hermon UD header for the WQE.  Note:
		 * to do this we'll need to extract some information from the
		 * Address Handle passed with the work request.
		 */
		ud = (hermon_hw_snd_wqe_ud_t *)((uintptr_t)desc +
		    sizeof (hermon_hw_snd_wqe_ctrl_t));
		if (wr->wr_opcode == IBT_WRC_SEND) {
			dest = wr->wr.ud.udwr_dest;
		} else {
			dest = wr->wr.ud_lso.lso_ud_dest;
		}
		ah = (hermon_ahhdl_t)dest->ud_ah;
		if (ah == NULL) {
			return (IBT_AH_HDL_INVALID);
		}

		/*
		 * Build the Unreliable Datagram Segment for the WQE, using
		 * the information from the address handle and the work
		 * request.
		 */
		/* mutex_enter(&ah->ah_lock); */
		if (wr->wr_opcode == IBT_WRC_SEND) {
			HERMON_WQE_BUILD_UD(qp, ud, ah, wr->wr.ud.udwr_dest);
		} else {	/* IBT_WRC_SEND_LSO */
			HERMON_WQE_BUILD_UD(qp, ud, ah,
			    wr->wr.ud_lso.lso_ud_dest);
		}
		/* mutex_exit(&ah->ah_lock); */

		/* Update "ds" for filling in Data Segments (below) */
		ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ud +
		    sizeof (hermon_hw_snd_wqe_ud_t));

		if (wr->wr_opcode == IBT_WRC_SEND_LSO) {
			int total_len;

			/*
			 * The LSO header is copied inline into the WQE
			 * (4 bytes of LSO segment header plus lso_hdr_sz,
			 * rounded up to a 16-byte multiple).  Verify the
			 * inline header plus all data segments still fit
			 * within this WQE's stride before copying.
			 */
			total_len = (4 + 0xf + wr->wr.ud_lso.lso_hdr_sz) & ~0xf;
			if ((uintptr_t)ds + total_len + (nds * 16) >
			    (uintptr_t)desc + (1 << qp->qp_sq_log_wqesz))
				return (IBT_QP_SGL_LEN_INVALID);

			bcopy(wr->wr.ud_lso.lso_hdr, (uint32_t *)ds + 1,
			    wr->wr.ud_lso.lso_hdr_sz);
			old_ds = ds;
			ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ds + total_len);

			/*
			 * Build the first non-empty data segment here; the
			 * remaining segments are handled by the common SGL
			 * loop below, which resumes at index "i".
			 */
			for (; i < nds; i++) {
				if (sgl[i].ds_len == 0)
					continue;
				HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[num_ds],
				    &sgl[i]);
				num_ds++;
				i++;
				break;
			}
			/* data segment must be visible before LSO header */
			membar_producer();
			HERMON_WQE_BUILD_LSO(qp, old_ds, wr->wr.ud_lso.lso_mss,
			    wr->wr.ud_lso.lso_hdr_sz);
		}

		break;

	case IBT_RC_SRV:
		/* Ensure that work request transport type matches QP type */
		if (qp->qp_serv_type != HERMON_QP_RC) {
			return (IBT_QP_SRV_TYPE_INVALID);
		}

		/*
		 * Validate the operation type.  For RC requests, we allow
		 * "Send", "RDMA Read", "RDMA Write", various "Atomic"
		 * operations, and memory window "Bind"
		 */
		if ((wr->wr_opcode != IBT_WRC_SEND) &&
		    (wr->wr_opcode != IBT_WRC_RDMAR) &&
		    (wr->wr_opcode != IBT_WRC_RDMAW) &&
		    (wr->wr_opcode != IBT_WRC_CSWAP) &&
		    (wr->wr_opcode != IBT_WRC_FADD) &&
		    (wr->wr_opcode != IBT_WRC_BIND)) {
			return (IBT_QP_OP_TYPE_INVALID);
		}

		/*
		 * If this is a Send request, then all we need to do is break
		 * out and begin the Data Segment processing below
		 */
		if (wr->wr_opcode == IBT_WRC_SEND) {
			break;
		}

		/*
		 * If this is an RDMA Read or RDMA Write request, then fill
		 * in the "Remote Address" header fields.
		 */
		if ((wr->wr_opcode == IBT_WRC_RDMAR) ||
		    (wr->wr_opcode == IBT_WRC_RDMAW)) {
			rc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
			    sizeof (hermon_hw_snd_wqe_ctrl_t));

			/*
			 * Build the Remote Address Segment for the WQE, using
			 * the information from the RC work request.
			 */
			HERMON_WQE_BUILD_REMADDR(qp, rc, &wr->wr.rc.rcwr.rdma);

			/* Update "ds" for filling in Data Segments (below) */
			ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)rc +
			    sizeof (hermon_hw_snd_wqe_remaddr_t));
			break;
		}

		/*
		 * If this is one of the Atomic type operations (i.e
		 * Compare-Swap or Fetch-Add), then fill in both the "Remote
		 * Address" header fields and the "Atomic" header fields.
		 */
		if ((wr->wr_opcode == IBT_WRC_CSWAP) ||
		    (wr->wr_opcode == IBT_WRC_FADD)) {
			rc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
			    sizeof (hermon_hw_snd_wqe_ctrl_t));
			at = (hermon_hw_snd_wqe_atomic_t *)((uintptr_t)rc +
			    sizeof (hermon_hw_snd_wqe_remaddr_t));

			/*
			 * Build the Remote Address and Atomic Segments for
			 * the WQE, using the information from the RC Atomic
			 * work request.
			 */
			HERMON_WQE_BUILD_RC_ATOMIC_REMADDR(qp, rc, wr);
			HERMON_WQE_BUILD_ATOMIC(qp, at, wr->wr.rc.rcwr.atomic);

			/* Update "ds" for filling in Data Segments (below) */
			ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)at +
			    sizeof (hermon_hw_snd_wqe_atomic_t));

			/*
			 * Update "nds" and "sgl" because Atomic requests have
			 * only a single Data Segment (and they are encoded
			 * somewhat differently in the work request).
			 */
			nds = 1;
			sgl = wr->wr_sgl;
			break;
		}

		/*
		 * If this is memory window Bind operation, then we call the
		 * hermon_wr_bind_check() routine to validate the request and
		 * to generate the updated RKey.  If this is successful, then
		 * we fill in the WQE's "Bind" header fields.
		 */
		if (wr->wr_opcode == IBT_WRC_BIND) {
			status = hermon_wr_bind_check(state, wr);
			if (status != DDI_SUCCESS) {
				return (status);
			}

			bn = (hermon_hw_snd_wqe_bind_t *)((uintptr_t)desc +
			    sizeof (hermon_hw_snd_wqe_ctrl_t));

			/*
			 * Build the Bind Memory Window Segments for the WQE,
			 * using the information from the RC Bind memory
			 * window work request.
			 */
			HERMON_WQE_BUILD_BIND(qp, bn, wr->wr.rc.rcwr.bind);

			/*
			 * Update the "ds" pointer.  Even though the "bind"
			 * operation requires no SGLs, this is necessary to
			 * facilitate the correct descriptor size calculations
			 * (below).
			 */
			ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)bn +
			    sizeof (hermon_hw_snd_wqe_bind_t));
			nds = 0;
		}
		break;

	case IBT_UC_SRV:
		/* Ensure that work request transport type matches QP type */
		if (qp->qp_serv_type != HERMON_QP_UC) {
			return (IBT_QP_SRV_TYPE_INVALID);
		}

		/*
		 * Validate the operation type.  For UC requests, we only
		 * allow "Send", "RDMA Write", and memory window "Bind".
		 * Note: Unlike RC, UC does not allow "RDMA Read" or "Atomic"
		 * operations
		 */
		if ((wr->wr_opcode != IBT_WRC_SEND) &&
		    (wr->wr_opcode != IBT_WRC_RDMAW) &&
		    (wr->wr_opcode != IBT_WRC_BIND)) {
			return (IBT_QP_OP_TYPE_INVALID);
		}

		/*
		 * If this is a Send request, then all we need to do is break
		 * out and begin the Data Segment processing below
		 */
		if (wr->wr_opcode == IBT_WRC_SEND) {
			break;
		}

		/*
		 * If this is an RDMA Write request, then fill in the "Remote
		 * Address" header fields.
		 */
		if (wr->wr_opcode == IBT_WRC_RDMAW) {
			uc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
			    sizeof (hermon_hw_snd_wqe_ctrl_t));

			/*
			 * Build the Remote Address Segment for the WQE, using
			 * the information from the UC work request.
			 */
			HERMON_WQE_BUILD_REMADDR(qp, uc, &wr->wr.uc.ucwr.rdma);

			/* Update "ds" for filling in Data Segments (below) */
			ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)uc +
			    sizeof (hermon_hw_snd_wqe_remaddr_t));
			break;
		}

		/*
		 * If this is memory window Bind operation, then we call the
		 * hermon_wr_bind_check() routine to validate the request and
		 * to generate the updated RKey.  If this is successful, then
		 * we fill in the WQE's "Bind" header fields.
		 */
		if (wr->wr_opcode == IBT_WRC_BIND) {
			status = hermon_wr_bind_check(state, wr);
			if (status != DDI_SUCCESS) {
				return (status);
			}

			bn = (hermon_hw_snd_wqe_bind_t *)((uintptr_t)desc +
			    sizeof (hermon_hw_snd_wqe_ctrl_t));

			/*
			 * Build the Bind Memory Window Segments for the WQE,
			 * using the information from the UC Bind memory
			 * window work request.
			 */
			HERMON_WQE_BUILD_BIND(qp, bn, wr->wr.uc.ucwr.bind);

			/*
			 * Update the "ds" pointer.  Even though the "bind"
			 * operation requires no SGLs, this is necessary to
			 * facilitate the correct descriptor size calculations
			 * (below).
			 */
			ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)bn +
			    sizeof (hermon_hw_snd_wqe_bind_t));
			nds = 0;
		}
		break;

	default:
		return (IBT_QP_SRV_TYPE_INVALID);
	}

	/*
	 * Now fill in the Data Segments (SGL) for the Send WQE based on
	 * the values setup above (i.e. "sgl", "nds", and the "ds" pointer).
	 * Start by checking for a valid number of SGL entries
	 */
	if (nds > qp->qp_sq_sgl) {
		return (IBT_QP_SGL_LEN_INVALID);
	}

	/*
	 * For each SGL in the Send Work Request, fill in the Send WQE's data
	 * segments.  Note: We skip any SGL with zero size because Hermon
	 * hardware cannot handle a zero for "byte_cnt" in the WQE.  Actually
	 * the encoding for zero means a 2GB transfer.
	 *
	 * First pass: count the non-empty segments so we know where the
	 * last data segment of this WQE will land.
	 */
	for (last_ds = num_ds, j = i; j < nds; j++) {
		if (sgl[j].ds_len != 0)
			last_ds++;	/* real last ds of wqe to fill */
	}

	/*
	 * Return the size of descriptor (in 16-byte chunks)
	 * For Hermon, we want them (for now) to be on stride size
	 * boundaries, which was implicit in Tavor/Arbel
	 */
	tmpsize = ((uintptr_t)&ds[last_ds] - (uintptr_t)desc);

	*size = tmpsize >> 0x4;

	/*
	 * Second pass: fill the data segments in reverse order (last
	 * first).  NOTE(review): presumably so the descriptor becomes
	 * valid back-to-front for the hardware -- confirm against the PRM.
	 */
	for (j = nds; --j >= i; ) {
		if (sgl[j].ds_len == 0) {
			continue;
		}

		/*
		 * Fill in the Data Segment(s) for the current WQE, using the
		 * information contained in the scatter-gather list of the
		 * work request.
		 */
		last_ds--;
		HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[last_ds], &sgl[j]);
	}

	return (DDI_SUCCESS);
}



/*
 * hermon_wqe_mlx_build()
 *    Context: Can be called from interrupt or base context.
 */
static int
hermon_wqe_mlx_build(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint64_t *desc, uint_t *size)
{
	hermon_ahhdl_t		ah;
	hermon_hw_udav_t	*udav;
	ib_lrh_hdr_t		*lrh;
	ib_grh_t		*grh;
	ib_bth_hdr_t		*bth;
	ib_deth_hdr_t		*deth;
	hermon_hw_wqe_sgl_t	*ds;
	ibt_wr_ds_t		*sgl;
	uint8_t			*mgmtclass, *hpoint, *hcount;
	uint32_t		nds, offset, pktlen;
	uint32_t		desc_sz;
	int			i, num_ds;
	int			tmpsize;

	ASSERT(MUTEX_HELD(&qp->qp_sq_lock));

	/* Initialize the information for the Data Segments */
	ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)desc +
	    sizeof (hermon_hw_mlx_wqe_nextctrl_t));

	/*
	 * Pull the address handle from the work request.  The UDAV will
	 * be used to answer some questions about the request.
1553 */ 1554 ah = (hermon_ahhdl_t)wr->wr.ud.udwr_dest->ud_ah; 1555 if (ah == NULL) { 1556 return (IBT_AH_HDL_INVALID); 1557 } 1558 mutex_enter(&ah->ah_lock); 1559 udav = ah->ah_udav; 1560 1561 /* 1562 * If the request is for QP1 and the destination LID is equal to 1563 * the Permissive LID, then return an error. This combination is 1564 * not allowed 1565 */ 1566 if ((udav->rlid == IB_LID_PERMISSIVE) && 1567 (qp->qp_is_special == HERMON_QP_GSI)) { 1568 mutex_exit(&ah->ah_lock); 1569 return (IBT_AH_HDL_INVALID); 1570 } 1571 1572 /* 1573 * Calculate the size of the packet headers, including the GRH 1574 * (if necessary) 1575 */ 1576 desc_sz = sizeof (ib_lrh_hdr_t) + sizeof (ib_bth_hdr_t) + 1577 sizeof (ib_deth_hdr_t); 1578 if (udav->grh) { 1579 desc_sz += sizeof (ib_grh_t); 1580 } 1581 1582 /* 1583 * Begin to build the first "inline" data segment for the packet 1584 * headers. Note: By specifying "inline" we can build the contents 1585 * of the MAD packet headers directly into the work queue (as part 1586 * descriptor). This has the advantage of both speeding things up 1587 * and of not requiring the driver to allocate/register any additional 1588 * memory for the packet headers. 1589 */ 1590 HERMON_WQE_BUILD_INLINE(qp, &ds[0], desc_sz); 1591 desc_sz += 4; 1592 1593 /* 1594 * Build Local Route Header (LRH) 1595 * We start here by building the LRH into a temporary location. 1596 * When we have finished we copy the LRH data into the descriptor. 1597 * 1598 * Notice that the VL values are hardcoded. This is not a problem 1599 * because VL15 is decided later based on the value in the MLX 1600 * transport "next/ctrl" header (see the "vl15" bit below), and it 1601 * is otherwise (meaning for QP1) chosen from the SL-to-VL table 1602 * values. This rule does not hold for loopback packets however 1603 * (all of which bypass the SL-to-VL tables) and it is the reason 1604 * that non-QP0 MADs are setup with VL hardcoded to zero below. 
1605 * 1606 * Notice also that Source LID is hardcoded to the Permissive LID 1607 * (0xFFFF). This is also not a problem because if the Destination 1608 * LID is not the Permissive LID, then the "slr" value in the MLX 1609 * transport "next/ctrl" header will be set to zero and the hardware 1610 * will pull the LID from value in the port. 1611 */ 1612 lrh = (ib_lrh_hdr_t *)((uintptr_t)&ds[0] + 4); 1613 pktlen = (desc_sz + 0x100) >> 2; 1614 HERMON_WQE_BUILD_MLX_LRH(lrh, qp, udav, pktlen); 1615 1616 /* 1617 * Build Global Route Header (GRH) 1618 * This is only built if necessary as defined by the "grh" bit in 1619 * the address vector. Note: We also calculate the offset to the 1620 * next header (BTH) based on whether or not the "grh" bit is set. 1621 */ 1622 if (udav->grh) { 1623 /* 1624 * If the request is for QP0, then return an error. The 1625 * combination of global routine (GRH) and QP0 is not allowed. 1626 */ 1627 if (qp->qp_is_special == HERMON_QP_SMI) { 1628 mutex_exit(&ah->ah_lock); 1629 return (IBT_AH_HDL_INVALID); 1630 } 1631 grh = (ib_grh_t *)((uintptr_t)lrh + sizeof (ib_lrh_hdr_t)); 1632 HERMON_WQE_BUILD_MLX_GRH(state, grh, qp, udav, pktlen); 1633 1634 bth = (ib_bth_hdr_t *)((uintptr_t)grh + sizeof (ib_grh_t)); 1635 } else { 1636 bth = (ib_bth_hdr_t *)((uintptr_t)lrh + sizeof (ib_lrh_hdr_t)); 1637 } 1638 mutex_exit(&ah->ah_lock); 1639 1640 1641 /* 1642 * Build Base Transport Header (BTH) 1643 * Notice that the M, PadCnt, and TVer fields are all set 1644 * to zero implicitly. This is true for all Management Datagrams 1645 * MADs whether GSI are SMI. 
1646 */ 1647 HERMON_WQE_BUILD_MLX_BTH(state, bth, qp, wr); 1648 1649 /* 1650 * Build Datagram Extended Transport Header (DETH) 1651 */ 1652 deth = (ib_deth_hdr_t *)((uintptr_t)bth + sizeof (ib_bth_hdr_t)); 1653 HERMON_WQE_BUILD_MLX_DETH(deth, qp); 1654 1655 /* Ensure that the Data Segment is aligned on a 16-byte boundary */ 1656 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)deth + sizeof (ib_deth_hdr_t)); 1657 ds = (hermon_hw_wqe_sgl_t *)(((uintptr_t)ds + 0xF) & ~0xF); 1658 nds = wr->wr_nds; 1659 sgl = wr->wr_sgl; 1660 num_ds = 0; 1661 1662 /* 1663 * Now fill in the Data Segments (SGL) for the MLX WQE based on the 1664 * values set up above (i.e. "sgl", "nds", and the "ds" pointer 1665 * Start by checking for a valid number of SGL entries 1666 */ 1667 if (nds > qp->qp_sq_sgl) { 1668 return (IBT_QP_SGL_LEN_INVALID); 1669 } 1670 1671 /* 1672 * For each SGL in the Send Work Request, fill in the MLX WQE's data 1673 * segments. Note: We skip any SGL with zero size because Hermon 1674 * hardware cannot handle a zero for "byte_cnt" in the WQE. Actually 1675 * the encoding for zero means a 2GB transfer. Because of this special 1676 * encoding in the hardware, we mask the requested length with 1677 * HERMON_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as 1678 * zero.) 1679 */ 1680 mgmtclass = hpoint = hcount = NULL; 1681 offset = 0; 1682 for (i = 0; i < nds; i++) { 1683 if (sgl[i].ds_len == 0) { 1684 continue; 1685 } 1686 1687 /* 1688 * Fill in the Data Segment(s) for the MLX send WQE, using 1689 * the information contained in the scatter-gather list of 1690 * the work request. 1691 */ 1692 HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[num_ds], &sgl[i]); 1693 1694 /* 1695 * Search through the contents of all MADs posted to QP0 to 1696 * initialize pointers to the places where Directed Route "hop 1697 * pointer", "hop count", and "mgmtclass" would be. Hermon 1698 * needs these updated (i.e. incremented or decremented, as 1699 * necessary) by software. 
1700 */ 1701 if (qp->qp_is_special == HERMON_QP_SMI) { 1702 1703 HERMON_SPECIAL_QP_DRMAD_GET_MGMTCLASS(mgmtclass, 1704 offset, sgl[i].ds_va, sgl[i].ds_len); 1705 1706 HERMON_SPECIAL_QP_DRMAD_GET_HOPPOINTER(hpoint, 1707 offset, sgl[i].ds_va, sgl[i].ds_len); 1708 1709 HERMON_SPECIAL_QP_DRMAD_GET_HOPCOUNT(hcount, 1710 offset, sgl[i].ds_va, sgl[i].ds_len); 1711 1712 offset += sgl[i].ds_len; 1713 } 1714 num_ds++; 1715 } 1716 1717 /* 1718 * Hermon's Directed Route MADs need to have the "hop pointer" 1719 * incremented/decremented (as necessary) depending on whether it is 1720 * currently less than or greater than the "hop count" (i.e. whether 1721 * the MAD is a request or a response.) 1722 */ 1723 if (qp->qp_is_special == HERMON_QP_SMI) { 1724 HERMON_SPECIAL_QP_DRMAD_DO_HOPPOINTER_MODIFY(*mgmtclass, 1725 *hpoint, *hcount); 1726 } 1727 1728 /* 1729 * Now fill in the ICRC Data Segment. This data segment is inlined 1730 * just like the packets headers above, but it is only four bytes and 1731 * set to zero (to indicate that we wish the hardware to generate ICRC. 1732 */ 1733 HERMON_WQE_BUILD_INLINE_ICRC(qp, &ds[num_ds], 4, 0); 1734 num_ds++; 1735 1736 /* 1737 * Return the size of descriptor (in 16-byte chunks) 1738 * For Hermon, we want them (for now) to be on stride size 1739 * boundaries, which was implicit in Tavor/Arbel 1740 */ 1741 tmpsize = ((uintptr_t)&ds[num_ds] - (uintptr_t)desc); 1742 1743 *size = tmpsize >> 0x04; 1744 1745 return (DDI_SUCCESS); 1746 } 1747 1748 1749 1750 /* 1751 * hermon_wqe_recv_build() 1752 * Context: Can be called from interrupt or base context. 
 */
/* ARGSUSED */
static int
hermon_wqe_recv_build(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_recv_wr_t *wr, uint64_t *desc)
{
	hermon_hw_wqe_sgl_t	*ds;
	int			i, num_ds;

	ASSERT(MUTEX_HELD(&qp->qp_rq_lock));

	/*
	 * Fill in the Data Segments (SGL) for the Recv WQE  - don't
	 * need to have a reserved for the ctrl, there is none on the
	 * recv queue for hermon, but will need to put an invalid
	 * (null) scatter pointer per PRM
	 */
	ds = (hermon_hw_wqe_sgl_t *)(uintptr_t)desc;
	num_ds = 0;

	/* Check for valid number of SGL entries */
	if (wr->wr_nds > qp->qp_rq_sgl) {
		return (IBT_QP_SGL_LEN_INVALID);
	}

	/*
	 * For each SGL in the Recv Work Request, fill in the Recv WQE's data
	 * segments.  Note: We skip any SGL with zero size because Hermon
	 * hardware cannot handle a zero for "byte_cnt" in the WQE.  Actually
	 * the encoding for zero means a 2GB transfer.  Because of this
	 * special encoding in the hardware, we mask the requested length
	 * with HERMON_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded
	 * as zero.)
	 */
	for (i = 0; i < wr->wr_nds; i++) {
		if (wr->wr_sgl[i].ds_len == 0) {
			continue;
		}

		/*
		 * Fill in the Data Segment(s) for the receive WQE, using the
		 * information contained in the scatter-gather list of the
		 * work request.
		 */
		HERMON_WQE_BUILD_DATA_SEG_RECV(&ds[num_ds], &wr->wr_sgl[i]);
		num_ds++;
	}

	/*
	 * Put the null sgl pointer as well if needed; this terminates
	 * the scatter list when fewer than qp_rq_sgl entries were used.
	 */
	if (num_ds < qp->qp_rq_sgl) {
		HERMON_WQE_BUILD_DATA_SEG_RECV(&ds[num_ds], &null_sgl);
	}

	return (DDI_SUCCESS);
}



/*
 * hermon_wqe_srq_build()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
static int
hermon_wqe_srq_build(hermon_state_t *state, hermon_srqhdl_t srq,
    ibt_recv_wr_t *wr, uint64_t *desc)
{
	hermon_hw_wqe_sgl_t	*ds;
	int			i, num_ds;

	ASSERT(MUTEX_HELD(&srq->srq_lock));

	/*
	 * Fill in the Data Segments (SGL) for the Recv WQE.  Unlike the
	 * ordinary receive queue, SRQ WQEs carry a "next" header before
	 * the scatter list, so skip over it here.
	 */
	ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)desc +
	    sizeof (hermon_hw_srq_wqe_next_t));
	num_ds = 0;

	/* Check for valid number of SGL entries */
	if (wr->wr_nds > srq->srq_wq_sgl) {
		return (IBT_QP_SGL_LEN_INVALID);
	}

	/*
	 * For each SGL in the Recv Work Request, fill in the Recv WQE's data
	 * segments.  Note: We skip any SGL with zero size because Hermon
	 * hardware cannot handle a zero for "byte_cnt" in the WQE.  Actually
	 * the encoding for zero means a 2GB transfer.  Because of this
	 * special encoding in the hardware, we mask the requested length
	 * with HERMON_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded
	 * as zero.)
	 */
	for (i = 0; i < wr->wr_nds; i++) {
		if (wr->wr_sgl[i].ds_len == 0) {
			continue;
		}

		/*
		 * Fill in the Data Segment(s) for the receive WQE, using the
		 * information contained in the scatter-gather list of the
		 * work request.
		 */
		HERMON_WQE_BUILD_DATA_SEG_RECV(&ds[num_ds], &wr->wr_sgl[i]);
		num_ds++;
	}

	/*
	 * Put in the null sgl pointer as well, if needed
	 */
	if (num_ds < srq->srq_wq_sgl) {
		HERMON_WQE_BUILD_DATA_SEG_RECV(&ds[num_ds], &null_sgl);
	}

	return (DDI_SUCCESS);
}


/*
 * hermon_wr_get_immediate()
 *    Context: Can be called from interrupt or base context.
 */
static uint32_t
hermon_wr_get_immediate(ibt_send_wr_t *wr)
{
	/*
	 * This routine extracts the "immediate data" from the appropriate
	 * location in the IBTF work request.  Because of the way the
	 * work request structure is defined, the location for this data
	 * depends on the actual work request operation type.
	 */

	/* For RDMA Write, test if RC or UC */
	if (wr->wr_opcode == IBT_WRC_RDMAW) {
		if (wr->wr_trans == IBT_RC_SRV) {
			return (wr->wr.rc.rcwr.rdma.rdma_immed);
		} else {	/* IBT_UC_SRV */
			return (wr->wr.uc.ucwr.rdma.rdma_immed);
		}
	}

	/* For Send, test if RC, UD, or UC */
	if (wr->wr_opcode == IBT_WRC_SEND) {
		if (wr->wr_trans == IBT_RC_SRV) {
			return (wr->wr.rc.rcwr.send_immed);
		} else if (wr->wr_trans == IBT_UD_SRV) {
			return (wr->wr.ud.udwr_immed);
		} else {	/* IBT_UC_SRV */
			return (wr->wr.uc.ucwr.send_immed);
		}
	}

	/*
	 * If any other type of request, then immediate is undefined
	 */
	return (0);
}

/*
 * hermon_wqe_headroom()
 *    Context: can be called from interrupt or base, currently only from
 *    base context.
 *    Routine that fills in the headroom for the Send Queue
 */

static void
hermon_wqe_headroom(uint_t from, hermon_qphdl_t qp)
{
	uint32_t	*wqe_start, *wqe_top, *wqe_base, qsize;
	int		hdrmwqes, wqesizebytes, sectperwqe;
	uint32_t	invalue;
	int		i, j;

	qsize = qp->qp_sq_bufsz;
	wqesizebytes = 1 << qp->qp_sq_log_wqesz;
	sectperwqe = wqesizebytes >> 6;	/* 64 bytes/section */
	hdrmwqes = qp->qp_sq_hdrmwqes;
	wqe_base = (uint32_t *)HERMON_QP_SQ_ENTRY(qp, 0);
	wqe_top = (uint32_t *)HERMON_QP_SQ_ENTRY(qp, qsize);
	wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp, from);

	/*
	 * Invalidate the first 32-bit word of each 64-byte section of the
	 * next "hdrmwqes" WQEs, starting at index "from" and wrapping at
	 * the end of the queue buffer.
	 */
	for (i = 0; i < hdrmwqes; i++) {
		for (j = 0; j < sectperwqe; j++) {
			if (j == 0) {	/* 1st section of wqe */
				/*
				 * Preserve ownership bit (bit 31) while
				 * setting all other bits of the word.
				 */
				invalue = ddi_get32(qp->qp_wqinfo.qa_acchdl,
				    wqe_start) | 0x7FFFFFFF;
			} else {
				/* or just invalidate it */
				invalue = 0xFFFFFFFF;
			}
			ddi_put32(qp->qp_wqinfo.qa_acchdl, wqe_start, invalue);
			wqe_start += 16;	/* move 64 bytes */
		}
		if (wqe_start == wqe_top)	/* hit the end of the queue */
			wqe_start = wqe_base;	/* wrap to start */
	}
}

/*
 * hermon_wqe_sync()
 *    Context: Can be called from interrupt or base context.
 */
static void
hermon_wqe_sync(void *hdl, uint_t sync_from, uint_t sync_to,
    uint_t sync_type, uint_t flag)
{
	hermon_qphdl_t		qp;
	hermon_srqhdl_t		srq;
	uint64_t		*wqe_from, *wqe_to;
	uint64_t		*wq_base, *wq_top, *qp_base;
	ddi_dma_handle_t	dmahdl;
	off_t			offset;
	size_t			length;
	uint32_t		qsize;
	int			status;

	/*
	 * "hdl" is either a QP handle (HERMON_WR_SEND/HERMON_WR_RECV) or
	 * an SRQ handle (HERMON_WR_SRQ); "sync_type" selects which.
	 */
	if (sync_type == HERMON_WR_SRQ) {
		srq = (hermon_srqhdl_t)hdl;
		/* Get the DMA handle from SRQ context */
		dmahdl = srq->srq_mrhdl->mr_bindinfo.bi_dmahdl;
		/* get base addr of the buffer */
		qp_base = (uint64_t *)(void *)srq->srq_wq_buf;
	} else {
		qp = (hermon_qphdl_t)hdl;
		/* Get the DMA handle from QP context */
		dmahdl = qp->qp_mrhdl->mr_bindinfo.bi_dmahdl;
		/*
		 * Determine the base address of the QP buffer.  When the
		 * send queue starts at offset 0 the SQ buffer is the base
		 * of the registered region; otherwise the RQ buffer is.
		 */
		if (qp->qp_sq_baseaddr == 0) {
			qp_base = (uint64_t *)(void *)(qp->qp_sq_buf);
		} else {
			qp_base = (uint64_t *)(void *)(qp->qp_rq_buf);
		}
	}

	/*
	 * Depending on the type of the work queue, we grab information
	 * about the address ranges we need to DMA sync.
	 */

	if (sync_type == HERMON_WR_SEND) {
		wqe_from = HERMON_QP_SQ_ENTRY(qp, sync_from);
		wqe_to = HERMON_QP_SQ_ENTRY(qp, sync_to);
		qsize = qp->qp_sq_bufsz;

		wq_base = HERMON_QP_SQ_ENTRY(qp, 0);
		wq_top = HERMON_QP_SQ_ENTRY(qp, qsize);
	} else if (sync_type == HERMON_WR_RECV) {
		wqe_from = HERMON_QP_RQ_ENTRY(qp, sync_from);
		wqe_to = HERMON_QP_RQ_ENTRY(qp, sync_to);
		qsize = qp->qp_rq_bufsz;

		wq_base = HERMON_QP_RQ_ENTRY(qp, 0);
		wq_top = HERMON_QP_RQ_ENTRY(qp, qsize);
	} else {
		wqe_from = HERMON_SRQ_WQ_ENTRY(srq, sync_from);
		wqe_to = HERMON_SRQ_WQ_ENTRY(srq, sync_to);
		qsize = srq->srq_wq_bufsz;

		wq_base = HERMON_SRQ_WQ_ENTRY(srq, 0);
		wq_top = HERMON_SRQ_WQ_ENTRY(srq, qsize);
	}

	/*
	 * There are two possible cases for the beginning and end of the WQE
	 * chain we are trying to sync.  Either this is the simple case, where
	 * the end of the chain is below the beginning of the chain, or it is
	 * the "wrap-around" case, where the end of the chain has wrapped over
	 * the end of the queue.  In the former case, we simply need to
	 * calculate the span from beginning to end and sync it.  In the
	 * latter case, however, we need to calculate the span from the top of
	 * the work queue to the end of the chain and sync that, and then we
	 * need to find the other portion (from beginning of chain to end of
	 * queue) and sync that as well.  Note: if the "top to end" span is
	 * actually zero length, then we don't do a DMA sync because a zero
	 * length DMA sync unnecessarily syncs the entire work queue.
	 */
	if (wqe_to > wqe_from) {
		/* "From Beginning to End" */

		offset = (off_t)((uintptr_t)wqe_from - (uintptr_t)qp_base);
		length = (size_t)((uintptr_t)wqe_to - (uintptr_t)wqe_from);

		status = ddi_dma_sync(dmahdl, offset, length, flag);
		if (status != DDI_SUCCESS) {
			return;
		}
	} else {
		/* "From Top to End" */

		offset = (off_t)((uintptr_t)wq_base - (uintptr_t)qp_base);
		length = (size_t)((uintptr_t)wqe_to - (uintptr_t)wq_base);
		if (length) {
			status = ddi_dma_sync(dmahdl, offset, length, flag);
			if (status != DDI_SUCCESS) {
				return;
			}
		}

		/* "From Beginning to Bottom" */

		offset = (off_t)((uintptr_t)wqe_from - (uintptr_t)qp_base);
		length = (size_t)((uintptr_t)wq_top - (uintptr_t)wqe_from);
		status = ddi_dma_sync(dmahdl, offset, length, flag);
		if (status != DDI_SUCCESS) {
			return;
		}
	}
}


/*
 * hermon_wr_bind_check()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
static int
hermon_wr_bind_check(hermon_state_t *state, ibt_send_wr_t *wr)
{
	ibt_bind_flags_t	bind_flags;
	uint64_t		vaddr, len;
	uint64_t		reg_start_addr, reg_end_addr;
	hermon_mwhdl_t		mw;
	hermon_mrhdl_t		mr;
	hermon_rsrc_t		*mpt;
	uint32_t		new_rkey;

	/* Check for a valid Memory Window handle in the WR */
	mw = (hermon_mwhdl_t)wr->wr.rc.rcwr.bind->bind_ibt_mw_hdl;
	if (mw == NULL) {
		return (IBT_MW_HDL_INVALID);
	}

	/* Check for a valid Memory Region handle in the WR */
	mr = (hermon_mrhdl_t)wr->wr.rc.rcwr.bind->bind_ibt_mr_hdl;
	if (mr == NULL) {
		return (IBT_MR_HDL_INVALID);
	}

	mutex_enter(&mr->mr_lock);
	mutex_enter(&mw->mr_lock);

	/*
	 * Check here to see if the memory region has already been partially
	 * deregistered as a result of a hermon_umap_umemlock_cb() callback.
	 * If so, this is an error, return failure.
	 */
	if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
		mutex_exit(&mr->mr_lock);
		mutex_exit(&mw->mr_lock);
		return (IBT_MR_HDL_INVALID);
	}

	/* Check for a valid Memory Window RKey (i.e. a matching RKey) */
	if (mw->mr_rkey != wr->wr.rc.rcwr.bind->bind_rkey) {
		mutex_exit(&mr->mr_lock);
		mutex_exit(&mw->mr_lock);
		return (IBT_MR_RKEY_INVALID);
	}

	/* Check for a valid Memory Region LKey (i.e. a matching LKey) */
	if (mr->mr_lkey != wr->wr.rc.rcwr.bind->bind_lkey) {
		mutex_exit(&mr->mr_lock);
		mutex_exit(&mw->mr_lock);
		return (IBT_MR_LKEY_INVALID);
	}

	/*
	 * Now check for valid "vaddr" and "len".  Note:  We don't check the
	 * "vaddr" range when "len == 0" (i.e. on unbind operations)
	 */
	len = wr->wr.rc.rcwr.bind->bind_len;
	if (len != 0) {
		vaddr = wr->wr.rc.rcwr.bind->bind_va;
		reg_start_addr = mr->mr_bindinfo.bi_addr;
		reg_end_addr = mr->mr_bindinfo.bi_addr +
		    (mr->mr_bindinfo.bi_len - 1);
		if ((vaddr < reg_start_addr) || (vaddr > reg_end_addr)) {
			mutex_exit(&mr->mr_lock);
			mutex_exit(&mw->mr_lock);
			return (IBT_MR_VA_INVALID);
		}
		vaddr = (vaddr + len) - 1;
		if (vaddr > reg_end_addr) {
			mutex_exit(&mr->mr_lock);
			mutex_exit(&mw->mr_lock);
			return (IBT_MR_LEN_INVALID);
		}
	}

	/*
	 * Validate the bind access flags.  Remote Write and Atomic access
	 * for the Memory Window require that Local Write access be set in
	 * the corresponding Memory Region.
	 */
	bind_flags = wr->wr.rc.rcwr.bind->bind_flags;
	if (((bind_flags & IBT_WR_BIND_WRITE) ||
	    (bind_flags & IBT_WR_BIND_ATOMIC)) &&
	    !(mr->mr_accflag & IBT_MR_LOCAL_WRITE)) {
		mutex_exit(&mr->mr_lock);
		mutex_exit(&mw->mr_lock);
		return (IBT_MR_ACCESS_REQ_INVALID);
	}

	/* Calculate the new RKey for the Memory Window */
	mpt = mw->mr_mptrsrcp;
	new_rkey = hermon_mr_keycalc(mpt->hr_indx);
	new_rkey = hermon_mr_key_swap(new_rkey);

	/* Return the new RKey to the caller and update the window */
	wr->wr.rc.rcwr.bind->bind_rkey_out = new_rkey;
	mw->mr_rkey = new_rkey;

	mutex_exit(&mr->mr_lock);
	mutex_exit(&mw->mr_lock);
	return (DDI_SUCCESS);
}


/*
 * hermon_wrid_from_reset_handling()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
hermon_wrid_from_reset_handling(hermon_state_t *state, hermon_qphdl_t qp)
{
	hermon_workq_hdr_t	*swq, *rwq;
	uint_t			qp_srq_en;

	/* user-mappable QPs track WRIDs in userland; nothing to do here */
	if (qp->qp_is_umap)
		return (DDI_SUCCESS);

	/* grab the cq lock(s) to modify the wqavl tree */
	mutex_enter(&qp->qp_rq_cqhdl->cq_lock);
#ifdef __lock_lint
	mutex_enter(&qp->qp_sq_cqhdl->cq_lock);
#else
	if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl)
		mutex_enter(&qp->qp_sq_cqhdl->cq_lock);
#endif

	/* Chain the newly allocated work queue header to the CQ's list */
	hermon_cq_workq_add(qp->qp_sq_cqhdl, &qp->qp_sq_wqavl);

	swq = qp->qp_sq_wqhdr;
	swq->wq_head = 0;
	swq->wq_tail = 0;
	swq->wq_full = 0;

	/*
	 * Now we repeat all the above operations for the receive work queue,
	 * or shared receive work queue.
	 *
	 * Note: We still use the 'qp_rq_cqhdl' even in the SRQ case.
2205 */ 2206 qp_srq_en = qp->qp_srq_en; 2207 2208 #ifdef __lock_lint 2209 mutex_enter(&qp->qp_srqhdl->srq_lock); 2210 #else 2211 if (qp_srq_en == HERMON_QP_SRQ_ENABLED) { 2212 mutex_enter(&qp->qp_srqhdl->srq_lock); 2213 } else { 2214 rwq = qp->qp_rq_wqhdr; 2215 rwq->wq_head = 0; 2216 rwq->wq_tail = 0; 2217 rwq->wq_full = 0; 2218 qp->qp_rq_wqecntr = 0; 2219 } 2220 #endif 2221 hermon_cq_workq_add(qp->qp_rq_cqhdl, &qp->qp_rq_wqavl); 2222 2223 #ifdef __lock_lint 2224 mutex_exit(&qp->qp_srqhdl->srq_lock); 2225 #else 2226 if (qp_srq_en == HERMON_QP_SRQ_ENABLED) { 2227 mutex_exit(&qp->qp_srqhdl->srq_lock); 2228 } 2229 #endif 2230 2231 #ifdef __lock_lint 2232 mutex_exit(&qp->qp_sq_cqhdl->cq_lock); 2233 #else 2234 if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl) 2235 mutex_exit(&qp->qp_sq_cqhdl->cq_lock); 2236 #endif 2237 mutex_exit(&qp->qp_rq_cqhdl->cq_lock); 2238 return (DDI_SUCCESS); 2239 } 2240 2241 2242 /* 2243 * hermon_wrid_to_reset_handling() 2244 * Context: Can be called from interrupt or base context. 2245 */ 2246 int 2247 hermon_wrid_to_reset_handling(hermon_state_t *state, hermon_qphdl_t qp) 2248 { 2249 uint_t qp_srq_en; 2250 2251 if (qp->qp_is_umap) 2252 return (DDI_SUCCESS); 2253 2254 /* 2255 * If there are unpolled entries in these CQs, they are 2256 * polled/flushed. 2257 * Grab the CQ lock(s) before manipulating the lists. 2258 */ 2259 mutex_enter(&qp->qp_rq_cqhdl->cq_lock); 2260 #ifdef __lock_lint 2261 mutex_enter(&qp->qp_sq_cqhdl->cq_lock); 2262 #else 2263 if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl) 2264 mutex_enter(&qp->qp_sq_cqhdl->cq_lock); 2265 #endif 2266 2267 qp_srq_en = qp->qp_srq_en; 2268 #ifdef __lock_lint 2269 mutex_enter(&qp->qp_srqhdl->srq_lock); 2270 #else 2271 if (qp_srq_en == HERMON_QP_SRQ_ENABLED) { 2272 mutex_enter(&qp->qp_srqhdl->srq_lock); 2273 } 2274 #endif 2275 /* 2276 * Flush the entries on the CQ for this QP's QPN. 
2277 */ 2278 hermon_cq_entries_flush(state, qp); 2279 2280 #ifdef __lock_lint 2281 mutex_exit(&qp->qp_srqhdl->srq_lock); 2282 #else 2283 if (qp_srq_en == HERMON_QP_SRQ_ENABLED) { 2284 mutex_exit(&qp->qp_srqhdl->srq_lock); 2285 } 2286 #endif 2287 2288 hermon_cq_workq_remove(qp->qp_rq_cqhdl, &qp->qp_rq_wqavl); 2289 hermon_cq_workq_remove(qp->qp_sq_cqhdl, &qp->qp_sq_wqavl); 2290 2291 #ifdef __lock_lint 2292 mutex_exit(&qp->qp_sq_cqhdl->cq_lock); 2293 #else 2294 if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl) 2295 mutex_exit(&qp->qp_sq_cqhdl->cq_lock); 2296 #endif 2297 mutex_exit(&qp->qp_rq_cqhdl->cq_lock); 2298 2299 return (IBT_SUCCESS); 2300 } 2301 2302 2303 /* 2304 * hermon_wrid_get_entry() 2305 * Context: Can be called from interrupt or base context. 2306 */ 2307 uint64_t 2308 hermon_wrid_get_entry(hermon_cqhdl_t cq, hermon_hw_cqe_t *cqe) 2309 { 2310 hermon_workq_avl_t *wqa; 2311 hermon_workq_hdr_t *wq; 2312 uint64_t wrid; 2313 uint_t send_or_recv, qpnum; 2314 uint32_t indx; 2315 2316 /* 2317 * Determine whether this CQE is a send or receive completion. 2318 */ 2319 send_or_recv = HERMON_CQE_SENDRECV_GET(cq, cqe); 2320 2321 /* Find the work queue for this QP number (send or receive side) */ 2322 qpnum = HERMON_CQE_QPNUM_GET(cq, cqe); 2323 wqa = hermon_wrid_wqavl_find(cq, qpnum, send_or_recv); 2324 wq = wqa->wqa_wq; 2325 2326 /* 2327 * Regardless of whether the completion is the result of a "success" 2328 * or a "failure", we lock the list of "containers" and attempt to 2329 * search for the the first matching completion (i.e. the first WR 2330 * with a matching WQE addr and size). Once we find it, we pull out 2331 * the "wrid" field and return it (see below). XXX Note: One possible 2332 * future enhancement would be to enable this routine to skip over 2333 * any "unsignaled" completions to go directly to the next "signaled" 2334 * entry on success. 
2335 */ 2336 indx = HERMON_CQE_WQEADDRSZ_GET(cq, cqe) & wq->wq_mask; 2337 wrid = wq->wq_wrid[indx]; 2338 if (wqa->wqa_srq_en) { 2339 struct hermon_sw_srq_s *srq; 2340 uint64_t *desc; 2341 2342 /* put wqe back on the srq free list */ 2343 srq = wqa->wqa_srq; 2344 mutex_enter(&srq->srq_lock); 2345 desc = HERMON_SRQ_WQE_ADDR(srq, wq->wq_tail); 2346 ((uint16_t *)desc)[1] = htons(indx); 2347 wq->wq_tail = indx; 2348 mutex_exit(&srq->srq_lock); 2349 } else { 2350 wq->wq_head = (indx + 1) & wq->wq_mask; 2351 wq->wq_full = 0; 2352 } 2353 2354 return (wrid); 2355 } 2356 2357 2358 int 2359 hermon_wrid_workq_compare(const void *p1, const void *p2) 2360 { 2361 hermon_workq_compare_t *cmpp; 2362 hermon_workq_avl_t *curr; 2363 2364 cmpp = (hermon_workq_compare_t *)p1; 2365 curr = (hermon_workq_avl_t *)p2; 2366 2367 if (cmpp->cmp_qpn < curr->wqa_qpn) 2368 return (-1); 2369 else if (cmpp->cmp_qpn > curr->wqa_qpn) 2370 return (+1); 2371 else if (cmpp->cmp_type < curr->wqa_type) 2372 return (-1); 2373 else if (cmpp->cmp_type > curr->wqa_type) 2374 return (+1); 2375 else 2376 return (0); 2377 } 2378 2379 2380 /* 2381 * hermon_wrid_workq_find() 2382 * Context: Can be called from interrupt or base context. 2383 */ 2384 static hermon_workq_avl_t * 2385 hermon_wrid_wqavl_find(hermon_cqhdl_t cq, uint_t qpn, uint_t wq_type) 2386 { 2387 hermon_workq_avl_t *curr; 2388 hermon_workq_compare_t cmp; 2389 2390 /* 2391 * Walk the CQ's work queue list, trying to find a send or recv queue 2392 * with the same QP number. We do this even if we are going to later 2393 * create a new entry because it helps us easily find the end of the 2394 * list. 2395 */ 2396 cmp.cmp_qpn = qpn; 2397 cmp.cmp_type = wq_type; 2398 #ifdef __lock_lint 2399 hermon_wrid_workq_compare(NULL, NULL); 2400 #endif 2401 curr = avl_find(&cq->cq_wrid_wqhdr_avl_tree, &cmp, NULL); 2402 2403 return (curr); 2404 } 2405 2406 2407 /* 2408 * hermon_wrid_wqhdr_create() 2409 * Context: Can be called from base context. 
2410 */ 2411 /* ARGSUSED */ 2412 hermon_workq_hdr_t * 2413 hermon_wrid_wqhdr_create(int bufsz) 2414 { 2415 hermon_workq_hdr_t *wqhdr; 2416 2417 /* 2418 * Allocate space for the wqhdr, and an array to record all the wrids. 2419 */ 2420 wqhdr = (hermon_workq_hdr_t *)kmem_zalloc(sizeof (*wqhdr), KM_NOSLEEP); 2421 if (wqhdr == NULL) { 2422 return (NULL); 2423 } 2424 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*wqhdr)) 2425 wqhdr->wq_wrid = kmem_zalloc(bufsz * sizeof (uint64_t), KM_NOSLEEP); 2426 if (wqhdr->wq_wrid == NULL) { 2427 kmem_free(wqhdr, sizeof (*wqhdr)); 2428 return (NULL); 2429 } 2430 wqhdr->wq_size = bufsz; 2431 wqhdr->wq_mask = bufsz - 1; 2432 2433 return (wqhdr); 2434 } 2435 2436 void 2437 hermon_wrid_wqhdr_destroy(hermon_workq_hdr_t *wqhdr) 2438 { 2439 kmem_free(wqhdr->wq_wrid, wqhdr->wq_size * sizeof (uint64_t)); 2440 kmem_free(wqhdr, sizeof (*wqhdr)); 2441 } 2442 2443 2444 /* 2445 * hermon_cq_workq_add() 2446 * Context: Can be called from interrupt or base context. 2447 */ 2448 static void 2449 hermon_cq_workq_add(hermon_cqhdl_t cq, hermon_workq_avl_t *wqavl) 2450 { 2451 hermon_workq_compare_t cmp; 2452 avl_index_t where; 2453 2454 cmp.cmp_qpn = wqavl->wqa_qpn; 2455 cmp.cmp_type = wqavl->wqa_type; 2456 #ifdef __lock_lint 2457 hermon_wrid_workq_compare(NULL, NULL); 2458 #endif 2459 (void) avl_find(&cq->cq_wrid_wqhdr_avl_tree, &cmp, &where); 2460 avl_insert(&cq->cq_wrid_wqhdr_avl_tree, wqavl, where); 2461 } 2462 2463 2464 /* 2465 * hermon_cq_workq_remove() 2466 * Context: Can be called from interrupt or base context. 2467 */ 2468 static void 2469 hermon_cq_workq_remove(hermon_cqhdl_t cq, hermon_workq_avl_t *wqavl) 2470 { 2471 #ifdef __lock_lint 2472 hermon_wrid_workq_compare(NULL, NULL); 2473 #endif 2474 avl_remove(&cq->cq_wrid_wqhdr_avl_tree, wqavl); 2475 } 2476