1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * hermon_qp.c 29 * Hermon Queue Pair Processing Routines 30 * 31 * Implements all the routines necessary for allocating, freeing, and 32 * querying the Hermon queue pairs. 
33 */ 34 35 #include <sys/types.h> 36 #include <sys/conf.h> 37 #include <sys/ddi.h> 38 #include <sys/sunddi.h> 39 #include <sys/modctl.h> 40 #include <sys/bitmap.h> 41 #include <sys/sysmacros.h> 42 43 #include <sys/ib/adapters/hermon/hermon.h> 44 #include <sys/ib/ib_pkt_hdrs.h> 45 46 static int hermon_qp_create_qpn(hermon_state_t *state, hermon_qphdl_t qp, 47 hermon_rsrc_t *qpc); 48 static int hermon_qpn_avl_compare(const void *q, const void *e); 49 static int hermon_special_qp_rsrc_alloc(hermon_state_t *state, 50 ibt_sqp_type_t type, uint_t port, hermon_rsrc_t **qp_rsrc); 51 static int hermon_special_qp_rsrc_free(hermon_state_t *state, 52 ibt_sqp_type_t type, uint_t port); 53 static void hermon_qp_sgl_to_logwqesz(hermon_state_t *state, uint_t num_sgl, 54 uint_t real_max_sgl, hermon_qp_wq_type_t wq_type, 55 uint_t *logwqesz, uint_t *max_sgl); 56 57 /* 58 * hermon_qp_alloc() 59 * Context: Can be called only from user or kernel context. 60 */ 61 int 62 hermon_qp_alloc(hermon_state_t *state, hermon_qp_info_t *qpinfo, 63 uint_t sleepflag) 64 { 65 hermon_rsrc_t *qpc, *rsrc; 66 hermon_umap_db_entry_t *umapdb; 67 hermon_qphdl_t qp; 68 ibt_qp_alloc_attr_t *attr_p; 69 ibt_qp_type_t type; 70 hermon_qp_wq_type_t swq_type; 71 ibtl_qp_hdl_t ibt_qphdl; 72 ibt_chan_sizes_t *queuesz_p; 73 ib_qpn_t *qpn; 74 hermon_qphdl_t *qphdl; 75 ibt_mr_attr_t mr_attr; 76 hermon_mr_options_t mr_op; 77 hermon_srqhdl_t srq; 78 hermon_pdhdl_t pd; 79 hermon_cqhdl_t sq_cq, rq_cq; 80 hermon_mrhdl_t mr; 81 uint64_t value, qp_desc_off; 82 uint64_t *thewqe, thewqesz; 83 uint32_t *sq_buf, *rq_buf; 84 uint32_t log_qp_sq_size, log_qp_rq_size; 85 uint32_t sq_size, rq_size; 86 uint32_t sq_depth, rq_depth; 87 uint32_t sq_wqe_size, rq_wqe_size, wqesz_shift; 88 uint32_t max_sgl, max_recv_sgl, uarpg; 89 uint_t qp_is_umap; 90 uint_t qp_srq_en, i, j; 91 int status, flag; 92 93 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr_p, *queuesz_p)) 94 95 /* 96 * Extract the necessary info from the hermon_qp_info_t structure 97 
*/ 98 attr_p = qpinfo->qpi_attrp; 99 type = qpinfo->qpi_type; 100 ibt_qphdl = qpinfo->qpi_ibt_qphdl; 101 queuesz_p = qpinfo->qpi_queueszp; 102 qpn = qpinfo->qpi_qpn; 103 qphdl = &qpinfo->qpi_qphdl; 104 105 /* 106 * Determine whether QP is being allocated for userland access or 107 * whether it is being allocated for kernel access. If the QP is 108 * being allocated for userland access, then lookup the UAR 109 * page number for the current process. Note: If this is not found 110 * (e.g. if the process has not previously open()'d the Hermon driver), 111 * then an error is returned. 112 */ 113 114 115 qp_is_umap = (attr_p->qp_alloc_flags & IBT_QP_USER_MAP) ? 1 : 0; 116 if (qp_is_umap) { 117 status = hermon_umap_db_find(state->hs_instance, ddi_get_pid(), 118 MLNX_UMAP_UARPG_RSRC, &value, 0, NULL); 119 if (status != DDI_SUCCESS) { 120 status = IBT_INVALID_PARAM; 121 goto qpalloc_fail; 122 } 123 uarpg = ((hermon_rsrc_t *)(uintptr_t)value)->hr_indx; 124 } else { 125 uarpg = state->hs_kernel_uar_index; 126 } 127 128 /* 129 * Determine whether QP is being associated with an SRQ 130 */ 131 qp_srq_en = (attr_p->qp_alloc_flags & IBT_QP_USES_SRQ) ? 
1 : 0; 132 if (qp_srq_en) { 133 /* 134 * Check for valid SRQ handle pointers 135 */ 136 if (attr_p->qp_ibc_srq_hdl == NULL) { 137 status = IBT_SRQ_HDL_INVALID; 138 goto qpalloc_fail; 139 } 140 srq = (hermon_srqhdl_t)attr_p->qp_ibc_srq_hdl; 141 } 142 143 /* 144 * Check for valid QP service type (only UD/RC/UC supported) 145 */ 146 if (((type != IBT_UD_RQP) && (type != IBT_RC_RQP) && 147 (type != IBT_UC_RQP))) { 148 status = IBT_QP_SRV_TYPE_INVALID; 149 goto qpalloc_fail; 150 } 151 152 153 /* 154 * Check for valid PD handle pointer 155 */ 156 if (attr_p->qp_pd_hdl == NULL) { 157 status = IBT_PD_HDL_INVALID; 158 goto qpalloc_fail; 159 } 160 pd = (hermon_pdhdl_t)attr_p->qp_pd_hdl; 161 162 /* 163 * If on an SRQ, check to make sure the PD is the same 164 */ 165 if (qp_srq_en && (pd->pd_pdnum != srq->srq_pdhdl->pd_pdnum)) { 166 status = IBT_PD_HDL_INVALID; 167 goto qpalloc_fail; 168 } 169 170 /* Increment the reference count on the protection domain (PD) */ 171 hermon_pd_refcnt_inc(pd); 172 173 /* 174 * Check for valid CQ handle pointers 175 */ 176 if ((attr_p->qp_ibc_scq_hdl == NULL) || 177 (attr_p->qp_ibc_rcq_hdl == NULL)) { 178 status = IBT_CQ_HDL_INVALID; 179 goto qpalloc_fail1; 180 } 181 sq_cq = (hermon_cqhdl_t)attr_p->qp_ibc_scq_hdl; 182 rq_cq = (hermon_cqhdl_t)attr_p->qp_ibc_rcq_hdl; 183 184 /* 185 * Increment the reference count on the CQs. One or both of these 186 * could return error if we determine that the given CQ is already 187 * being used with a special (SMI/GSI) QP. 188 */ 189 status = hermon_cq_refcnt_inc(sq_cq, HERMON_CQ_IS_NORMAL); 190 if (status != DDI_SUCCESS) { 191 status = IBT_CQ_HDL_INVALID; 192 goto qpalloc_fail1; 193 } 194 status = hermon_cq_refcnt_inc(rq_cq, HERMON_CQ_IS_NORMAL); 195 if (status != DDI_SUCCESS) { 196 status = IBT_CQ_HDL_INVALID; 197 goto qpalloc_fail2; 198 } 199 200 /* 201 * Allocate an QP context entry. This will be filled in with all 202 * the necessary parameters to define the Queue Pair. 
Unlike 203 * other Hermon hardware resources, ownership is not immediately 204 * given to hardware in the final step here. Instead, we must 205 * wait until the QP is later transitioned to the "Init" state before 206 * passing the QP to hardware. If we fail here, we must undo all 207 * the reference count (CQ and PD). 208 */ 209 status = hermon_rsrc_alloc(state, HERMON_QPC, 1, sleepflag, &qpc); 210 if (status != DDI_SUCCESS) { 211 status = IBT_INSUFF_RESOURCE; 212 goto qpalloc_fail3; 213 } 214 215 /* 216 * Allocate the software structure for tracking the queue pair 217 * (i.e. the Hermon Queue Pair handle). If we fail here, we must 218 * undo the reference counts and the previous resource allocation. 219 */ 220 status = hermon_rsrc_alloc(state, HERMON_QPHDL, 1, sleepflag, &rsrc); 221 if (status != DDI_SUCCESS) { 222 status = IBT_INSUFF_RESOURCE; 223 goto qpalloc_fail4; 224 } 225 qp = (hermon_qphdl_t)rsrc->hr_addr; 226 bzero(qp, sizeof (struct hermon_sw_qp_s)); 227 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qp)) 228 229 /* 230 * Calculate the QP number from QPC index. This routine handles 231 * all of the operations necessary to keep track of used, unused, 232 * and released QP numbers. 233 */ 234 status = hermon_qp_create_qpn(state, qp, qpc); 235 if (status != DDI_SUCCESS) { 236 status = IBT_INSUFF_RESOURCE; 237 goto qpalloc_fail5; 238 } 239 240 /* 241 * If this will be a user-mappable QP, then allocate an entry for 242 * the "userland resources database". This will later be added to 243 * the database (after all further QP operations are successful). 244 * If we fail here, we must undo the reference counts and the 245 * previous resource allocation. 246 */ 247 if (qp_is_umap) { 248 umapdb = hermon_umap_db_alloc(state->hs_instance, qp->qp_qpnum, 249 MLNX_UMAP_QPMEM_RSRC, (uint64_t)(uintptr_t)rsrc); 250 if (umapdb == NULL) { 251 status = IBT_INSUFF_RESOURCE; 252 goto qpalloc_fail6; 253 } 254 } 255 256 /* 257 * Allocate the doorbell record. 
Hermon just needs one for the RQ, 258 * if the QP is not associated with an SRQ, and use uarpg (above) as 259 * the uar index 260 */ 261 262 if (!qp_srq_en) { 263 status = hermon_dbr_alloc(state, uarpg, &qp->qp_rq_dbr_acchdl, 264 &qp->qp_rq_vdbr, &qp->qp_rq_pdbr, &qp->qp_rdbr_mapoffset); 265 if (status != DDI_SUCCESS) { 266 status = IBT_INSUFF_RESOURCE; 267 goto qpalloc_fail6; 268 } 269 } 270 271 qp->qp_uses_lso = (attr_p->qp_flags & IBT_USES_LSO); 272 273 /* 274 * We verify that the requested number of SGL is valid (i.e. 275 * consistent with the device limits and/or software-configured 276 * limits). If not, then obviously the same cleanup needs to be done. 277 */ 278 if (type == IBT_UD_RQP) { 279 max_sgl = state->hs_ibtfinfo.hca_attr->hca_ud_send_sgl_sz; 280 swq_type = HERMON_QP_WQ_TYPE_SENDQ_UD; 281 } else { 282 max_sgl = state->hs_ibtfinfo.hca_attr->hca_conn_send_sgl_sz; 283 swq_type = HERMON_QP_WQ_TYPE_SENDQ_CONN; 284 } 285 max_recv_sgl = state->hs_ibtfinfo.hca_attr->hca_recv_sgl_sz; 286 if ((attr_p->qp_sizes.cs_sq_sgl > max_sgl) || 287 (!qp_srq_en && (attr_p->qp_sizes.cs_rq_sgl > max_recv_sgl))) { 288 status = IBT_HCA_SGL_EXCEEDED; 289 goto qpalloc_fail7; 290 } 291 292 /* 293 * Determine this QP's WQE stride (for both the Send and Recv WQEs). 294 * This will depend on the requested number of SGLs. Note: this 295 * has the side-effect of also calculating the real number of SGLs 296 * (for the calculated WQE size). 297 * 298 * For QP's on an SRQ, we set these to 0. 
299 */ 300 if (qp_srq_en) { 301 qp->qp_rq_log_wqesz = 0; 302 qp->qp_rq_sgl = 0; 303 } else { 304 hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_rq_sgl, 305 max_recv_sgl, HERMON_QP_WQ_TYPE_RECVQ, 306 &qp->qp_rq_log_wqesz, &qp->qp_rq_sgl); 307 } 308 hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_sq_sgl, 309 max_sgl, swq_type, &qp->qp_sq_log_wqesz, &qp->qp_sq_sgl); 310 311 sq_wqe_size = 1 << qp->qp_sq_log_wqesz; 312 313 /* NOTE: currently policy in driver, later maybe IBTF interface */ 314 qp->qp_no_prefetch = 0; 315 316 /* 317 * for prefetching, we need to add the number of wqes in 318 * the 2k area plus one to the number requested, but 319 * ONLY for send queue. If no_prefetch == 1 (prefetch off) 320 * it's exactly TWO wqes for the headroom 321 */ 322 if (qp->qp_no_prefetch) 323 qp->qp_sq_headroom = 2 * sq_wqe_size; 324 else 325 qp->qp_sq_headroom = sq_wqe_size + HERMON_QP_OH_SIZE; 326 /* 327 * hdrm wqes must be integral since both sq_wqe_size & 328 * HERMON_QP_OH_SIZE are power of 2 329 */ 330 qp->qp_sq_hdrmwqes = (qp->qp_sq_headroom / sq_wqe_size); 331 332 333 /* 334 * Calculate the appropriate size for the work queues. 335 * For send queue, add in the headroom wqes to the calculation. 336 * Note: All Hermon QP work queues must be a power-of-2 in size. Also 337 * they may not be any smaller than HERMON_QP_MIN_SIZE. 
This step is 338 * to round the requested size up to the next highest power-of-2 339 */ 340 /* first, adjust to a minimum and tell the caller the change */ 341 attr_p->qp_sizes.cs_sq = max(attr_p->qp_sizes.cs_sq, 342 HERMON_QP_MIN_SIZE); 343 attr_p->qp_sizes.cs_rq = max(attr_p->qp_sizes.cs_rq, 344 HERMON_QP_MIN_SIZE); 345 /* 346 * now, calculate the alloc size, taking into account 347 * the headroom for the sq 348 */ 349 log_qp_sq_size = highbit(attr_p->qp_sizes.cs_sq + qp->qp_sq_hdrmwqes); 350 /* if the total is a power of two, reduce it */ 351 if (((attr_p->qp_sizes.cs_sq + qp->qp_sq_hdrmwqes) & 352 (attr_p->qp_sizes.cs_sq + qp->qp_sq_hdrmwqes - 1)) == 0) { 353 log_qp_sq_size = log_qp_sq_size - 1; 354 } 355 356 log_qp_rq_size = highbit(attr_p->qp_sizes.cs_rq); 357 if ((attr_p->qp_sizes.cs_rq & (attr_p->qp_sizes.cs_rq - 1)) == 0) { 358 log_qp_rq_size = log_qp_rq_size - 1; 359 } 360 361 /* 362 * Next we verify that the rounded-up size is valid (i.e. consistent 363 * with the device limits and/or software-configured limits). If not, 364 * then obviously we have a lot of cleanup to do before returning. 365 * 366 * NOTE: the first condition deals with the (test) case of cs_sq 367 * being just less than 2^32. In this case, the headroom addition 368 * to the requested cs_sq will pass the test when it should not. 369 * This test no longer lets that case slip through the check. 370 */ 371 if ((attr_p->qp_sizes.cs_sq > 372 (1 << state->hs_cfg_profile->cp_log_max_qp_sz)) || 373 (log_qp_sq_size > state->hs_cfg_profile->cp_log_max_qp_sz) || 374 (!qp_srq_en && (log_qp_rq_size > 375 state->hs_cfg_profile->cp_log_max_qp_sz))) { 376 status = IBT_HCA_WR_EXCEEDED; 377 goto qpalloc_fail7; 378 } 379 380 /* 381 * Allocate the memory for QP work queues. Since Hermon work queues 382 * are not allowed to cross a 32-bit (4GB) boundary, the alignment of 383 * the work queue memory is very important. 
We used to allocate 384 * work queues (the combined receive and send queues) so that they 385 * would be aligned on their combined size. That alignment guaranteed 386 * that they would never cross the 4GB boundary (Hermon work queues 387 * are on the order of MBs at maximum). Now we are able to relax 388 * this alignment constraint by ensuring that the IB address assigned 389 * to the queue memory (as a result of the hermon_mr_register() call) 390 * is offset from zero. 391 * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to 392 * guarantee the alignment, but when attempting to use IOMMU bypass 393 * mode we found that we were not allowed to specify any alignment 394 * that was more restrictive than the system page size. 395 * So we avoided this constraint by passing two alignment values, 396 * one for the memory allocation itself and the other for the DMA 397 * handle (for later bind). This used to cause more memory than 398 * necessary to be allocated (in order to guarantee the more 399 * restrictive alignment contraint). But by guaranteeing the 400 * zero-based IB virtual address for the queue, we are able to 401 * conserve this memory. 
402 */ 403 sq_wqe_size = 1 << qp->qp_sq_log_wqesz; 404 sq_depth = 1 << log_qp_sq_size; 405 sq_size = sq_depth * sq_wqe_size; 406 407 /* QP on SRQ sets these to 0 */ 408 if (qp_srq_en) { 409 rq_wqe_size = 0; 410 rq_size = 0; 411 } else { 412 rq_wqe_size = 1 << qp->qp_rq_log_wqesz; 413 rq_depth = 1 << log_qp_rq_size; 414 rq_size = rq_depth * rq_wqe_size; 415 } 416 417 qp->qp_wqinfo.qa_size = sq_size + rq_size; 418 419 qp->qp_wqinfo.qa_alloc_align = PAGESIZE; 420 qp->qp_wqinfo.qa_bind_align = PAGESIZE; 421 422 if (qp_is_umap) { 423 qp->qp_wqinfo.qa_location = HERMON_QUEUE_LOCATION_USERLAND; 424 } else { 425 qp->qp_wqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL; 426 } 427 status = hermon_queue_alloc(state, &qp->qp_wqinfo, sleepflag); 428 if (status != DDI_SUCCESS) { 429 status = IBT_INSUFF_RESOURCE; 430 goto qpalloc_fail7; 431 } 432 433 /* 434 * Sort WQs in memory according to stride (*q_wqe_size), largest first 435 * If they are equal, still put the SQ first 436 */ 437 qp->qp_sq_baseaddr = 0; 438 qp->qp_rq_baseaddr = 0; 439 if ((sq_wqe_size > rq_wqe_size) || (sq_wqe_size == rq_wqe_size)) { 440 sq_buf = qp->qp_wqinfo.qa_buf_aligned; 441 442 /* if this QP is on an SRQ, set the rq_buf to NULL */ 443 if (qp_srq_en) { 444 rq_buf = NULL; 445 } else { 446 rq_buf = (uint32_t *)((uintptr_t)sq_buf + sq_size); 447 qp->qp_rq_baseaddr = sq_size; 448 } 449 } else { 450 rq_buf = qp->qp_wqinfo.qa_buf_aligned; 451 sq_buf = (uint32_t *)((uintptr_t)rq_buf + rq_size); 452 qp->qp_sq_baseaddr = rq_size; 453 } 454 455 if (qp_is_umap == 0) { 456 qp->qp_sq_wqhdr = hermon_wrid_wqhdr_create(sq_depth); 457 if (qp->qp_sq_wqhdr == NULL) { 458 status = IBT_INSUFF_RESOURCE; 459 goto qpalloc_fail8; 460 } 461 if (qp_srq_en) { 462 qp->qp_rq_wqavl.wqa_wq = srq->srq_wq_wqhdr; 463 qp->qp_rq_wqavl.wqa_srq_en = 1; 464 qp->qp_rq_wqavl.wqa_srq = srq; 465 } else { 466 qp->qp_rq_wqhdr = hermon_wrid_wqhdr_create(rq_depth); 467 if (qp->qp_rq_wqhdr == NULL) { 468 status = IBT_INSUFF_RESOURCE; 469 goto 
qpalloc_fail8; 470 } 471 qp->qp_rq_wqavl.wqa_wq = qp->qp_rq_wqhdr; 472 } 473 qp->qp_sq_wqavl.wqa_qpn = qp->qp_qpnum; 474 qp->qp_sq_wqavl.wqa_type = HERMON_WR_SEND; 475 qp->qp_sq_wqavl.wqa_wq = qp->qp_sq_wqhdr; 476 qp->qp_rq_wqavl.wqa_qpn = qp->qp_qpnum; 477 qp->qp_rq_wqavl.wqa_type = HERMON_WR_RECV; 478 } 479 480 /* 481 * Register the memory for the QP work queues. The memory for the 482 * QP must be registered in the Hermon cMPT tables. This gives us the 483 * LKey to specify in the QP context later. Note: The memory for 484 * Hermon work queues (both Send and Recv) must be contiguous and 485 * registered as a single memory region. Note: If the QP memory is 486 * user-mappable, force DDI_DMA_CONSISTENT mapping. Also, in order to 487 * meet the alignment restriction, we pass the "mro_bind_override_addr" 488 * flag in the call to hermon_mr_register(). This guarantees that the 489 * resulting IB vaddr will be zero-based (modulo the offset into the 490 * first page). If we fail here, we still have the bunch of resource 491 * and reference count cleanup to do. 492 */ 493 flag = (sleepflag == HERMON_SLEEP) ? IBT_MR_SLEEP : 494 IBT_MR_NOSLEEP; 495 mr_attr.mr_vaddr = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned; 496 mr_attr.mr_len = qp->qp_wqinfo.qa_size; 497 mr_attr.mr_as = NULL; 498 mr_attr.mr_flags = flag; 499 if (qp_is_umap) { 500 mr_op.mro_bind_type = state->hs_cfg_profile->cp_iommu_bypass; 501 } else { 502 /* HERMON_QUEUE_LOCATION_NORMAL */ 503 mr_op.mro_bind_type = 504 state->hs_cfg_profile->cp_iommu_bypass; 505 } 506 mr_op.mro_bind_dmahdl = qp->qp_wqinfo.qa_dmahdl; 507 mr_op.mro_bind_override_addr = 1; 508 status = hermon_mr_register(state, pd, &mr_attr, &mr, 509 &mr_op, HERMON_QP_CMPT); 510 if (status != DDI_SUCCESS) { 511 status = IBT_INSUFF_RESOURCE; 512 goto qpalloc_fail9; 513 } 514 515 /* 516 * Calculate the offset between the kernel virtual address space 517 * and the IB virtual address space. 
This will be used when 518 * posting work requests to properly initialize each WQE. 519 */ 520 qp_desc_off = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned - 521 (uint64_t)mr->mr_bindinfo.bi_addr; 522 523 /* 524 * Fill in all the return arguments (if necessary). This includes 525 * real work queue sizes (in wqes), real SGLs, and QP number 526 */ 527 if (queuesz_p != NULL) { 528 queuesz_p->cs_sq = 529 (1 << log_qp_sq_size) - qp->qp_sq_hdrmwqes; 530 queuesz_p->cs_sq_sgl = qp->qp_sq_sgl; 531 532 /* if this QP is on an SRQ, set these to 0 */ 533 if (qp_srq_en) { 534 queuesz_p->cs_rq = 0; 535 queuesz_p->cs_rq_sgl = 0; 536 } else { 537 queuesz_p->cs_rq = (1 << log_qp_rq_size); 538 queuesz_p->cs_rq_sgl = qp->qp_rq_sgl; 539 } 540 } 541 if (qpn != NULL) { 542 *qpn = (ib_qpn_t)qp->qp_qpnum; 543 } 544 545 /* 546 * Fill in the rest of the Hermon Queue Pair handle. 547 */ 548 qp->qp_qpcrsrcp = qpc; 549 qp->qp_rsrcp = rsrc; 550 qp->qp_state = HERMON_QP_RESET; 551 qp->qp_pdhdl = pd; 552 qp->qp_mrhdl = mr; 553 qp->qp_sq_sigtype = (attr_p->qp_flags & IBT_WR_SIGNALED) ? 
554 HERMON_QP_SQ_WR_SIGNALED : HERMON_QP_SQ_ALL_SIGNALED; 555 qp->qp_is_special = 0; 556 qp->qp_is_umap = qp_is_umap; 557 qp->qp_uarpg = uarpg; 558 qp->qp_umap_dhp = (devmap_cookie_t)NULL; 559 qp->qp_sq_cqhdl = sq_cq; 560 qp->qp_sq_bufsz = (1 << log_qp_sq_size); 561 qp->qp_sq_logqsz = log_qp_sq_size; 562 qp->qp_sq_buf = sq_buf; 563 qp->qp_desc_off = qp_desc_off; 564 qp->qp_rq_cqhdl = rq_cq; 565 qp->qp_rq_buf = rq_buf; 566 qp->qp_rlky = (attr_p->qp_flags & IBT_FAST_REG_RES_LKEY) != 567 0; 568 569 /* if this QP is on an SRQ, set rq_bufsz to 0 */ 570 if (qp_srq_en) { 571 qp->qp_rq_bufsz = 0; 572 qp->qp_rq_logqsz = 0; 573 } else { 574 qp->qp_rq_bufsz = (1 << log_qp_rq_size); 575 qp->qp_rq_logqsz = log_qp_rq_size; 576 } 577 578 qp->qp_forward_sqd_event = 0; 579 qp->qp_sqd_still_draining = 0; 580 qp->qp_hdlrarg = (void *)ibt_qphdl; 581 qp->qp_mcg_refcnt = 0; 582 583 /* 584 * If this QP is to be associated with an SRQ, set the SRQ handle 585 */ 586 if (qp_srq_en) { 587 qp->qp_srqhdl = srq; 588 qp->qp_srq_en = HERMON_QP_SRQ_ENABLED; 589 hermon_srq_refcnt_inc(qp->qp_srqhdl); 590 } else { 591 qp->qp_srqhdl = NULL; 592 qp->qp_srq_en = HERMON_QP_SRQ_DISABLED; 593 } 594 595 /* Determine the QP service type */ 596 if (type == IBT_RC_RQP) { 597 qp->qp_serv_type = HERMON_QP_RC; 598 } else if (type == IBT_UD_RQP) { 599 qp->qp_serv_type = HERMON_QP_UD; 600 } else { 601 qp->qp_serv_type = HERMON_QP_UC; 602 } 603 604 /* 605 * Initialize the RQ WQEs - unlike Arbel, no Rcv init is needed 606 */ 607 608 /* 609 * Initialize the SQ WQEs - all that needs to be done is every 64 bytes 610 * set the quadword to all F's - high-order bit is owner (init to one) 611 * and the rest for the headroom definition of prefetching 612 * 613 */ 614 wqesz_shift = qp->qp_sq_log_wqesz; 615 thewqesz = 1 << wqesz_shift; 616 thewqe = (uint64_t *)(void *)(qp->qp_sq_buf); 617 if (qp_is_umap == 0) { 618 for (i = 0; i < sq_depth; i++) { 619 /* 620 * for each stride, go through and every 64 bytes 621 * write the init 
value - having set the address 622 * once, just keep incrementing it 623 */ 624 for (j = 0; j < thewqesz; j += 64, thewqe += 8) { 625 *(uint32_t *)thewqe = 0xFFFFFFFF; 626 } 627 } 628 } 629 630 /* Zero out the QP context */ 631 bzero(&qp->qpc, sizeof (hermon_hw_qpc_t)); 632 633 /* 634 * Put QP handle in Hermon QPNum-to-QPHdl list. Then fill in the 635 * "qphdl" and return success 636 */ 637 ASSERT(state->hs_qphdl[qpc->hr_indx] == NULL); 638 state->hs_qphdl[qpc->hr_indx] = qp; 639 640 /* 641 * If this is a user-mappable QP, then we need to insert the previously 642 * allocated entry into the "userland resources database". This will 643 * allow for later lookup during devmap() (i.e. mmap()) calls. 644 */ 645 if (qp_is_umap) { 646 hermon_umap_db_add(umapdb); 647 } 648 mutex_init(&qp->qp_sq_lock, NULL, MUTEX_DRIVER, 649 DDI_INTR_PRI(state->hs_intrmsi_pri)); 650 mutex_init(&qp->qp_rq_lock, NULL, MUTEX_DRIVER, 651 DDI_INTR_PRI(state->hs_intrmsi_pri)); 652 653 *qphdl = qp; 654 655 return (DDI_SUCCESS); 656 657 /* 658 * The following is cleanup for all possible failure cases in this routine 659 */ 660 qpalloc_fail9: 661 hermon_queue_free(&qp->qp_wqinfo); 662 qpalloc_fail8: 663 if (qp->qp_sq_wqhdr) 664 hermon_wrid_wqhdr_destroy(qp->qp_sq_wqhdr); 665 if (qp->qp_rq_wqhdr) 666 hermon_wrid_wqhdr_destroy(qp->qp_rq_wqhdr); 667 qpalloc_fail7: 668 if (qp_is_umap) { 669 hermon_umap_db_free(umapdb); 670 } 671 if (!qp_srq_en) { 672 hermon_dbr_free(state, uarpg, qp->qp_rq_vdbr); 673 } 674 675 qpalloc_fail6: 676 /* 677 * Releasing the QPN will also free up the QPC context. Update 678 * the QPC context pointer to indicate this. 
679 */ 680 hermon_qp_release_qpn(state, qp->qp_qpn_hdl, HERMON_QPN_RELEASE); 681 qpc = NULL; 682 qpalloc_fail5: 683 hermon_rsrc_free(state, &rsrc); 684 qpalloc_fail4: 685 if (qpc) { 686 hermon_rsrc_free(state, &qpc); 687 } 688 qpalloc_fail3: 689 hermon_cq_refcnt_dec(rq_cq); 690 qpalloc_fail2: 691 hermon_cq_refcnt_dec(sq_cq); 692 qpalloc_fail1: 693 hermon_pd_refcnt_dec(pd); 694 qpalloc_fail: 695 return (status); 696 } 697 698 699 700 /* 701 * hermon_special_qp_alloc() 702 * Context: Can be called only from user or kernel context. 703 */ 704 int 705 hermon_special_qp_alloc(hermon_state_t *state, hermon_qp_info_t *qpinfo, 706 uint_t sleepflag) 707 { 708 hermon_rsrc_t *qpc, *rsrc; 709 hermon_qphdl_t qp; 710 ibt_qp_alloc_attr_t *attr_p; 711 ibt_sqp_type_t type; 712 uint8_t port; 713 ibtl_qp_hdl_t ibt_qphdl; 714 ibt_chan_sizes_t *queuesz_p; 715 hermon_qphdl_t *qphdl; 716 ibt_mr_attr_t mr_attr; 717 hermon_mr_options_t mr_op; 718 hermon_pdhdl_t pd; 719 hermon_cqhdl_t sq_cq, rq_cq; 720 hermon_mrhdl_t mr; 721 uint64_t qp_desc_off; 722 uint64_t *thewqe, thewqesz; 723 uint32_t *sq_buf, *rq_buf; 724 uint32_t log_qp_sq_size, log_qp_rq_size; 725 uint32_t sq_size, rq_size, max_sgl; 726 uint32_t uarpg; 727 uint32_t sq_depth; 728 uint32_t sq_wqe_size, rq_wqe_size, wqesz_shift; 729 int status, flag, i, j; 730 731 /* 732 * Extract the necessary info from the hermon_qp_info_t structure 733 */ 734 attr_p = qpinfo->qpi_attrp; 735 type = qpinfo->qpi_type; 736 port = qpinfo->qpi_port; 737 ibt_qphdl = qpinfo->qpi_ibt_qphdl; 738 queuesz_p = qpinfo->qpi_queueszp; 739 qphdl = &qpinfo->qpi_qphdl; 740 741 /* 742 * Check for valid special QP type (only SMI & GSI supported) 743 */ 744 if ((type != IBT_SMI_SQP) && (type != IBT_GSI_SQP)) { 745 status = IBT_QP_SPECIAL_TYPE_INVALID; 746 goto spec_qpalloc_fail; 747 } 748 749 /* 750 * Check for valid port number 751 */ 752 if (!hermon_portnum_is_valid(state, port)) { 753 status = IBT_HCA_PORT_INVALID; 754 goto spec_qpalloc_fail; 755 } 756 port = port - 
1; 757 758 /* 759 * Check for valid PD handle pointer 760 */ 761 if (attr_p->qp_pd_hdl == NULL) { 762 status = IBT_PD_HDL_INVALID; 763 goto spec_qpalloc_fail; 764 } 765 pd = (hermon_pdhdl_t)attr_p->qp_pd_hdl; 766 767 /* Increment the reference count on the PD */ 768 hermon_pd_refcnt_inc(pd); 769 770 /* 771 * Check for valid CQ handle pointers 772 */ 773 if ((attr_p->qp_ibc_scq_hdl == NULL) || 774 (attr_p->qp_ibc_rcq_hdl == NULL)) { 775 status = IBT_CQ_HDL_INVALID; 776 goto spec_qpalloc_fail1; 777 } 778 sq_cq = (hermon_cqhdl_t)attr_p->qp_ibc_scq_hdl; 779 rq_cq = (hermon_cqhdl_t)attr_p->qp_ibc_rcq_hdl; 780 781 /* 782 * Increment the reference count on the CQs. One or both of these 783 * could return error if we determine that the given CQ is already 784 * being used with a non-special QP (i.e. a normal QP). 785 */ 786 status = hermon_cq_refcnt_inc(sq_cq, HERMON_CQ_IS_SPECIAL); 787 if (status != DDI_SUCCESS) { 788 status = IBT_CQ_HDL_INVALID; 789 goto spec_qpalloc_fail1; 790 } 791 status = hermon_cq_refcnt_inc(rq_cq, HERMON_CQ_IS_SPECIAL); 792 if (status != DDI_SUCCESS) { 793 status = IBT_CQ_HDL_INVALID; 794 goto spec_qpalloc_fail2; 795 } 796 797 /* 798 * Allocate the special QP resources. Essentially, this allocation 799 * amounts to checking if the request special QP has already been 800 * allocated. If successful, the QP context return is an actual 801 * QP context that has been "aliased" to act as a special QP of the 802 * appropriate type (and for the appropriate port). Just as in 803 * hermon_qp_alloc() above, ownership for this QP context is not 804 * immediately given to hardware in the final step here. Instead, we 805 * wait until the QP is later transitioned to the "Init" state before 806 * passing the QP to hardware. If we fail here, we must undo all 807 * the reference count (CQ and PD). 
808 */ 809 status = hermon_special_qp_rsrc_alloc(state, type, port, &qpc); 810 if (status != DDI_SUCCESS) { 811 goto spec_qpalloc_fail3; 812 } 813 814 /* 815 * Allocate the software structure for tracking the special queue 816 * pair (i.e. the Hermon Queue Pair handle). If we fail here, we 817 * must undo the reference counts and the previous resource allocation. 818 */ 819 status = hermon_rsrc_alloc(state, HERMON_QPHDL, 1, sleepflag, &rsrc); 820 if (status != DDI_SUCCESS) { 821 status = IBT_INSUFF_RESOURCE; 822 goto spec_qpalloc_fail4; 823 } 824 qp = (hermon_qphdl_t)rsrc->hr_addr; 825 826 bzero(qp, sizeof (struct hermon_sw_qp_s)); 827 828 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qp)) 829 830 /* 831 * Actual QP number is a combination of the index of the QPC and 832 * the port number. This is because the special QP contexts must 833 * be allocated two-at-a-time. 834 */ 835 qp->qp_qpnum = qpc->hr_indx + port; 836 qp->qp_ring = qp->qp_qpnum << 8; 837 838 uarpg = state->hs_kernel_uar_index; /* must be for spec qp */ 839 /* 840 * Allocate the doorbell record. Hermon uses only one for the RQ so 841 * alloc a qp doorbell, using uarpg (above) as the uar index 842 */ 843 844 status = hermon_dbr_alloc(state, uarpg, &qp->qp_rq_dbr_acchdl, 845 &qp->qp_rq_vdbr, &qp->qp_rq_pdbr, &qp->qp_rdbr_mapoffset); 846 if (status != DDI_SUCCESS) { 847 status = IBT_INSUFF_RESOURCE; 848 goto spec_qpalloc_fail5; 849 } 850 /* 851 * Calculate the appropriate size for the work queues. 852 * Note: All Hermon QP work queues must be a power-of-2 in size. Also 853 * they may not be any smaller than HERMON_QP_MIN_SIZE. 
This step is 854 * to round the requested size up to the next highest power-of-2 855 */ 856 attr_p->qp_sizes.cs_sq = 857 max(attr_p->qp_sizes.cs_sq, HERMON_QP_MIN_SIZE); 858 attr_p->qp_sizes.cs_rq = 859 max(attr_p->qp_sizes.cs_rq, HERMON_QP_MIN_SIZE); 860 log_qp_sq_size = highbit(attr_p->qp_sizes.cs_sq); 861 if ((attr_p->qp_sizes.cs_sq & (attr_p->qp_sizes.cs_sq - 1)) == 0) { 862 log_qp_sq_size = log_qp_sq_size - 1; 863 } 864 log_qp_rq_size = highbit(attr_p->qp_sizes.cs_rq); 865 if ((attr_p->qp_sizes.cs_rq & (attr_p->qp_sizes.cs_rq - 1)) == 0) { 866 log_qp_rq_size = log_qp_rq_size - 1; 867 } 868 869 /* 870 * Next we verify that the rounded-up size is valid (i.e. consistent 871 * with the device limits and/or software-configured limits). If not, 872 * then obviously we have a bit of cleanup to do before returning. 873 */ 874 if ((log_qp_sq_size > state->hs_cfg_profile->cp_log_max_qp_sz) || 875 (log_qp_rq_size > state->hs_cfg_profile->cp_log_max_qp_sz)) { 876 status = IBT_HCA_WR_EXCEEDED; 877 goto spec_qpalloc_fail5a; 878 } 879 880 /* 881 * Next we verify that the requested number of SGL is valid (i.e. 882 * consistent with the device limits and/or software-configured 883 * limits). If not, then obviously the same cleanup needs to be done. 884 */ 885 max_sgl = state->hs_cfg_profile->cp_wqe_real_max_sgl; 886 if ((attr_p->qp_sizes.cs_sq_sgl > max_sgl) || 887 (attr_p->qp_sizes.cs_rq_sgl > max_sgl)) { 888 status = IBT_HCA_SGL_EXCEEDED; 889 goto spec_qpalloc_fail5a; 890 } 891 892 /* 893 * Determine this QP's WQE stride (for both the Send and Recv WQEs). 894 * This will depend on the requested number of SGLs. Note: this 895 * has the side-effect of also calculating the real number of SGLs 896 * (for the calculated WQE size). 
897 */ 898 hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_rq_sgl, 899 max_sgl, HERMON_QP_WQ_TYPE_RECVQ, 900 &qp->qp_rq_log_wqesz, &qp->qp_rq_sgl); 901 if (type == IBT_SMI_SQP) { 902 hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_sq_sgl, 903 max_sgl, HERMON_QP_WQ_TYPE_SENDMLX_QP0, 904 &qp->qp_sq_log_wqesz, &qp->qp_sq_sgl); 905 } else { 906 hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_sq_sgl, 907 max_sgl, HERMON_QP_WQ_TYPE_SENDMLX_QP1, 908 &qp->qp_sq_log_wqesz, &qp->qp_sq_sgl); 909 } 910 911 /* 912 * Allocate the memory for QP work queues. Since Hermon work queues 913 * are not allowed to cross a 32-bit (4GB) boundary, the alignment of 914 * the work queue memory is very important. We used to allocate 915 * work queues (the combined receive and send queues) so that they 916 * would be aligned on their combined size. That alignment guaranteed 917 * that they would never cross the 4GB boundary (Hermon work queues 918 * are on the order of MBs at maximum). Now we are able to relax 919 * this alignment constraint by ensuring that the IB address assigned 920 * to the queue memory (as a result of the hermon_mr_register() call) 921 * is offset from zero. 922 * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to 923 * guarantee the alignment, but when attempting to use IOMMU bypass 924 * mode we found that we were not allowed to specify any alignment 925 * that was more restrictive than the system page size. 926 * So we avoided this constraint by passing two alignment values, 927 * one for the memory allocation itself and the other for the DMA 928 * handle (for later bind). This used to cause more memory than 929 * necessary to be allocated (in order to guarantee the more 930 * restrictive alignment contraint). But by guaranteeing the 931 * zero-based IB virtual address for the queue, we are able to 932 * conserve this memory. 
933 */ 934 sq_wqe_size = 1 << qp->qp_sq_log_wqesz; 935 sq_depth = 1 << log_qp_sq_size; 936 sq_size = (1 << log_qp_sq_size) * sq_wqe_size; 937 938 rq_wqe_size = 1 << qp->qp_rq_log_wqesz; 939 rq_size = (1 << log_qp_rq_size) * rq_wqe_size; 940 941 qp->qp_wqinfo.qa_size = sq_size + rq_size; 942 943 qp->qp_wqinfo.qa_alloc_align = PAGESIZE; 944 qp->qp_wqinfo.qa_bind_align = PAGESIZE; 945 qp->qp_wqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL; 946 947 status = hermon_queue_alloc(state, &qp->qp_wqinfo, sleepflag); 948 if (status != NULL) { 949 status = IBT_INSUFF_RESOURCE; 950 goto spec_qpalloc_fail5a; 951 } 952 953 /* 954 * Sort WQs in memory according to depth, stride (*q_wqe_size), 955 * biggest first. If equal, the Send Queue still goes first 956 */ 957 qp->qp_sq_baseaddr = 0; 958 qp->qp_rq_baseaddr = 0; 959 if ((sq_wqe_size > rq_wqe_size) || (sq_wqe_size == rq_wqe_size)) { 960 sq_buf = qp->qp_wqinfo.qa_buf_aligned; 961 rq_buf = (uint32_t *)((uintptr_t)sq_buf + sq_size); 962 qp->qp_rq_baseaddr = sq_size; 963 } else { 964 rq_buf = qp->qp_wqinfo.qa_buf_aligned; 965 sq_buf = (uint32_t *)((uintptr_t)rq_buf + rq_size); 966 qp->qp_sq_baseaddr = rq_size; 967 } 968 969 qp->qp_sq_wqhdr = hermon_wrid_wqhdr_create(sq_depth); 970 if (qp->qp_sq_wqhdr == NULL) { 971 status = IBT_INSUFF_RESOURCE; 972 goto spec_qpalloc_fail6; 973 } 974 qp->qp_rq_wqhdr = hermon_wrid_wqhdr_create(1 << log_qp_rq_size); 975 if (qp->qp_rq_wqhdr == NULL) { 976 status = IBT_INSUFF_RESOURCE; 977 goto spec_qpalloc_fail6; 978 } 979 qp->qp_sq_wqavl.wqa_qpn = qp->qp_qpnum; 980 qp->qp_sq_wqavl.wqa_type = HERMON_WR_SEND; 981 qp->qp_sq_wqavl.wqa_wq = qp->qp_sq_wqhdr; 982 qp->qp_rq_wqavl.wqa_qpn = qp->qp_qpnum; 983 qp->qp_rq_wqavl.wqa_type = HERMON_WR_RECV; 984 qp->qp_rq_wqavl.wqa_wq = qp->qp_rq_wqhdr; 985 986 /* 987 * Register the memory for the special QP work queues. The memory for 988 * the special QP must be registered in the Hermon cMPT tables. This 989 * gives us the LKey to specify in the QP context later. 
Note: The 990 * memory for Hermon work queues (both Send and Recv) must be contiguous 991 * and registered as a single memory region. Also, in order to meet the 992 * alignment restriction, we pass the "mro_bind_override_addr" flag in 993 * the call to hermon_mr_register(). This guarantees that the resulting 994 * IB vaddr will be zero-based (modulo the offset into the first page). 995 * If we fail here, we have a bunch of resource and reference count 996 * cleanup to do. 997 */ 998 flag = (sleepflag == HERMON_SLEEP) ? IBT_MR_SLEEP : 999 IBT_MR_NOSLEEP; 1000 mr_attr.mr_vaddr = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned; 1001 mr_attr.mr_len = qp->qp_wqinfo.qa_size; 1002 mr_attr.mr_as = NULL; 1003 mr_attr.mr_flags = flag; 1004 1005 mr_op.mro_bind_type = state->hs_cfg_profile->cp_iommu_bypass; 1006 mr_op.mro_bind_dmahdl = qp->qp_wqinfo.qa_dmahdl; 1007 mr_op.mro_bind_override_addr = 1; 1008 1009 status = hermon_mr_register(state, pd, &mr_attr, &mr, &mr_op, 1010 HERMON_QP_CMPT); 1011 if (status != DDI_SUCCESS) { 1012 status = IBT_INSUFF_RESOURCE; 1013 goto spec_qpalloc_fail6; 1014 } 1015 1016 /* 1017 * Calculate the offset between the kernel virtual address space 1018 * and the IB virtual address space. This will be used when 1019 * posting work requests to properly initialize each WQE. 1020 */ 1021 qp_desc_off = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned - 1022 (uint64_t)mr->mr_bindinfo.bi_addr; 1023 1024 /* set the prefetch - initially, not prefetching */ 1025 qp->qp_no_prefetch = 1; 1026 1027 if (qp->qp_no_prefetch) 1028 qp->qp_sq_headroom = 2 * sq_wqe_size; 1029 else 1030 qp->qp_sq_headroom = sq_wqe_size + HERMON_QP_OH_SIZE; 1031 /* 1032 * hdrm wqes must be integral since both sq_wqe_size & 1033 * HERMON_QP_OH_SIZE are power of 2 1034 */ 1035 qp->qp_sq_hdrmwqes = (qp->qp_sq_headroom / sq_wqe_size); 1036 /* 1037 * Fill in all the return arguments (if necessary). 
This includes 1038 * real work queue sizes, real SGLs, and QP number (which will be 1039 * either zero or one, depending on the special QP type) 1040 */ 1041 if (queuesz_p != NULL) { 1042 queuesz_p->cs_sq = 1043 (1 << log_qp_sq_size) - qp->qp_sq_hdrmwqes; 1044 queuesz_p->cs_sq_sgl = qp->qp_sq_sgl; 1045 queuesz_p->cs_rq = (1 << log_qp_rq_size); 1046 queuesz_p->cs_rq_sgl = qp->qp_rq_sgl; 1047 } 1048 1049 /* 1050 * Fill in the rest of the Hermon Queue Pair handle. We can update 1051 * the following fields for use in further operations on the QP. 1052 */ 1053 qp->qp_qpcrsrcp = qpc; 1054 qp->qp_rsrcp = rsrc; 1055 qp->qp_state = HERMON_QP_RESET; 1056 qp->qp_pdhdl = pd; 1057 qp->qp_mrhdl = mr; 1058 qp->qp_sq_sigtype = (attr_p->qp_flags & IBT_WR_SIGNALED) ? 1059 HERMON_QP_SQ_WR_SIGNALED : HERMON_QP_SQ_ALL_SIGNALED; 1060 qp->qp_is_special = (type == IBT_SMI_SQP) ? 1061 HERMON_QP_SMI : HERMON_QP_GSI; 1062 qp->qp_is_umap = 0; 1063 qp->qp_uarpg = uarpg; 1064 qp->qp_umap_dhp = (devmap_cookie_t)NULL; 1065 qp->qp_sq_cqhdl = sq_cq; 1066 qp->qp_sq_bufsz = (1 << log_qp_sq_size); 1067 qp->qp_sq_buf = sq_buf; 1068 qp->qp_sq_logqsz = log_qp_sq_size; 1069 qp->qp_desc_off = qp_desc_off; 1070 qp->qp_rq_cqhdl = rq_cq; 1071 qp->qp_rq_bufsz = (1 << log_qp_rq_size); 1072 qp->qp_rq_buf = rq_buf; 1073 qp->qp_rq_logqsz = log_qp_rq_size; 1074 qp->qp_portnum = port; 1075 qp->qp_pkeyindx = 0; 1076 qp->qp_forward_sqd_event = 0; 1077 qp->qp_sqd_still_draining = 0; 1078 qp->qp_hdlrarg = (void *)ibt_qphdl; 1079 qp->qp_mcg_refcnt = 0; 1080 qp->qp_srq_en = 0; 1081 qp->qp_srqhdl = NULL; 1082 1083 /* All special QPs are UD QP service type */ 1084 qp->qp_serv_type = HERMON_QP_UD; 1085 1086 /* 1087 * Initialize the RQ WQEs - unlike Arbel, no Rcv init is needed 1088 */ 1089 1090 /* 1091 * Initialize the SQ WQEs - all that needs to be done is every 64 bytes 1092 * set the quadword to all F's - high-order bit is owner (init to one) 1093 * and the rest for the headroom definition of prefetching 1094 * 1095 */ 
1096 1097 wqesz_shift = qp->qp_sq_log_wqesz; 1098 thewqesz = 1 << wqesz_shift; 1099 thewqe = (uint64_t *)(void *)(qp->qp_sq_buf); 1100 for (i = 0; i < sq_depth; i++) { 1101 /* 1102 * for each stride, go through and every 64 bytes write the 1103 * init value - having set the address once, just keep 1104 * incrementing it 1105 */ 1106 for (j = 0; j < thewqesz; j += 64, thewqe += 8) { 1107 *(uint32_t *)thewqe = 0xFFFFFFFF; 1108 } 1109 } 1110 1111 1112 /* Zero out the QP context */ 1113 bzero(&qp->qpc, sizeof (hermon_hw_qpc_t)); 1114 1115 /* 1116 * Put QP handle in Hermon QPNum-to-QPHdl list. Then fill in the 1117 * "qphdl" and return success 1118 */ 1119 ASSERT(state->hs_qphdl[qpc->hr_indx + port] == NULL); 1120 state->hs_qphdl[qpc->hr_indx + port] = qp; 1121 1122 *qphdl = qp; 1123 1124 return (DDI_SUCCESS); 1125 1126 /* 1127 * The following is cleanup for all possible failure cases in this routine 1128 */ 1129 spec_qpalloc_fail6: 1130 hermon_queue_free(&qp->qp_wqinfo); 1131 if (qp->qp_sq_wqhdr) 1132 hermon_wrid_wqhdr_destroy(qp->qp_sq_wqhdr); 1133 if (qp->qp_rq_wqhdr) 1134 hermon_wrid_wqhdr_destroy(qp->qp_rq_wqhdr); 1135 spec_qpalloc_fail5a: 1136 hermon_dbr_free(state, uarpg, qp->qp_rq_vdbr); 1137 spec_qpalloc_fail5: 1138 hermon_rsrc_free(state, &rsrc); 1139 spec_qpalloc_fail4: 1140 if (hermon_special_qp_rsrc_free(state, type, port) != DDI_SUCCESS) { 1141 HERMON_WARNING(state, "failed to free special QP rsrc"); 1142 } 1143 spec_qpalloc_fail3: 1144 hermon_cq_refcnt_dec(rq_cq); 1145 spec_qpalloc_fail2: 1146 hermon_cq_refcnt_dec(sq_cq); 1147 spec_qpalloc_fail1: 1148 hermon_pd_refcnt_dec(pd); 1149 spec_qpalloc_fail: 1150 return (status); 1151 } 1152 1153 1154 /* 1155 * hermon_qp_free() 1156 * This function frees up the QP resources. Depending on the value 1157 * of the "free_qp_flags", the QP number may not be released until 1158 * a subsequent call to hermon_qp_release_qpn(). 1159 * 1160 * Context: Can be called only from user or kernel context. 
1161 */ 1162 /* ARGSUSED */ 1163 int 1164 hermon_qp_free(hermon_state_t *state, hermon_qphdl_t *qphdl, 1165 ibc_free_qp_flags_t free_qp_flags, ibc_qpn_hdl_t *qpnh, 1166 uint_t sleepflag) 1167 { 1168 hermon_rsrc_t *qpc, *rsrc; 1169 hermon_umap_db_entry_t *umapdb; 1170 hermon_qpn_entry_t *entry; 1171 hermon_pdhdl_t pd; 1172 hermon_mrhdl_t mr; 1173 hermon_cqhdl_t sq_cq, rq_cq; 1174 hermon_srqhdl_t srq; 1175 hermon_qphdl_t qp; 1176 uint64_t value; 1177 uint_t type, port; 1178 uint_t maxprot; 1179 uint_t qp_srq_en; 1180 int status; 1181 1182 /* 1183 * Pull all the necessary information from the Hermon Queue Pair 1184 * handle. This is necessary here because the resource for the 1185 * QP handle is going to be freed up as part of this operation. 1186 */ 1187 qp = *qphdl; 1188 mutex_enter(&qp->qp_lock); 1189 qpc = qp->qp_qpcrsrcp; 1190 rsrc = qp->qp_rsrcp; 1191 pd = qp->qp_pdhdl; 1192 srq = qp->qp_srqhdl; 1193 mr = qp->qp_mrhdl; 1194 rq_cq = qp->qp_rq_cqhdl; 1195 sq_cq = qp->qp_sq_cqhdl; 1196 port = qp->qp_portnum; 1197 qp_srq_en = qp->qp_srq_en; 1198 1199 /* 1200 * If the QP is part of an MCG, then we fail the qp_free 1201 */ 1202 if (qp->qp_mcg_refcnt != 0) { 1203 mutex_exit(&qp->qp_lock); 1204 status = ibc_get_ci_failure(0); 1205 goto qpfree_fail; 1206 } 1207 1208 /* 1209 * If the QP is not already in "Reset" state, then transition to 1210 * "Reset". This is necessary because software does not reclaim 1211 * ownership of the QP context until the QP is in the "Reset" state. 1212 * If the ownership transfer fails for any reason, then it is an 1213 * indication that something (either in HW or SW) has gone seriously 1214 * wrong. So we print a warning message and return. 
1215 */ 1216 if (qp->qp_state != HERMON_QP_RESET) { 1217 if (hermon_qp_to_reset(state, qp) != DDI_SUCCESS) { 1218 mutex_exit(&qp->qp_lock); 1219 HERMON_WARNING(state, "failed to reset QP context"); 1220 status = ibc_get_ci_failure(0); 1221 goto qpfree_fail; 1222 } 1223 qp->qp_state = HERMON_QP_RESET; 1224 1225 /* 1226 * Do any additional handling necessary for the transition 1227 * to the "Reset" state (e.g. update the WRID lists) 1228 */ 1229 if (hermon_wrid_to_reset_handling(state, qp) != DDI_SUCCESS) { 1230 mutex_exit(&qp->qp_lock); 1231 HERMON_WARNING(state, "failed to reset QP WRID list"); 1232 status = ibc_get_ci_failure(0); 1233 goto qpfree_fail; 1234 } 1235 } 1236 1237 /* 1238 * If this was a user-mappable QP, then we need to remove its entry 1239 * from the "userland resources database". If it is also currently 1240 * mmap()'d out to a user process, then we need to call 1241 * devmap_devmem_remap() to remap the QP memory to an invalid mapping. 1242 * We also need to invalidate the QP tracking information for the 1243 * user mapping. 1244 */ 1245 if (qp->qp_is_umap) { 1246 status = hermon_umap_db_find(state->hs_instance, qp->qp_qpnum, 1247 MLNX_UMAP_QPMEM_RSRC, &value, HERMON_UMAP_DB_REMOVE, 1248 &umapdb); 1249 if (status != DDI_SUCCESS) { 1250 mutex_exit(&qp->qp_lock); 1251 HERMON_WARNING(state, "failed to find in database"); 1252 return (ibc_get_ci_failure(0)); 1253 } 1254 hermon_umap_db_free(umapdb); 1255 if (qp->qp_umap_dhp != NULL) { 1256 maxprot = (PROT_READ | PROT_WRITE | PROT_USER); 1257 status = devmap_devmem_remap(qp->qp_umap_dhp, 1258 state->hs_dip, 0, 0, qp->qp_wqinfo.qa_size, 1259 maxprot, DEVMAP_MAPPING_INVALID, NULL); 1260 if (status != DDI_SUCCESS) { 1261 mutex_exit(&qp->qp_lock); 1262 HERMON_WARNING(state, "failed in QP memory " 1263 "devmap_devmem_remap()"); 1264 return (ibc_get_ci_failure(0)); 1265 } 1266 qp->qp_umap_dhp = (devmap_cookie_t)NULL; 1267 } 1268 } 1269 1270 1271 /* 1272 * Put NULL into the Hermon QPNum-to-QPHdl list. 
This will allow any 1273 * in-progress events to detect that the QP corresponding to this 1274 * number has been freed. Note: it does depend in whether we are 1275 * freeing a special QP or not. 1276 */ 1277 if (qp->qp_is_special) { 1278 state->hs_qphdl[qpc->hr_indx + port] = NULL; 1279 } else { 1280 state->hs_qphdl[qpc->hr_indx] = NULL; 1281 } 1282 1283 /* 1284 * Drop the QP lock 1285 * At this point the lock is no longer necessary. We cannot 1286 * protect from multiple simultaneous calls to free the same QP. 1287 * In addition, since the QP lock is contained in the QP "software 1288 * handle" resource, which we will free (see below), it is 1289 * important that we have no further references to that memory. 1290 */ 1291 mutex_exit(&qp->qp_lock); 1292 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qp)) 1293 1294 /* 1295 * Free the QP resources 1296 * Start by deregistering and freeing the memory for work queues. 1297 * Next free any previously allocated context information 1298 * (depending on QP type) 1299 * Finally, decrement the necessary reference counts. 1300 * If this fails for any reason, then it is an indication that 1301 * something (either in HW or SW) has gone seriously wrong. So we 1302 * print a warning message and return. 1303 */ 1304 status = hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL, 1305 sleepflag); 1306 if (status != DDI_SUCCESS) { 1307 HERMON_WARNING(state, "failed to deregister QP memory"); 1308 status = ibc_get_ci_failure(0); 1309 goto qpfree_fail; 1310 } 1311 1312 /* Free the memory for the QP */ 1313 hermon_queue_free(&qp->qp_wqinfo); 1314 1315 if (qp->qp_sq_wqhdr) 1316 hermon_wrid_wqhdr_destroy(qp->qp_sq_wqhdr); 1317 if (qp->qp_rq_wqhdr) 1318 hermon_wrid_wqhdr_destroy(qp->qp_rq_wqhdr); 1319 1320 /* Free the dbr */ 1321 if (!qp_srq_en) { 1322 hermon_dbr_free(state, qp->qp_uarpg, qp->qp_rq_vdbr); 1323 } 1324 1325 /* 1326 * Free up the remainder of the QP resources. 
Note: we have a few 1327 * different resources to free up depending on whether the QP is a 1328 * special QP or not. As described above, if any of these fail for 1329 * any reason it is an indication that something (either in HW or SW) 1330 * has gone seriously wrong. So we print a warning message and 1331 * return. 1332 */ 1333 if (qp->qp_is_special) { 1334 type = (qp->qp_is_special == HERMON_QP_SMI) ? 1335 IBT_SMI_SQP : IBT_GSI_SQP; 1336 1337 /* Free up resources for the special QP */ 1338 status = hermon_special_qp_rsrc_free(state, type, port); 1339 if (status != DDI_SUCCESS) { 1340 HERMON_WARNING(state, "failed to free special QP rsrc"); 1341 status = ibc_get_ci_failure(0); 1342 goto qpfree_fail; 1343 } 1344 1345 } else { 1346 type = qp->qp_serv_type; 1347 1348 /* 1349 * Check the flags and determine whether to release the 1350 * QPN or not, based on their value. 1351 */ 1352 if (free_qp_flags == IBC_FREE_QP_ONLY) { 1353 entry = qp->qp_qpn_hdl; 1354 hermon_qp_release_qpn(state, qp->qp_qpn_hdl, 1355 HERMON_QPN_FREE_ONLY); 1356 *qpnh = (ibc_qpn_hdl_t)entry; 1357 } else { 1358 hermon_qp_release_qpn(state, qp->qp_qpn_hdl, 1359 HERMON_QPN_RELEASE); 1360 } 1361 } 1362 mutex_destroy(&qp->qp_sq_lock); 1363 mutex_destroy(&qp->qp_rq_lock); 1364 1365 /* Free the Hermon Queue Pair handle */ 1366 hermon_rsrc_free(state, &rsrc); 1367 1368 /* Decrement the reference counts on CQs, PD and SRQ (if needed) */ 1369 hermon_cq_refcnt_dec(rq_cq); 1370 hermon_cq_refcnt_dec(sq_cq); 1371 hermon_pd_refcnt_dec(pd); 1372 if (qp_srq_en == HERMON_QP_SRQ_ENABLED) { 1373 hermon_srq_refcnt_dec(srq); 1374 } 1375 1376 /* Set the qphdl pointer to NULL and return success */ 1377 *qphdl = NULL; 1378 1379 return (DDI_SUCCESS); 1380 1381 qpfree_fail: 1382 return (status); 1383 } 1384 1385 1386 /* 1387 * hermon_qp_query() 1388 * Context: Can be called from interrupt or base context. 
1389 */ 1390 int 1391 hermon_qp_query(hermon_state_t *state, hermon_qphdl_t qp, 1392 ibt_qp_query_attr_t *attr_p) 1393 { 1394 ibt_cep_state_t qp_state; 1395 ibt_qp_ud_attr_t *ud; 1396 ibt_qp_rc_attr_t *rc; 1397 ibt_qp_uc_attr_t *uc; 1398 ibt_cep_flags_t enable_flags; 1399 hermon_hw_addr_path_t *qpc_path, *qpc_alt_path; 1400 ibt_cep_path_t *path_ptr, *alt_path_ptr; 1401 hermon_hw_qpc_t *qpc; 1402 int status; 1403 uint_t tmp_sched_q, tmp_alt_sched_q; 1404 1405 mutex_enter(&qp->qp_lock); 1406 1407 /* 1408 * Grab the temporary QPC entry from QP software state 1409 */ 1410 qpc = &qp->qpc; 1411 1412 /* Convert the current Hermon QP state to IBTF QP state */ 1413 switch (qp->qp_state) { 1414 case HERMON_QP_RESET: 1415 qp_state = IBT_STATE_RESET; /* "Reset" */ 1416 break; 1417 case HERMON_QP_INIT: 1418 qp_state = IBT_STATE_INIT; /* Initialized */ 1419 break; 1420 case HERMON_QP_RTR: 1421 qp_state = IBT_STATE_RTR; /* Ready to Receive */ 1422 break; 1423 case HERMON_QP_RTS: 1424 qp_state = IBT_STATE_RTS; /* Ready to Send */ 1425 break; 1426 case HERMON_QP_SQERR: 1427 qp_state = IBT_STATE_SQE; /* Send Queue Error */ 1428 break; 1429 case HERMON_QP_SQD: 1430 if (qp->qp_sqd_still_draining) { 1431 qp_state = IBT_STATE_SQDRAIN; /* SQ Draining */ 1432 } else { 1433 qp_state = IBT_STATE_SQD; /* SQ Drained */ 1434 } 1435 break; 1436 case HERMON_QP_ERR: 1437 qp_state = IBT_STATE_ERROR; /* Error */ 1438 break; 1439 default: 1440 mutex_exit(&qp->qp_lock); 1441 return (ibc_get_ci_failure(0)); 1442 } 1443 attr_p->qp_info.qp_state = qp_state; 1444 1445 /* SRQ Hook. */ 1446 attr_p->qp_srq = NULL; 1447 1448 /* 1449 * The following QP information is always returned, regardless of 1450 * the current QP state. Note: Some special handling is necessary 1451 * for calculating the QP number on special QP (QP0 and QP1). 
1452 */ 1453 attr_p->qp_sq_cq = qp->qp_sq_cqhdl->cq_hdlrarg; 1454 attr_p->qp_rq_cq = qp->qp_rq_cqhdl->cq_hdlrarg; 1455 if (qp->qp_is_special) { 1456 attr_p->qp_qpn = (qp->qp_is_special == HERMON_QP_SMI) ? 0 : 1; 1457 } else { 1458 attr_p->qp_qpn = (ib_qpn_t)qp->qp_qpnum; 1459 } 1460 attr_p->qp_sq_sgl = qp->qp_sq_sgl; 1461 attr_p->qp_rq_sgl = qp->qp_rq_sgl; 1462 attr_p->qp_info.qp_sq_sz = qp->qp_sq_bufsz - qp->qp_sq_hdrmwqes; 1463 attr_p->qp_info.qp_rq_sz = qp->qp_rq_bufsz; 1464 1465 /* 1466 * If QP is currently in the "Reset" state, then only the above are 1467 * returned 1468 */ 1469 if (qp_state == IBT_STATE_RESET) { 1470 mutex_exit(&qp->qp_lock); 1471 return (DDI_SUCCESS); 1472 } 1473 1474 /* 1475 * Post QUERY_QP command to firmware 1476 * 1477 * We do a HERMON_NOSLEEP here because we are holding the "qp_lock". 1478 * Since we may be in the interrupt context (or subsequently raised 1479 * to interrupt level by priority inversion), we do not want to block 1480 * in this routine waiting for success. 1481 */ 1482 tmp_sched_q = qpc->pri_addr_path.sched_q; 1483 tmp_alt_sched_q = qpc->alt_addr_path.sched_q; 1484 status = hermon_cmn_query_cmd_post(state, QUERY_QP, 0, qp->qp_qpnum, 1485 qpc, sizeof (hermon_hw_qpc_t), HERMON_CMD_NOSLEEP_SPIN); 1486 if (status != HERMON_CMD_SUCCESS) { 1487 mutex_exit(&qp->qp_lock); 1488 cmn_err(CE_WARN, "hermon%d: hermon_qp_query: QUERY_QP " 1489 "command failed: %08x\n", state->hs_instance, status); 1490 if (status == HERMON_CMD_INVALID_STATUS) { 1491 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 1492 } 1493 return (ibc_get_ci_failure(0)); 1494 } 1495 qpc->pri_addr_path.sched_q = tmp_sched_q; 1496 qpc->alt_addr_path.sched_q = tmp_alt_sched_q; 1497 1498 /* 1499 * Fill in the additional QP info based on the QP's transport type. 
1500 */ 1501 if (qp->qp_serv_type == HERMON_QP_UD) { 1502 1503 /* Fill in the UD-specific info */ 1504 ud = &attr_p->qp_info.qp_transport.ud; 1505 ud->ud_qkey = (ib_qkey_t)qpc->qkey; 1506 ud->ud_sq_psn = qpc->next_snd_psn; 1507 ud->ud_pkey_ix = qpc->pri_addr_path.pkey_indx; 1508 /* port+1 for port 1/2 */ 1509 ud->ud_port = 1510 (uint8_t)(((qpc->pri_addr_path.sched_q >> 6) & 0x01) + 1); 1511 1512 attr_p->qp_info.qp_trans = IBT_UD_SRV; 1513 1514 } else if (qp->qp_serv_type == HERMON_QP_RC) { 1515 1516 /* Fill in the RC-specific info */ 1517 rc = &attr_p->qp_info.qp_transport.rc; 1518 rc->rc_sq_psn = qpc->next_snd_psn; 1519 rc->rc_rq_psn = qpc->next_rcv_psn; 1520 rc->rc_dst_qpn = qpc->rem_qpn; 1521 1522 /* Grab the path migration state information */ 1523 if (qpc->pm_state == HERMON_QP_PMSTATE_MIGRATED) { 1524 rc->rc_mig_state = IBT_STATE_MIGRATED; 1525 } else if (qpc->pm_state == HERMON_QP_PMSTATE_REARM) { 1526 rc->rc_mig_state = IBT_STATE_REARMED; 1527 } else { 1528 rc->rc_mig_state = IBT_STATE_ARMED; 1529 } 1530 rc->rc_rdma_ra_out = (1 << qpc->sra_max); 1531 rc->rc_rdma_ra_in = (1 << qpc->rra_max); 1532 rc->rc_min_rnr_nak = qpc->min_rnr_nak; 1533 rc->rc_path_mtu = qpc->mtu; 1534 rc->rc_retry_cnt = qpc->retry_cnt; 1535 1536 /* Get the common primary address path fields */ 1537 qpc_path = &qpc->pri_addr_path; 1538 path_ptr = &rc->rc_path; 1539 hermon_get_addr_path(state, qpc_path, &path_ptr->cep_adds_vect, 1540 HERMON_ADDRPATH_QP); 1541 1542 /* Fill in the additional primary address path fields */ 1543 path_ptr->cep_pkey_ix = qpc_path->pkey_indx; 1544 path_ptr->cep_hca_port_num = 1545 path_ptr->cep_adds_vect.av_port_num = 1546 (uint8_t)(((qpc_path->sched_q >> 6) & 0x01) + 1); 1547 path_ptr->cep_timeout = qpc_path->ack_timeout; 1548 1549 /* Get the common alternate address path fields */ 1550 qpc_alt_path = &qpc->alt_addr_path; 1551 alt_path_ptr = &rc->rc_alt_path; 1552 hermon_get_addr_path(state, qpc_alt_path, 1553 &alt_path_ptr->cep_adds_vect, HERMON_ADDRPATH_QP); 
1554 1555 /* Fill in the additional alternate address path fields */ 1556 alt_path_ptr->cep_pkey_ix = qpc_alt_path->pkey_indx; 1557 alt_path_ptr->cep_hca_port_num = 1558 alt_path_ptr->cep_adds_vect.av_port_num = 1559 (uint8_t)(((qpc_alt_path->sched_q >> 6) & 0x01) + 1); 1560 alt_path_ptr->cep_timeout = qpc_alt_path->ack_timeout; 1561 1562 /* Get the RNR retry time from primary path */ 1563 rc->rc_rnr_retry_cnt = qpc->rnr_retry; 1564 1565 /* Set the enable flags based on RDMA/Atomic enable bits */ 1566 enable_flags = IBT_CEP_NO_FLAGS; 1567 enable_flags |= ((qpc->rre == 0) ? 0 : IBT_CEP_RDMA_RD); 1568 enable_flags |= ((qpc->rwe == 0) ? 0 : IBT_CEP_RDMA_WR); 1569 enable_flags |= ((qpc->rae == 0) ? 0 : IBT_CEP_ATOMIC); 1570 attr_p->qp_info.qp_flags = enable_flags; 1571 1572 attr_p->qp_info.qp_trans = IBT_RC_SRV; 1573 1574 } else if (qp->qp_serv_type == HERMON_QP_UC) { 1575 1576 /* Fill in the UC-specific info */ 1577 uc = &attr_p->qp_info.qp_transport.uc; 1578 uc->uc_sq_psn = qpc->next_snd_psn; 1579 uc->uc_rq_psn = qpc->next_rcv_psn; 1580 uc->uc_dst_qpn = qpc->rem_qpn; 1581 1582 /* Grab the path migration state information */ 1583 if (qpc->pm_state == HERMON_QP_PMSTATE_MIGRATED) { 1584 uc->uc_mig_state = IBT_STATE_MIGRATED; 1585 } else if (qpc->pm_state == HERMON_QP_PMSTATE_REARM) { 1586 uc->uc_mig_state = IBT_STATE_REARMED; 1587 } else { 1588 uc->uc_mig_state = IBT_STATE_ARMED; 1589 } 1590 uc->uc_path_mtu = qpc->mtu; 1591 1592 /* Get the common primary address path fields */ 1593 qpc_path = &qpc->pri_addr_path; 1594 path_ptr = &uc->uc_path; 1595 hermon_get_addr_path(state, qpc_path, &path_ptr->cep_adds_vect, 1596 HERMON_ADDRPATH_QP); 1597 1598 /* Fill in the additional primary address path fields */ 1599 path_ptr->cep_pkey_ix = qpc_path->pkey_indx; 1600 path_ptr->cep_hca_port_num = 1601 path_ptr->cep_adds_vect.av_port_num = 1602 (uint8_t)(((qpc_path->sched_q >> 6) & 0x01) + 1); 1603 1604 /* Get the common alternate address path fields */ 1605 qpc_alt_path = 
&qpc->alt_addr_path; 1606 alt_path_ptr = &uc->uc_alt_path; 1607 hermon_get_addr_path(state, qpc_alt_path, 1608 &alt_path_ptr->cep_adds_vect, HERMON_ADDRPATH_QP); 1609 1610 /* Fill in the additional alternate address path fields */ 1611 alt_path_ptr->cep_pkey_ix = qpc_alt_path->pkey_indx; 1612 alt_path_ptr->cep_hca_port_num = 1613 alt_path_ptr->cep_adds_vect.av_port_num = 1614 (uint8_t)(((qpc_alt_path->sched_q >> 6) & 0x01) + 1); 1615 1616 /* 1617 * Set the enable flags based on RDMA enable bits (by 1618 * definition UC doesn't support Atomic or RDMA Read) 1619 */ 1620 enable_flags = ((qpc->rwe == 0) ? 0 : IBT_CEP_RDMA_WR); 1621 attr_p->qp_info.qp_flags = enable_flags; 1622 1623 attr_p->qp_info.qp_trans = IBT_UC_SRV; 1624 1625 } else { 1626 HERMON_WARNING(state, "unexpected QP transport type"); 1627 mutex_exit(&qp->qp_lock); 1628 return (ibc_get_ci_failure(0)); 1629 } 1630 1631 /* 1632 * Under certain circumstances it is possible for the Hermon hardware 1633 * to transition to one of the error states without software directly 1634 * knowing about it. The QueryQP() call is the one place where we 1635 * have an opportunity to sample and update our view of the QP state. 1636 */ 1637 if (qpc->state == HERMON_QP_SQERR) { 1638 attr_p->qp_info.qp_state = IBT_STATE_SQE; 1639 qp->qp_state = HERMON_QP_SQERR; 1640 } 1641 if (qpc->state == HERMON_QP_ERR) { 1642 attr_p->qp_info.qp_state = IBT_STATE_ERROR; 1643 qp->qp_state = HERMON_QP_ERR; 1644 } 1645 mutex_exit(&qp->qp_lock); 1646 1647 return (DDI_SUCCESS); 1648 } 1649 1650 1651 /* 1652 * hermon_qp_create_qpn() 1653 * Context: Can be called from interrupt or base context. 1654 */ 1655 static int 1656 hermon_qp_create_qpn(hermon_state_t *state, hermon_qphdl_t qp, 1657 hermon_rsrc_t *qpc) 1658 { 1659 hermon_qpn_entry_t query; 1660 hermon_qpn_entry_t *entry; 1661 avl_index_t where; 1662 1663 /* 1664 * Build a query (for the AVL tree lookup) and attempt to find 1665 * a previously added entry that has a matching QPC index. 
If 1666 * no matching entry is found, then allocate, initialize, and 1667 * add an entry to the AVL tree. 1668 * If a matching entry is found, then increment its QPN counter 1669 * and reference counter. 1670 */ 1671 query.qpn_indx = qpc->hr_indx; 1672 mutex_enter(&state->hs_qpn_avl_lock); 1673 entry = (hermon_qpn_entry_t *)avl_find(&state->hs_qpn_avl, 1674 &query, &where); 1675 if (entry == NULL) { 1676 /* 1677 * Allocate and initialize a QPN entry, then insert 1678 * it into the AVL tree. 1679 */ 1680 entry = (hermon_qpn_entry_t *)kmem_zalloc( 1681 sizeof (hermon_qpn_entry_t), KM_NOSLEEP); 1682 if (entry == NULL) { 1683 mutex_exit(&state->hs_qpn_avl_lock); 1684 return (DDI_FAILURE); 1685 } 1686 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*entry)) 1687 1688 entry->qpn_indx = qpc->hr_indx; 1689 entry->qpn_refcnt = 0; 1690 entry->qpn_counter = 0; 1691 1692 avl_insert(&state->hs_qpn_avl, entry, where); 1693 } 1694 1695 /* 1696 * Make the AVL tree entry point to the QP context resource that 1697 * it will be responsible for tracking 1698 */ 1699 entry->qpn_qpc = qpc; 1700 1701 /* 1702 * Setup the QP handle to point to the AVL tree entry. Then 1703 * generate the new QP number from the entry's QPN counter value 1704 * and the hardware's QP context table index. 1705 */ 1706 qp->qp_qpn_hdl = entry; 1707 qp->qp_qpnum = ((entry->qpn_counter << 1708 state->hs_cfg_profile->cp_log_num_qp) | qpc->hr_indx) & 1709 HERMON_QP_MAXNUMBER_MSK; 1710 qp->qp_ring = qp->qp_qpnum << 8; 1711 1712 /* 1713 * Increment the reference counter and QPN counter. The QPN 1714 * counter always indicates the next available number for use. 1715 */ 1716 entry->qpn_counter++; 1717 entry->qpn_refcnt++; 1718 1719 mutex_exit(&state->hs_qpn_avl_lock); 1720 1721 return (DDI_SUCCESS); 1722 } 1723 1724 1725 /* 1726 * hermon_qp_release_qpn() 1727 * Context: Can be called only from user or kernel context. 
1728 */ 1729 void 1730 hermon_qp_release_qpn(hermon_state_t *state, hermon_qpn_entry_t *entry, 1731 int flags) 1732 { 1733 ASSERT(entry != NULL); 1734 1735 mutex_enter(&state->hs_qpn_avl_lock); 1736 1737 /* 1738 * If we are releasing the QP number here, then we decrement the 1739 * reference count and check for zero references. If there are 1740 * zero references, then we free the QPC context (if it hadn't 1741 * already been freed during a HERMON_QPN_FREE_ONLY free, i.e. for 1742 * reuse with another similar QP number) and remove the tracking 1743 * structure from the QP number AVL tree and free the structure. 1744 * If we are not releasing the QP number here, then, as long as we 1745 * have not exhausted the usefulness of the QPC context (that is, 1746 * re-used it too many times without the reference count having 1747 * gone to zero), we free up the QPC context for use by another 1748 * thread (which will use it to construct a different QP number 1749 * from the same QPC table index). 1750 */ 1751 if (flags == HERMON_QPN_RELEASE) { 1752 entry->qpn_refcnt--; 1753 1754 /* 1755 * If the reference count is zero, then we free the QPC 1756 * context (if it hadn't already been freed in an early 1757 * step, e.g. HERMON_QPN_FREE_ONLY) and remove/free the 1758 * tracking structure from the QP number AVL tree. 1759 */ 1760 if (entry->qpn_refcnt == 0) { 1761 if (entry->qpn_qpc != NULL) { 1762 hermon_rsrc_free(state, &entry->qpn_qpc); 1763 } 1764 1765 /* 1766 * If the current entry has served it's useful 1767 * purpose (i.e. been reused the maximum allowable 1768 * number of times), then remove it from QP number 1769 * AVL tree and free it up. 
1770 */ 1771 if (entry->qpn_counter >= (1 << 1772 (24 - state->hs_cfg_profile->cp_log_num_qp))) { 1773 avl_remove(&state->hs_qpn_avl, entry); 1774 kmem_free(entry, sizeof (hermon_qpn_entry_t)); 1775 } 1776 } 1777 1778 } else if (flags == HERMON_QPN_FREE_ONLY) { 1779 /* 1780 * Even if we are not freeing the QP number, that will not 1781 * always prevent us from releasing the QPC context. In fact, 1782 * since the QPC context only forms part of the whole QPN, 1783 * we want to free it up for use by other consumers. But 1784 * if the reference count is non-zero (which it will always 1785 * be when we are doing HERMON_QPN_FREE_ONLY) and the counter 1786 * has reached its maximum value, then we cannot reuse the 1787 * QPC context until the reference count eventually reaches 1788 * zero (in HERMON_QPN_RELEASE, above). 1789 */ 1790 if (entry->qpn_counter < (1 << 1791 (24 - state->hs_cfg_profile->cp_log_num_qp))) { 1792 hermon_rsrc_free(state, &entry->qpn_qpc); 1793 } 1794 } 1795 mutex_exit(&state->hs_qpn_avl_lock); 1796 } 1797 1798 1799 /* 1800 * hermon_qpn_avl_compare() 1801 * Context: Can be called from user or kernel context. 
1802 */ 1803 static int 1804 hermon_qpn_avl_compare(const void *q, const void *e) 1805 { 1806 hermon_qpn_entry_t *entry, *query; 1807 1808 entry = (hermon_qpn_entry_t *)e; 1809 query = (hermon_qpn_entry_t *)q; 1810 1811 if (query->qpn_indx < entry->qpn_indx) { 1812 return (-1); 1813 } else if (query->qpn_indx > entry->qpn_indx) { 1814 return (+1); 1815 } else { 1816 return (0); 1817 } 1818 } 1819 1820 1821 /* 1822 * hermon_qpn_avl_init() 1823 * Context: Only called from attach() path context 1824 */ 1825 void 1826 hermon_qpn_avl_init(hermon_state_t *state) 1827 { 1828 /* Initialize the lock used for QP number (QPN) AVL tree access */ 1829 mutex_init(&state->hs_qpn_avl_lock, NULL, MUTEX_DRIVER, 1830 DDI_INTR_PRI(state->hs_intrmsi_pri)); 1831 1832 /* Initialize the AVL tree for the QP number (QPN) storage */ 1833 avl_create(&state->hs_qpn_avl, hermon_qpn_avl_compare, 1834 sizeof (hermon_qpn_entry_t), 1835 offsetof(hermon_qpn_entry_t, qpn_avlnode)); 1836 } 1837 1838 1839 /* 1840 * hermon_qpn_avl_fini() 1841 * Context: Only called from attach() and/or detach() path contexts 1842 */ 1843 void 1844 hermon_qpn_avl_fini(hermon_state_t *state) 1845 { 1846 hermon_qpn_entry_t *entry; 1847 void *cookie; 1848 1849 /* 1850 * Empty all entries (if necessary) and destroy the AVL tree 1851 * that was used for QP number (QPN) tracking. 1852 */ 1853 cookie = NULL; 1854 while ((entry = (hermon_qpn_entry_t *)avl_destroy_nodes( 1855 &state->hs_qpn_avl, &cookie)) != NULL) { 1856 kmem_free(entry, sizeof (hermon_qpn_entry_t)); 1857 } 1858 avl_destroy(&state->hs_qpn_avl); 1859 1860 /* Destroy the lock used for QP number (QPN) AVL tree access */ 1861 mutex_destroy(&state->hs_qpn_avl_lock); 1862 } 1863 1864 1865 /* 1866 * hermon_qphdl_from_qpnum() 1867 * Context: Can be called from interrupt or base context. 
1868 * 1869 * This routine is important because changing the unconstrained 1870 * portion of the QP number is critical to the detection of a 1871 * potential race condition in the QP event handler code (i.e. the case 1872 * where a QP is freed and alloc'd again before an event for the 1873 * "old" QP can be handled). 1874 * 1875 * While this is not a perfect solution (not sure that one exists) 1876 * it does help to mitigate the chance that this race condition will 1877 * cause us to deliver a "stale" event to the new QP owner. Note: 1878 * this solution does not scale well because the number of constrained 1879 * bits increases (and, hence, the number of unconstrained bits 1880 * decreases) as the number of supported QPs grows. For small and 1881 * intermediate values, it should hopefully provide sufficient 1882 * protection. 1883 */ 1884 hermon_qphdl_t 1885 hermon_qphdl_from_qpnum(hermon_state_t *state, uint_t qpnum) 1886 { 1887 uint_t qpindx, qpmask; 1888 1889 /* Calculate the QP table index from the qpnum */ 1890 qpmask = (1 << state->hs_cfg_profile->cp_log_num_qp) - 1; 1891 qpindx = qpnum & qpmask; 1892 return (state->hs_qphdl[qpindx]); 1893 } 1894 1895 1896 /* 1897 * hermon_special_qp_rsrc_alloc 1898 * Context: Can be called from interrupt or base context. 1899 */ 1900 static int 1901 hermon_special_qp_rsrc_alloc(hermon_state_t *state, ibt_sqp_type_t type, 1902 uint_t port, hermon_rsrc_t **qp_rsrc) 1903 { 1904 uint_t mask, flags; 1905 int status; 1906 1907 mutex_enter(&state->hs_spec_qplock); 1908 flags = state->hs_spec_qpflags; 1909 if (type == IBT_SMI_SQP) { 1910 /* 1911 * Check here to see if the driver has been configured 1912 * to instruct the Hermon firmware to handle all incoming 1913 * SMP messages (i.e. messages sent to SMA). If so, 1914 * then we will treat QP0 as if it has already been 1915 * allocated (for internal use). Otherwise, if we allow 1916 * the allocation to happen, it will cause unexpected 1917 * behaviors (e.g. 
Hermon SMA becomes unresponsive). 1918 */ 1919 if (state->hs_cfg_profile->cp_qp0_agents_in_fw != 0) { 1920 mutex_exit(&state->hs_spec_qplock); 1921 return (IBT_QP_IN_USE); 1922 } 1923 1924 /* 1925 * If this is the first QP0 allocation, then post 1926 * a CONF_SPECIAL_QP firmware command 1927 */ 1928 if ((flags & HERMON_SPECIAL_QP0_RSRC_MASK) == 0) { 1929 status = hermon_conf_special_qp_cmd_post(state, 1930 state->hs_spec_qp0->hr_indx, HERMON_CMD_QP_SMI, 1931 HERMON_CMD_NOSLEEP_SPIN, 1932 HERMON_CMD_SPEC_QP_OPMOD( 1933 state->hs_cfg_profile->cp_qp0_agents_in_fw, 1934 state->hs_cfg_profile->cp_qp1_agents_in_fw)); 1935 if (status != HERMON_CMD_SUCCESS) { 1936 mutex_exit(&state->hs_spec_qplock); 1937 cmn_err(CE_NOTE, "hermon%d: CONF_SPECIAL_QP " 1938 "command failed: %08x\n", 1939 state->hs_instance, status); 1940 return (IBT_INSUFF_RESOURCE); 1941 } 1942 } 1943 1944 /* 1945 * Now check (and, if necessary, modify) the flags to indicate 1946 * whether the allocation was successful 1947 */ 1948 mask = (1 << (HERMON_SPECIAL_QP0_RSRC + port)); 1949 if (flags & mask) { 1950 mutex_exit(&state->hs_spec_qplock); 1951 return (IBT_QP_IN_USE); 1952 } 1953 state->hs_spec_qpflags |= mask; 1954 *qp_rsrc = state->hs_spec_qp0; 1955 1956 } else { 1957 /* 1958 * If this is the first QP1 allocation, then post 1959 * a CONF_SPECIAL_QP firmware command 1960 */ 1961 if ((flags & HERMON_SPECIAL_QP1_RSRC_MASK) == 0) { 1962 status = hermon_conf_special_qp_cmd_post(state, 1963 state->hs_spec_qp1->hr_indx, HERMON_CMD_QP_GSI, 1964 HERMON_CMD_NOSLEEP_SPIN, 1965 HERMON_CMD_SPEC_QP_OPMOD( 1966 state->hs_cfg_profile->cp_qp0_agents_in_fw, 1967 state->hs_cfg_profile->cp_qp1_agents_in_fw)); 1968 if (status != HERMON_CMD_SUCCESS) { 1969 mutex_exit(&state->hs_spec_qplock); 1970 cmn_err(CE_NOTE, "hermon%d: CONF_SPECIAL_QP " 1971 "command failed: %08x\n", 1972 state->hs_instance, status); 1973 return (IBT_INSUFF_RESOURCE); 1974 } 1975 } 1976 1977 /* 1978 * Now check (and, if necessary, modify) the flags to 
indicate
		 * whether the allocation was successful
		 */
		mask = (1 << (HERMON_SPECIAL_QP1_RSRC + port));
		if (flags & mask) {
			/* QP1 on this port is already claimed */
			mutex_exit(&state->hs_spec_qplock);
			return (IBT_QP_IN_USE);
		}
		state->hs_spec_qpflags |= mask;
		*qp_rsrc = state->hs_spec_qp1;
	}

	mutex_exit(&state->hs_spec_qplock);
	return (DDI_SUCCESS);
}


/*
 * hermon_special_qp_rsrc_free
 *    Context: Can be called from interrupt or base context.
 *
 *    Releases the per-port claim on the shared special QP resource taken
 *    by hermon_special_qp_rsrc_alloc(): QP0 for type == IBT_SMI_SQP, QP1
 *    otherwise.  Clears the port's bit in hs_spec_qpflags (under
 *    hs_spec_qplock) and posts a CONF_SPECIAL_QP firmware command.
 *    Returns DDI_SUCCESS, or an IBTF CI failure code if the firmware
 *    command fails (raising an FMA ereport on HERMON_CMD_INVALID_STATUS).
 */
static int
hermon_special_qp_rsrc_free(hermon_state_t *state, ibt_sqp_type_t type,
    uint_t port)
{
	uint_t		mask, flags;
	int		status;

	mutex_enter(&state->hs_spec_qplock);
	if (type == IBT_SMI_SQP) {
		/* Clear this port's QP0 "in use" bit */
		mask = (1 << (HERMON_SPECIAL_QP0_RSRC + port));
		state->hs_spec_qpflags &= ~mask;
		flags = state->hs_spec_qpflags;

		/*
		 * If this is the last QP0 free, then post a CONF_SPECIAL_QP
		 * NOW, If this is the last Special QP free, then post a
		 * CONF_SPECIAL_QP firmware command - it'll stop them all
		 *
		 * NOTE(review): the comment above says "last free", but the
		 * condition below fires when "flags" is still NON-zero,
		 * i.e. when some special QP remains allocated.  Confirm
		 * whether the intended test is (flags == 0).
		 */
		if (flags) {
			status = hermon_conf_special_qp_cmd_post(state, 0,
			    HERMON_CMD_QP_SMI, HERMON_CMD_NOSLEEP_SPIN, 0);
			if (status != HERMON_CMD_SUCCESS) {
				mutex_exit(&state->hs_spec_qplock);
				cmn_err(CE_NOTE, "hermon%d: CONF_SPECIAL_QP "
				    "command failed: %08x\n",
				    state->hs_instance, status);
				if (status == HERMON_CMD_INVALID_STATUS) {
					/* Firmware unresponsive: service lost */
					hermon_fm_ereport(state, HCA_SYS_ERR,
					    HCA_ERR_SRV_LOST);
				}
				return (ibc_get_ci_failure(0));
			}
		}
	} else {
		/* Clear this port's QP1 "in use" bit */
		mask = (1 << (HERMON_SPECIAL_QP1_RSRC + port));
		state->hs_spec_qpflags &= ~mask;
		flags = state->hs_spec_qpflags;

		/*
		 * If this is the last QP1 free, then post a CONF_SPECIAL_QP
		 * NOW, if this is the last special QP free, then post a
		 * CONF_SPECIAL_QP firmware command - it'll stop them all
		 *
		 * NOTE(review): same "last free" vs. (flags != 0) mismatch
		 * as in the QP0 arm above - verify the intended condition.
		 */
		if (flags) {
			status = hermon_conf_special_qp_cmd_post(state, 0,
			    HERMON_CMD_QP_GSI, HERMON_CMD_NOSLEEP_SPIN, 0);
			if (status != HERMON_CMD_SUCCESS) {
				mutex_exit(&state->hs_spec_qplock);
				cmn_err(CE_NOTE, "hermon%d: CONF_SPECIAL_QP "
				    "command failed: %08x\n",
				    state->hs_instance, status);
				if (status == HERMON_CMD_INVALID_STATUS) {
					/* Firmware unresponsive: service lost */
					hermon_fm_ereport(state, HCA_SYS_ERR,
					    HCA_ERR_SRV_LOST);
				}
				return (ibc_get_ci_failure(0));
			}
		}
	}

	mutex_exit(&state->hs_spec_qplock);
	return (DDI_SUCCESS);
}


/*
 * hermon_qp_sgl_to_logwqesz()
 *    Context: Can be called from interrupt or base context.
 *
 *    Converts a requested number of SGL entries (num_sgl) into the
 *    log2 of the work queue entry size for the given work queue type
 *    (returned via *logwqesz), and the actual number of SGL entries
 *    that fit in a WQE of that size, capped at real_max_sgl (returned
 *    via *max_sgl).  Each SGL entry occupies 16 bytes (hence the
 *    "<< 4" / ">> 4" arithmetic below).
 */
static void
hermon_qp_sgl_to_logwqesz(hermon_state_t *state, uint_t num_sgl,
    uint_t real_max_sgl, hermon_qp_wq_type_t wq_type,
    uint_t *logwqesz, uint_t *max_sgl)
{
	uint_t	max_size, log2, actual_sgl;

	switch (wq_type) {
	case HERMON_QP_WQ_TYPE_SENDQ_UD:
		/*
		 * Use requested maximum SGL to calculate max descriptor size
		 * (while guaranteeing that the descriptor size is a
		 * power-of-2 cachelines).
		 */
		max_size = (HERMON_QP_WQE_MLX_SND_HDRS + (num_sgl << 4));
		/* Round up to the next power-of-2; highbit() is 1-indexed */
		log2 = highbit(max_size);
		if ((max_size & (max_size - 1)) == 0) {
			/* Already a power of two: don't round up */
			log2 = log2 - 1;
		}

		/* Make sure descriptor is at least the minimum size */
		log2 = max(log2, HERMON_QP_WQE_LOG_MINIMUM);

		/* Calculate actual number of SGL (given WQE size) */
		actual_sgl = ((1 << log2) -
		    sizeof (hermon_hw_snd_wqe_ctrl_t)) >> 4;
		break;

	case HERMON_QP_WQ_TYPE_SENDQ_CONN:
		/*
		 * Use requested maximum SGL to calculate max descriptor size
		 * (while guaranteeing that the descriptor size is a
		 * power-of-2 cachelines.
		 */
		max_size = (HERMON_QP_WQE_MLX_SND_HDRS + (num_sgl << 4));
		/* Round up to the next power-of-2; highbit() is 1-indexed */
		log2 = highbit(max_size);
		if ((max_size & (max_size - 1)) == 0) {
			/* Already a power of two: don't round up */
			log2 = log2 - 1;
		}

		/* Make sure descriptor is at least the minimum size */
		log2 = max(log2, HERMON_QP_WQE_LOG_MINIMUM);

		/* Calculate actual number of SGL (given WQE size) */
		actual_sgl = ((1 << log2) - HERMON_QP_WQE_MLX_SND_HDRS) >> 4;
		break;

	case HERMON_QP_WQ_TYPE_RECVQ:
		/*
		 * Same as above (except for Recv WQEs)
		 */
		max_size = (HERMON_QP_WQE_MLX_RCV_HDRS + (num_sgl << 4));
		log2 = highbit(max_size);
		if ((max_size & (max_size - 1)) == 0) {
			log2 = log2 - 1;
		}

		/* Make sure descriptor is at least the minimum size */
		log2 = max(log2, HERMON_QP_WQE_LOG_MINIMUM);

		/* Calculate actual number of SGL (given WQE size) */
		actual_sgl = ((1 << log2) - HERMON_QP_WQE_MLX_RCV_HDRS) >> 4;
		break;

	case HERMON_QP_WQ_TYPE_SENDMLX_QP0:
		/*
		 * Same as above (except for MLX transport WQEs).  For these
		 * WQEs we have to account for the space consumed by the
		 * "inline" packet headers.  (This is smaller than for QP1
		 * below because QP0 is not allowed to send packets with a GRH.
		 */
		max_size = (HERMON_QP_WQE_MLX_QP0_HDRS + (num_sgl << 4));
		log2 = highbit(max_size);
		if ((max_size & (max_size - 1)) == 0) {
			log2 = log2 - 1;
		}

		/* Make sure descriptor is at least the minimum size */
		log2 = max(log2, HERMON_QP_WQE_LOG_MINIMUM);

		/* Calculate actual number of SGL (given WQE size) */
		actual_sgl = ((1 << log2) - HERMON_QP_WQE_MLX_QP0_HDRS) >> 4;
		break;

	case HERMON_QP_WQ_TYPE_SENDMLX_QP1:
		/*
		 * Same as above.  For these WQEs we again have to account for
		 * the space consumed by the "inline" packet headers.  (This
		 * is larger than for QP0 above because we have to account for
		 * the possibility of a GRH in each packet - and this
		 * introduces an alignment issue that causes us to consume
		 * an additional 8 bytes).
		 */
		max_size = (HERMON_QP_WQE_MLX_QP1_HDRS + (num_sgl << 4));
		log2 = highbit(max_size);
		if ((max_size & (max_size - 1)) == 0) {
			log2 = log2 - 1;
		}

		/* Make sure descriptor is at least the minimum size */
		log2 = max(log2, HERMON_QP_WQE_LOG_MINIMUM);

		/* Calculate actual number of SGL (given WQE size) */
		actual_sgl = ((1 << log2) - HERMON_QP_WQE_MLX_QP1_HDRS) >> 4;
		break;

	default:
		/*
		 * NOTE(review): if this path is ever taken, "log2" and
		 * "actual_sgl" are read uninitialized in the stores below
		 * (undefined behavior).  Callers are evidently expected to
		 * pass only the enumerated types - confirm, or initialize
		 * both before the switch.
		 */
		HERMON_WARNING(state, "unexpected work queue type");
		break;
	}

	/* Fill in the return values */
	*logwqesz = log2;
	*max_sgl = min(real_max_sgl, actual_sgl);
}