1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * tavor_qp.c 29 * Tavor Queue Pair Processing Routines 30 * 31 * Implements all the routines necessary for allocating, freeing, and 32 * querying the Tavor queue pairs. 
33 */ 34 35 #include <sys/types.h> 36 #include <sys/conf.h> 37 #include <sys/ddi.h> 38 #include <sys/sunddi.h> 39 #include <sys/modctl.h> 40 #include <sys/bitmap.h> 41 #include <sys/sysmacros.h> 42 43 #include <sys/ib/adapters/tavor/tavor.h> 44 #include <sys/ib/ib_pkt_hdrs.h> 45 46 static int tavor_qp_create_qpn(tavor_state_t *state, tavor_qphdl_t qp, 47 tavor_rsrc_t *qpc); 48 static int tavor_qpn_avl_compare(const void *q, const void *e); 49 static int tavor_special_qp_rsrc_alloc(tavor_state_t *state, 50 ibt_sqp_type_t type, uint_t port, tavor_rsrc_t **qp_rsrc); 51 static int tavor_special_qp_rsrc_free(tavor_state_t *state, ibt_sqp_type_t type, 52 uint_t port); 53 static void tavor_qp_sgl_to_logwqesz(tavor_state_t *state, uint_t num_sgl, 54 tavor_qp_wq_type_t wq_type, uint_t *logwqesz, uint_t *max_sgl); 55 56 /* 57 * tavor_qp_alloc() 58 * Context: Can be called only from user or kernel context. 59 */ 60 int 61 tavor_qp_alloc(tavor_state_t *state, tavor_qp_info_t *qpinfo, 62 uint_t sleepflag, tavor_qp_options_t *op) 63 { 64 tavor_rsrc_pool_info_t *rsrc_pool; 65 tavor_rsrc_t *qpc, *rsrc, *rdb; 66 tavor_umap_db_entry_t *umapdb; 67 tavor_qphdl_t qp; 68 ibt_qp_alloc_attr_t *attr_p; 69 ibt_qp_type_t type; 70 ibtl_qp_hdl_t ibt_qphdl; 71 ibt_chan_sizes_t *queuesz_p; 72 ib_qpn_t *qpn; 73 tavor_qphdl_t *qphdl; 74 ibt_mr_attr_t mr_attr; 75 tavor_mr_options_t mr_op; 76 tavor_srqhdl_t srq; 77 tavor_pdhdl_t pd; 78 tavor_cqhdl_t sq_cq, rq_cq; 79 tavor_mrhdl_t mr; 80 uint64_t value, qp_desc_off; 81 uint32_t *sq_buf, *rq_buf; 82 uint32_t log_qp_sq_size, log_qp_rq_size; 83 uint32_t sq_size, rq_size; 84 uint32_t sq_wqe_size, rq_wqe_size; 85 uint32_t max_rdb, max_sgl, uarpg; 86 uint_t wq_location, dma_xfer_mode, qp_is_umap; 87 uint_t qp_srq_en; 88 int status, flag; 89 90 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr_p, *queuesz_p)) 91 92 /* 93 * Check the "options" flag. 
Currently this flag tells the driver 94 * whether or not the QP's work queues should be come from normal 95 * system memory or whether they should be allocated from DDR memory. 96 */ 97 if (op == NULL) { 98 wq_location = TAVOR_QUEUE_LOCATION_NORMAL; 99 } else { 100 wq_location = op->qpo_wq_loc; 101 } 102 103 /* 104 * Extract the necessary info from the tavor_qp_info_t structure 105 */ 106 attr_p = qpinfo->qpi_attrp; 107 type = qpinfo->qpi_type; 108 ibt_qphdl = qpinfo->qpi_ibt_qphdl; 109 queuesz_p = qpinfo->qpi_queueszp; 110 qpn = qpinfo->qpi_qpn; 111 qphdl = &qpinfo->qpi_qphdl; 112 113 /* 114 * Determine whether QP is being allocated for userland access or 115 * whether it is being allocated for kernel access. If the QP is 116 * being allocated for userland access, then lookup the UAR doorbell 117 * page number for the current process. Note: If this is not found 118 * (e.g. if the process has not previously open()'d the Tavor driver), 119 * then an error is returned. 120 */ 121 qp_is_umap = (attr_p->qp_alloc_flags & IBT_QP_USER_MAP) ? 1 : 0; 122 if (qp_is_umap) { 123 status = tavor_umap_db_find(state->ts_instance, ddi_get_pid(), 124 MLNX_UMAP_UARPG_RSRC, &value, 0, NULL); 125 if (status != DDI_SUCCESS) { 126 goto qpalloc_fail; 127 } 128 uarpg = ((tavor_rsrc_t *)(uintptr_t)value)->tr_indx; 129 } 130 131 /* 132 * Determine whether QP is being associated with an SRQ 133 */ 134 qp_srq_en = (attr_p->qp_alloc_flags & IBT_QP_USES_SRQ) ? 1 : 0; 135 if (qp_srq_en) { 136 /* 137 * Check for valid SRQ handle pointers 138 */ 139 if (attr_p->qp_ibc_srq_hdl == NULL) { 140 goto qpalloc_fail; 141 } 142 srq = (tavor_srqhdl_t)attr_p->qp_ibc_srq_hdl; 143 } 144 145 /* 146 * Check for valid QP service type (only UD/RC/UC supported) 147 */ 148 if (((type != IBT_UD_RQP) && (type != IBT_RC_RQP) && 149 (type != IBT_UC_RQP))) { 150 goto qpalloc_fail; 151 } 152 153 /* 154 * Only RC is supported on an SRQ -- This is a Tavor hardware 155 * limitation. 
Arbel native mode will not have this shortcoming. 156 */ 157 if (qp_srq_en && type != IBT_RC_RQP) { 158 goto qpalloc_fail; 159 } 160 161 /* 162 * Check for valid PD handle pointer 163 */ 164 if (attr_p->qp_pd_hdl == NULL) { 165 goto qpalloc_fail; 166 } 167 pd = (tavor_pdhdl_t)attr_p->qp_pd_hdl; 168 169 /* 170 * If on an SRQ, check to make sure the PD is the same 171 */ 172 if (qp_srq_en && (pd->pd_pdnum != srq->srq_pdhdl->pd_pdnum)) { 173 goto qpalloc_fail; 174 } 175 176 /* Increment the reference count on the protection domain (PD) */ 177 tavor_pd_refcnt_inc(pd); 178 179 /* 180 * Check for valid CQ handle pointers 181 */ 182 if ((attr_p->qp_ibc_scq_hdl == NULL) || 183 (attr_p->qp_ibc_rcq_hdl == NULL)) { 184 goto qpalloc_fail1; 185 } 186 sq_cq = (tavor_cqhdl_t)attr_p->qp_ibc_scq_hdl; 187 rq_cq = (tavor_cqhdl_t)attr_p->qp_ibc_rcq_hdl; 188 189 /* 190 * Increment the reference count on the CQs. One or both of these 191 * could return error if we determine that the given CQ is already 192 * being used with a special (SMI/GSI) QP. 193 */ 194 status = tavor_cq_refcnt_inc(sq_cq, TAVOR_CQ_IS_NORMAL); 195 if (status != DDI_SUCCESS) { 196 goto qpalloc_fail1; 197 } 198 status = tavor_cq_refcnt_inc(rq_cq, TAVOR_CQ_IS_NORMAL); 199 if (status != DDI_SUCCESS) { 200 goto qpalloc_fail2; 201 } 202 203 /* 204 * Allocate an QP context entry. This will be filled in with all 205 * the necessary parameters to define the Queue Pair. Unlike 206 * other Tavor hardware resources, ownership is not immediately 207 * given to hardware in the final step here. Instead, we must 208 * wait until the QP is later transitioned to the "Init" state before 209 * passing the QP to hardware. If we fail here, we must undo all 210 * the reference count (CQ and PD). 211 */ 212 status = tavor_rsrc_alloc(state, TAVOR_QPC, 1, sleepflag, &qpc); 213 if (status != DDI_SUCCESS) { 214 goto qpalloc_fail3; 215 } 216 217 /* 218 * Allocate the software structure for tracking the queue pair 219 * (i.e. 
the Tavor Queue Pair handle). If we fail here, we must 220 * undo the reference counts and the previous resource allocation. 221 */ 222 status = tavor_rsrc_alloc(state, TAVOR_QPHDL, 1, sleepflag, &rsrc); 223 if (status != DDI_SUCCESS) { 224 goto qpalloc_fail4; 225 } 226 qp = (tavor_qphdl_t)rsrc->tr_addr; 227 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qp)) 228 229 /* 230 * Calculate the QP number from QPC index. This routine handles 231 * all of the operations necessary to keep track of used, unused, 232 * and released QP numbers. 233 */ 234 status = tavor_qp_create_qpn(state, qp, qpc); 235 if (status != DDI_SUCCESS) { 236 goto qpalloc_fail5; 237 } 238 239 /* 240 * If this will be a user-mappable QP, then allocate an entry for 241 * the "userland resources database". This will later be added to 242 * the database (after all further QP operations are successful). 243 * If we fail here, we must undo the reference counts and the 244 * previous resource allocation. 245 */ 246 if (qp_is_umap) { 247 umapdb = tavor_umap_db_alloc(state->ts_instance, qp->qp_qpnum, 248 MLNX_UMAP_QPMEM_RSRC, (uint64_t)(uintptr_t)rsrc); 249 if (umapdb == NULL) { 250 goto qpalloc_fail6; 251 } 252 } 253 254 /* 255 * If this is an RC QP, then pre-allocate the maximum number of RDB 256 * entries. This allows us to ensure that we can later cover all 257 * the resources needed by hardware for handling multiple incoming 258 * RDMA Reads. Note: These resources are obviously not always 259 * necessary. They are allocated here anyway. Someday maybe this 260 * can be modified to allocate these on-the-fly (i.e. only if RDMA 261 * Read or Atomic operations are enabled) XXX 262 * If we fail here, we have a bunch of resource and reference count 263 * cleanup to do. 
264 */ 265 if (type == IBT_RC_RQP) { 266 max_rdb = state->ts_cfg_profile->cp_hca_max_rdma_in_qp; 267 status = tavor_rsrc_alloc(state, TAVOR_RDB, max_rdb, 268 sleepflag, &rdb); 269 if (status != DDI_SUCCESS) { 270 goto qpalloc_fail7; 271 } 272 qp->qp_rdbrsrcp = rdb; 273 /* Calculate offset (into DDR memory) of RDB entries */ 274 rsrc_pool = &state->ts_rsrc_hdl[TAVOR_RDB]; 275 qp->qp_rdb_ddraddr = (uintptr_t)rsrc_pool->rsrc_ddr_offset + 276 (rdb->tr_indx << TAVOR_RDB_SIZE_SHIFT); 277 } 278 279 /* 280 * Calculate the appropriate size for the work queues. 281 * Note: All Tavor QP work queues must be a power-of-2 in size. Also 282 * they may not be any smaller than TAVOR_QP_MIN_SIZE. This step is 283 * to round the requested size up to the next highest power-of-2 284 */ 285 attr_p->qp_sizes.cs_sq = max(attr_p->qp_sizes.cs_sq, TAVOR_QP_MIN_SIZE); 286 attr_p->qp_sizes.cs_rq = max(attr_p->qp_sizes.cs_rq, TAVOR_QP_MIN_SIZE); 287 log_qp_sq_size = highbit(attr_p->qp_sizes.cs_sq); 288 if (ISP2(attr_p->qp_sizes.cs_sq)) { 289 log_qp_sq_size = log_qp_sq_size - 1; 290 } 291 log_qp_rq_size = highbit(attr_p->qp_sizes.cs_rq); 292 if (ISP2(attr_p->qp_sizes.cs_rq)) { 293 log_qp_rq_size = log_qp_rq_size - 1; 294 } 295 296 /* 297 * Next we verify that the rounded-up size is valid (i.e. consistent 298 * with the device limits and/or software-configured limits). If not, 299 * then obviously we have a lot of cleanup to do before returning. 300 */ 301 if ((log_qp_sq_size > state->ts_cfg_profile->cp_log_max_qp_sz) || 302 (!qp_srq_en && (log_qp_rq_size > 303 state->ts_cfg_profile->cp_log_max_qp_sz))) { 304 goto qpalloc_fail8; 305 } 306 307 /* 308 * Next we verify that the requested number of SGL is valid (i.e. 309 * consistent with the device limits and/or software-configured 310 * limits). If not, then obviously the same cleanup needs to be done. 
311 */ 312 max_sgl = state->ts_cfg_profile->cp_wqe_real_max_sgl; 313 if ((attr_p->qp_sizes.cs_sq_sgl > max_sgl) || 314 (!qp_srq_en && (attr_p->qp_sizes.cs_rq_sgl > max_sgl))) { 315 goto qpalloc_fail8; 316 } 317 318 /* 319 * Determine this QP's WQE sizes (for both the Send and Recv WQEs). 320 * This will depend on the requested number of SGLs. Note: this 321 * has the side-effect of also calculating the real number of SGLs 322 * (for the calculated WQE size). 323 * 324 * For QP's on an SRQ, we set these to 0. 325 */ 326 if (qp_srq_en) { 327 qp->qp_rq_log_wqesz = 0; 328 qp->qp_rq_sgl = 0; 329 } else { 330 tavor_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_rq_sgl, 331 TAVOR_QP_WQ_TYPE_RECVQ, &qp->qp_rq_log_wqesz, 332 &qp->qp_rq_sgl); 333 } 334 tavor_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_sq_sgl, 335 TAVOR_QP_WQ_TYPE_SENDQ, &qp->qp_sq_log_wqesz, &qp->qp_sq_sgl); 336 337 /* 338 * Allocate the memory for QP work queues. Note: The location from 339 * which we will allocate these work queues has been passed in 340 * through the tavor_qp_options_t structure. Since Tavor work queues 341 * are not allowed to cross a 32-bit (4GB) boundary, the alignment of 342 * the work queue memory is very important. We used to allocate 343 * work queues (the combined receive and send queues) so that they 344 * would be aligned on their combined size. That alignment guaranteed 345 * that they would never cross the 4GB boundary (Tavor work queues 346 * are on the order of MBs at maximum). Now we are able to relax 347 * this alignment constraint by ensuring that the IB address assigned 348 * to the queue memory (as a result of the tavor_mr_register() call) 349 * is offset from zero. 350 * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to 351 * guarantee the alignment, but when attempting to use IOMMU bypass 352 * mode we found that we were not allowed to specify any alignment 353 * that was more restrictive than the system page size. 
354 * So we avoided this constraint by passing two alignment values, 355 * one for the memory allocation itself and the other for the DMA 356 * handle (for later bind). This used to cause more memory than 357 * necessary to be allocated (in order to guarantee the more 358 * restrictive alignment contraint). But be guaranteeing the 359 * zero-based IB virtual address for the queue, we are able to 360 * conserve this memory. 361 * Note: If QP is not user-mappable, then it may come from either 362 * kernel system memory or from HCA-attached local DDR memory. 363 */ 364 sq_wqe_size = 1 << qp->qp_sq_log_wqesz; 365 sq_size = (1 << log_qp_sq_size) * sq_wqe_size; 366 367 /* QP on SRQ sets these to 0 */ 368 if (qp_srq_en) { 369 rq_wqe_size = 0; 370 rq_size = 0; 371 } else { 372 rq_wqe_size = 1 << qp->qp_rq_log_wqesz; 373 rq_size = (1 << log_qp_rq_size) * rq_wqe_size; 374 } 375 376 qp->qp_wqinfo.qa_size = sq_size + rq_size; 377 qp->qp_wqinfo.qa_alloc_align = max(sq_wqe_size, rq_wqe_size); 378 qp->qp_wqinfo.qa_bind_align = max(sq_wqe_size, rq_wqe_size); 379 if (qp_is_umap) { 380 qp->qp_wqinfo.qa_location = TAVOR_QUEUE_LOCATION_USERLAND; 381 } else { 382 qp->qp_wqinfo.qa_location = wq_location; 383 } 384 status = tavor_queue_alloc(state, &qp->qp_wqinfo, sleepflag); 385 if (status != DDI_SUCCESS) { 386 goto qpalloc_fail8; 387 } 388 if (sq_wqe_size > rq_wqe_size) { 389 sq_buf = qp->qp_wqinfo.qa_buf_aligned; 390 391 /* 392 * If QP's on an SRQ, we set the rq_buf to NULL 393 */ 394 if (qp_srq_en) 395 rq_buf = NULL; 396 else 397 rq_buf = (uint32_t *)((uintptr_t)sq_buf + sq_size); 398 } else { 399 rq_buf = qp->qp_wqinfo.qa_buf_aligned; 400 sq_buf = (uint32_t *)((uintptr_t)rq_buf + rq_size); 401 } 402 403 /* 404 * Register the memory for the QP work queues. The memory for the 405 * QP must be registered in the Tavor TPT tables. This gives us the 406 * LKey to specify in the QP context later. 
Note: The memory for 407 * Tavor work queues (both Send and Recv) must be contiguous and 408 * registered as a single memory region. Note also: If the work 409 * queue is to be allocated from DDR memory, then only a "bypass" 410 * mapping is appropriate. And if the QP memory is user-mappable, 411 * then we force DDI_DMA_CONSISTENT mapping. 412 * Also, in order to meet the alignment restriction, we pass the 413 * "mro_bind_override_addr" flag in the call to tavor_mr_register(). 414 * This guarantees that the resulting IB vaddr will be zero-based 415 * (modulo the offset into the first page). 416 * If we fail here, we still have the bunch of resource and reference 417 * count cleanup to do. 418 */ 419 flag = (sleepflag == TAVOR_SLEEP) ? IBT_MR_SLEEP : 420 IBT_MR_NOSLEEP; 421 mr_attr.mr_vaddr = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned; 422 mr_attr.mr_len = qp->qp_wqinfo.qa_size; 423 mr_attr.mr_as = NULL; 424 mr_attr.mr_flags = flag; 425 if (qp_is_umap) { 426 mr_op.mro_bind_type = state->ts_cfg_profile->cp_iommu_bypass; 427 } else { 428 if (wq_location == TAVOR_QUEUE_LOCATION_NORMAL) { 429 mr_op.mro_bind_type = 430 state->ts_cfg_profile->cp_iommu_bypass; 431 dma_xfer_mode = 432 state->ts_cfg_profile->cp_streaming_consistent; 433 if (dma_xfer_mode == DDI_DMA_STREAMING) { 434 mr_attr.mr_flags |= IBT_MR_NONCOHERENT; 435 } 436 } else { 437 mr_op.mro_bind_type = TAVOR_BINDMEM_BYPASS; 438 } 439 } 440 mr_op.mro_bind_dmahdl = qp->qp_wqinfo.qa_dmahdl; 441 mr_op.mro_bind_override_addr = 1; 442 status = tavor_mr_register(state, pd, &mr_attr, &mr, &mr_op); 443 if (status != DDI_SUCCESS) { 444 goto qpalloc_fail9; 445 } 446 447 /* 448 * Calculate the offset between the kernel virtual address space 449 * and the IB virtual address space. This will be used when 450 * posting work requests to properly initialize each WQE. 
451 */ 452 qp_desc_off = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned - 453 (uint64_t)mr->mr_bindinfo.bi_addr; 454 455 /* 456 * Fill in all the return arguments (if necessary). This includes 457 * real work queue sizes, real SGLs, and QP number 458 */ 459 if (queuesz_p != NULL) { 460 queuesz_p->cs_sq = (1 << log_qp_sq_size); 461 queuesz_p->cs_sq_sgl = qp->qp_sq_sgl; 462 463 /* QP on an SRQ set these to 0 */ 464 if (qp_srq_en) { 465 queuesz_p->cs_rq = 0; 466 queuesz_p->cs_rq_sgl = 0; 467 } else { 468 queuesz_p->cs_rq = (1 << log_qp_rq_size); 469 queuesz_p->cs_rq_sgl = qp->qp_rq_sgl; 470 } 471 } 472 if (qpn != NULL) { 473 *qpn = (ib_qpn_t)qp->qp_qpnum; 474 } 475 476 /* 477 * Fill in the rest of the Tavor Queue Pair handle. We can update 478 * the following fields for use in further operations on the QP. 479 */ 480 qp->qp_qpcrsrcp = qpc; 481 qp->qp_rsrcp = rsrc; 482 qp->qp_state = TAVOR_QP_RESET; 483 qp->qp_pdhdl = pd; 484 qp->qp_mrhdl = mr; 485 qp->qp_sq_sigtype = (attr_p->qp_flags & IBT_WR_SIGNALED) ? 486 TAVOR_QP_SQ_WR_SIGNALED : TAVOR_QP_SQ_ALL_SIGNALED; 487 qp->qp_is_special = 0; 488 qp->qp_is_umap = qp_is_umap; 489 qp->qp_uarpg = (qp->qp_is_umap) ? uarpg : 0; 490 qp->qp_umap_dhp = (devmap_cookie_t)NULL; 491 qp->qp_sq_cqhdl = sq_cq; 492 qp->qp_sq_lastwqeaddr = NULL; 493 qp->qp_sq_bufsz = (1 << log_qp_sq_size); 494 qp->qp_sq_buf = sq_buf; 495 qp->qp_desc_off = qp_desc_off; 496 qp->qp_rq_cqhdl = rq_cq; 497 qp->qp_rq_lastwqeaddr = NULL; 498 qp->qp_rq_buf = rq_buf; 499 500 /* QP on an SRQ sets this to 0 */ 501 if (qp_srq_en) { 502 qp->qp_rq_bufsz = 0; 503 } else { 504 qp->qp_rq_bufsz = (1 << log_qp_rq_size); 505 } 506 507 qp->qp_forward_sqd_event = 0; 508 qp->qp_sqd_still_draining = 0; 509 qp->qp_hdlrarg = (void *)ibt_qphdl; 510 qp->qp_mcg_refcnt = 0; 511 512 /* 513 * If this QP is to be associated with an SRQ, then set the SRQ handle 514 * appropriately. 
515 */ 516 if (qp_srq_en) { 517 qp->qp_srqhdl = srq; 518 qp->qp_srq_en = TAVOR_QP_SRQ_ENABLED; 519 tavor_srq_refcnt_inc(qp->qp_srqhdl); 520 } else { 521 qp->qp_srqhdl = NULL; 522 qp->qp_srq_en = TAVOR_QP_SRQ_DISABLED; 523 } 524 525 /* Determine if later ddi_dma_sync will be necessary */ 526 qp->qp_sync = TAVOR_QP_IS_SYNC_REQ(state, qp->qp_wqinfo); 527 528 /* Determine the QP service type */ 529 if (type == IBT_RC_RQP) { 530 qp->qp_serv_type = TAVOR_QP_RC; 531 } else if (type == IBT_UD_RQP) { 532 qp->qp_serv_type = TAVOR_QP_UD; 533 } else { 534 qp->qp_serv_type = TAVOR_QP_UC; 535 } 536 537 /* Zero out the QP context */ 538 bzero(&qp->qpc, sizeof (tavor_hw_qpc_t)); 539 540 /* 541 * Put QP handle in Tavor QPNum-to-QPHdl list. Then fill in the 542 * "qphdl" and return success 543 */ 544 ASSERT(state->ts_qphdl[qpc->tr_indx] == NULL); 545 state->ts_qphdl[qpc->tr_indx] = qp; 546 547 /* 548 * If this is a user-mappable QP, then we need to insert the previously 549 * allocated entry into the "userland resources database". This will 550 * allow for later lookup during devmap() (i.e. mmap()) calls. 551 */ 552 if (qp_is_umap) { 553 tavor_umap_db_add(umapdb); 554 } 555 556 *qphdl = qp; 557 558 return (DDI_SUCCESS); 559 560 /* 561 * The following is cleanup for all possible failure cases in this routine 562 */ 563 qpalloc_fail9: 564 tavor_queue_free(state, &qp->qp_wqinfo); 565 qpalloc_fail8: 566 if (type == IBT_RC_RQP) { 567 tavor_rsrc_free(state, &rdb); 568 } 569 qpalloc_fail7: 570 if (qp_is_umap) { 571 tavor_umap_db_free(umapdb); 572 } 573 qpalloc_fail6: 574 /* 575 * Releasing the QPN will also free up the QPC context. Update 576 * the QPC context pointer to indicate this. 
577 */ 578 tavor_qp_release_qpn(state, qp->qp_qpn_hdl, TAVOR_QPN_RELEASE); 579 qpc = NULL; 580 qpalloc_fail5: 581 tavor_rsrc_free(state, &rsrc); 582 qpalloc_fail4: 583 if (qpc) { 584 tavor_rsrc_free(state, &qpc); 585 } 586 qpalloc_fail3: 587 tavor_cq_refcnt_dec(rq_cq); 588 qpalloc_fail2: 589 tavor_cq_refcnt_dec(sq_cq); 590 qpalloc_fail1: 591 tavor_pd_refcnt_dec(pd); 592 qpalloc_fail: 593 return (status); 594 } 595 596 597 598 /* 599 * tavor_special_qp_alloc() 600 * Context: Can be called only from user or kernel context. 601 */ 602 int 603 tavor_special_qp_alloc(tavor_state_t *state, tavor_qp_info_t *qpinfo, 604 uint_t sleepflag, tavor_qp_options_t *op) 605 { 606 tavor_rsrc_t *qpc, *rsrc; 607 tavor_qphdl_t qp; 608 ibt_qp_alloc_attr_t *attr_p; 609 ibt_sqp_type_t type; 610 uint8_t port; 611 ibtl_qp_hdl_t ibt_qphdl; 612 ibt_chan_sizes_t *queuesz_p; 613 tavor_qphdl_t *qphdl; 614 ibt_mr_attr_t mr_attr; 615 tavor_mr_options_t mr_op; 616 tavor_pdhdl_t pd; 617 tavor_cqhdl_t sq_cq, rq_cq; 618 tavor_mrhdl_t mr; 619 uint64_t qp_desc_off; 620 uint32_t *sq_buf, *rq_buf; 621 uint32_t log_qp_sq_size, log_qp_rq_size; 622 uint32_t sq_size, rq_size, max_sgl; 623 uint32_t sq_wqe_size, rq_wqe_size; 624 uint_t wq_location, dma_xfer_mode; 625 int status, flag; 626 627 /* 628 * Check the "options" flag. Currently this flag tells the driver 629 * whether or not the QP's work queues should be come from normal 630 * system memory or whether they should be allocated from DDR memory. 
631 */ 632 if (op == NULL) { 633 wq_location = TAVOR_QUEUE_LOCATION_NORMAL; 634 } else { 635 wq_location = op->qpo_wq_loc; 636 } 637 638 /* 639 * Extract the necessary info from the tavor_qp_info_t structure 640 */ 641 attr_p = qpinfo->qpi_attrp; 642 type = qpinfo->qpi_type; 643 port = qpinfo->qpi_port; 644 ibt_qphdl = qpinfo->qpi_ibt_qphdl; 645 queuesz_p = qpinfo->qpi_queueszp; 646 qphdl = &qpinfo->qpi_qphdl; 647 648 /* 649 * Check for valid special QP type (only SMI & GSI supported) 650 */ 651 if ((type != IBT_SMI_SQP) && (type != IBT_GSI_SQP)) { 652 goto spec_qpalloc_fail; 653 } 654 655 /* 656 * Check for valid port number 657 */ 658 if (!tavor_portnum_is_valid(state, port)) { 659 goto spec_qpalloc_fail; 660 } 661 port = port - 1; 662 663 /* 664 * Check for valid PD handle pointer 665 */ 666 if (attr_p->qp_pd_hdl == NULL) { 667 goto spec_qpalloc_fail; 668 } 669 pd = (tavor_pdhdl_t)attr_p->qp_pd_hdl; 670 671 /* Increment the reference count on the PD */ 672 tavor_pd_refcnt_inc(pd); 673 674 /* 675 * Check for valid CQ handle pointers 676 */ 677 if ((attr_p->qp_ibc_scq_hdl == NULL) || 678 (attr_p->qp_ibc_rcq_hdl == NULL)) { 679 goto spec_qpalloc_fail1; 680 } 681 sq_cq = (tavor_cqhdl_t)attr_p->qp_ibc_scq_hdl; 682 rq_cq = (tavor_cqhdl_t)attr_p->qp_ibc_rcq_hdl; 683 684 /* 685 * Increment the reference count on the CQs. One or both of these 686 * could return error if we determine that the given CQ is already 687 * being used with a non-special QP (i.e. a normal QP). 688 */ 689 status = tavor_cq_refcnt_inc(sq_cq, TAVOR_CQ_IS_SPECIAL); 690 if (status != DDI_SUCCESS) { 691 goto spec_qpalloc_fail1; 692 } 693 status = tavor_cq_refcnt_inc(rq_cq, TAVOR_CQ_IS_SPECIAL); 694 if (status != DDI_SUCCESS) { 695 goto spec_qpalloc_fail2; 696 } 697 698 /* 699 * Allocate the special QP resources. Essentially, this allocation 700 * amounts to checking if the request special QP has already been 701 * allocated. 
If successful, the QP context return is an actual 702 * QP context that has been "aliased" to act as a special QP of the 703 * appropriate type (and for the appropriate port). Just as in 704 * tavor_qp_alloc() above, ownership for this QP context is not 705 * immediately given to hardware in the final step here. Instead, we 706 * wait until the QP is later transitioned to the "Init" state before 707 * passing the QP to hardware. If we fail here, we must undo all 708 * the reference count (CQ and PD). 709 */ 710 status = tavor_special_qp_rsrc_alloc(state, type, port, &qpc); 711 if (status != DDI_SUCCESS) { 712 goto spec_qpalloc_fail3; 713 } 714 715 /* 716 * Allocate the software structure for tracking the special queue 717 * pair (i.e. the Tavor Queue Pair handle). If we fail here, we 718 * must undo the reference counts and the previous resource allocation. 719 */ 720 status = tavor_rsrc_alloc(state, TAVOR_QPHDL, 1, sleepflag, &rsrc); 721 if (status != DDI_SUCCESS) { 722 goto spec_qpalloc_fail4; 723 } 724 qp = (tavor_qphdl_t)rsrc->tr_addr; 725 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qp)) 726 727 /* 728 * Actual QP number is a combination of the index of the QPC and 729 * the port number. This is because the special QP contexts must 730 * be allocated two-at-a-time. 731 */ 732 qp->qp_qpnum = qpc->tr_indx + port; 733 734 /* 735 * Calculate the appropriate size for the work queues. 736 * Note: All Tavor QP work queues must be a power-of-2 in size. Also 737 * they may not be any smaller than TAVOR_QP_MIN_SIZE. 
This step is 738 * to round the requested size up to the next highest power-of-2 739 */ 740 attr_p->qp_sizes.cs_sq = max(attr_p->qp_sizes.cs_sq, TAVOR_QP_MIN_SIZE); 741 attr_p->qp_sizes.cs_rq = max(attr_p->qp_sizes.cs_rq, TAVOR_QP_MIN_SIZE); 742 log_qp_sq_size = highbit(attr_p->qp_sizes.cs_sq); 743 if (ISP2(attr_p->qp_sizes.cs_sq)) { 744 log_qp_sq_size = log_qp_sq_size - 1; 745 } 746 log_qp_rq_size = highbit(attr_p->qp_sizes.cs_rq); 747 if (ISP2(attr_p->qp_sizes.cs_rq)) { 748 log_qp_rq_size = log_qp_rq_size - 1; 749 } 750 751 /* 752 * Next we verify that the rounded-up size is valid (i.e. consistent 753 * with the device limits and/or software-configured limits). If not, 754 * then obviously we have a bit of cleanup to do before returning. 755 */ 756 if ((log_qp_sq_size > state->ts_cfg_profile->cp_log_max_qp_sz) || 757 (log_qp_rq_size > state->ts_cfg_profile->cp_log_max_qp_sz)) { 758 goto spec_qpalloc_fail5; 759 } 760 761 /* 762 * Next we verify that the requested number of SGL is valid (i.e. 763 * consistent with the device limits and/or software-configured 764 * limits). If not, then obviously the same cleanup needs to be done. 765 */ 766 max_sgl = state->ts_cfg_profile->cp_wqe_real_max_sgl; 767 if ((attr_p->qp_sizes.cs_sq_sgl > max_sgl) || 768 (attr_p->qp_sizes.cs_rq_sgl > max_sgl)) { 769 goto spec_qpalloc_fail5; 770 } 771 772 /* 773 * Determine this QP's WQE sizes (for both the Send and Recv WQEs). 774 * This will depend on the requested number of SGLs. Note: this 775 * has the side-effect of also calculating the real number of SGLs 776 * (for the calculated WQE size). 
777 */ 778 tavor_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_rq_sgl, 779 TAVOR_QP_WQ_TYPE_RECVQ, &qp->qp_rq_log_wqesz, &qp->qp_rq_sgl); 780 if (type == IBT_SMI_SQP) { 781 tavor_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_sq_sgl, 782 TAVOR_QP_WQ_TYPE_SENDMLX_QP0, &qp->qp_sq_log_wqesz, 783 &qp->qp_sq_sgl); 784 } else { 785 tavor_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_sq_sgl, 786 TAVOR_QP_WQ_TYPE_SENDMLX_QP1, &qp->qp_sq_log_wqesz, 787 &qp->qp_sq_sgl); 788 } 789 790 /* 791 * Allocate the memory for QP work queues. Note: The location from 792 * which we will allocate these work queues has been passed in 793 * through the tavor_qp_options_t structure. Since Tavor work queues 794 * are not allowed to cross a 32-bit (4GB) boundary, the alignment of 795 * the work queue memory is very important. We used to allocate 796 * work queues (the combined receive and send queues) so that they 797 * would be aligned on their combined size. That alignment guaranteed 798 * that they would never cross the 4GB boundary (Tavor work queues 799 * are on the order of MBs at maximum). Now we are able to relax 800 * this alignment constraint by ensuring that the IB address assigned 801 * to the queue memory (as a result of the tavor_mr_register() call) 802 * is offset from zero. 803 * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to 804 * guarantee the alignment, but when attempting to use IOMMU bypass 805 * mode we found that we were not allowed to specify any alignment 806 * that was more restrictive than the system page size. 807 * So we avoided this constraint by passing two alignment values, 808 * one for the memory allocation itself and the other for the DMA 809 * handle (for later bind). This used to cause more memory than 810 * necessary to be allocated (in order to guarantee the more 811 * restrictive alignment contraint). But be guaranteeing the 812 * zero-based IB virtual address for the queue, we are able to 813 * conserve this memory. 
814 */ 815 sq_wqe_size = 1 << qp->qp_sq_log_wqesz; 816 rq_wqe_size = 1 << qp->qp_rq_log_wqesz; 817 sq_size = (1 << log_qp_sq_size) * sq_wqe_size; 818 rq_size = (1 << log_qp_rq_size) * rq_wqe_size; 819 qp->qp_wqinfo.qa_size = sq_size + rq_size; 820 qp->qp_wqinfo.qa_alloc_align = max(sq_wqe_size, rq_wqe_size); 821 qp->qp_wqinfo.qa_bind_align = max(sq_wqe_size, rq_wqe_size); 822 qp->qp_wqinfo.qa_location = wq_location; 823 status = tavor_queue_alloc(state, &qp->qp_wqinfo, sleepflag); 824 if (status != 0) { 825 goto spec_qpalloc_fail5; 826 } 827 if (sq_wqe_size > rq_wqe_size) { 828 sq_buf = qp->qp_wqinfo.qa_buf_aligned; 829 rq_buf = (uint32_t *)((uintptr_t)sq_buf + sq_size); 830 } else { 831 rq_buf = qp->qp_wqinfo.qa_buf_aligned; 832 sq_buf = (uint32_t *)((uintptr_t)rq_buf + rq_size); 833 } 834 835 /* 836 * Register the memory for the special QP work queues. The memory for 837 * the special QP must be registered in the Tavor TPT tables. This 838 * gives us the LKey to specify in the QP context later. Note: The 839 * memory for Tavor work queues (both Send and Recv) must be contiguous 840 * and registered as a single memory region. Note also: If the work 841 * queue is to be allocated from DDR memory, then only a "bypass" 842 * mapping is appropriate. 843 * Also, in order to meet the alignment restriction, we pass the 844 * "mro_bind_override_addr" flag in the call to tavor_mr_register(). 845 * This guarantees that the resulting IB vaddr will be zero-based 846 * (modulo the offset into the first page). 847 * If we fail here, we have a bunch of resource and reference count 848 * cleanup to do. 849 */ 850 flag = (sleepflag == TAVOR_SLEEP) ? 
IBT_MR_SLEEP : 851 IBT_MR_NOSLEEP; 852 mr_attr.mr_vaddr = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned; 853 mr_attr.mr_len = qp->qp_wqinfo.qa_size; 854 mr_attr.mr_as = NULL; 855 mr_attr.mr_flags = flag; 856 if (wq_location == TAVOR_QUEUE_LOCATION_NORMAL) { 857 mr_op.mro_bind_type = state->ts_cfg_profile->cp_iommu_bypass; 858 859 dma_xfer_mode = state->ts_cfg_profile->cp_streaming_consistent; 860 if (dma_xfer_mode == DDI_DMA_STREAMING) { 861 mr_attr.mr_flags |= IBT_MR_NONCOHERENT; 862 } 863 } else { 864 mr_op.mro_bind_type = TAVOR_BINDMEM_BYPASS; 865 } 866 mr_op.mro_bind_dmahdl = qp->qp_wqinfo.qa_dmahdl; 867 mr_op.mro_bind_override_addr = 1; 868 status = tavor_mr_register(state, pd, &mr_attr, &mr, &mr_op); 869 if (status != DDI_SUCCESS) { 870 goto spec_qpalloc_fail6; 871 } 872 873 /* 874 * Calculate the offset between the kernel virtual address space 875 * and the IB virtual address space. This will be used when 876 * posting work requests to properly initialize each WQE. 877 */ 878 qp_desc_off = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned - 879 (uint64_t)mr->mr_bindinfo.bi_addr; 880 881 /* 882 * Fill in all the return arguments (if necessary). This includes 883 * real work queue sizes, real SGLs, and QP number (which will be 884 * either zero or one, depending on the special QP type) 885 */ 886 if (queuesz_p != NULL) { 887 queuesz_p->cs_sq = (1 << log_qp_sq_size); 888 queuesz_p->cs_sq_sgl = qp->qp_sq_sgl; 889 queuesz_p->cs_rq = (1 << log_qp_rq_size); 890 queuesz_p->cs_rq_sgl = qp->qp_rq_sgl; 891 } 892 893 /* 894 * Fill in the rest of the Tavor Queue Pair handle. We can update 895 * the following fields for use in further operations on the QP. 896 */ 897 qp->qp_qpcrsrcp = qpc; 898 qp->qp_rsrcp = rsrc; 899 qp->qp_state = TAVOR_QP_RESET; 900 qp->qp_pdhdl = pd; 901 qp->qp_mrhdl = mr; 902 qp->qp_sq_sigtype = (attr_p->qp_flags & IBT_WR_SIGNALED) ? 903 TAVOR_QP_SQ_WR_SIGNALED : TAVOR_QP_SQ_ALL_SIGNALED; 904 qp->qp_is_special = (type == IBT_SMI_SQP) ? 
905 TAVOR_QP_SMI : TAVOR_QP_GSI; 906 qp->qp_is_umap = 0; 907 qp->qp_uarpg = 0; 908 qp->qp_sq_cqhdl = sq_cq; 909 qp->qp_sq_lastwqeaddr = NULL; 910 qp->qp_sq_bufsz = (1 << log_qp_sq_size); 911 qp->qp_sq_buf = sq_buf; 912 qp->qp_desc_off = qp_desc_off; 913 qp->qp_rq_cqhdl = rq_cq; 914 qp->qp_rq_lastwqeaddr = NULL; 915 qp->qp_rq_bufsz = (1 << log_qp_rq_size); 916 qp->qp_rq_buf = rq_buf; 917 qp->qp_portnum = port; 918 qp->qp_pkeyindx = 0; 919 qp->qp_hdlrarg = (void *)ibt_qphdl; 920 qp->qp_mcg_refcnt = 0; 921 qp->qp_srq_en = 0; 922 qp->qp_srqhdl = NULL; 923 924 /* Determine if later ddi_dma_sync will be necessary */ 925 qp->qp_sync = TAVOR_QP_IS_SYNC_REQ(state, qp->qp_wqinfo); 926 927 /* All special QPs are UD QP service type */ 928 qp->qp_serv_type = TAVOR_QP_UD; 929 930 /* Zero out the QP context */ 931 bzero(&qp->qpc, sizeof (tavor_hw_qpc_t)); 932 933 /* 934 * Put QP handle in Tavor QPNum-to-QPHdl list. Then fill in the 935 * "qphdl" and return success 936 */ 937 ASSERT(state->ts_qphdl[qpc->tr_indx + port] == NULL); 938 state->ts_qphdl[qpc->tr_indx + port] = qp; 939 940 *qphdl = qp; 941 942 return (DDI_SUCCESS); 943 944 /* 945 * The following is cleanup for all possible failure cases in this routine 946 */ 947 spec_qpalloc_fail6: 948 tavor_queue_free(state, &qp->qp_wqinfo); 949 spec_qpalloc_fail5: 950 tavor_rsrc_free(state, &rsrc); 951 spec_qpalloc_fail4: 952 if (tavor_special_qp_rsrc_free(state, type, port) != DDI_SUCCESS) { 953 TAVOR_WARNING(state, "failed to free special QP rsrc"); 954 } 955 spec_qpalloc_fail3: 956 tavor_cq_refcnt_dec(rq_cq); 957 spec_qpalloc_fail2: 958 tavor_cq_refcnt_dec(sq_cq); 959 spec_qpalloc_fail1: 960 tavor_pd_refcnt_dec(pd); 961 spec_qpalloc_fail: 962 return (status); 963 } 964 965 966 /* 967 * tavor_qp_free() 968 * This function frees up the QP resources. Depending on the value 969 * of the "free_qp_flags", the QP number may not be released until 970 * a subsequent call to tavor_qp_release_qpn(). 
971 * 972 * Context: Can be called only from user or kernel context. 973 */ 974 /* ARGSUSED */ 975 int 976 tavor_qp_free(tavor_state_t *state, tavor_qphdl_t *qphdl, 977 ibc_free_qp_flags_t free_qp_flags, ibc_qpn_hdl_t *qpnh, 978 uint_t sleepflag) 979 { 980 tavor_rsrc_t *qpc, *rdb, *rsrc; 981 tavor_umap_db_entry_t *umapdb; 982 tavor_qpn_entry_t *entry; 983 tavor_pdhdl_t pd; 984 tavor_mrhdl_t mr; 985 tavor_cqhdl_t sq_cq, rq_cq; 986 tavor_srqhdl_t srq; 987 tavor_qphdl_t qp; 988 uint64_t value; 989 uint_t type, port; 990 uint_t maxprot; 991 uint_t qp_srq_en; 992 int status; 993 994 /* 995 * Pull all the necessary information from the Tavor Queue Pair 996 * handle. This is necessary here because the resource for the 997 * QP handle is going to be freed up as part of this operation. 998 */ 999 qp = *qphdl; 1000 mutex_enter(&qp->qp_lock); 1001 qpc = qp->qp_qpcrsrcp; 1002 rsrc = qp->qp_rsrcp; 1003 pd = qp->qp_pdhdl; 1004 srq = qp->qp_srqhdl; 1005 mr = qp->qp_mrhdl; 1006 rq_cq = qp->qp_rq_cqhdl; 1007 sq_cq = qp->qp_sq_cqhdl; 1008 rdb = qp->qp_rdbrsrcp; 1009 port = qp->qp_portnum; 1010 qp_srq_en = qp->qp_srq_en; 1011 1012 /* 1013 * If the QP is part of an MCG, then we fail the qp_free 1014 */ 1015 if (qp->qp_mcg_refcnt != 0) { 1016 mutex_exit(&qp->qp_lock); 1017 goto qpfree_fail; 1018 } 1019 1020 /* 1021 * If the QP is not already in "Reset" state, then transition to 1022 * "Reset". This is necessary because software does not reclaim 1023 * ownership of the QP context until the QP is in the "Reset" state. 1024 * If the ownership transfer fails for any reason, then it is an 1025 * indication that something (either in HW or SW) has gone seriously 1026 * wrong. So we print a warning message and return. 
1027 */ 1028 if (qp->qp_state != TAVOR_QP_RESET) { 1029 if (tavor_qp_to_reset(state, qp) != DDI_SUCCESS) { 1030 mutex_exit(&qp->qp_lock); 1031 TAVOR_WARNING(state, "failed to reset QP context"); 1032 goto qpfree_fail; 1033 } 1034 qp->qp_state = TAVOR_QP_RESET; 1035 1036 /* 1037 * Do any additional handling necessary for the transition 1038 * to the "Reset" state (e.g. update the WRID lists) 1039 */ 1040 tavor_wrid_to_reset_handling(state, qp); 1041 } 1042 1043 /* 1044 * If this was a user-mappable QP, then we need to remove its entry 1045 * from the "userland resources database". If it is also currently 1046 * mmap()'d out to a user process, then we need to call 1047 * devmap_devmem_remap() to remap the QP memory to an invalid mapping. 1048 * We also need to invalidate the QP tracking information for the 1049 * user mapping. 1050 */ 1051 if (qp->qp_is_umap) { 1052 status = tavor_umap_db_find(state->ts_instance, qp->qp_qpnum, 1053 MLNX_UMAP_QPMEM_RSRC, &value, TAVOR_UMAP_DB_REMOVE, 1054 &umapdb); 1055 if (status != DDI_SUCCESS) { 1056 mutex_exit(&qp->qp_lock); 1057 TAVOR_WARNING(state, "failed to find in database"); 1058 return (ibc_get_ci_failure(0)); 1059 } 1060 tavor_umap_db_free(umapdb); 1061 if (qp->qp_umap_dhp != NULL) { 1062 maxprot = (PROT_READ | PROT_WRITE | PROT_USER); 1063 status = devmap_devmem_remap(qp->qp_umap_dhp, 1064 state->ts_dip, 0, 0, qp->qp_wqinfo.qa_size, 1065 maxprot, DEVMAP_MAPPING_INVALID, NULL); 1066 if (status != DDI_SUCCESS) { 1067 mutex_exit(&qp->qp_lock); 1068 TAVOR_WARNING(state, "failed in QP memory " 1069 "devmap_devmem_remap()"); 1070 return (ibc_get_ci_failure(0)); 1071 } 1072 qp->qp_umap_dhp = (devmap_cookie_t)NULL; 1073 } 1074 } 1075 1076 /* 1077 * Put NULL into the Tavor QPNum-to-QPHdl list. This will allow any 1078 * in-progress events to detect that the QP corresponding to this 1079 * number has been freed. Note: it does depend in whether we are 1080 * freeing a special QP or not. 
1081 */ 1082 if (qp->qp_is_special) { 1083 state->ts_qphdl[qpc->tr_indx + port] = NULL; 1084 } else { 1085 state->ts_qphdl[qpc->tr_indx] = NULL; 1086 } 1087 1088 /* 1089 * Drop the QP lock 1090 * At this point the lock is no longer necessary. We cannot 1091 * protect from multiple simultaneous calls to free the same QP. 1092 * In addition, since the QP lock is contained in the QP "software 1093 * handle" resource, which we will free (see below), it is 1094 * important that we have no further references to that memory. 1095 */ 1096 mutex_exit(&qp->qp_lock); 1097 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qp)) 1098 1099 /* 1100 * Free the QP resources 1101 * Start by deregistering and freeing the memory for work queues. 1102 * Next free any previously allocated context information 1103 * (depending on QP type) 1104 * Finally, decrement the necessary reference counts. 1105 * If this fails for any reason, then it is an indication that 1106 * something (either in HW or SW) has gone seriously wrong. So we 1107 * print a warning message and return. 1108 */ 1109 status = tavor_mr_deregister(state, &mr, TAVOR_MR_DEREG_ALL, 1110 sleepflag); 1111 if (status != DDI_SUCCESS) { 1112 TAVOR_WARNING(state, "failed to deregister QP memory"); 1113 goto qpfree_fail; 1114 } 1115 1116 /* Free the memory for the QP */ 1117 tavor_queue_free(state, &qp->qp_wqinfo); 1118 1119 /* 1120 * Free up the remainder of the QP resources. Note: we have a few 1121 * different resources to free up depending on whether the QP is a 1122 * special QP or not. As described above, if any of these fail for 1123 * any reason it is an indication that something (either in HW or SW) 1124 * has gone seriously wrong. So we print a warning message and 1125 * return. 1126 */ 1127 if (qp->qp_is_special) { 1128 type = (qp->qp_is_special == TAVOR_QP_SMI) ? 
1129 IBT_SMI_SQP : IBT_GSI_SQP; 1130 1131 /* Free up resources for the special QP */ 1132 status = tavor_special_qp_rsrc_free(state, type, port); 1133 if (status != DDI_SUCCESS) { 1134 TAVOR_WARNING(state, "failed to free special QP rsrc"); 1135 goto qpfree_fail; 1136 } 1137 1138 } else { 1139 type = qp->qp_serv_type; 1140 1141 /* Free up the RDB entries resource */ 1142 if (type == TAVOR_QP_RC) { 1143 tavor_rsrc_free(state, &rdb); 1144 } 1145 1146 /* 1147 * Check the flags and determine whether to release the 1148 * QPN or not, based on their value. 1149 */ 1150 if (free_qp_flags == IBC_FREE_QP_ONLY) { 1151 entry = qp->qp_qpn_hdl; 1152 tavor_qp_release_qpn(state, qp->qp_qpn_hdl, 1153 TAVOR_QPN_FREE_ONLY); 1154 *qpnh = (ibc_qpn_hdl_t)entry; 1155 } else { 1156 tavor_qp_release_qpn(state, qp->qp_qpn_hdl, 1157 TAVOR_QPN_RELEASE); 1158 } 1159 } 1160 1161 /* Free the Tavor Queue Pair handle */ 1162 tavor_rsrc_free(state, &rsrc); 1163 1164 /* Decrement the reference counts on CQs, PD and SRQ (if needed) */ 1165 tavor_cq_refcnt_dec(rq_cq); 1166 tavor_cq_refcnt_dec(sq_cq); 1167 tavor_pd_refcnt_dec(pd); 1168 if (qp_srq_en == TAVOR_QP_SRQ_ENABLED) { 1169 tavor_srq_refcnt_dec(srq); 1170 } 1171 1172 /* Set the qphdl pointer to NULL and return success */ 1173 *qphdl = NULL; 1174 1175 return (DDI_SUCCESS); 1176 1177 qpfree_fail: 1178 return (status); 1179 } 1180 1181 1182 /* 1183 * tavor_qp_query() 1184 * Context: Can be called from interrupt or base context. 
 */
int
tavor_qp_query(tavor_state_t *state, tavor_qphdl_t qp,
    ibt_qp_query_attr_t *attr_p)
{
	ibt_cep_state_t		qp_state;
	ibt_qp_ud_attr_t	*ud;
	ibt_qp_rc_attr_t	*rc;
	ibt_qp_uc_attr_t	*uc;
	ibt_cep_flags_t		enable_flags;
	tavor_hw_addr_path_t	*qpc_path, *qpc_alt_path;
	ibt_cep_path_t		*path_ptr, *alt_path_ptr;
	tavor_hw_qpc_t		*qpc;
	int			status;

	/* The QP lock is held across the QUERY_QP command (see below) */
	mutex_enter(&qp->qp_lock);

	/*
	 * Grab the temporary QPC entry from QP software state
	 */
	qpc = &qp->qpc;

	/* Convert the current Tavor QP state to IBTF QP state */
	switch (qp->qp_state) {
	case TAVOR_QP_RESET:
		qp_state = IBT_STATE_RESET;		/* "Reset" */
		break;
	case TAVOR_QP_INIT:
		qp_state = IBT_STATE_INIT;		/* Initialized */
		break;
	case TAVOR_QP_RTR:
		qp_state = IBT_STATE_RTR;		/* Ready to Receive */
		break;
	case TAVOR_QP_RTS:
		qp_state = IBT_STATE_RTS;		/* Ready to Send */
		break;
	case TAVOR_QP_SQERR:
		qp_state = IBT_STATE_SQE;		/* Send Queue Error */
		break;
	case TAVOR_QP_SQD:
		if (qp->qp_sqd_still_draining) {
			qp_state = IBT_STATE_SQDRAIN;	/* SQ Draining */
		} else {
			qp_state = IBT_STATE_SQD;	/* SQ Drained */
		}
		break;
	case TAVOR_QP_ERR:
		qp_state = IBT_STATE_ERROR;		/* Error */
		break;
	default:
		/* Unknown software QP state: report a CI failure */
		mutex_exit(&qp->qp_lock);
		return (ibc_get_ci_failure(0));
	}
	attr_p->qp_info.qp_state = qp_state;

	/* SRQ Hook. */
	attr_p->qp_srq = NULL;

	/*
	 * The following QP information is always returned, regardless of
	 * the current QP state.  Note: Some special handling is necessary
	 * for calculating the QP number on special QP (QP0 and QP1).
	 */
	attr_p->qp_sq_cq = qp->qp_sq_cqhdl->cq_hdlrarg;
	attr_p->qp_rq_cq = qp->qp_rq_cqhdl->cq_hdlrarg;
	if (qp->qp_is_special) {
		/* QP0 (SMI) reports QPN 0, QP1 (GSI) reports QPN 1 */
		attr_p->qp_qpn = (qp->qp_is_special == TAVOR_QP_SMI) ? 0 : 1;
	} else {
		attr_p->qp_qpn = (ib_qpn_t)qp->qp_qpnum;
	}
	attr_p->qp_sq_sgl = qp->qp_sq_sgl;
	attr_p->qp_rq_sgl = qp->qp_rq_sgl;
	attr_p->qp_info.qp_sq_sz = qp->qp_sq_bufsz;
	attr_p->qp_info.qp_rq_sz = qp->qp_rq_bufsz;

	/*
	 * If QP is currently in the "Reset" state, then only the above are
	 * returned
	 */
	if (qp_state == IBT_STATE_RESET) {
		mutex_exit(&qp->qp_lock);
		return (DDI_SUCCESS);
	}

	/*
	 * Post QUERY_QP command to firmware
	 *
	 * We do a TAVOR_NOSLEEP here because we are holding the "qp_lock".
	 * Since we may be in the interrupt context (or subsequently raised
	 * to interrupt level by priority inversion), we do not want to block
	 * in this routine waiting for success.
	 */
	status = tavor_cmn_query_cmd_post(state, QUERY_QP, qp->qp_qpnum,
	    qpc, sizeof (tavor_hw_qpc_t), TAVOR_CMD_NOSLEEP_SPIN);
	if (status != TAVOR_CMD_SUCCESS) {
		mutex_exit(&qp->qp_lock);
		cmn_err(CE_CONT, "Tavor: QUERY_QP command failed: %08x\n",
		    status);
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Fill in the additional QP info based on the QP's transport type.
	 */
	if (qp->qp_serv_type == TAVOR_QP_UD) {

		/* Fill in the UD-specific info */
		ud = &attr_p->qp_info.qp_transport.ud;
		ud->ud_qkey	= (ib_qkey_t)qpc->qkey;
		ud->ud_sq_psn	= qpc->next_snd_psn;
		ud->ud_pkey_ix	= qpc->pri_addr_path.pkey_indx;
		ud->ud_port	= qpc->pri_addr_path.portnum;

		attr_p->qp_info.qp_trans = IBT_UD_SRV;

	} else if (qp->qp_serv_type == TAVOR_QP_RC) {

		/* Fill in the RC-specific info */
		rc = &attr_p->qp_info.qp_transport.rc;
		rc->rc_sq_psn	= qpc->next_snd_psn;
		rc->rc_rq_psn	= qpc->next_rcv_psn;
		rc->rc_dst_qpn	= qpc->rem_qpn;

		/* Grab the path migration state information */
		if (qpc->pm_state == TAVOR_QP_PMSTATE_MIGRATED) {
			rc->rc_mig_state = IBT_STATE_MIGRATED;
		} else if (qpc->pm_state == TAVOR_QP_PMSTATE_REARM) {
			rc->rc_mig_state = IBT_STATE_REARMED;
		} else {
			rc->rc_mig_state = IBT_STATE_ARMED;
		}
		rc->rc_rdma_ra_out = (1 << qpc->sra_max);
		rc->rc_rdma_ra_in  = (1 << qpc->rra_max);
		rc->rc_min_rnr_nak = qpc->min_rnr_nak;
		rc->rc_path_mtu	   = qpc->mtu;
		rc->rc_retry_cnt   = qpc->retry_cnt;

		/* Get the common primary address path fields */
		qpc_path = &qpc->pri_addr_path;
		path_ptr = &rc->rc_path;
		tavor_get_addr_path(state, qpc_path, &path_ptr->cep_adds_vect,
		    TAVOR_ADDRPATH_QP, qp);

		/* Fill in the additional primary address path fields */
		path_ptr->cep_pkey_ix	   = qpc_path->pkey_indx;
		path_ptr->cep_hca_port_num = qpc_path->portnum;
		path_ptr->cep_timeout	   = qpc_path->ack_timeout;

		/* Get the common alternate address path fields */
		qpc_alt_path = &qpc->alt_addr_path;
		alt_path_ptr = &rc->rc_alt_path;
		tavor_get_addr_path(state, qpc_alt_path,
		    &alt_path_ptr->cep_adds_vect, TAVOR_ADDRPATH_QP, qp);

		/* Fill in the additional alternate address path fields */
		alt_path_ptr->cep_pkey_ix	= qpc_alt_path->pkey_indx;
		alt_path_ptr->cep_hca_port_num	= qpc_alt_path->portnum;
		alt_path_ptr->cep_timeout	= qpc_alt_path->ack_timeout;

		/* Get the RNR retry time from primary path */
		rc->rc_rnr_retry_cnt = qpc_path->rnr_retry;

		/* Set the enable flags based on RDMA/Atomic enable bits */
		enable_flags = IBT_CEP_NO_FLAGS;
		enable_flags |= ((qpc->rre == 0) ? 0 : IBT_CEP_RDMA_RD);
		enable_flags |= ((qpc->rwe == 0) ? 0 : IBT_CEP_RDMA_WR);
		enable_flags |= ((qpc->rae == 0) ? 0 : IBT_CEP_ATOMIC);
		attr_p->qp_info.qp_flags = enable_flags;

		attr_p->qp_info.qp_trans = IBT_RC_SRV;

	} else if (qp->qp_serv_type == TAVOR_QP_UC) {

		/* Fill in the UC-specific info */
		uc = &attr_p->qp_info.qp_transport.uc;
		uc->uc_sq_psn	= qpc->next_snd_psn;
		uc->uc_rq_psn	= qpc->next_rcv_psn;
		uc->uc_dst_qpn	= qpc->rem_qpn;

		/* Grab the path migration state information */
		if (qpc->pm_state == TAVOR_QP_PMSTATE_MIGRATED) {
			uc->uc_mig_state = IBT_STATE_MIGRATED;
		} else if (qpc->pm_state == TAVOR_QP_PMSTATE_REARM) {
			uc->uc_mig_state = IBT_STATE_REARMED;
		} else {
			uc->uc_mig_state = IBT_STATE_ARMED;
		}
		uc->uc_path_mtu = qpc->mtu;

		/* Get the common primary address path fields */
		qpc_path = &qpc->pri_addr_path;
		path_ptr = &uc->uc_path;
		tavor_get_addr_path(state, qpc_path, &path_ptr->cep_adds_vect,
		    TAVOR_ADDRPATH_QP, qp);

		/* Fill in the additional primary address path fields */
		path_ptr->cep_pkey_ix	   = qpc_path->pkey_indx;
		path_ptr->cep_hca_port_num = qpc_path->portnum;

		/* Get the common alternate address path fields */
		qpc_alt_path = &qpc->alt_addr_path;
		alt_path_ptr = &uc->uc_alt_path;
		tavor_get_addr_path(state, qpc_alt_path,
		    &alt_path_ptr->cep_adds_vect, TAVOR_ADDRPATH_QP, qp);

		/* Fill in the additional alternate address path fields */
		alt_path_ptr->cep_pkey_ix	= qpc_alt_path->pkey_indx;
		alt_path_ptr->cep_hca_port_num	= qpc_alt_path->portnum;

		/*
		 * Set the enable flags based on RDMA enable bits (by
		 * definition UC doesn't support Atomic or RDMA Read)
		 */
		enable_flags = ((qpc->rwe == 0) ? 0 : IBT_CEP_RDMA_WR);
		attr_p->qp_info.qp_flags = enable_flags;

		attr_p->qp_info.qp_trans = IBT_UC_SRV;

	} else {
		TAVOR_WARNING(state, "unexpected QP transport type");
		mutex_exit(&qp->qp_lock);
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Under certain circumstances it is possible for the Tavor hardware
	 * to transition to one of the error states without software directly
	 * knowing about it.  The QueryQP() call is the one place where we
	 * have an opportunity to sample and update our view of the QP state.
	 */
	if (qpc->state == TAVOR_QP_SQERR) {
		attr_p->qp_info.qp_state = IBT_STATE_SQE;
		qp->qp_state = TAVOR_QP_SQERR;
	}
	if (qpc->state == TAVOR_QP_ERR) {
		attr_p->qp_info.qp_state = IBT_STATE_ERROR;
		qp->qp_state = TAVOR_QP_ERR;
	}
	mutex_exit(&qp->qp_lock);

	return (DDI_SUCCESS);
}


/*
 * tavor_qp_create_qpn()
 *    Context: Can be called from interrupt or base context.
 */
static int
tavor_qp_create_qpn(tavor_state_t *state, tavor_qphdl_t qp, tavor_rsrc_t *qpc)
{
	tavor_qpn_entry_t	query;
	tavor_qpn_entry_t	*entry;
	avl_index_t		where;

	/*
	 * Build a query (for the AVL tree lookup) and attempt to find
	 * a previously added entry that has a matching QPC index.  If
	 * no matching entry is found, then allocate, initialize, and
	 * add an entry to the AVL tree.
	 * If a matching entry is found, then increment its QPN counter
	 * and reference counter.
	 */
	query.qpn_indx = qpc->tr_indx;
	mutex_enter(&state->ts_qpn_avl_lock);
	entry = (tavor_qpn_entry_t *)avl_find(&state->ts_qpn_avl,
	    &query, &where);
	if (entry == NULL) {
		/*
		 * Allocate and initialize a QPN entry, then insert
		 * it into the AVL tree.  KM_NOSLEEP is used because this
		 * routine may be called from interrupt context.
		 */
		entry = (tavor_qpn_entry_t *)kmem_zalloc(
		    sizeof (tavor_qpn_entry_t), KM_NOSLEEP);
		if (entry == NULL) {
			mutex_exit(&state->ts_qpn_avl_lock);
			return (DDI_FAILURE);
		}
		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*entry))

		entry->qpn_indx = qpc->tr_indx;
		entry->qpn_refcnt = 0;
		entry->qpn_counter = 0;

		avl_insert(&state->ts_qpn_avl, entry, where);
	}

	/*
	 * Make the AVL tree entry point to the QP context resource that
	 * it will be responsible for tracking
	 */
	entry->qpn_qpc = qpc;

	/*
	 * Setup the QP handle to point to the AVL tree entry.  Then
	 * generate the new QP number from the entry's QPN counter value
	 * and the hardware's QP context table index.
	 */
	qp->qp_qpn_hdl = entry;
	qp->qp_qpnum = ((entry->qpn_counter <<
	    state->ts_cfg_profile->cp_log_num_qp) | qpc->tr_indx) &
	    TAVOR_QP_MAXNUMBER_MSK;

	/*
	 * Increment the reference counter and QPN counter.  The QPN
	 * counter always indicates the next available number for use.
	 */
	entry->qpn_counter++;
	entry->qpn_refcnt++;

	mutex_exit(&state->ts_qpn_avl_lock);
	return (DDI_SUCCESS);
}


/*
 * tavor_qp_release_qpn()
 *    Context: Can be called only from user or kernel context.
 */
void
tavor_qp_release_qpn(tavor_state_t *state, tavor_qpn_entry_t *entry, int flags)
{
	ASSERT(entry != NULL);

	mutex_enter(&state->ts_qpn_avl_lock);

	/*
	 * If we are releasing the QP number here, then we decrement the
	 * reference count and check for zero references.  If there are
	 * zero references, then we free the QPC context (if it hadn't
	 * already been freed during a TAVOR_QPN_FREE_ONLY free, i.e. for
	 * reuse with another similar QP number) and remove the tracking
	 * structure from the QP number AVL tree and free the structure.
	 * If we are not releasing the QP number here, then, as long as we
	 * have not exhausted the usefulness of the QPC context (that is,
	 * re-used it too many times without the reference count having
	 * gone to zero), we free up the QPC context for use by another
	 * thread (which will use it to construct a different QP number
	 * from the same QPC table index).
	 */
	if (flags == TAVOR_QPN_RELEASE) {
		entry->qpn_refcnt--;

		/*
		 * If the reference count is zero, then we free the QPC
		 * context (if it hadn't already been freed in an early
		 * step, e.g. TAVOR_QPN_FREE_ONLY) and remove/free the
		 * tracking structure from the QP number AVL tree.
		 */
		if (entry->qpn_refcnt == 0) {
			if (entry->qpn_qpc != NULL) {
				tavor_rsrc_free(state, &entry->qpn_qpc);
			}

			/*
			 * If the current entry has served it's useful
			 * purpose (i.e. been reused the maximum allowable
			 * number of times), then remove it from QP number
			 * AVL tree and free it up.
			 * Note: the counter limit below comes from the 24
			 * bits available in an IB QP number.
			 */
			if (entry->qpn_counter >= (1 <<
			    (24 - state->ts_cfg_profile->cp_log_num_qp))) {
				avl_remove(&state->ts_qpn_avl, entry);
				kmem_free(entry, sizeof (tavor_qpn_entry_t));
			}
		}

	} else if (flags == TAVOR_QPN_FREE_ONLY) {
		/*
		 * Even if we are not freeing the QP number, that will not
		 * always prevent us from releasing the QPC context.  In fact,
		 * since the QPC context only forms part of the whole QPN,
		 * we want to free it up for use by other consumers.  But
		 * if the reference count is non-zero (which it will always
		 * be when we are doing TAVOR_QPN_FREE_ONLY) and the counter
		 * has reached its maximum value, then we cannot reuse the
		 * QPC context until the reference count eventually reaches
		 * zero (in TAVOR_QPN_RELEASE, above).
		 */
		if (entry->qpn_counter < (1 <<
		    (24 - state->ts_cfg_profile->cp_log_num_qp))) {
			tavor_rsrc_free(state, &entry->qpn_qpc);
		}
	}
	mutex_exit(&state->ts_qpn_avl_lock);
}


/*
 * tavor_qpn_avl_compare()
 *    Context: Can be called from user or kernel context.
 */
static int
tavor_qpn_avl_compare(const void *q, const void *e)
{
	tavor_qpn_entry_t	*entry, *query;

	entry = (tavor_qpn_entry_t *)e;
	query = (tavor_qpn_entry_t *)q;

	/* Order entries by ascending QPC table index (AVL contract) */
	if (query->qpn_indx < entry->qpn_indx) {
		return (-1);
	} else if (query->qpn_indx > entry->qpn_indx) {
		return (+1);
	} else {
		return (0);
	}
}


/*
 * tavor_qpn_avl_init()
 *    Context: Only called from attach() path context
 */
void
tavor_qpn_avl_init(tavor_state_t *state)
{
	/* Initialize the lock used for QP number (QPN) AVL tree access */
	mutex_init(&state->ts_qpn_avl_lock, NULL, MUTEX_DRIVER,
	    DDI_INTR_PRI(state->ts_intrmsi_pri));

	/* Initialize the AVL tree for the QP number (QPN) storage */
	avl_create(&state->ts_qpn_avl, tavor_qpn_avl_compare,
	    sizeof (tavor_qpn_entry_t),
	    offsetof(tavor_qpn_entry_t, qpn_avlnode));
}


/*
 * tavor_qpn_avl_fini()
 *    Context: Only called from attach() and/or detach() path contexts
 */
void
tavor_qpn_avl_fini(tavor_state_t *state)
{
	tavor_qpn_entry_t	*entry;
	void			*cookie;

	/*
	 * Empty all entries (if necessary) and destroy the AVL tree
	 * that was used for QP number (QPN) tracking.
1626 */ 1627 cookie = NULL; 1628 while ((entry = (tavor_qpn_entry_t *)avl_destroy_nodes( 1629 &state->ts_qpn_avl, &cookie)) != NULL) { 1630 kmem_free(entry, sizeof (tavor_qpn_entry_t)); 1631 } 1632 avl_destroy(&state->ts_qpn_avl); 1633 1634 /* Destroy the lock used for QP number (QPN) AVL tree access */ 1635 mutex_destroy(&state->ts_qpn_avl_lock); 1636 } 1637 1638 1639 /* 1640 * tavor_qphdl_from_qpnum() 1641 * Context: Can be called from interrupt or base context. 1642 * 1643 * This routine is important because changing the unconstrained 1644 * portion of the QP number is critical to the detection of a 1645 * potential race condition in the QP event handler code (i.e. the case 1646 * where a QP is freed and alloc'd again before an event for the 1647 * "old" QP can be handled). 1648 * 1649 * While this is not a perfect solution (not sure that one exists) 1650 * it does help to mitigate the chance that this race condition will 1651 * cause us to deliver a "stale" event to the new QP owner. Note: 1652 * this solution does not scale well because the number of constrained 1653 * bits increases (and, hence, the number of unconstrained bits 1654 * decreases) as the number of supported QPs grows. For small and 1655 * intermediate values, it should hopefully provide sufficient 1656 * protection. 1657 */ 1658 tavor_qphdl_t 1659 tavor_qphdl_from_qpnum(tavor_state_t *state, uint_t qpnum) 1660 { 1661 uint_t qpindx, qpmask; 1662 1663 /* Calculate the QP table index from the qpnum */ 1664 qpmask = (1 << state->ts_cfg_profile->cp_log_num_qp) - 1; 1665 qpindx = qpnum & qpmask; 1666 return (state->ts_qphdl[qpindx]); 1667 } 1668 1669 1670 /* 1671 * tavor_special_qp_rsrc_alloc 1672 * Context: Can be called from interrupt or base context. 
1673 */ 1674 static int 1675 tavor_special_qp_rsrc_alloc(tavor_state_t *state, ibt_sqp_type_t type, 1676 uint_t port, tavor_rsrc_t **qp_rsrc) 1677 { 1678 uint_t mask, flags; 1679 int status; 1680 1681 mutex_enter(&state->ts_spec_qplock); 1682 flags = state->ts_spec_qpflags; 1683 if (type == IBT_SMI_SQP) { 1684 /* 1685 * Check here to see if the driver has been configured 1686 * to instruct the Tavor firmware to handle all incoming 1687 * SMP messages (i.e. messages sent to SMA). If so, 1688 * then we will treat QP0 as if it has already been 1689 * allocated (for internal use). Otherwise, if we allow 1690 * the allocation to happen, it will cause unexpected 1691 * behaviors (e.g. Tavor SMA becomes unresponsive). 1692 */ 1693 if (state->ts_cfg_profile->cp_qp0_agents_in_fw != 0) { 1694 mutex_exit(&state->ts_spec_qplock); 1695 return (IBT_QP_IN_USE); 1696 } 1697 1698 /* 1699 * If this is the first QP0 allocation, then post 1700 * a CONF_SPECIAL_QP firmware command 1701 */ 1702 if ((flags & TAVOR_SPECIAL_QP0_RSRC_MASK) == 0) { 1703 status = tavor_conf_special_qp_cmd_post(state, 1704 state->ts_spec_qp0->tr_indx, TAVOR_CMD_QP_SMI, 1705 TAVOR_CMD_NOSLEEP_SPIN); 1706 if (status != TAVOR_CMD_SUCCESS) { 1707 mutex_exit(&state->ts_spec_qplock); 1708 cmn_err(CE_CONT, "Tavor: CONF_SPECIAL_QP " 1709 "command failed: %08x\n", status); 1710 return (IBT_INSUFF_RESOURCE); 1711 } 1712 } 1713 1714 /* 1715 * Now check (and, if necessary, modify) the flags to indicate 1716 * whether the allocation was successful 1717 */ 1718 mask = (1 << (TAVOR_SPECIAL_QP0_RSRC + port)); 1719 if (flags & mask) { 1720 mutex_exit(&state->ts_spec_qplock); 1721 return (IBT_QP_IN_USE); 1722 } 1723 state->ts_spec_qpflags |= mask; 1724 *qp_rsrc = state->ts_spec_qp0; 1725 1726 } else { 1727 /* 1728 * If this is the first QP1 allocation, then post 1729 * a CONF_SPECIAL_QP firmware command 1730 */ 1731 if ((flags & TAVOR_SPECIAL_QP1_RSRC_MASK) == 0) { 1732 status = tavor_conf_special_qp_cmd_post(state, 1733 
state->ts_spec_qp1->tr_indx, TAVOR_CMD_QP_GSI, 1734 TAVOR_CMD_NOSLEEP_SPIN); 1735 if (status != TAVOR_CMD_SUCCESS) { 1736 mutex_exit(&state->ts_spec_qplock); 1737 cmn_err(CE_CONT, "Tavor: CONF_SPECIAL_QP " 1738 "command failed: %08x\n", status); 1739 return (IBT_INSUFF_RESOURCE); 1740 } 1741 } 1742 1743 /* 1744 * Now check (and, if necessary, modify) the flags to indicate 1745 * whether the allocation was successful 1746 */ 1747 mask = (1 << (TAVOR_SPECIAL_QP1_RSRC + port)); 1748 if (flags & mask) { 1749 mutex_exit(&state->ts_spec_qplock); 1750 return (IBT_QP_IN_USE); 1751 } 1752 state->ts_spec_qpflags |= mask; 1753 *qp_rsrc = state->ts_spec_qp1; 1754 } 1755 1756 mutex_exit(&state->ts_spec_qplock); 1757 return (DDI_SUCCESS); 1758 } 1759 1760 1761 /* 1762 * tavor_special_qp_rsrc_free 1763 * Context: Can be called from interrupt or base context. 1764 */ 1765 static int 1766 tavor_special_qp_rsrc_free(tavor_state_t *state, ibt_sqp_type_t type, 1767 uint_t port) 1768 { 1769 uint_t mask, flags; 1770 int status; 1771 1772 mutex_enter(&state->ts_spec_qplock); 1773 if (type == IBT_SMI_SQP) { 1774 mask = (1 << (TAVOR_SPECIAL_QP0_RSRC + port)); 1775 state->ts_spec_qpflags &= ~mask; 1776 flags = state->ts_spec_qpflags; 1777 1778 /* 1779 * If this is the last QP0 free, then post a CONF_SPECIAL_QP 1780 * firmware command 1781 */ 1782 if ((flags & TAVOR_SPECIAL_QP0_RSRC_MASK) == 0) { 1783 status = tavor_conf_special_qp_cmd_post(state, 0, 1784 TAVOR_CMD_QP_SMI, TAVOR_CMD_NOSLEEP_SPIN); 1785 if (status != TAVOR_CMD_SUCCESS) { 1786 mutex_exit(&state->ts_spec_qplock); 1787 cmn_err(CE_CONT, "Tavor: CONF_SPECIAL_QP " 1788 "command failed: %08x\n", status); 1789 return (ibc_get_ci_failure(0)); 1790 } 1791 } 1792 } else { 1793 mask = (1 << (TAVOR_SPECIAL_QP1_RSRC + port)); 1794 state->ts_spec_qpflags &= ~mask; 1795 flags = state->ts_spec_qpflags; 1796 1797 /* 1798 * If this is the last QP1 free, then post a CONF_SPECIAL_QP 1799 * firmware command 1800 */ 1801 if ((flags & 
TAVOR_SPECIAL_QP1_RSRC_MASK) == 0) { 1802 status = tavor_conf_special_qp_cmd_post(state, 0, 1803 TAVOR_CMD_QP_GSI, TAVOR_CMD_NOSLEEP_SPIN); 1804 if (status != TAVOR_CMD_SUCCESS) { 1805 mutex_exit(&state->ts_spec_qplock); 1806 cmn_err(CE_CONT, "Tavor: CONF_SPECIAL_QP " 1807 "command failed: %08x\n", status); 1808 return (ibc_get_ci_failure(0)); 1809 } 1810 } 1811 } 1812 1813 mutex_exit(&state->ts_spec_qplock); 1814 return (DDI_SUCCESS); 1815 } 1816 1817 1818 /* 1819 * tavor_qp_sgl_to_logwqesz() 1820 * Context: Can be called from interrupt or base context. 1821 */ 1822 static void 1823 tavor_qp_sgl_to_logwqesz(tavor_state_t *state, uint_t num_sgl, 1824 tavor_qp_wq_type_t wq_type, uint_t *logwqesz, uint_t *max_sgl) 1825 { 1826 uint_t max_size, log2, actual_sgl; 1827 1828 switch (wq_type) { 1829 case TAVOR_QP_WQ_TYPE_SENDQ: 1830 /* 1831 * Use requested maximum SGL to calculate max descriptor size 1832 * (while guaranteeing that the descriptor size is a 1833 * power-of-2 cachelines). 1834 */ 1835 max_size = (TAVOR_QP_WQE_MLX_SND_HDRS + (num_sgl << 4)); 1836 log2 = highbit(max_size); 1837 if (ISP2(max_size)) { 1838 log2 = log2 - 1; 1839 } 1840 1841 /* Make sure descriptor is at least the minimum size */ 1842 log2 = max(log2, TAVOR_QP_WQE_LOG_MINIMUM); 1843 1844 /* Calculate actual number of SGL (given WQE size) */ 1845 actual_sgl = ((1 << log2) - TAVOR_QP_WQE_MLX_SND_HDRS) >> 4; 1846 break; 1847 1848 case TAVOR_QP_WQ_TYPE_RECVQ: 1849 /* 1850 * Same as above (except for Recv WQEs) 1851 */ 1852 max_size = (TAVOR_QP_WQE_MLX_RCV_HDRS + (num_sgl << 4)); 1853 log2 = highbit(max_size); 1854 if (ISP2(max_size)) { 1855 log2 = log2 - 1; 1856 } 1857 1858 /* Make sure descriptor is at least the minimum size */ 1859 log2 = max(log2, TAVOR_QP_WQE_LOG_MINIMUM); 1860 1861 /* Calculate actual number of SGL (given WQE size) */ 1862 actual_sgl = ((1 << log2) - TAVOR_QP_WQE_MLX_RCV_HDRS) >> 4; 1863 break; 1864 1865 case TAVOR_QP_WQ_TYPE_SENDMLX_QP0: 1866 /* 1867 * Same as above (except for 
MLX transport WQEs). For these 1868 * WQEs we have to account for the space consumed by the 1869 * "inline" packet headers. (This is smaller than for QP1 1870 * below because QP0 is not allowed to send packets with a GRH. 1871 */ 1872 max_size = (TAVOR_QP_WQE_MLX_QP0_HDRS + (num_sgl << 4)); 1873 log2 = highbit(max_size); 1874 if (ISP2(max_size)) { 1875 log2 = log2 - 1; 1876 } 1877 1878 /* Make sure descriptor is at least the minimum size */ 1879 log2 = max(log2, TAVOR_QP_WQE_LOG_MINIMUM); 1880 1881 /* Calculate actual number of SGL (given WQE size) */ 1882 actual_sgl = ((1 << log2) - TAVOR_QP_WQE_MLX_QP0_HDRS) >> 4; 1883 break; 1884 1885 case TAVOR_QP_WQ_TYPE_SENDMLX_QP1: 1886 /* 1887 * Same as above. For these WQEs we again have to account for 1888 * the space consumed by the "inline" packet headers. (This 1889 * is larger than for QP0 above because we have to account for 1890 * the possibility of a GRH in each packet - and this 1891 * introduces an alignment issue that causes us to consume 1892 * an additional 8 bytes). 1893 */ 1894 max_size = (TAVOR_QP_WQE_MLX_QP1_HDRS + (num_sgl << 4)); 1895 log2 = highbit(max_size); 1896 if (ISP2(max_size)) { 1897 log2 = log2 - 1; 1898 } 1899 1900 /* Make sure descriptor is at least the minimum size */ 1901 log2 = max(log2, TAVOR_QP_WQE_LOG_MINIMUM); 1902 1903 /* Calculate actual number of SGL (given WQE size) */ 1904 actual_sgl = ((1 << log2) - TAVOR_QP_WQE_MLX_QP1_HDRS) >> 4; 1905 break; 1906 1907 default: 1908 TAVOR_WARNING(state, "unexpected work queue type"); 1909 break; 1910 } 1911 1912 /* Fill in the return values */ 1913 *logwqesz = log2; 1914 *max_sgl = min(state->ts_cfg_profile->cp_wqe_real_max_sgl, actual_sgl); 1915 } 1916