1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * tavor_srq.c 29 * Tavor Shared Receive Queue Processing Routines 30 * 31 * Implements all the routines necessary for allocating, freeing, querying, 32 * modifying and posting shared receive queues. 33 */ 34 35 #include <sys/types.h> 36 #include <sys/conf.h> 37 #include <sys/ddi.h> 38 #include <sys/sunddi.h> 39 #include <sys/modctl.h> 40 #include <sys/bitmap.h> 41 42 #include <sys/ib/adapters/tavor/tavor.h> 43 44 /* 45 * Used by tavor_srq_numcalc() below to fill in the "unconstrained" portion of 46 * Tavor shared receive queue number 47 */ 48 static uint_t tavor_debug_srqnum_cnt = 0x00000000; 49 static void tavor_srq_numcalc(tavor_state_t *state, uint32_t indx, 50 uint32_t *key); 51 static void tavor_srq_sgl_to_logwqesz(tavor_state_t *state, uint_t num_sgl, 52 tavor_qp_wq_type_t wq_type, uint_t *logwqesz, uint_t *max_sgl); 53 54 /* 55 * tavor_srq_alloc() 56 * Context: Can be called only from user or kernel context. 
57 */ 58 int 59 tavor_srq_alloc(tavor_state_t *state, tavor_srq_info_t *srqinfo, 60 uint_t sleepflag, tavor_srq_options_t *op) 61 { 62 ibt_srq_hdl_t ibt_srqhdl; 63 tavor_pdhdl_t pd; 64 ibt_srq_sizes_t *sizes; 65 ibt_srq_sizes_t *real_sizes; 66 tavor_srqhdl_t *srqhdl; 67 ibt_srq_flags_t flags; 68 tavor_rsrc_t *srqc, *rsrc; 69 tavor_hw_srqc_t srqc_entry; 70 uint32_t *buf; 71 tavor_srqhdl_t srq; 72 tavor_umap_db_entry_t *umapdb; 73 ibt_mr_attr_t mr_attr; 74 tavor_mr_options_t mr_op; 75 tavor_mrhdl_t mr; 76 uint64_t addr; 77 uint64_t value, srq_desc_off; 78 uint32_t lkey; 79 uint32_t log_srq_size; 80 uint32_t uarpg; 81 uint_t wq_location, dma_xfer_mode, srq_is_umap; 82 int flag, status; 83 char *errormsg; 84 uint_t max_sgl; 85 uint_t wqesz; 86 87 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sizes)) 88 89 TAVOR_TNF_ENTER(tavor_srq_alloc); 90 91 /* 92 * Check the "options" flag. Currently this flag tells the driver 93 * whether or not the SRQ's work queues should be come from normal 94 * system memory or whether they should be allocated from DDR memory. 95 */ 96 if (op == NULL) { 97 wq_location = TAVOR_QUEUE_LOCATION_NORMAL; 98 } else { 99 wq_location = op->srqo_wq_loc; 100 } 101 102 /* 103 * Extract the necessary info from the tavor_srq_info_t structure 104 */ 105 real_sizes = srqinfo->srqi_real_sizes; 106 sizes = srqinfo->srqi_sizes; 107 pd = srqinfo->srqi_pd; 108 ibt_srqhdl = srqinfo->srqi_ibt_srqhdl; 109 flags = srqinfo->srqi_flags; 110 srqhdl = srqinfo->srqi_srqhdl; 111 112 /* 113 * Determine whether SRQ is being allocated for userland access or 114 * whether it is being allocated for kernel access. If the SRQ is 115 * being allocated for userland access, then lookup the UAR doorbell 116 * page number for the current process. Note: If this is not found 117 * (e.g. if the process has not previously open()'d the Tavor driver), 118 * then an error is returned. 119 */ 120 srq_is_umap = (flags & IBT_SRQ_USER_MAP) ? 
1 : 0; 121 if (srq_is_umap) { 122 status = tavor_umap_db_find(state->ts_instance, ddi_get_pid(), 123 MLNX_UMAP_UARPG_RSRC, &value, 0, NULL); 124 if (status != DDI_SUCCESS) { 125 /* Set "status" and "errormsg" and goto failure */ 126 TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "failed UAR page"); 127 goto srqalloc_fail3; 128 } 129 uarpg = ((tavor_rsrc_t *)(uintptr_t)value)->tr_indx; 130 } 131 132 /* Increase PD refcnt */ 133 tavor_pd_refcnt_inc(pd); 134 135 /* Allocate an SRQ context entry */ 136 status = tavor_rsrc_alloc(state, TAVOR_SRQC, 1, sleepflag, &srqc); 137 if (status != DDI_SUCCESS) { 138 /* Set "status" and "errormsg" and goto failure */ 139 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed SRQ context"); 140 goto srqalloc_fail1; 141 } 142 143 /* Allocate the SRQ Handle entry */ 144 status = tavor_rsrc_alloc(state, TAVOR_SRQHDL, 1, sleepflag, &rsrc); 145 if (status != DDI_SUCCESS) { 146 /* Set "status" and "errormsg" and goto failure */ 147 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed SRQ handle"); 148 goto srqalloc_fail2; 149 } 150 151 srq = (tavor_srqhdl_t)rsrc->tr_addr; 152 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*srq)) 153 154 /* Calculate the SRQ number */ 155 tavor_srq_numcalc(state, srqc->tr_indx, &srq->srq_srqnum); 156 157 /* 158 * If this will be a user-mappable SRQ, then allocate an entry for 159 * the "userland resources database". This will later be added to 160 * the database (after all further SRQ operations are successful). 161 * If we fail here, we must undo the reference counts and the 162 * previous resource allocation. 163 */ 164 if (srq_is_umap) { 165 umapdb = tavor_umap_db_alloc(state->ts_instance, 166 srq->srq_srqnum, MLNX_UMAP_SRQMEM_RSRC, 167 (uint64_t)(uintptr_t)rsrc); 168 if (umapdb == NULL) { 169 /* Set "status" and "errormsg" and goto failure */ 170 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umap add"); 171 goto srqalloc_fail3; 172 } 173 } 174 175 /* 176 * Calculate the appropriate size for the SRQ. 
177 * Note: All Tavor SRQs must be a power-of-2 in size. Also 178 * they may not be any smaller than TAVOR_SRQ_MIN_SIZE. This step 179 * is to round the requested size up to the next highest power-of-2 180 */ 181 sizes->srq_wr_sz = max(sizes->srq_wr_sz, TAVOR_SRQ_MIN_SIZE); 182 log_srq_size = highbit(sizes->srq_wr_sz); 183 if ((sizes->srq_wr_sz & (sizes->srq_wr_sz - 1)) == 0) { 184 log_srq_size = log_srq_size - 1; 185 } 186 187 /* 188 * Next we verify that the rounded-up size is valid (i.e. consistent 189 * with the device limits and/or software-configured limits). If not, 190 * then obviously we have a lot of cleanup to do before returning. 191 */ 192 if (log_srq_size > state->ts_cfg_profile->cp_log_max_srq_sz) { 193 /* Set "status" and "errormsg" and goto failure */ 194 TAVOR_TNF_FAIL(IBT_HCA_WR_EXCEEDED, "max SRQ size"); 195 goto srqalloc_fail4; 196 } 197 198 /* 199 * Next we verify that the requested number of SGL is valid (i.e. 200 * consistent with the device limits and/or software-configured 201 * limits). If not, then obviously the same cleanup needs to be done. 202 */ 203 max_sgl = state->ts_cfg_profile->cp_srq_max_sgl; 204 if (sizes->srq_sgl_sz > max_sgl) { 205 /* Set "status" and "errormsg" and goto failure */ 206 TAVOR_TNF_FAIL(IBT_HCA_SGL_EXCEEDED, "max SRQ SGL"); 207 goto srqalloc_fail4; 208 } 209 210 /* 211 * Determine the SRQ's WQE sizes. This depends on the requested 212 * number of SGLs. Note: This also has the side-effect of 213 * calculating the real number of SGLs (for the calculated WQE size) 214 */ 215 tavor_srq_sgl_to_logwqesz(state, sizes->srq_sgl_sz, 216 TAVOR_QP_WQ_TYPE_RECVQ, &srq->srq_wq_log_wqesz, 217 &srq->srq_wq_sgl); 218 219 /* 220 * Allocate the memory for SRQ work queues. Note: The location from 221 * which we will allocate these work queues has been passed in through 222 * the tavor_qp_options_t structure. 
Since Tavor work queues are not 223 * allowed to cross a 32-bit (4GB) boundary, the alignment of the work 224 * queue memory is very important. We used to allocate work queues 225 * (the combined receive and send queues) so that they would be aligned 226 * on their combined size. That alignment guaranteed that they would 227 * never cross the 4GB boundary (Tavor work queues are on the order of 228 * MBs at maximum). Now we are able to relax this alignment constraint 229 * by ensuring that the IB address assigned to the queue memory (as a 230 * result of the tavor_mr_register() call) is offset from zero. 231 * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to 232 * guarantee the alignment, but when attempting to use IOMMU bypass 233 * mode we found that we were not allowed to specify any alignment that 234 * was more restrictive than the system page size. So we avoided this 235 * constraint by passing two alignment values, one for the memory 236 * allocation itself and the other for the DMA handle (for later bind). 237 * This used to cause more memory than necessary to be allocated (in 238 * order to guarantee the more restrictive alignment contraint). But 239 * be guaranteeing the zero-based IB virtual address for the queue, we 240 * are able to conserve this memory. 241 * 242 * Note: If SRQ is not user-mappable, then it may come from either 243 * kernel system memory or from HCA-attached local DDR memory. 244 * 245 * Note2: We align this queue on a pagesize boundary. This is required 246 * to make sure that all the resulting IB addresses will start at 0, for 247 * a zero-based queue. By making sure we are aligned on at least a 248 * page, any offset we use into our queue will be the same as when we 249 * perform tavor_srq_modify() operations later. 
250 */ 251 wqesz = (1 << srq->srq_wq_log_wqesz); 252 srq->srq_wqinfo.qa_size = (1 << log_srq_size) * wqesz; 253 srq->srq_wqinfo.qa_alloc_align = PAGESIZE; 254 srq->srq_wqinfo.qa_bind_align = PAGESIZE; 255 if (srq_is_umap) { 256 srq->srq_wqinfo.qa_location = TAVOR_QUEUE_LOCATION_USERLAND; 257 } else { 258 srq->srq_wqinfo.qa_location = wq_location; 259 } 260 status = tavor_queue_alloc(state, &srq->srq_wqinfo, sleepflag); 261 if (status != DDI_SUCCESS) { 262 /* Set "status" and "errormsg" and goto failure */ 263 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed srq"); 264 goto srqalloc_fail4; 265 } 266 buf = (uint32_t *)srq->srq_wqinfo.qa_buf_aligned; 267 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*buf)) 268 269 /* 270 * Register the memory for the SRQ work queues. The memory for the SRQ 271 * must be registered in the Tavor TPT tables. This gives us the LKey 272 * to specify in the SRQ context later. Note: If the work queue is to 273 * be allocated from DDR memory, then only a "bypass" mapping is 274 * appropriate. And if the SRQ memory is user-mappable, then we force 275 * DDI_DMA_CONSISTENT mapping. Also, in order to meet the alignment 276 * restriction, we pass the "mro_bind_override_addr" flag in the call 277 * to tavor_mr_register(). This guarantees that the resulting IB vaddr 278 * will be zero-based (modulo the offset into the first page). If we 279 * fail here, we still have the bunch of resource and reference count 280 * cleanup to do. 281 */ 282 flag = (sleepflag == TAVOR_SLEEP) ? 
IBT_MR_SLEEP : 283 IBT_MR_NOSLEEP; 284 mr_attr.mr_vaddr = (uint64_t)(uintptr_t)buf; 285 mr_attr.mr_len = srq->srq_wqinfo.qa_size; 286 mr_attr.mr_as = NULL; 287 mr_attr.mr_flags = flag | IBT_MR_ENABLE_LOCAL_WRITE; 288 if (srq_is_umap) { 289 mr_op.mro_bind_type = state->ts_cfg_profile->cp_iommu_bypass; 290 } else { 291 if (wq_location == TAVOR_QUEUE_LOCATION_NORMAL) { 292 mr_op.mro_bind_type = 293 state->ts_cfg_profile->cp_iommu_bypass; 294 dma_xfer_mode = 295 state->ts_cfg_profile->cp_streaming_consistent; 296 if (dma_xfer_mode == DDI_DMA_STREAMING) { 297 mr_attr.mr_flags |= IBT_MR_NONCOHERENT; 298 } 299 } else { 300 mr_op.mro_bind_type = TAVOR_BINDMEM_BYPASS; 301 } 302 } 303 mr_op.mro_bind_dmahdl = srq->srq_wqinfo.qa_dmahdl; 304 mr_op.mro_bind_override_addr = 1; 305 status = tavor_mr_register(state, pd, &mr_attr, &mr, &mr_op); 306 if (status != DDI_SUCCESS) { 307 /* Set "status" and "errormsg" and goto failure */ 308 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed register mr"); 309 goto srqalloc_fail5; 310 } 311 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr)) 312 addr = mr->mr_bindinfo.bi_addr; 313 lkey = mr->mr_lkey; 314 315 /* 316 * Calculate the offset between the kernel virtual address space 317 * and the IB virtual address space. This will be used when 318 * posting work requests to properly initialize each WQE. 
319 */ 320 srq_desc_off = (uint64_t)(uintptr_t)srq->srq_wqinfo.qa_buf_aligned - 321 (uint64_t)mr->mr_bindinfo.bi_addr; 322 323 /* 324 * Create WQL and Wridlist for use by this SRQ 325 */ 326 srq->srq_wrid_wql = tavor_wrid_wql_create(state); 327 if (srq->srq_wrid_wql == NULL) { 328 /* Set "status" and "errormsg" and goto failure */ 329 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed wql create"); 330 goto srqalloc_fail6; 331 } 332 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(srq->srq_wrid_wql))) 333 334 srq->srq_wridlist = tavor_wrid_get_list(1 << log_srq_size); 335 if (srq->srq_wridlist == NULL) { 336 /* Set "status" and "errormsg" and goto failure */ 337 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed wridlist create"); 338 goto srqalloc_fail7; 339 } 340 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(srq->srq_wridlist))) 341 342 srq->srq_wridlist->wl_srq_en = 1; 343 srq->srq_wridlist->wl_free_list_indx = -1; 344 345 /* 346 * Fill in all the return arguments (if necessary). This includes 347 * real queue size and real SGLs. 348 */ 349 if (real_sizes != NULL) { 350 real_sizes->srq_wr_sz = (1 << log_srq_size); 351 real_sizes->srq_sgl_sz = srq->srq_wq_sgl; 352 } 353 354 /* 355 * Fill in the SRQC entry. This is the final step before passing 356 * ownership of the SRQC entry to the Tavor hardware. We use all of 357 * the information collected/calculated above to fill in the 358 * requisite portions of the SRQC. 
Note: If this SRQ is going to be 359 * used for userland access, then we need to set the UAR page number 360 * appropriately (otherwise it's a "don't care") 361 */ 362 bzero(&srqc_entry, sizeof (tavor_hw_srqc_t)); 363 srqc_entry.wqe_addr_h = (addr >> 32); 364 srqc_entry.next_wqe_addr_l = 0; 365 srqc_entry.ds = (wqesz >> 4); 366 srqc_entry.state = TAVOR_SRQ_STATE_HW_OWNER; 367 srqc_entry.pd = pd->pd_pdnum; 368 srqc_entry.lkey = lkey; 369 srqc_entry.wqe_cnt = 0; 370 if (srq_is_umap) { 371 srqc_entry.uar = uarpg; 372 } else { 373 srqc_entry.uar = 0; 374 } 375 376 /* 377 * Write the SRQC entry to hardware. Lastly, we pass ownership of 378 * the entry to the hardware (using the Tavor SW2HW_SRQ firmware 379 * command). Note: In general, this operation shouldn't fail. But 380 * if it does, we have to undo everything we've done above before 381 * returning error. 382 */ 383 status = tavor_cmn_ownership_cmd_post(state, SW2HW_SRQ, &srqc_entry, 384 sizeof (tavor_hw_srqc_t), srq->srq_srqnum, 385 sleepflag); 386 if (status != TAVOR_CMD_SUCCESS) { 387 cmn_err(CE_CONT, "Tavor: SW2HW_SRQ command failed: %08x\n", 388 status); 389 TNF_PROBE_1(tavor_srq_alloc_sw2hw_srq_cmd_fail, 390 TAVOR_TNF_ERROR, "", tnf_uint, status, status); 391 /* Set "status" and "errormsg" and goto failure */ 392 TAVOR_TNF_FAIL(IBT_FAILURE, "tavor SW2HW_SRQ command"); 393 goto srqalloc_fail8; 394 } 395 396 /* 397 * Fill in the rest of the Tavor SRQ handle. We can update 398 * the following fields for use in further operations on the SRQ. 399 */ 400 srq->srq_srqcrsrcp = srqc; 401 srq->srq_rsrcp = rsrc; 402 srq->srq_mrhdl = mr; 403 srq->srq_refcnt = 0; 404 srq->srq_is_umap = srq_is_umap; 405 srq->srq_uarpg = (srq->srq_is_umap) ? 
uarpg : 0; 406 srq->srq_umap_dhp = (devmap_cookie_t)NULL; 407 srq->srq_pdhdl = pd; 408 srq->srq_wq_lastwqeindx = -1; 409 srq->srq_wq_bufsz = (1 << log_srq_size); 410 srq->srq_wq_buf = buf; 411 srq->srq_desc_off = srq_desc_off; 412 srq->srq_hdlrarg = (void *)ibt_srqhdl; 413 srq->srq_state = 0; 414 srq->srq_real_sizes.srq_wr_sz = (1 << log_srq_size); 415 srq->srq_real_sizes.srq_sgl_sz = srq->srq_wq_sgl; 416 417 /* Determine if later ddi_dma_sync will be necessary */ 418 srq->srq_sync = TAVOR_SRQ_IS_SYNC_REQ(state, srq->srq_wqinfo); 419 420 /* 421 * Put SRQ handle in Tavor SRQNum-to-SRQhdl list. Then fill in the 422 * "srqhdl" and return success 423 */ 424 ASSERT(state->ts_srqhdl[srqc->tr_indx] == NULL); 425 state->ts_srqhdl[srqc->tr_indx] = srq; 426 427 /* 428 * If this is a user-mappable SRQ, then we need to insert the 429 * previously allocated entry into the "userland resources database". 430 * This will allow for later lookup during devmap() (i.e. mmap()) 431 * calls. 432 */ 433 if (srq->srq_is_umap) { 434 tavor_umap_db_add(umapdb); 435 } else { 436 mutex_enter(&srq->srq_wrid_wql->wql_lock); 437 tavor_wrid_list_srq_init(srq->srq_wridlist, srq, 0); 438 mutex_exit(&srq->srq_wrid_wql->wql_lock); 439 } 440 441 *srqhdl = srq; 442 443 TAVOR_TNF_EXIT(tavor_srq_alloc); 444 return (status); 445 446 /* 447 * The following is cleanup for all possible failure cases in this routine 448 */ 449 srqalloc_fail8: 450 kmem_free(srq->srq_wridlist->wl_wre, srq->srq_wridlist->wl_size * 451 sizeof (tavor_wrid_entry_t)); 452 kmem_free(srq->srq_wridlist, sizeof (tavor_wrid_list_hdr_t)); 453 srqalloc_fail7: 454 tavor_wql_refcnt_dec(srq->srq_wrid_wql); 455 srqalloc_fail6: 456 if (tavor_mr_deregister(state, &mr, TAVOR_MR_DEREG_ALL, 457 TAVOR_SLEEPFLAG_FOR_CONTEXT()) != DDI_SUCCESS) { 458 TAVOR_WARNING(state, "failed to deregister SRQ memory"); 459 } 460 srqalloc_fail5: 461 tavor_queue_free(state, &srq->srq_wqinfo); 462 srqalloc_fail4: 463 if (srq_is_umap) { 464 tavor_umap_db_free(umapdb); 
465 } 466 srqalloc_fail3: 467 tavor_rsrc_free(state, &rsrc); 468 srqalloc_fail2: 469 tavor_rsrc_free(state, &srqc); 470 srqalloc_fail1: 471 tavor_pd_refcnt_dec(pd); 472 srqalloc_fail: 473 TNF_PROBE_1(tavor_srq_alloc_fail, TAVOR_TNF_ERROR, "", 474 tnf_string, msg, errormsg); 475 TAVOR_TNF_EXIT(tavor_srq_alloc); 476 return (status); 477 } 478 479 480 /* 481 * tavor_srq_free() 482 * Context: Can be called only from user or kernel context. 483 */ 484 /* ARGSUSED */ 485 int 486 tavor_srq_free(tavor_state_t *state, tavor_srqhdl_t *srqhdl, uint_t sleepflag) 487 { 488 tavor_rsrc_t *srqc, *rsrc; 489 tavor_umap_db_entry_t *umapdb; 490 uint64_t value; 491 tavor_srqhdl_t srq; 492 tavor_mrhdl_t mr; 493 tavor_pdhdl_t pd; 494 tavor_hw_srqc_t srqc_entry; 495 uint32_t srqnum; 496 uint32_t size; 497 uint_t maxprot; 498 int status; 499 500 TAVOR_TNF_ENTER(tavor_srq_free); 501 502 /* 503 * Pull all the necessary information from the Tavor Shared Receive 504 * Queue handle. This is necessary here because the resource for the 505 * SRQ handle is going to be freed up as part of this operation. 506 */ 507 srq = *srqhdl; 508 mutex_enter(&srq->srq_lock); 509 srqc = srq->srq_srqcrsrcp; 510 rsrc = srq->srq_rsrcp; 511 pd = srq->srq_pdhdl; 512 mr = srq->srq_mrhdl; 513 srqnum = srq->srq_srqnum; 514 515 /* 516 * If there are work queues still associated with the SRQ, then return 517 * an error. Otherwise, we will be holding the SRQ lock. 518 */ 519 if (srq->srq_refcnt != 0) { 520 mutex_exit(&srq->srq_lock); 521 TNF_PROBE_1(tavor_srq_free_refcnt_fail, TAVOR_TNF_ERROR, "", 522 tnf_int, refcnt, srq->srq_refcnt); 523 TAVOR_TNF_EXIT(tavor_srq_free); 524 return (IBT_SRQ_IN_USE); 525 } 526 527 /* 528 * If this was a user-mappable SRQ, then we need to remove its entry 529 * from the "userland resources database". If it is also currently 530 * mmap()'d out to a user process, then we need to call 531 * devmap_devmem_remap() to remap the SRQ memory to an invalid mapping. 
532 * We also need to invalidate the SRQ tracking information for the 533 * user mapping. 534 */ 535 if (srq->srq_is_umap) { 536 status = tavor_umap_db_find(state->ts_instance, srq->srq_srqnum, 537 MLNX_UMAP_SRQMEM_RSRC, &value, TAVOR_UMAP_DB_REMOVE, 538 &umapdb); 539 if (status != DDI_SUCCESS) { 540 mutex_exit(&srq->srq_lock); 541 TAVOR_WARNING(state, "failed to find in database"); 542 TAVOR_TNF_EXIT(tavor_srq_free); 543 return (ibc_get_ci_failure(0)); 544 } 545 tavor_umap_db_free(umapdb); 546 if (srq->srq_umap_dhp != NULL) { 547 maxprot = (PROT_READ | PROT_WRITE | PROT_USER); 548 status = devmap_devmem_remap(srq->srq_umap_dhp, 549 state->ts_dip, 0, 0, srq->srq_wqinfo.qa_size, 550 maxprot, DEVMAP_MAPPING_INVALID, NULL); 551 if (status != DDI_SUCCESS) { 552 mutex_exit(&srq->srq_lock); 553 TAVOR_WARNING(state, "failed in SRQ memory " 554 "devmap_devmem_remap()"); 555 TAVOR_TNF_EXIT(tavor_srq_free); 556 return (ibc_get_ci_failure(0)); 557 } 558 srq->srq_umap_dhp = (devmap_cookie_t)NULL; 559 } 560 } 561 562 /* 563 * Put NULL into the Tavor SRQNum-to-SRQHdl list. This will allow any 564 * in-progress events to detect that the SRQ corresponding to this 565 * number has been freed. 566 */ 567 state->ts_srqhdl[srqc->tr_indx] = NULL; 568 569 mutex_exit(&srq->srq_lock); 570 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*srq)); 571 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*srq->srq_wridlist)); 572 573 /* 574 * Reclaim SRQC entry from hardware (using the Tavor HW2SW_SRQ 575 * firmware command). If the ownership transfer fails for any reason, 576 * then it is an indication that something (either in HW or SW) has 577 * gone seriously wrong. 
578 */ 579 status = tavor_cmn_ownership_cmd_post(state, HW2SW_SRQ, &srqc_entry, 580 sizeof (tavor_hw_srqc_t), srqnum, sleepflag); 581 if (status != TAVOR_CMD_SUCCESS) { 582 TAVOR_WARNING(state, "failed to reclaim SRQC ownership"); 583 cmn_err(CE_CONT, "Tavor: HW2SW_SRQ command failed: %08x\n", 584 status); 585 TNF_PROBE_1(tavor_srq_free_hw2sw_srq_cmd_fail, 586 TAVOR_TNF_ERROR, "", tnf_uint, status, status); 587 TAVOR_TNF_EXIT(tavor_srq_free); 588 return (IBT_FAILURE); 589 } 590 591 /* 592 * Deregister the memory for the Shared Receive Queue. If this fails 593 * for any reason, then it is an indication that something (either 594 * in HW or SW) has gone seriously wrong. So we print a warning 595 * message and return. 596 */ 597 status = tavor_mr_deregister(state, &mr, TAVOR_MR_DEREG_ALL, 598 sleepflag); 599 if (status != DDI_SUCCESS) { 600 TAVOR_WARNING(state, "failed to deregister SRQ memory"); 601 TNF_PROBE_0(tavor_srq_free_dereg_mr_fail, TAVOR_TNF_ERROR, ""); 602 TAVOR_TNF_EXIT(tavor_srq_free); 603 return (IBT_FAILURE); 604 } 605 606 /* Calculate the size and free the wridlist container */ 607 if (srq->srq_wridlist != NULL) { 608 size = (srq->srq_wridlist->wl_size * 609 sizeof (tavor_wrid_entry_t)); 610 kmem_free(srq->srq_wridlist->wl_wre, size); 611 kmem_free(srq->srq_wridlist, sizeof (tavor_wrid_list_hdr_t)); 612 613 /* 614 * Release reference to WQL; If this is the last reference, 615 * this call also has the side effect of freeing up the 616 * 'srq_wrid_wql' memory. 
617 */ 618 tavor_wql_refcnt_dec(srq->srq_wrid_wql); 619 } 620 621 /* Free the memory for the SRQ */ 622 tavor_queue_free(state, &srq->srq_wqinfo); 623 624 /* Free the Tavor SRQ Handle */ 625 tavor_rsrc_free(state, &rsrc); 626 627 /* Free the SRQC entry resource */ 628 tavor_rsrc_free(state, &srqc); 629 630 /* Decrement the reference count on the protection domain (PD) */ 631 tavor_pd_refcnt_dec(pd); 632 633 /* Set the srqhdl pointer to NULL and return success */ 634 *srqhdl = NULL; 635 636 TAVOR_TNF_EXIT(tavor_srq_free); 637 return (DDI_SUCCESS); 638 } 639 640 641 /* 642 * tavor_srq_modify() 643 * Context: Can be called only from user or kernel context. 644 */ 645 int 646 tavor_srq_modify(tavor_state_t *state, tavor_srqhdl_t srq, uint_t size, 647 uint_t *real_size, uint_t sleepflag) 648 { 649 tavor_qalloc_info_t new_srqinfo, old_srqinfo; 650 tavor_rsrc_t *mtt, *mpt, *old_mtt; 651 tavor_bind_info_t bind; 652 tavor_bind_info_t old_bind; 653 tavor_rsrc_pool_info_t *rsrc_pool; 654 tavor_mrhdl_t mr; 655 tavor_hw_mpt_t mpt_entry; 656 tavor_wrid_entry_t *wre_new, *wre_old; 657 uint64_t mtt_ddrbaseaddr, mtt_addr; 658 uint64_t srq_desc_off; 659 uint32_t *buf, srq_old_bufsz; 660 uint32_t wqesz; 661 uint_t max_srq_size; 662 uint_t dma_xfer_mode, mtt_pgsize_bits; 663 uint_t srq_sync, log_srq_size, maxprot; 664 uint_t wq_location; 665 int status; 666 char *errormsg; 667 668 TAVOR_TNF_ENTER(tavor_srq_modify); 669 670 /* 671 * Check the "inddr" flag. This flag tells the driver whether or not 672 * the SRQ's work queues should be come from normal system memory or 673 * whether they should be allocated from DDR memory. 
674 */ 675 wq_location = state->ts_cfg_profile->cp_srq_wq_inddr; 676 677 /* 678 * If size requested is larger than device capability, return 679 * Insufficient Resources 680 */ 681 max_srq_size = (1 << state->ts_cfg_profile->cp_log_max_srq_sz); 682 if (size > max_srq_size) { 683 TNF_PROBE_0(tavor_srq_modify_size_larger_than_maxsize, 684 TAVOR_TNF_ERROR, ""); 685 TAVOR_TNF_EXIT(tavor_srq_modify); 686 return (IBT_HCA_WR_EXCEEDED); 687 } 688 689 /* 690 * Calculate the appropriate size for the SRQ. 691 * Note: All Tavor SRQs must be a power-of-2 in size. Also 692 * they may not be any smaller than TAVOR_SRQ_MIN_SIZE. This step 693 * is to round the requested size up to the next highest power-of-2 694 */ 695 size = max(size, TAVOR_SRQ_MIN_SIZE); 696 log_srq_size = highbit(size); 697 if ((size & (size - 1)) == 0) { 698 log_srq_size = log_srq_size - 1; 699 } 700 701 /* 702 * Next we verify that the rounded-up size is valid (i.e. consistent 703 * with the device limits and/or software-configured limits). 704 */ 705 if (log_srq_size > state->ts_cfg_profile->cp_log_max_srq_sz) { 706 /* Set "status" and "errormsg" and goto failure */ 707 TAVOR_TNF_FAIL(IBT_HCA_WR_EXCEEDED, "max SRQ size"); 708 goto srqmodify_fail; 709 } 710 711 /* 712 * Allocate the memory for newly resized Shared Receive Queue. 713 * 714 * Note: If SRQ is not user-mappable, then it may come from either 715 * kernel system memory or from HCA-attached local DDR memory. 716 * 717 * Note2: We align this queue on a pagesize boundary. This is required 718 * to make sure that all the resulting IB addresses will start at 0, 719 * for a zero-based queue. By making sure we are aligned on at least a 720 * page, any offset we use into our queue will be the same as it was 721 * when we allocated it at tavor_srq_alloc() time. 
722 */ 723 wqesz = (1 << srq->srq_wq_log_wqesz); 724 new_srqinfo.qa_size = (1 << log_srq_size) * wqesz; 725 new_srqinfo.qa_alloc_align = PAGESIZE; 726 new_srqinfo.qa_bind_align = PAGESIZE; 727 if (srq->srq_is_umap) { 728 new_srqinfo.qa_location = TAVOR_QUEUE_LOCATION_USERLAND; 729 } else { 730 new_srqinfo.qa_location = wq_location; 731 } 732 status = tavor_queue_alloc(state, &new_srqinfo, sleepflag); 733 if (status != DDI_SUCCESS) { 734 /* Set "status" and "errormsg" and goto failure */ 735 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed srq"); 736 goto srqmodify_fail; 737 } 738 buf = (uint32_t *)new_srqinfo.qa_buf_aligned; 739 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*buf)) 740 741 /* 742 * Allocate the memory for the new WRE list. This will be used later 743 * when we resize the wridlist based on the new SRQ size. 744 */ 745 wre_new = (tavor_wrid_entry_t *)kmem_zalloc((1 << log_srq_size) * 746 sizeof (tavor_wrid_entry_t), sleepflag); 747 if (wre_new == NULL) { 748 /* Set "status" and "errormsg" and goto failure */ 749 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, 750 "failed wre_new alloc"); 751 goto srqmodify_fail; 752 } 753 754 /* 755 * Fill in the "bind" struct. This struct provides the majority 756 * of the information that will be used to distinguish between an 757 * "addr" binding (as is the case here) and a "buf" binding (see 758 * below). The "bind" struct is later passed to tavor_mr_mem_bind() 759 * which does most of the "heavy lifting" for the Tavor memory 760 * registration routines. 761 */ 762 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(bind)) 763 bzero(&bind, sizeof (tavor_bind_info_t)); 764 bind.bi_type = TAVOR_BINDHDL_VADDR; 765 bind.bi_addr = (uint64_t)(uintptr_t)buf; 766 bind.bi_len = new_srqinfo.qa_size; 767 bind.bi_as = NULL; 768 bind.bi_flags = sleepflag == TAVOR_SLEEP ? 
IBT_MR_SLEEP : 769 IBT_MR_NOSLEEP | IBT_MR_ENABLE_LOCAL_WRITE; 770 if (srq->srq_is_umap) { 771 bind.bi_bypass = state->ts_cfg_profile->cp_iommu_bypass; 772 } else { 773 if (wq_location == TAVOR_QUEUE_LOCATION_NORMAL) { 774 bind.bi_bypass = 775 state->ts_cfg_profile->cp_iommu_bypass; 776 dma_xfer_mode = 777 state->ts_cfg_profile->cp_streaming_consistent; 778 if (dma_xfer_mode == DDI_DMA_STREAMING) { 779 bind.bi_flags |= IBT_MR_NONCOHERENT; 780 } 781 } else { 782 bind.bi_bypass = TAVOR_BINDMEM_BYPASS; 783 } 784 } 785 status = tavor_mr_mtt_bind(state, &bind, new_srqinfo.qa_dmahdl, &mtt, 786 &mtt_pgsize_bits); 787 if (status != DDI_SUCCESS) { 788 /* Set "status" and "errormsg" and goto failure */ 789 TAVOR_TNF_FAIL(status, "failed mtt bind"); 790 kmem_free(wre_new, srq->srq_wq_bufsz * 791 sizeof (tavor_wrid_entry_t)); 792 tavor_queue_free(state, &new_srqinfo); 793 goto srqmodify_fail; 794 } 795 796 /* 797 * Calculate the offset between the kernel virtual address space 798 * and the IB virtual address space. This will be used when 799 * posting work requests to properly initialize each WQE. 800 * 801 * Note: bind addr is zero-based (from alloc) so we calculate the 802 * correct new offset here. 803 */ 804 bind.bi_addr = bind.bi_addr & ((1 << mtt_pgsize_bits) - 1); 805 srq_desc_off = (uint64_t)(uintptr_t)new_srqinfo.qa_buf_aligned - 806 (uint64_t)bind.bi_addr; 807 808 /* 809 * Get the base address for the MTT table. This will be necessary 810 * below when we are modifying the MPT entry. 811 */ 812 rsrc_pool = &state->ts_rsrc_hdl[TAVOR_MTT]; 813 mtt_ddrbaseaddr = (uint64_t)(uintptr_t)rsrc_pool->rsrc_ddr_offset; 814 815 /* 816 * Fill in the MPT entry. This is the final step before passing 817 * ownership of the MPT entry to the Tavor hardware. We use all of 818 * the information collected/calculated above to fill in the 819 * requisite portions of the MPT. 
820 */ 821 bzero(&mpt_entry, sizeof (tavor_hw_mpt_t)); 822 mpt_entry.reg_win_len = bind.bi_len; 823 mtt_addr = mtt_ddrbaseaddr + (mtt->tr_indx << TAVOR_MTT_SIZE_SHIFT); 824 mpt_entry.mttseg_addr_h = mtt_addr >> 32; 825 mpt_entry.mttseg_addr_l = mtt_addr >> 6; 826 827 /* 828 * Now we grab the SRQ lock. Since we will be updating the actual 829 * SRQ location and the producer/consumer indexes, we should hold 830 * the lock. 831 * 832 * We do a TAVOR_NOSLEEP here (and below), though, because we are 833 * holding the "srq_lock" and if we got raised to interrupt level 834 * by priority inversion, we would not want to block in this routine 835 * waiting for success. 836 */ 837 mutex_enter(&srq->srq_lock); 838 839 /* 840 * Copy old entries to new buffer 841 */ 842 srq_old_bufsz = srq->srq_wq_bufsz; 843 bcopy(srq->srq_wq_buf, buf, srq_old_bufsz * wqesz); 844 845 /* Determine if later ddi_dma_sync will be necessary */ 846 srq_sync = TAVOR_SRQ_IS_SYNC_REQ(state, srq->srq_wqinfo); 847 848 /* Sync entire "new" SRQ for use by hardware (if necessary) */ 849 if (srq_sync) { 850 (void) ddi_dma_sync(bind.bi_dmahdl, 0, 851 new_srqinfo.qa_size, DDI_DMA_SYNC_FORDEV); 852 } 853 854 /* 855 * Setup MPT information for use in the MODIFY_MPT command 856 */ 857 mr = srq->srq_mrhdl; 858 mutex_enter(&mr->mr_lock); 859 mpt = srq->srq_mrhdl->mr_mptrsrcp; 860 861 /* 862 * MODIFY_MPT 863 * 864 * If this fails for any reason, then it is an indication that 865 * something (either in HW or SW) has gone seriously wrong. So we 866 * print a warning message and return. 
867 */ 868 status = tavor_modify_mpt_cmd_post(state, &mpt_entry, mpt->tr_indx, 869 TAVOR_CMD_MODIFY_MPT_RESIZESRQ, sleepflag); 870 if (status != TAVOR_CMD_SUCCESS) { 871 cmn_err(CE_CONT, "Tavor: MODIFY_MPT command failed: %08x\n", 872 status); 873 TNF_PROBE_1(tavor_mr_common_reg_sw2hw_mpt_cmd_fail, 874 TAVOR_TNF_ERROR, "", tnf_uint, status, status); 875 TAVOR_TNF_FAIL(status, "MODIFY_MPT command failed"); 876 (void) tavor_mr_mtt_unbind(state, &srq->srq_mrhdl->mr_bindinfo, 877 srq->srq_mrhdl->mr_mttrsrcp); 878 kmem_free(wre_new, srq->srq_wq_bufsz * 879 sizeof (tavor_wrid_entry_t)); 880 tavor_queue_free(state, &new_srqinfo); 881 mutex_exit(&mr->mr_lock); 882 mutex_exit(&srq->srq_lock); 883 return (ibc_get_ci_failure(0)); 884 } 885 886 /* 887 * Update the Tavor Shared Receive Queue handle with all the new 888 * information. At the same time, save away all the necessary 889 * information for freeing up the old resources 890 */ 891 old_srqinfo = srq->srq_wqinfo; 892 old_mtt = srq->srq_mrhdl->mr_mttrsrcp; 893 bcopy(&srq->srq_mrhdl->mr_bindinfo, &old_bind, 894 sizeof (tavor_bind_info_t)); 895 896 /* Now set the new info */ 897 srq->srq_wqinfo = new_srqinfo; 898 srq->srq_wq_buf = buf; 899 srq->srq_wq_bufsz = (1 << log_srq_size); 900 bcopy(&bind, &srq->srq_mrhdl->mr_bindinfo, sizeof (tavor_bind_info_t)); 901 srq->srq_mrhdl->mr_mttrsrcp = mtt; 902 srq->srq_desc_off = srq_desc_off; 903 srq->srq_real_sizes.srq_wr_sz = (1 << log_srq_size); 904 905 /* Update MR mtt pagesize */ 906 mr->mr_logmttpgsz = mtt_pgsize_bits; 907 mutex_exit(&mr->mr_lock); 908 909 #ifdef __lock_lint 910 mutex_enter(&srq->srq_wrid_wql->wql_lock); 911 #else 912 if (srq->srq_wrid_wql != NULL) { 913 mutex_enter(&srq->srq_wrid_wql->wql_lock); 914 } 915 #endif 916 917 /* 918 * Initialize new wridlist, if needed. 919 * 920 * If a wridlist already is setup on an SRQ (the QP associated with an 921 * SRQ has moved "from_reset") then we must update this wridlist based 922 * on the new SRQ size. 
We allocate the new size of Work Request ID 923 * Entries, copy over the old entries to the new list, and 924 * re-initialize the srq wridlist in non-umap case 925 */ 926 wre_old = NULL; 927 if (srq->srq_wridlist != NULL) { 928 wre_old = srq->srq_wridlist->wl_wre; 929 930 bcopy(wre_old, wre_new, srq_old_bufsz * 931 sizeof (tavor_wrid_entry_t)); 932 933 /* Setup new sizes in wre */ 934 srq->srq_wridlist->wl_wre = wre_new; 935 srq->srq_wridlist->wl_size = srq->srq_wq_bufsz; 936 937 if (!srq->srq_is_umap) { 938 tavor_wrid_list_srq_init(srq->srq_wridlist, srq, 939 srq_old_bufsz); 940 } 941 } 942 943 #ifdef __lock_lint 944 mutex_exit(&srq->srq_wrid_wql->wql_lock); 945 #else 946 if (srq->srq_wrid_wql != NULL) { 947 mutex_exit(&srq->srq_wrid_wql->wql_lock); 948 } 949 #endif 950 951 /* 952 * If "old" SRQ was a user-mappable SRQ that is currently mmap()'d out 953 * to a user process, then we need to call devmap_devmem_remap() to 954 * invalidate the mapping to the SRQ memory. We also need to 955 * invalidate the SRQ tracking information for the user mapping. 956 * 957 * Note: On failure, the remap really shouldn't ever happen. So, if it 958 * does, it is an indication that something has gone seriously wrong. 
959 * So we print a warning message and return error (knowing, of course, 960 * that the "old" SRQ memory will be leaked) 961 */ 962 if ((srq->srq_is_umap) && (srq->srq_umap_dhp != NULL)) { 963 maxprot = (PROT_READ | PROT_WRITE | PROT_USER); 964 status = devmap_devmem_remap(srq->srq_umap_dhp, 965 state->ts_dip, 0, 0, srq->srq_wqinfo.qa_size, maxprot, 966 DEVMAP_MAPPING_INVALID, NULL); 967 if (status != DDI_SUCCESS) { 968 mutex_exit(&srq->srq_lock); 969 TAVOR_WARNING(state, "failed in SRQ memory " 970 "devmap_devmem_remap()"); 971 /* We can, however, free the memory for old wre */ 972 if (wre_old != NULL) { 973 kmem_free(wre_old, srq_old_bufsz * 974 sizeof (tavor_wrid_entry_t)); 975 } 976 TAVOR_TNF_EXIT(tavor_srq_modify); 977 return (ibc_get_ci_failure(0)); 978 } 979 srq->srq_umap_dhp = (devmap_cookie_t)NULL; 980 } 981 982 /* 983 * Drop the SRQ lock now. The only thing left to do is to free up 984 * the old resources. 985 */ 986 mutex_exit(&srq->srq_lock); 987 988 /* 989 * Unbind the MTT entries. 990 */ 991 status = tavor_mr_mtt_unbind(state, &old_bind, old_mtt); 992 if (status != DDI_SUCCESS) { 993 TAVOR_WARNING(state, "failed to unbind old SRQ memory"); 994 /* Set "status" and "errormsg" and goto failure */ 995 TAVOR_TNF_FAIL(ibc_get_ci_failure(0), 996 "failed to unbind (old)"); 997 goto srqmodify_fail; 998 } 999 1000 /* Free the memory for old wre */ 1001 if (wre_old != NULL) { 1002 kmem_free(wre_old, srq_old_bufsz * 1003 sizeof (tavor_wrid_entry_t)); 1004 } 1005 1006 /* Free the memory for the old SRQ */ 1007 tavor_queue_free(state, &old_srqinfo); 1008 1009 /* 1010 * Fill in the return arguments (if necessary). This includes the 1011 * real new completion queue size. 
 */
	if (real_size != NULL) {
		*real_size = (1 << log_srq_size);
	}

	TAVOR_TNF_EXIT(tavor_srq_modify);
	return (DDI_SUCCESS);

srqmodify_fail:
	TNF_PROBE_1(tavor_srq_modify_fail, TAVOR_TNF_ERROR, "",
	    tnf_string, msg, errormsg);
	TAVOR_TNF_EXIT(tavor_srq_modify);
	return (status);
}


/*
 * tavor_srq_numcalc()
 *    Context: Can be called from interrupt or base context.
 *
 *    Computes a 32-bit SRQ number for the SRQ at table index "indx" and
 *    returns it through "key".  The low cp_log_num_srq bits are the
 *    constrained (unique) table index; the bits above them are filled
 *    from a free-running debug counter so that a freed-and-reallocated
 *    SRQ gets a different number (see tavor_srqhdl_from_srqnum()).
 *
 *    NOTE(review): the final mask is TAVOR_CQ_MAXNUMBER_MSK, which by
 *    its name is the *CQ* number mask -- confirm against tavor.h whether
 *    an SRQ-specific mask was intended here.
 */
static void
tavor_srq_numcalc(tavor_state_t *state, uint32_t indx, uint32_t *key)
{
	uint32_t	tmp, log_num_srq;

	/*
	 * Generate a simple key from counter.  Note:  We increment this
	 * static variable _intentionally_ without any kind of mutex around
	 * it.  First, single-threading all operations through a single lock
	 * would be a bad idea (from a performance point-of-view).  Second,
	 * the upper "unconstrained" bits don't really have to be unique
	 * because the lower bits are guaranteed to be (although we do make a
	 * best effort to ensure that they are).  Third, the window for the
	 * race (where both threads read and update the counter at the same
	 * time) is incredibly small.
	 */
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(tavor_debug_srqnum_cnt))
	log_num_srq = state->ts_cfg_profile->cp_log_num_srq;
	tmp = (tavor_debug_srqnum_cnt++) << log_num_srq;
	*key = (tmp | indx) & TAVOR_CQ_MAXNUMBER_MSK;
}


/*
 * tavor_srq_refcnt_inc()
 *    Context: Can be called from interrupt or base context.
 *
 *    Increments the SRQ handle's reference count under "srq_lock".
 *    Note that the TNF probe fires before the increment, so it records
 *    the pre-increment value.
 */
void
tavor_srq_refcnt_inc(tavor_srqhdl_t srq)
{
	mutex_enter(&srq->srq_lock);
	TNF_PROBE_1_DEBUG(tavor_srq_refcnt_inc, TAVOR_TNF_TRACE, "",
	    tnf_uint, refcnt, srq->srq_refcnt);
	srq->srq_refcnt++;
	mutex_exit(&srq->srq_lock);
}


/*
 * tavor_srq_refcnt_dec()
 *    Context: Can be called from interrupt or base context.
 */
void
tavor_srq_refcnt_dec(tavor_srqhdl_t srq)
{
	mutex_enter(&srq->srq_lock);
	srq->srq_refcnt--;
	/* Probe fires after the decrement: records the new refcnt value */
	TNF_PROBE_1_DEBUG(tavor_srq_refcnt_dec, TAVOR_TNF_TRACE, "",
	    tnf_uint, refcnt, srq->srq_refcnt);
	mutex_exit(&srq->srq_lock);
}


/*
 * tavor_srqhdl_from_srqnum()
 *    Context: Can be called from interrupt or base context.
 *
 *    Maps an SRQ number back to its software handle by masking off the
 *    unconstrained upper bits (see tavor_srq_numcalc()) and using the
 *    remaining constrained bits to index the state's SRQ handle table.
 *    NOTE(review): presumably returns NULL when the table slot is
 *    empty (SRQ already freed) -- confirm against ts_srqhdl maintenance.
 *
 *    This routine is important because changing the unconstrained
 *    portion of the SRQ number is critical to the detection of a
 *    potential race condition in the SRQ handler code (i.e. the case
 *    where a SRQ is freed and alloc'd again before an event for the
 *    "old" SRQ can be handled).
 *
 *    While this is not a perfect solution (not sure that one exists)
 *    it does help to mitigate the chance that this race condition will
 *    cause us to deliver a "stale" event to the new SRQ owner.  Note:
 *    this solution does not scale well because the number of constrained
 *    bits increases (and, hence, the number of unconstrained bits
 *    decreases) as the number of supported SRQ grows.  For small and
 *    intermediate values, it should hopefully provide sufficient
 *    protection.
 */
tavor_srqhdl_t
tavor_srqhdl_from_srqnum(tavor_state_t *state, uint_t srqnum)
{
	uint_t	srqindx, srqmask;

	/* Calculate the SRQ table index from the srqnum */
	srqmask = (1 << state->ts_cfg_profile->cp_log_num_srq) - 1;
	srqindx = srqnum & srqmask;
	return (state->ts_srqhdl[srqindx]);
}


/*
 * tavor_srq_sgl_to_logwqesz()
 *    Context: Can be called from interrupt or base context.
1119 */ 1120 static void 1121 tavor_srq_sgl_to_logwqesz(tavor_state_t *state, uint_t num_sgl, 1122 tavor_qp_wq_type_t wq_type, uint_t *logwqesz, uint_t *max_sgl) 1123 { 1124 uint_t max_size, log2, actual_sgl; 1125 1126 TAVOR_TNF_ENTER(tavor_srq_sgl_to_logwqesz); 1127 1128 switch (wq_type) { 1129 case TAVOR_QP_WQ_TYPE_RECVQ: 1130 /* 1131 * Use requested maximum SGL to calculate max descriptor size 1132 * (while guaranteeing that the descriptor size is a 1133 * power-of-2 cachelines). 1134 */ 1135 max_size = (TAVOR_QP_WQE_MLX_RCV_HDRS + (num_sgl << 4)); 1136 log2 = highbit(max_size); 1137 if ((max_size & (max_size - 1)) == 0) { 1138 log2 = log2 - 1; 1139 } 1140 1141 /* Make sure descriptor is at least the minimum size */ 1142 log2 = max(log2, TAVOR_QP_WQE_LOG_MINIMUM); 1143 1144 /* Calculate actual number of SGL (given WQE size) */ 1145 actual_sgl = ((1 << log2) - TAVOR_QP_WQE_MLX_RCV_HDRS) >> 4; 1146 break; 1147 1148 default: 1149 TAVOR_WARNING(state, "unexpected work queue type"); 1150 TNF_PROBE_0(tavor_srq_sgl_to_logwqesz_inv_wqtype_fail, 1151 TAVOR_TNF_ERROR, ""); 1152 break; 1153 } 1154 1155 /* Fill in the return values */ 1156 *logwqesz = log2; 1157 *max_sgl = min(state->ts_cfg_profile->cp_srq_max_sgl, actual_sgl); 1158 1159 TAVOR_TNF_EXIT(tavor_qp_sgl_to_logwqesz); 1160 } 1161