1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * hermon_srq.c 29 * Hermon Shared Receive Queue Processing Routines 30 * 31 * Implements all the routines necessary for allocating, freeing, querying, 32 * modifying and posting shared receive queues. 33 */ 34 35 #include <sys/types.h> 36 #include <sys/conf.h> 37 #include <sys/ddi.h> 38 #include <sys/sunddi.h> 39 #include <sys/modctl.h> 40 #include <sys/bitmap.h> 41 42 #include <sys/ib/adapters/hermon/hermon.h> 43 44 static void hermon_srq_sgl_to_logwqesz(hermon_state_t *state, uint_t num_sgl, 45 hermon_qp_wq_type_t wq_type, uint_t *logwqesz, uint_t *max_sgl); 46 47 /* 48 * hermon_srq_alloc() 49 * Context: Can be called only from user or kernel context. 50 */ 51 int 52 hermon_srq_alloc(hermon_state_t *state, hermon_srq_info_t *srqinfo, 53 uint_t sleepflag) 54 { 55 ibt_srq_hdl_t ibt_srqhdl; 56 hermon_pdhdl_t pd; 57 ibt_srq_sizes_t *sizes; 58 ibt_srq_sizes_t *real_sizes; 59 hermon_srqhdl_t *srqhdl; 60 ibt_srq_flags_t flags; 61 hermon_rsrc_t *srqc, *rsrc; 62 hermon_hw_srqc_t srqc_entry; 63 uint32_t *buf; 64 hermon_srqhdl_t srq; 65 hermon_umap_db_entry_t *umapdb; 66 ibt_mr_attr_t mr_attr; 67 hermon_mr_options_t mr_op; 68 hermon_mrhdl_t mr; 69 uint64_t value, srq_desc_off; 70 uint32_t log_srq_size; 71 uint32_t uarpg; 72 uint_t srq_is_umap; 73 int flag, status; 74 uint_t max_sgl; 75 uint_t wqesz; 76 uint_t srq_wr_sz; 77 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sizes)) 78 79 /* 80 * options-->wq_location used to be for location, now explicitly 81 * LOCATION_NORMAL 82 */ 83 84 /* 85 * Extract the necessary info from the hermon_srq_info_t structure 86 */ 87 real_sizes = srqinfo->srqi_real_sizes; 88 sizes = srqinfo->srqi_sizes; 89 pd = srqinfo->srqi_pd; 90 ibt_srqhdl = srqinfo->srqi_ibt_srqhdl; 91 flags = srqinfo->srqi_flags; 92 srqhdl = srqinfo->srqi_srqhdl; 93 94 /* 95 * Determine whether SRQ is being allocated for userland access or 96 * whether it is being allocated for kernel access. If the SRQ is 97 * being allocated for userland access, then lookup the UAR doorbell 98 * page number for the current process. Note: If this is not found 99 * (e.g. if the process has not previously open()'d the Hermon driver), 100 * then an error is returned. 101 */ 102 srq_is_umap = (flags & IBT_SRQ_USER_MAP) ? 1 : 0; 103 if (srq_is_umap) { 104 status = hermon_umap_db_find(state->hs_instance, ddi_get_pid(), 105 MLNX_UMAP_UARPG_RSRC, &value, 0, NULL); 106 if (status != DDI_SUCCESS) { 107 status = IBT_INVALID_PARAM; 108 goto srqalloc_fail3; 109 } 110 uarpg = ((hermon_rsrc_t *)(uintptr_t)value)->hr_indx; 111 } else { 112 uarpg = state->hs_kernel_uar_index; 113 } 114 115 /* Increase PD refcnt */ 116 hermon_pd_refcnt_inc(pd); 117 118 /* Allocate an SRQ context entry */ 119 status = hermon_rsrc_alloc(state, HERMON_SRQC, 1, sleepflag, &srqc); 120 if (status != DDI_SUCCESS) { 121 status = IBT_INSUFF_RESOURCE; 122 goto srqalloc_fail1; 123 } 124 125 /* Allocate the SRQ Handle entry */ 126 status = hermon_rsrc_alloc(state, HERMON_SRQHDL, 1, sleepflag, &rsrc); 127 if (status != DDI_SUCCESS) { 128 status = IBT_INSUFF_RESOURCE; 129 goto srqalloc_fail2; 130 } 131 132 srq = (hermon_srqhdl_t)rsrc->hr_addr; 133 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*srq)) 134 135 bzero(srq, sizeof (struct hermon_sw_srq_s)); 136 /* Calculate the SRQ number */ 137 138 /* just use the index, implicit in Hermon */ 139 srq->srq_srqnum = srqc->hr_indx; 140 141 /* 142 * If this will be a user-mappable SRQ, then allocate an entry for 143 * the "userland resources database". This will later be added to 144 * the database (after all further SRQ operations are successful). 145 * If we fail here, we must undo the reference counts and the 146 * previous resource allocation. 147 */ 148 if (srq_is_umap) { 149 umapdb = hermon_umap_db_alloc(state->hs_instance, 150 srq->srq_srqnum, MLNX_UMAP_SRQMEM_RSRC, 151 (uint64_t)(uintptr_t)rsrc); 152 if (umapdb == NULL) { 153 status = IBT_INSUFF_RESOURCE; 154 goto srqalloc_fail3; 155 } 156 } 157 158 /* 159 * Allocate the doorbell record. Hermon just needs one for the 160 * SRQ, and use uarpg (above) as the uar index 161 */ 162 163 status = hermon_dbr_alloc(state, uarpg, &srq->srq_wq_dbr_acchdl, 164 &srq->srq_wq_vdbr, &srq->srq_wq_pdbr, &srq->srq_rdbr_mapoffset); 165 if (status != DDI_SUCCESS) { 166 status = IBT_INSUFF_RESOURCE; 167 goto srqalloc_fail4; 168 } 169 170 /* 171 * Calculate the appropriate size for the SRQ. 172 * Note: All Hermon SRQs must be a power-of-2 in size. Also 173 * they may not be any smaller than HERMON_SRQ_MIN_SIZE. This step 174 * is to round the requested size up to the next highest power-of-2 175 */ 176 srq_wr_sz = max(sizes->srq_wr_sz + 1, HERMON_SRQ_MIN_SIZE); 177 log_srq_size = highbit(srq_wr_sz); 178 if ((srq_wr_sz & (srq_wr_sz - 1)) == 0) { 179 log_srq_size = log_srq_size - 1; 180 } 181 182 /* 183 * Next we verify that the rounded-up size is valid (i.e. consistent 184 * with the device limits and/or software-configured limits). If not, 185 * then obviously we have a lot of cleanup to do before returning. 186 */ 187 if (log_srq_size > state->hs_cfg_profile->cp_log_max_srq_sz) { 188 status = IBT_HCA_WR_EXCEEDED; 189 goto srqalloc_fail4a; 190 } 191 192 /* 193 * Next we verify that the requested number of SGL is valid (i.e. 194 * consistent with the device limits and/or software-configured 195 * limits). If not, then obviously the same cleanup needs to be done. 196 */ 197 max_sgl = state->hs_ibtfinfo.hca_attr->hca_max_srq_sgl; 198 if (sizes->srq_sgl_sz > max_sgl) { 199 status = IBT_HCA_SGL_EXCEEDED; 200 goto srqalloc_fail4a; 201 } 202 203 /* 204 * Determine the SRQ's WQE sizes. This depends on the requested 205 * number of SGLs. Note: This also has the side-effect of 206 * calculating the real number of SGLs (for the calculated WQE size) 207 */ 208 hermon_srq_sgl_to_logwqesz(state, sizes->srq_sgl_sz, 209 HERMON_QP_WQ_TYPE_RECVQ, &srq->srq_wq_log_wqesz, 210 &srq->srq_wq_sgl); 211 212 /* 213 * Allocate the memory for SRQ work queues. Note: The location from 214 * which we will allocate these work queues is always 215 * QUEUE_LOCATION_NORMAL. Since Hermon work queues are not 216 * allowed to cross a 32-bit (4GB) boundary, the alignment of the work 217 * queue memory is very important. We used to allocate work queues 218 * (the combined receive and send queues) so that they would be aligned 219 * on their combined size. That alignment guaranteed that they would 220 * never cross the 4GB boundary (Hermon work queues are on the order of 221 * MBs at maximum). Now we are able to relax this alignment constraint 222 * by ensuring that the IB address assigned to the queue memory (as a 223 * result of the hermon_mr_register() call) is offset from zero. 224 * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to 225 * guarantee the alignment, but when attempting to use IOMMU bypass 226 * mode we found that we were not allowed to specify any alignment that 227 * was more restrictive than the system page size. So we avoided this 228 * constraint by passing two alignment values, one for the memory 229 * allocation itself and the other for the DMA handle (for later bind). 230 * This used to cause more memory than necessary to be allocated (in 231 * order to guarantee the more restrictive alignment contraint). But 232 * be guaranteeing the zero-based IB virtual address for the queue, we 233 * are able to conserve this memory. 234 * 235 * Note: If SRQ is not user-mappable, then it may come from either 236 * kernel system memory or from HCA-attached local DDR memory. 237 * 238 * Note2: We align this queue on a pagesize boundary. This is required 239 * to make sure that all the resulting IB addresses will start at 0, for 240 * a zero-based queue. By making sure we are aligned on at least a 241 * page, any offset we use into our queue will be the same as when we 242 * perform hermon_srq_modify() operations later. 243 */ 244 wqesz = (1 << srq->srq_wq_log_wqesz); 245 srq->srq_wqinfo.qa_size = (1 << log_srq_size) * wqesz; 246 srq->srq_wqinfo.qa_alloc_align = PAGESIZE; 247 srq->srq_wqinfo.qa_bind_align = PAGESIZE; 248 if (srq_is_umap) { 249 srq->srq_wqinfo.qa_location = HERMON_QUEUE_LOCATION_USERLAND; 250 } else { 251 srq->srq_wqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL; 252 } 253 status = hermon_queue_alloc(state, &srq->srq_wqinfo, sleepflag); 254 if (status != DDI_SUCCESS) { 255 status = IBT_INSUFF_RESOURCE; 256 goto srqalloc_fail4a; 257 } 258 buf = (uint32_t *)srq->srq_wqinfo.qa_buf_aligned; 259 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*buf)) 260 261 /* 262 * Register the memory for the SRQ work queues. The memory for the SRQ 263 * must be registered in the Hermon cMPT tables. This gives us the LKey 264 * to specify in the SRQ context later. Note: If the work queue is to 265 * be allocated from DDR memory, then only a "bypass" mapping is 266 * appropriate. And if the SRQ memory is user-mappable, then we force 267 * DDI_DMA_CONSISTENT mapping. Also, in order to meet the alignment 268 * restriction, we pass the "mro_bind_override_addr" flag in the call 269 * to hermon_mr_register(). This guarantees that the resulting IB vaddr 270 * will be zero-based (modulo the offset into the first page). If we 271 * fail here, we still have the bunch of resource and reference count 272 * cleanup to do. 273 */ 274 flag = (sleepflag == HERMON_SLEEP) ? IBT_MR_SLEEP : 275 IBT_MR_NOSLEEP; 276 mr_attr.mr_vaddr = (uint64_t)(uintptr_t)buf; 277 mr_attr.mr_len = srq->srq_wqinfo.qa_size; 278 mr_attr.mr_as = NULL; 279 mr_attr.mr_flags = flag | IBT_MR_ENABLE_LOCAL_WRITE; 280 mr_op.mro_bind_type = state->hs_cfg_profile->cp_iommu_bypass; 281 mr_op.mro_bind_dmahdl = srq->srq_wqinfo.qa_dmahdl; 282 mr_op.mro_bind_override_addr = 1; 283 status = hermon_mr_register(state, pd, &mr_attr, &mr, 284 &mr_op, HERMON_SRQ_CMPT); 285 if (status != DDI_SUCCESS) { 286 status = IBT_INSUFF_RESOURCE; 287 goto srqalloc_fail5; 288 } 289 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr)) 290 291 /* 292 * Calculate the offset between the kernel virtual address space 293 * and the IB virtual address space. This will be used when 294 * posting work requests to properly initialize each WQE. 295 */ 296 srq_desc_off = (uint64_t)(uintptr_t)srq->srq_wqinfo.qa_buf_aligned - 297 (uint64_t)mr->mr_bindinfo.bi_addr; 298 299 srq->srq_wq_wqhdr = hermon_wrid_wqhdr_create(1 << log_srq_size); 300 301 /* 302 * Fill in all the return arguments (if necessary). This includes 303 * real queue size and real SGLs. 304 */ 305 if (real_sizes != NULL) { 306 real_sizes->srq_wr_sz = (1 << log_srq_size) - 1; 307 real_sizes->srq_sgl_sz = srq->srq_wq_sgl; 308 } 309 310 /* 311 * Fill in the SRQC entry. This is the final step before passing 312 * ownership of the SRQC entry to the Hermon hardware. We use all of 313 * the information collected/calculated above to fill in the 314 * requisite portions of the SRQC. Note: If this SRQ is going to be 315 * used for userland access, then we need to set the UAR page number 316 * appropriately (otherwise it's a "don't care") 317 */ 318 bzero(&srqc_entry, sizeof (hermon_hw_srqc_t)); 319 srqc_entry.state = HERMON_SRQ_STATE_HW_OWNER; 320 srqc_entry.log_srq_size = log_srq_size; 321 srqc_entry.srqn = srq->srq_srqnum; 322 srqc_entry.log_rq_stride = srq->srq_wq_log_wqesz - 4; 323 /* 16-byte chunks */ 324 325 srqc_entry.page_offs = srq->srq_wqinfo.qa_pgoffs >> 6; 326 srqc_entry.log2_pgsz = mr->mr_log2_pgsz; 327 srqc_entry.mtt_base_addrh = (uint32_t)((mr->mr_mttaddr >> 32) & 0xFF); 328 srqc_entry.mtt_base_addrl = mr->mr_mttaddr >> 3; 329 srqc_entry.pd = pd->pd_pdnum; 330 srqc_entry.dbr_addrh = (uint32_t)((uint64_t)srq->srq_wq_pdbr >> 32); 331 srqc_entry.dbr_addrl = (uint32_t)((uint64_t)srq->srq_wq_pdbr >> 2); 332 333 /* 334 * all others - specifically, xrcd, cqn_xrc, lwm, wqe_cnt, and wqe_cntr 335 * are zero thanks to the bzero of the structure 336 */ 337 338 /* 339 * Write the SRQC entry to hardware. Lastly, we pass ownership of 340 * the entry to the hardware (using the Hermon SW2HW_SRQ firmware 341 * command). Note: In general, this operation shouldn't fail. But 342 * if it does, we have to undo everything we've done above before 343 * returning error. 344 */ 345 status = hermon_cmn_ownership_cmd_post(state, SW2HW_SRQ, &srqc_entry, 346 sizeof (hermon_hw_srqc_t), srq->srq_srqnum, 347 sleepflag); 348 if (status != HERMON_CMD_SUCCESS) { 349 cmn_err(CE_CONT, "Hermon: SW2HW_SRQ command failed: %08x\n", 350 status); 351 if (status == HERMON_CMD_INVALID_STATUS) { 352 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 353 } 354 status = ibc_get_ci_failure(0); 355 goto srqalloc_fail8; 356 } 357 358 /* 359 * Fill in the rest of the Hermon SRQ handle. We can update 360 * the following fields for use in further operations on the SRQ. 361 */ 362 srq->srq_srqcrsrcp = srqc; 363 srq->srq_rsrcp = rsrc; 364 srq->srq_mrhdl = mr; 365 srq->srq_refcnt = 0; 366 srq->srq_is_umap = srq_is_umap; 367 srq->srq_uarpg = uarpg; 368 srq->srq_umap_dhp = (devmap_cookie_t)NULL; 369 srq->srq_pdhdl = pd; 370 srq->srq_wq_bufsz = (1 << log_srq_size); 371 srq->srq_wq_buf = buf; 372 srq->srq_desc_off = srq_desc_off; 373 srq->srq_hdlrarg = (void *)ibt_srqhdl; 374 srq->srq_state = 0; 375 srq->srq_real_sizes.srq_wr_sz = (1 << log_srq_size); 376 srq->srq_real_sizes.srq_sgl_sz = srq->srq_wq_sgl; 377 378 /* 379 * Put SRQ handle in Hermon SRQNum-to-SRQhdl list. Then fill in the 380 * "srqhdl" and return success 381 */ 382 ASSERT(state->hs_srqhdl[srqc->hr_indx] == NULL); 383 state->hs_srqhdl[srqc->hr_indx] = srq; 384 385 /* 386 * If this is a user-mappable SRQ, then we need to insert the 387 * previously allocated entry into the "userland resources database". 388 * This will allow for later lookup during devmap() (i.e. mmap()) 389 * calls. 390 */ 391 if (srq->srq_is_umap) { 392 hermon_umap_db_add(umapdb); 393 } else { /* initialize work queue for kernel SRQs */ 394 int i, len, last; 395 uint16_t *desc; 396 397 desc = (uint16_t *)buf; 398 len = wqesz / sizeof (*desc); 399 last = srq->srq_wq_bufsz - 1; 400 for (i = 0; i < last; i++) { 401 desc[1] = htons(i + 1); 402 desc += len; 403 } 404 srq->srq_wq_wqhdr->wq_tail = last; 405 srq->srq_wq_wqhdr->wq_head = 0; 406 } 407 408 *srqhdl = srq; 409 410 return (status); 411 412 /* 413 * The following is cleanup for all possible failure cases in this routine 414 */ 415 srqalloc_fail8: 416 hermon_wrid_wqhdr_destroy(srq->srq_wq_wqhdr); 417 srqalloc_fail7: 418 if (hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL, 419 HERMON_SLEEPFLAG_FOR_CONTEXT()) != DDI_SUCCESS) { 420 HERMON_WARNING(state, "failed to deregister SRQ memory"); 421 } 422 srqalloc_fail5: 423 hermon_queue_free(&srq->srq_wqinfo); 424 srqalloc_fail4a: 425 hermon_dbr_free(state, uarpg, srq->srq_wq_vdbr); 426 srqalloc_fail4: 427 if (srq_is_umap) { 428 hermon_umap_db_free(umapdb); 429 } 430 srqalloc_fail3: 431 hermon_rsrc_free(state, &rsrc); 432 srqalloc_fail2: 433 hermon_rsrc_free(state, &srqc); 434 srqalloc_fail1: 435 hermon_pd_refcnt_dec(pd); 436 srqalloc_fail: 437 return (status); 438 } 439 440 441 /* 442 * hermon_srq_free() 443 * Context: Can be called only from user or kernel context. 444 */ 445 /* ARGSUSED */ 446 int 447 hermon_srq_free(hermon_state_t *state, hermon_srqhdl_t *srqhdl, 448 uint_t sleepflag) 449 { 450 hermon_rsrc_t *srqc, *rsrc; 451 hermon_umap_db_entry_t *umapdb; 452 uint64_t value; 453 hermon_srqhdl_t srq; 454 hermon_mrhdl_t mr; 455 hermon_pdhdl_t pd; 456 hermon_hw_srqc_t srqc_entry; 457 uint32_t srqnum; 458 uint_t maxprot; 459 int status; 460 461 /* 462 * Pull all the necessary information from the Hermon Shared Receive 463 * Queue handle. This is necessary here because the resource for the 464 * SRQ handle is going to be freed up as part of this operation. 465 */ 466 srq = *srqhdl; 467 mutex_enter(&srq->srq_lock); 468 srqc = srq->srq_srqcrsrcp; 469 rsrc = srq->srq_rsrcp; 470 pd = srq->srq_pdhdl; 471 mr = srq->srq_mrhdl; 472 srqnum = srq->srq_srqnum; 473 474 /* 475 * If there are work queues still associated with the SRQ, then return 476 * an error. Otherwise, we will be holding the SRQ lock. 477 */ 478 if (srq->srq_refcnt != 0) { 479 mutex_exit(&srq->srq_lock); 480 return (IBT_SRQ_IN_USE); 481 } 482 483 /* 484 * If this was a user-mappable SRQ, then we need to remove its entry 485 * from the "userland resources database". If it is also currently 486 * mmap()'d out to a user process, then we need to call 487 * devmap_devmem_remap() to remap the SRQ memory to an invalid mapping. 488 * We also need to invalidate the SRQ tracking information for the 489 * user mapping. 490 */ 491 if (srq->srq_is_umap) { 492 status = hermon_umap_db_find(state->hs_instance, 493 srq->srq_srqnum, MLNX_UMAP_SRQMEM_RSRC, &value, 494 HERMON_UMAP_DB_REMOVE, &umapdb); 495 if (status != DDI_SUCCESS) { 496 mutex_exit(&srq->srq_lock); 497 HERMON_WARNING(state, "failed to find in database"); 498 return (ibc_get_ci_failure(0)); 499 } 500 hermon_umap_db_free(umapdb); 501 if (srq->srq_umap_dhp != NULL) { 502 maxprot = (PROT_READ | PROT_WRITE | PROT_USER); 503 status = devmap_devmem_remap(srq->srq_umap_dhp, 504 state->hs_dip, 0, 0, srq->srq_wqinfo.qa_size, 505 maxprot, DEVMAP_MAPPING_INVALID, NULL); 506 if (status != DDI_SUCCESS) { 507 mutex_exit(&srq->srq_lock); 508 HERMON_WARNING(state, "failed in SRQ memory " 509 "devmap_devmem_remap()"); 510 return (ibc_get_ci_failure(0)); 511 } 512 srq->srq_umap_dhp = (devmap_cookie_t)NULL; 513 } 514 } 515 516 /* 517 * Put NULL into the Hermon SRQNum-to-SRQHdl list. This will allow any 518 * in-progress events to detect that the SRQ corresponding to this 519 * number has been freed. 520 */ 521 state->hs_srqhdl[srqc->hr_indx] = NULL; 522 523 mutex_exit(&srq->srq_lock); 524 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*srq)); 525 526 /* 527 * Reclaim SRQC entry from hardware (using the Hermon HW2SW_SRQ 528 * firmware command). If the ownership transfer fails for any reason, 529 * then it is an indication that something (either in HW or SW) has 530 * gone seriously wrong. 531 */ 532 status = hermon_cmn_ownership_cmd_post(state, HW2SW_SRQ, &srqc_entry, 533 sizeof (hermon_hw_srqc_t), srqnum, sleepflag); 534 if (status != HERMON_CMD_SUCCESS) { 535 HERMON_WARNING(state, "failed to reclaim SRQC ownership"); 536 cmn_err(CE_CONT, "Hermon: HW2SW_SRQ command failed: %08x\n", 537 status); 538 if (status == HERMON_CMD_INVALID_STATUS) { 539 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 540 } 541 return (ibc_get_ci_failure(0)); 542 } 543 544 /* 545 * Deregister the memory for the Shared Receive Queue. If this fails 546 * for any reason, then it is an indication that something (either 547 * in HW or SW) has gone seriously wrong. So we print a warning 548 * message and return. 549 */ 550 status = hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL, 551 sleepflag); 552 if (status != DDI_SUCCESS) { 553 HERMON_WARNING(state, "failed to deregister SRQ memory"); 554 return (IBT_FAILURE); 555 } 556 557 hermon_wrid_wqhdr_destroy(srq->srq_wq_wqhdr); 558 559 /* Free the memory for the SRQ */ 560 hermon_queue_free(&srq->srq_wqinfo); 561 562 /* Free the dbr */ 563 hermon_dbr_free(state, srq->srq_uarpg, srq->srq_wq_vdbr); 564 565 /* Free the Hermon SRQ Handle */ 566 hermon_rsrc_free(state, &rsrc); 567 568 /* Free the SRQC entry resource */ 569 hermon_rsrc_free(state, &srqc); 570 571 /* Decrement the reference count on the protection domain (PD) */ 572 hermon_pd_refcnt_dec(pd); 573 574 /* Set the srqhdl pointer to NULL and return success */ 575 *srqhdl = NULL; 576 577 return (DDI_SUCCESS); 578 } 579 580 581 /* 582 * hermon_srq_modify() 583 * Context: Can be called only from user or kernel context. 584 */ 585 int 586 hermon_srq_modify(hermon_state_t *state, hermon_srqhdl_t srq, uint_t size, 587 uint_t *real_size, uint_t sleepflag) 588 { 589 hermon_qalloc_info_t new_srqinfo, old_srqinfo; 590 hermon_rsrc_t *mtt, *old_mtt; 591 hermon_bind_info_t bind; 592 hermon_bind_info_t old_bind; 593 hermon_mrhdl_t mr; 594 hermon_hw_srqc_t srqc_entry; 595 hermon_hw_dmpt_t mpt_entry; 596 uint64_t *wre_new, *wre_old; 597 uint64_t mtt_addr; 598 uint64_t srq_pgoffs; 599 uint64_t srq_desc_off; 600 uint32_t *buf, srq_old_bufsz; 601 uint32_t wqesz; 602 uint_t max_srq_size; 603 uint_t mtt_pgsize_bits; 604 uint_t log_srq_size, maxprot; 605 int status; 606 607 if ((state->hs_devlim.mod_wr_srq == 0) || 608 (state->hs_cfg_profile->cp_srq_resize_enabled == 0)) 609 return (IBT_NOT_SUPPORTED); 610 611 /* 612 * If size requested is larger than device capability, return 613 * Insufficient Resources 614 */ 615 max_srq_size = (1 << state->hs_cfg_profile->cp_log_max_srq_sz); 616 if (size > max_srq_size) { 617 return (IBT_HCA_WR_EXCEEDED); 618 } 619 620 /* 621 * Calculate the appropriate size for the SRQ. 622 * Note: All Hermon SRQs must be a power-of-2 in size. Also 623 * they may not be any smaller than HERMON_SRQ_MIN_SIZE. This step 624 * is to round the requested size up to the next highest power-of-2 625 */ 626 size = max(size, HERMON_SRQ_MIN_SIZE); 627 log_srq_size = highbit(size); 628 if ((size & (size - 1)) == 0) { 629 log_srq_size = log_srq_size - 1; 630 } 631 632 /* 633 * Next we verify that the rounded-up size is valid (i.e. consistent 634 * with the device limits and/or software-configured limits). 635 */ 636 if (log_srq_size > state->hs_cfg_profile->cp_log_max_srq_sz) { 637 status = IBT_HCA_WR_EXCEEDED; 638 goto srqmodify_fail; 639 } 640 641 /* 642 * Allocate the memory for newly resized Shared Receive Queue. 643 * 644 * Note: If SRQ is not user-mappable, then it may come from either 645 * kernel system memory or from HCA-attached local DDR memory. 646 * 647 * Note2: We align this queue on a pagesize boundary. This is required 648 * to make sure that all the resulting IB addresses will start at 0, 649 * for a zero-based queue. By making sure we are aligned on at least a 650 * page, any offset we use into our queue will be the same as it was 651 * when we allocated it at hermon_srq_alloc() time. 652 */ 653 wqesz = (1 << srq->srq_wq_log_wqesz); 654 new_srqinfo.qa_size = (1 << log_srq_size) * wqesz; 655 new_srqinfo.qa_alloc_align = PAGESIZE; 656 new_srqinfo.qa_bind_align = PAGESIZE; 657 if (srq->srq_is_umap) { 658 new_srqinfo.qa_location = HERMON_QUEUE_LOCATION_USERLAND; 659 } else { 660 new_srqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL; 661 } 662 status = hermon_queue_alloc(state, &new_srqinfo, sleepflag); 663 if (status != DDI_SUCCESS) { 664 status = IBT_INSUFF_RESOURCE; 665 goto srqmodify_fail; 666 } 667 buf = (uint32_t *)new_srqinfo.qa_buf_aligned; 668 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*buf)) 669 670 /* 671 * Allocate the memory for the new WRE list. This will be used later 672 * when we resize the wridlist based on the new SRQ size. 673 */ 674 wre_new = kmem_zalloc((1 << log_srq_size) * sizeof (uint64_t), 675 sleepflag); 676 if (wre_new == NULL) { 677 status = IBT_INSUFF_RESOURCE; 678 goto srqmodify_fail; 679 } 680 681 /* 682 * Fill in the "bind" struct. This struct provides the majority 683 * of the information that will be used to distinguish between an 684 * "addr" binding (as is the case here) and a "buf" binding (see 685 * below). The "bind" struct is later passed to hermon_mr_mem_bind() 686 * which does most of the "heavy lifting" for the Hermon memory 687 * registration routines. 688 */ 689 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(bind)) 690 bzero(&bind, sizeof (hermon_bind_info_t)); 691 bind.bi_type = HERMON_BINDHDL_VADDR; 692 bind.bi_addr = (uint64_t)(uintptr_t)buf; 693 bind.bi_len = new_srqinfo.qa_size; 694 bind.bi_as = NULL; 695 bind.bi_flags = sleepflag == HERMON_SLEEP ? IBT_MR_SLEEP : 696 IBT_MR_NOSLEEP | IBT_MR_ENABLE_LOCAL_WRITE; 697 bind.bi_bypass = state->hs_cfg_profile->cp_iommu_bypass; 698 699 status = hermon_mr_mtt_bind(state, &bind, new_srqinfo.qa_dmahdl, &mtt, 700 &mtt_pgsize_bits, 0); /* no relaxed ordering */ 701 if (status != DDI_SUCCESS) { 702 status = status; 703 kmem_free(wre_new, (1 << log_srq_size) * 704 sizeof (uint64_t)); 705 hermon_queue_free(&new_srqinfo); 706 goto srqmodify_fail; 707 } 708 709 /* 710 * Calculate the offset between the kernel virtual address space 711 * and the IB virtual address space. This will be used when 712 * posting work requests to properly initialize each WQE. 713 * 714 * Note: bind addr is zero-based (from alloc) so we calculate the 715 * correct new offset here. 716 */ 717 bind.bi_addr = bind.bi_addr & ((1 << mtt_pgsize_bits) - 1); 718 srq_desc_off = (uint64_t)(uintptr_t)new_srqinfo.qa_buf_aligned - 719 (uint64_t)bind.bi_addr; 720 srq_pgoffs = (uint_t) 721 ((uintptr_t)new_srqinfo.qa_buf_aligned & HERMON_PAGEMASK); 722 723 /* 724 * Fill in the MPT entry. This is the final step before passing 725 * ownership of the MPT entry to the Hermon hardware. We use all of 726 * the information collected/calculated above to fill in the 727 * requisite portions of the MPT. 728 */ 729 bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t)); 730 mpt_entry.reg_win_len = bind.bi_len; 731 mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT); 732 mpt_entry.mtt_addr_h = mtt_addr >> 32; 733 mpt_entry.mtt_addr_l = mtt_addr >> 3; 734 735 /* 736 * for hermon we build up a new srqc and pass that (partially filled 737 * to resize SRQ instead of modifying the (d)mpt directly 738 */ 739 740 741 742 /* 743 * Now we grab the SRQ lock. Since we will be updating the actual 744 * SRQ location and the producer/consumer indexes, we should hold 745 * the lock. 746 * 747 * We do a HERMON_NOSLEEP here (and below), though, because we are 748 * holding the "srq_lock" and if we got raised to interrupt level 749 * by priority inversion, we would not want to block in this routine 750 * waiting for success. 751 */ 752 mutex_enter(&srq->srq_lock); 753 754 /* 755 * Copy old entries to new buffer 756 */ 757 srq_old_bufsz = srq->srq_wq_bufsz; 758 bcopy(srq->srq_wq_buf, buf, srq_old_bufsz * wqesz); 759 760 /* Sync entire "new" SRQ for use by hardware (if necessary) */ 761 (void) ddi_dma_sync(bind.bi_dmahdl, 0, new_srqinfo.qa_size, 762 DDI_DMA_SYNC_FORDEV); 763 764 /* 765 * Setup MPT information for use in the MODIFY_MPT command 766 */ 767 mr = srq->srq_mrhdl; 768 mutex_enter(&mr->mr_lock); 769 770 /* 771 * now, setup the srqc information needed for resize - limit the 772 * values, but use the same structure as the srqc 773 */ 774 775 srqc_entry.log_srq_size = log_srq_size; 776 srqc_entry.page_offs = srq_pgoffs >> 6; 777 srqc_entry.log2_pgsz = mr->mr_log2_pgsz; 778 srqc_entry.mtt_base_addrl = (uint64_t)mtt_addr >> 32; 779 srqc_entry.mtt_base_addrh = mtt_addr >> 3; 780 781 /* 782 * RESIZE_SRQ 783 * 784 * If this fails for any reason, then it is an indication that 785 * something (either in HW or SW) has gone seriously wrong. So we 786 * print a warning message and return. 787 */ 788 status = hermon_resize_srq_cmd_post(state, &srqc_entry, 789 srq->srq_srqnum, sleepflag); 790 if (status != HERMON_CMD_SUCCESS) { 791 cmn_err(CE_CONT, "Hermon: RESIZE_SRQ command failed: %08x\n", 792 status); 793 if (status == HERMON_CMD_INVALID_STATUS) { 794 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 795 } 796 (void) hermon_mr_mtt_unbind(state, &bind, mtt); 797 kmem_free(wre_new, (1 << log_srq_size) * 798 sizeof (uint64_t)); 799 hermon_queue_free(&new_srqinfo); 800 mutex_exit(&mr->mr_lock); 801 mutex_exit(&srq->srq_lock); 802 return (ibc_get_ci_failure(0)); 803 } 804 /* 805 * Update the Hermon Shared Receive Queue handle with all the new 806 * information. At the same time, save away all the necessary 807 * information for freeing up the old resources 808 */ 809 old_srqinfo = srq->srq_wqinfo; 810 old_mtt = srq->srq_mrhdl->mr_mttrsrcp; 811 bcopy(&srq->srq_mrhdl->mr_bindinfo, &old_bind, 812 sizeof (hermon_bind_info_t)); 813 814 /* Now set the new info */ 815 srq->srq_wqinfo = new_srqinfo; 816 srq->srq_wq_buf = buf; 817 srq->srq_wq_bufsz = (1 << log_srq_size); 818 bcopy(&bind, &srq->srq_mrhdl->mr_bindinfo, sizeof (hermon_bind_info_t)); 819 srq->srq_mrhdl->mr_mttrsrcp = mtt; 820 srq->srq_desc_off = srq_desc_off; 821 srq->srq_real_sizes.srq_wr_sz = (1 << log_srq_size); 822 823 /* Update MR mtt pagesize */ 824 mr->mr_logmttpgsz = mtt_pgsize_bits; 825 mutex_exit(&mr->mr_lock); 826 827 /* 828 * Initialize new wridlist, if needed. 829 * 830 * If a wridlist already is setup on an SRQ (the QP associated with an 831 * SRQ has moved "from_reset") then we must update this wridlist based 832 * on the new SRQ size. We allocate the new size of Work Request ID 833 * Entries, copy over the old entries to the new list, and 834 * re-initialize the srq wridlist in non-umap case 835 */ 836 wre_old = srq->srq_wq_wqhdr->wq_wrid; 837 838 bcopy(wre_old, wre_new, srq_old_bufsz * sizeof (uint64_t)); 839 840 /* Setup new sizes in wre */ 841 srq->srq_wq_wqhdr->wq_wrid = wre_new; 842 843 /* 844 * If "old" SRQ was a user-mappable SRQ that is currently mmap()'d out 845 * to a user process, then we need to call devmap_devmem_remap() to 846 * invalidate the mapping to the SRQ memory. We also need to 847 * invalidate the SRQ tracking information for the user mapping. 848 * 849 * Note: On failure, the remap really shouldn't ever happen. So, if it 850 * does, it is an indication that something has gone seriously wrong. 851 * So we print a warning message and return error (knowing, of course, 852 * that the "old" SRQ memory will be leaked) 853 */ 854 if ((srq->srq_is_umap) && (srq->srq_umap_dhp != NULL)) { 855 maxprot = (PROT_READ | PROT_WRITE | PROT_USER); 856 status = devmap_devmem_remap(srq->srq_umap_dhp, 857 state->hs_dip, 0, 0, srq->srq_wqinfo.qa_size, maxprot, 858 DEVMAP_MAPPING_INVALID, NULL); 859 if (status != DDI_SUCCESS) { 860 mutex_exit(&srq->srq_lock); 861 HERMON_WARNING(state, "failed in SRQ memory " 862 "devmap_devmem_remap()"); 863 /* We can, however, free the memory for old wre */ 864 kmem_free(wre_old, srq_old_bufsz * sizeof (uint64_t)); 865 return (ibc_get_ci_failure(0)); 866 } 867 srq->srq_umap_dhp = (devmap_cookie_t)NULL; 868 } 869 870 /* 871 * Drop the SRQ lock now. The only thing left to do is to free up 872 * the old resources. 873 */ 874 mutex_exit(&srq->srq_lock); 875 876 /* 877 * Unbind the MTT entries. 878 */ 879 status = hermon_mr_mtt_unbind(state, &old_bind, old_mtt); 880 if (status != DDI_SUCCESS) { 881 HERMON_WARNING(state, "failed to unbind old SRQ memory"); 882 status = ibc_get_ci_failure(0); 883 goto srqmodify_fail; 884 } 885 886 /* Free the memory for old wre */ 887 kmem_free(wre_old, srq_old_bufsz * sizeof (uint64_t)); 888 889 /* Free the memory for the old SRQ */ 890 hermon_queue_free(&old_srqinfo); 891 892 /* 893 * Fill in the return arguments (if necessary). This includes the 894 * real new completion queue size. 895 */ 896 if (real_size != NULL) { 897 *real_size = (1 << log_srq_size); 898 } 899 900 return (DDI_SUCCESS); 901 902 srqmodify_fail: 903 return (status); 904 } 905 906 907 /* 908 * hermon_srq_refcnt_inc() 909 * Context: Can be called from interrupt or base context. 910 */ 911 void 912 hermon_srq_refcnt_inc(hermon_srqhdl_t srq) 913 { 914 mutex_enter(&srq->srq_lock); 915 srq->srq_refcnt++; 916 mutex_exit(&srq->srq_lock); 917 } 918 919 920 /* 921 * hermon_srq_refcnt_dec() 922 * Context: Can be called from interrupt or base context. 923 */ 924 void 925 hermon_srq_refcnt_dec(hermon_srqhdl_t srq) 926 { 927 mutex_enter(&srq->srq_lock); 928 srq->srq_refcnt--; 929 mutex_exit(&srq->srq_lock); 930 } 931 932 933 /* 934 * hermon_srqhdl_from_srqnum() 935 * Context: Can be called from interrupt or base context. 936 * 937 * This routine is important because changing the unconstrained 938 * portion of the SRQ number is critical to the detection of a 939 * potential race condition in the SRQ handler code (i.e. the case 940 * where a SRQ is freed and alloc'd again before an event for the 941 * "old" SRQ can be handled). 942 * 943 * While this is not a perfect solution (not sure that one exists) 944 * it does help to mitigate the chance that this race condition will 945 * cause us to deliver a "stale" event to the new SRQ owner. Note: 946 * this solution does not scale well because the number of constrained 947 * bits increases (and, hence, the number of unconstrained bits 948 * decreases) as the number of supported SRQ grows. For small and 949 * intermediate values, it should hopefully provide sufficient 950 * protection. 951 */ 952 hermon_srqhdl_t 953 hermon_srqhdl_from_srqnum(hermon_state_t *state, uint_t srqnum) 954 { 955 uint_t srqindx, srqmask; 956 957 /* Calculate the SRQ table index from the srqnum */ 958 srqmask = (1 << state->hs_cfg_profile->cp_log_num_srq) - 1; 959 srqindx = srqnum & srqmask; 960 return (state->hs_srqhdl[srqindx]); 961 } 962 963 964 /* 965 * hermon_srq_sgl_to_logwqesz() 966 * Context: Can be called from interrupt or base context. 967 */ 968 static void 969 hermon_srq_sgl_to_logwqesz(hermon_state_t *state, uint_t num_sgl, 970 hermon_qp_wq_type_t wq_type, uint_t *logwqesz, uint_t *max_sgl) 971 { 972 uint_t max_size, log2, actual_sgl; 973 974 switch (wq_type) { 975 case HERMON_QP_WQ_TYPE_RECVQ: 976 /* 977 * Use requested maximum SGL to calculate max descriptor size 978 * (while guaranteeing that the descriptor size is a 979 * power-of-2 cachelines). 980 */ 981 max_size = (HERMON_QP_WQE_MLX_SRQ_HDRS + (num_sgl << 4)); 982 log2 = highbit(max_size); 983 if ((max_size & (max_size - 1)) == 0) { 984 log2 = log2 - 1; 985 } 986 987 /* Make sure descriptor is at least the minimum size */ 988 log2 = max(log2, HERMON_QP_WQE_LOG_MINIMUM); 989 990 /* Calculate actual number of SGL (given WQE size) */ 991 actual_sgl = ((1 << log2) - HERMON_QP_WQE_MLX_SRQ_HDRS) >> 4; 992 break; 993 994 default: 995 HERMON_WARNING(state, "unexpected work queue type"); 996 break; 997 } 998 999 /* Fill in the return values */ 1000 *logwqesz = log2; 1001 *max_sgl = min(state->hs_cfg_profile->cp_srq_max_sgl, actual_sgl); 1002 } 1003