1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * hermon_mr.c 29 * Hermon Memory Region/Window Routines 30 * 31 * Implements all the routines necessary to provide the requisite memory 32 * registration verbs. These include operations like RegisterMemRegion(), 33 * DeregisterMemRegion(), ReregisterMemRegion, RegisterSharedMemRegion, 34 * etc., that affect Memory Regions. It also includes the verbs that 35 * affect Memory Windows, including AllocMemWindow(), FreeMemWindow(), 36 * and QueryMemWindow(). 
37 */ 38 39 #include <sys/types.h> 40 #include <sys/conf.h> 41 #include <sys/ddi.h> 42 #include <sys/sunddi.h> 43 #include <sys/modctl.h> 44 #include <sys/esunddi.h> 45 46 #include <sys/ib/adapters/hermon/hermon.h> 47 48 extern uint32_t hermon_kernel_data_ro; 49 extern uint32_t hermon_user_data_ro; 50 51 /* 52 * Used by hermon_mr_keycalc() below to fill in the "unconstrained" portion 53 * of Hermon memory keys (LKeys and RKeys) 54 */ 55 static uint_t hermon_memkey_cnt = 0x00; 56 #define HERMON_MEMKEY_SHIFT 24 57 #define HERMON_MPT_SW_OWNERSHIP 0xF 58 59 static int hermon_mr_common_reg(hermon_state_t *state, hermon_pdhdl_t pd, 60 hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl, hermon_mr_options_t *op, 61 hermon_mpt_rsrc_type_t mpt_type); 62 static int hermon_mr_common_rereg(hermon_state_t *state, hermon_mrhdl_t mr, 63 hermon_pdhdl_t pd, hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl_new, 64 hermon_mr_options_t *op); 65 static int hermon_mr_rereg_xlat_helper(hermon_state_t *state, hermon_mrhdl_t mr, 66 hermon_bind_info_t *bind, hermon_mr_options_t *op, uint64_t *mtt_addr, 67 uint_t sleep, uint_t *dereg_level); 68 static uint64_t hermon_mr_nummtt_needed(hermon_state_t *state, 69 hermon_bind_info_t *bind, uint_t *mtt_pgsize); 70 static int hermon_mr_mem_bind(hermon_state_t *state, hermon_bind_info_t *bind, 71 ddi_dma_handle_t dmahdl, uint_t sleep, uint_t is_buffer); 72 static void hermon_mr_mem_unbind(hermon_state_t *state, 73 hermon_bind_info_t *bind); 74 static int hermon_mr_fast_mtt_write(hermon_state_t *state, hermon_rsrc_t *mtt, 75 hermon_bind_info_t *bind, uint32_t mtt_pgsize_bits); 76 static int hermon_mr_fast_mtt_write_fmr(hermon_rsrc_t *mtt, 77 ibt_pmr_attr_t *mem_pattr, uint32_t mtt_pgsize_bits); 78 static uint_t hermon_mtt_refcnt_inc(hermon_rsrc_t *rsrc); 79 static uint_t hermon_mtt_refcnt_dec(hermon_rsrc_t *rsrc); 80 81 82 /* 83 * The Hermon umem_lockmemory() callback ops. When userland memory is 84 * registered, these callback ops are specified. 
The hermon_umap_umemlock_cb() 85 * callback will be called whenever the memory for the corresponding 86 * ddi_umem_cookie_t is being freed. 87 */ 88 static struct umem_callback_ops hermon_umem_cbops = { 89 UMEM_CALLBACK_VERSION, 90 hermon_umap_umemlock_cb, 91 }; 92 93 94 95 /* 96 * hermon_mr_register() 97 * Context: Can be called from interrupt or base context. 98 */ 99 int 100 hermon_mr_register(hermon_state_t *state, hermon_pdhdl_t pd, 101 ibt_mr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl, hermon_mr_options_t *op, 102 hermon_mpt_rsrc_type_t mpt_type) 103 { 104 hermon_bind_info_t bind; 105 int status; 106 107 /* 108 * Fill in the "bind" struct. This struct provides the majority 109 * of the information that will be used to distinguish between an 110 * "addr" binding (as is the case here) and a "buf" binding (see 111 * below). The "bind" struct is later passed to hermon_mr_mem_bind() 112 * which does most of the "heavy lifting" for the Hermon memory 113 * registration routines. 114 */ 115 bind.bi_type = HERMON_BINDHDL_VADDR; 116 bind.bi_addr = mr_attr->mr_vaddr; 117 bind.bi_len = mr_attr->mr_len; 118 bind.bi_as = mr_attr->mr_as; 119 bind.bi_flags = mr_attr->mr_flags; 120 status = hermon_mr_common_reg(state, pd, &bind, mrhdl, op, 121 mpt_type); 122 return (status); 123 } 124 125 126 /* 127 * hermon_mr_register_buf() 128 * Context: Can be called from interrupt or base context. 129 */ 130 int 131 hermon_mr_register_buf(hermon_state_t *state, hermon_pdhdl_t pd, 132 ibt_smr_attr_t *mr_attr, struct buf *buf, hermon_mrhdl_t *mrhdl, 133 hermon_mr_options_t *op, hermon_mpt_rsrc_type_t mpt_type) 134 { 135 hermon_bind_info_t bind; 136 int status; 137 138 /* 139 * Fill in the "bind" struct. This struct provides the majority 140 * of the information that will be used to distinguish between an 141 * "addr" binding (see above) and a "buf" binding (as is the case 142 * here). 
The "bind" struct is later passed to hermon_mr_mem_bind() 143 * which does most of the "heavy lifting" for the Hermon memory 144 * registration routines. Note: We have chosen to provide 145 * "b_un.b_addr" as the IB address (when the IBT_MR_PHYS_IOVA flag is 146 * not set). It is not critical what value we choose here as it need 147 * only be unique for the given RKey (which will happen by default), 148 * so the choice here is somewhat arbitrary. 149 */ 150 bind.bi_type = HERMON_BINDHDL_BUF; 151 bind.bi_buf = buf; 152 if (mr_attr->mr_flags & IBT_MR_PHYS_IOVA) { 153 bind.bi_addr = mr_attr->mr_vaddr; 154 } else { 155 bind.bi_addr = (uint64_t)(uintptr_t)buf->b_un.b_addr; 156 } 157 bind.bi_as = NULL; 158 bind.bi_len = (uint64_t)buf->b_bcount; 159 bind.bi_flags = mr_attr->mr_flags; 160 status = hermon_mr_common_reg(state, pd, &bind, mrhdl, op, mpt_type); 161 return (status); 162 } 163 164 165 /* 166 * hermon_mr_register_shared() 167 * Context: Can be called from interrupt or base context. 168 */ 169 int 170 hermon_mr_register_shared(hermon_state_t *state, hermon_mrhdl_t mrhdl, 171 hermon_pdhdl_t pd, ibt_smr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl_new) 172 { 173 hermon_rsrc_t *mpt, *mtt, *rsrc; 174 hermon_umap_db_entry_t *umapdb; 175 hermon_hw_dmpt_t mpt_entry; 176 hermon_mrhdl_t mr; 177 hermon_bind_info_t *bind; 178 ddi_umem_cookie_t umem_cookie; 179 size_t umem_len; 180 caddr_t umem_addr; 181 uint64_t mtt_addr, pgsize_msk; 182 uint_t sleep, mr_is_umem; 183 int status, umem_flags; 184 185 /* 186 * Check the sleep flag. Ensure that it is consistent with the 187 * current thread context (i.e. if we are currently in the interrupt 188 * context, then we shouldn't be attempting to sleep). 189 */ 190 sleep = (mr_attr->mr_flags & IBT_MR_NOSLEEP) ? 
HERMON_NOSLEEP : 191 HERMON_SLEEP; 192 if ((sleep == HERMON_SLEEP) && 193 (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) { 194 status = IBT_INVALID_PARAM; 195 goto mrshared_fail; 196 } 197 198 /* Increment the reference count on the protection domain (PD) */ 199 hermon_pd_refcnt_inc(pd); 200 201 /* 202 * Allocate an MPT entry. This will be filled in with all the 203 * necessary parameters to define the shared memory region. 204 * Specifically, it will be made to reference the currently existing 205 * MTT entries and ownership of the MPT will be passed to the hardware 206 * in the last step below. If we fail here, we must undo the 207 * protection domain reference count. 208 */ 209 status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt); 210 if (status != DDI_SUCCESS) { 211 status = IBT_INSUFF_RESOURCE; 212 goto mrshared_fail1; 213 } 214 215 /* 216 * Allocate the software structure for tracking the shared memory 217 * region (i.e. the Hermon Memory Region handle). If we fail here, we 218 * must undo the protection domain reference count and the previous 219 * resource allocation. 220 */ 221 status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc); 222 if (status != DDI_SUCCESS) { 223 status = IBT_INSUFF_RESOURCE; 224 goto mrshared_fail2; 225 } 226 mr = (hermon_mrhdl_t)rsrc->hr_addr; 227 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr)) 228 229 /* 230 * Setup and validate the memory region access flags. This means 231 * translating the IBTF's enable flags into the access flags that 232 * will be used in later operations. 
233 */ 234 mr->mr_accflag = 0; 235 if (mr_attr->mr_flags & IBT_MR_ENABLE_WINDOW_BIND) 236 mr->mr_accflag |= IBT_MR_WINDOW_BIND; 237 if (mr_attr->mr_flags & IBT_MR_ENABLE_LOCAL_WRITE) 238 mr->mr_accflag |= IBT_MR_LOCAL_WRITE; 239 if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_READ) 240 mr->mr_accflag |= IBT_MR_REMOTE_READ; 241 if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_WRITE) 242 mr->mr_accflag |= IBT_MR_REMOTE_WRITE; 243 if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_ATOMIC) 244 mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC; 245 246 /* 247 * Calculate keys (Lkey, Rkey) from MPT index. Each key is formed 248 * from a certain number of "constrained" bits (the least significant 249 * bits) and some number of "unconstrained" bits. The constrained 250 * bits must be set to the index of the entry in the MPT table, but 251 * the unconstrained bits can be set to any value we wish. Note: 252 * if no remote access is required, then the RKey value is not filled 253 * in. Otherwise both Rkey and LKey are given the same value. 254 */ 255 mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx); 256 if ((mr->mr_accflag & IBT_MR_REMOTE_READ) || 257 (mr->mr_accflag & IBT_MR_REMOTE_WRITE) || 258 (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) { 259 mr->mr_rkey = mr->mr_lkey; 260 } 261 262 /* Grab the MR lock for the current memory region */ 263 mutex_enter(&mrhdl->mr_lock); 264 265 /* 266 * Check here to see if the memory region has already been partially 267 * deregistered as a result of a hermon_umap_umemlock_cb() callback. 268 * If so, this is an error, return failure. 269 */ 270 if ((mrhdl->mr_is_umem) && (mrhdl->mr_umemcookie == NULL)) { 271 mutex_exit(&mrhdl->mr_lock); 272 status = IBT_MR_HDL_INVALID; 273 goto mrshared_fail3; 274 } 275 276 /* 277 * Determine if the original memory was from userland and, if so, pin 278 * the pages (again) with umem_lockmemory(). This will guarantee a 279 * separate callback for each of this shared region's MR handles. 
280 * If this is userland memory, then allocate an entry in the 281 * "userland resources database". This will later be added to 282 * the database (after all further memory registration operations are 283 * successful). If we fail here, we must undo all the above setup. 284 */ 285 mr_is_umem = mrhdl->mr_is_umem; 286 if (mr_is_umem) { 287 umem_len = ptob(btopr(mrhdl->mr_bindinfo.bi_len)); 288 umem_addr = (caddr_t)((uintptr_t)mrhdl->mr_bindinfo.bi_addr & 289 ~PAGEOFFSET); 290 umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ | 291 DDI_UMEMLOCK_LONGTERM); 292 status = umem_lockmemory(umem_addr, umem_len, umem_flags, 293 &umem_cookie, &hermon_umem_cbops, NULL); 294 if (status != 0) { 295 mutex_exit(&mrhdl->mr_lock); 296 status = IBT_INSUFF_RESOURCE; 297 goto mrshared_fail3; 298 } 299 300 umapdb = hermon_umap_db_alloc(state->hs_instance, 301 (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC, 302 (uint64_t)(uintptr_t)rsrc); 303 if (umapdb == NULL) { 304 mutex_exit(&mrhdl->mr_lock); 305 status = IBT_INSUFF_RESOURCE; 306 goto mrshared_fail4; 307 } 308 } 309 310 /* 311 * Copy the MTT resource pointer (and additional parameters) from 312 * the original Hermon Memory Region handle. Note: this is normally 313 * where the hermon_mr_mem_bind() routine would be called, but because 314 * we already have bound and filled-in MTT entries it is simply a 315 * matter here of managing the MTT reference count and grabbing the 316 * address of the MTT table entries (for filling in the shared region's 317 * MPT entry). 
318 */ 319 mr->mr_mttrsrcp = mrhdl->mr_mttrsrcp; 320 mr->mr_logmttpgsz = mrhdl->mr_logmttpgsz; 321 mr->mr_bindinfo = mrhdl->mr_bindinfo; 322 mr->mr_mttrefcntp = mrhdl->mr_mttrefcntp; 323 mutex_exit(&mrhdl->mr_lock); 324 bind = &mr->mr_bindinfo; 325 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind)) 326 mtt = mr->mr_mttrsrcp; 327 328 /* 329 * Increment the MTT reference count (to reflect the fact that 330 * the MTT is now shared) 331 */ 332 (void) hermon_mtt_refcnt_inc(mr->mr_mttrefcntp); 333 334 /* 335 * Update the new "bind" virtual address. Do some extra work here 336 * to ensure proper alignment. That is, make sure that the page 337 * offset for the beginning of the old range is the same as the 338 * offset for this new mapping 339 */ 340 pgsize_msk = (((uint64_t)1 << mr->mr_logmttpgsz) - 1); 341 bind->bi_addr = ((mr_attr->mr_vaddr & ~pgsize_msk) | 342 (mr->mr_bindinfo.bi_addr & pgsize_msk)); 343 344 /* 345 * Fill in the MPT entry. This is the final step before passing 346 * ownership of the MPT entry to the Hermon hardware. We use all of 347 * the information collected/calculated above to fill in the 348 * requisite portions of the MPT. 349 */ 350 bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t)); 351 mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND) ? 1 : 0; 352 mpt_entry.atomic = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0; 353 mpt_entry.rw = (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ? 1 : 0; 354 mpt_entry.rr = (mr->mr_accflag & IBT_MR_REMOTE_READ) ? 1 : 0; 355 mpt_entry.lw = (mr->mr_accflag & IBT_MR_LOCAL_WRITE) ? 
1 : 0; 356 mpt_entry.lr = 1; 357 mpt_entry.reg_win = HERMON_MPT_IS_REGION; 358 mpt_entry.entity_sz = mr->mr_logmttpgsz; 359 mpt_entry.mem_key = mr->mr_lkey; 360 mpt_entry.pd = pd->pd_pdnum; 361 mpt_entry.start_addr = bind->bi_addr; 362 mpt_entry.reg_win_len = bind->bi_len; 363 mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT); 364 mpt_entry.mtt_addr_h = mtt_addr >> 32; 365 mpt_entry.mtt_addr_l = mtt_addr >> 3; 366 367 /* 368 * Write the MPT entry to hardware. Lastly, we pass ownership of 369 * the entry to the hardware. Note: in general, this operation 370 * shouldn't fail. But if it does, we have to undo everything we've 371 * done above before returning error. 372 */ 373 status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry, 374 sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep); 375 if (status != HERMON_CMD_SUCCESS) { 376 cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n", 377 status); 378 if (status == HERMON_CMD_INVALID_STATUS) { 379 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 380 } 381 status = ibc_get_ci_failure(0); 382 goto mrshared_fail5; 383 } 384 385 /* 386 * Fill in the rest of the Hermon Memory Region handle. Having 387 * successfully transferred ownership of the MPT, we can update the 388 * following fields for use in further operations on the MR. 389 */ 390 mr->mr_mptrsrcp = mpt; 391 mr->mr_mttrsrcp = mtt; 392 mr->mr_mpt_type = HERMON_MPT_DMPT; 393 mr->mr_pdhdl = pd; 394 mr->mr_rsrcp = rsrc; 395 mr->mr_is_umem = mr_is_umem; 396 mr->mr_is_fmr = 0; 397 mr->mr_umemcookie = (mr_is_umem != 0) ? umem_cookie : NULL; 398 mr->mr_umem_cbfunc = NULL; 399 mr->mr_umem_cbarg1 = NULL; 400 mr->mr_umem_cbarg2 = NULL; 401 mr->mr_lkey = hermon_mr_key_swap(mr->mr_lkey); 402 mr->mr_rkey = hermon_mr_key_swap(mr->mr_rkey); 403 404 /* 405 * If this is userland memory, then we need to insert the previously 406 * allocated entry into the "userland resources database". 
This will 407 * allow for later coordination between the hermon_umap_umemlock_cb() 408 * callback and hermon_mr_deregister(). 409 */ 410 if (mr_is_umem) { 411 hermon_umap_db_add(umapdb); 412 } 413 414 *mrhdl_new = mr; 415 416 return (DDI_SUCCESS); 417 418 /* 419 * The following is cleanup for all possible failure cases in this routine 420 */ 421 mrshared_fail5: 422 (void) hermon_mtt_refcnt_dec(mr->mr_mttrefcntp); 423 if (mr_is_umem) { 424 hermon_umap_db_free(umapdb); 425 } 426 mrshared_fail4: 427 if (mr_is_umem) { 428 ddi_umem_unlock(umem_cookie); 429 } 430 mrshared_fail3: 431 hermon_rsrc_free(state, &rsrc); 432 mrshared_fail2: 433 hermon_rsrc_free(state, &mpt); 434 mrshared_fail1: 435 hermon_pd_refcnt_dec(pd); 436 mrshared_fail: 437 return (status); 438 } 439 440 /* 441 * hermon_mr_alloc_fmr() 442 * Context: Can be called from interrupt or base context. 443 */ 444 int 445 hermon_mr_alloc_fmr(hermon_state_t *state, hermon_pdhdl_t pd, 446 hermon_fmrhdl_t fmr_pool, hermon_mrhdl_t *mrhdl) 447 { 448 hermon_rsrc_t *mpt, *mtt, *rsrc; 449 hermon_hw_dmpt_t mpt_entry; 450 hermon_mrhdl_t mr; 451 hermon_bind_info_t bind; 452 uint64_t mtt_addr; 453 uint64_t nummtt; 454 uint_t sleep, mtt_pgsize_bits; 455 int status; 456 457 /* 458 * Check the sleep flag. Ensure that it is consistent with the 459 * current thread context (i.e. if we are currently in the interrupt 460 * context, then we shouldn't be attempting to sleep). 461 */ 462 sleep = (fmr_pool->fmr_flags & IBT_MR_SLEEP) ? HERMON_SLEEP : 463 HERMON_NOSLEEP; 464 if ((sleep == HERMON_SLEEP) && 465 (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) { 466 return (IBT_INVALID_PARAM); 467 } 468 469 /* Increment the reference count on the protection domain (PD) */ 470 hermon_pd_refcnt_inc(pd); 471 472 /* 473 * Allocate an MPT entry. This will be filled in with all the 474 * necessary parameters to define the FMR. 
Specifically, it will be 475 * made to reference the currently existing MTT entries and ownership 476 * of the MPT will be passed to the hardware in the last step below. 477 * If we fail here, we must undo the protection domain reference count. 478 */ 479 480 status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt); 481 if (status != DDI_SUCCESS) { 482 status = IBT_INSUFF_RESOURCE; 483 goto fmralloc_fail1; 484 } 485 486 /* 487 * Allocate the software structure for tracking the fmr memory 488 * region (i.e. the Hermon Memory Region handle). If we fail here, we 489 * must undo the protection domain reference count and the previous 490 * resource allocation. 491 */ 492 status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc); 493 if (status != DDI_SUCCESS) { 494 status = IBT_INSUFF_RESOURCE; 495 goto fmralloc_fail2; 496 } 497 mr = (hermon_mrhdl_t)rsrc->hr_addr; 498 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr)) 499 500 /* 501 * Setup and validate the memory region access flags. This means 502 * translating the IBTF's enable flags into the access flags that 503 * will be used in later operations. 504 */ 505 mr->mr_accflag = 0; 506 if (fmr_pool->fmr_flags & IBT_MR_ENABLE_LOCAL_WRITE) 507 mr->mr_accflag |= IBT_MR_LOCAL_WRITE; 508 if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_READ) 509 mr->mr_accflag |= IBT_MR_REMOTE_READ; 510 if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_WRITE) 511 mr->mr_accflag |= IBT_MR_REMOTE_WRITE; 512 if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_ATOMIC) 513 mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC; 514 515 /* 516 * Calculate keys (Lkey, Rkey) from MPT index. Each key is formed 517 * from a certain number of "constrained" bits (the least significant 518 * bits) and some number of "unconstrained" bits. The constrained 519 * bits must be set to the index of the entry in the MPT table, but 520 * the unconstrained bits can be set to any value we wish. Note: 521 * if no remote access is required, then the RKey value is not filled 522 * in. 
Otherwise both Rkey and LKey are given the same value. 523 */ 524 mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx); 525 if ((mr->mr_accflag & IBT_MR_REMOTE_READ) || 526 (mr->mr_accflag & IBT_MR_REMOTE_WRITE) || 527 (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) { 528 mr->mr_rkey = mr->mr_lkey; 529 } 530 531 /* 532 * Determine number of pages spanned. This routine uses the 533 * information in the "bind" struct to determine the required 534 * number of MTT entries needed (and returns the suggested page size - 535 * as a "power-of-2" - for each MTT entry). 536 */ 537 /* Assume address will be page aligned later */ 538 bind.bi_addr = 0; 539 /* Calculate size based on given max pages */ 540 bind.bi_len = fmr_pool->fmr_max_pages << PAGESHIFT; 541 nummtt = hermon_mr_nummtt_needed(state, &bind, &mtt_pgsize_bits); 542 543 /* 544 * Allocate the MTT entries. Use the calculations performed above to 545 * allocate the required number of MTT entries. If we fail here, we 546 * must not only undo all the previous resource allocation (and PD 547 * reference count), but we must also unbind the memory. 548 */ 549 status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt, sleep, &mtt); 550 if (status != DDI_SUCCESS) { 551 status = IBT_INSUFF_RESOURCE; 552 goto fmralloc_fail3; 553 } 554 mr->mr_logmttpgsz = mtt_pgsize_bits; 555 556 /* 557 * Fill in the MPT entry. This is the final step before passing 558 * ownership of the MPT entry to the Hermon hardware. We use all of 559 * the information collected/calculated above to fill in the 560 * requisite portions of the MPT. 561 */ 562 bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t)); 563 mpt_entry.en_bind = 0; 564 mpt_entry.atomic = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0; 565 mpt_entry.rw = (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ? 1 : 0; 566 mpt_entry.rr = (mr->mr_accflag & IBT_MR_REMOTE_READ) ? 1 : 0; 567 mpt_entry.lw = (mr->mr_accflag & IBT_MR_LOCAL_WRITE) ? 
1 : 0; 568 mpt_entry.lr = 1; 569 mpt_entry.reg_win = HERMON_MPT_IS_REGION; 570 mpt_entry.pd = pd->pd_pdnum; 571 572 mpt_entry.entity_sz = mr->mr_logmttpgsz; 573 mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT); 574 mpt_entry.mtt_addr_h = mtt_addr >> 32; 575 mpt_entry.mtt_addr_l = mtt_addr >> 3; 576 mpt_entry.mem_key = mr->mr_lkey; 577 578 /* 579 * FMR sets these to 0 for now. Later during actual fmr registration 580 * these values are filled in. 581 */ 582 mpt_entry.start_addr = 0; 583 mpt_entry.reg_win_len = 0; 584 585 /* 586 * Write the MPT entry to hardware. Lastly, we pass ownership of 587 * the entry to the hardware. Note: in general, this operation 588 * shouldn't fail. But if it does, we have to undo everything we've 589 * done above before returning error. 590 */ 591 status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry, 592 sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep); 593 if (status != HERMON_CMD_SUCCESS) { 594 cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n", 595 status); 596 if (status == HERMON_CMD_INVALID_STATUS) { 597 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 598 } 599 status = ibc_get_ci_failure(0); 600 goto fmralloc_fail4; 601 } 602 603 /* 604 * Fill in the rest of the Hermon Memory Region handle. Having 605 * successfully transferred ownership of the MPT, we can update the 606 * following fields for use in further operations on the MR. Also, set 607 * that this is an FMR region. 
608 */ 609 mr->mr_mptrsrcp = mpt; 610 mr->mr_mttrsrcp = mtt; 611 mr->mr_mpt_type = HERMON_MPT_DMPT; 612 mr->mr_pdhdl = pd; 613 mr->mr_rsrcp = rsrc; 614 mr->mr_is_fmr = 1; 615 mr->mr_lkey = hermon_mr_key_swap(mr->mr_lkey); 616 mr->mr_rkey = hermon_mr_key_swap(mr->mr_rkey); 617 (void) memcpy(&mr->mr_bindinfo, &bind, sizeof (hermon_bind_info_t)); 618 619 *mrhdl = mr; 620 621 return (DDI_SUCCESS); 622 623 /* 624 * The following is cleanup for all possible failure cases in this routine 625 */ 626 fmralloc_fail4: 627 kmem_free(mtt, sizeof (hermon_rsrc_t) * nummtt); 628 fmralloc_fail3: 629 hermon_rsrc_free(state, &rsrc); 630 fmralloc_fail2: 631 hermon_rsrc_free(state, &mpt); 632 fmralloc_fail1: 633 hermon_pd_refcnt_dec(pd); 634 fmralloc_fail: 635 return (status); 636 } 637 638 /* 639 * hermon_mr_register_physical_fmr() 640 * Context: Can be called from interrupt or base context. 641 */ 642 /*ARGSUSED*/ 643 int 644 hermon_mr_register_physical_fmr(hermon_state_t *state, 645 ibt_pmr_attr_t *mem_pattr_p, hermon_mrhdl_t mr, ibt_pmr_desc_t *mem_desc_p) 646 { 647 hermon_rsrc_t *mpt; 648 uint64_t *mpt_table; 649 int status; 650 651 mutex_enter(&mr->mr_lock); 652 mpt = mr->mr_mptrsrcp; 653 mpt_table = (uint64_t *)mpt->hr_addr; 654 655 /* Write MPT status to SW bit */ 656 ddi_put8(mpt->hr_acchdl, (uint8_t *)&mpt_table[0], 0xF); 657 658 /* 659 * Write the mapped addresses into the MTT entries. FMR needs to do 660 * this a little differently, so we call the fmr specific fast mtt 661 * write here. 662 */ 663 status = hermon_mr_fast_mtt_write_fmr(mr->mr_mttrsrcp, mem_pattr_p, 664 mr->mr_logmttpgsz); 665 if (status != DDI_SUCCESS) { 666 mutex_exit(&mr->mr_lock); 667 status = ibc_get_ci_failure(0); 668 goto fmr_reg_fail1; 669 } 670 671 /* 672 * Calculate keys (Lkey, Rkey) from MPT index. Each key is formed 673 * from a certain number of "constrained" bits (the least significant 674 * bits) and some number of "unconstrained" bits. 
The constrained 675 * bits must be set to the index of the entry in the MPT table, but 676 * the unconstrained bits can be set to any value we wish. Note: 677 * if no remote access is required, then the RKey value is not filled 678 * in. Otherwise both Rkey and LKey are given the same value. 679 */ 680 mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx); 681 if ((mr->mr_accflag & IBT_MR_REMOTE_READ) || 682 (mr->mr_accflag & IBT_MR_REMOTE_WRITE) || 683 (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) { 684 mr->mr_rkey = mr->mr_lkey; 685 } 686 687 /* write mem key value */ 688 ddi_put32(mpt->hr_acchdl, (uint32_t *)&mpt_table[1], mr->mr_lkey); 689 690 /* write length value */ 691 ddi_put64(mpt->hr_acchdl, &mpt_table[3], mem_pattr_p->pmr_len); 692 693 /* write start addr value */ 694 ddi_put64(mpt->hr_acchdl, &mpt_table[2], mem_pattr_p->pmr_iova); 695 696 /* write lkey value */ 697 ddi_put32(mpt->hr_acchdl, (uint32_t *)&mpt_table[4], mr->mr_lkey); 698 699 /* Write MPT status to HW bit */ 700 ddi_put8(mpt->hr_acchdl, (uint8_t *)&mpt_table[0], 0x0); 701 702 /* Fill in return parameters */ 703 mem_desc_p->pmd_lkey = mr->mr_lkey; 704 mem_desc_p->pmd_rkey = mr->mr_rkey; 705 mem_desc_p->pmd_iova = mem_pattr_p->pmr_iova; 706 mem_desc_p->pmd_phys_buf_list_sz = mem_pattr_p->pmr_len; 707 708 /* Fill in MR bindinfo struct for later sync or query operations */ 709 mr->mr_bindinfo.bi_addr = mem_pattr_p->pmr_iova; 710 mr->mr_bindinfo.bi_flags = mem_pattr_p->pmr_flags & IBT_MR_NONCOHERENT; 711 712 mutex_exit(&mr->mr_lock); 713 714 return (DDI_SUCCESS); 715 716 fmr_reg_fail1: 717 /* 718 * Note, we fail here, and purposely leave the memory ownership in 719 * software. The memory tables may be corrupt, so we leave the region 720 * unregistered. 721 */ 722 return (DDI_FAILURE); 723 } 724 725 726 /* 727 * hermon_mr_deregister() 728 * Context: Can be called from interrupt or base context. 
729 */ 730 /* ARGSUSED */ 731 int 732 hermon_mr_deregister(hermon_state_t *state, hermon_mrhdl_t *mrhdl, uint_t level, 733 uint_t sleep) 734 { 735 hermon_rsrc_t *mpt, *mtt, *rsrc, *mtt_refcnt; 736 hermon_umap_db_entry_t *umapdb; 737 hermon_pdhdl_t pd; 738 hermon_mrhdl_t mr; 739 hermon_bind_info_t *bind; 740 uint64_t value; 741 int status; 742 uint_t shared_mtt; 743 744 /* 745 * Check the sleep flag. Ensure that it is consistent with the 746 * current thread context (i.e. if we are currently in the interrupt 747 * context, then we shouldn't be attempting to sleep). 748 */ 749 if ((sleep == HERMON_SLEEP) && 750 (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) { 751 status = IBT_INVALID_PARAM; 752 return (status); 753 } 754 755 /* 756 * Pull all the necessary information from the Hermon Memory Region 757 * handle. This is necessary here because the resource for the 758 * MR handle is going to be freed up as part of the this 759 * deregistration 760 */ 761 mr = *mrhdl; 762 mutex_enter(&mr->mr_lock); 763 mpt = mr->mr_mptrsrcp; 764 mtt = mr->mr_mttrsrcp; 765 mtt_refcnt = mr->mr_mttrefcntp; 766 rsrc = mr->mr_rsrcp; 767 pd = mr->mr_pdhdl; 768 bind = &mr->mr_bindinfo; 769 770 /* 771 * Check here if the memory region is really an FMR. If so, this is a 772 * bad thing and we shouldn't be here. Return failure. 773 */ 774 if (mr->mr_is_fmr) { 775 mutex_exit(&mr->mr_lock); 776 return (IBT_INVALID_PARAM); 777 } 778 779 /* 780 * Check here to see if the memory region has already been partially 781 * deregistered as a result of the hermon_umap_umemlock_cb() callback. 782 * If so, then jump to the end and free the remaining resources. 783 */ 784 if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) { 785 goto mrdereg_finish_cleanup; 786 } 787 788 /* 789 * We must drop the "mr_lock" here to ensure that both SLEEP and 790 * NOSLEEP calls into the firmware work as expected. 
Also, if two 791 * threads are attemping to access this MR (via de-register, 792 * re-register, or otherwise), then we allow the firmware to enforce 793 * the checking, that only one deregister is valid. 794 */ 795 mutex_exit(&mr->mr_lock); 796 797 /* 798 * Reclaim MPT entry from hardware (if necessary). Since the 799 * hermon_mr_deregister() routine is used in the memory region 800 * reregistration process as well, it is possible that we will 801 * not always wish to reclaim ownership of the MPT. Check the 802 * "level" arg and, if necessary, attempt to reclaim it. If 803 * the ownership transfer fails for any reason, we check to see 804 * what command status was returned from the hardware. The only 805 * "expected" error status is the one that indicates an attempt to 806 * deregister a memory region that has memory windows bound to it 807 */ 808 if (level >= HERMON_MR_DEREG_ALL) { 809 if (mr->mr_mpt_type >= HERMON_MPT_DMPT) { 810 status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT, 811 NULL, 0, mpt->hr_indx, sleep); 812 if (status != HERMON_CMD_SUCCESS) { 813 if (status == HERMON_CMD_REG_BOUND) { 814 return (IBT_MR_IN_USE); 815 } else { 816 cmn_err(CE_CONT, "Hermon: HW2SW_MPT " 817 "command failed: %08x\n", status); 818 if (status == 819 HERMON_CMD_INVALID_STATUS) { 820 hermon_fm_ereport(state, 821 HCA_SYS_ERR, 822 DDI_SERVICE_LOST); 823 } 824 return (IBT_INVALID_PARAM); 825 } 826 } 827 } 828 } 829 830 /* 831 * Re-grab the mr_lock here. Since further access to the protected 832 * 'mr' structure is needed, and we would have returned previously for 833 * the multiple deregistration case, we can safely grab the lock here. 834 */ 835 mutex_enter(&mr->mr_lock); 836 837 /* 838 * If the memory had come from userland, then we do a lookup in the 839 * "userland resources database". On success, we free the entry, call 840 * ddi_umem_unlock(), and continue the cleanup. 
On failure (which is 841 * an indication that the umem_lockmemory() callback has called 842 * hermon_mr_deregister()), we call ddi_umem_unlock() and invalidate 843 * the "mr_umemcookie" field in the MR handle (this will be used 844 * later to detect that only partial cleaup still remains to be done 845 * on the MR handle). 846 */ 847 if (mr->mr_is_umem) { 848 status = hermon_umap_db_find(state->hs_instance, 849 (uint64_t)(uintptr_t)mr->mr_umemcookie, 850 MLNX_UMAP_MRMEM_RSRC, &value, HERMON_UMAP_DB_REMOVE, 851 &umapdb); 852 if (status == DDI_SUCCESS) { 853 hermon_umap_db_free(umapdb); 854 ddi_umem_unlock(mr->mr_umemcookie); 855 } else { 856 ddi_umem_unlock(mr->mr_umemcookie); 857 mr->mr_umemcookie = NULL; 858 } 859 } 860 861 /* 862 * Decrement the MTT reference count. Since the MTT resource 863 * may be shared between multiple memory regions (as a result 864 * of a "RegisterSharedMR" verb) it is important that we not 865 * free up or unbind resources prematurely. If it's not shared (as 866 * indicated by the return status), then free the resource. 867 */ 868 shared_mtt = hermon_mtt_refcnt_dec(mtt_refcnt); 869 if (!shared_mtt) { 870 hermon_rsrc_free(state, &mtt_refcnt); 871 } 872 873 /* 874 * Free up the MTT entries and unbind the memory. Here, as above, we 875 * attempt to free these resources only if it is appropriate to do so. 876 */ 877 if (!shared_mtt) { 878 if (level >= HERMON_MR_DEREG_NO_HW2SW_MPT) { 879 hermon_mr_mem_unbind(state, bind); 880 } 881 hermon_rsrc_free(state, &mtt); 882 } 883 884 /* 885 * If the MR handle has been invalidated, then drop the 886 * lock and return success. Note: This only happens because 887 * the umem_lockmemory() callback has been triggered. The 888 * cleanup here is partial, and further cleanup (in a 889 * subsequent hermon_mr_deregister() call) will be necessary. 
890 */ 891 if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) { 892 mutex_exit(&mr->mr_lock); 893 return (DDI_SUCCESS); 894 } 895 896 mrdereg_finish_cleanup: 897 mutex_exit(&mr->mr_lock); 898 899 /* Free the Hermon Memory Region handle */ 900 hermon_rsrc_free(state, &rsrc); 901 902 /* Free up the MPT entry resource */ 903 if (mpt != NULL) 904 hermon_rsrc_free(state, &mpt); 905 906 /* Decrement the reference count on the protection domain (PD) */ 907 hermon_pd_refcnt_dec(pd); 908 909 /* Set the mrhdl pointer to NULL and return success */ 910 *mrhdl = NULL; 911 912 return (DDI_SUCCESS); 913 } 914 915 /* 916 * hermon_mr_dealloc_fmr() 917 * Context: Can be called from interrupt or base context. 918 */ 919 /* ARGSUSED */ 920 int 921 hermon_mr_dealloc_fmr(hermon_state_t *state, hermon_mrhdl_t *mrhdl) 922 { 923 hermon_rsrc_t *mpt, *mtt, *rsrc; 924 hermon_pdhdl_t pd; 925 hermon_mrhdl_t mr; 926 927 /* 928 * Pull all the necessary information from the Hermon Memory Region 929 * handle. This is necessary here because the resource for the 930 * MR handle is going to be freed up as part of the this 931 * deregistration 932 */ 933 mr = *mrhdl; 934 mutex_enter(&mr->mr_lock); 935 mpt = mr->mr_mptrsrcp; 936 mtt = mr->mr_mttrsrcp; 937 rsrc = mr->mr_rsrcp; 938 pd = mr->mr_pdhdl; 939 mutex_exit(&mr->mr_lock); 940 941 /* Free the MTT entries */ 942 hermon_rsrc_free(state, &mtt); 943 944 /* Free the Hermon Memory Region handle */ 945 hermon_rsrc_free(state, &rsrc); 946 947 /* Free up the MPT entry resource */ 948 hermon_rsrc_free(state, &mpt); 949 950 /* Decrement the reference count on the protection domain (PD) */ 951 hermon_pd_refcnt_dec(pd); 952 953 /* Set the mrhdl pointer to NULL and return success */ 954 *mrhdl = NULL; 955 956 return (DDI_SUCCESS); 957 } 958 959 /* 960 * hermon_mr_invalidate_fmr() 961 * Context: Can be called from interrupt or base context. 
962 */ 963 /* ARGSUSED */ 964 int 965 hermon_mr_invalidate_fmr(hermon_state_t *state, hermon_mrhdl_t mr) 966 { 967 hermon_rsrc_t *mpt; 968 uint64_t *mpt_table; 969 970 mutex_enter(&mr->mr_lock); 971 mpt = mr->mr_mptrsrcp; 972 mpt_table = (uint64_t *)mpt->hr_addr; 973 974 /* Write MPT status to SW bit */ 975 ddi_put8(mpt->hr_acchdl, (uint8_t *)&mpt_table[0], 0xF); 976 977 /* invalidate mem key value */ 978 ddi_put32(mpt->hr_acchdl, (uint32_t *)&mpt_table[1], 0); 979 980 /* invalidate lkey value */ 981 ddi_put32(mpt->hr_acchdl, (uint32_t *)&mpt_table[4], 0); 982 983 /* Write MPT status to HW bit */ 984 ddi_put8(mpt->hr_acchdl, (uint8_t *)&mpt_table[0], 0x0); 985 986 mutex_exit(&mr->mr_lock); 987 988 return (DDI_SUCCESS); 989 } 990 991 /* 992 * hermon_mr_deregister_fmr() 993 * Context: Can be called from interrupt or base context. 994 */ 995 /* ARGSUSED */ 996 int 997 hermon_mr_deregister_fmr(hermon_state_t *state, hermon_mrhdl_t mr) 998 { 999 hermon_rsrc_t *mpt; 1000 uint64_t *mpt_table; 1001 1002 mutex_enter(&mr->mr_lock); 1003 mpt = mr->mr_mptrsrcp; 1004 mpt_table = (uint64_t *)mpt->hr_addr; 1005 1006 /* Write MPT status to SW bit */ 1007 ddi_put8(mpt->hr_acchdl, (uint8_t *)&mpt_table[0], 0xF); 1008 mutex_exit(&mr->mr_lock); 1009 1010 return (DDI_SUCCESS); 1011 } 1012 1013 1014 /* 1015 * hermon_mr_query() 1016 * Context: Can be called from interrupt or base context. 1017 */ 1018 /* ARGSUSED */ 1019 int 1020 hermon_mr_query(hermon_state_t *state, hermon_mrhdl_t mr, 1021 ibt_mr_query_attr_t *attr) 1022 { 1023 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr)) 1024 1025 mutex_enter(&mr->mr_lock); 1026 1027 /* 1028 * Check here to see if the memory region has already been partially 1029 * deregistered as a result of a hermon_umap_umemlock_cb() callback. 1030 * If so, this is an error, return failure. 
 */
	if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
		mutex_exit(&mr->mr_lock);
		return (IBT_MR_HDL_INVALID);
	}

	/* Fill in the queried attributes */
	attr->mr_attr_flags = mr->mr_accflag;
	attr->mr_pd = (ibt_pd_hdl_t)mr->mr_pdhdl;

	/* Fill in the "local" attributes (LKey and local bounds) */
	attr->mr_lkey = (ibt_lkey_t)mr->mr_lkey;
	attr->mr_lbounds.pb_addr = (ib_vaddr_t)mr->mr_bindinfo.bi_addr;
	attr->mr_lbounds.pb_len = (size_t)mr->mr_bindinfo.bi_len;

	/*
	 * Fill in the "remote" attributes (if necessary).  Note: the
	 * remote attributes are only valid if the memory region has one
	 * or more of the remote access flags set.  Remote bounds mirror
	 * the local bounds here.
	 */
	if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
		attr->mr_rkey = (ibt_rkey_t)mr->mr_rkey;
		attr->mr_rbounds.pb_addr = (ib_vaddr_t)mr->mr_bindinfo.bi_addr;
		attr->mr_rbounds.pb_len = (size_t)mr->mr_bindinfo.bi_len;
	}

	/*
	 * If region is mapped for streaming (i.e. noncoherent), then set sync
	 * is required
	 */
	attr->mr_sync_required = (mr->mr_bindinfo.bi_flags &
	    IBT_MR_NONCOHERENT) ? B_TRUE : B_FALSE;

	mutex_exit(&mr->mr_lock);
	return (DDI_SUCCESS);
}


/*
 * hermon_mr_reregister()
 *    Context: Can be called from interrupt or base context.
 *    Reregisters an existing MR against a virtual-address range; builds
 *    a "VADDR" bind descriptor and hands off to hermon_mr_common_rereg().
 */
int
hermon_mr_reregister(hermon_state_t *state, hermon_mrhdl_t mr,
    hermon_pdhdl_t pd, ibt_mr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl_new,
    hermon_mr_options_t *op)
{
	hermon_bind_info_t	bind;
	int			status;

	/*
	 * Fill in the "bind" struct.  This struct provides the majority
	 * of the information that will be used to distinguish between an
	 * "addr" binding (as is the case here) and a "buf" binding (see
	 * below).
 * The "bind" struct is later passed to hermon_mr_mem_bind()
	 * which does most of the "heavy lifting" for the Hermon memory
	 * registration (and reregistration) routines.
	 */
	bind.bi_type = HERMON_BINDHDL_VADDR;
	bind.bi_addr = mr_attr->mr_vaddr;
	bind.bi_len = mr_attr->mr_len;
	bind.bi_as = mr_attr->mr_as;
	bind.bi_flags = mr_attr->mr_flags;
	status = hermon_mr_common_rereg(state, mr, pd, &bind, mrhdl_new, op);
	return (status);
}


/*
 * hermon_mr_reregister_buf()
 *    Context: Can be called from interrupt or base context.
 *    Reregisters an existing MR against a "struct buf"; builds a "BUF"
 *    bind descriptor and hands off to hermon_mr_common_rereg().
 */
int
hermon_mr_reregister_buf(hermon_state_t *state, hermon_mrhdl_t mr,
    hermon_pdhdl_t pd, ibt_smr_attr_t *mr_attr, struct buf *buf,
    hermon_mrhdl_t *mrhdl_new, hermon_mr_options_t *op)
{
	hermon_bind_info_t	bind;
	int			status;

	/*
	 * Fill in the "bind" struct.  This struct provides the majority
	 * of the information that will be used to distinguish between an
	 * "addr" binding (see above) and a "buf" binding (as is the case
	 * here).  The "bind" struct is later passed to hermon_mr_mem_bind()
	 * which does most of the "heavy lifting" for the Hermon memory
	 * registration routines.  Note: We have chosen to provide
	 * "b_un.b_addr" as the IB address (when the IBT_MR_PHYS_IOVA flag is
	 * not set).  It is not critical what value we choose here as it need
	 * only be unique for the given RKey (which will happen by default),
	 * so the choice here is somewhat arbitrary.
	 */
	bind.bi_type = HERMON_BINDHDL_BUF;
	bind.bi_buf = buf;
	if (mr_attr->mr_flags & IBT_MR_PHYS_IOVA) {
		bind.bi_addr = mr_attr->mr_vaddr;
	} else {
		bind.bi_addr = (uint64_t)(uintptr_t)buf->b_un.b_addr;
	}
	/* Length comes from the buf itself; kernel address space (bi_as) */
	bind.bi_len = (uint64_t)buf->b_bcount;
	bind.bi_flags = mr_attr->mr_flags;
	bind.bi_as = NULL;
	status = hermon_mr_common_rereg(state, mr, pd, &bind, mrhdl_new, op);
	return (status);
}


/*
 * hermon_mr_sync()
 *    Context: Can be called from interrupt or base context.
 *    DMA-syncs each of the "num_segs" requested segments after validating
 *    its MR handle and that the segment lies within the region's bounds.
 */
/* ARGSUSED */
int
hermon_mr_sync(hermon_state_t *state, ibt_mr_sync_t *mr_segs, size_t num_segs)
{
	hermon_mrhdl_t		mrhdl;
	uint64_t		seg_vaddr, seg_len, seg_end;
	uint64_t		mr_start, mr_end;
	uint_t			type;
	int			status, i;

	/* Process each of the ibt_mr_sync_t's */
	for (i = 0; i < num_segs; i++) {
		mrhdl = (hermon_mrhdl_t)mr_segs[i].ms_handle;

		/* Check for valid memory region handle */
		if (mrhdl == NULL) {
			status = IBT_MR_HDL_INVALID;
			goto mrsync_fail;
		}

		mutex_enter(&mrhdl->mr_lock);

		/*
		 * Check here to see if the memory region has already been
		 * partially deregistered as a result of a
		 * hermon_umap_umemlock_cb() callback.  If so, this is an
		 * error, return failure.
1171 */ 1172 if ((mrhdl->mr_is_umem) && (mrhdl->mr_umemcookie == NULL)) { 1173 mutex_exit(&mrhdl->mr_lock); 1174 status = IBT_MR_HDL_INVALID; 1175 goto mrsync_fail; 1176 } 1177 1178 /* Check for valid bounds on sync request */ 1179 seg_vaddr = mr_segs[i].ms_vaddr; 1180 seg_len = mr_segs[i].ms_len; 1181 seg_end = seg_vaddr + seg_len - 1; 1182 mr_start = mrhdl->mr_bindinfo.bi_addr; 1183 mr_end = mr_start + mrhdl->mr_bindinfo.bi_len - 1; 1184 if ((seg_vaddr < mr_start) || (seg_vaddr > mr_end)) { 1185 mutex_exit(&mrhdl->mr_lock); 1186 status = IBT_MR_VA_INVALID; 1187 goto mrsync_fail; 1188 } 1189 if ((seg_end < mr_start) || (seg_end > mr_end)) { 1190 mutex_exit(&mrhdl->mr_lock); 1191 status = IBT_MR_LEN_INVALID; 1192 goto mrsync_fail; 1193 } 1194 1195 /* Determine what type (i.e. direction) for sync */ 1196 if (mr_segs[i].ms_flags & IBT_SYNC_READ) { 1197 type = DDI_DMA_SYNC_FORDEV; 1198 } else if (mr_segs[i].ms_flags & IBT_SYNC_WRITE) { 1199 type = DDI_DMA_SYNC_FORCPU; 1200 } else { 1201 mutex_exit(&mrhdl->mr_lock); 1202 status = IBT_INVALID_PARAM; 1203 goto mrsync_fail; 1204 } 1205 1206 (void) ddi_dma_sync(mrhdl->mr_bindinfo.bi_dmahdl, 1207 (off_t)(seg_vaddr - mr_start), (size_t)seg_len, type); 1208 1209 mutex_exit(&mrhdl->mr_lock); 1210 } 1211 1212 return (DDI_SUCCESS); 1213 1214 mrsync_fail: 1215 return (status); 1216 } 1217 1218 1219 /* 1220 * hermon_mw_alloc() 1221 * Context: Can be called from interrupt or base context. 1222 */ 1223 int 1224 hermon_mw_alloc(hermon_state_t *state, hermon_pdhdl_t pd, ibt_mw_flags_t flags, 1225 hermon_mwhdl_t *mwhdl) 1226 { 1227 hermon_rsrc_t *mpt, *rsrc; 1228 hermon_hw_dmpt_t mpt_entry; 1229 hermon_mwhdl_t mw; 1230 uint_t sleep; 1231 int status; 1232 1233 if (state != NULL) /* XXX - bogus test that is always TRUE */ 1234 return (IBT_INSUFF_RESOURCE); 1235 1236 /* 1237 * Check the sleep flag. Ensure that it is consistent with the 1238 * current thread context (i.e. 
 * if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (flags & IBT_MW_NOSLEEP) ? HERMON_NOSLEEP : HERMON_SLEEP;
	if ((sleep == HERMON_SLEEP) &&
	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
		status = IBT_INVALID_PARAM;
		goto mwalloc_fail;
	}

	/* Increment the reference count on the protection domain (PD) */
	hermon_pd_refcnt_inc(pd);

	/*
	 * Allocate an MPT entry (for use as a memory window).  Since the
	 * Hermon hardware uses the MPT entry for memory regions and for
	 * memory windows, we will fill in this MPT with all the necessary
	 * parameters for the memory window.  And then (just as we do for
	 * memory regions) ownership will be passed to the hardware in the
	 * final step below.  If we fail here, we must undo the protection
	 * domain reference count.
	 */
	status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto mwalloc_fail1;
	}

	/*
	 * Allocate the software structure for tracking the memory window (i.e.
	 * the Hermon Memory Window handle).  Note: This is actually the same
	 * software structure used for tracking memory regions, but since many
	 * of the same properties are needed, only a single structure is
	 * necessary.  If we fail here, we must undo the protection domain
	 * reference count and the previous resource allocation.
	 */
	status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto mwalloc_fail2;
	}
	mw = (hermon_mwhdl_t)rsrc->hr_addr;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw))

	/*
	 * Calculate an "unbound" RKey from MPT index.  In much the same way
	 * as we do for memory regions (above), this key is constructed from
	 * a "constrained" (which depends on the MPT index) and an
	 * "unconstrained" portion (which may be arbitrarily chosen).
	 */
	mw->mr_rkey = hermon_mr_keycalc(mpt->hr_indx);

	/*
	 * Fill in the MPT entry.  This is the final step before passing
	 * ownership of the MPT entry to the Hermon hardware.  We use all of
	 * the information collected/calculated above to fill in the
	 * requisite portions of the MPT.  Note: fewer entries in the MPT
	 * entry are necessary to allocate a memory window.
	 */
	bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
	mpt_entry.reg_win = HERMON_MPT_IS_WINDOW;
	mpt_entry.mem_key = mw->mr_rkey;
	mpt_entry.pd = pd->pd_pdnum;
	mpt_entry.lr = 1;

	/*
	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
	 * the entry to the hardware.  Note: in general, this operation
	 * shouldn't fail.  But if it does, we have to undo everything we've
	 * done above before returning error.
	 */
	status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
	if (status != HERMON_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		status = ibc_get_ci_failure(0);
		goto mwalloc_fail3;
	}

	/*
	 * Fill in the rest of the Hermon Memory Window handle.  Having
	 * successfully transferred ownership of the MPT, we can update the
	 * following fields for use in further operations on the MW.
	 * Note the RKey is byte-swapped into the hardware-visible
	 * (index[23:0],key[7:0]) form here.
	 */
	mw->mr_mptrsrcp = mpt;
	mw->mr_pdhdl = pd;
	mw->mr_rsrcp = rsrc;
	mw->mr_rkey = hermon_mr_key_swap(mw->mr_rkey);
	*mwhdl = mw;

	return (DDI_SUCCESS);

/*
 * Unwind in reverse order of acquisition: MW handle, MPT entry,
 * then the PD reference taken above.
 */
mwalloc_fail3:
	hermon_rsrc_free(state, &rsrc);
mwalloc_fail2:
	hermon_rsrc_free(state, &mpt);
mwalloc_fail1:
	hermon_pd_refcnt_dec(pd);
mwalloc_fail:
	return (status);
}


/*
 * hermon_mw_free()
 *    Context: Can be called from interrupt or base context.
 *    Reclaims the MW's MPT entry from hardware and frees all associated
 *    software resources (MW handle, MPT, PD reference).
 */
int
hermon_mw_free(hermon_state_t *state, hermon_mwhdl_t *mwhdl, uint_t sleep)
{
	hermon_rsrc_t		*mpt, *rsrc;
	hermon_mwhdl_t		mw;
	int			status;
	hermon_pdhdl_t		pd;

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	if ((sleep == HERMON_SLEEP) &&
	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
		status = IBT_INVALID_PARAM;
		return (status);
	}

	/*
	 * Pull all the necessary information from the Hermon Memory Window
	 * handle.  This is necessary here because the resource for the
	 * MW handle is going to be freed up as part of this operation.
	 */
	mw = *mwhdl;
	mutex_enter(&mw->mr_lock);
	mpt = mw->mr_mptrsrcp;
	rsrc = mw->mr_rsrcp;
	pd = mw->mr_pdhdl;
	mutex_exit(&mw->mr_lock);
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw))

	/*
	 * Reclaim the MPT entry from hardware.  Note: in general, it is
	 * unexpected for this operation to return an error.
 */
	status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT, NULL,
	    0, mpt->hr_indx, sleep);
	if (status != HERMON_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Hermon: HW2SW_MPT command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	/* Free the Hermon Memory Window handle */
	hermon_rsrc_free(state, &rsrc);

	/* Free up the MPT entry resource */
	hermon_rsrc_free(state, &mpt);

	/* Decrement the reference count on the protection domain (PD) */
	hermon_pd_refcnt_dec(pd);

	/* Set the mwhdl pointer to NULL and return success */
	*mwhdl = NULL;

	return (DDI_SUCCESS);
}


/*
 * hermon_mr_keycalc()
 *    Context: Can be called from interrupt or base context.
 *    NOTE:  Produces a key in the form of
 *		KKKKKKKK IIIIIIII IIIIIIII IIIIIIIII
 *    where K == the arbitrary bits and I == the index
 *    (i.e. an 8-bit "unconstrained" counter value in the high byte and
 *    the 24-bit MPT index in the low bytes).
 */
uint32_t
hermon_mr_keycalc(uint32_t indx)
{
	uint32_t	tmp_key, tmp_indx;

	/*
	 * Generate a simple key from counter.  Note:  We increment this
	 * static variable _intentionally_ without any kind of mutex around
	 * it.  First, single-threading all operations through a single lock
	 * would be a bad idea (from a performance point-of-view).  Second,
	 * the upper "unconstrained" bits don't really have to be unique
	 * because the lower bits are guaranteed to be (although we do make a
	 * best effort to ensure that they are).  Third, the window for the
	 * race (where both threads read and update the counter at the same
	 * time) is incredibly small.
1434 * And, lastly, we'd like to make this into a "random" key 1435 */ 1436 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(hermon_memkey_cnt)) 1437 tmp_key = (hermon_memkey_cnt++) << HERMON_MEMKEY_SHIFT; 1438 tmp_indx = indx & 0xffffff; 1439 return (tmp_key | tmp_indx); 1440 } 1441 1442 1443 /* 1444 * hermon_mr_key_swap() 1445 * Context: Can be called from interrupt or base context. 1446 * NOTE: Produces a key in the form of 1447 * IIIIIIII IIIIIIII IIIIIIIII KKKKKKKK 1448 * where K == the arbitrary bits and I == the index 1449 */ 1450 uint32_t 1451 hermon_mr_key_swap(uint32_t indx) 1452 { 1453 /* 1454 * The memory key format to pass down to the hardware is 1455 * (key[7:0],index[23:0]), which defines the index to the 1456 * hardware resource. When the driver passes this as a memory 1457 * key, (i.e. to retrieve a resource) the format is 1458 * (index[23:0],key[7:0]). 1459 */ 1460 return (((indx >> 24) & 0x000000ff) | ((indx << 8) & 0xffffff00)); 1461 } 1462 1463 /* 1464 * hermon_mr_common_reg() 1465 * Context: Can be called from interrupt or base context. 1466 */ 1467 static int 1468 hermon_mr_common_reg(hermon_state_t *state, hermon_pdhdl_t pd, 1469 hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl, hermon_mr_options_t *op, 1470 hermon_mpt_rsrc_type_t mpt_type) 1471 { 1472 hermon_rsrc_t *mpt, *mtt, *rsrc, *mtt_refcnt; 1473 hermon_umap_db_entry_t *umapdb; 1474 hermon_sw_refcnt_t *swrc_tmp; 1475 hermon_hw_dmpt_t mpt_entry; 1476 hermon_mrhdl_t mr; 1477 ibt_mr_flags_t flags; 1478 hermon_bind_info_t *bh; 1479 ddi_dma_handle_t bind_dmahdl; 1480 ddi_umem_cookie_t umem_cookie; 1481 size_t umem_len; 1482 caddr_t umem_addr; 1483 uint64_t mtt_addr, max_sz; 1484 uint_t sleep, mtt_pgsize_bits, bind_type, mr_is_umem; 1485 int status, umem_flags, bind_override_addr; 1486 1487 /* 1488 * Check the "options" flag. Currently this flag tells the driver 1489 * whether or not the region should be bound normally (i.e. 
with 1490 * entries written into the PCI IOMMU), whether it should be 1491 * registered to bypass the IOMMU, and whether or not the resulting 1492 * address should be "zero-based" (to aid the alignment restrictions 1493 * for QPs). 1494 */ 1495 if (op == NULL) { 1496 bind_type = HERMON_BINDMEM_NORMAL; 1497 bind_dmahdl = NULL; 1498 bind_override_addr = 0; 1499 } else { 1500 bind_type = op->mro_bind_type; 1501 bind_dmahdl = op->mro_bind_dmahdl; 1502 bind_override_addr = op->mro_bind_override_addr; 1503 } 1504 1505 /* check what kind of mpt to use */ 1506 1507 /* Extract the flags field from the hermon_bind_info_t */ 1508 flags = bind->bi_flags; 1509 1510 /* 1511 * Check for invalid length. Check is the length is zero or if the 1512 * length is larger than the maximum configured value. Return error 1513 * if it is. 1514 */ 1515 max_sz = ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_mrw_sz); 1516 if ((bind->bi_len == 0) || (bind->bi_len > max_sz)) { 1517 status = IBT_MR_LEN_INVALID; 1518 goto mrcommon_fail; 1519 } 1520 1521 /* 1522 * Check the sleep flag. Ensure that it is consistent with the 1523 * current thread context (i.e. if we are currently in the interrupt 1524 * context, then we shouldn't be attempting to sleep). 1525 */ 1526 sleep = (flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP: HERMON_SLEEP; 1527 if ((sleep == HERMON_SLEEP) && 1528 (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) { 1529 status = IBT_INVALID_PARAM; 1530 goto mrcommon_fail; 1531 } 1532 1533 /* Increment the reference count on the protection domain (PD) */ 1534 hermon_pd_refcnt_inc(pd); 1535 1536 /* 1537 * Allocate an MPT entry. This will be filled in with all the 1538 * necessary parameters to define the memory region. And then 1539 * ownership will be passed to the hardware in the final step 1540 * below. If we fail here, we must undo the protection domain 1541 * reference count. 
1542 */ 1543 if (mpt_type == HERMON_MPT_DMPT) { 1544 status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt); 1545 if (status != DDI_SUCCESS) { 1546 status = IBT_INSUFF_RESOURCE; 1547 goto mrcommon_fail1; 1548 } 1549 } else { 1550 mpt = NULL; 1551 } 1552 1553 /* 1554 * Allocate the software structure for tracking the memory region (i.e. 1555 * the Hermon Memory Region handle). If we fail here, we must undo 1556 * the protection domain reference count and the previous resource 1557 * allocation. 1558 */ 1559 status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc); 1560 if (status != DDI_SUCCESS) { 1561 status = IBT_INSUFF_RESOURCE; 1562 goto mrcommon_fail2; 1563 } 1564 mr = (hermon_mrhdl_t)rsrc->hr_addr; 1565 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr)) 1566 1567 /* 1568 * Setup and validate the memory region access flags. This means 1569 * translating the IBTF's enable flags into the access flags that 1570 * will be used in later operations. 1571 */ 1572 mr->mr_accflag = 0; 1573 if (flags & IBT_MR_ENABLE_WINDOW_BIND) 1574 mr->mr_accflag |= IBT_MR_WINDOW_BIND; 1575 if (flags & IBT_MR_ENABLE_LOCAL_WRITE) 1576 mr->mr_accflag |= IBT_MR_LOCAL_WRITE; 1577 if (flags & IBT_MR_ENABLE_REMOTE_READ) 1578 mr->mr_accflag |= IBT_MR_REMOTE_READ; 1579 if (flags & IBT_MR_ENABLE_REMOTE_WRITE) 1580 mr->mr_accflag |= IBT_MR_REMOTE_WRITE; 1581 if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC) 1582 mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC; 1583 1584 /* 1585 * Calculate keys (Lkey, Rkey) from MPT index. Each key is formed 1586 * from a certain number of "constrained" bits (the least significant 1587 * bits) and some number of "unconstrained" bits. The constrained 1588 * bits must be set to the index of the entry in the MPT table, but 1589 * the unconstrained bits can be set to any value we wish. Note: 1590 * if no remote access is required, then the RKey value is not filled 1591 * in. Otherwise both Rkey and LKey are given the same value. 
1592 */ 1593 if (mpt) 1594 mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx); 1595 1596 if ((mr->mr_accflag & IBT_MR_REMOTE_READ) || 1597 (mr->mr_accflag & IBT_MR_REMOTE_WRITE) || 1598 (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) { 1599 mr->mr_rkey = mr->mr_lkey; 1600 } 1601 1602 /* 1603 * Determine if the memory is from userland and pin the pages 1604 * with umem_lockmemory() if necessary. 1605 * Then, if this is userland memory, allocate an entry in the 1606 * "userland resources database". This will later be added to 1607 * the database (after all further memory registration operations are 1608 * successful). If we fail here, we must undo the reference counts 1609 * and the previous resource allocations. 1610 */ 1611 mr_is_umem = (((bind->bi_as != NULL) && (bind->bi_as != &kas)) ? 1 : 0); 1612 if (mr_is_umem) { 1613 umem_len = ptob(btopr(bind->bi_len + 1614 ((uintptr_t)bind->bi_addr & PAGEOFFSET))); 1615 umem_addr = (caddr_t)((uintptr_t)bind->bi_addr & ~PAGEOFFSET); 1616 umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ | 1617 DDI_UMEMLOCK_LONGTERM); 1618 status = umem_lockmemory(umem_addr, umem_len, umem_flags, 1619 &umem_cookie, &hermon_umem_cbops, NULL); 1620 if (status != 0) { 1621 status = IBT_INSUFF_RESOURCE; 1622 goto mrcommon_fail3; 1623 } 1624 1625 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind)) 1626 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind->bi_buf)) 1627 1628 bind->bi_buf = ddi_umem_iosetup(umem_cookie, 0, umem_len, 1629 B_WRITE, 0, 0, NULL, DDI_UMEM_SLEEP); 1630 if (bind->bi_buf == NULL) { 1631 status = IBT_INSUFF_RESOURCE; 1632 goto mrcommon_fail3; 1633 } 1634 bind->bi_type = HERMON_BINDHDL_UBUF; 1635 bind->bi_buf->b_flags |= B_READ; 1636 1637 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind->bi_buf)) 1638 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind)) 1639 1640 umapdb = hermon_umap_db_alloc(state->hs_instance, 1641 (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC, 1642 (uint64_t)(uintptr_t)rsrc); 1643 if (umapdb == NULL) { 1644 status = IBT_INSUFF_RESOURCE; 
1645 goto mrcommon_fail4; 1646 } 1647 } 1648 1649 /* 1650 * Setup the bindinfo for the mtt bind call 1651 */ 1652 bh = &mr->mr_bindinfo; 1653 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bh)) 1654 bcopy(bind, bh, sizeof (hermon_bind_info_t)); 1655 bh->bi_bypass = bind_type; 1656 status = hermon_mr_mtt_bind(state, bh, bind_dmahdl, &mtt, 1657 &mtt_pgsize_bits, mpt != NULL); 1658 if (status != DDI_SUCCESS) { 1659 goto mrcommon_fail5; 1660 } 1661 mr->mr_logmttpgsz = mtt_pgsize_bits; 1662 1663 /* 1664 * Allocate MTT reference count (to track shared memory regions). 1665 * This reference count resource may never be used on the given 1666 * memory region, but if it is ever later registered as "shared" 1667 * memory region then this resource will be necessary. If we fail 1668 * here, we do pretty much the same as above to clean up. 1669 */ 1670 status = hermon_rsrc_alloc(state, HERMON_REFCNT, 1, sleep, 1671 &mtt_refcnt); 1672 if (status != DDI_SUCCESS) { 1673 status = IBT_INSUFF_RESOURCE; 1674 goto mrcommon_fail6; 1675 } 1676 mr->mr_mttrefcntp = mtt_refcnt; 1677 swrc_tmp = (hermon_sw_refcnt_t *)mtt_refcnt->hr_addr; 1678 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_tmp)) 1679 HERMON_MTT_REFCNT_INIT(swrc_tmp); 1680 1681 mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT); 1682 1683 /* 1684 * Fill in the MPT entry. This is the final step before passing 1685 * ownership of the MPT entry to the Hermon hardware. We use all of 1686 * the information collected/calculated above to fill in the 1687 * requisite portions of the MPT. Do this ONLY for DMPTs. 1688 */ 1689 if (mpt == NULL) 1690 goto no_passown; 1691 1692 bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t)); 1693 1694 mpt_entry.status = HERMON_MPT_SW_OWNERSHIP; 1695 mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND) ? 1 : 0; 1696 mpt_entry.atomic = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0; 1697 mpt_entry.rw = (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ? 1 : 0; 1698 mpt_entry.rr = (mr->mr_accflag & IBT_MR_REMOTE_READ) ? 
1 : 0; 1699 mpt_entry.lw = (mr->mr_accflag & IBT_MR_LOCAL_WRITE) ? 1 : 0; 1700 mpt_entry.lr = 1; 1701 mpt_entry.phys_addr = 0; 1702 mpt_entry.reg_win = HERMON_MPT_IS_REGION; 1703 1704 mpt_entry.entity_sz = mr->mr_logmttpgsz; 1705 mpt_entry.mem_key = mr->mr_lkey; 1706 mpt_entry.pd = pd->pd_pdnum; 1707 mpt_entry.rem_acc_en = 0; 1708 mpt_entry.fast_reg_en = 0; 1709 mpt_entry.en_inval = 0; 1710 mpt_entry.lkey = 0; 1711 mpt_entry.win_cnt = 0; 1712 1713 if (bind_override_addr == 0) { 1714 mpt_entry.start_addr = bh->bi_addr; 1715 } else { 1716 bh->bi_addr = bh->bi_addr & ((1 << mr->mr_logmttpgsz) - 1); 1717 mpt_entry.start_addr = bh->bi_addr; 1718 } 1719 mpt_entry.reg_win_len = bh->bi_len; 1720 1721 mpt_entry.mtt_addr_h = mtt_addr >> 32; /* only 8 more bits */ 1722 mpt_entry.mtt_addr_l = mtt_addr >> 3; /* only 29 bits */ 1723 1724 /* 1725 * Write the MPT entry to hardware. Lastly, we pass ownership of 1726 * the entry to the hardware if needed. Note: in general, this 1727 * operation shouldn't fail. But if it does, we have to undo 1728 * everything we've done above before returning error. 1729 * 1730 * For Hermon, this routine (which is common to the contexts) will only 1731 * set the ownership if needed - the process of passing the context 1732 * itself to HW will take care of setting up the MPT (based on type 1733 * and index). 1734 */ 1735 1736 mpt_entry.bnd_qp = 0; /* dMPT for a qp, check for window */ 1737 status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry, 1738 sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep); 1739 if (status != HERMON_CMD_SUCCESS) { 1740 cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n", 1741 status); 1742 if (status == HERMON_CMD_INVALID_STATUS) { 1743 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 1744 } 1745 status = ibc_get_ci_failure(0); 1746 goto mrcommon_fail7; 1747 } 1748 no_passown: 1749 1750 /* 1751 * Fill in the rest of the Hermon Memory Region handle. 
Having 1752 * successfully transferred ownership of the MPT, we can update the 1753 * following fields for use in further operations on the MR. 1754 */ 1755 mr->mr_mttaddr = mtt_addr; 1756 1757 mr->mr_log2_pgsz = (mr->mr_logmttpgsz - HERMON_PAGESHIFT); 1758 mr->mr_mptrsrcp = mpt; 1759 mr->mr_mttrsrcp = mtt; 1760 mr->mr_pdhdl = pd; 1761 mr->mr_rsrcp = rsrc; 1762 mr->mr_is_umem = mr_is_umem; 1763 mr->mr_is_fmr = 0; 1764 mr->mr_umemcookie = (mr_is_umem != 0) ? umem_cookie : NULL; 1765 mr->mr_umem_cbfunc = NULL; 1766 mr->mr_umem_cbarg1 = NULL; 1767 mr->mr_umem_cbarg2 = NULL; 1768 mr->mr_lkey = hermon_mr_key_swap(mr->mr_lkey); 1769 mr->mr_rkey = hermon_mr_key_swap(mr->mr_rkey); 1770 mr->mr_mpt_type = mpt_type; 1771 1772 /* 1773 * If this is userland memory, then we need to insert the previously 1774 * allocated entry into the "userland resources database". This will 1775 * allow for later coordination between the hermon_umap_umemlock_cb() 1776 * callback and hermon_mr_deregister(). 1777 */ 1778 if (mr_is_umem) { 1779 hermon_umap_db_add(umapdb); 1780 } 1781 1782 *mrhdl = mr; 1783 1784 return (DDI_SUCCESS); 1785 1786 /* 1787 * The following is cleanup for all possible failure cases in this routine 1788 */ 1789 mrcommon_fail7: 1790 hermon_rsrc_free(state, &mtt_refcnt); 1791 mrcommon_fail6: 1792 hermon_mr_mem_unbind(state, bh); 1793 bind->bi_type = bh->bi_type; 1794 mrcommon_fail5: 1795 if (mr_is_umem) { 1796 hermon_umap_db_free(umapdb); 1797 } 1798 mrcommon_fail4: 1799 if (mr_is_umem) { 1800 /* 1801 * Free up the memory ddi_umem_iosetup() allocates 1802 * internally. 
1803 */ 1804 if (bind->bi_type == HERMON_BINDHDL_UBUF) { 1805 freerbuf(bind->bi_buf); 1806 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind)) 1807 bind->bi_type = HERMON_BINDHDL_NONE; 1808 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind)) 1809 } 1810 ddi_umem_unlock(umem_cookie); 1811 } 1812 mrcommon_fail3: 1813 hermon_rsrc_free(state, &rsrc); 1814 mrcommon_fail2: 1815 if (mpt != NULL) 1816 hermon_rsrc_free(state, &mpt); 1817 mrcommon_fail1: 1818 hermon_pd_refcnt_dec(pd); 1819 mrcommon_fail: 1820 return (status); 1821 } 1822 1823 /* 1824 * hermon_mr_mtt_bind() 1825 * Context: Can be called from interrupt or base context. 1826 */ 1827 int 1828 hermon_mr_mtt_bind(hermon_state_t *state, hermon_bind_info_t *bind, 1829 ddi_dma_handle_t bind_dmahdl, hermon_rsrc_t **mtt, uint_t *mtt_pgsize_bits, 1830 uint_t is_buffer) 1831 { 1832 uint64_t nummtt; 1833 uint_t sleep; 1834 int status; 1835 1836 /* 1837 * Check the sleep flag. Ensure that it is consistent with the 1838 * current thread context (i.e. if we are currently in the interrupt 1839 * context, then we shouldn't be attempting to sleep). 1840 */ 1841 sleep = (bind->bi_flags & IBT_MR_NOSLEEP) ? 1842 HERMON_NOSLEEP : HERMON_SLEEP; 1843 if ((sleep == HERMON_SLEEP) && 1844 (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) { 1845 status = IBT_INVALID_PARAM; 1846 goto mrmttbind_fail; 1847 } 1848 1849 /* 1850 * Bind the memory and determine the mapped addresses. This is 1851 * the first of two routines that do all the "heavy lifting" for 1852 * the Hermon memory registration routines. The hermon_mr_mem_bind() 1853 * routine takes the "bind" struct with all its fields filled 1854 * in and returns a list of DMA cookies (for the PCI mapped addresses 1855 * corresponding to the specified address region) which are used by 1856 * the hermon_mr_fast_mtt_write() routine below. If we fail here, we 1857 * must undo all the previous resource allocation (and PD reference 1858 * count). 
1859 */ 1860 status = hermon_mr_mem_bind(state, bind, bind_dmahdl, sleep, is_buffer); 1861 if (status != DDI_SUCCESS) { 1862 status = IBT_INSUFF_RESOURCE; 1863 goto mrmttbind_fail; 1864 } 1865 1866 /* 1867 * Determine number of pages spanned. This routine uses the 1868 * information in the "bind" struct to determine the required 1869 * number of MTT entries needed (and returns the suggested page size - 1870 * as a "power-of-2" - for each MTT entry). 1871 */ 1872 nummtt = hermon_mr_nummtt_needed(state, bind, mtt_pgsize_bits); 1873 1874 /* 1875 * Allocate the MTT entries. Use the calculations performed above to 1876 * allocate the required number of MTT entries. If we fail here, we 1877 * must not only undo all the previous resource allocation (and PD 1878 * reference count), but we must also unbind the memory. 1879 */ 1880 status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt, sleep, mtt); 1881 if (status != DDI_SUCCESS) { 1882 status = IBT_INSUFF_RESOURCE; 1883 goto mrmttbind_fail2; 1884 } 1885 1886 /* 1887 * Write the mapped addresses into the MTT entries. This is part two 1888 * of the "heavy lifting" routines that we talked about above. Note: 1889 * we pass the suggested page size from the earlier operation here. 1890 * And if we fail here, we again do pretty much the same huge clean up. 1891 */ 1892 status = hermon_mr_fast_mtt_write(state, *mtt, bind, *mtt_pgsize_bits); 1893 if (status != DDI_SUCCESS) { 1894 /* 1895 * hermon_mr_fast_mtt_write() returns DDI_FAILURE 1896 * only if it detects a HW error during DMA. 
1897 */ 1898 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 1899 status = ibc_get_ci_failure(0); 1900 goto mrmttbind_fail3; 1901 } 1902 return (DDI_SUCCESS); 1903 1904 /* 1905 * The following is cleanup for all possible failure cases in this routine 1906 */ 1907 mrmttbind_fail3: 1908 hermon_rsrc_free(state, mtt); 1909 mrmttbind_fail2: 1910 hermon_mr_mem_unbind(state, bind); 1911 mrmttbind_fail: 1912 return (status); 1913 } 1914 1915 1916 /* 1917 * hermon_mr_mtt_unbind() 1918 * Context: Can be called from interrupt or base context. 1919 */ 1920 int 1921 hermon_mr_mtt_unbind(hermon_state_t *state, hermon_bind_info_t *bind, 1922 hermon_rsrc_t *mtt) 1923 { 1924 /* 1925 * Free up the MTT entries and unbind the memory. Here, as above, we 1926 * attempt to free these resources only if it is appropriate to do so. 1927 */ 1928 hermon_mr_mem_unbind(state, bind); 1929 hermon_rsrc_free(state, &mtt); 1930 1931 return (DDI_SUCCESS); 1932 } 1933 1934 1935 /* 1936 * hermon_mr_common_rereg() 1937 * Context: Can be called from interrupt or base context. 1938 */ 1939 static int 1940 hermon_mr_common_rereg(hermon_state_t *state, hermon_mrhdl_t mr, 1941 hermon_pdhdl_t pd, hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl_new, 1942 hermon_mr_options_t *op) 1943 { 1944 hermon_rsrc_t *mpt; 1945 ibt_mr_attr_flags_t acc_flags_to_use; 1946 ibt_mr_flags_t flags; 1947 hermon_pdhdl_t pd_to_use; 1948 hermon_hw_dmpt_t mpt_entry; 1949 uint64_t mtt_addr_to_use, vaddr_to_use, len_to_use; 1950 uint_t sleep, dereg_level; 1951 int status; 1952 1953 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind)) 1954 1955 /* 1956 * Check here to see if the memory region corresponds to a userland 1957 * mapping. Reregistration of userland memory regions is not 1958 * currently supported. Return failure. 
1959 */ 1960 if (mr->mr_is_umem) { 1961 status = IBT_MR_HDL_INVALID; 1962 goto mrrereg_fail; 1963 } 1964 1965 mutex_enter(&mr->mr_lock); 1966 1967 /* Pull MPT resource pointer from the Hermon Memory Region handle */ 1968 mpt = mr->mr_mptrsrcp; 1969 1970 /* Extract the flags field from the hermon_bind_info_t */ 1971 flags = bind->bi_flags; 1972 1973 /* 1974 * Check the sleep flag. Ensure that it is consistent with the 1975 * current thread context (i.e. if we are currently in the interrupt 1976 * context, then we shouldn't be attempting to sleep). 1977 */ 1978 sleep = (flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP: HERMON_SLEEP; 1979 if ((sleep == HERMON_SLEEP) && 1980 (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) { 1981 mutex_exit(&mr->mr_lock); 1982 status = IBT_INVALID_PARAM; 1983 goto mrrereg_fail; 1984 } 1985 1986 /* 1987 * First step is to temporarily invalidate the MPT entry. This 1988 * regains ownership from the hardware, and gives us the opportunity 1989 * to modify the entry. Note: The HW2SW_MPT command returns the 1990 * current MPT entry contents. These are saved away here because 1991 * they will be reused in a later step below. If the region has 1992 * bound memory windows that we fail returning an "in use" error code. 1993 * Otherwise, this is an unexpected error and we deregister the 1994 * memory region and return error. 1995 * 1996 * We use HERMON_CMD_NOSLEEP_SPIN here always because we must protect 1997 * against holding the lock around this rereg call in all contexts. 
1998 */ 1999 status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT, &mpt_entry, 2000 sizeof (hermon_hw_dmpt_t), mpt->hr_indx, HERMON_CMD_NOSLEEP_SPIN); 2001 if (status != HERMON_CMD_SUCCESS) { 2002 mutex_exit(&mr->mr_lock); 2003 if (status == HERMON_CMD_REG_BOUND) { 2004 return (IBT_MR_IN_USE); 2005 } else { 2006 cmn_err(CE_CONT, "Hermon: HW2SW_MPT command failed: " 2007 "%08x\n", status); 2008 if (status == HERMON_CMD_INVALID_STATUS) { 2009 hermon_fm_ereport(state, HCA_SYS_ERR, 2010 HCA_ERR_SRV_LOST); 2011 } 2012 /* 2013 * Call deregister and ensure that all current 2014 * resources get freed up 2015 */ 2016 if (hermon_mr_deregister(state, &mr, 2017 HERMON_MR_DEREG_ALL, sleep) != DDI_SUCCESS) { 2018 HERMON_WARNING(state, "failed to deregister " 2019 "memory region"); 2020 } 2021 return (ibc_get_ci_failure(0)); 2022 } 2023 } 2024 2025 /* 2026 * If we're changing the protection domain, then validate the new one 2027 */ 2028 if (flags & IBT_MR_CHANGE_PD) { 2029 2030 /* Check for valid PD handle pointer */ 2031 if (pd == NULL) { 2032 mutex_exit(&mr->mr_lock); 2033 /* 2034 * Call deregister and ensure that all current 2035 * resources get properly freed up. Unnecessary 2036 * here to attempt to regain software ownership 2037 * of the MPT entry as that has already been 2038 * done above. 2039 */ 2040 if (hermon_mr_deregister(state, &mr, 2041 HERMON_MR_DEREG_NO_HW2SW_MPT, sleep) != 2042 DDI_SUCCESS) { 2043 HERMON_WARNING(state, "failed to deregister " 2044 "memory region"); 2045 } 2046 status = IBT_PD_HDL_INVALID; 2047 goto mrrereg_fail; 2048 } 2049 2050 /* Use the new PD handle in all operations below */ 2051 pd_to_use = pd; 2052 2053 } else { 2054 /* Use the current PD handle in all operations below */ 2055 pd_to_use = mr->mr_pdhdl; 2056 } 2057 2058 /* 2059 * If we're changing access permissions, then validate the new ones 2060 */ 2061 if (flags & IBT_MR_CHANGE_ACCESS) { 2062 /* 2063 * Validate the access flags. 
Both remote write and remote 2064 * atomic require the local write flag to be set 2065 */ 2066 if (((flags & IBT_MR_ENABLE_REMOTE_WRITE) || 2067 (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)) && 2068 !(flags & IBT_MR_ENABLE_LOCAL_WRITE)) { 2069 mutex_exit(&mr->mr_lock); 2070 /* 2071 * Call deregister and ensure that all current 2072 * resources get properly freed up. Unnecessary 2073 * here to attempt to regain software ownership 2074 * of the MPT entry as that has already been 2075 * done above. 2076 */ 2077 if (hermon_mr_deregister(state, &mr, 2078 HERMON_MR_DEREG_NO_HW2SW_MPT, sleep) != 2079 DDI_SUCCESS) { 2080 HERMON_WARNING(state, "failed to deregister " 2081 "memory region"); 2082 } 2083 status = IBT_MR_ACCESS_REQ_INVALID; 2084 goto mrrereg_fail; 2085 } 2086 2087 /* 2088 * Setup and validate the memory region access flags. This 2089 * means translating the IBTF's enable flags into the access 2090 * flags that will be used in later operations. 2091 */ 2092 acc_flags_to_use = 0; 2093 if (flags & IBT_MR_ENABLE_WINDOW_BIND) 2094 acc_flags_to_use |= IBT_MR_WINDOW_BIND; 2095 if (flags & IBT_MR_ENABLE_LOCAL_WRITE) 2096 acc_flags_to_use |= IBT_MR_LOCAL_WRITE; 2097 if (flags & IBT_MR_ENABLE_REMOTE_READ) 2098 acc_flags_to_use |= IBT_MR_REMOTE_READ; 2099 if (flags & IBT_MR_ENABLE_REMOTE_WRITE) 2100 acc_flags_to_use |= IBT_MR_REMOTE_WRITE; 2101 if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC) 2102 acc_flags_to_use |= IBT_MR_REMOTE_ATOMIC; 2103 2104 } else { 2105 acc_flags_to_use = mr->mr_accflag; 2106 } 2107 2108 /* 2109 * If we're modifying the translation, then figure out whether 2110 * we can reuse the current MTT resources. This means calling 2111 * hermon_mr_rereg_xlat_helper() which does most of the heavy lifting 2112 * for the reregistration. If the current memory region contains 2113 * sufficient MTT entries for the new regions, then it will be 2114 * reused and filled in. 
Otherwise, new entries will be allocated, 2115 * the old ones will be freed, and the new entries will be filled 2116 * in. Note: If we're not modifying the translation, then we 2117 * should already have all the information we need to update the MPT. 2118 * Also note: If hermon_mr_rereg_xlat_helper() fails, it will return 2119 * a "dereg_level" which is the level of cleanup that needs to be 2120 * passed to hermon_mr_deregister() to finish the cleanup. 2121 */ 2122 if (flags & IBT_MR_CHANGE_TRANSLATION) { 2123 status = hermon_mr_rereg_xlat_helper(state, mr, bind, op, 2124 &mtt_addr_to_use, sleep, &dereg_level); 2125 if (status != DDI_SUCCESS) { 2126 mutex_exit(&mr->mr_lock); 2127 /* 2128 * Call deregister and ensure that all resources get 2129 * properly freed up. 2130 */ 2131 if (hermon_mr_deregister(state, &mr, dereg_level, 2132 sleep) != DDI_SUCCESS) { 2133 HERMON_WARNING(state, "failed to deregister " 2134 "memory region"); 2135 } 2136 goto mrrereg_fail; 2137 } 2138 vaddr_to_use = mr->mr_bindinfo.bi_addr; 2139 len_to_use = mr->mr_bindinfo.bi_len; 2140 } else { 2141 mtt_addr_to_use = mr->mr_mttaddr; 2142 vaddr_to_use = mr->mr_bindinfo.bi_addr; 2143 len_to_use = mr->mr_bindinfo.bi_len; 2144 } 2145 2146 /* 2147 * Calculate new keys (Lkey, Rkey) from MPT index. Just like they were 2148 * when the region was first registered, each key is formed from 2149 * "constrained" bits and "unconstrained" bits. Note: If no remote 2150 * access is required, then the RKey value is not filled in. Otherwise 2151 * both Rkey and LKey are given the same value. 2152 */ 2153 mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx); 2154 if ((acc_flags_to_use & IBT_MR_REMOTE_READ) || 2155 (acc_flags_to_use & IBT_MR_REMOTE_WRITE) || 2156 (acc_flags_to_use & IBT_MR_REMOTE_ATOMIC)) { 2157 mr->mr_rkey = mr->mr_lkey; 2158 } else 2159 mr->mr_rkey = 0; 2160 2161 /* 2162 * Fill in the MPT entry. This is the final step before passing 2163 * ownership of the MPT entry to the Hermon hardware. 
We use all of 2164 * the information collected/calculated above to fill in the 2165 * requisite portions of the MPT. 2166 */ 2167 bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t)); 2168 2169 mpt_entry.status = HERMON_MPT_SW_OWNERSHIP; 2170 mpt_entry.en_bind = (acc_flags_to_use & IBT_MR_WINDOW_BIND) ? 1 : 0; 2171 mpt_entry.atomic = (acc_flags_to_use & IBT_MR_REMOTE_ATOMIC) ? 1 : 0; 2172 mpt_entry.rw = (acc_flags_to_use & IBT_MR_REMOTE_WRITE) ? 1 : 0; 2173 mpt_entry.rr = (acc_flags_to_use & IBT_MR_REMOTE_READ) ? 1 : 0; 2174 mpt_entry.lw = (acc_flags_to_use & IBT_MR_LOCAL_WRITE) ? 1 : 0; 2175 mpt_entry.lr = 1; 2176 mpt_entry.phys_addr = 0; 2177 mpt_entry.reg_win = HERMON_MPT_IS_REGION; 2178 2179 mpt_entry.entity_sz = mr->mr_logmttpgsz; 2180 mpt_entry.mem_key = mr->mr_lkey; 2181 mpt_entry.pd = pd_to_use->pd_pdnum; 2182 2183 mpt_entry.start_addr = vaddr_to_use; 2184 mpt_entry.reg_win_len = len_to_use; 2185 mpt_entry.mtt_addr_h = mtt_addr_to_use >> 32; 2186 mpt_entry.mtt_addr_l = mtt_addr_to_use >> 3; 2187 2188 /* 2189 * Write the updated MPT entry to hardware 2190 * 2191 * We use HERMON_CMD_NOSLEEP_SPIN here always because we must protect 2192 * against holding the lock around this rereg call in all contexts. 2193 */ 2194 status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry, 2195 sizeof (hermon_hw_dmpt_t), mpt->hr_indx, HERMON_CMD_NOSLEEP_SPIN); 2196 if (status != HERMON_CMD_SUCCESS) { 2197 mutex_exit(&mr->mr_lock); 2198 cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n", 2199 status); 2200 if (status == HERMON_CMD_INVALID_STATUS) { 2201 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 2202 } 2203 /* 2204 * Call deregister and ensure that all current resources get 2205 * properly freed up. Unnecessary here to attempt to regain 2206 * software ownership of the MPT entry as that has already 2207 * been done above. 
2208 */ 2209 if (hermon_mr_deregister(state, &mr, 2210 HERMON_MR_DEREG_NO_HW2SW_MPT, sleep) != DDI_SUCCESS) { 2211 HERMON_WARNING(state, "failed to deregister memory " 2212 "region"); 2213 } 2214 return (ibc_get_ci_failure(0)); 2215 } 2216 2217 /* 2218 * If we're changing PD, then update their reference counts now. 2219 * This means decrementing the reference count on the old PD and 2220 * incrementing the reference count on the new PD. 2221 */ 2222 if (flags & IBT_MR_CHANGE_PD) { 2223 hermon_pd_refcnt_dec(mr->mr_pdhdl); 2224 hermon_pd_refcnt_inc(pd); 2225 } 2226 2227 /* 2228 * Update the contents of the Hermon Memory Region handle to reflect 2229 * what has been changed. 2230 */ 2231 mr->mr_pdhdl = pd_to_use; 2232 mr->mr_accflag = acc_flags_to_use; 2233 mr->mr_is_umem = 0; 2234 mr->mr_is_fmr = 0; 2235 mr->mr_umemcookie = NULL; 2236 mr->mr_lkey = hermon_mr_key_swap(mr->mr_lkey); 2237 mr->mr_rkey = hermon_mr_key_swap(mr->mr_rkey); 2238 2239 /* New MR handle is same as the old */ 2240 *mrhdl_new = mr; 2241 mutex_exit(&mr->mr_lock); 2242 2243 return (DDI_SUCCESS); 2244 2245 mrrereg_fail: 2246 return (status); 2247 } 2248 2249 2250 /* 2251 * hermon_mr_rereg_xlat_helper 2252 * Context: Can be called from interrupt or base context. 2253 * Note: This routine expects the "mr_lock" to be held when it 2254 * is called. Upon returning failure, this routine passes information 2255 * about what "dereg_level" should be passed to hermon_mr_deregister(). 
2256 */ 2257 static int 2258 hermon_mr_rereg_xlat_helper(hermon_state_t *state, hermon_mrhdl_t mr, 2259 hermon_bind_info_t *bind, hermon_mr_options_t *op, uint64_t *mtt_addr, 2260 uint_t sleep, uint_t *dereg_level) 2261 { 2262 hermon_rsrc_t *mtt, *mtt_refcnt; 2263 hermon_sw_refcnt_t *swrc_old, *swrc_new; 2264 ddi_dma_handle_t dmahdl; 2265 uint64_t nummtt_needed, nummtt_in_currrsrc, max_sz; 2266 uint_t mtt_pgsize_bits, bind_type, reuse_dmahdl; 2267 int status; 2268 2269 ASSERT(MUTEX_HELD(&mr->mr_lock)); 2270 2271 /* 2272 * Check the "options" flag. Currently this flag tells the driver 2273 * whether or not the region should be bound normally (i.e. with 2274 * entries written into the PCI IOMMU) or whether it should be 2275 * registered to bypass the IOMMU. 2276 */ 2277 if (op == NULL) { 2278 bind_type = HERMON_BINDMEM_NORMAL; 2279 } else { 2280 bind_type = op->mro_bind_type; 2281 } 2282 2283 /* 2284 * Check for invalid length. Check is the length is zero or if the 2285 * length is larger than the maximum configured value. Return error 2286 * if it is. 2287 */ 2288 max_sz = ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_mrw_sz); 2289 if ((bind->bi_len == 0) || (bind->bi_len > max_sz)) { 2290 /* 2291 * Deregister will be called upon returning failure from this 2292 * routine. This will ensure that all current resources get 2293 * properly freed up. 
Unnecessary to attempt to regain 2294 * software ownership of the MPT entry as that has already 2295 * been done above (in hermon_mr_reregister()) 2296 */ 2297 *dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT; 2298 2299 status = IBT_MR_LEN_INVALID; 2300 goto mrrereghelp_fail; 2301 } 2302 2303 /* 2304 * Determine the number of pages necessary for new region and the 2305 * number of pages supported by the current MTT resources 2306 */ 2307 nummtt_needed = hermon_mr_nummtt_needed(state, bind, &mtt_pgsize_bits); 2308 nummtt_in_currrsrc = mr->mr_mttrsrcp->hr_len >> HERMON_MTT_SIZE_SHIFT; 2309 2310 /* 2311 * Depending on whether we have enough pages or not, the next step is 2312 * to fill in a set of MTT entries that reflect the new mapping. In 2313 * the first case below, we already have enough entries. This means 2314 * we need to unbind the memory from the previous mapping, bind the 2315 * memory for the new mapping, write the new MTT entries, and update 2316 * the mr to reflect the changes. 2317 * In the second case below, we do not have enough entries in the 2318 * current mapping. So, in this case, we need not only to unbind the 2319 * current mapping, but we need to free up the MTT resources associated 2320 * with that mapping. After we've successfully done that, we continue 2321 * by binding the new memory, allocating new MTT entries, writing the 2322 * new MTT entries, and updating the mr to reflect the changes. 2323 */ 2324 2325 /* 2326 * If this region is being shared (i.e. MTT refcount != 1), then we 2327 * can't reuse the current MTT resources regardless of their size. 2328 * Instead we'll need to alloc new ones (below) just as if there 2329 * hadn't been enough room in the current entries. 
2330 */ 2331 swrc_old = (hermon_sw_refcnt_t *)mr->mr_mttrefcntp->hr_addr; 2332 if (HERMON_MTT_IS_NOT_SHARED(swrc_old) && 2333 (nummtt_needed <= nummtt_in_currrsrc)) { 2334 2335 /* 2336 * Unbind the old mapping for this memory region, but retain 2337 * the ddi_dma_handle_t (if possible) for reuse in the bind 2338 * operation below. Note: If original memory region was 2339 * bound for IOMMU bypass and the new region can not use 2340 * bypass, then a new DMA handle will be necessary. 2341 */ 2342 if (HERMON_MR_REUSE_DMAHDL(mr, bind->bi_flags)) { 2343 mr->mr_bindinfo.bi_free_dmahdl = 0; 2344 hermon_mr_mem_unbind(state, &mr->mr_bindinfo); 2345 dmahdl = mr->mr_bindinfo.bi_dmahdl; 2346 reuse_dmahdl = 1; 2347 } else { 2348 hermon_mr_mem_unbind(state, &mr->mr_bindinfo); 2349 dmahdl = NULL; 2350 reuse_dmahdl = 0; 2351 } 2352 2353 /* 2354 * Bind the new memory and determine the mapped addresses. 2355 * As described, this routine and hermon_mr_fast_mtt_write() 2356 * do the majority of the work for the memory registration 2357 * operations. Note: When we successfully finish the binding, 2358 * we will set the "bi_free_dmahdl" flag to indicate that 2359 * even though we may have reused the ddi_dma_handle_t we do 2360 * wish it to be freed up at some later time. Note also that 2361 * if we fail, we may need to cleanup the ddi_dma_handle_t. 2362 */ 2363 bind->bi_bypass = bind_type; 2364 status = hermon_mr_mem_bind(state, bind, dmahdl, sleep, 1); 2365 if (status != DDI_SUCCESS) { 2366 if (reuse_dmahdl) { 2367 ddi_dma_free_handle(&dmahdl); 2368 } 2369 2370 /* 2371 * Deregister will be called upon returning failure 2372 * from this routine. This will ensure that all 2373 * current resources get properly freed up. 2374 * Unnecessary to attempt to regain software ownership 2375 * of the MPT entry as that has already been done 2376 * above (in hermon_mr_reregister()). Also unnecessary 2377 * to attempt to unbind the memory. 
2378 */ 2379 *dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND; 2380 2381 status = IBT_INSUFF_RESOURCE; 2382 goto mrrereghelp_fail; 2383 } 2384 if (reuse_dmahdl) { 2385 bind->bi_free_dmahdl = 1; 2386 } 2387 2388 /* 2389 * Using the new mapping, but reusing the current MTT 2390 * resources, write the updated entries to MTT 2391 */ 2392 mtt = mr->mr_mttrsrcp; 2393 status = hermon_mr_fast_mtt_write(state, mtt, bind, 2394 mtt_pgsize_bits); 2395 if (status != DDI_SUCCESS) { 2396 /* 2397 * Deregister will be called upon returning failure 2398 * from this routine. This will ensure that all 2399 * current resources get properly freed up. 2400 * Unnecessary to attempt to regain software ownership 2401 * of the MPT entry as that has already been done 2402 * above (in hermon_mr_reregister()). Also unnecessary 2403 * to attempt to unbind the memory. 2404 * 2405 * But we do need to unbind the newly bound memory 2406 * before returning. 2407 */ 2408 hermon_mr_mem_unbind(state, bind); 2409 *dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND; 2410 2411 /* 2412 * hermon_mr_fast_mtt_write() returns DDI_FAILURE 2413 * only if it detects a HW error during DMA. 2414 */ 2415 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 2416 status = ibc_get_ci_failure(0); 2417 goto mrrereghelp_fail; 2418 } 2419 2420 /* Put the updated information into the Mem Region handle */ 2421 mr->mr_bindinfo = *bind; 2422 mr->mr_logmttpgsz = mtt_pgsize_bits; 2423 2424 } else { 2425 /* 2426 * Check if the memory region MTT is shared by any other MRs. 2427 * Since the resource may be shared between multiple memory 2428 * regions (as a result of a "RegisterSharedMR()" verb) it is 2429 * important that we not unbind any resources prematurely. 2430 */ 2431 if (!HERMON_MTT_IS_SHARED(swrc_old)) { 2432 /* 2433 * Unbind the old mapping for this memory region, but 2434 * retain the ddi_dma_handle_t for reuse in the bind 2435 * operation below. 
Note: This can only be done here 2436 * because the region being reregistered is not 2437 * currently shared. Also if original memory region 2438 * was bound for IOMMU bypass and the new region can 2439 * not use bypass, then a new DMA handle will be 2440 * necessary. 2441 */ 2442 if (HERMON_MR_REUSE_DMAHDL(mr, bind->bi_flags)) { 2443 mr->mr_bindinfo.bi_free_dmahdl = 0; 2444 hermon_mr_mem_unbind(state, &mr->mr_bindinfo); 2445 dmahdl = mr->mr_bindinfo.bi_dmahdl; 2446 reuse_dmahdl = 1; 2447 } else { 2448 hermon_mr_mem_unbind(state, &mr->mr_bindinfo); 2449 dmahdl = NULL; 2450 reuse_dmahdl = 0; 2451 } 2452 } else { 2453 dmahdl = NULL; 2454 reuse_dmahdl = 0; 2455 } 2456 2457 /* 2458 * Bind the new memory and determine the mapped addresses. 2459 * As described, this routine and hermon_mr_fast_mtt_write() 2460 * do the majority of the work for the memory registration 2461 * operations. Note: When we successfully finish the binding, 2462 * we will set the "bi_free_dmahdl" flag to indicate that 2463 * even though we may have reused the ddi_dma_handle_t we do 2464 * wish it to be freed up at some later time. Note also that 2465 * if we fail, we may need to cleanup the ddi_dma_handle_t. 2466 */ 2467 bind->bi_bypass = bind_type; 2468 status = hermon_mr_mem_bind(state, bind, dmahdl, sleep, 1); 2469 if (status != DDI_SUCCESS) { 2470 if (reuse_dmahdl) { 2471 ddi_dma_free_handle(&dmahdl); 2472 } 2473 2474 /* 2475 * Deregister will be called upon returning failure 2476 * from this routine. This will ensure that all 2477 * current resources get properly freed up. 2478 * Unnecessary to attempt to regain software ownership 2479 * of the MPT entry as that has already been done 2480 * above (in hermon_mr_reregister()). Also unnecessary 2481 * to attempt to unbind the memory. 
2482 */ 2483 *dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND; 2484 2485 status = IBT_INSUFF_RESOURCE; 2486 goto mrrereghelp_fail; 2487 } 2488 if (reuse_dmahdl) { 2489 bind->bi_free_dmahdl = 1; 2490 } 2491 2492 /* 2493 * Allocate the new MTT entries resource 2494 */ 2495 status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt_needed, 2496 sleep, &mtt); 2497 if (status != DDI_SUCCESS) { 2498 /* 2499 * Deregister will be called upon returning failure 2500 * from this routine. This will ensure that all 2501 * current resources get properly freed up. 2502 * Unnecessary to attempt to regain software ownership 2503 * of the MPT entry as that has already been done 2504 * above (in hermon_mr_reregister()). Also unnecessary 2505 * to attempt to unbind the memory. 2506 * 2507 * But we do need to unbind the newly bound memory 2508 * before returning. 2509 */ 2510 hermon_mr_mem_unbind(state, bind); 2511 *dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND; 2512 2513 status = IBT_INSUFF_RESOURCE; 2514 goto mrrereghelp_fail; 2515 } 2516 2517 /* 2518 * Allocate MTT reference count (to track shared memory 2519 * regions). As mentioned elsewhere above, this reference 2520 * count resource may never be used on the given memory region, 2521 * but if it is ever later registered as a "shared" memory 2522 * region then this resource will be necessary. Note: This 2523 * is only necessary here if the existing memory region is 2524 * already being shared (because otherwise we already have 2525 * a useable reference count resource). 2526 */ 2527 if (HERMON_MTT_IS_SHARED(swrc_old)) { 2528 status = hermon_rsrc_alloc(state, HERMON_REFCNT, 1, 2529 sleep, &mtt_refcnt); 2530 if (status != DDI_SUCCESS) { 2531 /* 2532 * Deregister will be called upon returning 2533 * failure from this routine. This will ensure 2534 * that all current resources get properly 2535 * freed up. 
Unnecessary to attempt to regain 2536 * software ownership of the MPT entry as that 2537 * has already been done above (in 2538 * hermon_mr_reregister()). Also unnecessary 2539 * to attempt to unbind the memory. 2540 * 2541 * But we need to unbind the newly bound 2542 * memory and free up the newly allocated MTT 2543 * entries before returning. 2544 */ 2545 hermon_mr_mem_unbind(state, bind); 2546 hermon_rsrc_free(state, &mtt); 2547 *dereg_level = 2548 HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND; 2549 2550 status = IBT_INSUFF_RESOURCE; 2551 goto mrrereghelp_fail; 2552 } 2553 swrc_new = (hermon_sw_refcnt_t *)mtt_refcnt->hr_addr; 2554 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_new)) 2555 HERMON_MTT_REFCNT_INIT(swrc_new); 2556 } else { 2557 mtt_refcnt = mr->mr_mttrefcntp; 2558 } 2559 2560 /* 2561 * Using the new mapping and the new MTT resources, write the 2562 * updated entries to MTT 2563 */ 2564 status = hermon_mr_fast_mtt_write(state, mtt, bind, 2565 mtt_pgsize_bits); 2566 if (status != DDI_SUCCESS) { 2567 /* 2568 * Deregister will be called upon returning failure 2569 * from this routine. This will ensure that all 2570 * current resources get properly freed up. 2571 * Unnecessary to attempt to regain software ownership 2572 * of the MPT entry as that has already been done 2573 * above (in hermon_mr_reregister()). Also unnecessary 2574 * to attempt to unbind the memory. 2575 * 2576 * But we need to unbind the newly bound memory, 2577 * free up the newly allocated MTT entries, and 2578 * (possibly) free the new MTT reference count 2579 * resource before returning. 2580 */ 2581 if (HERMON_MTT_IS_SHARED(swrc_old)) { 2582 hermon_rsrc_free(state, &mtt_refcnt); 2583 } 2584 hermon_mr_mem_unbind(state, bind); 2585 hermon_rsrc_free(state, &mtt); 2586 *dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND; 2587 2588 status = IBT_INSUFF_RESOURCE; 2589 goto mrrereghelp_fail; 2590 } 2591 2592 /* 2593 * Check if the memory region MTT is shared by any other MRs. 
2594 * Since the resource may be shared between multiple memory 2595 * regions (as a result of a "RegisterSharedMR()" verb) it is 2596 * important that we not free up any resources prematurely. 2597 */ 2598 if (HERMON_MTT_IS_SHARED(swrc_old)) { 2599 /* Decrement MTT reference count for "old" region */ 2600 (void) hermon_mtt_refcnt_dec(mr->mr_mttrefcntp); 2601 } else { 2602 /* Free up the old MTT entries resource */ 2603 hermon_rsrc_free(state, &mr->mr_mttrsrcp); 2604 } 2605 2606 /* Put the updated information into the mrhdl */ 2607 mr->mr_bindinfo = *bind; 2608 mr->mr_logmttpgsz = mtt_pgsize_bits; 2609 mr->mr_mttrsrcp = mtt; 2610 mr->mr_mttrefcntp = mtt_refcnt; 2611 } 2612 2613 /* 2614 * Calculate and return the updated MTT address (in the DDR address 2615 * space). This will be used by the caller (hermon_mr_reregister) in 2616 * the updated MPT entry 2617 */ 2618 *mtt_addr = mtt->hr_indx << HERMON_MTT_SIZE_SHIFT; 2619 2620 return (DDI_SUCCESS); 2621 2622 mrrereghelp_fail: 2623 return (status); 2624 } 2625 2626 2627 /* 2628 * hermon_mr_nummtt_needed() 2629 * Context: Can be called from interrupt or base context. 2630 */ 2631 /* ARGSUSED */ 2632 static uint64_t 2633 hermon_mr_nummtt_needed(hermon_state_t *state, hermon_bind_info_t *bind, 2634 uint_t *mtt_pgsize_bits) 2635 { 2636 uint64_t pg_offset_mask; 2637 uint64_t pg_offset, tmp_length; 2638 2639 /* 2640 * For now we specify the page size as 8Kb (the default page size for 2641 * the sun4u architecture), or 4Kb for x86. Figure out optimal page 2642 * size by examining the dmacookies 2643 */ 2644 *mtt_pgsize_bits = PAGESHIFT; 2645 2646 pg_offset_mask = ((uint64_t)1 << *mtt_pgsize_bits) - 1; 2647 pg_offset = bind->bi_addr & pg_offset_mask; 2648 tmp_length = pg_offset + (bind->bi_len - 1); 2649 return ((tmp_length >> *mtt_pgsize_bits) + 1); 2650 } 2651 2652 2653 /* 2654 * hermon_mr_mem_bind() 2655 * Context: Can be called from interrupt or base context. 
2656 */ 2657 static int 2658 hermon_mr_mem_bind(hermon_state_t *state, hermon_bind_info_t *bind, 2659 ddi_dma_handle_t dmahdl, uint_t sleep, uint_t is_buffer) 2660 { 2661 ddi_dma_attr_t dma_attr; 2662 int (*callback)(caddr_t); 2663 int status; 2664 2665 /* bi_type must be set to a meaningful value to get a bind handle */ 2666 ASSERT(bind->bi_type == HERMON_BINDHDL_VADDR || 2667 bind->bi_type == HERMON_BINDHDL_BUF || 2668 bind->bi_type == HERMON_BINDHDL_UBUF); 2669 2670 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind)) 2671 2672 /* Set the callback flag appropriately */ 2673 callback = (sleep == HERMON_SLEEP) ? DDI_DMA_SLEEP : DDI_DMA_DONTWAIT; 2674 2675 /* 2676 * Initialize many of the default DMA attributes. Then, if we're 2677 * bypassing the IOMMU, set the DDI_DMA_FORCE_PHYSICAL flag. 2678 */ 2679 if (dmahdl == NULL) { 2680 hermon_dma_attr_init(state, &dma_attr); 2681 #ifdef __sparc 2682 if (bind->bi_bypass == HERMON_BINDMEM_BYPASS) { 2683 dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL; 2684 } 2685 #endif 2686 2687 /* set RO if needed - tunable set and 'is_buffer' is non-0 */ 2688 if (is_buffer) { 2689 if (! (bind->bi_flags & IBT_MR_DISABLE_RO)) { 2690 if ((bind->bi_type != HERMON_BINDHDL_UBUF) && 2691 (hermon_kernel_data_ro == 2692 HERMON_RO_ENABLED)) { 2693 dma_attr.dma_attr_flags |= 2694 DDI_DMA_RELAXED_ORDERING; 2695 } 2696 if (((bind->bi_type == HERMON_BINDHDL_UBUF) && 2697 (hermon_user_data_ro == 2698 HERMON_RO_ENABLED))) { 2699 dma_attr.dma_attr_flags |= 2700 DDI_DMA_RELAXED_ORDERING; 2701 } 2702 } 2703 } 2704 2705 /* Allocate a DMA handle for the binding */ 2706 status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr, 2707 callback, NULL, &bind->bi_dmahdl); 2708 if (status != DDI_SUCCESS) { 2709 return (status); 2710 } 2711 bind->bi_free_dmahdl = 1; 2712 2713 } else { 2714 bind->bi_dmahdl = dmahdl; 2715 bind->bi_free_dmahdl = 0; 2716 } 2717 2718 2719 /* 2720 * Bind the memory to get the PCI mapped addresses. 
The decision 2721 * to call ddi_dma_addr_bind_handle() or ddi_dma_buf_bind_handle() 2722 * is determined by the "bi_type" flag. Note: if the bind operation 2723 * fails then we have to free up the DMA handle and return error. 2724 */ 2725 if (bind->bi_type == HERMON_BINDHDL_VADDR) { 2726 status = ddi_dma_addr_bind_handle(bind->bi_dmahdl, NULL, 2727 (caddr_t)(uintptr_t)bind->bi_addr, bind->bi_len, 2728 (DDI_DMA_RDWR | DDI_DMA_CONSISTENT), callback, NULL, 2729 &bind->bi_dmacookie, &bind->bi_cookiecnt); 2730 2731 } else { /* HERMON_BINDHDL_BUF or HERMON_BINDHDL_UBUF */ 2732 2733 status = ddi_dma_buf_bind_handle(bind->bi_dmahdl, 2734 bind->bi_buf, (DDI_DMA_RDWR | DDI_DMA_CONSISTENT), callback, 2735 NULL, &bind->bi_dmacookie, &bind->bi_cookiecnt); 2736 } 2737 if (status != DDI_DMA_MAPPED) { 2738 if (bind->bi_free_dmahdl != 0) { 2739 ddi_dma_free_handle(&bind->bi_dmahdl); 2740 } 2741 return (status); 2742 } 2743 2744 return (DDI_SUCCESS); 2745 } 2746 2747 2748 /* 2749 * hermon_mr_mem_unbind() 2750 * Context: Can be called from interrupt or base context. 2751 */ 2752 static void 2753 hermon_mr_mem_unbind(hermon_state_t *state, hermon_bind_info_t *bind) 2754 { 2755 int status; 2756 2757 /* 2758 * In case of HERMON_BINDHDL_UBUF, the memory bi_buf points to 2759 * is actually allocated by ddi_umem_iosetup() internally, then 2760 * it's required to free it here. Reset bi_type to HERMON_BINDHDL_NONE 2761 * not to free it again later. 2762 */ 2763 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind)) 2764 if (bind->bi_type == HERMON_BINDHDL_UBUF) { 2765 freerbuf(bind->bi_buf); 2766 bind->bi_type = HERMON_BINDHDL_NONE; 2767 } 2768 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind)) 2769 2770 /* 2771 * Unbind the DMA memory for the region 2772 * 2773 * Note: The only way ddi_dma_unbind_handle() currently 2774 * can return an error is if the handle passed in is invalid. 2775 * Since this should never happen, we choose to return void 2776 * from this function! 
If this does return an error, however, 2777 * then we print a warning message to the console. 2778 */ 2779 status = ddi_dma_unbind_handle(bind->bi_dmahdl); 2780 if (status != DDI_SUCCESS) { 2781 HERMON_WARNING(state, "failed to unbind DMA mapping"); 2782 return; 2783 } 2784 2785 /* Free up the DMA handle */ 2786 if (bind->bi_free_dmahdl != 0) { 2787 ddi_dma_free_handle(&bind->bi_dmahdl); 2788 } 2789 } 2790 2791 2792 /* 2793 * hermon_mr_fast_mtt_write() 2794 * Context: Can be called from interrupt or base context. 2795 */ 2796 static int 2797 hermon_mr_fast_mtt_write(hermon_state_t *state, hermon_rsrc_t *mtt, 2798 hermon_bind_info_t *bind, uint32_t mtt_pgsize_bits) 2799 { 2800 hermon_icm_table_t *icm_table; 2801 hermon_dma_info_t *dma_info; 2802 uint32_t index1, index2, rindx; 2803 ddi_dma_cookie_t dmacookie; 2804 uint_t cookie_cnt; 2805 uint64_t *mtt_table; 2806 uint64_t mtt_entry; 2807 uint64_t addr, endaddr; 2808 uint64_t pagesize; 2809 offset_t i, start; 2810 uint_t per_span; 2811 int sync_needed; 2812 2813 /* 2814 * XXX According to the PRM, we are to use the WRITE_MTT 2815 * command to write out MTTs. Tavor does not do this, 2816 * instead taking advantage of direct access to the MTTs, 2817 * and knowledge that Mellanox FMR relies on our ability 2818 * to write directly to the MTTs without any further 2819 * notification to the firmware. Likewise, we will choose 2820 * to not use the WRITE_MTT command, but to simply write 2821 * out the MTTs. 
2822 */ 2823 2824 /* Calculate page size from the suggested value passed in */ 2825 pagesize = ((uint64_t)1 << mtt_pgsize_bits); 2826 2827 /* Walk the "cookie list" and fill in the MTT table entries */ 2828 dmacookie = bind->bi_dmacookie; 2829 cookie_cnt = bind->bi_cookiecnt; 2830 2831 icm_table = &state->hs_icm[HERMON_MTT]; 2832 rindx = mtt->hr_indx; 2833 hermon_index(index1, index2, rindx, icm_table, i); 2834 start = i; 2835 2836 per_span = icm_table->span; 2837 dma_info = icm_table->icm_dma[index1] + index2; 2838 mtt_table = (uint64_t *)(uintptr_t)dma_info->vaddr; 2839 2840 sync_needed = 0; 2841 while (cookie_cnt-- > 0) { 2842 addr = dmacookie.dmac_laddress; 2843 endaddr = addr + (dmacookie.dmac_size - 1); 2844 addr = addr & ~((uint64_t)pagesize - 1); 2845 2846 while (addr <= endaddr) { 2847 2848 /* 2849 * Fill in the mapped addresses (calculated above) and 2850 * set HERMON_MTT_ENTRY_PRESENT flag for each MTT entry. 2851 */ 2852 mtt_entry = addr | HERMON_MTT_ENTRY_PRESENT; 2853 mtt_table[i] = htonll(mtt_entry); 2854 i++; 2855 rindx++; 2856 2857 if (i == per_span) { 2858 2859 (void) ddi_dma_sync(dma_info->dma_hdl, 2860 start * sizeof (hermon_hw_mtt_t), 2861 (i - start) * sizeof (hermon_hw_mtt_t), 2862 DDI_DMA_SYNC_FORDEV); 2863 2864 if ((addr + pagesize > endaddr) && 2865 (cookie_cnt == 0)) 2866 return (DDI_SUCCESS); 2867 2868 hermon_index(index1, index2, rindx, icm_table, 2869 i); 2870 start = i * sizeof (hermon_hw_mtt_t); 2871 dma_info = icm_table->icm_dma[index1] + index2; 2872 mtt_table = 2873 (uint64_t *)(uintptr_t)dma_info->vaddr; 2874 2875 sync_needed = 0; 2876 } else { 2877 sync_needed = 1; 2878 } 2879 2880 addr += pagesize; 2881 if (addr == 0) { 2882 static int do_once = 1; 2883 _NOTE(SCHEME_PROTECTS_DATA("safe sharing", 2884 do_once)) 2885 if (do_once) { 2886 do_once = 0; 2887 cmn_err(CE_NOTE, "probable error in " 2888 "dma_cookie address from caller\n"); 2889 } 2890 break; 2891 } 2892 } 2893 2894 /* 2895 * When we've reached the end of the current DMA 
cookie, 2896 * jump to the next cookie (if there are more) 2897 */ 2898 if (cookie_cnt != 0) { 2899 ddi_dma_nextcookie(bind->bi_dmahdl, &dmacookie); 2900 } 2901 } 2902 2903 /* done all the cookies, now sync the memory for the device */ 2904 if (sync_needed) 2905 (void) ddi_dma_sync(dma_info->dma_hdl, 2906 start * sizeof (hermon_hw_mtt_t), 2907 (i - start) * sizeof (hermon_hw_mtt_t), 2908 DDI_DMA_SYNC_FORDEV); 2909 2910 return (DDI_SUCCESS); 2911 } 2912 2913 /* 2914 * hermon_mr_fast_mtt_write_fmr() 2915 * Context: Can be called from interrupt or base context. 2916 */ 2917 static int 2918 hermon_mr_fast_mtt_write_fmr(hermon_rsrc_t *mtt, ibt_pmr_attr_t *mem_pattr, 2919 uint32_t mtt_pgsize_bits) 2920 { 2921 uint64_t *mtt_table; 2922 ibt_phys_addr_t *buf; 2923 uint64_t mtt_entry; 2924 uint64_t addr, first_addr, endaddr; 2925 uint64_t pagesize; 2926 int i; 2927 2928 /* Calculate page size from the suggested value passed in */ 2929 pagesize = ((uint64_t)1 << mtt_pgsize_bits); 2930 2931 /* 2932 * Walk the "addr list" and fill in the MTT table entries 2933 */ 2934 mtt_table = (uint64_t *)mtt->hr_addr; 2935 for (i = 0; i < mem_pattr->pmr_num_buf; i++) { 2936 buf = &mem_pattr->pmr_addr_list[i]; 2937 2938 /* 2939 * For first cookie, use the offset field to determine where 2940 * the buffer starts. The end addr is then calculated with the 2941 * offset in mind. 2942 */ 2943 if (i == 0) { 2944 first_addr = addr = buf->p_laddr + 2945 mem_pattr->pmr_offset; 2946 endaddr = addr + (mem_pattr->pmr_buf_sz - 1) - 2947 mem_pattr->pmr_offset; 2948 /* 2949 * For last cookie, determine end addr based on starting 2950 * address and size of the total buffer 2951 */ 2952 } else if (i == mem_pattr->pmr_num_buf - 1) { 2953 addr = buf->p_laddr; 2954 endaddr = addr + (first_addr + mem_pattr->pmr_len & 2955 (mem_pattr->pmr_buf_sz - 1)); 2956 /* 2957 * For the middle cookies case, start and end addr are 2958 * straightforward. 
Just use the laddr, and the size, as all 2959 * middle cookies are a set size. 2960 */ 2961 } else { 2962 addr = buf->p_laddr; 2963 endaddr = addr + (mem_pattr->pmr_buf_sz - 1); 2964 } 2965 2966 addr = addr & ~((uint64_t)pagesize - 1); 2967 while (addr <= endaddr) { 2968 /* 2969 * Fill in the mapped addresses (calculated above) and 2970 * set HERMON_MTT_ENTRY_PRESENT flag for each MTT entry. 2971 */ 2972 mtt_entry = addr | HERMON_MTT_ENTRY_PRESENT; 2973 mtt_table[i] = htonll(mtt_entry); 2974 addr += pagesize; 2975 } 2976 } 2977 2978 return (DDI_SUCCESS); 2979 } 2980 2981 2982 /* 2983 * hermon_mtt_refcnt_inc() 2984 * Context: Can be called from interrupt or base context. 2985 */ 2986 static uint_t 2987 hermon_mtt_refcnt_inc(hermon_rsrc_t *rsrc) 2988 { 2989 hermon_sw_refcnt_t *rc; 2990 2991 rc = (hermon_sw_refcnt_t *)rsrc->hr_addr; 2992 return (atomic_inc_uint_nv(&rc->swrc_refcnt)); 2993 } 2994 2995 2996 /* 2997 * hermon_mtt_refcnt_dec() 2998 * Context: Can be called from interrupt or base context. 2999 */ 3000 static uint_t 3001 hermon_mtt_refcnt_dec(hermon_rsrc_t *rsrc) 3002 { 3003 hermon_sw_refcnt_t *rc; 3004 3005 rc = (hermon_sw_refcnt_t *)rsrc->hr_addr; 3006 return (atomic_dec_uint_nv(&rc->swrc_refcnt)); 3007 } 3008