1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * I/O Controller functions for the Solaris COMSTAR SCSI RDMA Protocol 29 * Target (SRPT) port provider. 30 */ 31 32 #include <sys/types.h> 33 #include <sys/ddi.h> 34 #include <sys/types.h> 35 #include <sys/sunddi.h> 36 #include <sys/atomic.h> 37 #include <sys/sysmacros.h> 38 #include <sys/ib/ibtl/ibti.h> 39 #include <sys/sdt.h> 40 41 #include "srp.h" 42 #include "srpt_impl.h" 43 #include "srpt_ioc.h" 44 #include "srpt_stp.h" 45 #include "srpt_ch.h" 46 47 /* 48 * srpt_ioc_srq_size - Tunable parameter that specifies the number 49 * of receive WQ entries that can be posted to the IOC shared 50 * receive queue. 51 */ 52 uint32_t srpt_ioc_srq_size = SRPT_DEFAULT_IOC_SRQ_SIZE; 53 extern uint16_t srpt_send_msg_depth; 54 55 /* IOC profile capabilities mask must be big-endian */ 56 typedef struct srpt_ioc_opcap_bits_s { 57 #if defined(_BIT_FIELDS_LTOH) 58 uint8_t af:1, 59 at:1, 60 wf:1, 61 wt:1, 62 rf:1, 63 rt:1, 64 sf:1, 65 st:1; 66 #elif defined(_BIT_FIELDS_HTOL) 67 uint8_t st:1, 68 sf:1, 69 rt:1, 70 rf:1, 71 wt:1, 72 wf:1, 73 at:1, 74 af:1; 75 #else 76 #error One of _BIT_FIELDS_LTOH or _BIT_FIELDS_HTOL must be defined 77 #endif 78 } srpt_ioc_opcap_bits_t; 79 80 typedef union { 81 srpt_ioc_opcap_bits_t bits; 82 uint8_t mask; 83 } srpt_ioc_opcap_mask_t; 84 85 /* 86 * vmem arena variables - values derived from iSER 87 */ 88 #define SRPT_MR_QUANTSIZE 0x400 /* 1K */ 89 #define SRPT_MIN_CHUNKSIZE 0x100000 /* 1MB */ 90 91 /* use less memory on 32-bit kernels as it's much more constrained */ 92 #ifdef _LP64 93 #define SRPT_BUF_MR_CHUNKSIZE 0x1000000 /* 16MB */ 94 #define SRPT_BUF_POOL_MAX 0x40000000 /* 1GB */ 95 #else 96 #define SRPT_BUF_MR_CHUNKSIZE 0x400000 /* 4MB */ 97 #define SRPT_BUF_POOL_MAX 0x4000000 /* 64MB */ 98 #endif 99 100 static ibt_mr_flags_t srpt_dbuf_mr_flags = 101 IBT_MR_ENABLE_LOCAL_WRITE | IBT_MR_ENABLE_REMOTE_WRITE | 102 IBT_MR_ENABLE_REMOTE_READ; 103 104 void srpt_ioc_ib_async_hdlr(void *clnt, ibt_hca_hdl_t hdl, 105 ibt_async_code_t code, ibt_async_event_t *event); 106 107 static struct ibt_clnt_modinfo_s srpt_ibt_modinfo = { 108 IBTI_V_CURR, 109 IBT_STORAGE_DEV, 110 srpt_ioc_ib_async_hdlr, 111 NULL, 112 "srpt" 113 }; 114 115 static srpt_ioc_t *srpt_ioc_init(ib_guid_t guid); 116 static void srpt_ioc_fini(srpt_ioc_t *ioc); 117 118 static srpt_vmem_pool_t *srpt_vmem_create(const char *name, srpt_ioc_t *ioc, 119 ib_memlen_t chunksize, uint64_t maxsize, ibt_mr_flags_t flags); 120 static void *srpt_vmem_alloc(srpt_vmem_pool_t *vm_pool, size_t size); 121 static int srpt_vmem_mr_compare(const void *a, const void *b); 122 static srpt_mr_t *srpt_vmem_chunk_alloc(srpt_vmem_pool_t *ioc, 123 ib_memlen_t chunksize); 124 static void srpt_vmem_destroy(srpt_vmem_pool_t *vm_pool); 125 static void srpt_vmem_free(srpt_vmem_pool_t *vm_pool, void *vaddr, size_t size); 126 static srpt_mr_t *srpt_reg_mem(srpt_vmem_pool_t *vm_pool, ib_vaddr_t vaddr, 127 ib_memlen_t len); 128 static void srpt_vmem_chunk_free(srpt_vmem_pool_t *vm_pool, srpt_mr_t *mr); 129 static void srpt_dereg_mem(srpt_ioc_t *ioc, srpt_mr_t *mr); 130 static int srpt_vmem_mr(srpt_vmem_pool_t *vm_pool, void *vaddr, size_t size, 131 srpt_mr_t *mr); 132 133 /* 134 * srpt_ioc_attach() - I/O Controller attach 135 * 136 * Attach to IBTF and initialize I/O controllers. The srpt_ctxt->sc_rwlock 137 * should be held outside of this call. 138 */ 139 int 140 srpt_ioc_attach() 141 { 142 int status; 143 int hca_cnt; 144 int hca_ndx; 145 ib_guid_t *guid; 146 srpt_ioc_t *ioc; 147 148 ASSERT(srpt_ctxt != NULL); 149 150 /* 151 * Attach to IBTF and initialize a list of IB devices. Each 152 * HCA will be represented by an I/O Controller. 153 */ 154 status = ibt_attach(&srpt_ibt_modinfo, srpt_ctxt->sc_dip, 155 srpt_ctxt, &srpt_ctxt->sc_ibt_hdl); 156 if (status != DDI_SUCCESS) { 157 SRPT_DPRINTF_L1("ioc_attach, ibt_attach failed (0x%x)", 158 status); 159 return (DDI_FAILURE); 160 } 161 162 hca_cnt = ibt_get_hca_list(&guid); 163 if (hca_cnt < 1) { 164 /* 165 * not a fatal error. Service will be up and 166 * waiting for ATTACH events. 167 */ 168 SRPT_DPRINTF_L2("ioc_attach, no HCA found"); 169 return (DDI_SUCCESS); 170 } 171 172 for (hca_ndx = 0; hca_ndx < hca_cnt; hca_ndx++) { 173 SRPT_DPRINTF_L2("ioc_attach, adding I/O" 174 " Controller (%016llx)", (u_longlong_t)guid[hca_ndx]); 175 176 ioc = srpt_ioc_init(guid[hca_ndx]); 177 if (ioc == NULL) { 178 SRPT_DPRINTF_L1("ioc_attach, ioc_init GUID(%016llx)" 179 " failed", (u_longlong_t)guid[hca_ndx]); 180 continue; 181 } 182 list_insert_tail(&srpt_ctxt->sc_ioc_list, ioc); 183 SRPT_DPRINTF_L2("ioc_attach, I/O Controller ibt HCA hdl (%p)", 184 (void *)ioc->ioc_ibt_hdl); 185 srpt_ctxt->sc_num_iocs++; 186 } 187 188 ibt_free_hca_list(guid, hca_cnt); 189 SRPT_DPRINTF_L3("ioc_attach, added %d I/O Controller(s)", 190 srpt_ctxt->sc_num_iocs); 191 return (DDI_SUCCESS); 192 } 193 194 /* 195 * srpt_ioc_detach() - I/O Controller detach 196 * 197 * srpt_ctxt->sc_rwlock should be held outside of this call. 198 */ 199 void 200 srpt_ioc_detach() 201 { 202 srpt_ioc_t *ioc; 203 204 ASSERT(srpt_ctxt != NULL); 205 206 while ((ioc = list_head(&srpt_ctxt->sc_ioc_list)) != NULL) { 207 list_remove(&srpt_ctxt->sc_ioc_list, ioc); 208 SRPT_DPRINTF_L2("ioc_detach, removing I/O Controller(%p)" 209 " (%016llx), ibt_hdl(%p)", 210 (void *)ioc, 211 ioc ? (u_longlong_t)ioc->ioc_guid : 0x0ll, 212 (void *)ioc->ioc_ibt_hdl); 213 srpt_ioc_fini(ioc); 214 } 215 216 (void) ibt_detach(srpt_ctxt->sc_ibt_hdl); 217 srpt_ctxt->sc_ibt_hdl = NULL; 218 } 219 220 /* 221 * srpt_ioc_init() - I/O Controller initialization 222 * 223 * Requires srpt_ctxt->rw_lock be held outside of call. 224 */ 225 static srpt_ioc_t * 226 srpt_ioc_init(ib_guid_t guid) 227 { 228 ibt_status_t status; 229 srpt_ioc_t *ioc; 230 ibt_hca_attr_t hca_attr; 231 uint_t iu_ndx; 232 uint_t err_ndx; 233 ibt_mr_attr_t mr_attr; 234 ibt_mr_desc_t mr_desc; 235 srpt_iu_t *iu; 236 ibt_srq_sizes_t srq_attr; 237 char namebuf[32]; 238 size_t iu_offset; 239 240 status = ibt_query_hca_byguid(guid, &hca_attr); 241 if (status != IBT_SUCCESS) { 242 SRPT_DPRINTF_L1("ioc_init, HCA query error (%d)", 243 status); 244 return (NULL); 245 } 246 247 ioc = srpt_ioc_get_locked(guid); 248 if (ioc != NULL) { 249 SRPT_DPRINTF_L1("ioc_init, HCA already exists"); 250 return (NULL); 251 } 252 253 ioc = kmem_zalloc(sizeof (srpt_ioc_t), KM_SLEEP); 254 255 rw_init(&ioc->ioc_rwlock, NULL, RW_DRIVER, NULL); 256 rw_enter(&ioc->ioc_rwlock, RW_WRITER); 257 258 bcopy(&hca_attr, &ioc->ioc_attr, sizeof (ibt_hca_attr_t)); 259 260 SRPT_DPRINTF_L2("ioc_init, HCA max mr=%d, mrlen=%lld", 261 hca_attr.hca_max_memr, (u_longlong_t)hca_attr.hca_max_memr_len); 262 ioc->ioc_guid = guid; 263 264 status = ibt_open_hca(srpt_ctxt->sc_ibt_hdl, guid, &ioc->ioc_ibt_hdl); 265 if (status != IBT_SUCCESS) { 266 SRPT_DPRINTF_L1("ioc_init, IBT open failed (%d)", status); 267 goto hca_open_err; 268 } 269 270 status = ibt_alloc_pd(ioc->ioc_ibt_hdl, IBT_PD_NO_FLAGS, 271 &ioc->ioc_pd_hdl); 272 if (status != IBT_SUCCESS) { 273 SRPT_DPRINTF_L1("ioc_init, IBT create PD failed (%d)", status); 274 goto pd_alloc_err; 275 } 276 277 /* 278 * We require hardware support for SRQs. We use a common SRQ to 279 * reduce channel memory consumption. 280 */ 281 if ((ioc->ioc_attr.hca_flags & IBT_HCA_SRQ) == 0) { 282 SRPT_DPRINTF_L0("ioc_init, no SRQ capability, not supported"); 283 goto srq_alloc_err; 284 } 285 286 SRPT_DPRINTF_L3("ioc_init, Using shared receive queues, max srq work" 287 " queue size(%d), def size = %d", ioc->ioc_attr.hca_max_srqs_sz, 288 srpt_ioc_srq_size); 289 srq_attr.srq_wr_sz = min(srpt_ioc_srq_size, 290 ioc->ioc_attr.hca_max_srqs_sz); 291 srq_attr.srq_sgl_sz = 1; 292 293 status = ibt_alloc_srq(ioc->ioc_ibt_hdl, IBT_SRQ_NO_FLAGS, 294 ioc->ioc_pd_hdl, &srq_attr, &ioc->ioc_srq_hdl, 295 &ioc->ioc_srq_attr); 296 if (status != IBT_SUCCESS) { 297 SRPT_DPRINTF_L1("ioc_init, IBT create SRQ failed(%d)", status); 298 goto srq_alloc_err; 299 } 300 301 SRPT_DPRINTF_L2("ioc_init, SRQ WR size(%d), SG size(%d)", 302 ioc->ioc_srq_attr.srq_wr_sz, ioc->ioc_srq_attr.srq_sgl_sz); 303 304 ibt_set_srq_private(ioc->ioc_srq_hdl, ioc); 305 306 /* 307 * Allocate a pool of SRP IU message buffers and post them to 308 * the I/O Controller SRQ. We let the SRQ manage the free IU 309 * messages. 310 */ 311 ioc->ioc_num_iu_entries = 312 min(srq_attr.srq_wr_sz, srpt_ioc_srq_size) - 1; 313 314 ioc->ioc_iu_pool = kmem_zalloc(sizeof (srpt_iu_t) * 315 ioc->ioc_num_iu_entries, KM_SLEEP); 316 317 ioc->ioc_iu_bufs = kmem_alloc(SRPT_DEFAULT_SEND_MSG_SIZE * 318 ioc->ioc_num_iu_entries, KM_SLEEP); 319 320 if ((ioc->ioc_iu_pool == NULL) || (ioc->ioc_iu_bufs == NULL)) { 321 SRPT_DPRINTF_L1("ioc_init, failed to allocate SRQ IUs"); 322 goto srq_iu_alloc_err; 323 } 324 325 mr_attr.mr_vaddr = (ib_vaddr_t)(uintptr_t)ioc->ioc_iu_bufs; 326 mr_attr.mr_len = SRPT_DEFAULT_SEND_MSG_SIZE * ioc->ioc_num_iu_entries; 327 mr_attr.mr_as = NULL; 328 mr_attr.mr_flags = IBT_MR_ENABLE_LOCAL_WRITE; 329 330 status = ibt_register_mr(ioc->ioc_ibt_hdl, ioc->ioc_pd_hdl, 331 &mr_attr, &ioc->ioc_iu_mr_hdl, &mr_desc); 332 if (status != IBT_SUCCESS) { 333 SRPT_DPRINTF_L1("ioc_init, IU buffer pool MR err(%d)", 334 status); 335 goto srq_iu_alloc_err; 336 } 337 338 for (iu_ndx = 0, iu = ioc->ioc_iu_pool; iu_ndx < 339 ioc->ioc_num_iu_entries; iu_ndx++, iu++) { 340 341 iu_offset = (iu_ndx * SRPT_DEFAULT_SEND_MSG_SIZE); 342 iu->iu_buf = (void *)((uintptr_t)ioc->ioc_iu_bufs + iu_offset); 343 344 mutex_init(&iu->iu_lock, NULL, MUTEX_DRIVER, NULL); 345 346 iu->iu_sge.ds_va = mr_desc.md_vaddr + iu_offset; 347 iu->iu_sge.ds_key = mr_desc.md_lkey; 348 iu->iu_sge.ds_len = SRPT_DEFAULT_SEND_MSG_SIZE; 349 iu->iu_ioc = ioc; 350 iu->iu_pool_ndx = iu_ndx; 351 352 status = srpt_ioc_post_recv_iu(ioc, &ioc->ioc_iu_pool[iu_ndx]); 353 if (status != IBT_SUCCESS) { 354 SRPT_DPRINTF_L1("ioc_init, SRQ IU post err(%d)", 355 status); 356 goto srq_iu_post_err; 357 } 358 } 359 360 /* 361 * Initialize the dbuf vmem arena 362 */ 363 (void) snprintf(namebuf, sizeof (namebuf), 364 "srpt_buf_pool_%16llX", (u_longlong_t)guid); 365 ioc->ioc_dbuf_pool = srpt_vmem_create(namebuf, ioc, 366 SRPT_BUF_MR_CHUNKSIZE, SRPT_BUF_POOL_MAX, srpt_dbuf_mr_flags); 367 368 if (ioc->ioc_dbuf_pool == NULL) { 369 goto stmf_db_alloc_err; 370 } 371 372 /* 373 * Allocate the I/O Controller STMF data buffer allocator. The 374 * data store will span all targets associated with this IOC. 375 */ 376 ioc->ioc_stmf_ds = stmf_alloc(STMF_STRUCT_DBUF_STORE, 0, 0); 377 if (ioc->ioc_stmf_ds == NULL) { 378 SRPT_DPRINTF_L1("ioc_attach, STMF DBUF alloc failure for IOC"); 379 goto stmf_db_alloc_err; 380 } 381 ioc->ioc_stmf_ds->ds_alloc_data_buf = &srpt_ioc_ds_alloc_dbuf; 382 ioc->ioc_stmf_ds->ds_free_data_buf = &srpt_ioc_ds_free_dbuf; 383 ioc->ioc_stmf_ds->ds_port_private = ioc; 384 385 rw_exit(&ioc->ioc_rwlock); 386 return (ioc); 387 388 stmf_db_alloc_err: 389 if (ioc->ioc_dbuf_pool != NULL) { 390 srpt_vmem_destroy(ioc->ioc_dbuf_pool); 391 } 392 393 srq_iu_post_err: 394 if (ioc->ioc_iu_mr_hdl != NULL) { 395 status = ibt_deregister_mr(ioc->ioc_ibt_hdl, 396 ioc->ioc_iu_mr_hdl); 397 if (status != IBT_SUCCESS) { 398 SRPT_DPRINTF_L1("ioc_init, error deregistering" 399 " memory region (%d)", status); 400 } 401 } 402 for (err_ndx = 0, iu = ioc->ioc_iu_pool; err_ndx < iu_ndx; 403 err_ndx++, iu++) { 404 mutex_destroy(&iu->iu_lock); 405 } 406 407 srq_iu_alloc_err: 408 if (ioc->ioc_iu_bufs != NULL) { 409 kmem_free(ioc->ioc_iu_bufs, SRPT_DEFAULT_SEND_MSG_SIZE * 410 ioc->ioc_num_iu_entries); 411 } 412 if (ioc->ioc_iu_pool != NULL) { 413 kmem_free(ioc->ioc_iu_pool, 414 sizeof (srpt_iu_t) * ioc->ioc_num_iu_entries); 415 } 416 if (ioc->ioc_srq_hdl != NULL) { 417 status = ibt_free_srq(ioc->ioc_srq_hdl); 418 if (status != IBT_SUCCESS) { 419 SRPT_DPRINTF_L1("ioc_init, error freeing SRQ (%d)", 420 status); 421 } 422 423 } 424 425 srq_alloc_err: 426 status = ibt_free_pd(ioc->ioc_ibt_hdl, ioc->ioc_pd_hdl); 427 if (status != IBT_SUCCESS) { 428 SRPT_DPRINTF_L1("ioc_init, free PD error (%d)", status); 429 } 430 431 pd_alloc_err: 432 status = ibt_close_hca(ioc->ioc_ibt_hdl); 433 if (status != IBT_SUCCESS) { 434 SRPT_DPRINTF_L1("ioc_init, close ioc error (%d)", status); 435 } 436 437 hca_open_err: 438 rw_exit(&ioc->ioc_rwlock); 439 rw_destroy(&ioc->ioc_rwlock); 440 kmem_free(ioc, sizeof (*ioc)); 441 return (NULL); 442 } 443 444 /* 445 * srpt_ioc_fini() - I/O Controller Cleanup 446 * 447 * Requires srpt_ctxt->sc_rwlock be held outside of call. 448 */ 449 static void 450 srpt_ioc_fini(srpt_ioc_t *ioc) 451 { 452 int status; 453 int ndx; 454 455 /* 456 * Note driver flows will have already taken all SRP 457 * services running on the I/O Controller off-line. 458 */ 459 rw_enter(&ioc->ioc_rwlock, RW_WRITER); 460 if (ioc->ioc_ibt_hdl != NULL) { 461 if (ioc->ioc_stmf_ds != NULL) { 462 stmf_free(ioc->ioc_stmf_ds); 463 } 464 465 if (ioc->ioc_srq_hdl != NULL) { 466 SRPT_DPRINTF_L4("ioc_fini, freeing SRQ"); 467 status = ibt_free_srq(ioc->ioc_srq_hdl); 468 if (status != IBT_SUCCESS) { 469 SRPT_DPRINTF_L1("ioc_fini, free SRQ" 470 " error (%d)", status); 471 } 472 } 473 474 if (ioc->ioc_iu_mr_hdl != NULL) { 475 status = ibt_deregister_mr( 476 ioc->ioc_ibt_hdl, ioc->ioc_iu_mr_hdl); 477 if (status != IBT_SUCCESS) { 478 SRPT_DPRINTF_L1("ioc_fini, error deregistering" 479 " memory region (%d)", status); 480 } 481 } 482 483 if (ioc->ioc_iu_bufs != NULL) { 484 kmem_free(ioc->ioc_iu_bufs, SRPT_DEFAULT_SEND_MSG_SIZE * 485 ioc->ioc_num_iu_entries); 486 } 487 488 if (ioc->ioc_iu_pool != NULL) { 489 SRPT_DPRINTF_L4("ioc_fini, freeing IU entries"); 490 for (ndx = 0; ndx < ioc->ioc_num_iu_entries; ndx++) { 491 mutex_destroy(&ioc->ioc_iu_pool[ndx].iu_lock); 492 } 493 494 SRPT_DPRINTF_L4("ioc_fini, free IU pool struct"); 495 kmem_free(ioc->ioc_iu_pool, 496 sizeof (srpt_iu_t) * (ioc->ioc_num_iu_entries)); 497 ioc->ioc_iu_pool = NULL; 498 ioc->ioc_num_iu_entries = 0; 499 } 500 501 if (ioc->ioc_dbuf_pool != NULL) { 502 srpt_vmem_destroy(ioc->ioc_dbuf_pool); 503 } 504 505 if (ioc->ioc_pd_hdl != NULL) { 506 status = ibt_free_pd(ioc->ioc_ibt_hdl, 507 ioc->ioc_pd_hdl); 508 if (status != IBT_SUCCESS) { 509 SRPT_DPRINTF_L1("ioc_fini, free PD" 510 " error (%d)", status); 511 } 512 } 513 514 status = ibt_close_hca(ioc->ioc_ibt_hdl); 515 if (status != IBT_SUCCESS) { 516 SRPT_DPRINTF_L1( 517 "ioc_fini, close ioc error (%d)", status); 518 } 519 } 520 rw_exit(&ioc->ioc_rwlock); 521 rw_destroy(&ioc->ioc_rwlock); 522 kmem_free(ioc, sizeof (srpt_ioc_t)); 523 } 524 525 /* 526 * srpt_ioc_port_active() - I/O Controller port active 527 */ 528 static void 529 srpt_ioc_port_active(ibt_async_event_t *event) 530 { 531 ibt_status_t status; 532 srpt_ioc_t *ioc; 533 534 ASSERT(event != NULL); 535 536 SRPT_DPRINTF_L3("ioc_port_active event handler, invoked"); 537 538 /* 539 * Find the HCA in question and if the HCA has completed 540 * initialization, and the SRP Target service for the 541 * the I/O Controller exists, then bind this port. 542 */ 543 ioc = srpt_ioc_get(event->ev_hca_guid); 544 545 if (ioc == NULL) { 546 SRPT_DPRINTF_L2("ioc_port_active, I/O Controller not" 547 " active"); 548 return; 549 } 550 551 if (ioc->ioc_tgt_port == NULL) { 552 SRPT_DPRINTF_L2("ioc_port_active, no I/O Controller target" 553 " undefined"); 554 return; 555 } 556 557 558 /* 559 * We take the target lock here to serialize this operation 560 * with any STMF initiated target state transitions. If 561 * SRP is off-line then the service handle is NULL. 562 */ 563 mutex_enter(&ioc->ioc_tgt_port->tp_lock); 564 565 if (ioc->ioc_tgt_port->tp_ibt_svc_hdl != NULL) { 566 status = srpt_ioc_svc_bind(ioc->ioc_tgt_port, event->ev_port); 567 if (status != IBT_SUCCESS && 568 status != IBT_HCA_PORT_NOT_ACTIVE) { 569 SRPT_DPRINTF_L1("ioc_port_active, bind failed (%d)", 570 status); 571 } 572 } 573 mutex_exit(&ioc->ioc_tgt_port->tp_lock); 574 } 575 576 /* 577 * srpt_ioc_port_down() 578 */ 579 static void 580 srpt_ioc_port_down(ibt_async_event_t *event) 581 { 582 srpt_ioc_t *ioc; 583 srpt_target_port_t *tgt; 584 srpt_channel_t *ch; 585 srpt_channel_t *next_ch; 586 587 SRPT_DPRINTF_L3("ioc_port_down event handler, invoked"); 588 589 /* 590 * Find the HCA in question and if the HCA has completed 591 * initialization, and the SRP Target service for the 592 * the I/O Controller exists, then logout initiators 593 * through this port. 594 */ 595 ioc = srpt_ioc_get(event->ev_hca_guid); 596 597 if (ioc == NULL) { 598 SRPT_DPRINTF_L2("ioc_port_down, I/O Controller not" 599 " active"); 600 return; 601 } 602 603 /* 604 * We only have one target now, but we could go through all 605 * SCSI target ports if more are added. 606 */ 607 tgt = ioc->ioc_tgt_port; 608 if (tgt == NULL) { 609 SRPT_DPRINTF_L2("ioc_port_down, no I/O Controller target" 610 " undefined"); 611 return; 612 } 613 mutex_enter(&tgt->tp_lock); 614 615 /* 616 * For all channel's logged in through this port, initiate a 617 * disconnect. 618 */ 619 mutex_enter(&tgt->tp_ch_list_lock); 620 ch = list_head(&tgt->tp_ch_list); 621 while (ch != NULL) { 622 next_ch = list_next(&tgt->tp_ch_list, ch); 623 if (ch->ch_session && (ch->ch_session->ss_hw_port == 624 event->ev_port)) { 625 srpt_ch_disconnect(ch); 626 } 627 ch = next_ch; 628 } 629 mutex_exit(&tgt->tp_ch_list_lock); 630 631 mutex_exit(&tgt->tp_lock); 632 } 633 634 /* 635 * srpt_ioc_ib_async_hdlr - I/O Controller IB asynchronous events 636 */ 637 /* ARGSUSED */ 638 void 639 srpt_ioc_ib_async_hdlr(void *clnt, ibt_hca_hdl_t hdl, 640 ibt_async_code_t code, ibt_async_event_t *event) 641 { 642 srpt_ioc_t *ioc; 643 srpt_channel_t *ch; 644 645 switch (code) { 646 case IBT_EVENT_PORT_UP: 647 srpt_ioc_port_active(event); 648 break; 649 650 case IBT_ERROR_PORT_DOWN: 651 srpt_ioc_port_down(event); 652 break; 653 654 case IBT_HCA_ATTACH_EVENT: 655 rw_enter(&srpt_ctxt->sc_rwlock, RW_WRITER); 656 ioc = srpt_ioc_init(event->ev_hca_guid); 657 658 if (ioc == NULL) { 659 rw_exit(&srpt_ctxt->sc_rwlock); 660 SRPT_DPRINTF_L1("ib_async_hdlr, HCA_ATTACH" 661 " event failed to initialize HCA (0x%016llx)", 662 (u_longlong_t)event->ev_hca_guid); 663 return; 664 } 665 SRPT_DPRINTF_L2("HCA_ATTACH_EVENT: I/O Controller" 666 " ibt hdl (%p)", 667 (void *)ioc->ioc_ibt_hdl); 668 669 rw_enter(&ioc->ioc_rwlock, RW_WRITER); 670 ioc->ioc_tgt_port = srpt_stp_alloc_port(ioc, ioc->ioc_guid); 671 if (ioc->ioc_tgt_port == NULL) { 672 SRPT_DPRINTF_L1("ioc_ib_async_hdlr, alloc SCSI " 673 "target port error for HCA (0x%016llx)", 674 (u_longlong_t)event->ev_hca_guid); 675 rw_exit(&ioc->ioc_rwlock); 676 srpt_ioc_fini(ioc); 677 rw_exit(&srpt_ctxt->sc_rwlock); 678 return; 679 } 680 681 /* 682 * New HCA added with default SCSI Target Port, SRP service 683 * will be started when SCSI Target Port is brought 684 * on-line by STMF. 685 */ 686 srpt_ctxt->sc_num_iocs++; 687 list_insert_tail(&srpt_ctxt->sc_ioc_list, ioc); 688 689 rw_exit(&ioc->ioc_rwlock); 690 rw_exit(&srpt_ctxt->sc_rwlock); 691 break; 692 693 case IBT_HCA_DETACH_EVENT: 694 SRPT_DPRINTF_L1( 695 "ioc_iob_async_hdlr, HCA_DETACH_EVENT received."); 696 break; 697 698 case IBT_EVENT_EMPTY_CHAN: 699 /* Channel in ERROR state is now empty */ 700 ch = (srpt_channel_t *)ibt_get_chan_private(event->ev_chan_hdl); 701 SRPT_DPRINTF_L3( 702 "ioc_iob_async_hdlr, received empty channel error on %p", 703 (void *)ch); 704 break; 705 706 default: 707 SRPT_DPRINTF_L2("ioc_ib_async_hdlr, event not " 708 "handled (%d)", code); 709 break; 710 } 711 } 712 713 /* 714 * srpt_ioc_svc_bind() 715 */ 716 ibt_status_t 717 srpt_ioc_svc_bind(srpt_target_port_t *tgt, uint_t portnum) 718 { 719 ibt_status_t status; 720 srpt_hw_port_t *port; 721 ibt_hca_portinfo_t *portinfo; 722 uint_t qportinfo_sz; 723 uint_t qportnum; 724 ib_gid_t new_gid; 725 srpt_ioc_t *ioc; 726 srpt_session_t sess; 727 728 ASSERT(tgt != NULL); 729 ASSERT(tgt->tp_ioc != NULL); 730 ioc = tgt->tp_ioc; 731 732 if (tgt->tp_ibt_svc_hdl == NULL) { 733 SRPT_DPRINTF_L2("ioc_svc_bind, NULL SCSI target port" 734 " service"); 735 return (IBT_INVALID_PARAM); 736 } 737 738 if (portnum == 0 || portnum > tgt->tp_nports) { 739 SRPT_DPRINTF_L2("ioc_svc_bind, bad port (%d)", portnum); 740 return (IBT_INVALID_PARAM); 741 } 742 status = ibt_query_hca_ports(ioc->ioc_ibt_hdl, portnum, 743 &portinfo, &qportnum, &qportinfo_sz); 744 if (status != IBT_SUCCESS) { 745 SRPT_DPRINTF_L1("ioc_svc_bind, query port error (%d)", 746 portnum); 747 return (IBT_INVALID_PARAM); 748 } 749 750 ASSERT(portinfo != NULL); 751 752 /* 753 * If port is not active do nothing, caller should attempt to bind 754 * after the port goes active. 755 */ 756 if (portinfo->p_linkstate != IBT_PORT_ACTIVE) { 757 SRPT_DPRINTF_L2("ioc_svc_bind, port %d not in active state", 758 portnum); 759 ibt_free_portinfo(portinfo, qportinfo_sz); 760 return (IBT_HCA_PORT_NOT_ACTIVE); 761 } 762 763 port = &tgt->tp_hw_port[portnum-1]; 764 new_gid = portinfo->p_sgid_tbl[0]; 765 ibt_free_portinfo(portinfo, qportinfo_sz); 766 767 /* 768 * If previously bound and the port GID has changed, 769 * rebind to the new GID. 770 */ 771 if (port->hwp_bind_hdl != NULL) { 772 if (new_gid.gid_guid != port->hwp_gid.gid_guid || 773 new_gid.gid_prefix != port->hwp_gid.gid_prefix) { 774 SRPT_DPRINTF_L2("ioc_svc_bind, unregister current" 775 " bind"); 776 (void) ibt_unbind_service(tgt->tp_ibt_svc_hdl, 777 port->hwp_bind_hdl); 778 port->hwp_bind_hdl = NULL; 779 } 780 } 781 SRPT_DPRINTF_L2("ioc_svc_bind, bind service, %016llx:%016llx", 782 (u_longlong_t)new_gid.gid_prefix, 783 (u_longlong_t)new_gid.gid_guid); 784 785 /* 786 * Pass SCSI Target Port as CM private data, the target will always 787 * exist while this service is bound. 788 */ 789 status = ibt_bind_service(tgt->tp_ibt_svc_hdl, new_gid, NULL, tgt, 790 &port->hwp_bind_hdl); 791 if (status != IBT_SUCCESS && status != IBT_CM_SERVICE_EXISTS) { 792 SRPT_DPRINTF_L1("ioc_svc_bind, bind error (%d)", status); 793 return (status); 794 } 795 port->hwp_gid.gid_prefix = new_gid.gid_prefix; 796 port->hwp_gid.gid_guid = new_gid.gid_guid; 797 798 /* setting up a transient structure for the dtrace probe. */ 799 bzero(&sess, sizeof (srpt_session_t)); 800 ALIAS_STR(sess.ss_t_gid, new_gid.gid_prefix, new_gid.gid_guid); 801 EUI_STR(sess.ss_t_name, tgt->tp_ibt_svc_id); 802 803 DTRACE_SRP_1(service__up, srpt_session_t, &sess); 804 805 return (IBT_SUCCESS); 806 } 807 808 /* 809 * srpt_ioc_svc_unbind() 810 */ 811 void 812 srpt_ioc_svc_unbind(srpt_target_port_t *tgt, uint_t portnum) 813 { 814 srpt_hw_port_t *port; 815 srpt_session_t sess; 816 817 if (tgt == NULL) { 818 SRPT_DPRINTF_L2("ioc_svc_unbind, SCSI target does not exist"); 819 return; 820 } 821 822 if (portnum == 0 || portnum > tgt->tp_nports) { 823 SRPT_DPRINTF_L2("ioc_svc_unbind, bad port (%d)", portnum); 824 return; 825 } 826 port = &tgt->tp_hw_port[portnum-1]; 827 828 /* setting up a transient structure for the dtrace probe. */ 829 bzero(&sess, sizeof (srpt_session_t)); 830 ALIAS_STR(sess.ss_t_gid, port->hwp_gid.gid_prefix, 831 port->hwp_gid.gid_guid); 832 EUI_STR(sess.ss_t_name, tgt->tp_ibt_svc_id); 833 834 DTRACE_SRP_1(service__down, srpt_session_t, &sess); 835 836 if (tgt->tp_ibt_svc_hdl != NULL && port->hwp_bind_hdl != NULL) { 837 SRPT_DPRINTF_L2("ioc_svc_unbind, unregister current bind"); 838 (void) ibt_unbind_service(tgt->tp_ibt_svc_hdl, 839 port->hwp_bind_hdl); 840 } 841 port->hwp_bind_hdl = NULL; 842 port->hwp_gid.gid_prefix = 0; 843 port->hwp_gid.gid_guid = 0; 844 } 845 846 /* 847 * srpt_ioc_svc_unbind_all() 848 */ 849 void 850 srpt_ioc_svc_unbind_all(srpt_target_port_t *tgt) 851 { 852 uint_t portnum; 853 854 if (tgt == NULL) { 855 SRPT_DPRINTF_L2("ioc_svc_unbind_all, NULL SCSI target port" 856 " specified"); 857 return; 858 } 859 for (portnum = 1; portnum <= tgt->tp_nports; portnum++) { 860 srpt_ioc_svc_unbind(tgt, portnum); 861 } 862 } 863 864 /* 865 * srpt_ioc_get_locked() 866 * 867 * Requires srpt_ctxt->rw_lock be held outside of call. 868 */ 869 srpt_ioc_t * 870 srpt_ioc_get_locked(ib_guid_t guid) 871 { 872 srpt_ioc_t *ioc; 873 874 ioc = list_head(&srpt_ctxt->sc_ioc_list); 875 while (ioc != NULL) { 876 if (ioc->ioc_guid == guid) { 877 break; 878 } 879 ioc = list_next(&srpt_ctxt->sc_ioc_list, ioc); 880 } 881 return (ioc); 882 } 883 884 /* 885 * srpt_ioc_get() 886 */ 887 srpt_ioc_t * 888 srpt_ioc_get(ib_guid_t guid) 889 { 890 srpt_ioc_t *ioc; 891 892 rw_enter(&srpt_ctxt->sc_rwlock, RW_READER); 893 ioc = srpt_ioc_get_locked(guid); 894 rw_exit(&srpt_ctxt->sc_rwlock); 895 return (ioc); 896 } 897 898 /* 899 * srpt_ioc_post_recv_iu() 900 */ 901 ibt_status_t 902 srpt_ioc_post_recv_iu(srpt_ioc_t *ioc, srpt_iu_t *iu) 903 { 904 ibt_status_t status; 905 ibt_recv_wr_t wr; 906 uint_t posted; 907 908 ASSERT(ioc != NULL); 909 ASSERT(iu != NULL); 910 911 wr.wr_id = (ibt_wrid_t)(uintptr_t)iu; 912 wr.wr_nds = 1; 913 wr.wr_sgl = &iu->iu_sge; 914 posted = 0; 915 916 status = ibt_post_srq(ioc->ioc_srq_hdl, &wr, 1, &posted); 917 if (status != IBT_SUCCESS) { 918 SRPT_DPRINTF_L2("ioc_post_recv_iu, post error (%d)", 919 status); 920 } 921 return (status); 922 } 923 924 /* 925 * srpt_ioc_repost_recv_iu() 926 */ 927 void 928 srpt_ioc_repost_recv_iu(srpt_ioc_t *ioc, srpt_iu_t *iu) 929 { 930 srpt_channel_t *ch; 931 ibt_status_t status; 932 933 ASSERT(iu != NULL); 934 ASSERT(mutex_owned(&iu->iu_lock)); 935 936 /* 937 * Some additional sanity checks while in debug state, all STMF 938 * related task activities should be complete prior to returning 939 * this IU to the available pool. 940 */ 941 ASSERT(iu->iu_stmf_task == NULL); 942 ASSERT(iu->iu_sq_posted_cnt == 0); 943 944 ch = iu->iu_ch; 945 iu->iu_ch = NULL; 946 iu->iu_num_rdescs = 0; 947 iu->iu_rdescs = NULL; 948 iu->iu_tot_xfer_len = 0; 949 iu->iu_tag = 0; 950 iu->iu_flags = 0; 951 iu->iu_sq_posted_cnt = 0; 952 953 status = srpt_ioc_post_recv_iu(ioc, iu); 954 955 if (status != IBT_SUCCESS) { 956 /* 957 * Very bad, we should initiate a shutdown of the I/O 958 * Controller here, off-lining any targets associated 959 * with this I/O Controller (and therefore disconnecting 960 * any logins that remain). 961 * 962 * In practice this should never happen so we put 963 * the code near the bottom of the implementation list. 964 */ 965 SRPT_DPRINTF_L0("ioc_repost_recv_iu, error RX IU (%d)", 966 status); 967 ASSERT(0); 968 } else if (ch != NULL) { 969 atomic_inc_32(&ch->ch_req_lim_delta); 970 } 971 } 972 973 /* 974 * srpt_ioc_init_profile() 975 * 976 * SRP I/O Controller serialization lock must be held when this 977 * routine is invoked. 978 */ 979 void 980 srpt_ioc_init_profile(srpt_ioc_t *ioc) 981 { 982 srpt_ioc_opcap_mask_t capmask = {0}; 983 984 ASSERT(ioc != NULL); 985 986 ioc->ioc_profile.ioc_guid = h2b64(ioc->ioc_guid); 987 (void) memcpy(ioc->ioc_profile.ioc_id_string, 988 "Solaris SRP Target 0.9a", 23); 989 990 /* 991 * Note vendor ID and subsystem ID are 24 bit values. Low order 992 * 8 bits in vendor ID field is slot and is initialized to zero. 993 * Low order 8 bits of subsystem ID is a reserved field and 994 * initialized to zero. 995 */ 996 ioc->ioc_profile.ioc_vendorid = 997 h2b32((uint32_t)(ioc->ioc_attr.hca_vendor_id << 8)); 998 ioc->ioc_profile.ioc_deviceid = 999 h2b32((uint32_t)ioc->ioc_attr.hca_device_id); 1000 ioc->ioc_profile.ioc_device_ver = 1001 h2b16((uint16_t)ioc->ioc_attr.hca_version_id); 1002 ioc->ioc_profile.ioc_subsys_vendorid = 1003 h2b32((uint32_t)(ioc->ioc_attr.hca_vendor_id << 8)); 1004 ioc->ioc_profile.ioc_subsys_id = h2b32(0); 1005 ioc->ioc_profile.ioc_io_class = h2b16(SRP_REV_16A_IO_CLASS); 1006 ioc->ioc_profile.ioc_io_subclass = h2b16(SRP_IO_SUBCLASS); 1007 ioc->ioc_profile.ioc_protocol = h2b16(SRP_PROTOCOL); 1008 ioc->ioc_profile.ioc_protocol_ver = h2b16(SRP_PROTOCOL_VERSION); 1009 ioc->ioc_profile.ioc_send_msg_qdepth = h2b16(srpt_send_msg_depth); 1010 ioc->ioc_profile.ioc_rdma_read_qdepth = 1011 ioc->ioc_attr.hca_max_rdma_out_chan; 1012 ioc->ioc_profile.ioc_send_msg_sz = h2b32(SRPT_DEFAULT_SEND_MSG_SIZE); 1013 ioc->ioc_profile.ioc_rdma_xfer_sz = h2b32(SRPT_DEFAULT_MAX_RDMA_SIZE); 1014 1015 capmask.bits.st = 1; /* Messages can be sent to IOC */ 1016 capmask.bits.sf = 1; /* Messages can be sent from IOC */ 1017 capmask.bits.rf = 1; /* RDMA Reads can be sent from IOC */ 1018 capmask.bits.wf = 1; /* RDMA Writes can be sent from IOC */ 1019 ioc->ioc_profile.ioc_ctrl_opcap_mask = capmask.mask; 1020 1021 /* 1022 * We currently only have one target, but if we had a list we would 1023 * go through that list and only count those that are ONLINE when 1024 * setting the services count and entries. 1025 */ 1026 if (ioc->ioc_tgt_port->tp_srp_enabled) { 1027 ioc->ioc_profile.ioc_service_entries = 1; 1028 ioc->ioc_svc.srv_id = h2b64(ioc->ioc_guid); 1029 (void) snprintf((char *)ioc->ioc_svc.srv_name, 1030 IB_DM_MAX_SVC_NAME_LEN, "SRP.T10:%016llx", 1031 (u_longlong_t)ioc->ioc_guid); 1032 } else { 1033 ioc->ioc_profile.ioc_service_entries = 0; 1034 ioc->ioc_svc.srv_id = 0; 1035 } 1036 } 1037 1038 /* 1039 * srpt_ioc_ds_alloc_dbuf() 1040 */ 1041 /* ARGSUSED */ 1042 stmf_data_buf_t * 1043 srpt_ioc_ds_alloc_dbuf(struct scsi_task *task, uint32_t size, 1044 uint32_t *pminsize, uint32_t flags) 1045 { 1046 srpt_iu_t *iu; 1047 srpt_ioc_t *ioc; 1048 srpt_ds_dbuf_t *dbuf; 1049 stmf_data_buf_t *stmf_dbuf; 1050 void *buf; 1051 srpt_mr_t mr; 1052 1053 ASSERT(task != NULL); 1054 iu = task->task_port_private; 1055 ioc = iu->iu_ioc; 1056 1057 SRPT_DPRINTF_L4("ioc_ds_alloc_dbuf, invoked ioc(%p)" 1058 " size(%d), flags(%x)", 1059 (void *)ioc, size, flags); 1060 1061 buf = srpt_vmem_alloc(ioc->ioc_dbuf_pool, size); 1062 if (buf == NULL) { 1063 return (NULL); 1064 } 1065 1066 if (srpt_vmem_mr(ioc->ioc_dbuf_pool, buf, size, &mr) != 0) { 1067 goto stmf_alloc_err; 1068 } 1069 1070 stmf_dbuf = stmf_alloc(STMF_STRUCT_DATA_BUF, sizeof (srpt_ds_dbuf_t), 1071 0); 1072 if (stmf_dbuf == NULL) { 1073 SRPT_DPRINTF_L2("ioc_ds_alloc_dbuf, stmf_alloc failed"); 1074 goto stmf_alloc_err; 1075 } 1076 1077 dbuf = stmf_dbuf->db_port_private; 1078 dbuf->db_stmf_buf = stmf_dbuf; 1079 dbuf->db_mr_hdl = mr.mr_hdl; 1080 dbuf->db_ioc = ioc; 1081 dbuf->db_sge.ds_va = mr.mr_va; 1082 dbuf->db_sge.ds_key = mr.mr_lkey; 1083 dbuf->db_sge.ds_len = size; 1084 1085 stmf_dbuf->db_buf_size = size; 1086 stmf_dbuf->db_data_size = size; 1087 stmf_dbuf->db_relative_offset = 0; 1088 stmf_dbuf->db_flags = 0; 1089 stmf_dbuf->db_xfer_status = 0; 1090 stmf_dbuf->db_sglist_length = 1; 1091 stmf_dbuf->db_sglist[0].seg_addr = buf; 1092 stmf_dbuf->db_sglist[0].seg_length = size; 1093 1094 return (stmf_dbuf); 1095 1096 buf_mr_err: 1097 stmf_free(stmf_dbuf); 1098 1099 stmf_alloc_err: 1100 srpt_vmem_free(ioc->ioc_dbuf_pool, buf, size); 1101 1102 return (NULL); 1103 } 1104 1105 void 1106 srpt_ioc_ds_free_dbuf(struct stmf_dbuf_store *ds, 1107 stmf_data_buf_t *dbuf) 1108 { 1109 srpt_ioc_t *ioc; 1110 1111 SRPT_DPRINTF_L4("ioc_ds_free_dbuf, invoked buf (%p)", 1112 (void *)dbuf); 1113 ioc = ds->ds_port_private; 1114 1115 srpt_vmem_free(ioc->ioc_dbuf_pool, dbuf->db_sglist[0].seg_addr, 1116 dbuf->db_buf_size); 1117 stmf_free(dbuf); 1118 } 1119 1120 /* Memory arena routines */ 1121 1122 static srpt_vmem_pool_t * 1123 srpt_vmem_create(const char *name, srpt_ioc_t *ioc, ib_memlen_t chunksize, 1124 uint64_t maxsize, ibt_mr_flags_t flags) 1125 { 1126 srpt_mr_t *chunk; 1127 srpt_vmem_pool_t *result; 1128 1129 ASSERT(chunksize <= maxsize); 1130 1131 result = kmem_zalloc(sizeof (srpt_vmem_pool_t), KM_SLEEP); 1132 1133 result->svp_ioc = ioc; 1134 result->svp_chunksize = chunksize; 1135 result->svp_max_size = maxsize; 1136 result->svp_flags = flags; 1137 1138 rw_init(&result->svp_lock, NULL, RW_DRIVER, NULL); 1139 avl_create(&result->svp_mr_list, srpt_vmem_mr_compare, 1140 sizeof (srpt_mr_t), offsetof(srpt_mr_t, mr_avl)); 1141 1142 chunk = srpt_vmem_chunk_alloc(result, chunksize); 1143 1144 avl_add(&result->svp_mr_list, chunk); 1145 result->svp_total_size = chunksize; 1146 1147 result->svp_vmem = vmem_create(name, 1148 (void*)(uintptr_t)chunk->mr_va, 1149 (size_t)chunk->mr_len, SRPT_MR_QUANTSIZE, 1150 NULL, NULL, NULL, 0, VM_SLEEP); 1151 1152 return (result); 1153 } 1154 1155 static void 1156 srpt_vmem_destroy(srpt_vmem_pool_t *vm_pool) 1157 { 1158 srpt_mr_t *chunk; 1159 srpt_mr_t *next; 1160 1161 rw_enter(&vm_pool->svp_lock, RW_WRITER); 1162 vmem_destroy(vm_pool->svp_vmem); 1163 1164 chunk = avl_first(&vm_pool->svp_mr_list); 1165 1166 while (chunk != NULL) { 1167 next = AVL_NEXT(&vm_pool->svp_mr_list, chunk); 1168 avl_remove(&vm_pool->svp_mr_list, chunk); 1169 srpt_vmem_chunk_free(vm_pool, chunk); 1170 chunk = next; 1171 } 1172 1173 avl_destroy(&vm_pool->svp_mr_list); 1174 1175 rw_exit(&vm_pool->svp_lock); 1176 rw_destroy(&vm_pool->svp_lock); 1177 1178 kmem_free(vm_pool, sizeof (srpt_vmem_pool_t)); 1179 } 1180 1181 static void * 1182 srpt_vmem_alloc(srpt_vmem_pool_t *vm_pool, size_t size) 1183 { 1184 void *result; 1185 srpt_mr_t *next; 1186 ib_memlen_t chunklen; 1187 1188 ASSERT(vm_pool != NULL); 1189 1190 result = vmem_alloc(vm_pool->svp_vmem, size, 1191 VM_NOSLEEP | VM_FIRSTFIT); 1192 1193 if (result != NULL) { 1194 /* memory successfully allocated */ 1195 return (result); 1196 } 1197 1198 /* need more vmem */ 1199 rw_enter(&vm_pool->svp_lock, RW_WRITER); 1200 chunklen = vm_pool->svp_chunksize; 1201 1202 if (vm_pool->svp_total_size >= vm_pool->svp_max_size) { 1203 /* no more room to alloc */ 1204 rw_exit(&vm_pool->svp_lock); 1205 return (NULL); 1206 } 1207 1208 if ((vm_pool->svp_total_size + chunklen) > vm_pool->svp_max_size) { 1209 chunklen = vm_pool->svp_max_size - vm_pool->svp_total_size; 1210 } 1211 1212 next = srpt_vmem_chunk_alloc(vm_pool, chunklen); 1213 if (next != NULL) { 1214 /* 1215 * Note that the size of the chunk we got 1216 * may not be the size we requested. Use the 1217 * length returned in the chunk itself. 1218 */ 1219 if (vmem_add(vm_pool->svp_vmem, (void*)(uintptr_t)next->mr_va, 1220 next->mr_len, VM_NOSLEEP) == NULL) { 1221 srpt_vmem_chunk_free(vm_pool, next); 1222 SRPT_DPRINTF_L2("vmem_add failed"); 1223 } else { 1224 vm_pool->svp_total_size += next->mr_len; 1225 avl_add(&vm_pool->svp_mr_list, next); 1226 } 1227 } 1228 1229 rw_exit(&vm_pool->svp_lock); 1230 1231 result = vmem_alloc(vm_pool->svp_vmem, size, VM_NOSLEEP | VM_FIRSTFIT); 1232 1233 return (result); 1234 } 1235 1236 static void 1237 srpt_vmem_free(srpt_vmem_pool_t *vm_pool, void *vaddr, size_t size) 1238 { 1239 vmem_free(vm_pool->svp_vmem, vaddr, size); 1240 } 1241 1242 static int 1243 srpt_vmem_mr(srpt_vmem_pool_t *vm_pool, void *vaddr, size_t size, 1244 srpt_mr_t *mr) 1245 { 1246 avl_index_t where; 1247 ib_vaddr_t mrva = (ib_vaddr_t)(uintptr_t)vaddr; 1248 srpt_mr_t chunk; 1249 srpt_mr_t *nearest; 1250 ib_vaddr_t chunk_end; 1251 int status = DDI_FAILURE; 1252 1253 rw_enter(&vm_pool->svp_lock, RW_READER); 1254 1255 chunk.mr_va = mrva; 1256 nearest = avl_find(&vm_pool->svp_mr_list, &chunk, &where); 1257 1258 if (nearest == NULL) { 1259 nearest = avl_nearest(&vm_pool->svp_mr_list, where, 1260 AVL_BEFORE); 1261 } 1262 1263 if (nearest != NULL) { 1264 /* Verify this chunk contains the specified address range */ 1265 ASSERT(nearest->mr_va <= mrva); 1266 1267 chunk_end = nearest->mr_va + nearest->mr_len; 1268 if (chunk_end >= mrva + size) { 1269 mr->mr_hdl = nearest->mr_hdl; 1270 mr->mr_va = mrva; 1271 mr->mr_len = size; 1272 mr->mr_lkey = nearest->mr_lkey; 1273 mr->mr_rkey = nearest->mr_rkey; 1274 status = DDI_SUCCESS; 1275 } 1276 } 1277 1278 rw_exit(&vm_pool->svp_lock); 1279 return (status); 1280 } 1281 1282 static srpt_mr_t * 1283 srpt_vmem_chunk_alloc(srpt_vmem_pool_t *vm_pool, ib_memlen_t chunksize) 1284 { 1285 void *chunk = NULL; 1286 srpt_mr_t *result = NULL; 1287 1288 while ((chunk == NULL) && (chunksize >= SRPT_MIN_CHUNKSIZE)) { 1289 chunk = kmem_alloc(chunksize, KM_NOSLEEP); 1290 if (chunk == NULL) { 1291 SRPT_DPRINTF_L2("srpt_vmem_chunk_alloc: " 1292 "failed to alloc chunk of %d, trying %d", 1293 (int)chunksize, (int)chunksize/2); 1294 chunksize /= 2; 1295 } 1296 } 1297 1298 if (chunk != NULL) { 1299 result = srpt_reg_mem(vm_pool, (ib_vaddr_t)(uintptr_t)chunk, 1300 chunksize); 1301 if (result == NULL) { 1302 SRPT_DPRINTF_L2("srpt_vmem_chunk_alloc: " 1303 "chunk registration failed"); 1304 kmem_free(chunk, chunksize); 1305 } 1306 } 1307 1308 return (result); 1309 } 1310 1311 static void 1312 srpt_vmem_chunk_free(srpt_vmem_pool_t *vm_pool, srpt_mr_t *mr) 1313 { 1314 void *chunk = (void *)(uintptr_t)mr->mr_va; 1315 ib_memlen_t chunksize = mr->mr_len; 1316 1317 srpt_dereg_mem(vm_pool->svp_ioc, mr); 1318 kmem_free(chunk, chunksize); 1319 } 1320 1321 static srpt_mr_t * 1322 srpt_reg_mem(srpt_vmem_pool_t *vm_pool, ib_vaddr_t vaddr, ib_memlen_t len) 1323 { 1324 srpt_mr_t *result = NULL; 1325 ibt_mr_attr_t mr_attr; 1326 ibt_mr_desc_t mr_desc; 1327 ibt_status_t status; 1328 srpt_ioc_t *ioc = vm_pool->svp_ioc; 1329 1330 result = kmem_zalloc(sizeof (srpt_mr_t), KM_NOSLEEP); 1331 if (result == NULL) { 1332 SRPT_DPRINTF_L2("srpt_reg_mem: failed to allocate"); 1333 return (NULL); 1334 } 1335 1336 bzero(&mr_attr, sizeof (ibt_mr_attr_t)); 1337 bzero(&mr_desc, sizeof (ibt_mr_desc_t)); 1338 1339 mr_attr.mr_vaddr = vaddr; 1340 mr_attr.mr_len = len; 1341 mr_attr.mr_as = NULL; 1342 mr_attr.mr_flags = vm_pool->svp_flags; 1343 1344 status = ibt_register_mr(ioc->ioc_ibt_hdl, ioc->ioc_pd_hdl, 1345 &mr_attr, &result->mr_hdl, &mr_desc); 1346 if (status != IBT_SUCCESS) { 1347 SRPT_DPRINTF_L2("srpt_reg_mem: ibt_register_mr " 1348 "failed %d", status); 1349 kmem_free(result, sizeof (srpt_mr_t)); 1350 return (NULL); 1351 } 1352 1353 result->mr_va = mr_attr.mr_vaddr; 1354 result->mr_len = mr_attr.mr_len; 1355 result->mr_lkey = mr_desc.md_lkey; 1356 result->mr_rkey = mr_desc.md_rkey; 1357 1358 return (result); 1359 } 1360 1361 static void 1362 srpt_dereg_mem(srpt_ioc_t *ioc, srpt_mr_t *mr) 1363 { 1364 ibt_status_t status; 1365 1366 status = ibt_deregister_mr(ioc->ioc_ibt_hdl, mr->mr_hdl); 1367 if (status != IBT_SUCCESS) { 1368 SRPT_DPRINTF_L1("ioc_fini, error deregistering MR (%d)", 1369 status); 1370 } 1371 kmem_free(mr, sizeof (srpt_mr_t)); 1372 } 1373 1374 static int 1375 srpt_vmem_mr_compare(const void *a, const void *b) 1376 { 1377 srpt_mr_t *mr1 = (srpt_mr_t *)a; 1378 srpt_mr_t *mr2 = (srpt_mr_t *)b; 1379 1380 /* sort and match by virtual address */ 1381 if (mr1->mr_va < mr2->mr_va) { 1382 return (-1); 1383 } else if (mr1->mr_va > mr2->mr_va) { 1384 return (1); 1385 } 1386 1387 return (0); 1388 } 1389