1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/ddi.h> 28 #include <sys/types.h> 29 #include <sys/socket.h> 30 #include <netinet/in.h> 31 #include <sys/sunddi.h> 32 #include <sys/sysmacros.h> 33 #include <sys/iscsi_protocol.h> 34 35 #include <sys/ib/clients/iser/iser.h> 36 #include <sys/ib/clients/iser/iser_idm.h> 37 38 /* 39 * iser_ib.c 40 * Routines for InfiniBand transport for iSER 41 * 42 * This file contains the routines to interface with the IBT API to attach and 43 * allocate IB resources, handle async events, and post recv work requests. 
44 * 45 */ 46 47 static iser_hca_t *iser_ib_gid2hca(ib_gid_t gid); 48 static iser_hca_t *iser_ib_guid2hca(ib_guid_t guid); 49 50 static iser_hca_t *iser_ib_alloc_hca(ib_guid_t guid); 51 static int iser_ib_free_hca(iser_hca_t *hca); 52 static int iser_ib_update_hcaports(iser_hca_t *hca); 53 static int iser_ib_init_hcas(void); 54 static int iser_ib_fini_hcas(void); 55 56 static iser_sbind_t *iser_ib_get_bind( 57 iser_svc_t *iser_svc, ib_guid_t hca_guid, ib_gid_t gid); 58 static int iser_ib_activate_port( 59 idm_svc_t *idm_svc, ib_guid_t guid, ib_gid_t gid); 60 static void iser_ib_deactivate_port(ib_guid_t hca_guid, ib_gid_t gid); 61 62 static void iser_ib_init_qp(iser_chan_t *chan, uint_t sq_size, uint_t rq_size); 63 static void iser_ib_fini_qp(iser_qp_t *qp); 64 65 static int iser_ib_setup_cq(ibt_hca_hdl_t hca_hdl, uint_t cq_size, 66 ibt_cq_hdl_t *cq_hdl); 67 68 static void iser_ib_setup_chanargs(uint8_t hca_port, ibt_cq_hdl_t scq_hdl, 69 ibt_cq_hdl_t rcq_hdl, uint_t sq_size, uint_t rq_size, 70 ibt_pd_hdl_t hca_pdhdl, ibt_rc_chan_alloc_args_t *cargs); 71 72 static void iser_ib_handle_portup_event(ibt_hca_hdl_t hdl, 73 ibt_async_event_t *event); 74 static void iser_ib_handle_portdown_event(ibt_hca_hdl_t hdl, 75 ibt_async_event_t *event); 76 static void iser_ib_handle_hca_detach_event(ibt_hca_hdl_t hdl, 77 ibt_async_event_t *event); 78 79 static void iser_ib_post_recv_task(void *arg); 80 81 static struct ibt_clnt_modinfo_s iser_ib_modinfo = { 82 IBTI_V_CURR, 83 IBT_STORAGE_DEV, 84 iser_ib_async_handler, 85 NULL, 86 "iSER" 87 }; 88 89 /* 90 * iser_ib_init 91 * 92 * This function registers the HCA drivers with IBTF and registers and binds 93 * iSER as a service with IBTF. 
94 */ 95 int 96 iser_ib_init(void) 97 { 98 int status; 99 100 /* Register with IBTF */ 101 status = ibt_attach(&iser_ib_modinfo, iser_state->is_dip, iser_state, 102 &iser_state->is_ibhdl); 103 if (status != DDI_SUCCESS) { 104 ISER_LOG(CE_NOTE, "iser_ib_init: ibt_attach failed (0x%x)", 105 status); 106 return (DDI_FAILURE); 107 } 108 109 /* Create the global work request kmem_cache */ 110 iser_state->iser_wr_cache = kmem_cache_create("iser_wr_cache", 111 sizeof (iser_wr_t), 0, NULL, NULL, NULL, 112 iser_state, NULL, KM_SLEEP); 113 114 /* Populate our list of HCAs */ 115 status = iser_ib_init_hcas(); 116 if (status != DDI_SUCCESS) { 117 /* HCAs failed to initialize, tear it down */ 118 kmem_cache_destroy(iser_state->iser_wr_cache); 119 (void) ibt_detach(iser_state->is_ibhdl); 120 iser_state->is_ibhdl = NULL; 121 ISER_LOG(CE_NOTE, "iser_ib_init: failed to initialize HCAs"); 122 return (DDI_FAILURE); 123 } 124 125 /* Target will register iSER as a service with IBTF when required */ 126 127 /* Target will bind this service when it comes online */ 128 129 return (DDI_SUCCESS); 130 } 131 132 /* 133 * iser_ib_fini 134 * 135 * This function unbinds and degisters the iSER service from IBTF 136 */ 137 int 138 iser_ib_fini(void) 139 { 140 /* IDM would have already disabled all the services */ 141 142 /* Teardown the HCA list and associated resources */ 143 if (iser_ib_fini_hcas() != DDI_SUCCESS) 144 return (DDI_FAILURE); 145 146 /* Teardown the global work request kmem_cache */ 147 kmem_cache_destroy(iser_state->iser_wr_cache); 148 149 /* Deregister with IBTF */ 150 if (iser_state->is_ibhdl != NULL) { 151 (void) ibt_detach(iser_state->is_ibhdl); 152 iser_state->is_ibhdl = NULL; 153 } 154 155 return (DDI_SUCCESS); 156 } 157 158 /* 159 * iser_ib_register_service 160 * 161 * This function registers the iSER service using the RDMA-Aware Service ID. 
162 */ 163 int 164 iser_ib_register_service(idm_svc_t *idm_svc) 165 { 166 ibt_srv_desc_t srvdesc; 167 iser_svc_t *iser_svc; 168 int status; 169 170 bzero(&srvdesc, sizeof (ibt_srv_desc_t)); 171 172 /* Set up IBTI client callback handler from the CM */ 173 srvdesc.sd_handler = iser_ib_cm_handler; 174 175 srvdesc.sd_flags = IBT_SRV_NO_FLAGS; 176 177 iser_svc = (iser_svc_t *)idm_svc->is_iser_svc; 178 179 /* Register the service on the specified port */ 180 status = ibt_register_service( 181 iser_state->is_ibhdl, &srvdesc, 182 iser_svc->is_svcid, 1, &iser_svc->is_srvhdl, NULL); 183 184 return (status); 185 } 186 187 /* 188 * iser_ib_bind_service 189 * 190 * This function binds a given iSER service on all available HCA ports. The 191 * current specification does not allow user to specify transport bindings 192 * for each iscsi target. The ULP invokes this function to bind the target 193 * to all available iser ports after checking for the presence of an IB HCA. 194 * iSER is "configured" whenever an IB-capable IP address exists. The lack 195 * of active IB ports is a less-fatal condition, and sockets would be used 196 * as the transport even though an Infiniband HCA is configured but unusable. 197 * 198 */ 199 int 200 iser_ib_bind_service(idm_svc_t *idm_svc) 201 { 202 iser_hca_t *hca; 203 ib_gid_t gid; 204 int num_ports = 0; 205 int num_binds = 0; 206 int num_inactive_binds = 0; /* if HCA ports inactive */ 207 int status; 208 int i; 209 210 ASSERT(idm_svc != NULL); 211 ASSERT(idm_svc->is_iser_svc != NULL); 212 213 /* Register the iSER service on all available ports */ 214 mutex_enter(&iser_state->is_hcalist_lock); 215 216 for (hca = list_head(&iser_state->is_hcalist); 217 hca != NULL; 218 hca = list_next(&iser_state->is_hcalist, hca)) { 219 220 for (i = 0; i < hca->hca_num_ports; i++) { 221 num_ports++; 222 if (hca->hca_port_info[i].p_linkstate != 223 IBT_PORT_ACTIVE) { 224 /* 225 * Move on. 
We will attempt to bind service 226 * in our async handler if the port comes up 227 * at a later time. 228 */ 229 num_inactive_binds++; 230 continue; 231 } 232 233 gid = hca->hca_port_info[i].p_sgid_tbl[0]; 234 235 /* If the port is already bound, skip */ 236 if (iser_ib_get_bind( 237 idm_svc->is_iser_svc, hca->hca_guid, gid) == NULL) { 238 239 status = iser_ib_activate_port( 240 idm_svc, hca->hca_guid, gid); 241 if (status != IBT_SUCCESS) { 242 ISER_LOG(CE_NOTE, 243 "iser_ib_bind_service: " 244 "iser_ib_activate_port failure " 245 "(0x%x)", status); 246 continue; 247 } 248 } 249 num_binds++; 250 } 251 } 252 mutex_exit(&iser_state->is_hcalist_lock); 253 254 if (num_binds) { 255 ISER_LOG(CE_NOTE, "iser_ib_bind_service: Service available on " 256 "(%d) of (%d) ports", num_binds, num_ports); 257 return (ISER_STATUS_SUCCESS); 258 } else if (num_inactive_binds) { 259 ISER_LOG(CE_NOTE, "iser_ib_bind_service: Could not bind " 260 "service, HCA ports are not active."); 261 /* 262 * still considered success, the async handler will bind 263 * the service when the port comes up at a later time 264 */ 265 return (ISER_STATUS_SUCCESS); 266 } else { 267 ISER_LOG(CE_NOTE, "iser_ib_bind_service: Did not bind service"); 268 return (ISER_STATUS_FAIL); 269 } 270 } 271 272 /* 273 * iser_ib_unbind_service 274 * 275 * This function unbinds a given service on a all HCA ports 276 */ 277 void 278 iser_ib_unbind_service(idm_svc_t *idm_svc) 279 { 280 iser_svc_t *iser_svc; 281 iser_sbind_t *is_sbind, *next_sb; 282 283 if (idm_svc != NULL && idm_svc->is_iser_svc != NULL) { 284 285 iser_svc = idm_svc->is_iser_svc; 286 287 for (is_sbind = list_head(&iser_svc->is_sbindlist); 288 is_sbind != NULL; 289 is_sbind = next_sb) { 290 next_sb = list_next(&iser_svc->is_sbindlist, is_sbind); 291 (void) ibt_unbind_service(iser_svc->is_srvhdl, 292 is_sbind->is_sbindhdl); 293 list_remove(&iser_svc->is_sbindlist, is_sbind); 294 kmem_free(is_sbind, sizeof (iser_sbind_t)); 295 } 296 } 297 } 298 299 /* ARGSUSED */ 
300 void 301 iser_ib_deregister_service(idm_svc_t *idm_svc) 302 { 303 iser_svc_t *iser_svc; 304 305 if (idm_svc != NULL && idm_svc->is_iser_svc != NULL) { 306 307 iser_svc = (iser_svc_t *)idm_svc->is_iser_svc; 308 (void) ibt_deregister_service(iser_state->is_ibhdl, 309 iser_svc->is_srvhdl); 310 (void) ibt_release_ip_sid(iser_svc->is_svcid); 311 } 312 } 313 314 /* 315 * iser_ib_get_paths 316 * This function finds the IB path between the local and the remote address. 317 * 318 */ 319 int 320 iser_ib_get_paths(ibt_ip_addr_t *local_ip, ibt_ip_addr_t *remote_ip, 321 ibt_path_info_t *path, ibt_path_ip_src_t *path_src_ip) 322 { 323 ibt_ip_path_attr_t ipattr; 324 int status; 325 326 (void) bzero(&ipattr, sizeof (ibt_ip_path_attr_t)); 327 ipattr.ipa_dst_ip = remote_ip; 328 ipattr.ipa_src_ip = *local_ip; 329 ipattr.ipa_max_paths = 1; 330 ipattr.ipa_ndst = 1; 331 332 (void) bzero(path, sizeof (ibt_path_info_t)); 333 status = ibt_get_ip_paths(iser_state->is_ibhdl, IBT_PATH_NO_FLAGS, 334 &ipattr, path, NULL, path_src_ip); 335 if (status != IBT_SUCCESS) { 336 ISER_LOG(CE_NOTE, "ibt_get_ip_paths: ibt_get_ip_paths " 337 "failure: status (%d)", status); 338 return (status); 339 } 340 341 if (local_ip != NULL) { 342 ISER_LOG(CE_NOTE, "iser_ib_get_paths success: IP[%x to %x]", 343 local_ip->un.ip4addr, remote_ip->un.ip4addr); 344 } else { 345 ISER_LOG(CE_NOTE, "iser_ib_get_paths success: " 346 "IP[INADDR_ANY to %x]", remote_ip->un.ip4addr); 347 } 348 349 return (ISER_STATUS_SUCCESS); 350 } 351 352 /* 353 * iser_ib_alloc_channel_nopathlookup 354 * 355 * This function allocates a reliable connected channel. This function does 356 * not invoke ibt_get_ip_paths() to do the path lookup. The HCA GUID and 357 * port are input to this function. 
358 */ 359 iser_chan_t * 360 iser_ib_alloc_channel_nopathlookup(ib_guid_t hca_guid, uint8_t hca_port) 361 { 362 iser_hca_t *hca; 363 iser_chan_t *chan; 364 365 /* Lookup the hca using the gid in the path info */ 366 hca = iser_ib_guid2hca(hca_guid); 367 if (hca == NULL) { 368 ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_nopathlookup: failed " 369 "to lookup HCA(%llx) handle", (longlong_t)hca_guid); 370 return (NULL); 371 } 372 373 chan = iser_ib_alloc_rc_channel(hca, hca_port); 374 if (chan == NULL) { 375 ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_nopathlookup: failed " 376 "to alloc channel on HCA(%llx) %d", 377 (longlong_t)hca_guid, hca_port); 378 return (NULL); 379 } 380 381 ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_pathlookup success: " 382 "chanhdl (0x%p), HCA(%llx) %d", 383 (void *)chan->ic_chanhdl, (longlong_t)hca_guid, hca_port); 384 385 return (chan); 386 } 387 388 /* 389 * iser_ib_alloc_channel_pathlookup 390 * 391 * This function allocates a reliable connected channel but first invokes 392 * ibt_get_ip_paths() with the given local and remote addres to get the 393 * HCA lgid and the port number. 
394 */ 395 iser_chan_t * 396 iser_ib_alloc_channel_pathlookup( 397 ibt_ip_addr_t *local_ip, ibt_ip_addr_t *remote_ip) 398 { 399 ibt_path_info_t ibt_path; 400 ibt_path_ip_src_t path_src_ip; 401 ib_gid_t lgid; 402 uint8_t hca_port; /* from path */ 403 iser_hca_t *hca; 404 iser_chan_t *chan; 405 int status; 406 407 /* Lookup a path to the given destination */ 408 status = iser_ib_get_paths( 409 local_ip, remote_ip, &ibt_path, &path_src_ip); 410 411 if (status != ISER_STATUS_SUCCESS) { 412 ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_pathlookup: faild " 413 "Path lookup IP:[%llx to %llx] failed: status (%d)", 414 (longlong_t)local_ip->un.ip4addr, 415 (longlong_t)remote_ip->un.ip4addr, 416 status); 417 return (NULL); 418 } 419 420 /* get the local gid from the path info */ 421 lgid = ibt_path.pi_prim_cep_path.cep_adds_vect.av_sgid; 422 423 /* get the hca port from the path info */ 424 hca_port = ibt_path.pi_prim_cep_path.cep_hca_port_num; 425 426 /* Lookup the hca using the gid in the path info */ 427 hca = iser_ib_gid2hca(lgid); 428 if (hca == NULL) { 429 ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_pathlookup: failed " 430 "to lookup HCA (%llx) handle", 431 (longlong_t)hca->hca_guid); 432 return (NULL); 433 } 434 435 chan = iser_ib_alloc_rc_channel(hca, hca_port); 436 if (chan == NULL) { 437 ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_pathlookup: failed " 438 "to alloc channel from IP:[%llx to %llx] on HCA (%llx) %d", 439 (longlong_t)local_ip->un.ip4addr, 440 (longlong_t)remote_ip->un.ip4addr, 441 (longlong_t)hca->hca_guid, hca_port); 442 return (NULL); 443 } 444 445 ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_pathlookup success: " 446 "chanhdl (0x%p), IP:[%llx to %llx], lgid (%llx:%llx), HCA(%llx) %d", 447 (void *)chan->ic_chanhdl, 448 (longlong_t)local_ip->un.ip4addr, 449 (longlong_t)remote_ip->un.ip4addr, 450 (longlong_t)lgid.gid_prefix, (longlong_t)lgid.gid_guid, 451 (longlong_t)hca->hca_guid, hca_port); 452 453 chan->ic_ibt_path = ibt_path; 454 chan->ic_localip = 
path_src_ip.ip_primary; 455 chan->ic_remoteip = *remote_ip; 456 457 return (chan); 458 } 459 460 /* 461 * iser_ib_alloc_rc_channel 462 * 463 * This function allocates a reliable communication channel using the specified 464 * channel attributes. 465 */ 466 iser_chan_t * 467 iser_ib_alloc_rc_channel(iser_hca_t *hca, uint8_t hca_port) 468 { 469 470 iser_chan_t *chan; 471 ibt_rc_chan_alloc_args_t chanargs; 472 uint_t sq_size, rq_size; 473 int status; 474 475 chan = kmem_zalloc(sizeof (iser_chan_t), KM_SLEEP); 476 477 mutex_init(&chan->ic_chan_lock, NULL, MUTEX_DRIVER, NULL); 478 mutex_init(&chan->ic_sq_post_lock, NULL, MUTEX_DRIVER, NULL); 479 480 /* Set up the iSER channel handle with HCA */ 481 chan->ic_hca = hca; 482 483 /* 484 * Determine the queue sizes, based upon the HCA query data. 485 * For our Work Queues, we will use either our default value, 486 * or the HCA's maximum value, whichever is smaller. 487 */ 488 sq_size = min(hca->hca_attr.hca_max_chan_sz, ISER_IB_SENDQ_SIZE); 489 rq_size = min(hca->hca_attr.hca_max_chan_sz, ISER_IB_RECVQ_SIZE); 490 491 /* 492 * For our Completion Queues, we again check the device maximum. 493 * We want to end up with CQs that are the next size up from the 494 * WQs they are servicing so that they have some overhead. 
495 */ 496 if (hca->hca_attr.hca_max_cq_sz >= (sq_size + 1)) { 497 chan->ic_sendcq_sz = sq_size + 1; 498 } else { 499 chan->ic_sendcq_sz = hca->hca_attr.hca_max_cq_sz; 500 sq_size = chan->ic_sendcq_sz - 1; 501 } 502 503 if (hca->hca_attr.hca_max_cq_sz >= (rq_size + 1)) { 504 chan->ic_recvcq_sz = rq_size + 1; 505 } else { 506 chan->ic_recvcq_sz = hca->hca_attr.hca_max_cq_sz; 507 rq_size = chan->ic_recvcq_sz - 1; 508 } 509 510 /* Initialize the iSER channel's QP handle */ 511 iser_ib_init_qp(chan, sq_size, rq_size); 512 513 /* Set up the Send Completion Queue */ 514 status = iser_ib_setup_cq(hca->hca_hdl, chan->ic_sendcq_sz, 515 &chan->ic_sendcq); 516 if (status != ISER_STATUS_SUCCESS) { 517 iser_ib_fini_qp(&chan->ic_qp); 518 mutex_destroy(&chan->ic_chan_lock); 519 mutex_destroy(&chan->ic_sq_post_lock); 520 kmem_free(chan, sizeof (iser_chan_t)); 521 return (NULL); 522 } 523 ibt_set_cq_handler(chan->ic_sendcq, iser_ib_sendcq_handler, chan); 524 (void) ibt_enable_cq_notify(chan->ic_sendcq, IBT_NEXT_COMPLETION); 525 526 /* Set up the Receive Completion Queue */ 527 status = iser_ib_setup_cq(hca->hca_hdl, chan->ic_recvcq_sz, 528 &chan->ic_recvcq); 529 if (status != ISER_STATUS_SUCCESS) { 530 (void) ibt_free_cq(chan->ic_sendcq); 531 iser_ib_fini_qp(&chan->ic_qp); 532 mutex_destroy(&chan->ic_chan_lock); 533 mutex_destroy(&chan->ic_sq_post_lock); 534 kmem_free(chan, sizeof (iser_chan_t)); 535 return (NULL); 536 } 537 ibt_set_cq_handler(chan->ic_recvcq, iser_ib_recvcq_handler, chan); 538 (void) ibt_enable_cq_notify(chan->ic_recvcq, IBT_NEXT_COMPLETION); 539 540 /* Setup the channel arguments */ 541 iser_ib_setup_chanargs(hca_port, chan->ic_sendcq, chan->ic_recvcq, 542 sq_size, rq_size, hca->hca_pdhdl, &chanargs); 543 544 status = ibt_alloc_rc_channel(hca->hca_hdl, 545 IBT_ACHAN_NO_FLAGS, &chanargs, &chan->ic_chanhdl, NULL); 546 if (status != IBT_SUCCESS) { 547 ISER_LOG(CE_NOTE, "iser_ib_alloc_rc_channel: failed " 548 "ibt_alloc_rc_channel: status (%d)", status); 549 (void) 
ibt_free_cq(chan->ic_sendcq); 550 (void) ibt_free_cq(chan->ic_recvcq); 551 iser_ib_fini_qp(&chan->ic_qp); 552 mutex_destroy(&chan->ic_chan_lock); 553 mutex_destroy(&chan->ic_sq_post_lock); 554 kmem_free(chan, sizeof (iser_chan_t)); 555 return (NULL); 556 } 557 558 /* Set the 'channel' as the client private data */ 559 (void) ibt_set_chan_private(chan->ic_chanhdl, chan); 560 561 return (chan); 562 } 563 564 /* 565 * iser_ib_open_rc_channel 566 * This function opens a RC connection on the given allocated RC channel 567 */ 568 int 569 iser_ib_open_rc_channel(iser_chan_t *chan) 570 { 571 ibt_ip_cm_info_t ipcm_info; 572 iser_private_data_t iser_priv_data; 573 ibt_chan_open_args_t ocargs; 574 ibt_rc_returns_t ocreturns; 575 int status; 576 577 mutex_enter(&chan->ic_chan_lock); 578 579 /* 580 * For connection establishment, the initiator sends a CM REQ using the 581 * iSER RDMA-Aware Service ID. Included are the source and destination 582 * IP addresses, and the src port. 583 */ 584 bzero(&ipcm_info, sizeof (ibt_ip_cm_info_t)); 585 ipcm_info.src_addr = chan->ic_localip; 586 ipcm_info.dst_addr = chan->ic_remoteip; 587 ipcm_info.src_port = chan->ic_lport; 588 589 /* 590 * The CM Private Data field defines the iSER connection parameters 591 * such as zero based virtual address exception (ZBVAE) and Send with 592 * invalidate Exception (SIE). 593 * 594 * Solaris IBT does not currently support ZBVAE or SIE. 595 */ 596 iser_priv_data.rsvd1 = 0; 597 iser_priv_data.sie = 1; 598 iser_priv_data.zbvae = 1; 599 600 status = ibt_format_ip_private_data(&ipcm_info, 601 sizeof (iser_private_data_t), &iser_priv_data); 602 if (status != IBT_SUCCESS) { 603 ISER_LOG(CE_NOTE, "iser_ib_open_rc_channel failed: %d", status); 604 mutex_exit(&chan->ic_chan_lock); 605 return (status); 606 } 607 608 /* 609 * Set the SID we are attempting to connect to, based upon the 610 * remote port number. 
611 */ 612 chan->ic_ibt_path.pi_sid = ibt_get_ip_sid(IPPROTO_TCP, chan->ic_rport); 613 614 /* Set up the args for the channel open */ 615 bzero(&ocargs, sizeof (ibt_chan_open_args_t)); 616 ocargs.oc_path = &chan->ic_ibt_path; 617 ocargs.oc_cm_handler = iser_ib_cm_handler; 618 ocargs.oc_cm_clnt_private = iser_state; 619 ocargs.oc_rdma_ra_out = 4; 620 ocargs.oc_rdma_ra_in = 4; 621 ocargs.oc_path_retry_cnt = 2; 622 ocargs.oc_path_rnr_retry_cnt = 2; 623 ocargs.oc_priv_data_len = sizeof (iser_private_data_t); 624 ocargs.oc_priv_data = &iser_priv_data; 625 626 bzero(&ocreturns, sizeof (ibt_rc_returns_t)); 627 628 status = ibt_open_rc_channel(chan->ic_chanhdl, 629 IBT_OCHAN_NO_FLAGS, IBT_BLOCKING, &ocargs, &ocreturns); 630 631 if (status != IBT_SUCCESS) { 632 ISER_LOG(CE_NOTE, "iser_ib_open_rc_channel failed: %d", status); 633 mutex_exit(&chan->ic_chan_lock); 634 return (status); 635 } 636 637 mutex_exit(&chan->ic_chan_lock); 638 return (IDM_STATUS_SUCCESS); 639 } 640 641 /* 642 * iser_ib_close_rc_channel 643 * This function closes the RC channel related to this iser_chan handle. 644 * We invoke this in a non-blocking, no callbacks context. 645 */ 646 void 647 iser_ib_close_rc_channel(iser_chan_t *chan) 648 { 649 int status; 650 651 mutex_enter(&chan->ic_chan_lock); 652 status = ibt_close_rc_channel(chan->ic_chanhdl, IBT_BLOCKING, NULL, 653 0, NULL, NULL, 0); 654 if (status != IBT_SUCCESS) { 655 ISER_LOG(CE_NOTE, "iser_ib_close_rc_channel: " 656 "ibt_close_rc_channel failed: status (%d)", status); 657 } 658 mutex_exit(&chan->ic_chan_lock); 659 } 660 661 /* 662 * iser_ib_free_rc_channel 663 * 664 * This function tears down an RC channel's QP initialization and frees it. 665 * Note that we do not need synchronization here; the channel has been 666 * closed already, so we should only have completion polling occuring. Once 667 * complete, we are free to free the IBTF channel, WQ and CQ resources, and 668 * our own related resources. 
669 */ 670 void 671 iser_ib_free_rc_channel(iser_chan_t *chan) 672 { 673 iser_qp_t *iser_qp; 674 675 iser_qp = &chan->ic_qp; 676 677 /* Ensure the SQ is empty */ 678 while (chan->ic_sq_post_count != 0) { 679 mutex_exit(&chan->ic_conn->ic_lock); 680 delay(drv_usectohz(ISER_DELAY_HALF_SECOND)); 681 mutex_enter(&chan->ic_conn->ic_lock); 682 } 683 mutex_destroy(&chan->ic_sq_post_lock); 684 685 /* Ensure the RQ is empty */ 686 (void) ibt_flush_channel(chan->ic_chanhdl); 687 mutex_enter(&iser_qp->qp_lock); 688 while (iser_qp->rq_level != 0) { 689 mutex_exit(&iser_qp->qp_lock); 690 mutex_exit(&chan->ic_conn->ic_lock); 691 delay(drv_usectohz(ISER_DELAY_HALF_SECOND)); 692 mutex_enter(&chan->ic_conn->ic_lock); 693 mutex_enter(&iser_qp->qp_lock); 694 } 695 696 /* Free our QP handle */ 697 mutex_exit(&iser_qp->qp_lock); 698 (void) iser_ib_fini_qp(iser_qp); 699 700 /* Free the IBT channel resources */ 701 (void) ibt_free_channel(chan->ic_chanhdl); 702 chan->ic_chanhdl = NULL; 703 704 /* Free the CQs */ 705 (void) ibt_free_cq(chan->ic_sendcq); 706 (void) ibt_free_cq(chan->ic_recvcq); 707 708 /* Free the chan handle */ 709 mutex_destroy(&chan->ic_chan_lock); 710 kmem_free(chan, sizeof (iser_chan_t)); 711 } 712 713 /* 714 * iser_ib_post_recv 715 * 716 * This function handles keeping the RQ full on a given channel. 717 * This routine will mostly be run on a taskq, and will check the 718 * current fill level of the RQ, and post as many WRs as necessary 719 * to fill it again. 
720 */ 721 722 int 723 iser_ib_post_recv_async(ibt_channel_hdl_t chanhdl) 724 { 725 iser_chan_t *chan; 726 int status; 727 728 /* Pull our iSER channel handle from the private data */ 729 chan = (iser_chan_t *)ibt_get_chan_private(chanhdl); 730 731 /* 732 * Caller must check that chan->ic_conn->ic_stage indicates 733 * the connection is active (not closing, not closed) and 734 * it must hold the mutex cross the check and the call to this function 735 */ 736 ASSERT(mutex_owned(&chan->ic_conn->ic_lock)); 737 ASSERT((chan->ic_conn->ic_stage >= ISER_CONN_STAGE_IC_CONNECTED) && 738 (chan->ic_conn->ic_stage <= ISER_CONN_STAGE_LOGGED_IN)); 739 idm_conn_hold(chan->ic_conn->ic_idmc); 740 status = ddi_taskq_dispatch(iser_taskq, iser_ib_post_recv_task, 741 (void *)chanhdl, DDI_NOSLEEP); 742 if (status != DDI_SUCCESS) { 743 idm_conn_rele(chan->ic_conn->ic_idmc); 744 } 745 746 return (status); 747 } 748 749 static void 750 iser_ib_post_recv_task(void *arg) 751 { 752 ibt_channel_hdl_t chanhdl = arg; 753 iser_chan_t *chan; 754 755 /* Pull our iSER channel handle from the private data */ 756 chan = (iser_chan_t *)ibt_get_chan_private(chanhdl); 757 758 iser_ib_post_recv(chanhdl); 759 idm_conn_rele(chan->ic_conn->ic_idmc); 760 } 761 762 void 763 iser_ib_post_recv(ibt_channel_hdl_t chanhdl) 764 { 765 iser_chan_t *chan; 766 iser_hca_t *hca; 767 iser_msg_t *msg; 768 ibt_recv_wr_t *wrlist, wr[ISER_IB_RQ_POST_MAX]; 769 int rq_space, msg_ret; 770 int total_num, npost; 771 uint_t nposted; 772 int status, i; 773 iser_qp_t *iser_qp; 774 775 /* Pull our iSER channel handle from the private data */ 776 chan = (iser_chan_t *)ibt_get_chan_private(chanhdl); 777 778 ASSERT(chan != NULL); 779 780 mutex_enter(&chan->ic_conn->ic_lock); 781 782 /* Bail out if the connection is closed; no need for more recv WRs */ 783 if ((chan->ic_conn->ic_stage == ISER_CONN_STAGE_CLOSING) || 784 (chan->ic_conn->ic_stage == ISER_CONN_STAGE_CLOSED)) { 785 mutex_exit(&chan->ic_conn->ic_lock); 786 return; 787 } 788 789 
/* get the QP handle from the iser_chan */ 790 iser_qp = &chan->ic_qp; 791 792 hca = chan->ic_hca; 793 794 if (hca == NULL) { 795 ISER_LOG(CE_NOTE, "iser_ib_post_recv: unable to retrieve " 796 "HCA handle"); 797 mutex_exit(&chan->ic_conn->ic_lock); 798 return; 799 } 800 801 /* check for space to post on the RQ */ 802 mutex_enter(&iser_qp->qp_lock); 803 rq_space = iser_qp->rq_depth - iser_qp->rq_level; 804 if (rq_space == 0) { 805 /* The RQ is full, clear the pending flag and return */ 806 iser_qp->rq_taskqpending = B_FALSE; 807 mutex_exit(&iser_qp->qp_lock); 808 mutex_exit(&chan->ic_conn->ic_lock); 809 return; 810 } 811 812 /* Keep track of the lowest value for rq_min_post_level */ 813 if (iser_qp->rq_level < iser_qp->rq_min_post_level) 814 iser_qp->rq_min_post_level = iser_qp->rq_level; 815 816 mutex_exit(&iser_qp->qp_lock); 817 818 /* we've room to post, so pull from the msg cache */ 819 msg = iser_msg_get(hca, rq_space, &msg_ret); 820 if (msg == NULL) { 821 ISER_LOG(CE_NOTE, "iser_ib_post_recv: no message handles " 822 "available in msg cache currently"); 823 /* 824 * There are no messages on the cache. Wait a half- 825 * second, then try again. 826 */ 827 delay(drv_usectohz(ISER_DELAY_HALF_SECOND)); 828 status = iser_ib_post_recv_async(chanhdl); 829 if (status != DDI_SUCCESS) { 830 ISER_LOG(CE_NOTE, "iser_ib_post_recv: failed to " 831 "redispatch routine"); 832 /* Failed to dispatch, clear pending flag */ 833 mutex_enter(&iser_qp->qp_lock); 834 iser_qp->rq_taskqpending = B_FALSE; 835 mutex_exit(&iser_qp->qp_lock); 836 } 837 mutex_exit(&chan->ic_conn->ic_lock); 838 return; 839 } 840 841 if (msg_ret != rq_space) { 842 ISER_LOG(CE_NOTE, "iser_ib_post_recv: requested number of " 843 "messages not allocated: requested (%d) allocated (%d)", 844 rq_space, msg_ret); 845 /* We got some, but not all, of our requested depth */ 846 rq_space = msg_ret; 847 } 848 849 /* 850 * Now, walk through the allocated WRs and post them, 851 * ISER_IB_RQ_POST_MAX (or less) at a time. 
852 */ 853 wrlist = &wr[0]; 854 total_num = rq_space; 855 856 while (total_num) { 857 /* determine the number to post on this iteration */ 858 npost = (total_num > ISER_IB_RQ_POST_MAX) ? 859 ISER_IB_RQ_POST_MAX : total_num; 860 861 /* build a list of WRs from the msg list */ 862 for (i = 0; i < npost; i++) { 863 wrlist[i].wr_id = (ibt_wrid_t)(uintptr_t)msg; 864 wrlist[i].wr_nds = ISER_IB_SGLIST_SIZE; 865 wrlist[i].wr_sgl = &msg->msg_ds; 866 msg = msg->nextp; 867 } 868 869 /* post the list to the RQ */ 870 nposted = 0; 871 status = ibt_post_recv(chanhdl, wrlist, npost, &nposted); 872 if ((status != IBT_SUCCESS) || (nposted != npost)) { 873 ISER_LOG(CE_NOTE, "iser_ib_post_recv: ibt_post_recv " 874 "failed: requested (%d) posted (%d) status (%d)", 875 npost, nposted, status); 876 total_num -= nposted; 877 break; 878 } 879 880 /* decrement total number to post by the number posted */ 881 total_num -= nposted; 882 } 883 884 mutex_enter(&iser_qp->qp_lock); 885 if (total_num != 0) { 886 ISER_LOG(CE_NOTE, "iser_ib_post_recv: unable to fill RQ, " 887 "failed to post (%d) WRs", total_num); 888 iser_qp->rq_level += rq_space - total_num; 889 } else { 890 iser_qp->rq_level += rq_space; 891 } 892 893 /* 894 * Now that we've filled the RQ, check that all of the recv WRs 895 * haven't just been immediately consumed. If so, taskqpending is 896 * still B_TRUE, so we need to fire off a taskq thread to post 897 * more WRs. 898 */ 899 if (iser_qp->rq_level == 0) { 900 mutex_exit(&iser_qp->qp_lock); 901 status = iser_ib_post_recv_async(chanhdl); 902 if (status != DDI_SUCCESS) { 903 ISER_LOG(CE_NOTE, "iser_ib_post_recv: failed to " 904 "dispatch followup routine"); 905 /* Failed to dispatch, clear pending flag */ 906 mutex_enter(&iser_qp->qp_lock); 907 iser_qp->rq_taskqpending = B_FALSE; 908 mutex_exit(&iser_qp->qp_lock); 909 } 910 } else { 911 /* 912 * We're done, we've filled the RQ. Clear the taskq 913 * flag so that we can run again. 
914 */ 915 iser_qp->rq_taskqpending = B_FALSE; 916 mutex_exit(&iser_qp->qp_lock); 917 } 918 919 mutex_exit(&chan->ic_conn->ic_lock); 920 } 921 922 /* 923 * iser_ib_handle_portup_event() 924 * This handles the IBT_EVENT_PORT_UP unaffiliated asynchronous event. 925 * 926 * To facilitate a seamless bringover of the port and configure the CM service 927 * for inbound iSER service requests on this newly active port, the existing 928 * IDM services will be checked for iSER support. 929 * If an iSER service was already created, then this service will simply be 930 * bound to the gid of the newly active port. If on the other hand, the CM 931 * service did not exist, i.e. only socket communication, then a new CM 932 * service will be first registered with the saved service parameters and 933 * then bound to the newly active port. 934 * 935 */ 936 /* ARGSUSED */ 937 static void 938 iser_ib_handle_portup_event(ibt_hca_hdl_t hdl, ibt_async_event_t *event) 939 { 940 iser_hca_t *hca; 941 ib_gid_t gid; 942 idm_svc_t *idm_svc; 943 int status; 944 945 ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event: HCA(0x%llx) port(%d)", 946 (longlong_t)event->ev_hca_guid, event->ev_port); 947 948 /* 949 * Query all ports on the HCA and update the port information 950 * maintainted in the iser_hca_t structure 951 */ 952 hca = iser_ib_guid2hca(event->ev_hca_guid); 953 if (hca == NULL) { 954 955 /* HCA is just made available, first port on that HCA */ 956 hca = iser_ib_alloc_hca(event->ev_hca_guid); 957 if (hca == NULL) { 958 ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event " 959 "iser_ib_alloc_hca failed: HCA(0x%llx) port(%d)", 960 (longlong_t)event->ev_hca_guid, event->ev_port); 961 return; 962 } 963 mutex_enter(&iser_state->is_hcalist_lock); 964 list_insert_tail(&iser_state->is_hcalist, hca); 965 iser_state->is_num_hcas++; 966 mutex_exit(&iser_state->is_hcalist_lock); 967 968 } else { 969 970 status = iser_ib_update_hcaports(hca); 971 972 if (status != IBT_SUCCESS) { 973 ISER_LOG(CE_NOTE, 
"iser_ib_handle_portup_event " 974 "status(0x%x): iser_ib_update_hcaports failed: " 975 "HCA(0x%llx) port(%d)", status, 976 (longlong_t)event->ev_hca_guid, event->ev_port); 977 return; 978 } 979 } 980 981 gid = hca->hca_port_info[event->ev_port - 1].p_sgid_tbl[0]; 982 983 /* 984 * Iterate through the global list of IDM target services 985 * and check for existing iSER CM service. 986 */ 987 mutex_enter(&idm.idm_global_mutex); 988 for (idm_svc = list_head(&idm.idm_tgt_svc_list); 989 idm_svc != NULL; 990 idm_svc = list_next(&idm.idm_tgt_svc_list, idm_svc)) { 991 992 993 if (idm_svc->is_iser_svc == NULL) { 994 995 /* Establish a new CM service for iSER requests */ 996 status = iser_tgt_svc_create( 997 &idm_svc->is_svc_req, idm_svc); 998 999 if (status != IBT_SUCCESS) { 1000 ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event " 1001 "status(0x%x): iser_tgt_svc_create failed: " 1002 "HCA(0x%llx) port(%d)", status, 1003 (longlong_t)event->ev_hca_guid, 1004 event->ev_port); 1005 1006 continue; 1007 } 1008 } 1009 1010 status = iser_ib_activate_port( 1011 idm_svc, event->ev_hca_guid, gid); 1012 if (status != IBT_SUCCESS) { 1013 1014 ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event " 1015 "status(0x%x): Bind service on port " 1016 "(%llx:%llx) failed", 1017 status, (longlong_t)gid.gid_prefix, 1018 (longlong_t)gid.gid_guid); 1019 1020 continue; 1021 } 1022 ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event: service bound " 1023 "HCA(0x%llx) port(%d)", (longlong_t)event->ev_hca_guid, 1024 event->ev_port); 1025 } 1026 mutex_exit(&idm.idm_global_mutex); 1027 1028 ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event success: " 1029 "HCA(0x%llx) port(%d)", (longlong_t)event->ev_hca_guid, 1030 event->ev_port); 1031 } 1032 1033 /* 1034 * iser_ib_handle_portdown_event() 1035 * This handles the IBT_EVENT_PORT_DOWN unaffiliated asynchronous error. 1036 * 1037 * Unconfigure the CM service on the deactivated port and teardown the 1038 * connections that are using the CM service. 
1039 */ 1040 /* ARGSUSED */ 1041 static void 1042 iser_ib_handle_portdown_event(ibt_hca_hdl_t hdl, ibt_async_event_t *event) 1043 { 1044 iser_hca_t *hca; 1045 ib_gid_t gid; 1046 int status; 1047 1048 /* 1049 * Query all ports on the HCA and update the port information 1050 * maintainted in the iser_hca_t structure 1051 */ 1052 hca = iser_ib_guid2hca(event->ev_hca_guid); 1053 ASSERT(hca != NULL); 1054 1055 status = iser_ib_update_hcaports(hca); 1056 if (status != IBT_SUCCESS) { 1057 ISER_LOG(CE_NOTE, "iser_ib_handle_portdown_event status(0x%x): " 1058 "ibt_ib_update_hcaports failed: HCA(0x%llx) port(%d)", 1059 status, (longlong_t)event->ev_hca_guid, event->ev_port); 1060 return; 1061 } 1062 1063 /* get the gid of the new port */ 1064 gid = hca->hca_port_info[event->ev_port - 1].p_sgid_tbl[0]; 1065 iser_ib_deactivate_port(event->ev_hca_guid, gid); 1066 1067 ISER_LOG(CE_NOTE, "iser_ib_handle_portdown_event success: " 1068 "HCA(0x%llx) port(%d)", (longlong_t)event->ev_hca_guid, 1069 event->ev_port); 1070 } 1071 1072 /* 1073 * iser_ib_handle_hca_detach_event() 1074 * Quiesce all activity bound for the port, teardown the connection, unbind 1075 * iSER services on all ports and release the HCA handle. 1076 */ 1077 /* ARGSUSED */ 1078 static void 1079 iser_ib_handle_hca_detach_event(ibt_hca_hdl_t hdl, ibt_async_event_t *event) 1080 { 1081 iser_hca_t *nexthca, *hca; 1082 int i, status; 1083 1084 ISER_LOG(CE_NOTE, "iser_ib_handle_hca_detach_event: HCA(0x%llx)", 1085 (longlong_t)event->ev_hca_guid); 1086 1087 hca = iser_ib_guid2hca(event->ev_hca_guid); 1088 for (i = 0; i < hca->hca_num_ports; i++) { 1089 iser_ib_deactivate_port(hca->hca_guid, 1090 hca->hca_port_info[i].p_sgid_tbl[0]); 1091 } 1092 1093 /* 1094 * Update the HCA list maintained in the iser_state. Free the 1095 * resources allocated to the HCA, i.e. 
	 * caches, protection domain
	 */
	mutex_enter(&iser_state->is_hcalist_lock);

	for (hca = list_head(&iser_state->is_hcalist);
	    hca != NULL;
	    hca = nexthca) {

		/* Save the successor now; hca may be freed below */
		nexthca = list_next(&iser_state->is_hcalist, hca);

		if (hca->hca_guid == event->ev_hca_guid) {

			list_remove(&iser_state->is_hcalist, hca);
			iser_state->is_num_hcas--;

			status = iser_ib_free_hca(hca);
			if (status != DDI_SUCCESS) {
				/* Put the HCA back on the list on failure */
				ISER_LOG(CE_WARN, "iser_ib_handle_hca_detach: "
				    "Failed to free hca(%p)", (void *)hca);
				list_insert_tail(&iser_state->is_hcalist, hca);
				iser_state->is_num_hcas++;
			}
			/* No way to return status to IBT if this fails */
		}
	}
	mutex_exit(&iser_state->is_hcalist_lock);

}

/*
 * iser_ib_async_handler
 * An IBT Asynchronous Event handler is registered it with the framework and
 * passed via the ibt_attach() routine. This function handles the following
 * asynchronous events.
 * IBT_EVENT_PORT_UP
 * IBT_ERROR_PORT_DOWN
 * IBT_HCA_ATTACH_EVENT
 * IBT_HCA_DETACH_EVENT
 */
/* ARGSUSED */
void
iser_ib_async_handler(void *clntp, ibt_hca_hdl_t hdl, ibt_async_code_t code,
    ibt_async_event_t *event)
{
	switch (code) {
	case IBT_EVENT_PORT_UP:
		iser_ib_handle_portup_event(hdl, event);
		break;

	case IBT_ERROR_PORT_DOWN:
		iser_ib_handle_portdown_event(hdl, event);
		break;

	case IBT_HCA_ATTACH_EVENT:
		/*
		 * A new HCA device is available for use, ignore this
		 * event because the corresponding IBT_EVENT_PORT_UP
		 * events will get triggered and handled accordingly.
		 */
		break;

	case IBT_HCA_DETACH_EVENT:
		iser_ib_handle_hca_detach_event(hdl, event);
		break;

	default:
		/* All other async events are not of interest to iSER */
		break;
	}
}

/*
 * iser_ib_init_hcas
 *
 * This function opens all the HCA devices, gathers the HCA state information
 * and adds the HCA handle for each HCA found in the iser_soft_state.
 */
static int
iser_ib_init_hcas(void)
{
	ib_guid_t	*guid;
	int		num_hcas;
	int		i;
	iser_hca_t	*hca;

	/* Retrieve the HCA list */
	num_hcas = ibt_get_hca_list(&guid);
	if (num_hcas == 0) {
		/*
		 * This shouldn't happen, but might if we have all HCAs
		 * detach prior to initialization.
		 */
		return (DDI_FAILURE);
	}

	/* Initialize the hcalist lock */
	mutex_init(&iser_state->is_hcalist_lock, NULL, MUTEX_DRIVER, NULL);

	/* Create the HCA list */
	list_create(&iser_state->is_hcalist, sizeof (iser_hca_t),
	    offsetof(iser_hca_t, hca_node));

	for (i = 0; i < num_hcas; i++) {

		ISER_LOG(CE_NOTE, "iser_ib_init_hcas: initializing HCA "
		    "(0x%llx)", (longlong_t)guid[i]);

		hca = iser_ib_alloc_hca(guid[i]);
		if (hca == NULL) {
			/* This shouldn't happen, teardown and fail */
			(void) iser_ib_fini_hcas();
			(void) ibt_free_hca_list(guid, num_hcas);
			return (DDI_FAILURE);
		}

		mutex_enter(&iser_state->is_hcalist_lock);
		list_insert_tail(&iser_state->is_hcalist, hca);
		iser_state->is_num_hcas++;
		mutex_exit(&iser_state->is_hcalist_lock);

	}

	/* Free the IBT HCA list */
	(void) ibt_free_hca_list(guid, num_hcas);

	/* Check that we've initialized at least one HCA */
	mutex_enter(&iser_state->is_hcalist_lock);
	if (list_is_empty(&iser_state->is_hcalist)) {
		ISER_LOG(CE_NOTE, "iser_ib_init_hcas: failed to initialize "
		    "any HCAs");

		mutex_exit(&iser_state->is_hcalist_lock);
		(void)
iser_ib_fini_hcas(); 1227 return (DDI_FAILURE); 1228 } 1229 mutex_exit(&iser_state->is_hcalist_lock); 1230 1231 return (DDI_SUCCESS); 1232 } 1233 1234 /* 1235 * iser_ib_fini_hcas 1236 * 1237 * Teardown the iSER HCA list initialized above. 1238 */ 1239 static int 1240 iser_ib_fini_hcas(void) 1241 { 1242 iser_hca_t *nexthca, *hca; 1243 int status; 1244 1245 mutex_enter(&iser_state->is_hcalist_lock); 1246 for (hca = list_head(&iser_state->is_hcalist); 1247 hca != NULL; 1248 hca = nexthca) { 1249 1250 nexthca = list_next(&iser_state->is_hcalist, hca); 1251 1252 list_remove(&iser_state->is_hcalist, hca); 1253 1254 status = iser_ib_free_hca(hca); 1255 if (status != IBT_SUCCESS) { 1256 ISER_LOG(CE_NOTE, "iser_ib_fini_hcas: failed to free " 1257 "HCA during fini"); 1258 list_insert_tail(&iser_state->is_hcalist, hca); 1259 return (DDI_FAILURE); 1260 } 1261 1262 iser_state->is_num_hcas--; 1263 1264 } 1265 mutex_exit(&iser_state->is_hcalist_lock); 1266 list_destroy(&iser_state->is_hcalist); 1267 mutex_destroy(&iser_state->is_hcalist_lock); 1268 1269 return (DDI_SUCCESS); 1270 } 1271 1272 /* 1273 * iser_ib_alloc_hca 1274 * 1275 * This function opens the given HCA device, gathers the HCA state information 1276 * and adds the HCA handle 1277 */ 1278 static iser_hca_t * 1279 iser_ib_alloc_hca(ib_guid_t guid) 1280 { 1281 iser_hca_t *hca; 1282 int status; 1283 1284 /* Allocate an iser_hca_t HCA handle */ 1285 hca = (iser_hca_t *)kmem_zalloc(sizeof (iser_hca_t), KM_SLEEP); 1286 1287 /* Open this HCA */ 1288 status = ibt_open_hca(iser_state->is_ibhdl, guid, &hca->hca_hdl); 1289 if (status != IBT_SUCCESS) { 1290 ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: ibt_open_hca failed:" 1291 " guid (0x%llx) status (0x%x)", (longlong_t)guid, status); 1292 kmem_free(hca, sizeof (iser_hca_t)); 1293 return (NULL); 1294 } 1295 1296 hca->hca_guid = guid; 1297 hca->hca_clnt_hdl = iser_state->is_ibhdl; 1298 1299 /* Query the HCA */ 1300 status = ibt_query_hca(hca->hca_hdl, &hca->hca_attr); 1301 if (status != 
IBT_SUCCESS) {
		ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: ibt_query_hca "
		    "failure: guid (0x%llx) status (0x%x)",
		    (longlong_t)guid, status);
		(void) ibt_close_hca(hca->hca_hdl);
		kmem_free(hca, sizeof (iser_hca_t));
		return (NULL);
	}

	/* Query all ports on the HCA */
	status = ibt_query_hca_ports(hca->hca_hdl, 0,
	    &hca->hca_port_info, &hca->hca_num_ports,
	    &hca->hca_port_info_sz);
	if (status != IBT_SUCCESS) {
		ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: "
		    "ibt_query_hca_ports failure: guid (0x%llx) "
		    "status (0x%x)", (longlong_t)guid, status);
		(void) ibt_close_hca(hca->hca_hdl);
		kmem_free(hca, sizeof (iser_hca_t));
		return (NULL);
	}

	/* Allocate a single PD on this HCA */
	status = ibt_alloc_pd(hca->hca_hdl, IBT_PD_NO_FLAGS,
	    &hca->hca_pdhdl);
	if (status != IBT_SUCCESS) {
		ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: ibt_alloc_pd "
		    "failure: guid (0x%llx) status (0x%x)",
		    (longlong_t)guid, status);
		(void) ibt_close_hca(hca->hca_hdl);
		ibt_free_portinfo(hca->hca_port_info, hca->hca_port_info_sz);
		kmem_free(hca, sizeof (iser_hca_t));
		return (NULL);
	}

	/* Initialize the message and data MR caches for this HCA */
	iser_init_hca_caches(hca);

	return (hca);
}

/*
 * iser_ib_free_hca
 * Undo iser_ib_alloc_hca(): free the MR caches, protection domain, port
 * info and HCA handle, then free the iser_hca_t itself. On a partial
 * failure, attempt to restore the HCA to its prior state and return
 * DDI_FAILURE.
 */
static int
iser_ib_free_hca(iser_hca_t *hca)
{
	int		status;
	ibt_hca_portinfo_t *hca_port_info;
	uint_t		hca_port_info_sz;

	ASSERT(hca != NULL);
	/* A prior partial teardown left this HCA unusable; don't retry */
	if (hca->hca_failed)
		return (DDI_FAILURE);

	hca_port_info = hca->hca_port_info;
	hca_port_info_sz = hca->hca_port_info_sz;

	/*
	 * Free the memory regions before freeing
	 * the associated protection domain
	 */
	iser_fini_hca_caches(hca);

	status = ibt_free_pd(hca->hca_hdl, hca->hca_pdhdl);
	if (status != IBT_SUCCESS) {
		ISER_LOG(CE_NOTE, "iser_ib_free_hca: failed to free PD "
		    "status=0x%x", status);
		goto
out_caches; 1367 } 1368 1369 status = ibt_close_hca(hca->hca_hdl); 1370 if (status != IBT_SUCCESS) { 1371 ISER_LOG(CE_NOTE, "iser_ib_fini_hcas: failed to close HCA " 1372 "status=0x%x", status); 1373 goto out_pd; 1374 } 1375 1376 ibt_free_portinfo(hca_port_info, hca_port_info_sz); 1377 1378 kmem_free(hca, sizeof (iser_hca_t)); 1379 return (DDI_SUCCESS); 1380 1381 /* 1382 * We only managed to partially tear down the HCA, try to put it back 1383 * like it was before returning. 1384 */ 1385 out_pd: 1386 status = ibt_alloc_pd(hca->hca_hdl, IBT_PD_NO_FLAGS, &hca->hca_pdhdl); 1387 if (status != IBT_SUCCESS) { 1388 hca->hca_failed = B_TRUE; 1389 /* Report error and exit */ 1390 ISER_LOG(CE_NOTE, "iser_ib_free_hca: could not re-alloc PD " 1391 "status=0x%x", status); 1392 return (DDI_FAILURE); 1393 } 1394 1395 out_caches: 1396 iser_init_hca_caches(hca); 1397 1398 return (DDI_FAILURE); 1399 } 1400 1401 static int 1402 iser_ib_update_hcaports(iser_hca_t *hca) 1403 { 1404 ibt_hca_portinfo_t *pinfop, *oldpinfop; 1405 uint_t size, oldsize, nport; 1406 int status; 1407 1408 ASSERT(hca != NULL); 1409 1410 status = ibt_query_hca_ports(hca->hca_hdl, 0, &pinfop, &nport, &size); 1411 if (status != IBT_SUCCESS) { 1412 ISER_LOG(CE_NOTE, "ibt_query_hca_ports failed: %d", status); 1413 return (status); 1414 } 1415 1416 oldpinfop = hca->hca_port_info; 1417 oldsize = hca->hca_port_info_sz; 1418 hca->hca_port_info = pinfop; 1419 hca->hca_port_info_sz = size; 1420 1421 (void) ibt_free_portinfo(oldpinfop, oldsize); 1422 1423 return (IBT_SUCCESS); 1424 } 1425 1426 /* 1427 * iser_ib_gid2hca 1428 * Given a gid, find the corresponding hca 1429 */ 1430 iser_hca_t * 1431 iser_ib_gid2hca(ib_gid_t gid) 1432 { 1433 1434 iser_hca_t *hca; 1435 int i; 1436 1437 mutex_enter(&iser_state->is_hcalist_lock); 1438 for (hca = list_head(&iser_state->is_hcalist); 1439 hca != NULL; 1440 hca = list_next(&iser_state->is_hcalist, hca)) { 1441 1442 for (i = 0; i < hca->hca_num_ports; i++) { 1443 if 
((hca->hca_port_info[i].p_sgid_tbl[0].gid_prefix == 1444 gid.gid_prefix) && 1445 (hca->hca_port_info[i].p_sgid_tbl[0].gid_guid == 1446 gid.gid_guid)) { 1447 1448 mutex_exit(&iser_state->is_hcalist_lock); 1449 1450 return (hca); 1451 } 1452 } 1453 } 1454 mutex_exit(&iser_state->is_hcalist_lock); 1455 return (NULL); 1456 } 1457 1458 /* 1459 * iser_ib_guid2hca 1460 * Given a HCA guid, find the corresponding HCA 1461 */ 1462 iser_hca_t * 1463 iser_ib_guid2hca(ib_guid_t guid) 1464 { 1465 1466 iser_hca_t *hca; 1467 1468 mutex_enter(&iser_state->is_hcalist_lock); 1469 for (hca = list_head(&iser_state->is_hcalist); 1470 hca != NULL; 1471 hca = list_next(&iser_state->is_hcalist, hca)) { 1472 1473 if (hca->hca_guid == guid) { 1474 mutex_exit(&iser_state->is_hcalist_lock); 1475 return (hca); 1476 } 1477 } 1478 mutex_exit(&iser_state->is_hcalist_lock); 1479 return (NULL); 1480 } 1481 1482 /* 1483 * iser_ib_conv_sockaddr2ibtaddr 1484 * This function converts a socket address into the IBT format 1485 */ 1486 void iser_ib_conv_sockaddr2ibtaddr( 1487 idm_sockaddr_t *saddr, ibt_ip_addr_t *ibt_addr) 1488 { 1489 if (saddr == NULL) { 1490 ibt_addr->family = AF_UNSPEC; 1491 ibt_addr->un.ip4addr = 0; 1492 } else { 1493 switch (saddr->sin.sa_family) { 1494 case AF_INET: 1495 1496 ibt_addr->family = saddr->sin4.sin_family; 1497 ibt_addr->un.ip4addr = saddr->sin4.sin_addr.s_addr; 1498 break; 1499 1500 case AF_INET6: 1501 1502 ibt_addr->family = saddr->sin6.sin6_family; 1503 ibt_addr->un.ip6addr = saddr->sin6.sin6_addr; 1504 break; 1505 1506 default: 1507 ibt_addr->family = AF_UNSPEC; 1508 } 1509 1510 } 1511 } 1512 1513 /* 1514 * iser_ib_conv_ibtaddr2sockaddr 1515 * This function converts an IBT ip address handle to a sockaddr 1516 */ 1517 void iser_ib_conv_ibtaddr2sockaddr(struct sockaddr_storage *ss, 1518 ibt_ip_addr_t *ibt_addr, in_port_t port) 1519 { 1520 struct sockaddr_in *sin; 1521 struct sockaddr_in6 *sin6; 1522 1523 switch (ibt_addr->family) { 1524 case AF_INET: 1525 case 
AF_UNSPEC:

		/* AF_UNSPEC is handled the same as IPv4 here */
		sin = (struct sockaddr_in *)ibt_addr;
		sin->sin_port = ntohs(port);
		bcopy(sin, ss, sizeof (struct sockaddr_in));
		break;

	case AF_INET6:

		sin6 = (struct sockaddr_in6 *)ibt_addr;
		sin6->sin6_port = ntohs(port);
		bcopy(sin6, ss, sizeof (struct sockaddr_in6));
		break;

	default:
		ISER_LOG(CE_NOTE, "iser_ib_conv_ibtaddr2sockaddr: "
		    "unknown family type: 0x%x", ibt_addr->family);
	}
}

/*
 * iser_ib_setup_cq
 * This function sets up the Completion Queue size and allocates the specified
 * Completion Queue
 */
static int
iser_ib_setup_cq(ibt_hca_hdl_t hca_hdl, uint_t cq_size, ibt_cq_hdl_t *cq_hdl)
{

	ibt_cq_attr_t		cq_attr;
	int			status;

	cq_attr.cq_size		= cq_size;
	cq_attr.cq_sched	= 0;
	cq_attr.cq_flags	= IBT_CQ_NO_FLAGS;

	/* Allocate a Completion Queue */
	status = ibt_alloc_cq(hca_hdl, &cq_attr, cq_hdl, NULL);
	if (status != IBT_SUCCESS) {
		/* On failure, the raw IBT status is propagated */
		ISER_LOG(CE_NOTE, "iser_ib_setup_cq: ibt_alloc_cq failure (%d)",
		    status);
		return (status);
	}

	return (ISER_STATUS_SUCCESS);
}

/*
 * iser_ib_setup_chanargs
 * Fill in the ibt_rc_chan_alloc_args_t used to allocate an RC channel:
 * queue sizes, SGL limits, completion signaling, RDMA controls, port,
 * CQ handles and protection domain.
 */
static void
iser_ib_setup_chanargs(uint8_t hca_port, ibt_cq_hdl_t scq_hdl,
    ibt_cq_hdl_t rcq_hdl, uint_t sq_size, uint_t rq_size,
    ibt_pd_hdl_t hca_pdhdl, ibt_rc_chan_alloc_args_t *cargs)
{

	bzero(cargs, sizeof (ibt_rc_chan_alloc_args_t));

	/*
	 * Set up the size of the channels send queue, receive queue and the
	 * maximum number of elements in a scatter gather list of work requests
	 * posted to the send and receive queues.
1588 */ 1589 cargs->rc_sizes.cs_sq = sq_size; 1590 cargs->rc_sizes.cs_rq = rq_size; 1591 cargs->rc_sizes.cs_sq_sgl = ISER_IB_SGLIST_SIZE; 1592 cargs->rc_sizes.cs_rq_sgl = ISER_IB_SGLIST_SIZE; 1593 1594 /* 1595 * All Work requests signaled on a WR basis will receive a send 1596 * request completion. 1597 */ 1598 cargs->rc_flags = IBT_ALL_SIGNALED; 1599 1600 /* Enable RDMA read and RDMA write on the channel end points */ 1601 cargs->rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR; 1602 1603 /* Set the local hca port on which the channel is allocated */ 1604 cargs->rc_hca_port_num = hca_port; 1605 1606 /* Set the Send and Receive Completion Queue handles */ 1607 cargs->rc_scq = scq_hdl; 1608 cargs->rc_rcq = rcq_hdl; 1609 1610 /* Set the protection domain associated with the channel */ 1611 cargs->rc_pd = hca_pdhdl; 1612 1613 /* No SRQ usage */ 1614 cargs->rc_srq = NULL; 1615 } 1616 1617 /* 1618 * iser_ib_init_qp 1619 * Initialize the QP handle 1620 */ 1621 void 1622 iser_ib_init_qp(iser_chan_t *chan, uint_t sq_size, uint_t rq_size) 1623 { 1624 /* Initialize the handle lock */ 1625 mutex_init(&chan->ic_qp.qp_lock, NULL, MUTEX_DRIVER, NULL); 1626 1627 /* Record queue sizes */ 1628 chan->ic_qp.sq_size = sq_size; 1629 chan->ic_qp.rq_size = rq_size; 1630 1631 /* Initialize the RQ monitoring data */ 1632 chan->ic_qp.rq_depth = rq_size; 1633 chan->ic_qp.rq_level = 0; 1634 chan->ic_qp.rq_lwm = (chan->ic_recvcq_sz * ISER_IB_RQ_LWM_PCT) / 100; 1635 1636 /* Initialize the taskq flag */ 1637 chan->ic_qp.rq_taskqpending = B_FALSE; 1638 } 1639 1640 /* 1641 * iser_ib_fini_qp 1642 * Teardown the QP handle 1643 */ 1644 void 1645 iser_ib_fini_qp(iser_qp_t *qp) 1646 { 1647 /* Destroy the handle lock */ 1648 mutex_destroy(&qp->qp_lock); 1649 } 1650 1651 static int 1652 iser_ib_activate_port(idm_svc_t *idm_svc, ib_guid_t guid, ib_gid_t gid) 1653 { 1654 iser_svc_t *iser_svc; 1655 iser_sbind_t *is_sbind; 1656 int status; 1657 1658 iser_svc = idm_svc->is_iser_svc; 1659 1660 /* 1661 * Save the 
	 * address of the service bind handle in the
	 * iser_svc_t to undo the service binding at a later time
	 */
	is_sbind = kmem_zalloc(sizeof (iser_sbind_t), KM_SLEEP);
	is_sbind->is_gid = gid;
	is_sbind->is_guid = guid;

	status = ibt_bind_service(iser_svc->is_srvhdl, gid, NULL,
	    idm_svc, &is_sbind->is_sbindhdl);

	if (status != IBT_SUCCESS) {
		ISER_LOG(CE_NOTE, "iser_ib_activate_port: status(0x%x): "
		    "Bind service(%llx) on port(%llx:%llx) failed",
		    status, (longlong_t)iser_svc->is_svcid,
		    (longlong_t)gid.gid_prefix, (longlong_t)gid.gid_guid);

		kmem_free(is_sbind, sizeof (iser_sbind_t));

		return (status);
	}

	/* Remember the binding so it can be torn down later */
	list_insert_tail(&iser_svc->is_sbindlist, is_sbind);

	return (IBT_SUCCESS);
}

/*
 * iser_ib_deactivate_port
 * Fail the transport for every iSER target connection on the given HCA
 * port and, if an iSER service is still bound on that port, unbind it.
 */
static void
iser_ib_deactivate_port(ib_guid_t hca_guid, ib_gid_t gid)
{
	iser_svc_t	*iser_svc;
	iser_conn_t	*iser_conn;
	iser_sbind_t	*is_sbind;
	idm_conn_t	*idm_conn;

	/*
	 * Iterate through the global list of IDM target connections.
	 * Issue a TRANSPORT_FAIL for any connections on this port, and
	 * if there is a bound service running on the port, tear it down.
	 */
	mutex_enter(&idm.idm_global_mutex);
	for (idm_conn = list_head(&idm.idm_tgt_conn_list);
	    idm_conn != NULL;
	    idm_conn = list_next(&idm.idm_tgt_conn_list, idm_conn)) {

		if (idm_conn->ic_transport_type != IDM_TRANSPORT_TYPE_ISER) {
			/* this is not an iSER connection, skip it */
			continue;
		}

		iser_conn = idm_conn->ic_transport_private;
		if (iser_conn->ic_chan->ic_ibt_path.pi_hca_guid != hca_guid) {
			/* this iSER connection is on a different port */
			continue;
		}

		/* Fail the transport for this connection */
		idm_conn_event(idm_conn, CE_TRANSPORT_FAIL, IDM_STATUS_FAIL);

		if (idm_conn->ic_conn_type == CONN_TYPE_INI) {
			/* initiator connection, nothing else to do */
			continue;
		}

		/* Check for a service binding */
		iser_svc = idm_conn->ic_svc_binding->is_iser_svc;
		is_sbind = iser_ib_get_bind(iser_svc, hca_guid, gid);
		if (is_sbind != NULL) {
			/* This service is still bound, tear it down */
			(void) ibt_unbind_service(iser_svc->is_srvhdl,
			    is_sbind->is_sbindhdl);
			list_remove(&iser_svc->is_sbindlist, is_sbind);
			kmem_free(is_sbind, sizeof (iser_sbind_t));
		}
	}
	mutex_exit(&idm.idm_global_mutex);
}

/*
 * iser_ib_get_bind
 * Return the service binding on this service matching the given HCA guid
 * and port gid, or NULL if none exists.
 * NOTE(review): is_sbindlist is walked without an explicit lock here; the
 * caller above holds idm_global_mutex — confirm all callers serialize
 * access to the binding list.
 */
static iser_sbind_t *
iser_ib_get_bind(iser_svc_t *iser_svc, ib_guid_t hca_guid, ib_gid_t gid)
{
	iser_sbind_t	*is_sbind;

	for (is_sbind = list_head(&iser_svc->is_sbindlist);
	    is_sbind != NULL;
	    is_sbind = list_next(&iser_svc->is_sbindlist, is_sbind)) {

		if ((is_sbind->is_guid == hca_guid) &&
		    (is_sbind->is_gid.gid_prefix == gid.gid_prefix) &&
		    (is_sbind->is_gid.gid_guid == gid.gid_guid)) {
			return (is_sbind);
		}
	}
	return (NULL);
}