1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 #include <sys/types.h> 26 #include <sys/ddi.h> 27 #include <sys/types.h> 28 #include <sys/socket.h> 29 #include <netinet/in.h> 30 #include <sys/sunddi.h> 31 #include <sys/sysmacros.h> 32 #include <sys/iscsi_protocol.h> 33 34 #include <sys/ib/clients/iser/iser.h> 35 #include <sys/ib/clients/iser/iser_idm.h> 36 37 /* 38 * iser_ib.c 39 * Routines for InfiniBand transport for iSER 40 * 41 * This file contains the routines to interface with the IBT API to attach and 42 * allocate IB resources, handle async events, and post recv work requests. 43 * 44 */ 45 46 static iser_hca_t *iser_ib_gid2hca(ib_gid_t gid); 47 static iser_hca_t *iser_ib_guid2hca(ib_guid_t guid); 48 49 static iser_hca_t *iser_ib_alloc_hca(ib_guid_t guid); 50 static int iser_ib_free_hca(iser_hca_t *hca); 51 static int iser_ib_update_hcaports(iser_hca_t *hca); 52 static int iser_ib_init_hcas(void); 53 static int iser_ib_fini_hcas(void); 54 55 static iser_sbind_t *iser_ib_get_bind( 56 iser_svc_t *iser_svc, ib_guid_t hca_guid, ib_gid_t gid); 57 static int iser_ib_activate_port( 58 idm_svc_t *idm_svc, ib_guid_t guid, ib_gid_t gid); 59 static void iser_ib_deactivate_port(ib_guid_t hca_guid, ib_gid_t gid); 60 61 static void iser_ib_init_qp(iser_chan_t *chan, uint_t sq_size, uint_t rq_size); 62 static void iser_ib_fini_qp(iser_qp_t *qp); 63 64 static int iser_ib_setup_cq(ibt_hca_hdl_t hca_hdl, uint_t cq_size, 65 ibt_cq_hdl_t *cq_hdl); 66 67 static void iser_ib_setup_chanargs(uint8_t hca_port, ibt_cq_hdl_t scq_hdl, 68 ibt_cq_hdl_t rcq_hdl, uint_t sq_size, uint_t rq_size, 69 ibt_pd_hdl_t hca_pdhdl, ibt_rc_chan_alloc_args_t *cargs); 70 71 static void iser_ib_handle_portup_event(ibt_hca_hdl_t hdl, 72 ibt_async_event_t *event); 73 static void iser_ib_handle_portdown_event(ibt_hca_hdl_t hdl, 74 ibt_async_event_t *event); 75 static void iser_ib_handle_hca_detach_event(ibt_hca_hdl_t hdl, 76 ibt_async_event_t *event); 77 78 static void iser_ib_post_recv_task(void *arg); 79 80 static struct ibt_clnt_modinfo_s iser_ib_modinfo = { 81 IBTI_V_CURR, 82 IBT_STORAGE_DEV, 83 iser_ib_async_handler, 84 NULL, 85 "iSER" 86 }; 87 88 /* 89 * iser_ib_init 90 * 91 * This function registers the HCA drivers with IBTF and registers and binds 92 * iSER as a service with IBTF. 93 */ 94 int 95 iser_ib_init(void) 96 { 97 int status; 98 99 /* Register with IBTF */ 100 status = ibt_attach(&iser_ib_modinfo, iser_state->is_dip, iser_state, 101 &iser_state->is_ibhdl); 102 if (status != DDI_SUCCESS) { 103 ISER_LOG(CE_NOTE, "iser_ib_init: ibt_attach failed (0x%x)", 104 status); 105 return (DDI_FAILURE); 106 } 107 108 /* Create the global work request kmem_cache */ 109 iser_state->iser_wr_cache = kmem_cache_create("iser_wr_cache", 110 sizeof (iser_wr_t), 0, NULL, NULL, NULL, 111 iser_state, NULL, KM_SLEEP); 112 113 /* Populate our list of HCAs */ 114 status = iser_ib_init_hcas(); 115 if (status != DDI_SUCCESS) { 116 /* HCAs failed to initialize, tear it down */ 117 kmem_cache_destroy(iser_state->iser_wr_cache); 118 (void) ibt_detach(iser_state->is_ibhdl); 119 iser_state->is_ibhdl = NULL; 120 ISER_LOG(CE_NOTE, "iser_ib_init: failed to initialize HCAs"); 121 return (DDI_FAILURE); 122 } 123 124 /* Target will register iSER as a service with IBTF when required */ 125 126 /* Target will bind this service when it comes online */ 127 128 return (DDI_SUCCESS); 129 } 130 131 /* 132 * iser_ib_fini 133 * 134 * This function unbinds and degisters the iSER service from IBTF 135 */ 136 int 137 iser_ib_fini(void) 138 { 139 /* IDM would have already disabled all the services */ 140 141 /* Teardown the HCA list and associated resources */ 142 if (iser_ib_fini_hcas() != DDI_SUCCESS) 143 return (DDI_FAILURE); 144 145 /* Teardown the global work request kmem_cache */ 146 kmem_cache_destroy(iser_state->iser_wr_cache); 147 148 /* Deregister with IBTF */ 149 if (iser_state->is_ibhdl != NULL) { 150 (void) ibt_detach(iser_state->is_ibhdl); 151 iser_state->is_ibhdl = NULL; 152 } 153 154 return (DDI_SUCCESS); 155 } 156 157 /* 158 * iser_ib_register_service 159 * 160 * This function registers the iSER service using the RDMA-Aware Service ID. 161 */ 162 int 163 iser_ib_register_service(idm_svc_t *idm_svc) 164 { 165 ibt_srv_desc_t srvdesc; 166 iser_svc_t *iser_svc; 167 int status; 168 169 bzero(&srvdesc, sizeof (ibt_srv_desc_t)); 170 171 /* Set up IBTI client callback handler from the CM */ 172 srvdesc.sd_handler = iser_ib_cm_handler; 173 174 srvdesc.sd_flags = IBT_SRV_NO_FLAGS; 175 176 iser_svc = (iser_svc_t *)idm_svc->is_iser_svc; 177 178 /* Register the service on the specified port */ 179 status = ibt_register_service( 180 iser_state->is_ibhdl, &srvdesc, 181 iser_svc->is_svcid, 1, &iser_svc->is_srvhdl, NULL); 182 183 return (status); 184 } 185 186 /* 187 * iser_ib_bind_service 188 * 189 * This function binds a given iSER service on all available HCA ports. The 190 * current specification does not allow user to specify transport bindings 191 * for each iscsi target. The ULP invokes this function to bind the target 192 * to all available iser ports after checking for the presence of an IB HCA. 193 * iSER is "configured" whenever an IB-capable IP address exists. The lack 194 * of active IB ports is a less-fatal condition, and sockets would be used 195 * as the transport even though an Infiniband HCA is configured but unusable. 196 * 197 */ 198 int 199 iser_ib_bind_service(idm_svc_t *idm_svc) 200 { 201 iser_hca_t *hca; 202 ib_gid_t gid; 203 int num_ports = 0; 204 int num_binds = 0; 205 int num_inactive_binds = 0; /* if HCA ports inactive */ 206 int status; 207 int i; 208 209 ASSERT(idm_svc != NULL); 210 ASSERT(idm_svc->is_iser_svc != NULL); 211 212 /* Register the iSER service on all available ports */ 213 mutex_enter(&iser_state->is_hcalist_lock); 214 215 for (hca = list_head(&iser_state->is_hcalist); 216 hca != NULL; 217 hca = list_next(&iser_state->is_hcalist, hca)) { 218 219 for (i = 0; i < hca->hca_num_ports; i++) { 220 num_ports++; 221 if (hca->hca_port_info[i].p_linkstate != 222 IBT_PORT_ACTIVE) { 223 /* 224 * Move on. We will attempt to bind service 225 * in our async handler if the port comes up 226 * at a later time. 227 */ 228 num_inactive_binds++; 229 continue; 230 } 231 232 gid = hca->hca_port_info[i].p_sgid_tbl[0]; 233 234 /* If the port is already bound, skip */ 235 if (iser_ib_get_bind( 236 idm_svc->is_iser_svc, hca->hca_guid, gid) == NULL) { 237 238 status = iser_ib_activate_port( 239 idm_svc, hca->hca_guid, gid); 240 if (status != IBT_SUCCESS) { 241 ISER_LOG(CE_NOTE, 242 "iser_ib_bind_service: " 243 "iser_ib_activate_port failure " 244 "(0x%x)", status); 245 continue; 246 } 247 } 248 num_binds++; 249 } 250 } 251 mutex_exit(&iser_state->is_hcalist_lock); 252 253 if (num_binds) { 254 ISER_LOG(CE_NOTE, "iser_ib_bind_service: Service available on " 255 "(%d) of (%d) ports", num_binds, num_ports); 256 return (ISER_STATUS_SUCCESS); 257 } else if (num_inactive_binds) { 258 ISER_LOG(CE_NOTE, "iser_ib_bind_service: Could not bind " 259 "service, HCA ports are not active."); 260 /* 261 * still considered success, the async handler will bind 262 * the service when the port comes up at a later time 263 */ 264 return (ISER_STATUS_SUCCESS); 265 } else { 266 ISER_LOG(CE_NOTE, "iser_ib_bind_service: Did not bind service"); 267 return (ISER_STATUS_FAIL); 268 } 269 } 270 271 /* 272 * iser_ib_unbind_service 273 * 274 * This function unbinds a given service on a all HCA ports 275 */ 276 void 277 iser_ib_unbind_service(idm_svc_t *idm_svc) 278 { 279 iser_svc_t *iser_svc; 280 iser_sbind_t *is_sbind, *next_sb; 281 282 if (idm_svc != NULL && idm_svc->is_iser_svc != NULL) { 283 284 iser_svc = idm_svc->is_iser_svc; 285 286 for (is_sbind = list_head(&iser_svc->is_sbindlist); 287 is_sbind != NULL; 288 is_sbind = next_sb) { 289 next_sb = list_next(&iser_svc->is_sbindlist, is_sbind); 290 (void) ibt_unbind_service(iser_svc->is_srvhdl, 291 is_sbind->is_sbindhdl); 292 list_remove(&iser_svc->is_sbindlist, is_sbind); 293 kmem_free(is_sbind, sizeof (iser_sbind_t)); 294 } 295 } 296 } 297 298 /* ARGSUSED */ 299 void 300 iser_ib_deregister_service(idm_svc_t *idm_svc) 301 { 302 iser_svc_t *iser_svc; 303 304 if (idm_svc != NULL && idm_svc->is_iser_svc != NULL) { 305 306 iser_svc = (iser_svc_t *)idm_svc->is_iser_svc; 307 (void) ibt_deregister_service(iser_state->is_ibhdl, 308 iser_svc->is_srvhdl); 309 (void) ibt_release_ip_sid(iser_svc->is_svcid); 310 } 311 } 312 313 /* 314 * iser_ib_get_paths 315 * This function finds the IB path between the local and the remote address. 316 * 317 */ 318 int 319 iser_ib_get_paths(ibt_ip_addr_t *local_ip, ibt_ip_addr_t *remote_ip, 320 ibt_path_info_t *path, ibt_path_ip_src_t *path_src_ip) 321 { 322 ibt_ip_path_attr_t ipattr; 323 int status; 324 325 (void) bzero(&ipattr, sizeof (ibt_ip_path_attr_t)); 326 ipattr.ipa_dst_ip = remote_ip; 327 ipattr.ipa_src_ip = *local_ip; 328 ipattr.ipa_max_paths = 1; 329 ipattr.ipa_ndst = 1; 330 331 (void) bzero(path, sizeof (ibt_path_info_t)); 332 status = ibt_get_ip_paths(iser_state->is_ibhdl, IBT_PATH_NO_FLAGS, 333 &ipattr, path, NULL, path_src_ip); 334 if (status != IBT_SUCCESS) { 335 ISER_LOG(CE_NOTE, "ibt_get_ip_paths: ibt_get_ip_paths " 336 "failure: status (%d)", status); 337 return (status); 338 } 339 340 if (local_ip != NULL) { 341 ISER_LOG(CE_NOTE, "iser_ib_get_paths success: IP[%x to %x]", 342 local_ip->un.ip4addr, remote_ip->un.ip4addr); 343 } else { 344 ISER_LOG(CE_NOTE, "iser_ib_get_paths success: " 345 "IP[INADDR_ANY to %x]", remote_ip->un.ip4addr); 346 } 347 348 return (ISER_STATUS_SUCCESS); 349 } 350 351 /* 352 * iser_ib_alloc_channel_nopathlookup 353 * 354 * This function allocates a reliable connected channel. This function does 355 * not invoke ibt_get_ip_paths() to do the path lookup. The HCA GUID and 356 * port are input to this function. 357 */ 358 iser_chan_t * 359 iser_ib_alloc_channel_nopathlookup(ib_guid_t hca_guid, uint8_t hca_port) 360 { 361 iser_hca_t *hca; 362 iser_chan_t *chan; 363 364 /* Lookup the hca using the gid in the path info */ 365 hca = iser_ib_guid2hca(hca_guid); 366 if (hca == NULL) { 367 ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_nopathlookup: failed " 368 "to lookup HCA(%llx) handle", (longlong_t)hca_guid); 369 return (NULL); 370 } 371 372 chan = iser_ib_alloc_rc_channel(hca, hca_port); 373 if (chan == NULL) { 374 ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_nopathlookup: failed " 375 "to alloc channel on HCA(%llx) %d", 376 (longlong_t)hca_guid, hca_port); 377 return (NULL); 378 } 379 380 ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_pathlookup success: " 381 "chanhdl (0x%p), HCA(%llx) %d", 382 (void *)chan->ic_chanhdl, (longlong_t)hca_guid, hca_port); 383 384 return (chan); 385 } 386 387 /* 388 * iser_ib_alloc_channel_pathlookup 389 * 390 * This function allocates a reliable connected channel but first invokes 391 * ibt_get_ip_paths() with the given local and remote addres to get the 392 * HCA lgid and the port number. 393 */ 394 iser_chan_t * 395 iser_ib_alloc_channel_pathlookup( 396 ibt_ip_addr_t *local_ip, ibt_ip_addr_t *remote_ip) 397 { 398 ibt_path_info_t ibt_path; 399 ibt_path_ip_src_t path_src_ip; 400 ib_gid_t lgid; 401 uint8_t hca_port; /* from path */ 402 iser_hca_t *hca; 403 iser_chan_t *chan; 404 int status; 405 406 /* Lookup a path to the given destination */ 407 status = iser_ib_get_paths( 408 local_ip, remote_ip, &ibt_path, &path_src_ip); 409 410 if (status != ISER_STATUS_SUCCESS) { 411 ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_pathlookup: faild " 412 "Path lookup IP:[%llx to %llx] failed: status (%d)", 413 (longlong_t)local_ip->un.ip4addr, 414 (longlong_t)remote_ip->un.ip4addr, 415 status); 416 return (NULL); 417 } 418 419 /* get the local gid from the path info */ 420 lgid = ibt_path.pi_prim_cep_path.cep_adds_vect.av_sgid; 421 422 /* get the hca port from the path info */ 423 hca_port = ibt_path.pi_prim_cep_path.cep_hca_port_num; 424 425 /* Lookup the hca using the gid in the path info */ 426 hca = iser_ib_gid2hca(lgid); 427 if (hca == NULL) { 428 ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_pathlookup: failed " 429 "to lookup HCA (%llx) handle", 430 (longlong_t)hca->hca_guid); 431 return (NULL); 432 } 433 434 chan = iser_ib_alloc_rc_channel(hca, hca_port); 435 if (chan == NULL) { 436 ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_pathlookup: failed " 437 "to alloc channel from IP:[%llx to %llx] on HCA (%llx) %d", 438 (longlong_t)local_ip->un.ip4addr, 439 (longlong_t)remote_ip->un.ip4addr, 440 (longlong_t)hca->hca_guid, hca_port); 441 return (NULL); 442 } 443 444 ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_pathlookup success: " 445 "chanhdl (0x%p), IP:[%llx to %llx], lgid (%llx:%llx), HCA(%llx) %d", 446 (void *)chan->ic_chanhdl, 447 (longlong_t)local_ip->un.ip4addr, 448 (longlong_t)remote_ip->un.ip4addr, 449 (longlong_t)lgid.gid_prefix, (longlong_t)lgid.gid_guid, 450 (longlong_t)hca->hca_guid, hca_port); 451 452 chan->ic_ibt_path = ibt_path; 453 chan->ic_localip = path_src_ip.ip_primary; 454 chan->ic_remoteip = *remote_ip; 455 456 return (chan); 457 } 458 459 /* 460 * iser_ib_alloc_rc_channel 461 * 462 * This function allocates a reliable communication channel using the specified 463 * channel attributes. 464 */ 465 iser_chan_t * 466 iser_ib_alloc_rc_channel(iser_hca_t *hca, uint8_t hca_port) 467 { 468 469 iser_chan_t *chan; 470 ibt_rc_chan_alloc_args_t chanargs; 471 uint_t sq_size, rq_size; 472 int status; 473 474 chan = kmem_zalloc(sizeof (iser_chan_t), KM_SLEEP); 475 476 mutex_init(&chan->ic_chan_lock, NULL, MUTEX_DRIVER, NULL); 477 mutex_init(&chan->ic_sq_post_lock, NULL, MUTEX_DRIVER, NULL); 478 479 /* Set up the iSER channel handle with HCA */ 480 chan->ic_hca = hca; 481 482 /* 483 * Determine the queue sizes, based upon the HCA query data. 484 * For our Work Queues, we will use either our default value, 485 * or the HCA's maximum value, whichever is smaller. 486 */ 487 sq_size = min(hca->hca_attr.hca_max_chan_sz, ISER_IB_SENDQ_SIZE); 488 rq_size = min(hca->hca_attr.hca_max_chan_sz, ISER_IB_RECVQ_SIZE); 489 490 /* 491 * For our Completion Queues, we again check the device maximum. 492 * We want to end up with CQs that are the next size up from the 493 * WQs they are servicing so that they have some overhead. 494 */ 495 if (hca->hca_attr.hca_max_cq_sz >= (sq_size + 1)) { 496 chan->ic_sendcq_sz = sq_size + 1; 497 } else { 498 chan->ic_sendcq_sz = hca->hca_attr.hca_max_cq_sz; 499 sq_size = chan->ic_sendcq_sz - 1; 500 } 501 502 if (hca->hca_attr.hca_max_cq_sz >= (rq_size + 1)) { 503 chan->ic_recvcq_sz = rq_size + 1; 504 } else { 505 chan->ic_recvcq_sz = hca->hca_attr.hca_max_cq_sz; 506 rq_size = chan->ic_recvcq_sz - 1; 507 } 508 509 /* Initialize the iSER channel's QP handle */ 510 iser_ib_init_qp(chan, sq_size, rq_size); 511 512 /* Set up the Send Completion Queue */ 513 status = iser_ib_setup_cq(hca->hca_hdl, chan->ic_sendcq_sz, 514 &chan->ic_sendcq); 515 if (status != ISER_STATUS_SUCCESS) { 516 iser_ib_fini_qp(&chan->ic_qp); 517 mutex_destroy(&chan->ic_chan_lock); 518 mutex_destroy(&chan->ic_sq_post_lock); 519 kmem_free(chan, sizeof (iser_chan_t)); 520 return (NULL); 521 } 522 ibt_set_cq_handler(chan->ic_sendcq, iser_ib_sendcq_handler, chan); 523 (void) ibt_enable_cq_notify(chan->ic_sendcq, IBT_NEXT_COMPLETION); 524 525 /* Set up the Receive Completion Queue */ 526 status = iser_ib_setup_cq(hca->hca_hdl, chan->ic_recvcq_sz, 527 &chan->ic_recvcq); 528 if (status != ISER_STATUS_SUCCESS) { 529 (void) ibt_free_cq(chan->ic_sendcq); 530 iser_ib_fini_qp(&chan->ic_qp); 531 mutex_destroy(&chan->ic_chan_lock); 532 mutex_destroy(&chan->ic_sq_post_lock); 533 kmem_free(chan, sizeof (iser_chan_t)); 534 return (NULL); 535 } 536 ibt_set_cq_handler(chan->ic_recvcq, iser_ib_recvcq_handler, chan); 537 (void) ibt_enable_cq_notify(chan->ic_recvcq, IBT_NEXT_COMPLETION); 538 539 /* Setup the channel arguments */ 540 iser_ib_setup_chanargs(hca_port, chan->ic_sendcq, chan->ic_recvcq, 541 sq_size, rq_size, hca->hca_pdhdl, &chanargs); 542 543 status = ibt_alloc_rc_channel(hca->hca_hdl, 544 IBT_ACHAN_NO_FLAGS, &chanargs, &chan->ic_chanhdl, NULL); 545 if (status != IBT_SUCCESS) { 546 ISER_LOG(CE_NOTE, "iser_ib_alloc_rc_channel: failed " 547 "ibt_alloc_rc_channel: status (%d)", status); 548 (void) ibt_free_cq(chan->ic_sendcq); 549 (void) ibt_free_cq(chan->ic_recvcq); 550 iser_ib_fini_qp(&chan->ic_qp); 551 mutex_destroy(&chan->ic_chan_lock); 552 mutex_destroy(&chan->ic_sq_post_lock); 553 kmem_free(chan, sizeof (iser_chan_t)); 554 return (NULL); 555 } 556 557 /* Set the 'channel' as the client private data */ 558 (void) ibt_set_chan_private(chan->ic_chanhdl, chan); 559 560 return (chan); 561 } 562 563 /* 564 * iser_ib_open_rc_channel 565 * This function opens a RC connection on the given allocated RC channel 566 */ 567 int 568 iser_ib_open_rc_channel(iser_chan_t *chan) 569 { 570 ibt_ip_cm_info_t ipcm_info; 571 iser_private_data_t iser_priv_data; 572 ibt_chan_open_args_t ocargs; 573 ibt_rc_returns_t ocreturns; 574 int status; 575 576 mutex_enter(&chan->ic_chan_lock); 577 578 /* 579 * For connection establishment, the initiator sends a CM REQ using the 580 * iSER RDMA-Aware Service ID. Included are the source and destination 581 * IP addresses, and the src port. 582 */ 583 bzero(&ipcm_info, sizeof (ibt_ip_cm_info_t)); 584 ipcm_info.src_addr = chan->ic_localip; 585 ipcm_info.dst_addr = chan->ic_remoteip; 586 ipcm_info.src_port = chan->ic_lport; 587 588 /* 589 * The CM Private Data field defines the iSER connection parameters 590 * such as zero based virtual address exception (ZBVAE) and Send with 591 * invalidate Exception (SIE). 592 * 593 * Solaris IBT does not currently support ZBVAE or SIE. 594 */ 595 iser_priv_data.rsvd1 = 0; 596 iser_priv_data.sie = 1; 597 iser_priv_data.zbvae = 1; 598 599 status = ibt_format_ip_private_data(&ipcm_info, 600 sizeof (iser_private_data_t), &iser_priv_data); 601 if (status != IBT_SUCCESS) { 602 ISER_LOG(CE_NOTE, "iser_ib_open_rc_channel failed: %d", status); 603 mutex_exit(&chan->ic_chan_lock); 604 return (status); 605 } 606 607 /* 608 * Set the SID we are attempting to connect to, based upon the 609 * remote port number. 610 */ 611 chan->ic_ibt_path.pi_sid = ibt_get_ip_sid(IPPROTO_TCP, chan->ic_rport); 612 613 /* Set up the args for the channel open */ 614 bzero(&ocargs, sizeof (ibt_chan_open_args_t)); 615 ocargs.oc_path = &chan->ic_ibt_path; 616 ocargs.oc_cm_handler = iser_ib_cm_handler; 617 ocargs.oc_cm_clnt_private = iser_state; 618 ocargs.oc_rdma_ra_out = 4; 619 ocargs.oc_rdma_ra_in = 4; 620 ocargs.oc_path_retry_cnt = 2; 621 ocargs.oc_path_rnr_retry_cnt = 2; 622 ocargs.oc_priv_data_len = sizeof (iser_private_data_t); 623 ocargs.oc_priv_data = &iser_priv_data; 624 625 bzero(&ocreturns, sizeof (ibt_rc_returns_t)); 626 627 status = ibt_open_rc_channel(chan->ic_chanhdl, 628 IBT_OCHAN_NO_FLAGS, IBT_BLOCKING, &ocargs, &ocreturns); 629 630 if (status != IBT_SUCCESS) { 631 ISER_LOG(CE_NOTE, "iser_ib_open_rc_channel failed: %d", status); 632 mutex_exit(&chan->ic_chan_lock); 633 return (status); 634 } 635 636 mutex_exit(&chan->ic_chan_lock); 637 return (IDM_STATUS_SUCCESS); 638 } 639 640 /* 641 * iser_ib_close_rc_channel 642 * This function closes the RC channel related to this iser_chan handle. 643 * We invoke this in a non-blocking, no callbacks context. 644 */ 645 void 646 iser_ib_close_rc_channel(iser_chan_t *chan) 647 { 648 int status; 649 650 mutex_enter(&chan->ic_chan_lock); 651 status = ibt_close_rc_channel(chan->ic_chanhdl, IBT_BLOCKING, NULL, 652 0, NULL, NULL, 0); 653 if (status != IBT_SUCCESS) { 654 ISER_LOG(CE_NOTE, "iser_ib_close_rc_channel: " 655 "ibt_close_rc_channel failed: status (%d)", status); 656 } 657 mutex_exit(&chan->ic_chan_lock); 658 } 659 660 /* 661 * iser_ib_free_rc_channel 662 * 663 * This function tears down an RC channel's QP initialization and frees it. 664 * Note that we do not need synchronization here; the channel has been 665 * closed already, so we should only have completion polling occuring. Once 666 * complete, we are free to free the IBTF channel, WQ and CQ resources, and 667 * our own related resources. 668 */ 669 void 670 iser_ib_free_rc_channel(iser_chan_t *chan) 671 { 672 iser_qp_t *iser_qp; 673 674 iser_qp = &chan->ic_qp; 675 676 /* Ensure the SQ is empty */ 677 while (chan->ic_sq_post_count != 0) { 678 mutex_exit(&chan->ic_conn->ic_lock); 679 delay(drv_usectohz(ISER_DELAY_HALF_SECOND)); 680 mutex_enter(&chan->ic_conn->ic_lock); 681 } 682 mutex_destroy(&chan->ic_sq_post_lock); 683 684 /* Ensure the RQ is empty */ 685 (void) ibt_flush_channel(chan->ic_chanhdl); 686 mutex_enter(&iser_qp->qp_lock); 687 while (iser_qp->rq_level != 0) { 688 mutex_exit(&iser_qp->qp_lock); 689 mutex_exit(&chan->ic_conn->ic_lock); 690 delay(drv_usectohz(ISER_DELAY_HALF_SECOND)); 691 mutex_enter(&chan->ic_conn->ic_lock); 692 mutex_enter(&iser_qp->qp_lock); 693 } 694 695 /* Free our QP handle */ 696 mutex_exit(&iser_qp->qp_lock); 697 (void) iser_ib_fini_qp(iser_qp); 698 699 /* Free the IBT channel resources */ 700 (void) ibt_free_channel(chan->ic_chanhdl); 701 chan->ic_chanhdl = NULL; 702 703 /* Free the CQs */ 704 (void) ibt_free_cq(chan->ic_sendcq); 705 (void) ibt_free_cq(chan->ic_recvcq); 706 707 /* Free the chan handle */ 708 mutex_destroy(&chan->ic_chan_lock); 709 kmem_free(chan, sizeof (iser_chan_t)); 710 } 711 712 /* 713 * iser_ib_post_recv 714 * 715 * This function handles keeping the RQ full on a given channel. 716 * This routine will mostly be run on a taskq, and will check the 717 * current fill level of the RQ, and post as many WRs as necessary 718 * to fill it again. 719 */ 720 721 int 722 iser_ib_post_recv_async(ibt_channel_hdl_t chanhdl) 723 { 724 iser_chan_t *chan; 725 int status; 726 727 /* Pull our iSER channel handle from the private data */ 728 chan = (iser_chan_t *)ibt_get_chan_private(chanhdl); 729 730 /* 731 * Caller must check that chan->ic_conn->ic_stage indicates 732 * the connection is active (not closing, not closed) and 733 * it must hold the mutex cross the check and the call to this function 734 */ 735 ASSERT(mutex_owned(&chan->ic_conn->ic_lock)); 736 ASSERT((chan->ic_conn->ic_stage >= ISER_CONN_STAGE_ALLOCATED) && 737 (chan->ic_conn->ic_stage <= ISER_CONN_STAGE_LOGGED_IN)); 738 idm_conn_hold(chan->ic_conn->ic_idmc); 739 status = ddi_taskq_dispatch(iser_taskq, iser_ib_post_recv_task, 740 (void *)chanhdl, DDI_NOSLEEP); 741 if (status != DDI_SUCCESS) { 742 idm_conn_rele(chan->ic_conn->ic_idmc); 743 } 744 745 return (status); 746 } 747 748 static void 749 iser_ib_post_recv_task(void *arg) 750 { 751 ibt_channel_hdl_t chanhdl = arg; 752 iser_chan_t *chan; 753 754 /* Pull our iSER channel handle from the private data */ 755 chan = (iser_chan_t *)ibt_get_chan_private(chanhdl); 756 757 iser_ib_post_recv(chanhdl); 758 idm_conn_rele(chan->ic_conn->ic_idmc); 759 } 760 761 void 762 iser_ib_post_recv(ibt_channel_hdl_t chanhdl) 763 { 764 iser_chan_t *chan; 765 iser_hca_t *hca; 766 iser_msg_t *msg; 767 ibt_recv_wr_t *wrlist, wr[ISER_IB_RQ_POST_MAX]; 768 int rq_space, msg_ret; 769 int total_num, npost; 770 uint_t nposted; 771 int status, i; 772 iser_qp_t *iser_qp; 773 774 /* Pull our iSER channel handle from the private data */ 775 chan = (iser_chan_t *)ibt_get_chan_private(chanhdl); 776 777 ASSERT(chan != NULL); 778 779 mutex_enter(&chan->ic_conn->ic_lock); 780 781 /* Bail out if the connection is closed; no need for more recv WRs */ 782 if ((chan->ic_conn->ic_stage == ISER_CONN_STAGE_CLOSING) || 783 (chan->ic_conn->ic_stage == ISER_CONN_STAGE_CLOSED)) { 784 mutex_exit(&chan->ic_conn->ic_lock); 785 return; 786 } 787 788 /* get the QP handle from the iser_chan */ 789 iser_qp = &chan->ic_qp; 790 791 hca = chan->ic_hca; 792 793 if (hca == NULL) { 794 ISER_LOG(CE_NOTE, "iser_ib_post_recv: unable to retrieve " 795 "HCA handle"); 796 mutex_exit(&chan->ic_conn->ic_lock); 797 return; 798 } 799 800 /* check for space to post on the RQ */ 801 mutex_enter(&iser_qp->qp_lock); 802 rq_space = iser_qp->rq_depth - iser_qp->rq_level; 803 if (rq_space == 0) { 804 /* The RQ is full, clear the pending flag and return */ 805 iser_qp->rq_taskqpending = B_FALSE; 806 mutex_exit(&iser_qp->qp_lock); 807 mutex_exit(&chan->ic_conn->ic_lock); 808 return; 809 } 810 811 /* Keep track of the lowest value for rq_min_post_level */ 812 if (iser_qp->rq_level < iser_qp->rq_min_post_level) 813 iser_qp->rq_min_post_level = iser_qp->rq_level; 814 815 mutex_exit(&iser_qp->qp_lock); 816 817 /* we've room to post, so pull from the msg cache */ 818 msg = iser_msg_get(hca, rq_space, &msg_ret); 819 if (msg == NULL) { 820 ISER_LOG(CE_NOTE, "iser_ib_post_recv: no message handles " 821 "available in msg cache currently"); 822 /* 823 * There are no messages on the cache. Wait a half- 824 * second, then try again. 825 */ 826 delay(drv_usectohz(ISER_DELAY_HALF_SECOND)); 827 status = iser_ib_post_recv_async(chanhdl); 828 if (status != DDI_SUCCESS) { 829 ISER_LOG(CE_NOTE, "iser_ib_post_recv: failed to " 830 "redispatch routine"); 831 /* Failed to dispatch, clear pending flag */ 832 mutex_enter(&iser_qp->qp_lock); 833 iser_qp->rq_taskqpending = B_FALSE; 834 mutex_exit(&iser_qp->qp_lock); 835 } 836 mutex_exit(&chan->ic_conn->ic_lock); 837 return; 838 } 839 840 if (msg_ret != rq_space) { 841 ISER_LOG(CE_NOTE, "iser_ib_post_recv: requested number of " 842 "messages not allocated: requested (%d) allocated (%d)", 843 rq_space, msg_ret); 844 /* We got some, but not all, of our requested depth */ 845 rq_space = msg_ret; 846 } 847 848 /* 849 * Now, walk through the allocated WRs and post them, 850 * ISER_IB_RQ_POST_MAX (or less) at a time. 851 */ 852 wrlist = &wr[0]; 853 total_num = rq_space; 854 855 while (total_num) { 856 /* determine the number to post on this iteration */ 857 npost = (total_num > ISER_IB_RQ_POST_MAX) ? 858 ISER_IB_RQ_POST_MAX : total_num; 859 860 /* build a list of WRs from the msg list */ 861 for (i = 0; i < npost; i++) { 862 wrlist[i].wr_id = (ibt_wrid_t)(uintptr_t)msg; 863 wrlist[i].wr_nds = ISER_IB_SGLIST_SIZE; 864 wrlist[i].wr_sgl = &msg->msg_ds; 865 msg = msg->nextp; 866 } 867 868 /* post the list to the RQ */ 869 nposted = 0; 870 status = ibt_post_recv(chanhdl, wrlist, npost, &nposted); 871 if ((status != IBT_SUCCESS) || (nposted != npost)) { 872 ISER_LOG(CE_NOTE, "iser_ib_post_recv: ibt_post_recv " 873 "failed: requested (%d) posted (%d) status (%d)", 874 npost, nposted, status); 875 total_num -= nposted; 876 break; 877 } 878 879 /* decrement total number to post by the number posted */ 880 total_num -= nposted; 881 } 882 883 mutex_enter(&iser_qp->qp_lock); 884 if (total_num != 0) { 885 ISER_LOG(CE_NOTE, "iser_ib_post_recv: unable to fill RQ, " 886 "failed to post (%d) WRs", total_num); 887 iser_qp->rq_level += rq_space - total_num; 888 } else { 889 iser_qp->rq_level += rq_space; 890 } 891 892 /* 893 * Now that we've filled the RQ, check that all of the recv WRs 894 * haven't just been immediately consumed. If so, taskqpending is 895 * still B_TRUE, so we need to fire off a taskq thread to post 896 * more WRs. 897 */ 898 if (iser_qp->rq_level == 0) { 899 mutex_exit(&iser_qp->qp_lock); 900 status = iser_ib_post_recv_async(chanhdl); 901 if (status != DDI_SUCCESS) { 902 ISER_LOG(CE_NOTE, "iser_ib_post_recv: failed to " 903 "dispatch followup routine"); 904 /* Failed to dispatch, clear pending flag */ 905 mutex_enter(&iser_qp->qp_lock); 906 iser_qp->rq_taskqpending = B_FALSE; 907 mutex_exit(&iser_qp->qp_lock); 908 } 909 } else { 910 /* 911 * We're done, we've filled the RQ. Clear the taskq 912 * flag so that we can run again. 913 */ 914 iser_qp->rq_taskqpending = B_FALSE; 915 mutex_exit(&iser_qp->qp_lock); 916 } 917 918 mutex_exit(&chan->ic_conn->ic_lock); 919 } 920 921 /* 922 * iser_ib_handle_portup_event() 923 * This handles the IBT_EVENT_PORT_UP unaffiliated asynchronous event. 924 * 925 * To facilitate a seamless bringover of the port and configure the CM service 926 * for inbound iSER service requests on this newly active port, the existing 927 * IDM services will be checked for iSER support. 928 * If an iSER service was already created, then this service will simply be 929 * bound to the gid of the newly active port. If on the other hand, the CM 930 * service did not exist, i.e. only socket communication, then a new CM 931 * service will be first registered with the saved service parameters and 932 * then bound to the newly active port. 933 * 934 */ 935 /* ARGSUSED */ 936 static void 937 iser_ib_handle_portup_event(ibt_hca_hdl_t hdl, ibt_async_event_t *event) 938 { 939 iser_hca_t *hca; 940 ib_gid_t gid; 941 idm_svc_t *idm_svc; 942 int status; 943 944 ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event: HCA(0x%llx) port(%d)", 945 (longlong_t)event->ev_hca_guid, event->ev_port); 946 947 /* 948 * Query all ports on the HCA and update the port information 949 * maintainted in the iser_hca_t structure 950 */ 951 hca = iser_ib_guid2hca(event->ev_hca_guid); 952 if (hca == NULL) { 953 954 /* HCA is just made available, first port on that HCA */ 955 hca = iser_ib_alloc_hca(event->ev_hca_guid); 956 if (hca == NULL) { 957 ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event " 958 "iser_ib_alloc_hca failed: HCA(0x%llx) port(%d)", 959 (longlong_t)event->ev_hca_guid, event->ev_port); 960 return; 961 } 962 mutex_enter(&iser_state->is_hcalist_lock); 963 list_insert_tail(&iser_state->is_hcalist, hca); 964 iser_state->is_num_hcas++; 965 mutex_exit(&iser_state->is_hcalist_lock); 966 967 } else { 968 969 status = iser_ib_update_hcaports(hca); 970 971 if (status != IBT_SUCCESS) { 972 ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event " 973 "status(0x%x): iser_ib_update_hcaports failed: " 974 "HCA(0x%llx) port(%d)", status, 975 (longlong_t)event->ev_hca_guid, event->ev_port); 976 return; 977 } 978 } 979 980 gid = hca->hca_port_info[event->ev_port - 1].p_sgid_tbl[0]; 981 982 /* 983 * Iterate through the global list of IDM target services 984 * and check for existing iSER CM service. 985 */ 986 mutex_enter(&idm.idm_global_mutex); 987 for (idm_svc = list_head(&idm.idm_tgt_svc_list); 988 idm_svc != NULL; 989 idm_svc = list_next(&idm.idm_tgt_svc_list, idm_svc)) { 990 991 992 if (idm_svc->is_iser_svc == NULL) { 993 994 /* Establish a new CM service for iSER requests */ 995 status = iser_tgt_svc_create( 996 &idm_svc->is_svc_req, idm_svc); 997 998 if (status != IBT_SUCCESS) { 999 ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event " 1000 "status(0x%x): iser_tgt_svc_create failed: " 1001 "HCA(0x%llx) port(%d)", status, 1002 (longlong_t)event->ev_hca_guid, 1003 event->ev_port); 1004 1005 continue; 1006 } 1007 } 1008 1009 status = iser_ib_activate_port( 1010 idm_svc, event->ev_hca_guid, gid); 1011 if (status != IBT_SUCCESS) { 1012 1013 ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event " 1014 "status(0x%x): Bind service on port " 1015 "(%llx:%llx) failed", 1016 status, (longlong_t)gid.gid_prefix, 1017 (longlong_t)gid.gid_guid); 1018 1019 continue; 1020 } 1021 ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event: service bound " 1022 "HCA(0x%llx) port(%d)", (longlong_t)event->ev_hca_guid, 1023 event->ev_port); 1024 } 1025 mutex_exit(&idm.idm_global_mutex); 1026 1027 ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event success: " 1028 "HCA(0x%llx) port(%d)", (longlong_t)event->ev_hca_guid, 1029 event->ev_port); 1030 } 1031 1032 /* 1033 * iser_ib_handle_portdown_event() 1034 * This handles the IBT_EVENT_PORT_DOWN unaffiliated asynchronous error. 1035 * 1036 * Unconfigure the CM service on the deactivated port and teardown the 1037 * connections that are using the CM service. 1038 */ 1039 /* ARGSUSED */ 1040 static void 1041 iser_ib_handle_portdown_event(ibt_hca_hdl_t hdl, ibt_async_event_t *event) 1042 { 1043 iser_hca_t *hca; 1044 ib_gid_t gid; 1045 int status; 1046 1047 /* 1048 * Query all ports on the HCA and update the port information 1049 * maintainted in the iser_hca_t structure 1050 */ 1051 hca = iser_ib_guid2hca(event->ev_hca_guid); 1052 ASSERT(hca != NULL); 1053 1054 status = iser_ib_update_hcaports(hca); 1055 if (status != IBT_SUCCESS) { 1056 ISER_LOG(CE_NOTE, "iser_ib_handle_portdown_event status(0x%x): " 1057 "ibt_ib_update_hcaports failed: HCA(0x%llx) port(%d)", 1058 status, (longlong_t)event->ev_hca_guid, event->ev_port); 1059 return; 1060 } 1061 1062 /* get the gid of the new port */ 1063 gid = hca->hca_port_info[event->ev_port - 1].p_sgid_tbl[0]; 1064 iser_ib_deactivate_port(event->ev_hca_guid, gid); 1065 1066 ISER_LOG(CE_NOTE, "iser_ib_handle_portdown_event success: " 1067 "HCA(0x%llx) port(%d)", (longlong_t)event->ev_hca_guid, 1068 event->ev_port); 1069 } 1070 1071 /* 1072 * iser_ib_handle_hca_detach_event() 1073 * Quiesce all activity bound for the port, teardown the connection, unbind 1074 * iSER services on all ports and release the HCA handle. 1075 */ 1076 /* ARGSUSED */ 1077 static void 1078 iser_ib_handle_hca_detach_event(ibt_hca_hdl_t hdl, ibt_async_event_t *event) 1079 { 1080 iser_hca_t *nexthca, *hca; 1081 int i, status; 1082 1083 ISER_LOG(CE_NOTE, "iser_ib_handle_hca_detach_event: HCA(0x%llx)", 1084 (longlong_t)event->ev_hca_guid); 1085 1086 hca = iser_ib_guid2hca(event->ev_hca_guid); 1087 for (i = 0; i < hca->hca_num_ports; i++) { 1088 iser_ib_deactivate_port(hca->hca_guid, 1089 hca->hca_port_info[i].p_sgid_tbl[0]); 1090 } 1091 1092 /* 1093 * Update the HCA list maintained in the iser_state. Free the 1094 * resources allocated to the HCA, i.e. caches, protection domain 1095 */ 1096 mutex_enter(&iser_state->is_hcalist_lock); 1097 1098 for (hca = list_head(&iser_state->is_hcalist); 1099 hca != NULL; 1100 hca = nexthca) { 1101 1102 nexthca = list_next(&iser_state->is_hcalist, hca); 1103 1104 if (hca->hca_guid == event->ev_hca_guid) { 1105 1106 list_remove(&iser_state->is_hcalist, hca); 1107 iser_state->is_num_hcas--; 1108 1109 status = iser_ib_free_hca(hca); 1110 if (status != DDI_SUCCESS) { 1111 ISER_LOG(CE_WARN, "iser_ib_handle_hca_detach: " 1112 "Failed to free hca(%p)", (void *)hca); 1113 list_insert_tail(&iser_state->is_hcalist, hca); 1114 iser_state->is_num_hcas++; 1115 } 1116 /* No way to return status to IBT if this fails */ 1117 } 1118 } 1119 mutex_exit(&iser_state->is_hcalist_lock); 1120 1121 } 1122 1123 /* 1124 * iser_ib_async_handler 1125 * An IBT Asynchronous Event handler is registered it with the framework and 1126 * passed via the ibt_attach() routine. This function handles the following 1127 * asynchronous events. 1128 * IBT_EVENT_PORT_UP 1129 * IBT_ERROR_PORT_DOWN 1130 * IBT_HCA_ATTACH_EVENT 1131 * IBT_HCA_DETACH_EVENT 1132 */ 1133 /* ARGSUSED */ 1134 void 1135 iser_ib_async_handler(void *clntp, ibt_hca_hdl_t hdl, ibt_async_code_t code, 1136 ibt_async_event_t *event) 1137 { 1138 switch (code) { 1139 case IBT_EVENT_PORT_UP: 1140 iser_ib_handle_portup_event(hdl, event); 1141 break; 1142 1143 case IBT_ERROR_PORT_DOWN: 1144 iser_ib_handle_portdown_event(hdl, event); 1145 break; 1146 1147 case IBT_HCA_ATTACH_EVENT: 1148 /* 1149 * A new HCA device is available for use, ignore this 1150 * event because the corresponding IBT_EVENT_PORT_UP 1151 * events will get triggered and handled accordingly. 1152 */ 1153 break; 1154 1155 case IBT_HCA_DETACH_EVENT: 1156 iser_ib_handle_hca_detach_event(hdl, event); 1157 break; 1158 1159 default: 1160 break; 1161 } 1162 } 1163 1164 /* 1165 * iser_ib_init_hcas 1166 * 1167 * This function opens all the HCA devices, gathers the HCA state information 1168 * and adds the HCA handle for each HCA found in the iser_soft_state. 1169 */ 1170 static int 1171 iser_ib_init_hcas(void) 1172 { 1173 ib_guid_t *guid; 1174 int num_hcas; 1175 int i; 1176 iser_hca_t *hca; 1177 1178 /* Retrieve the HCA list */ 1179 num_hcas = ibt_get_hca_list(&guid); 1180 if (num_hcas == 0) { 1181 /* 1182 * This shouldn't happen, but might if we have all HCAs 1183 * detach prior to initialization. 1184 */ 1185 return (DDI_FAILURE); 1186 } 1187 1188 /* Initialize the hcalist lock */ 1189 mutex_init(&iser_state->is_hcalist_lock, NULL, MUTEX_DRIVER, NULL); 1190 1191 /* Create the HCA list */ 1192 list_create(&iser_state->is_hcalist, sizeof (iser_hca_t), 1193 offsetof(iser_hca_t, hca_node)); 1194 1195 for (i = 0; i < num_hcas; i++) { 1196 1197 ISER_LOG(CE_NOTE, "iser_ib_init_hcas: initializing HCA " 1198 "(0x%llx)", (longlong_t)guid[i]); 1199 1200 hca = iser_ib_alloc_hca(guid[i]); 1201 if (hca == NULL) { 1202 /* This shouldn't happen, teardown and fail */ 1203 (void) iser_ib_fini_hcas(); 1204 (void) ibt_free_hca_list(guid, num_hcas); 1205 return (DDI_FAILURE); 1206 } 1207 1208 mutex_enter(&iser_state->is_hcalist_lock); 1209 list_insert_tail(&iser_state->is_hcalist, hca); 1210 iser_state->is_num_hcas++; 1211 mutex_exit(&iser_state->is_hcalist_lock); 1212 1213 } 1214 1215 /* Free the IBT HCA list */ 1216 (void) ibt_free_hca_list(guid, num_hcas); 1217 1218 /* Check that we've initialized at least one HCA */ 1219 mutex_enter(&iser_state->is_hcalist_lock); 1220 if (list_is_empty(&iser_state->is_hcalist)) { 1221 ISER_LOG(CE_NOTE, "iser_ib_init_hcas: failed to initialize " 1222 "any HCAs"); 1223 1224 mutex_exit(&iser_state->is_hcalist_lock); 1225 (void) iser_ib_fini_hcas(); 1226 return (DDI_FAILURE); 1227 } 1228 mutex_exit(&iser_state->is_hcalist_lock); 1229 1230 return (DDI_SUCCESS); 1231 } 1232 1233 /* 1234 * iser_ib_fini_hcas 1235 * 1236 * Teardown the iSER HCA list initialized above. 1237 */ 1238 static int 1239 iser_ib_fini_hcas(void) 1240 { 1241 iser_hca_t *nexthca, *hca; 1242 int status; 1243 1244 mutex_enter(&iser_state->is_hcalist_lock); 1245 for (hca = list_head(&iser_state->is_hcalist); 1246 hca != NULL; 1247 hca = nexthca) { 1248 1249 nexthca = list_next(&iser_state->is_hcalist, hca); 1250 1251 list_remove(&iser_state->is_hcalist, hca); 1252 1253 status = iser_ib_free_hca(hca); 1254 if (status != IBT_SUCCESS) { 1255 ISER_LOG(CE_NOTE, "iser_ib_fini_hcas: failed to free " 1256 "HCA during fini"); 1257 list_insert_tail(&iser_state->is_hcalist, hca); 1258 return (DDI_FAILURE); 1259 } 1260 1261 iser_state->is_num_hcas--; 1262 1263 } 1264 mutex_exit(&iser_state->is_hcalist_lock); 1265 list_destroy(&iser_state->is_hcalist); 1266 mutex_destroy(&iser_state->is_hcalist_lock); 1267 1268 return (DDI_SUCCESS); 1269 } 1270 1271 /* 1272 * iser_ib_alloc_hca 1273 * 1274 * This function opens the given HCA device, gathers the HCA state information 1275 * and adds the HCA handle 1276 */ 1277 static iser_hca_t * 1278 iser_ib_alloc_hca(ib_guid_t guid) 1279 { 1280 iser_hca_t *hca; 1281 int status; 1282 1283 /* Allocate an iser_hca_t HCA handle */ 1284 hca = (iser_hca_t *)kmem_zalloc(sizeof (iser_hca_t), KM_SLEEP); 1285 1286 /* Open this HCA */ 1287 status = ibt_open_hca(iser_state->is_ibhdl, guid, &hca->hca_hdl); 1288 if (status != IBT_SUCCESS) { 1289 ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: ibt_open_hca failed:" 1290 " guid (0x%llx) status (0x%x)", (longlong_t)guid, status); 1291 kmem_free(hca, sizeof (iser_hca_t)); 1292 return (NULL); 1293 } 1294 1295 hca->hca_guid = guid; 1296 hca->hca_clnt_hdl = iser_state->is_ibhdl; 1297 1298 /* Query the HCA */ 1299 status = ibt_query_hca(hca->hca_hdl, &hca->hca_attr); 1300 if (status != IBT_SUCCESS) { 1301 ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: ibt_query_hca " 1302 "failure: guid (0x%llx) status (0x%x)", 1303 (longlong_t)guid, status); 1304 (void) ibt_close_hca(hca->hca_hdl); 1305 kmem_free(hca, sizeof (iser_hca_t)); 1306 return (NULL); 1307 } 1308 1309 /* Query all ports on the HCA */ 1310 status = ibt_query_hca_ports(hca->hca_hdl, 0, 1311 &hca->hca_port_info, &hca->hca_num_ports, 1312 &hca->hca_port_info_sz); 1313 if (status != IBT_SUCCESS) { 1314 ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: " 1315 "ibt_query_hca_ports failure: guid (0x%llx) " 1316 "status (0x%x)", (longlong_t)guid, status); 1317 (void) ibt_close_hca(hca->hca_hdl); 1318 kmem_free(hca, sizeof (iser_hca_t)); 1319 return (NULL); 1320 } 1321 1322 /* Allocate a single PD on this HCA */ 1323 status = ibt_alloc_pd(hca->hca_hdl, IBT_PD_NO_FLAGS, 1324 &hca->hca_pdhdl); 1325 if (status != IBT_SUCCESS) { 1326 ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: ibt_alloc_pd " 1327 "failure: guid (0x%llx) status (0x%x)", 1328 (longlong_t)guid, status); 1329 (void) ibt_close_hca(hca->hca_hdl); 1330 ibt_free_portinfo(hca->hca_port_info, hca->hca_port_info_sz); 1331 kmem_free(hca, sizeof (iser_hca_t)); 1332 return (NULL); 1333 } 1334 1335 /* Initialize the message and data MR caches for this HCA */ 1336 iser_init_hca_caches(hca); 1337 1338 return (hca); 1339 } 1340 1341 static int 1342 iser_ib_free_hca(iser_hca_t *hca) 1343 { 1344 int status; 1345 ibt_hca_portinfo_t *hca_port_info; 1346 uint_t hca_port_info_sz; 1347 1348 ASSERT(hca != NULL); 1349 if (hca->hca_failed) 1350 return (DDI_FAILURE); 1351 1352 hca_port_info = hca->hca_port_info; 1353 hca_port_info_sz = hca->hca_port_info_sz; 1354 1355 /* 1356 * Free the memory regions before freeing 1357 * the associated protection domain 1358 */ 1359 iser_fini_hca_caches(hca); 1360 1361 status = ibt_free_pd(hca->hca_hdl, hca->hca_pdhdl); 1362 if (status != IBT_SUCCESS) { 1363 ISER_LOG(CE_NOTE, "iser_ib_free_hca: failed to free PD " 1364 "status=0x%x", status); 1365 goto out_caches; 1366 } 1367 1368 status = ibt_close_hca(hca->hca_hdl); 1369 if (status != IBT_SUCCESS) { 1370 ISER_LOG(CE_NOTE, "iser_ib_fini_hcas: failed to close HCA " 1371 "status=0x%x", status); 1372 goto out_pd; 1373 } 1374 1375 ibt_free_portinfo(hca_port_info, hca_port_info_sz); 1376 1377 kmem_free(hca, sizeof (iser_hca_t)); 1378 return (DDI_SUCCESS); 1379 1380 /* 1381 * We only managed to partially tear down the HCA, try to put it back 1382 * like it was before returning. 1383 */ 1384 out_pd: 1385 status = ibt_alloc_pd(hca->hca_hdl, IBT_PD_NO_FLAGS, &hca->hca_pdhdl); 1386 if (status != IBT_SUCCESS) { 1387 hca->hca_failed = B_TRUE; 1388 /* Report error and exit */ 1389 ISER_LOG(CE_NOTE, "iser_ib_free_hca: could not re-alloc PD " 1390 "status=0x%x", status); 1391 return (DDI_FAILURE); 1392 } 1393 1394 out_caches: 1395 iser_init_hca_caches(hca); 1396 1397 return (DDI_FAILURE); 1398 } 1399 1400 static int 1401 iser_ib_update_hcaports(iser_hca_t *hca) 1402 { 1403 ibt_hca_portinfo_t *pinfop, *oldpinfop; 1404 uint_t size, oldsize, nport; 1405 int status; 1406 1407 ASSERT(hca != NULL); 1408 1409 status = ibt_query_hca_ports(hca->hca_hdl, 0, &pinfop, &nport, &size); 1410 if (status != IBT_SUCCESS) { 1411 ISER_LOG(CE_NOTE, "ibt_query_hca_ports failed: %d", status); 1412 return (status); 1413 } 1414 1415 oldpinfop = hca->hca_port_info; 1416 oldsize = hca->hca_port_info_sz; 1417 hca->hca_port_info = pinfop; 1418 hca->hca_port_info_sz = size; 1419 1420 (void) ibt_free_portinfo(oldpinfop, oldsize); 1421 1422 return (IBT_SUCCESS); 1423 } 1424 1425 /* 1426 * iser_ib_gid2hca 1427 * Given a gid, find the corresponding hca 1428 */ 1429 iser_hca_t * 1430 iser_ib_gid2hca(ib_gid_t gid) 1431 { 1432 1433 iser_hca_t *hca; 1434 int i; 1435 1436 mutex_enter(&iser_state->is_hcalist_lock); 1437 for (hca = list_head(&iser_state->is_hcalist); 1438 hca != NULL; 1439 hca = list_next(&iser_state->is_hcalist, hca)) { 1440 1441 for (i = 0; i < hca->hca_num_ports; i++) { 1442 if ((hca->hca_port_info[i].p_sgid_tbl[0].gid_prefix == 1443 gid.gid_prefix) && 1444 (hca->hca_port_info[i].p_sgid_tbl[0].gid_guid == 1445 gid.gid_guid)) { 1446 1447 mutex_exit(&iser_state->is_hcalist_lock); 1448 1449 return (hca); 1450 } 1451 } 1452 } 1453 mutex_exit(&iser_state->is_hcalist_lock); 1454 return (NULL); 1455 } 1456 1457 /* 1458 * iser_ib_guid2hca 1459 * Given a HCA guid, find the corresponding HCA 1460 */ 1461 iser_hca_t * 1462 iser_ib_guid2hca(ib_guid_t guid) 1463 { 1464 1465 iser_hca_t *hca; 1466 1467 mutex_enter(&iser_state->is_hcalist_lock); 1468 for (hca = list_head(&iser_state->is_hcalist); 1469 hca != NULL; 1470 hca = list_next(&iser_state->is_hcalist, hca)) { 1471 1472 if (hca->hca_guid == guid) { 1473 mutex_exit(&iser_state->is_hcalist_lock); 1474 return (hca); 1475 } 1476 } 1477 mutex_exit(&iser_state->is_hcalist_lock); 1478 return (NULL); 1479 } 1480 1481 /* 1482 * iser_ib_conv_sockaddr2ibtaddr 1483 * This function converts a socket address into the IBT format 1484 */ 1485 void iser_ib_conv_sockaddr2ibtaddr( 1486 idm_sockaddr_t *saddr, ibt_ip_addr_t *ibt_addr) 1487 { 1488 if (saddr == NULL) { 1489 ibt_addr->family = AF_UNSPEC; 1490 ibt_addr->un.ip4addr = 0; 1491 } else { 1492 switch (saddr->sin.sa_family) { 1493 case AF_INET: 1494 1495 ibt_addr->family = saddr->sin4.sin_family; 1496 ibt_addr->un.ip4addr = saddr->sin4.sin_addr.s_addr; 1497 break; 1498 1499 case AF_INET6: 1500 1501 ibt_addr->family = saddr->sin6.sin6_family; 1502 ibt_addr->un.ip6addr = saddr->sin6.sin6_addr; 1503 break; 1504 1505 default: 1506 ibt_addr->family = AF_UNSPEC; 1507 } 1508 1509 } 1510 } 1511 1512 /* 1513 * iser_ib_conv_ibtaddr2sockaddr 1514 * This function converts an IBT ip address handle to a sockaddr 1515 */ 1516 void iser_ib_conv_ibtaddr2sockaddr(struct sockaddr_storage *ss, 1517 ibt_ip_addr_t *ibt_addr, in_port_t port) 1518 { 1519 struct sockaddr_in *sin; 1520 struct sockaddr_in6 *sin6; 1521 1522 switch (ibt_addr->family) { 1523 case AF_INET: 1524 case AF_UNSPEC: 1525 1526 sin = (struct sockaddr_in *)ibt_addr; 1527 sin->sin_port = ntohs(port); 1528 bcopy(sin, ss, sizeof (struct sockaddr_in)); 1529 break; 1530 1531 case AF_INET6: 1532 1533 sin6 = (struct sockaddr_in6 *)ibt_addr; 1534 sin6->sin6_port = ntohs(port); 1535 bcopy(sin6, ss, sizeof (struct sockaddr_in6)); 1536 break; 1537 1538 default: 1539 ISER_LOG(CE_NOTE, "iser_ib_conv_ibtaddr2sockaddr: " 1540 "unknown family type: 0x%x", ibt_addr->family); 1541 } 1542 } 1543 1544 /* 1545 * iser_ib_setup_cq 1546 * This function sets up the Completion Queue size and allocates the specified 1547 * Completion Queue 1548 */ 1549 static int 1550 iser_ib_setup_cq(ibt_hca_hdl_t hca_hdl, uint_t cq_size, ibt_cq_hdl_t *cq_hdl) 1551 { 1552 1553 ibt_cq_attr_t cq_attr; 1554 int status; 1555 1556 cq_attr.cq_size = cq_size; 1557 cq_attr.cq_sched = 0; 1558 cq_attr.cq_flags = IBT_CQ_NO_FLAGS; 1559 1560 /* Allocate a Completion Queue */ 1561 status = ibt_alloc_cq(hca_hdl, &cq_attr, cq_hdl, NULL); 1562 if (status != IBT_SUCCESS) { 1563 ISER_LOG(CE_NOTE, "iser_ib_setup_cq: ibt_alloc_cq failure (%d)", 1564 status); 1565 return (status); 1566 } 1567 1568 return (ISER_STATUS_SUCCESS); 1569 } 1570 1571 /* 1572 * iser_ib_setup_chanargs 1573 * 1574 */ 1575 static void 1576 iser_ib_setup_chanargs(uint8_t hca_port, ibt_cq_hdl_t scq_hdl, 1577 ibt_cq_hdl_t rcq_hdl, uint_t sq_size, uint_t rq_size, 1578 ibt_pd_hdl_t hca_pdhdl, ibt_rc_chan_alloc_args_t *cargs) 1579 { 1580 1581 bzero(cargs, sizeof (ibt_rc_chan_alloc_args_t)); 1582 1583 /* 1584 * Set up the size of the channels send queue, receive queue and the 1585 * maximum number of elements in a scatter gather list of work requests 1586 * posted to the send and receive queues. 1587 */ 1588 cargs->rc_sizes.cs_sq = sq_size; 1589 cargs->rc_sizes.cs_rq = rq_size; 1590 cargs->rc_sizes.cs_sq_sgl = ISER_IB_SGLIST_SIZE; 1591 cargs->rc_sizes.cs_rq_sgl = ISER_IB_SGLIST_SIZE; 1592 1593 /* 1594 * All Work requests signaled on a WR basis will receive a send 1595 * request completion. 1596 */ 1597 cargs->rc_flags = IBT_ALL_SIGNALED; 1598 1599 /* Enable RDMA read and RDMA write on the channel end points */ 1600 cargs->rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR; 1601 1602 /* Set the local hca port on which the channel is allocated */ 1603 cargs->rc_hca_port_num = hca_port; 1604 1605 /* Set the Send and Receive Completion Queue handles */ 1606 cargs->rc_scq = scq_hdl; 1607 cargs->rc_rcq = rcq_hdl; 1608 1609 /* Set the protection domain associated with the channel */ 1610 cargs->rc_pd = hca_pdhdl; 1611 1612 /* No SRQ usage */ 1613 cargs->rc_srq = NULL; 1614 } 1615 1616 /* 1617 * iser_ib_init_qp 1618 * Initialize the QP handle 1619 */ 1620 void 1621 iser_ib_init_qp(iser_chan_t *chan, uint_t sq_size, uint_t rq_size) 1622 { 1623 /* Initialize the handle lock */ 1624 mutex_init(&chan->ic_qp.qp_lock, NULL, MUTEX_DRIVER, NULL); 1625 1626 /* Record queue sizes */ 1627 chan->ic_qp.sq_size = sq_size; 1628 chan->ic_qp.rq_size = rq_size; 1629 1630 /* Initialize the RQ monitoring data */ 1631 chan->ic_qp.rq_depth = rq_size; 1632 chan->ic_qp.rq_level = 0; 1633 chan->ic_qp.rq_lwm = (chan->ic_recvcq_sz * ISER_IB_RQ_LWM_PCT) / 100; 1634 1635 /* Initialize the taskq flag */ 1636 chan->ic_qp.rq_taskqpending = B_FALSE; 1637 } 1638 1639 /* 1640 * iser_ib_fini_qp 1641 * Teardown the QP handle 1642 */ 1643 void 1644 iser_ib_fini_qp(iser_qp_t *qp) 1645 { 1646 /* Destroy the handle lock */ 1647 mutex_destroy(&qp->qp_lock); 1648 } 1649 1650 static int 1651 iser_ib_activate_port(idm_svc_t *idm_svc, ib_guid_t guid, ib_gid_t gid) 1652 { 1653 iser_svc_t *iser_svc; 1654 iser_sbind_t *is_sbind; 1655 int status; 1656 1657 iser_svc = idm_svc->is_iser_svc; 1658 1659 /* 1660 * Save the address of the service bind handle in the 1661 * iser_svc_t to undo the service binding at a later time 1662 */ 1663 is_sbind = kmem_zalloc(sizeof (iser_sbind_t), KM_SLEEP); 1664 is_sbind->is_gid = gid; 1665 is_sbind->is_guid = guid; 1666 1667 status = ibt_bind_service(iser_svc->is_srvhdl, gid, NULL, 1668 idm_svc, &is_sbind->is_sbindhdl); 1669 1670 if (status != IBT_SUCCESS) { 1671 ISER_LOG(CE_NOTE, "iser_ib_activate_port: status(0x%x): " 1672 "Bind service(%llx) on port(%llx:%llx) failed", 1673 status, (longlong_t)iser_svc->is_svcid, 1674 (longlong_t)gid.gid_prefix, (longlong_t)gid.gid_guid); 1675 1676 kmem_free(is_sbind, sizeof (iser_sbind_t)); 1677 1678 return (status); 1679 } 1680 1681 list_insert_tail(&iser_svc->is_sbindlist, is_sbind); 1682 1683 return (IBT_SUCCESS); 1684 } 1685 1686 static void 1687 iser_ib_deactivate_port(ib_guid_t hca_guid, ib_gid_t gid) 1688 { 1689 iser_svc_t *iser_svc; 1690 iser_conn_t *iser_conn; 1691 iser_sbind_t *is_sbind; 1692 idm_conn_t *idm_conn; 1693 1694 /* 1695 * Iterate through the global list of IDM target connections. 1696 * Issue a TRANSPORT_FAIL for any connections on this port, and 1697 * if there is a bound service running on the port, tear it down. 1698 */ 1699 mutex_enter(&idm.idm_global_mutex); 1700 for (idm_conn = list_head(&idm.idm_tgt_conn_list); 1701 idm_conn != NULL; 1702 idm_conn = list_next(&idm.idm_tgt_conn_list, idm_conn)) { 1703 1704 if (idm_conn->ic_transport_type != IDM_TRANSPORT_TYPE_ISER) { 1705 /* this is not an iSER connection, skip it */ 1706 continue; 1707 } 1708 1709 iser_conn = idm_conn->ic_transport_private; 1710 if (iser_conn->ic_chan->ic_ibt_path.pi_hca_guid != hca_guid) { 1711 /* this iSER connection is on a different port */ 1712 continue; 1713 } 1714 1715 /* Fail the transport for this connection */ 1716 idm_conn_event(idm_conn, CE_TRANSPORT_FAIL, IDM_STATUS_FAIL); 1717 1718 if (idm_conn->ic_conn_type == CONN_TYPE_INI) { 1719 /* initiator connection, nothing else to do */ 1720 continue; 1721 } 1722 1723 /* Check for a service binding */ 1724 iser_svc = idm_conn->ic_svc_binding->is_iser_svc; 1725 is_sbind = iser_ib_get_bind(iser_svc, hca_guid, gid); 1726 if (is_sbind != NULL) { 1727 /* This service is still bound, tear it down */ 1728 (void) ibt_unbind_service(iser_svc->is_srvhdl, 1729 is_sbind->is_sbindhdl); 1730 list_remove(&iser_svc->is_sbindlist, is_sbind); 1731 kmem_free(is_sbind, sizeof (iser_sbind_t)); 1732 } 1733 } 1734 mutex_exit(&idm.idm_global_mutex); 1735 } 1736 1737 static iser_sbind_t * 1738 iser_ib_get_bind(iser_svc_t *iser_svc, ib_guid_t hca_guid, ib_gid_t gid) 1739 { 1740 iser_sbind_t *is_sbind; 1741 1742 for (is_sbind = list_head(&iser_svc->is_sbindlist); 1743 is_sbind != NULL; 1744 is_sbind = list_next(&iser_svc->is_sbindlist, is_sbind)) { 1745 1746 if ((is_sbind->is_guid == hca_guid) && 1747 (is_sbind->is_gid.gid_prefix == gid.gid_prefix) && 1748 (is_sbind->is_gid.gid_guid == gid.gid_guid)) { 1749 return (is_sbind); 1750 } 1751 } 1752 return (NULL); 1753 } 1754