1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/ddi.h> 28 #include <sys/types.h> 29 #include <sys/socket.h> 30 #include <netinet/in.h> 31 #include <sys/sunddi.h> 32 #include <sys/sysmacros.h> 33 #include <sys/iscsi_protocol.h> 34 35 #include <sys/ib/clients/iser/iser.h> 36 #include <sys/ib/clients/iser/iser_idm.h> 37 38 /* 39 * iser_ib.c 40 * Routines for InfiniBand transport for iSER 41 * 42 * This file contains the routines to interface with the IBT API to attach and 43 * allocate IB resources, handle async events, and post recv work requests. 
 *
 */

/* Static prototypes for routines local to this file */
static iser_hca_t *iser_ib_gid2hca(ib_gid_t gid);
static iser_hca_t *iser_ib_guid2hca(ib_guid_t guid);

static iser_hca_t *iser_ib_alloc_hca(ib_guid_t guid);
static int iser_ib_free_hca(iser_hca_t *hca);
static int iser_ib_update_hcaports(iser_hca_t *hca);
static int iser_ib_init_hcas(void);
static int iser_ib_fini_hcas(void);

static iser_sbind_t *iser_ib_get_bind(
    iser_svc_t *iser_svc, ib_guid_t hca_guid, ib_gid_t gid);
static int iser_ib_activate_port(
    idm_svc_t *idm_svc, ib_guid_t guid, ib_gid_t gid);
static void iser_ib_deactivate_port(ib_guid_t hca_guid, ib_gid_t gid);

static void iser_ib_init_qp(iser_chan_t *chan, uint_t sq_size, uint_t rq_size);
static void iser_ib_fini_qp(iser_qp_t *qp);

static int iser_ib_setup_cq(ibt_hca_hdl_t hca_hdl, uint_t cq_size,
    ibt_cq_hdl_t *cq_hdl);

static void iser_ib_setup_chanargs(uint8_t hca_port, ibt_cq_hdl_t scq_hdl,
    ibt_cq_hdl_t rcq_hdl, uint_t sq_size, uint_t rq_size,
    ibt_pd_hdl_t hca_pdhdl, ibt_rc_chan_alloc_args_t *cargs);

/* Unaffiliated async event handlers, dispatched from iser_ib_async_handler */
static void iser_ib_handle_portup_event(ibt_hca_hdl_t hdl,
    ibt_async_event_t *event);
static void iser_ib_handle_portdown_event(ibt_hca_hdl_t hdl,
    ibt_async_event_t *event);
static void iser_ib_handle_hca_detach_event(ibt_hca_hdl_t hdl,
    ibt_async_event_t *event);

static void iser_ib_post_recv_task(void *arg);

/*
 * IBT client module registration descriptor, passed to ibt_attach() in
 * iser_ib_init() below.
 */
static struct ibt_clnt_modinfo_s iser_ib_modinfo = {
	IBTI_V_CURR,		/* IBTI interface version */
	IBT_STORAGE_DEV,	/* client class */
	iser_ib_async_handler,	/* unaffiliated async event handler */
	NULL,
	"iSER"			/* client name */
};

/*
 * iser_ib_init
 *
 * This function registers the HCA drivers with IBTF and registers and binds
 * iSER as a service with IBTF.
 */
int
iser_ib_init(void)
{
	int		status;

	/* Register with IBTF */
	status = ibt_attach(&iser_ib_modinfo, iser_state->is_dip, iser_state,
	    &iser_state->is_ibhdl);
	if (status != DDI_SUCCESS) {
		ISER_LOG(CE_NOTE, "iser_ib_init: ibt_attach failed (0x%x)",
		    status);
		return (DDI_FAILURE);
	}

	/* Create the global work request kmem_cache */
	iser_state->iser_wr_cache = kmem_cache_create("iser_wr_cache",
	    sizeof (iser_wr_t), 0, NULL, NULL, NULL,
	    iser_state, NULL, KM_SLEEP);

	/* Populate our list of HCAs */
	status = iser_ib_init_hcas();
	if (status != DDI_SUCCESS) {
		/*
		 * HCAs failed to initialize, tear it down. Note the
		 * unwind order: destroy the cache first, then detach
		 * from IBTF (the reverse of the setup above).
		 */
		kmem_cache_destroy(iser_state->iser_wr_cache);
		(void) ibt_detach(iser_state->is_ibhdl);
		iser_state->is_ibhdl = NULL;
		ISER_LOG(CE_NOTE, "iser_ib_init: failed to initialize HCAs");
		return (DDI_FAILURE);
	}

	/* Target will register iSER as a service with IBTF when required */

	/* Target will bind this service when it comes online */

	return (DDI_SUCCESS);
}

/*
 * iser_ib_fini
 *
 * This function unbinds and deregisters the iSER service from IBTF.
 * Returns DDI_FAILURE (leaving the IBTF attach intact) if the HCA list
 * cannot be torn down.
 */
int
iser_ib_fini(void)
{
	/* IDM would have already disabled all the services */

	/* Teardown the HCA list and associated resources */
	if (iser_ib_fini_hcas() != DDI_SUCCESS)
		return (DDI_FAILURE);

	/* Teardown the global work request kmem_cache */
	kmem_cache_destroy(iser_state->iser_wr_cache);

	/* Deregister with IBTF */
	if (iser_state->is_ibhdl != NULL) {
		(void) ibt_detach(iser_state->is_ibhdl);
		iser_state->is_ibhdl = NULL;
	}

	return (DDI_SUCCESS);
}

/*
 * iser_ib_register_service
 *
 * This function registers the iSER service using the RDMA-Aware Service ID.
 */
int
iser_ib_register_service(idm_svc_t *idm_svc)
{
	ibt_srv_desc_t	srvdesc;
	iser_svc_t	*iser_svc;
	int		status;

	bzero(&srvdesc, sizeof (ibt_srv_desc_t));

	/* Set up IBTI client callback handler from the CM */
	srvdesc.sd_handler = iser_ib_cm_handler;

	srvdesc.sd_flags = IBT_SRV_NO_FLAGS;

	iser_svc = (iser_svc_t *)idm_svc->is_iser_svc;

	/* Register the service on the specified port; returns ibt status */
	status = ibt_register_service(
	    iser_state->is_ibhdl, &srvdesc,
	    iser_svc->is_svcid, 1, &iser_svc->is_srvhdl, NULL);

	return (status);
}

/*
 * iser_ib_bind_service
 *
 * This function binds a given iSER service on all available HCA ports. The
 * current specification does not allow user to specify transport bindings
 * for each iscsi target. The ULP invokes this function to bind the target
 * to all available iser ports after checking for the presence of an IB HCA.
 * iSER is "configured" whenever an IB-capable IP address exists. The lack
 * of active IB ports is a less-fatal condition, and sockets would be used
 * as the transport even though an Infiniband HCA is configured but unusable.
 *
 * Returns ISER_STATUS_SUCCESS if at least one port was bound, or if every
 * port was merely inactive (the async handler binds those later); returns
 * ISER_STATUS_FAIL only when nothing could be bound at all.
 */
int
iser_ib_bind_service(idm_svc_t *idm_svc)
{
	iser_hca_t	*hca;
	ib_gid_t	gid;
	int		num_ports = 0;
	int		num_binds = 0;
	int		num_inactive_binds = 0; /* if HCA ports inactive */
	int		status;
	int		i;

	ASSERT(idm_svc != NULL);
	ASSERT(idm_svc->is_iser_svc != NULL);

	/* Register the iSER service on all available ports */
	mutex_enter(&iser_state->is_hcalist_lock);

	for (hca = list_head(&iser_state->is_hcalist);
	    hca != NULL;
	    hca = list_next(&iser_state->is_hcalist, hca)) {

		for (i = 0; i < hca->hca_num_ports; i++) {
			num_ports++;
			if (hca->hca_port_info[i].p_linkstate !=
			    IBT_PORT_ACTIVE) {
				/*
				 * Move on. We will attempt to bind service
				 * in our async handler if the port comes up
				 * at a later time.
				 */
				num_inactive_binds++;
				continue;
			}

			gid = hca->hca_port_info[i].p_sgid_tbl[0];

			/*
			 * If the port is already bound, skip the activate
			 * call; an already-bound port still counts toward
			 * num_binds below.
			 */
			if (iser_ib_get_bind(
			    idm_svc->is_iser_svc, hca->hca_guid, gid) == NULL) {

				status = iser_ib_activate_port(
				    idm_svc, hca->hca_guid, gid);
				if (status != IBT_SUCCESS) {
					ISER_LOG(CE_NOTE,
					    "iser_ib_bind_service: "
					    "iser_ib_activate_port failure "
					    "(0x%x)", status);
					continue;
				}
			}
			num_binds++;
		}
	}
	mutex_exit(&iser_state->is_hcalist_lock);

	if (num_binds) {
		ISER_LOG(CE_NOTE, "iser_ib_bind_service: Service available on "
		    "(%d) of (%d) ports", num_binds, num_ports);
		return (ISER_STATUS_SUCCESS);
	} else if (num_inactive_binds) {
		ISER_LOG(CE_NOTE, "iser_ib_bind_service: Could not bind "
		    "service, HCA ports are not active.");
		/*
		 * still considered success, the async handler will bind
		 * the service when the port comes up at a later time
		 */
		return (ISER_STATUS_SUCCESS);
	} else {
		ISER_LOG(CE_NOTE, "iser_ib_bind_service: Did not bind service");
		return (ISER_STATUS_FAIL);
	}
}

/*
 * iser_ib_unbind_service
 *
 * This function unbinds a given service on all HCA ports, freeing each
 * bind handle as it goes.
 */
void
iser_ib_unbind_service(idm_svc_t *idm_svc)
{
	iser_svc_t	*iser_svc;
	iser_sbind_t	*is_sbind, *next_sb;

	if (idm_svc != NULL && idm_svc->is_iser_svc != NULL) {

		iser_svc = idm_svc->is_iser_svc;

		/* next_sb is saved first since is_sbind is freed below */
		for (is_sbind = list_head(&iser_svc->is_sbindlist);
		    is_sbind != NULL;
		    is_sbind = next_sb) {
			next_sb = list_next(&iser_svc->is_sbindlist, is_sbind);
			ibt_unbind_service(iser_svc->is_srvhdl,
			    is_sbind->is_sbindhdl);
			list_remove(&iser_svc->is_sbindlist, is_sbind);
			kmem_free(is_sbind, sizeof (iser_sbind_t));
		}
	}
}

/* ARGSUSED */
void 301 iser_ib_deregister_service(idm_svc_t *idm_svc) 302 { 303 iser_svc_t *iser_svc; 304 305 if (idm_svc != NULL && idm_svc->is_iser_svc != NULL) { 306 307 iser_svc = (iser_svc_t *)idm_svc->is_iser_svc; 308 ibt_deregister_service(iser_state->is_ibhdl, 309 iser_svc->is_srvhdl); 310 ibt_release_ip_sid(iser_svc->is_svcid); 311 } 312 } 313 314 /* 315 * iser_ib_get_paths 316 * This function finds the IB path between the local and the remote address. 317 * 318 */ 319 int 320 iser_ib_get_paths(ibt_ip_addr_t *local_ip, ibt_ip_addr_t *remote_ip, 321 ibt_path_info_t *path, ibt_path_ip_src_t *path_src_ip) 322 { 323 ibt_ip_path_attr_t ipattr; 324 int status; 325 326 (void) bzero(&ipattr, sizeof (ibt_ip_path_attr_t)); 327 ipattr.ipa_dst_ip = remote_ip; 328 ipattr.ipa_src_ip = *local_ip; 329 ipattr.ipa_max_paths = 1; 330 ipattr.ipa_ndst = 1; 331 332 (void) bzero(path, sizeof (ibt_path_info_t)); 333 status = ibt_get_ip_paths(iser_state->is_ibhdl, IBT_PATH_NO_FLAGS, 334 &ipattr, path, NULL, path_src_ip); 335 if (status != IBT_SUCCESS) { 336 ISER_LOG(CE_NOTE, "ibt_get_ip_paths: ibt_get_ip_paths " 337 "failure: status (%d)", status); 338 return (status); 339 } 340 341 if (local_ip != NULL) { 342 ISER_LOG(CE_NOTE, "iser_ib_get_paths success: IP[%x to %x]", 343 local_ip->un.ip4addr, remote_ip->un.ip4addr); 344 } else { 345 ISER_LOG(CE_NOTE, "iser_ib_get_paths success: " 346 "IP[INADDR_ANY to %x]", remote_ip->un.ip4addr); 347 } 348 349 return (ISER_STATUS_SUCCESS); 350 } 351 352 /* 353 * iser_ib_alloc_rc_channel 354 * 355 * This function allocates a reliable communication channel using the specified 356 * channel attributes. 
357 */ 358 iser_chan_t * 359 iser_ib_alloc_rc_channel(ibt_ip_addr_t *local_ip, ibt_ip_addr_t *remote_ip) 360 { 361 362 iser_chan_t *chan; 363 ib_gid_t lgid; 364 uint8_t hca_port; /* from path */ 365 iser_hca_t *hca; 366 ibt_path_ip_src_t path_src_ip; 367 ibt_rc_chan_alloc_args_t chanargs; 368 uint_t sq_size, rq_size; 369 int status; 370 371 chan = kmem_zalloc(sizeof (iser_chan_t), KM_SLEEP); 372 373 mutex_init(&chan->ic_lock, NULL, MUTEX_DRIVER, NULL); 374 mutex_init(&chan->ic_sq_post_lock, NULL, MUTEX_DRIVER, NULL); 375 376 /* Lookup a path to the given destination */ 377 status = iser_ib_get_paths(local_ip, remote_ip, &chan->ic_ibt_path, 378 &path_src_ip); 379 380 if (status != ISER_STATUS_SUCCESS) { 381 ISER_LOG(CE_NOTE, "iser_ib_get_paths failed: status (%d)", 382 status); 383 mutex_destroy(&chan->ic_lock); 384 mutex_destroy(&chan->ic_sq_post_lock); 385 kmem_free(chan, sizeof (iser_chan_t)); 386 return (NULL); 387 } 388 389 /* get the local gid from the path info */ 390 lgid = chan->ic_ibt_path.pi_prim_cep_path.cep_adds_vect.av_sgid; 391 392 /* get the hca port from the path info */ 393 hca_port = chan->ic_ibt_path.pi_prim_cep_path.cep_hca_port_num; 394 395 /* Lookup the hca using the gid in the path info */ 396 hca = iser_ib_gid2hca(lgid); 397 if (hca == NULL) { 398 ISER_LOG(CE_NOTE, "iser_ib_alloc_rc_channel: failed " 399 "to lookup HCA handle"); 400 mutex_destroy(&chan->ic_lock); 401 mutex_destroy(&chan->ic_sq_post_lock); 402 kmem_free(chan, sizeof (iser_chan_t)); 403 return (NULL); 404 } 405 406 /* Set up the iSER channel handle with HCA and IP data */ 407 chan->ic_hca = hca; 408 chan->ic_localip = path_src_ip.ip_primary; 409 chan->ic_remoteip = *remote_ip; 410 411 /* 412 * Determine the queue sizes, based upon the HCA query data. 413 * For our Work Queues, we will use either our default value, 414 * or the HCA's maximum value, whichever is smaller. 
415 */ 416 sq_size = min(hca->hca_attr.hca_max_chan_sz, ISER_IB_SENDQ_SIZE); 417 rq_size = min(hca->hca_attr.hca_max_chan_sz, ISER_IB_RECVQ_SIZE); 418 419 /* 420 * For our Completion Queues, we again check the device maximum. 421 * We want to end up with CQs that are the next size up from the 422 * WQs they are servicing so that they have some overhead. 423 */ 424 if (hca->hca_attr.hca_max_cq_sz >= (sq_size + 1)) { 425 chan->ic_sendcq_sz = sq_size + 1; 426 } else { 427 chan->ic_sendcq_sz = hca->hca_attr.hca_max_cq_sz; 428 sq_size = chan->ic_sendcq_sz - 1; 429 } 430 431 if (hca->hca_attr.hca_max_cq_sz >= (rq_size + 1)) { 432 chan->ic_recvcq_sz = rq_size + 1; 433 } else { 434 chan->ic_recvcq_sz = hca->hca_attr.hca_max_cq_sz; 435 rq_size = chan->ic_recvcq_sz - 1; 436 } 437 438 /* Initialize the iSER channel's QP handle */ 439 iser_ib_init_qp(chan, sq_size, rq_size); 440 441 /* Set up the Send Completion Queue */ 442 status = iser_ib_setup_cq(hca->hca_hdl, chan->ic_sendcq_sz, 443 &chan->ic_sendcq); 444 if (status != ISER_STATUS_SUCCESS) { 445 iser_ib_fini_qp(&chan->ic_qp); 446 mutex_destroy(&chan->ic_lock); 447 mutex_destroy(&chan->ic_sq_post_lock); 448 kmem_free(chan, sizeof (iser_chan_t)); 449 return (NULL); 450 } 451 ibt_set_cq_handler(chan->ic_sendcq, iser_ib_sendcq_handler, chan); 452 ibt_enable_cq_notify(chan->ic_sendcq, IBT_NEXT_COMPLETION); 453 454 /* Set up the Receive Completion Queue */ 455 status = iser_ib_setup_cq(hca->hca_hdl, chan->ic_recvcq_sz, 456 &chan->ic_recvcq); 457 if (status != ISER_STATUS_SUCCESS) { 458 (void) ibt_free_cq(chan->ic_sendcq); 459 iser_ib_fini_qp(&chan->ic_qp); 460 mutex_destroy(&chan->ic_lock); 461 mutex_destroy(&chan->ic_sq_post_lock); 462 kmem_free(chan, sizeof (iser_chan_t)); 463 return (NULL); 464 } 465 ibt_set_cq_handler(chan->ic_recvcq, iser_ib_recvcq_handler, chan); 466 ibt_enable_cq_notify(chan->ic_recvcq, IBT_NEXT_COMPLETION); 467 468 /* Setup the channel arguments */ 469 iser_ib_setup_chanargs(hca_port, chan->ic_sendcq, 
chan->ic_recvcq, 470 sq_size, rq_size, hca->hca_pdhdl, &chanargs); 471 472 status = ibt_alloc_rc_channel(hca->hca_hdl, 473 IBT_ACHAN_NO_FLAGS, &chanargs, &chan->ic_chanhdl, NULL); 474 if (status != IBT_SUCCESS) { 475 ISER_LOG(CE_NOTE, "iser_ib_alloc_rc_channel: failed " 476 "ibt_alloc_rc_channel: status (%d)", status); 477 (void) ibt_free_cq(chan->ic_sendcq); 478 (void) ibt_free_cq(chan->ic_recvcq); 479 iser_ib_fini_qp(&chan->ic_qp); 480 mutex_destroy(&chan->ic_lock); 481 mutex_destroy(&chan->ic_sq_post_lock); 482 kmem_free(chan, sizeof (iser_chan_t)); 483 return (NULL); 484 } 485 486 /* Set the 'channel' as the client private data */ 487 (void) ibt_set_chan_private(chan->ic_chanhdl, chan); 488 489 ISER_LOG(CE_NOTE, "iser_ib_alloc_rc_channel success: " 490 "chanhdl (0x%p), IP:[%llx to %llx], lgid (%llx:%llx), HCA(%llx) %d", 491 (void *)chan->ic_chanhdl, 492 (longlong_t)local_ip->un.ip4addr, 493 (longlong_t)remote_ip->un.ip4addr, 494 (longlong_t)lgid.gid_prefix, (longlong_t)lgid.gid_guid, 495 (longlong_t)hca->hca_guid, hca_port); 496 497 return (chan); 498 } 499 500 /* 501 * iser_ib_open_rc_channel 502 * This function opens a RC connection on the given allocated RC channel 503 */ 504 int 505 iser_ib_open_rc_channel(iser_chan_t *chan) 506 { 507 ibt_ip_cm_info_t ipcm_info; 508 iser_private_data_t iser_priv_data; 509 ibt_chan_open_args_t ocargs; 510 ibt_rc_returns_t ocreturns; 511 int status; 512 513 mutex_enter(&chan->ic_lock); 514 515 /* 516 * For connection establishment, the initiator sends a CM REQ using the 517 * iSER RDMA-Aware Service ID. Included are the source and destination 518 * IP addresses, and the src port. 
519 */ 520 bzero(&ipcm_info, sizeof (ibt_ip_cm_info_t)); 521 ipcm_info.src_addr = chan->ic_localip; 522 ipcm_info.dst_addr = chan->ic_remoteip; 523 ipcm_info.src_port = chan->ic_lport; 524 525 /* 526 * The CM Private Data field defines the iSER connection parameters 527 * such as zero based virtual address exception (ZBVAE) and Send with 528 * invalidate Exception (SIE). 529 * 530 * Solaris IBT does not currently support ZBVAE or SIE. 531 */ 532 iser_priv_data.rsvd1 = 0; 533 iser_priv_data.sie = 1; 534 iser_priv_data.zbvae = 1; 535 536 status = ibt_format_ip_private_data(&ipcm_info, 537 sizeof (iser_private_data_t), &iser_priv_data); 538 if (status != IBT_SUCCESS) { 539 ISER_LOG(CE_NOTE, "iser_ib_open_rc_channel failed: %d", status); 540 mutex_exit(&chan->ic_lock); 541 return (status); 542 } 543 544 /* 545 * Set the SID we are attempting to connect to, based upon the 546 * remote port number. 547 */ 548 chan->ic_ibt_path.pi_sid = ibt_get_ip_sid(IPPROTO_TCP, chan->ic_rport); 549 550 /* Set up the args for the channel open */ 551 bzero(&ocargs, sizeof (ibt_chan_open_args_t)); 552 ocargs.oc_path = &chan->ic_ibt_path; 553 ocargs.oc_cm_handler = iser_ib_cm_handler; 554 ocargs.oc_cm_clnt_private = iser_state; 555 ocargs.oc_rdma_ra_out = 4; 556 ocargs.oc_rdma_ra_in = 4; 557 ocargs.oc_path_retry_cnt = 2; 558 ocargs.oc_path_rnr_retry_cnt = 2; 559 ocargs.oc_priv_data_len = sizeof (iser_private_data_t); 560 ocargs.oc_priv_data = &iser_priv_data; 561 562 bzero(&ocreturns, sizeof (ibt_rc_returns_t)); 563 564 status = ibt_open_rc_channel(chan->ic_chanhdl, 565 IBT_OCHAN_NO_FLAGS, IBT_BLOCKING, &ocargs, &ocreturns); 566 567 if (status != IBT_SUCCESS) { 568 ISER_LOG(CE_NOTE, "iser_ib_open_rc_channel failed: %d", status); 569 mutex_exit(&chan->ic_lock); 570 return (status); 571 } 572 573 mutex_exit(&chan->ic_lock); 574 return (IDM_STATUS_SUCCESS); 575 } 576 577 /* 578 * iser_ib_close_rc_channel 579 * This function closes the RC channel related to this iser_chan handle. 
 * NOTE(review): despite the original claim of a "non-blocking, no callbacks"
 * close, the call below passes IBT_BLOCKING with no callback arguments, so
 * this blocks until the close completes — confirm which was intended.
 */
void
iser_ib_close_rc_channel(iser_chan_t *chan)
{
	int status;

	mutex_enter(&chan->ic_lock);
	status = ibt_close_rc_channel(chan->ic_chanhdl, IBT_BLOCKING, NULL,
	    0, NULL, NULL, 0);
	if (status != IBT_SUCCESS) {
		/* Log only; nothing more we can do with a failed close */
		ISER_LOG(CE_NOTE, "iser_ib_close_rc_channel: "
		    "ibt_close_rc_channel failed: status (%d)", status);
	}
	mutex_exit(&chan->ic_lock);
}

/*
 * iser_ib_free_rc_channel
 *
 * This function tears down an RC channel's QP initialization and frees it.
 * Note that we do not need synchronization here; the channel has been
 * closed already, so we should only have completion polling occuring. Once
 * complete, we are free to free the IBTF channel, WQ and CQ resources, and
 * our own related resources.
 *
 * The caller must hold chan->ic_conn->ic_lock; the drain loops below drop
 * and re-take it while waiting for the queues to empty.
 */
void
iser_ib_free_rc_channel(iser_chan_t *chan)
{
	iser_qp_t	*iser_qp;

	iser_qp = &chan->ic_qp;

	/* Ensure the SQ is empty: poll every half second until drained */
	while (chan->ic_sq_post_count != 0) {
		mutex_exit(&chan->ic_conn->ic_lock);
		delay(drv_usectohz(ISER_DELAY_HALF_SECOND));
		mutex_enter(&chan->ic_conn->ic_lock);
	}
	mutex_destroy(&chan->ic_sq_post_lock);

	/* Ensure the RQ is empty: flush, then poll until rq_level drains */
	(void) ibt_flush_channel(chan->ic_chanhdl);
	mutex_enter(&iser_qp->qp_lock);
	while (iser_qp->rq_level != 0) {
		mutex_exit(&iser_qp->qp_lock);
		mutex_exit(&chan->ic_conn->ic_lock);
		delay(drv_usectohz(ISER_DELAY_HALF_SECOND));
		mutex_enter(&chan->ic_conn->ic_lock);
		mutex_enter(&iser_qp->qp_lock);
	}

	/* Free our QP handle */
	mutex_exit(&iser_qp->qp_lock);
	(void) iser_ib_fini_qp(iser_qp);

	/* Free the IBT channel resources */
	(void) ibt_free_channel(chan->ic_chanhdl);
	chan->ic_chanhdl = NULL;

	/* Free the CQs */
	ibt_free_cq(chan->ic_sendcq);
	ibt_free_cq(chan->ic_recvcq);

	/* Free the chan handle */
	mutex_destroy(&chan->ic_lock);
	kmem_free(chan, sizeof (iser_chan_t));
}

/*
 * iser_ib_post_recv
 *
 * This function handles keeping the RQ full on a given channel.
 * This routine will mostly be run on a taskq, and will check the
 * current fill level of the RQ, and post as many WRs as necessary
 * to fill it again.
 */

int
iser_ib_post_recv_async(ibt_channel_hdl_t chanhdl)
{
	iser_chan_t *chan;
	int status;

	/* Pull our iSER channel handle from the private data */
	chan = (iser_chan_t *)ibt_get_chan_private(chanhdl);

	/*
	 * Caller must check that chan->ic_conn->ic_stage indicates
	 * the connection is active (not closing, not closed) and
	 * it must hold the mutex across the check and the call to
	 * this function.
	 */
	ASSERT(mutex_owned(&chan->ic_conn->ic_lock));
	ASSERT((chan->ic_conn->ic_stage >= ISER_CONN_STAGE_IC_CONNECTED) &&
	    (chan->ic_conn->ic_stage <= ISER_CONN_STAGE_LOGGED_IN));
	/* The hold is released by the task, or here on dispatch failure */
	idm_conn_hold(chan->ic_conn->ic_idmc);
	status = ddi_taskq_dispatch(iser_taskq, iser_ib_post_recv_task,
	    (void *)chanhdl, DDI_NOSLEEP);
	if (status != DDI_SUCCESS) {
		idm_conn_rele(chan->ic_conn->ic_idmc);
	}

	return (status);
}

/*
 * Taskq wrapper: post receive WRs, then drop the connection hold taken
 * by iser_ib_post_recv_async().
 */
static void
iser_ib_post_recv_task(void *arg)
{
	ibt_channel_hdl_t	chanhdl = arg;
	iser_chan_t		*chan;

	/* Pull our iSER channel handle from the private data */
	chan = (iser_chan_t *)ibt_get_chan_private(chanhdl);

	iser_ib_post_recv(chanhdl);
	idm_conn_rele(chan->ic_conn->ic_idmc);
}

void
iser_ib_post_recv(ibt_channel_hdl_t chanhdl)
{
	iser_chan_t	*chan;
	iser_hca_t	*hca;
	iser_msg_t	*msg;
	ibt_recv_wr_t	*wrlist, wr[ISER_IB_RQ_POST_MAX];
	int		rq_space, msg_ret;
	int		total_num, npost;
	uint_t		nposted;
	int		status, i;
	iser_qp_t	*iser_qp;
	ib_gid_t	lgid;

	/* Pull our iSER channel handle from the private data */
	chan = (iser_chan_t *)ibt_get_chan_private(chanhdl);

	ASSERT(chan != NULL);

	mutex_enter(&chan->ic_conn->ic_lock);

	/* Bail out if the connection is closed; no need for more recv WRs */
	if ((chan->ic_conn->ic_stage == ISER_CONN_STAGE_CLOSING) ||
	    (chan->ic_conn->ic_stage == ISER_CONN_STAGE_CLOSED)) {
		mutex_exit(&chan->ic_conn->ic_lock);
		return;
	}

	/* get the QP handle from the iser_chan */
	iser_qp = &chan->ic_qp;

	/* get the local gid from the path info */
	lgid = chan->ic_ibt_path.pi_prim_cep_path.cep_adds_vect.av_sgid;

	/* get the hca port from the path info */
	hca = iser_ib_gid2hca(lgid);
	if (hca == NULL) {
		ISER_LOG(CE_NOTE, "iser_ib_post_recv: unable to retrieve "
		    "HCA handle");
		mutex_exit(&chan->ic_conn->ic_lock);
		return;
	}

	/* check for space to post on the RQ */
	mutex_enter(&iser_qp->qp_lock);
	rq_space = iser_qp->rq_depth - iser_qp->rq_level;
	if (rq_space == 0) {
		/* The RQ is full, clear the pending flag and return */
		iser_qp->rq_taskqpending = B_FALSE;
		mutex_exit(&iser_qp->qp_lock);
		mutex_exit(&chan->ic_conn->ic_lock);
		return;
	}

	/* Keep track of the lowest value for rq_min_post_level */
	if (iser_qp->rq_level < iser_qp->rq_min_post_level)
		iser_qp->rq_min_post_level = iser_qp->rq_level;

	mutex_exit(&iser_qp->qp_lock);

	/* we've room to post, so pull from the msg cache */
	msg = iser_msg_get(hca, rq_space, &msg_ret);
	if (msg == NULL) {
		ISER_LOG(CE_NOTE, "iser_ib_post_recv: no message handles "
		    "available in msg cache currently");
		/*
		 * There are no messages on the cache. Wait a half-
		 * second, then try again via a fresh taskq dispatch.
		 */
		delay(drv_usectohz(ISER_DELAY_HALF_SECOND));
		status = iser_ib_post_recv_async(chanhdl);
		if (status != DDI_SUCCESS) {
			ISER_LOG(CE_NOTE, "iser_ib_post_recv: failed to "
			    "redispatch routine");
			/* Failed to dispatch, clear pending flag */
			mutex_enter(&iser_qp->qp_lock);
			iser_qp->rq_taskqpending = B_FALSE;
			mutex_exit(&iser_qp->qp_lock);
		}
		mutex_exit(&chan->ic_conn->ic_lock);
		return;
	}

	if (msg_ret != rq_space) {
		ISER_LOG(CE_NOTE, "iser_ib_post_recv: requested number of "
		    "messages not allocated: requested (%d) allocated (%d)",
		    rq_space, msg_ret);
		/* We got some, but not all, of our requested depth */
		rq_space = msg_ret;
	}

	/*
	 * Now, walk through the allocated WRs and post them,
	 * ISER_IB_RQ_POST_MAX (or less) at a time.
	 */
	wrlist = &wr[0];
	total_num = rq_space;

	while (total_num) {
		/* determine the number to post on this iteration */
		npost = (total_num > ISER_IB_RQ_POST_MAX) ?
		    ISER_IB_RQ_POST_MAX : total_num;

		/* build a list of WRs from the msg list */
		for (i = 0; i < npost; i++) {
			wrlist[i].wr_id = (ibt_wrid_t)(uintptr_t)msg;
			wrlist[i].wr_nds = ISER_IB_SGLIST_SIZE;
			wrlist[i].wr_sgl = &msg->msg_ds;
			msg = msg->nextp;
		}

		/* post the list to the RQ */
		nposted = 0;
		status = ibt_post_recv(chanhdl, wrlist, npost, &nposted);
		if ((status != IBT_SUCCESS) || (nposted != npost)) {
			ISER_LOG(CE_NOTE, "iser_ib_post_recv: ibt_post_recv "
			    "failed: requested (%d) posted (%d) status (%d)",
			    npost, nposted, status);
			total_num -= nposted;
			break;
		}

		/* decrement total number to post by the number posted */
		total_num -= nposted;
	}

	mutex_enter(&iser_qp->qp_lock);
	if (total_num != 0) {
		/* Partial post: only credit the WRs actually posted */
		ISER_LOG(CE_NOTE, "iser_ib_post_recv: unable to fill RQ, "
		    "failed to post (%d) WRs", total_num);
		iser_qp->rq_level += rq_space - total_num;
	} else {
		iser_qp->rq_level += rq_space;
	}

	/*
	 * Now that we've filled the RQ, check that all of the recv WRs
	 * haven't just been immediately consumed. If so, taskqpending is
	 * still B_TRUE, so we need to fire off a taskq thread to post
	 * more WRs.
	 */
	if (iser_qp->rq_level == 0) {
		mutex_exit(&iser_qp->qp_lock);
		status = iser_ib_post_recv_async(chanhdl);
		if (status != DDI_SUCCESS) {
			ISER_LOG(CE_NOTE, "iser_ib_post_recv: failed to "
			    "dispatch followup routine");
			/* Failed to dispatch, clear pending flag */
			mutex_enter(&iser_qp->qp_lock);
			iser_qp->rq_taskqpending = B_FALSE;
			mutex_exit(&iser_qp->qp_lock);
		}
	} else {
		/*
		 * We're done, we've filled the RQ. Clear the taskq
		 * flag so that we can run again.
		 */
		iser_qp->rq_taskqpending = B_FALSE;
		mutex_exit(&iser_qp->qp_lock);
	}

	mutex_exit(&chan->ic_conn->ic_lock);
}

/*
 * iser_ib_handle_portup_event()
 * This handles the IBT_EVENT_PORT_UP unaffiliated asynchronous event.
 *
 * To facilitate a seamless bringover of the port and configure the CM service
 * for inbound iSER service requests on this newly active port, the existing
 * IDM services will be checked for iSER support.
 * If an iSER service was already created, then this service will simply be
 * bound to the gid of the newly active port. If on the other hand, the CM
 * service did not exist, i.e. only socket communication, then a new CM
 * service will be first registered with the saved service parameters and
 * then bound to the newly active port.
 *
 */
/* ARGSUSED */
static void
iser_ib_handle_portup_event(ibt_hca_hdl_t hdl, ibt_async_event_t *event)
{
	iser_hca_t	*hca;
	ib_gid_t	gid;
	idm_svc_t	*idm_svc;
	int		status;

	ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event: HCA(0x%llx) port(%d)",
	    (longlong_t)event->ev_hca_guid, event->ev_port);

	/*
	 * Query all ports on the HCA and update the port information
	 * maintained in the iser_hca_t structure
	 */
	hca = iser_ib_guid2hca(event->ev_hca_guid);
	if (hca == NULL) {

		/*
		 * HCA is just made available, first port on that HCA.
		 * NOTE(review): iser_ib_alloc_hca()'s return value is not
		 * checked here before being inserted in the list — confirm
		 * it cannot fail in this path.
		 */
		hca = iser_ib_alloc_hca(event->ev_hca_guid);

		mutex_enter(&iser_state->is_hcalist_lock);
		list_insert_tail(&iser_state->is_hcalist, hca);
		iser_state->is_num_hcas++;
		mutex_exit(&iser_state->is_hcalist_lock);

	} else {

		status = iser_ib_update_hcaports(hca);

		if (status != IBT_SUCCESS) {
			ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event "
			    "status(0x%x): iser_ib_update_hcaports failed: "
			    "HCA(0x%llx) port(%d)", status,
			    (longlong_t)event->ev_hca_guid, event->ev_port);
			return;
		}
	}

	/* ev_port is 1-based; index the port info table from 0 */
	gid = hca->hca_port_info[event->ev_port - 1].p_sgid_tbl[0];

	/*
	 * Iterate through the global list of IDM target services
	 * and check for existing iSER CM service.
	 */
	mutex_enter(&idm.idm_global_mutex);
	for (idm_svc = list_head(&idm.idm_tgt_svc_list);
	    idm_svc != NULL;
	    idm_svc = list_next(&idm.idm_tgt_svc_list, idm_svc)) {


		if (idm_svc->is_iser_svc == NULL) {

			/* Establish a new CM service for iSER requests */
			status = iser_tgt_svc_create(
			    &idm_svc->is_svc_req, idm_svc);

			if (status != IBT_SUCCESS) {
				ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event "
				    "status(0x%x): iser_tgt_svc_create failed: "
				    "HCA(0x%llx) port(%d)", status,
				    (longlong_t)event->ev_hca_guid,
				    event->ev_port);

				continue;
			}
		}

		status = iser_ib_activate_port(
		    idm_svc, event->ev_hca_guid, gid);
		if (status != IBT_SUCCESS) {

			ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event "
			    "status(0x%x): Bind service on port "
			    "(%llx:%llx) failed",
			    status, (longlong_t)gid.gid_prefix,
			    (longlong_t)gid.gid_guid);

			continue;
		}
		ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event: service bound "
		    "HCA(0x%llx) port(%d)", (longlong_t)event->ev_hca_guid,
		    event->ev_port);
	}
	mutex_exit(&idm.idm_global_mutex);

	ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event success: "
	    "HCA(0x%llx) port(%d)", (longlong_t)event->ev_hca_guid,
	    event->ev_port);
}

/*
 * iser_ib_handle_portdown_event()
 * This handles the IBT_EVENT_PORT_DOWN unaffiliated asynchronous error.
 *
 * Unconfigure the CM service on the deactivated port and teardown the
 * connections that are using the CM service.
974 */ 975 /* ARGSUSED */ 976 static void 977 iser_ib_handle_portdown_event(ibt_hca_hdl_t hdl, ibt_async_event_t *event) 978 { 979 iser_hca_t *hca; 980 ib_gid_t gid; 981 int status; 982 983 /* 984 * Query all ports on the HCA and update the port information 985 * maintainted in the iser_hca_t structure 986 */ 987 hca = iser_ib_guid2hca(event->ev_hca_guid); 988 ASSERT(hca != NULL); 989 990 status = iser_ib_update_hcaports(hca); 991 if (status != IBT_SUCCESS) { 992 ISER_LOG(CE_NOTE, "iser_ib_handle_portdown_event status(0x%x): " 993 "ibt_ib_update_hcaports failed: HCA(0x%llx) port(%d)", 994 status, (longlong_t)event->ev_hca_guid, event->ev_port); 995 return; 996 } 997 998 /* get the gid of the new port */ 999 gid = hca->hca_port_info[event->ev_port - 1].p_sgid_tbl[0]; 1000 iser_ib_deactivate_port(event->ev_hca_guid, gid); 1001 1002 ISER_LOG(CE_NOTE, "iser_ib_handle_portdown_event success: " 1003 "HCA(0x%llx) port(%d)", (longlong_t)event->ev_hca_guid, 1004 event->ev_port); 1005 } 1006 1007 /* 1008 * iser_ib_handle_hca_detach_event() 1009 * Quiesce all activity bound for the port, teardown the connection, unbind 1010 * iSER services on all ports and release the HCA handle. 1011 */ 1012 /* ARGSUSED */ 1013 static void 1014 iser_ib_handle_hca_detach_event(ibt_hca_hdl_t hdl, ibt_async_event_t *event) 1015 { 1016 iser_hca_t *nexthca, *hca; 1017 int i, status; 1018 1019 ISER_LOG(CE_NOTE, "iser_ib_handle_hca_detach_event: HCA(0x%llx)", 1020 (longlong_t)event->ev_hca_guid); 1021 1022 hca = iser_ib_guid2hca(event->ev_hca_guid); 1023 for (i = 0; i < hca->hca_num_ports; i++) { 1024 iser_ib_deactivate_port(hca->hca_guid, 1025 hca->hca_port_info[i].p_sgid_tbl[0]); 1026 } 1027 1028 /* 1029 * Update the HCA list maintained in the iser_state. Free the 1030 * resources allocated to the HCA, i.e. 
caches, protection domain 1031 */ 1032 mutex_enter(&iser_state->is_hcalist_lock); 1033 1034 for (hca = list_head(&iser_state->is_hcalist); 1035 hca != NULL; 1036 hca = nexthca) { 1037 1038 nexthca = list_next(&iser_state->is_hcalist, hca); 1039 1040 if (hca->hca_guid == event->ev_hca_guid) { 1041 1042 list_remove(&iser_state->is_hcalist, hca); 1043 iser_state->is_num_hcas--; 1044 1045 status = iser_ib_free_hca(hca); 1046 if (status != DDI_SUCCESS) { 1047 ISER_LOG(CE_WARN, "iser_ib_handle_hca_detach: " 1048 "Failed to free hca(%p)", (void *)hca); 1049 list_insert_tail(&iser_state->is_hcalist, hca); 1050 iser_state->is_num_hcas++; 1051 } 1052 /* No way to return status to IBT if this fails */ 1053 } 1054 } 1055 mutex_exit(&iser_state->is_hcalist_lock); 1056 1057 } 1058 1059 /* 1060 * iser_ib_async_handler 1061 * An IBT Asynchronous Event handler is registered it with the framework and 1062 * passed via the ibt_attach() routine. This function handles the following 1063 * asynchronous events. 1064 * IBT_EVENT_PORT_UP 1065 * IBT_ERROR_PORT_DOWN 1066 * IBT_HCA_ATTACH_EVENT 1067 * IBT_HCA_DETACH_EVENT 1068 */ 1069 /* ARGSUSED */ 1070 void 1071 iser_ib_async_handler(void *clntp, ibt_hca_hdl_t hdl, ibt_async_code_t code, 1072 ibt_async_event_t *event) 1073 { 1074 switch (code) { 1075 case IBT_EVENT_PORT_UP: 1076 iser_ib_handle_portup_event(hdl, event); 1077 break; 1078 1079 case IBT_ERROR_PORT_DOWN: 1080 iser_ib_handle_portdown_event(hdl, event); 1081 break; 1082 1083 case IBT_HCA_ATTACH_EVENT: 1084 /* 1085 * A new HCA device is available for use, ignore this 1086 * event because the corresponding IBT_EVENT_PORT_UP 1087 * events will get triggered and handled accordingly. 
1088 */ 1089 break; 1090 1091 case IBT_HCA_DETACH_EVENT: 1092 iser_ib_handle_hca_detach_event(hdl, event); 1093 break; 1094 1095 default: 1096 break; 1097 } 1098 } 1099 1100 /* 1101 * iser_ib_init_hcas 1102 * 1103 * This function opens all the HCA devices, gathers the HCA state information 1104 * and adds the HCA handle for each HCA found in the iser_soft_state. 1105 */ 1106 static int 1107 iser_ib_init_hcas(void) 1108 { 1109 ib_guid_t *guid; 1110 int num_hcas; 1111 int i; 1112 iser_hca_t *hca; 1113 1114 /* Retrieve the HCA list */ 1115 num_hcas = ibt_get_hca_list(&guid); 1116 if (num_hcas == 0) { 1117 /* 1118 * This shouldn't happen, but might if we have all HCAs 1119 * detach prior to initialization. 1120 */ 1121 return (DDI_FAILURE); 1122 } 1123 1124 /* Initialize the hcalist lock */ 1125 mutex_init(&iser_state->is_hcalist_lock, NULL, MUTEX_DRIVER, NULL); 1126 1127 /* Create the HCA list */ 1128 list_create(&iser_state->is_hcalist, sizeof (iser_hca_t), 1129 offsetof(iser_hca_t, hca_node)); 1130 1131 for (i = 0; i < num_hcas; i++) { 1132 1133 ISER_LOG(CE_NOTE, "iser_ib_init_hcas: initializing HCA " 1134 "(0x%llx)", (longlong_t)guid[i]); 1135 1136 hca = iser_ib_alloc_hca(guid[i]); 1137 if (hca == NULL) { 1138 /* This shouldn't happen, teardown and fail */ 1139 (void) iser_ib_fini_hcas(); 1140 (void) ibt_free_hca_list(guid, num_hcas); 1141 return (DDI_FAILURE); 1142 } 1143 1144 mutex_enter(&iser_state->is_hcalist_lock); 1145 list_insert_tail(&iser_state->is_hcalist, hca); 1146 iser_state->is_num_hcas++; 1147 mutex_exit(&iser_state->is_hcalist_lock); 1148 1149 } 1150 1151 /* Free the IBT HCA list */ 1152 (void) ibt_free_hca_list(guid, num_hcas); 1153 1154 /* Check that we've initialized at least one HCA */ 1155 mutex_enter(&iser_state->is_hcalist_lock); 1156 if (list_is_empty(&iser_state->is_hcalist)) { 1157 ISER_LOG(CE_NOTE, "iser_ib_init_hcas: failed to initialize " 1158 "any HCAs"); 1159 1160 mutex_exit(&iser_state->is_hcalist_lock); 1161 (void) 
iser_ib_fini_hcas(); 1162 return (DDI_FAILURE); 1163 } 1164 mutex_exit(&iser_state->is_hcalist_lock); 1165 1166 return (DDI_SUCCESS); 1167 } 1168 1169 /* 1170 * iser_ib_fini_hcas 1171 * 1172 * Teardown the iSER HCA list initialized above. 1173 */ 1174 static int 1175 iser_ib_fini_hcas(void) 1176 { 1177 iser_hca_t *nexthca, *hca; 1178 int status; 1179 1180 mutex_enter(&iser_state->is_hcalist_lock); 1181 for (hca = list_head(&iser_state->is_hcalist); 1182 hca != NULL; 1183 hca = nexthca) { 1184 1185 nexthca = list_next(&iser_state->is_hcalist, hca); 1186 1187 list_remove(&iser_state->is_hcalist, hca); 1188 1189 status = iser_ib_free_hca(hca); 1190 if (status != IBT_SUCCESS) { 1191 ISER_LOG(CE_NOTE, "iser_ib_fini_hcas: failed to free " 1192 "HCA during fini"); 1193 list_insert_tail(&iser_state->is_hcalist, hca); 1194 return (DDI_FAILURE); 1195 } 1196 1197 iser_state->is_num_hcas--; 1198 1199 } 1200 mutex_exit(&iser_state->is_hcalist_lock); 1201 list_destroy(&iser_state->is_hcalist); 1202 mutex_destroy(&iser_state->is_hcalist_lock); 1203 1204 return (DDI_SUCCESS); 1205 } 1206 1207 /* 1208 * iser_ib_alloc_hca 1209 * 1210 * This function opens the given HCA device, gathers the HCA state information 1211 * and adds the HCA handle 1212 */ 1213 static iser_hca_t * 1214 iser_ib_alloc_hca(ib_guid_t guid) 1215 { 1216 iser_hca_t *hca; 1217 int status; 1218 1219 /* Allocate an iser_hca_t HCA handle */ 1220 hca = (iser_hca_t *)kmem_zalloc(sizeof (iser_hca_t), KM_SLEEP); 1221 1222 /* Open this HCA */ 1223 status = ibt_open_hca(iser_state->is_ibhdl, guid, &hca->hca_hdl); 1224 if (status != IBT_SUCCESS) { 1225 ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: ibt_open_hca failed:" 1226 " guid (0x%llx) status (0x%x)", (longlong_t)guid, status); 1227 kmem_free(hca, sizeof (iser_hca_t)); 1228 return (NULL); 1229 } 1230 1231 hca->hca_guid = guid; 1232 hca->hca_clnt_hdl = iser_state->is_ibhdl; 1233 1234 /* Query the HCA */ 1235 status = ibt_query_hca(hca->hca_hdl, &hca->hca_attr); 1236 if (status != 
IBT_SUCCESS) { 1237 ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: ibt_query_hca " 1238 "failure: guid (0x%llx) status (0x%x)", 1239 (longlong_t)guid, status); 1240 (void) ibt_close_hca(hca->hca_hdl); 1241 kmem_free(hca, sizeof (iser_hca_t)); 1242 return (NULL); 1243 } 1244 1245 /* Query all ports on the HCA */ 1246 status = ibt_query_hca_ports(hca->hca_hdl, 0, 1247 &hca->hca_port_info, &hca->hca_num_ports, 1248 &hca->hca_port_info_sz); 1249 if (status != IBT_SUCCESS) { 1250 ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: " 1251 "ibt_query_hca_ports failure: guid (0x%llx) " 1252 "status (0x%x)", (longlong_t)guid, status); 1253 (void) ibt_close_hca(hca->hca_hdl); 1254 kmem_free(hca, sizeof (iser_hca_t)); 1255 return (NULL); 1256 } 1257 1258 /* Allocate a single PD on this HCA */ 1259 status = ibt_alloc_pd(hca->hca_hdl, IBT_PD_NO_FLAGS, 1260 &hca->hca_pdhdl); 1261 if (status != IBT_SUCCESS) { 1262 ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: ibt_alloc_pd " 1263 "failure: guid (0x%llx) status (0x%x)", 1264 (longlong_t)guid, status); 1265 (void) ibt_close_hca(hca->hca_hdl); 1266 ibt_free_portinfo(hca->hca_port_info, hca->hca_port_info_sz); 1267 kmem_free(hca, sizeof (iser_hca_t)); 1268 return (NULL); 1269 } 1270 1271 /* Initialize the message and data MR caches for this HCA */ 1272 iser_init_hca_caches(hca); 1273 1274 return (hca); 1275 } 1276 1277 static int 1278 iser_ib_free_hca(iser_hca_t *hca) 1279 { 1280 int status; 1281 ibt_hca_portinfo_t *hca_port_info; 1282 uint_t hca_port_info_sz; 1283 1284 ASSERT(hca != NULL); 1285 if (hca->hca_failed) 1286 return (DDI_FAILURE); 1287 1288 hca_port_info = hca->hca_port_info; 1289 hca_port_info_sz = hca->hca_port_info_sz; 1290 1291 /* 1292 * Free the memory regions before freeing 1293 * the associated protection domain 1294 */ 1295 iser_fini_hca_caches(hca); 1296 1297 status = ibt_free_pd(hca->hca_hdl, hca->hca_pdhdl); 1298 if (status != IBT_SUCCESS) { 1299 ISER_LOG(CE_NOTE, "iser_ib_free_hca: failed to free PD " 1300 "status=0x%x", status); 1301 goto 
out_caches; 1302 } 1303 1304 status = ibt_close_hca(hca->hca_hdl); 1305 if (status != IBT_SUCCESS) { 1306 ISER_LOG(CE_NOTE, "iser_ib_fini_hcas: failed to close HCA " 1307 "status=0x%x", status); 1308 goto out_pd; 1309 } 1310 1311 ibt_free_portinfo(hca_port_info, hca_port_info_sz); 1312 1313 kmem_free(hca, sizeof (iser_hca_t)); 1314 return (DDI_SUCCESS); 1315 1316 /* 1317 * We only managed to partially tear down the HCA, try to put it back 1318 * like it was before returning. 1319 */ 1320 out_pd: 1321 status = ibt_alloc_pd(hca->hca_hdl, IBT_PD_NO_FLAGS, &hca->hca_pdhdl); 1322 if (status != IBT_SUCCESS) { 1323 hca->hca_failed = B_TRUE; 1324 /* Report error and exit */ 1325 ISER_LOG(CE_NOTE, "iser_ib_free_hca: could not re-alloc PD " 1326 "status=0x%x", status); 1327 return (DDI_FAILURE); 1328 } 1329 1330 out_caches: 1331 iser_init_hca_caches(hca); 1332 1333 return (DDI_FAILURE); 1334 } 1335 1336 static int 1337 iser_ib_update_hcaports(iser_hca_t *hca) 1338 { 1339 ibt_hca_portinfo_t *pinfop, *oldpinfop; 1340 uint_t size, oldsize, nport; 1341 int status; 1342 1343 ASSERT(hca != NULL); 1344 1345 status = ibt_query_hca_ports(hca->hca_hdl, 0, &pinfop, &nport, &size); 1346 if (status != IBT_SUCCESS) { 1347 ISER_LOG(CE_NOTE, "ibt_query_hca_ports failed: %d", status); 1348 return (status); 1349 } 1350 1351 oldpinfop = hca->hca_port_info; 1352 oldsize = hca->hca_port_info_sz; 1353 hca->hca_port_info = pinfop; 1354 hca->hca_port_info_sz = size; 1355 1356 (void) ibt_free_portinfo(oldpinfop, oldsize); 1357 1358 return (IBT_SUCCESS); 1359 } 1360 1361 /* 1362 * iser_ib_gid2hca 1363 * Given a gid, find the corresponding hca 1364 */ 1365 iser_hca_t * 1366 iser_ib_gid2hca(ib_gid_t gid) 1367 { 1368 1369 iser_hca_t *hca; 1370 int i; 1371 1372 mutex_enter(&iser_state->is_hcalist_lock); 1373 for (hca = list_head(&iser_state->is_hcalist); 1374 hca != NULL; 1375 hca = list_next(&iser_state->is_hcalist, hca)) { 1376 1377 for (i = 0; i < hca->hca_num_ports; i++) { 1378 if 
((hca->hca_port_info[i].p_sgid_tbl[0].gid_prefix == 1379 gid.gid_prefix) && 1380 (hca->hca_port_info[i].p_sgid_tbl[0].gid_guid == 1381 gid.gid_guid)) { 1382 1383 mutex_exit(&iser_state->is_hcalist_lock); 1384 1385 return (hca); 1386 } 1387 } 1388 } 1389 mutex_exit(&iser_state->is_hcalist_lock); 1390 return (NULL); 1391 } 1392 1393 /* 1394 * iser_ib_guid2hca 1395 * Given a HCA guid, find the corresponding HCA 1396 */ 1397 iser_hca_t * 1398 iser_ib_guid2hca(ib_guid_t guid) 1399 { 1400 1401 iser_hca_t *hca; 1402 1403 mutex_enter(&iser_state->is_hcalist_lock); 1404 for (hca = list_head(&iser_state->is_hcalist); 1405 hca != NULL; 1406 hca = list_next(&iser_state->is_hcalist, hca)) { 1407 1408 if (hca->hca_guid == guid) { 1409 mutex_exit(&iser_state->is_hcalist_lock); 1410 return (hca); 1411 } 1412 } 1413 mutex_exit(&iser_state->is_hcalist_lock); 1414 return (NULL); 1415 } 1416 1417 /* 1418 * iser_ib_conv_sockaddr2ibtaddr 1419 * This function converts a socket address into the IBT format 1420 */ 1421 void iser_ib_conv_sockaddr2ibtaddr( 1422 idm_sockaddr_t *saddr, ibt_ip_addr_t *ibt_addr) 1423 { 1424 if (saddr == NULL) { 1425 ibt_addr->family = AF_UNSPEC; 1426 ibt_addr->un.ip4addr = 0; 1427 } else { 1428 switch (saddr->sin.sa_family) { 1429 case AF_INET: 1430 1431 ibt_addr->family = saddr->sin4.sin_family; 1432 ibt_addr->un.ip4addr = saddr->sin4.sin_addr.s_addr; 1433 break; 1434 1435 case AF_INET6: 1436 1437 ibt_addr->family = saddr->sin6.sin6_family; 1438 ibt_addr->un.ip6addr = saddr->sin6.sin6_addr; 1439 break; 1440 1441 default: 1442 ibt_addr->family = AF_UNSPEC; 1443 } 1444 1445 } 1446 } 1447 1448 /* 1449 * iser_ib_conv_ibtaddr2sockaddr 1450 * This function converts an IBT ip address handle to a sockaddr 1451 */ 1452 void iser_ib_conv_ibtaddr2sockaddr(struct sockaddr_storage *ss, 1453 ibt_ip_addr_t *ibt_addr, in_port_t port) 1454 { 1455 struct sockaddr_in *sin; 1456 struct sockaddr_in6 *sin6; 1457 1458 switch (ibt_addr->family) { 1459 case AF_INET: 1460 case 
AF_UNSPEC: 1461 1462 sin = (struct sockaddr_in *)ibt_addr; 1463 sin->sin_port = ntohs(port); 1464 bcopy(sin, ss, sizeof (struct sockaddr_in)); 1465 break; 1466 1467 case AF_INET6: 1468 1469 sin6 = (struct sockaddr_in6 *)ibt_addr; 1470 sin6->sin6_port = ntohs(port); 1471 bcopy(sin6, ss, sizeof (struct sockaddr_in6)); 1472 break; 1473 1474 default: 1475 ISER_LOG(CE_NOTE, "iser_ib_conv_ibtaddr2sockaddr: " 1476 "unknown family type: 0x%x", ibt_addr->family); 1477 } 1478 } 1479 1480 /* 1481 * iser_ib_setup_cq 1482 * This function sets up the Completion Queue size and allocates the specified 1483 * Completion Queue 1484 */ 1485 static int 1486 iser_ib_setup_cq(ibt_hca_hdl_t hca_hdl, uint_t cq_size, ibt_cq_hdl_t *cq_hdl) 1487 { 1488 1489 ibt_cq_attr_t cq_attr; 1490 int status; 1491 1492 cq_attr.cq_size = cq_size; 1493 cq_attr.cq_sched = 0; 1494 cq_attr.cq_flags = IBT_CQ_NO_FLAGS; 1495 1496 /* Allocate a Completion Queue */ 1497 status = ibt_alloc_cq(hca_hdl, &cq_attr, cq_hdl, NULL); 1498 if (status != IBT_SUCCESS) { 1499 ISER_LOG(CE_NOTE, "iser_ib_setup_cq: ibt_alloc_cq failure (%d)", 1500 status); 1501 return (status); 1502 } 1503 1504 return (ISER_STATUS_SUCCESS); 1505 } 1506 1507 /* 1508 * iser_ib_setup_chanargs 1509 * 1510 */ 1511 static void 1512 iser_ib_setup_chanargs(uint8_t hca_port, ibt_cq_hdl_t scq_hdl, 1513 ibt_cq_hdl_t rcq_hdl, uint_t sq_size, uint_t rq_size, 1514 ibt_pd_hdl_t hca_pdhdl, ibt_rc_chan_alloc_args_t *cargs) 1515 { 1516 1517 bzero(cargs, sizeof (ibt_rc_chan_alloc_args_t)); 1518 1519 /* 1520 * Set up the size of the channels send queue, receive queue and the 1521 * maximum number of elements in a scatter gather list of work requests 1522 * posted to the send and receive queues. 
1523 */ 1524 cargs->rc_sizes.cs_sq = sq_size; 1525 cargs->rc_sizes.cs_rq = rq_size; 1526 cargs->rc_sizes.cs_sq_sgl = ISER_IB_SGLIST_SIZE; 1527 cargs->rc_sizes.cs_rq_sgl = ISER_IB_SGLIST_SIZE; 1528 1529 /* 1530 * All Work requests signaled on a WR basis will receive a send 1531 * request completion. 1532 */ 1533 cargs->rc_flags = IBT_ALL_SIGNALED; 1534 1535 /* Enable RDMA read and RDMA write on the channel end points */ 1536 cargs->rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR; 1537 1538 /* Set the local hca port on which the channel is allocated */ 1539 cargs->rc_hca_port_num = hca_port; 1540 1541 /* Set the Send and Receive Completion Queue handles */ 1542 cargs->rc_scq = scq_hdl; 1543 cargs->rc_rcq = rcq_hdl; 1544 1545 /* Set the protection domain associated with the channel */ 1546 cargs->rc_pd = hca_pdhdl; 1547 1548 /* No SRQ usage */ 1549 cargs->rc_srq = NULL; 1550 } 1551 1552 /* 1553 * iser_ib_init_qp 1554 * Initialize the QP handle 1555 */ 1556 void 1557 iser_ib_init_qp(iser_chan_t *chan, uint_t sq_size, uint_t rq_size) 1558 { 1559 /* Initialize the handle lock */ 1560 mutex_init(&chan->ic_qp.qp_lock, NULL, MUTEX_DRIVER, NULL); 1561 1562 /* Record queue sizes */ 1563 chan->ic_qp.sq_size = sq_size; 1564 chan->ic_qp.rq_size = rq_size; 1565 1566 /* Initialize the RQ monitoring data */ 1567 chan->ic_qp.rq_depth = rq_size; 1568 chan->ic_qp.rq_level = 0; 1569 chan->ic_qp.rq_lwm = (chan->ic_recvcq_sz * ISER_IB_RQ_LWM_PCT) / 100; 1570 1571 /* Initialize the taskq flag */ 1572 chan->ic_qp.rq_taskqpending = B_FALSE; 1573 } 1574 1575 /* 1576 * iser_ib_fini_qp 1577 * Teardown the QP handle 1578 */ 1579 void 1580 iser_ib_fini_qp(iser_qp_t *qp) 1581 { 1582 /* Destroy the handle lock */ 1583 mutex_destroy(&qp->qp_lock); 1584 } 1585 1586 static int 1587 iser_ib_activate_port(idm_svc_t *idm_svc, ib_guid_t guid, ib_gid_t gid) 1588 { 1589 iser_svc_t *iser_svc; 1590 iser_sbind_t *is_sbind; 1591 int status; 1592 1593 iser_svc = idm_svc->is_iser_svc; 1594 1595 /* 1596 * Save the 
address of the service bind handle in the 1597 * iser_svc_t to undo the service binding at a later time 1598 */ 1599 is_sbind = kmem_zalloc(sizeof (iser_sbind_t), KM_SLEEP); 1600 is_sbind->is_gid = gid; 1601 is_sbind->is_guid = guid; 1602 1603 status = ibt_bind_service(iser_svc->is_srvhdl, gid, NULL, 1604 idm_svc, &is_sbind->is_sbindhdl); 1605 1606 if (status != IBT_SUCCESS) { 1607 ISER_LOG(CE_NOTE, "iser_ib_activate_port: status(0x%x): " 1608 "Bind service(%llx) on port(%llx:%llx) failed", 1609 status, (longlong_t)iser_svc->is_svcid, 1610 (longlong_t)gid.gid_prefix, (longlong_t)gid.gid_guid); 1611 1612 kmem_free(is_sbind, sizeof (iser_sbind_t)); 1613 1614 return (status); 1615 } 1616 1617 list_insert_tail(&iser_svc->is_sbindlist, is_sbind); 1618 1619 return (IBT_SUCCESS); 1620 } 1621 1622 static void 1623 iser_ib_deactivate_port(ib_guid_t hca_guid, ib_gid_t gid) 1624 { 1625 iser_svc_t *iser_svc; 1626 iser_conn_t *iser_conn; 1627 iser_sbind_t *is_sbind; 1628 idm_conn_t *idm_conn; 1629 1630 /* 1631 * Iterate through the global list of IDM target connections. 1632 * Issue a TRANSPORT_FAIL for any connections on this port, and 1633 * if there is a bound service running on the port, tear it down. 
1634 */ 1635 mutex_enter(&idm.idm_global_mutex); 1636 for (idm_conn = list_head(&idm.idm_tgt_conn_list); 1637 idm_conn != NULL; 1638 idm_conn = list_next(&idm.idm_tgt_conn_list, idm_conn)) { 1639 1640 if (idm_conn->ic_transport_type != IDM_TRANSPORT_TYPE_ISER) { 1641 /* this is not an iSER connection, skip it */ 1642 continue; 1643 } 1644 1645 iser_conn = idm_conn->ic_transport_private; 1646 if (iser_conn->ic_chan->ic_ibt_path.pi_hca_guid != hca_guid) { 1647 /* this iSER connection is on a different port */ 1648 continue; 1649 } 1650 1651 /* Fail the transport for this connection */ 1652 idm_conn_event(idm_conn, CE_TRANSPORT_FAIL, IDM_STATUS_FAIL); 1653 1654 if (idm_conn->ic_conn_type == CONN_TYPE_INI) { 1655 /* initiator connection, nothing else to do */ 1656 continue; 1657 } 1658 1659 /* Check for a service binding */ 1660 iser_svc = idm_conn->ic_svc_binding->is_iser_svc; 1661 is_sbind = iser_ib_get_bind(iser_svc, hca_guid, gid); 1662 if (is_sbind != NULL) { 1663 /* This service is still bound, tear it down */ 1664 ibt_unbind_service(iser_svc->is_srvhdl, 1665 is_sbind->is_sbindhdl); 1666 list_remove(&iser_svc->is_sbindlist, is_sbind); 1667 kmem_free(is_sbind, sizeof (iser_sbind_t)); 1668 } 1669 } 1670 mutex_exit(&idm.idm_global_mutex); 1671 } 1672 1673 static iser_sbind_t * 1674 iser_ib_get_bind(iser_svc_t *iser_svc, ib_guid_t hca_guid, ib_gid_t gid) 1675 { 1676 iser_sbind_t *is_sbind; 1677 1678 for (is_sbind = list_head(&iser_svc->is_sbindlist); 1679 is_sbind != NULL; 1680 is_sbind = list_next(&iser_svc->is_sbindlist, is_sbind)) { 1681 1682 if ((is_sbind->is_guid == hca_guid) && 1683 (is_sbind->is_gid.gid_prefix == gid.gid_prefix) && 1684 (is_sbind->is_gid.gid_guid == gid.gid_guid)) { 1685 return (is_sbind); 1686 } 1687 } 1688 return (NULL); 1689 } 1690