1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 2005 SilverStorm Technologies, Inc. All rights reserved. 27 * 28 * This software is available to you under a choice of one of two 29 * licenses. You may choose to be licensed under the terms of the GNU 30 * General Public License (GPL) Version 2, available from the file 31 * COPYING in the main directory of this source tree, or the 32 * OpenIB.org BSD license below: 33 * 34 * Redistribution and use in source and binary forms, with or 35 * without modification, are permitted provided that the following 36 * conditions are met: 37 * 38 * - Redistributions of source code must retain the above 39 * copyright notice, this list of conditions and the following 40 * disclaimer. 41 * 42 * - Redistributions in binary form must reproduce the above 43 * copyright notice, this list of conditions and the following 44 * disclaimer in the documentation and/or other materials 45 * provided with the distribution. 
46 * 47 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 48 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 49 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 50 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 51 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 52 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 53 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 54 * SOFTWARE. 55 * 56 */ 57 /* 58 * Sun elects to include this software in Sun product 59 * under the OpenIB BSD license. 60 * 61 * 62 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 63 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 64 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 65 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 66 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 67 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 68 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 69 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 70 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 71 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 72 * POSSIBILITY OF SUCH DAMAGE. 
#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ib/clients/rds/rdsib_cm.h>
#include <sys/ib/clients/rds/rdsib_ib.h>
#include <sys/ib/clients/rds/rdsib_buf.h>
#include <sys/ib/clients/rds/rdsib_ep.h>
#include <sys/ib/clients/rds/rds_kstat.h>

static void rds_async_handler(void *clntp, ibt_hca_hdl_t hdl,
    ibt_async_code_t code, ibt_async_event_t *event);

/* IBTF client registration info, passed to ibt_attach() below */
static struct ibt_clnt_modinfo_s rds_ib_modinfo = {
	IBTI_V2,
	IBT_NETWORK,
	rds_async_handler,
	NULL,
	"RDS"
};

/* performance tunables */
uint_t rds_no_interrupts = 0;
uint_t rds_poll_percent_full = 25;
uint_t rds_wc_signal = IBT_NEXT_SOLICITED;
uint_t rds_waittime_ms = 100; /* ms */

extern dev_info_t *rdsib_dev_info;
extern void rds_close_sessions();

/*
 * Clamp the global buffer-count/memory tunables (MaxDataSendBuffers,
 * MaxDataRecvBuffers, MaxCtrlSendBuffers, MaxCtrlRecvBuffers, NDataRX)
 * so that the SQ/RQ and CQ sizes requested later in
 * rds_ep_alloc_rc_channel() never exceed what this HCA supports.
 * Called once per HCA during rdsib_initialize_ib().
 */
static void
rdsib_validate_chan_sizes(ibt_hca_attr_t *hattrp)
{
	/* The SQ size should not be more than that supported by the HCA */
	if (((MaxDataSendBuffers + RDS_NUM_ACKS) > hattrp->hca_max_chan_sz) ||
	    ((MaxDataSendBuffers + RDS_NUM_ACKS) > hattrp->hca_max_cq_sz)) {
		RDS_DPRINTF2("RDSIB", "MaxDataSendBuffers + %d is greater "
		    "than that supported by the HCA driver "
		    "(%d + %d > %d or %d), lowering it to a supported value.",
		    RDS_NUM_ACKS, MaxDataSendBuffers, RDS_NUM_ACKS,
		    hattrp->hca_max_chan_sz, hattrp->hca_max_cq_sz);

		/* min(chan_sz, cq_sz) minus the slots reserved for ACK WRs */
		MaxDataSendBuffers = (hattrp->hca_max_chan_sz >
		    hattrp->hca_max_cq_sz) ?
		    hattrp->hca_max_cq_sz - RDS_NUM_ACKS :
		    hattrp->hca_max_chan_sz - RDS_NUM_ACKS;
	}

	/* The RQ size should not be more than that supported by the HCA */
	if ((MaxDataRecvBuffers > hattrp->hca_max_chan_sz) ||
	    (MaxDataRecvBuffers > hattrp->hca_max_cq_sz)) {
		RDS_DPRINTF2("RDSIB", "MaxDataRecvBuffers is greater than that "
		    "supported by the HCA driver (%d > %d or %d), lowering it "
		    "to a supported value.", MaxDataRecvBuffers,
		    hattrp->hca_max_chan_sz, hattrp->hca_max_cq_sz);

		MaxDataRecvBuffers = (hattrp->hca_max_chan_sz >
		    hattrp->hca_max_cq_sz) ? hattrp->hca_max_cq_sz :
		    hattrp->hca_max_chan_sz;
	}

	/* The SQ size should not be more than that supported by the HCA */
	if ((MaxCtrlSendBuffers > hattrp->hca_max_chan_sz) ||
	    (MaxCtrlSendBuffers > hattrp->hca_max_cq_sz)) {
		RDS_DPRINTF2("RDSIB", "MaxCtrlSendBuffers is greater than that "
		    "supported by the HCA driver (%d > %d or %d), lowering it "
		    "to a supported value.", MaxCtrlSendBuffers,
		    hattrp->hca_max_chan_sz, hattrp->hca_max_cq_sz);

		MaxCtrlSendBuffers = (hattrp->hca_max_chan_sz >
		    hattrp->hca_max_cq_sz) ? hattrp->hca_max_cq_sz :
		    hattrp->hca_max_chan_sz;
	}

	/* The RQ size should not be more than that supported by the HCA */
	if ((MaxCtrlRecvBuffers > hattrp->hca_max_chan_sz) ||
	    (MaxCtrlRecvBuffers > hattrp->hca_max_cq_sz)) {
		RDS_DPRINTF2("RDSIB", "MaxCtrlRecvBuffers is greater than that "
		    "supported by the HCA driver (%d > %d or %d), lowering it "
		    "to a supported value.", MaxCtrlRecvBuffers,
		    hattrp->hca_max_chan_sz, hattrp->hca_max_cq_sz);

		MaxCtrlRecvBuffers = (hattrp->hca_max_chan_sz >
		    hattrp->hca_max_cq_sz) ? hattrp->hca_max_cq_sz :
		    hattrp->hca_max_chan_sz;
	}

	/* The MaxRecvMemory should be less than that supported by the HCA */
	if ((NDataRX * RdsPktSize) > hattrp->hca_max_memr_len) {
		RDS_DPRINTF2("RDSIB", "MaxRecvMemory is greater than that "
		    "supported by the HCA driver (%d > %d), lowering it to %d",
		    NDataRX * RdsPktSize, hattrp->hca_max_memr_len,
		    hattrp->hca_max_memr_len);

		NDataRX = hattrp->hca_max_memr_len/RdsPktSize;
	}
}

/*
 * Called from attach
 *
 * Registers with IBTF and opens every HCA on the system, building the
 * rdsib_statep->rds_hcalistp list.  An HCA that fails ibt_open_hca(),
 * ibt_query_hca(), ibt_query_hca_ports() or ibt_alloc_pd() is skipped;
 * success requires at least one fully-initialized HCA.
 * Returns 0 on success, -1 on failure.
 */
int
rdsib_initialize_ib()
{
	ib_guid_t	*guidp;
	rds_hca_t	*hcap, *hcap1;
	uint_t		ix, hcaix, nhcas;
	int		ret;

	RDS_DPRINTF2("rdsib_initialize_ib", "enter: statep %p", rdsib_statep);

	ASSERT(rdsib_statep != NULL);
	if (rdsib_statep == NULL) {
		RDS_DPRINTF1("rdsib_initialize_ib",
		    "RDS Statep not initialized");
		return (-1);
	}

	/* How many hcas are there? */
	nhcas = ibt_get_hca_list(&guidp);
	if (nhcas == 0) {
		RDS_DPRINTF2("rdsib_initialize_ib", "No IB HCAs Available");
		return (-1);
	}

	RDS_DPRINTF3("rdsib_initialize_ib", "Number of HCAs: %d", nhcas);

	/* Register with IBTF */
	ret = ibt_attach(&rds_ib_modinfo, rdsib_dev_info, rdsib_statep,
	    &rdsib_statep->rds_ibhdl);
	if (ret != IBT_SUCCESS) {
		RDS_DPRINTF2("rdsib_initialize_ib", "ibt_attach failed: %d",
		    ret);
		(void) ibt_free_hca_list(guidp, nhcas);
		return (-1);
	}

	/*
	 * Open each HCA and gather its information. Don't care about HCAs
	 * that cannot be opened. It is OK as long as atleast one HCA can be
	 * opened.
	 * Initialize a HCA only if all the information is available.
	 */
	hcap1 = NULL;
	for (ix = 0, hcaix = 0; ix < nhcas; ix++) {
		RDS_DPRINTF3(LABEL, "Open HCA: 0x%llx", guidp[ix]);

		hcap = (rds_hca_t *)kmem_zalloc(sizeof (rds_hca_t), KM_SLEEP);

		ret = ibt_open_hca(rdsib_statep->rds_ibhdl, guidp[ix],
		    &hcap->hca_hdl);
		if (ret != IBT_SUCCESS) {
			RDS_DPRINTF2("rdsib_initialize_ib",
			    "ibt_open_hca: 0x%llx failed: %d", guidp[ix], ret);
			kmem_free(hcap, sizeof (rds_hca_t));
			continue;
		}

		hcap->hca_guid = guidp[ix];

		ret = ibt_query_hca(hcap->hca_hdl, &hcap->hca_attr);
		if (ret != IBT_SUCCESS) {
			RDS_DPRINTF2("rdsib_initialize_ib",
			    "Query HCA: 0x%llx failed: %d", guidp[ix], ret);
			ret = ibt_close_hca(hcap->hca_hdl);
			ASSERT(ret == IBT_SUCCESS);
			kmem_free(hcap, sizeof (rds_hca_t));
			continue;
		}

		/* port 0 means: query all ports of this HCA */
		ret = ibt_query_hca_ports(hcap->hca_hdl, 0,
		    &hcap->hca_pinfop, &hcap->hca_nports, &hcap->hca_pinfo_sz);
		if (ret != IBT_SUCCESS) {
			RDS_DPRINTF2("rdsib_initialize_ib",
			    "Query HCA 0x%llx ports failed: %d", guidp[ix],
			    ret);
			ret = ibt_close_hca(hcap->hca_hdl);
			ASSERT(ret == IBT_SUCCESS);
			kmem_free(hcap, sizeof (rds_hca_t));
			continue;
		}

		/* Only one PD per HCA is allocated, so do it here */
		ret = ibt_alloc_pd(hcap->hca_hdl, IBT_PD_NO_FLAGS,
		    &hcap->hca_pdhdl);
		if (ret != IBT_SUCCESS) {
			RDS_DPRINTF2("rdsib_initialize_ib",
			    "ibt_alloc_pd 0x%llx failed: %d", guidp[ix], ret);
			(void) ibt_free_portinfo(hcap->hca_pinfop,
			    hcap->hca_pinfo_sz);
			ret = ibt_close_hca(hcap->hca_hdl);
			ASSERT(ret == IBT_SUCCESS);
			kmem_free(hcap, sizeof (rds_hca_t));
			continue;
		}

		rdsib_validate_chan_sizes(&hcap->hca_attr);

		/* this HCA is fully initialized, go to the next one */
		hcaix++;
		hcap->hca_nextp = hcap1;
		hcap1 = hcap;
	}

	/* free the HCA list, we are done with it */
	(void) ibt_free_hca_list(guidp, nhcas);

	if (hcaix == 0) {
		/* Failed to Initialize even one HCA */
		RDS_DPRINTF2("rdsib_initialize_ib", "No HCAs are initialized");
		(void) ibt_detach(rdsib_statep->rds_ibhdl);
		rdsib_statep->rds_ibhdl = NULL;
		return (-1);
	}

	if (hcaix < nhcas) {
		RDS_DPRINTF2("rdsib_open_ib", "HCAs %d/%d failed to initialize",
		    (nhcas - hcaix), nhcas);
	}

	rdsib_statep->rds_hcalistp = hcap1;
	rdsib_statep->rds_nhcas = hcaix;

	RDS_DPRINTF2("rdsib_initialize_ib", "return: statep %p", rdsib_statep);

	return (0);
}

/*
 * Called from detach
 *
 * Tears down everything rdsib_initialize_ib() set up: closes all sessions,
 * detaches the HCA list from the state (under rds_hca_lock), frees each
 * HCA's PD/portinfo/handle, then detaches from IBTF.
 */
void
rdsib_deinitialize_ib()
{
	rds_hca_t	*hcap, *nextp;
	int		ret;

	RDS_DPRINTF2("rdsib_deinitialize_ib", "enter: statep %p", rdsib_statep);

	/* close and destroy all the sessions */
	rds_close_sessions(NULL);

	/* Release all HCA resources */
	rw_enter(&rdsib_statep->rds_hca_lock, RW_WRITER);
	hcap = rdsib_statep->rds_hcalistp;
	rdsib_statep->rds_hcalistp = NULL;
	rdsib_statep->rds_nhcas = 0;
	rw_exit(&rdsib_statep->rds_hca_lock);

	/* list is now private to this thread; free it without the lock */
	while (hcap != NULL) {
		nextp = hcap->hca_nextp;

		ret = ibt_free_pd(hcap->hca_hdl, hcap->hca_pdhdl);
		ASSERT(ret == IBT_SUCCESS);

		(void) ibt_free_portinfo(hcap->hca_pinfop, hcap->hca_pinfo_sz);

		ret = ibt_close_hca(hcap->hca_hdl);
		ASSERT(ret == IBT_SUCCESS);

		kmem_free(hcap, sizeof (rds_hca_t));
		hcap = nextp;
	}

	/* Deregister with IBTF */
	if (rdsib_statep->rds_ibhdl != NULL) {
		(void) ibt_detach(rdsib_statep->rds_ibhdl);
		rdsib_statep->rds_ibhdl = NULL;
	}

	RDS_DPRINTF2("rdsib_deinitialize_ib", "return: statep %p",
	    rdsib_statep);
}

/*
 * Called on open of first RDS socket
 *
 * Registers and binds the RDS service so incoming connection requests are
 * accepted.  Note: a rds_bind_service() failure is only logged; the
 * function still returns 0 in that case.
 */
int
rdsib_open_ib()
{
	int	ret;

	RDS_DPRINTF2("rdsib_open_ib", "enter: statep %p", rdsib_statep);

	/* Enable incoming connection requests */
	if (rdsib_statep->rds_srvhdl == NULL) {
		rdsib_statep->rds_srvhdl =
		    rds_register_service(rdsib_statep->rds_ibhdl);
		if (rdsib_statep->rds_srvhdl == NULL) {
			RDS_DPRINTF2("rdsib_open_ib",
			    "Service registration failed");
			return (-1);
		} else {
			/* bind the service on all available ports */
			ret = rds_bind_service(rdsib_statep);
			if (ret != 0) {
				RDS_DPRINTF2("rdsib_open_ib",
				    "Bind service failed: %d", ret);
			}
		}
	}

	RDS_DPRINTF2("rdsib_open_ib", "return: statep %p", rdsib_statep);

	return (0);
}

/*
 * Called when all ports are closed.
 *
 * Unbinds and deregisters both the current service (rds_srvhdl) and the
 * old service (rds_old_srvhdl).  Handles are NULLed only when
 * deregistration succeeds.
 * NOTE(review): the old-service calls are made unconditionally inside the
 * rds_srvhdl != NULL branch — presumably the IBTF calls tolerate a NULL
 * rds_old_srvhdl; confirm against the IBTF man pages.
 */
void
rdsib_close_ib()
{
	int	ret;

	RDS_DPRINTF2("rdsib_close_ib", "enter: statep %p", rdsib_statep);

	/* Disable incoming connection requests */
	if (rdsib_statep->rds_srvhdl != NULL) {
		ret = ibt_unbind_all_services(rdsib_statep->rds_srvhdl);
		if (ret != 0) {
			RDS_DPRINTF2("rdsib_close_ib",
			    "ibt_unbind_all_services failed: %d\n", ret);
		}
		ret = ibt_deregister_service(rdsib_statep->rds_ibhdl,
		    rdsib_statep->rds_srvhdl);
		if (ret != 0) {
			RDS_DPRINTF2("rdsib_close_ib",
			    "ibt_deregister_service failed: %d\n", ret);
		} else {
			rdsib_statep->rds_srvhdl = NULL;
		}

		ret = ibt_unbind_all_services(rdsib_statep->rds_old_srvhdl);
		if (ret != 0) {
			RDS_DPRINTF2("rdsib_close_ib",
			    "ibt_unbind_all_services failed for old service"
			    ": %d\n", ret);
		}
		ret = ibt_deregister_service(rdsib_statep->rds_ibhdl,
		    rdsib_statep->rds_old_srvhdl);
		if (ret != 0) {
			RDS_DPRINTF2("rdsib_close_ib",
			    "ibt_deregister_service failed for old service:"
			    "%d\n", ret);
		} else {
			rdsib_statep->rds_old_srvhdl = NULL;
		}
	}

	RDS_DPRINTF2("rdsib_close_ib", "return: statep %p", rdsib_statep);
}

/*
 * Return hcap, given the hca guid
 *
 * Linear search of statep->rds_hcalistp under rds_hca_lock (reader).
 * Returns NULL if no HCA with that GUID is on the list.
 */
rds_hca_t *
rds_get_hcap(rds_state_t *statep, ib_guid_t hca_guid)
{
	rds_hca_t	*hcap;

	RDS_DPRINTF4("rds_get_hcap", "rds_get_hcap: Enter: statep: 0x%p "
	    "guid: %llx", statep, hca_guid);

	rw_enter(&statep->rds_hca_lock, RW_READER);

	hcap = statep->rds_hcalistp;
	while ((hcap != NULL) && (hcap->hca_guid != hca_guid)) {
		hcap = hcap->hca_nextp;
	}

	rw_exit(&statep->rds_hca_lock);

	RDS_DPRINTF4("rds_get_hcap", "rds_get_hcap: return");

	return (hcap);
}

/*
 * Return hcap, given a gid
 *
 * Matches the given GID against sgid table entry 0 of every port of every
 * HCA on the list (under rds_hca_lock, reader).  Returns NULL when no
 * port matches.
 */
rds_hca_t *
rds_gid_to_hcap(rds_state_t *statep, ib_gid_t gid)
{
	rds_hca_t	*hcap;
	uint_t		ix;

	RDS_DPRINTF4("rds_gid_to_hcap", "Enter: statep: 0x%p gid: %llx:%llx",
	    statep, gid.gid_prefix, gid.gid_guid);

	rw_enter(&statep->rds_hca_lock, RW_READER);

	hcap = statep->rds_hcalistp;
	while (hcap != NULL) {
		for (ix = 0; ix < hcap->hca_nports; ix++) {
			if ((hcap->hca_pinfop[ix].p_sgid_tbl[0].gid_prefix ==
			    gid.gid_prefix) &&
			    (hcap->hca_pinfop[ix].p_sgid_tbl[0].gid_guid ==
			    gid.gid_guid)) {
				RDS_DPRINTF4("rds_gid_to_hcap",
				    "gid found in hcap: 0x%p", hcap);
				rw_exit(&statep->rds_hca_lock);
				return (hcap);
			}
		}
		hcap = hcap->hca_nextp;
	}

	rw_exit(&statep->rds_hca_lock);

	return (NULL);
}

/*
 * This is called from the send CQ handler
 *
 * Posts the next ACK (an RDMA write described by ep->ep_ackwr) if newer
 * messages arrived since the last ACK was posted; otherwise drops the
 * in-flight count (ep_rdmacnt) and returns.  The last-ACKed buffer id is
 * stored in the memory at ep_ackds.ds_va.
 */
void
rds_send_acknowledgement(rds_ep_t *ep)
{
	int	ret;
	uint_t	ix;

	RDS_DPRINTF4("rds_send_acknowledgement", "Enter EP(%p)", ep);

	mutex_enter(&ep->ep_lock);

	ASSERT(ep->ep_rdmacnt != 0);

	/*
	 * The previous ACK completed successfully, send the next one
	 * if more messages were received after sending the last ACK
	 */
	if (ep->ep_rbufid != *(uintptr_t *)(uintptr_t)ep->ep_ackds.ds_va) {
		*(uintptr_t *)(uintptr_t)ep->ep_ackds.ds_va = ep->ep_rbufid;
		mutex_exit(&ep->ep_lock);

		/* send acknowledgement */
		RDS_INCR_TXACKS();
		ret = ibt_post_send(ep->ep_chanhdl, &ep->ep_ackwr, 1, &ix);
		if (ret != IBT_SUCCESS) {
			RDS_DPRINTF2("rds_send_acknowledgement",
			    "EP(%p): ibt_post_send for acknowledgement "
			    "failed: %d, SQ depth: %d",
			    ep, ret, ep->ep_sndpool.pool_nbusy);
			/* post failed: undo the in-flight ACK count */
			mutex_enter(&ep->ep_lock);
			ep->ep_rdmacnt--;
			mutex_exit(&ep->ep_lock);
		}
	} else {
		/* ACKed all messages, no more to ACK */
		ep->ep_rdmacnt--;
		mutex_exit(&ep->ep_lock);
		return;
	}

	RDS_DPRINTF4("rds_send_acknowledgement", "Return EP(%p)", ep);
}

/*
 * Poll one work completion from a control-channel receive CQ and handle
 * it: on success, hand the control packet to rds_handle_control_message()
 * and free the buffer; on completion error, just free the buffer.  When
 * the RQ level drops to the low-water mark, dispatch rds_post_recv_buf()
 * on the taskq to replenish it (guarded by qp_taskqpending).
 * Returns the ibt_poll_cq() status; callers loop until IBT_CQ_EMPTY.
 */
static int
rds_poll_ctrl_completions(ibt_cq_hdl_t cq, rds_ep_t *ep)
{
	ibt_wc_t	wc;
	uint_t		npolled;
	rds_buf_t	*bp;
	rds_ctrl_pkt_t	*cpkt;
	rds_qp_t	*recvqp;
	int		ret = IBT_SUCCESS;

	RDS_DPRINTF4("rds_poll_ctrl_completions", "Enter: EP(%p)", ep);

	bzero(&wc, sizeof (ibt_wc_t));
	ret = ibt_poll_cq(cq, &wc, 1, &npolled);
	if (ret != IBT_SUCCESS) {
		if (ret != IBT_CQ_EMPTY) {
			RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): ibt_poll_cq "
			    "returned: %d", ep, cq, ret);
		} else {
			RDS_DPRINTF5(LABEL, "EP(%p) CQ(%p): ibt_poll_cq "
			    "returned: IBT_CQ_EMPTY", ep, cq);
		}
		return (ret);
	}

	/* the WR id carries the buffer pointer (see rds_post_recv_buf) */
	bp = (rds_buf_t *)(uintptr_t)wc.wc_id;

	if (wc.wc_status != IBT_WC_SUCCESS) {
		mutex_enter(&ep->ep_recvqp.qp_lock);
		ep->ep_recvqp.qp_level--;
		mutex_exit(&ep->ep_recvqp.qp_lock);

		/* Free the buffer */
		bp->buf_state = RDS_RCVBUF_FREE;
		rds_free_recv_buf(bp, 1);

		/* Receive completion failure */
		if (wc.wc_status != IBT_WC_WR_FLUSHED_ERR) {
			RDS_DPRINTF2("rds_poll_ctrl_completions",
			    "EP(%p) CQ(%p) BP(%p): WC Error Status: %d",
			    ep, cq, wc.wc_id, wc.wc_status);
		}
		return (ret);
	}

	/* there is one less in the RQ */
	recvqp = &ep->ep_recvqp;
	mutex_enter(&recvqp->qp_lock);
	recvqp->qp_level--;
	if ((recvqp->qp_taskqpending == B_FALSE) &&
	    (recvqp->qp_level <= recvqp->qp_lwm)) {
		/* Time to post more buffers into the RQ */
		recvqp->qp_taskqpending = B_TRUE;
		mutex_exit(&recvqp->qp_lock);

		ret = ddi_taskq_dispatch(rds_taskq,
		    rds_post_recv_buf, (void *)ep->ep_chanhdl, DDI_NOSLEEP);
		if (ret != DDI_SUCCESS) {
			RDS_DPRINTF2(LABEL, "ddi_taskq_dispatch failed: %d",
			    ret);
			/* dispatch failed: clear the flag so a later poll retries */
			mutex_enter(&recvqp->qp_lock);
			recvqp->qp_taskqpending = B_FALSE;
			mutex_exit(&recvqp->qp_lock);
		}
	} else {
		mutex_exit(&recvqp->qp_lock);
	}

	cpkt = (rds_ctrl_pkt_t *)(uintptr_t)bp->buf_ds.ds_va;
	rds_handle_control_message(ep->ep_sp, cpkt);

	bp->buf_state = RDS_RCVBUF_FREE;
	rds_free_recv_buf(bp, 1);

	RDS_DPRINTF4("rds_poll_ctrl_completions", "Return: EP(%p)", ep);

	return (ret);
}

#define	RDS_POST_FEW_ATATIME	100
/* Post recv WRs into the RQ. Assumes the ep->refcnt is already incremented */
void
rds_post_recv_buf(void *arg)
{
	ibt_channel_hdl_t	chanhdl;
	rds_ep_t		*ep;
	rds_session_t		*sp;
	rds_qp_t		*recvqp;
	rds_bufpool_t		*gp;
	rds_buf_t		*bp, *bp1;
	ibt_recv_wr_t		*wrp, wr[RDS_POST_FEW_ATATIME];
	rds_hca_t		*hcap;
	uint_t			npost, nspace, rcv_len;
	uint_t			ix, jx, kx;
	int			ret;

	chanhdl = (ibt_channel_hdl_t)arg;
	RDS_DPRINTF4("rds_post_recv_buf", "Enter: CHAN(%p)", chanhdl);
	RDS_INCR_POST_RCV_BUF_CALLS();

	ep = (rds_ep_t *)ibt_get_chan_private(chanhdl);
	ASSERT(ep != NULL);
	sp = ep->ep_sp;
	recvqp = &ep->ep_recvqp;

	RDS_DPRINTF5("rds_post_recv_buf", "EP(%p)", ep);

	/* get the hcap for the HCA hosting this channel */
	hcap = rds_get_hcap(rdsib_statep, ep->ep_hca_guid);
	if (hcap == NULL) {
		RDS_DPRINTF2("rds_post_recv_buf", "HCA (0x%llx) not found",
		    ep->ep_hca_guid);
		return;
	}

	/* Make sure the session is still connected */
	rw_enter(&sp->session_lock, RW_READER);
	if ((sp->session_state != RDS_SESSION_STATE_INIT) &&
	    (sp->session_state != RDS_SESSION_STATE_CONNECTED)) {
		RDS_DPRINTF2("rds_post_recv_buf", "EP(%p): Session is not "
		    "in active state (%d)", ep, sp->session_state);
		rw_exit(&sp->session_lock);
		return;
	}
	rw_exit(&sp->session_lock);

	/* how many can be posted */
	mutex_enter(&recvqp->qp_lock);
	nspace = recvqp->qp_depth - recvqp->qp_level;
	if (nspace == 0) {
		RDS_DPRINTF2("rds_post_recv_buf", "RQ is FULL");
		recvqp->qp_taskqpending = B_FALSE;
		mutex_exit(&recvqp->qp_lock);
		return;
	}
	mutex_exit(&recvqp->qp_lock);

	/* pick the data or control buffer pool to match this endpoint */
	if (ep->ep_type == RDS_EP_TYPE_DATA) {
		gp = &rds_dpool;
		rcv_len = RdsPktSize;
	} else {
		gp = &rds_cpool;
		rcv_len = RDS_CTRLPKT_SIZE;
	}

	bp = rds_get_buf(gp, nspace, &jx);
	if (bp == NULL) {
		RDS_DPRINTF2(LABEL, "EP(%p): No Recv buffers available", ep);
		/* try again later */
		ret = ddi_taskq_dispatch(rds_taskq, rds_post_recv_buf,
		    (void *)ep->ep_chanhdl, DDI_NOSLEEP);
		if (ret != DDI_SUCCESS) {
			RDS_DPRINTF2(LABEL, "ddi_taskq_dispatch failed: %d",
			    ret);
			mutex_enter(&recvqp->qp_lock);
			recvqp->qp_taskqpending = B_FALSE;
			mutex_exit(&recvqp->qp_lock);
		}
		return;
	}

	if (jx != nspace) {
		RDS_DPRINTF2(LABEL, "EP(%p): Recv buffers "
		    "needed: %d available: %d", ep, nspace, jx);
		nspace = jx;
	}

	/* stamp each buffer with this endpoint, lkey and receive length */
	bp1 = bp;
	for (ix = 0; ix < nspace; ix++) {
		bp1->buf_ep = ep;
		ASSERT(bp1->buf_state == RDS_RCVBUF_FREE);
		bp1->buf_state = RDS_RCVBUF_POSTED;
		bp1->buf_ds.ds_key = hcap->hca_lkey;
		bp1->buf_ds.ds_len = rcv_len;
		bp1 = bp1->buf_nextp;
	}

#if 0
	wrp = kmem_zalloc(RDS_POST_FEW_ATATIME * sizeof (ibt_recv_wr_t),
	    KM_SLEEP);
#else
	wrp = &wr[0];
#endif

	/* post in batches of at most RDS_POST_FEW_ATATIME WRs */
	npost = nspace;
	while (npost) {
		jx = (npost > RDS_POST_FEW_ATATIME) ?
		    RDS_POST_FEW_ATATIME : npost;
		for (ix = 0; ix < jx; ix++) {
			wrp[ix].wr_id = (uintptr_t)bp;
			wrp[ix].wr_nds = 1;
			wrp[ix].wr_sgl = &bp->buf_ds;
			bp = bp->buf_nextp;
		}

		ret = ibt_post_recv(chanhdl, wrp, jx, &kx);
		if ((ret != IBT_SUCCESS) || (kx != jx)) {
			RDS_DPRINTF2(LABEL, "ibt_post_recv for %d WRs failed: "
			    "%d", npost, ret);
			/* kx WRs of this batch did get posted */
			npost -= kx;
			break;
		}

		npost -= jx;
	}

	mutex_enter(&recvqp->qp_lock);
	if (npost != 0) {
		RDS_DPRINTF2("rds_post_recv_buf",
		    "EP(%p) Failed to post %d WRs", ep, npost);
		recvqp->qp_level += (nspace - npost);
	} else {
		recvqp->qp_level += nspace;
	}

	/*
	 * sometimes, the recv WRs can get consumed as soon as they are
	 * posted. In that case, taskq thread to post more WRs to the RQ will
	 * not be scheduled as the taskqpending flag is still set.
	 */
	if (recvqp->qp_level == 0) {
		mutex_exit(&recvqp->qp_lock);
		ret = ddi_taskq_dispatch(rds_taskq,
		    rds_post_recv_buf, (void *)ep->ep_chanhdl, DDI_NOSLEEP);
		if (ret != DDI_SUCCESS) {
			RDS_DPRINTF2("rds_post_recv_buf",
			    "ddi_taskq_dispatch failed: %d", ret);
			mutex_enter(&recvqp->qp_lock);
			recvqp->qp_taskqpending = B_FALSE;
			mutex_exit(&recvqp->qp_lock);
		}
	} else {
		recvqp->qp_taskqpending = B_FALSE;
		mutex_exit(&recvqp->qp_lock);
	}

#if 0
	kmem_free(wrp, RDS_POST_FEW_ATATIME * sizeof (ibt_recv_wr_t));
#endif

	RDS_DPRINTF4("rds_post_recv_buf", "Return: EP(%p)", ep);
}

/*
 * Poll one work completion from a data-channel receive CQ.  On success,
 * either delivers the message (single packet, or last packet of a
 * segmented message via the ep_segfbp/ep_seglbp chain) or links an
 * intermediate packet onto the segment chain.  Failed completions free
 * the buffer.  Replenishes the RQ via taskq at the low-water mark, same
 * as rds_poll_ctrl_completions().
 * Returns the ibt_poll_cq() status; callers loop until IBT_CQ_EMPTY.
 */
static int
rds_poll_data_completions(ibt_cq_hdl_t cq, rds_ep_t *ep)
{
	ibt_wc_t	wc;
	rds_buf_t	*bp;
	rds_data_hdr_t	*pktp;
	rds_qp_t	*recvqp;
	uint_t		npolled;
	int		ret = IBT_SUCCESS;


	RDS_DPRINTF4("rds_poll_data_completions", "Enter: EP(%p)", ep);

	bzero(&wc, sizeof (ibt_wc_t));
	ret = ibt_poll_cq(cq, &wc, 1, &npolled);
	if (ret != IBT_SUCCESS) {
		if (ret != IBT_CQ_EMPTY) {
			RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): ibt_poll_cq "
			    "returned: %d", ep, cq, ret);
		} else {
			RDS_DPRINTF5(LABEL, "EP(%p) CQ(%p): ibt_poll_cq "
			    "returned: IBT_CQ_EMPTY", ep, cq);
		}
		return (ret);
	}

	bp = (rds_buf_t *)(uintptr_t)wc.wc_id;
	ASSERT(bp->buf_state == RDS_RCVBUF_POSTED);
	bp->buf_state = RDS_RCVBUF_ONSOCKQ;
	bp->buf_nextp = NULL;

	if (wc.wc_status != IBT_WC_SUCCESS) {
		mutex_enter(&ep->ep_recvqp.qp_lock);
		ep->ep_recvqp.qp_level--;
		mutex_exit(&ep->ep_recvqp.qp_lock);

		/* free the buffer */
		bp->buf_state = RDS_RCVBUF_FREE;
		rds_free_recv_buf(bp, 1);

		/* Receive completion failure */
		if (wc.wc_status != IBT_WC_WR_FLUSHED_ERR) {
			RDS_DPRINTF2("rds_poll_data_completions",
			    "EP(%p) CQ(%p) BP(%p): WC Error Status: %d",
			    ep, cq, wc.wc_id, wc.wc_status);
			RDS_INCR_RXERRS();
		}
		return (ret);
	}

	/* there is one less in the RQ */
	recvqp = &ep->ep_recvqp;
	mutex_enter(&recvqp->qp_lock);
	recvqp->qp_level--;
	if ((recvqp->qp_taskqpending == B_FALSE) &&
	    (recvqp->qp_level <= recvqp->qp_lwm)) {
		/* Time to post more buffers into the RQ */
		recvqp->qp_taskqpending = B_TRUE;
		mutex_exit(&recvqp->qp_lock);

		ret = ddi_taskq_dispatch(rds_taskq,
		    rds_post_recv_buf, (void *)ep->ep_chanhdl, DDI_NOSLEEP);
		if (ret != DDI_SUCCESS) {
			RDS_DPRINTF2(LABEL, "ddi_taskq_dispatch failed: %d",
			    ret);
			mutex_enter(&recvqp->qp_lock);
			recvqp->qp_taskqpending = B_FALSE;
			mutex_exit(&recvqp->qp_lock);
		}
	} else {
		mutex_exit(&recvqp->qp_lock);
	}

	pktp = (rds_data_hdr_t *)(uintptr_t)bp->buf_ds.ds_va;
	ASSERT(pktp->dh_datalen != 0);

	RDS_DPRINTF5(LABEL, "Message Received: sendIP: 0x%x recvIP: 0x%x "
	    "sendport: %d recvport: %d npkts: %d pktno: %d", ep->ep_remip,
	    ep->ep_myip, pktp->dh_sendport, pktp->dh_recvport,
	    pktp->dh_npkts, pktp->dh_psn);

	RDS_DPRINTF3(LABEL, "BP(%p): npkts: %d psn: %d", bp,
	    pktp->dh_npkts, pktp->dh_psn);

	/*
	 * dh_npkts counts the packets remaining in the message (1 means
	 * this is the last or only one); dh_psn is the packet sequence
	 * number within the message (0 means first).
	 */
	if (pktp->dh_npkts == 1) {
		/* single pkt or last packet */
		if (pktp->dh_psn != 0) {
			/* last packet of a segmented message */
			ASSERT(ep->ep_seglbp != NULL);
			ep->ep_seglbp->buf_nextp = bp;
			ep->ep_seglbp = bp;
			rds_received_msg(ep, ep->ep_segfbp);
			ep->ep_segfbp = NULL;
			ep->ep_seglbp = NULL;
		} else {
			/* single packet */
			rds_received_msg(ep, bp);
		}
	} else {
		/* multi-pkt msg */
		if (pktp->dh_psn == 0) {
			/* first packet */
			ASSERT(ep->ep_segfbp == NULL);
			ep->ep_segfbp = bp;
			ep->ep_seglbp = bp;
		} else {
			/* intermediate packet */
			ASSERT(ep->ep_segfbp != NULL);
			ep->ep_seglbp->buf_nextp = bp;
			ep->ep_seglbp = bp;
		}
	}

	RDS_DPRINTF4("rds_poll_data_completions", "Return: EP(%p)", ep);

	return (ret);
}

/*
 * Receive CQ handler for both data and control endpoints.  Drains the CQ,
 * re-arms notification with rds_wc_signal, then drains again to close the
 * race between the last poll and re-arming.
 */
void
rds_recvcq_handler(ibt_cq_hdl_t cq, void *arg)
{
	rds_ep_t	*ep;
	int		ret = IBT_SUCCESS;
	int		(*func)(ibt_cq_hdl_t, rds_ep_t *);

	ep = (rds_ep_t *)arg;

	RDS_DPRINTF4("rds_recvcq_handler", "enter: EP(%p)", ep);

	if (ep->ep_type == RDS_EP_TYPE_DATA) {
		func = rds_poll_data_completions;
	} else {
		func = rds_poll_ctrl_completions;
	}

	do {
		ret = func(cq, ep);
	} while (ret != IBT_CQ_EMPTY);

	/* enable the CQ */
	ret = ibt_enable_cq_notify(cq, rds_wc_signal);
	if (ret != IBT_SUCCESS) {
		RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): ibt_enable_cq_notify "
		    "failed: %d", ep, cq, ret);
		return;
	}

	/* poll again: completions may have arrived before the re-arm */
	do {
		ret = func(cq, ep);
	} while (ret != IBT_CQ_EMPTY);

	RDS_DPRINTF4("rds_recvcq_handler", "Return: EP(%p)", ep);
}

/*
 * Drain the send CQ, collecting completed send buffers onto a local list
 * that is returned to the pool via rds_free_send_buf() (the 'lock'
 * argument is passed straight through to it).  RDMA-write completions
 * (ACKs, wc_id == RDS_RDMAW_WRID) are handled separately: success feeds
 * rds_send_acknowledgement(); failure decrements ep_rdmacnt.  On the
 * first non-flush send error the session is moved to
 * RDS_SESSION_STATE_ERROR and marked active, and rds_handle_send_error()
 * is invoked at the end.
 */
void
rds_poll_send_completions(ibt_cq_hdl_t cq, rds_ep_t *ep, boolean_t lock)
{
	ibt_wc_t	wc[RDS_NUM_DATA_SEND_WCS];
	uint_t		npolled, nret, send_error = 0;
	rds_buf_t	*headp, *tailp, *bp;
	int		ret, ix;

	RDS_DPRINTF4("rds_poll_send_completions", "Enter EP(%p)", ep);

	headp = NULL;
	tailp = NULL;
	npolled = 0;
	do {
		ret = ibt_poll_cq(cq, wc, RDS_NUM_DATA_SEND_WCS, &nret);
		if (ret != IBT_SUCCESS) {
			if (ret != IBT_CQ_EMPTY) {
				RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): "
				    "ibt_poll_cq returned: %d", ep, cq, ret);
			} else {
				RDS_DPRINTF5(LABEL, "EP(%p) CQ(%p): "
				    "ibt_poll_cq returned: IBT_CQ_EMPTY",
				    ep, cq);
			}

			break;
		}

		for (ix = 0; ix < nret; ix++) {
			if (wc[ix].wc_status == IBT_WC_SUCCESS) {
				if (wc[ix].wc_type == IBT_WRC_RDMAW) {
					/* ACK RDMA write completed */
					rds_send_acknowledgement(ep);
					continue;
				}

				bp = (rds_buf_t *)(uintptr_t)wc[ix].wc_id;
				ASSERT(bp->buf_state == RDS_SNDBUF_PENDING);
				bp->buf_state = RDS_SNDBUF_FREE;
			} else if (wc[ix].wc_status == IBT_WC_WR_FLUSHED_ERR) {
				RDS_INCR_TXERRS();
				RDS_DPRINTF5("rds_poll_send_completions",
				    "EP(%p): WC ID: %p ERROR: %d", ep,
				    wc[ix].wc_id, wc[ix].wc_status);

				if (wc[ix].wc_id == RDS_RDMAW_WRID) {
					mutex_enter(&ep->ep_lock);
					ep->ep_rdmacnt--;
					mutex_exit(&ep->ep_lock);
					continue;
				}

				bp = (rds_buf_t *)(uintptr_t)wc[ix].wc_id;
				ASSERT(bp->buf_state == RDS_SNDBUF_PENDING);
				bp->buf_state = RDS_SNDBUF_FREE;
			} else {
				RDS_INCR_TXERRS();
				RDS_DPRINTF2("rds_poll_send_completions",
				    "EP(%p): WC ID: %p ERROR: %d", ep,
				    wc[ix].wc_id, wc[ix].wc_status);
				if (send_error == 0) {
					rds_session_t	*sp = ep->ep_sp;

					/* don't let anyone send anymore */
					rw_enter(&sp->session_lock, RW_WRITER);
					if (sp->session_state !=
					    RDS_SESSION_STATE_ERROR) {
						sp->session_state =
						    RDS_SESSION_STATE_ERROR;
						/* Make this the active end */
						sp->session_type =
						    RDS_SESSION_ACTIVE;
					}
					rw_exit(&sp->session_lock);
				}

				send_error++;

				if (wc[ix].wc_id == RDS_RDMAW_WRID) {
					mutex_enter(&ep->ep_lock);
					ep->ep_rdmacnt--;
					mutex_exit(&ep->ep_lock);
					continue;
				}

				bp = (rds_buf_t *)(uintptr_t)wc[ix].wc_id;
				ASSERT(bp->buf_state == RDS_SNDBUF_PENDING);
				bp->buf_state = RDS_SNDBUF_FREE;
			}

			/* chain the freed buffer onto the local list */
			bp->buf_nextp = NULL;
			if (headp) {
				tailp->buf_nextp = bp;
				tailp = bp;
			} else {
				headp = bp;
				tailp = bp;
			}

			npolled++;
		}

		/*
		 * NOTE(review): in polling mode the npolled > 100 cap is
		 * subsumed by the unconditional break below; kept as-is.
		 */
		if (rds_no_interrupts && (npolled > 100)) {
			break;
		}

		if (rds_no_interrupts == 1) {
			break;
		}
	} while (ret != IBT_CQ_EMPTY);

	RDS_DPRINTF5("rds_poll_send_completions", "Npolled: %d send_error: %d",
	    npolled, send_error);

	/* put the buffers to the pool */
	if (npolled != 0) {
		rds_free_send_buf(ep, headp, tailp, npolled, lock);
	}

	if (send_error != 0) {
		rds_handle_send_error(ep);
	}

	RDS_DPRINTF4("rds_poll_send_completions", "Return EP(%p)", ep);
}

/*
 * Send CQ handler: re-arm notification first, then poll, so completions
 * arriving during the poll still raise an interrupt.
 */
void
rds_sendcq_handler(ibt_cq_hdl_t cq, void *arg)
{
	rds_ep_t	*ep;
	int		ret;

	ep = (rds_ep_t *)arg;

	RDS_DPRINTF4("rds_sendcq_handler", "Enter: EP(%p)", ep);

	/* enable the CQ */
	ret = ibt_enable_cq_notify(cq, IBT_NEXT_COMPLETION);
	if (ret != IBT_SUCCESS) {
		RDS_DPRINTF2(LABEL, "EP(%p) CQ(%p): ibt_enable_cq_notify "
		    "failed: %d", ep, cq, ret);
		return;
	}

	rds_poll_send_completions(cq, ep, B_FALSE);

	RDS_DPRINTF4("rds_sendcq_handler", "Return: EP(%p)", ep);
}

/*
 * Free the RC channel and both CQs of an endpoint.  The channel is
 * flushed and its RQ drained (rds_is_recvq_empty) before the free.
 * Caller must hold ep->ep_lock; each handle is NULLed after release so
 * the function is safe to call on a partially-freed endpoint.
 */
void
rds_ep_free_rc_channel(rds_ep_t *ep)
{
	int ret;

	RDS_DPRINTF2("rds_ep_free_rc_channel", "EP(%p) - Enter", ep);

	ASSERT(mutex_owned(&ep->ep_lock));

	/* free the QP */
	if (ep->ep_chanhdl != NULL) {
		/* wait until the RQ is empty */
		(void) ibt_flush_channel(ep->ep_chanhdl);
		(void) rds_is_recvq_empty(ep, B_TRUE);
		ret = ibt_free_channel(ep->ep_chanhdl);
		if (ret != IBT_SUCCESS) {
			RDS_DPRINTF2("rds_ep_free_rc_channel", "EP(%p) "
			    "ibt_free_channel returned: %d", ep, ret);
		}
		ep->ep_chanhdl = NULL;
	} else {
		RDS_DPRINTF2("rds_ep_free_rc_channel",
		    "EP(%p) Channel is ALREADY FREE", ep);
	}

	/* free the Send CQ */
	if (ep->ep_sendcq != NULL) {
		ret = ibt_free_cq(ep->ep_sendcq);
		if (ret != IBT_SUCCESS) {
			RDS_DPRINTF2("rds_ep_free_rc_channel",
			    "EP(%p) - for sendcq, ibt_free_cq returned %d",
			    ep, ret);
		}
		ep->ep_sendcq = NULL;
	} else {
		RDS_DPRINTF2("rds_ep_free_rc_channel",
		    "EP(%p) SendCQ is ALREADY FREE", ep);
	}

	/* free the Recv CQ */
	if (ep->ep_recvcq != NULL) {
		ret = ibt_free_cq(ep->ep_recvcq);
		if (ret != IBT_SUCCESS) {
			RDS_DPRINTF2("rds_ep_free_rc_channel",
			    "EP(%p) - for recvcq, ibt_free_cq returned %d",
			    ep, ret);
		}
		ep->ep_recvcq = NULL;
	} else {
		RDS_DPRINTF2("rds_ep_free_rc_channel",
		    "EP(%p) RecvCQ is ALREADY FREE", ep);
	}

	RDS_DPRINTF2("rds_ep_free_rc_channel", "EP(%p) - Return", ep);
}

/* Allocate resources for RC channel */
ibt_channel_hdl_t
rds_ep_alloc_rc_channel(rds_ep_t *ep, uint8_t hca_port)
{
	int			ret = IBT_SUCCESS;
	ibt_cq_attr_t		scqattr, rcqattr;
	ibt_rc_chan_alloc_args_t	chanargs;
	ibt_channel_hdl_t	chanhdl;
	rds_session_t		*sp;
	rds_hca_t		*hcap;

	RDS_DPRINTF4("rds_ep_alloc_rc_channel", "Enter: 0x%p port: %d",
	    ep, hca_port);

	/* Update the EP with the right IP address and HCA guid */
	sp = ep->ep_sp;
	ASSERT(sp != NULL);
	rw_enter(&sp->session_lock, RW_READER);
	mutex_enter(&ep->ep_lock);
	ep->ep_myip = sp->session_myip;
	ep->ep_remip = sp->session_remip;
	/* NOTE(review): hcap is not NULL-checked before the dereference */
	hcap = rds_gid_to_hcap(rdsib_statep, sp->session_lgid);
	ep->ep_hca_guid = hcap->hca_guid;
	mutex_exit(&ep->ep_lock);
	rw_exit(&sp->session_lock);

	/* reset taskqpending flag here */
	ep->ep_recvqp.qp_taskqpending = B_FALSE;

	if (ep->ep_type == RDS_EP_TYPE_CTRL) {
/* control endpoint: size CQs and queues from the Ctrl tunables */
1159 scqattr.cq_size = MaxCtrlSendBuffers; 1160 scqattr.cq_sched = NULL; 1161 scqattr.cq_flags = IBT_CQ_NO_FLAGS; 1162 1163 rcqattr.cq_size = MaxCtrlRecvBuffers; 1164 rcqattr.cq_sched = NULL; 1165 rcqattr.cq_flags = IBT_CQ_NO_FLAGS; 1166 1167 chanargs.rc_sizes.cs_sq = MaxCtrlSendBuffers; 1168 chanargs.rc_sizes.cs_rq = MaxCtrlRecvBuffers; 1169 chanargs.rc_sizes.cs_sq_sgl = 1; 1170 chanargs.rc_sizes.cs_rq_sgl = 1; 1171 } else { 1172
/* data endpoint: send side gets extra room for ACK messages */
scqattr.cq_size = MaxDataSendBuffers + RDS_NUM_ACKS; 1173 scqattr.cq_sched = NULL; 1174 scqattr.cq_flags = IBT_CQ_NO_FLAGS; 1175 1176 rcqattr.cq_size = MaxDataRecvBuffers; 1177 rcqattr.cq_sched = NULL; 1178 rcqattr.cq_flags = IBT_CQ_NO_FLAGS; 1179 1180 chanargs.rc_sizes.cs_sq = MaxDataSendBuffers + RDS_NUM_ACKS; 1181 chanargs.rc_sizes.cs_rq = MaxDataRecvBuffers; 1182 chanargs.rc_sizes.cs_sq_sgl = 1; 1183 chanargs.rc_sizes.cs_rq_sgl = 1; 1184 } 1185 1186 mutex_enter(&ep->ep_lock); 1187
/* allocate the send CQ only if the endpoint does not already have one */
if (ep->ep_sendcq == NULL) { 1188 /* returned size is always greater than the requested size */ 1189 ret = ibt_alloc_cq(hcap->hca_hdl, &scqattr, 1190 &ep->ep_sendcq, NULL); 1191 if (ret != IBT_SUCCESS) { 1192 RDS_DPRINTF2(LABEL, "ibt_alloc_cq for sendCQ " 1193 "failed, size = %d: %d", scqattr.cq_size, ret); 1194 mutex_exit(&ep->ep_lock); 1195 return (NULL); 1196 } 1197 1198 (void) ibt_set_cq_handler(ep->ep_sendcq, rds_sendcq_handler, 1199 ep); 1200 1201
/* send CQ notification is armed only in interrupt mode */
if (rds_no_interrupts == 0) { 1202 ret = ibt_enable_cq_notify(ep->ep_sendcq, 1203 IBT_NEXT_COMPLETION); 1204 if (ret != IBT_SUCCESS) { 1205 RDS_DPRINTF2(LABEL, 1206 "ibt_enable_cq_notify failed: %d", ret); 1207 (void) ibt_free_cq(ep->ep_sendcq); 1208 ep->ep_sendcq = NULL; 1209 mutex_exit(&ep->ep_lock); 1210 return (NULL); 1211 } 1212 } 1213 } 1214 1215
/* allocate the recv CQ only if the endpoint does not already have one */
if (ep->ep_recvcq == NULL) { 1216 /* returned size is always greater than the requested size */ 1217 ret = ibt_alloc_cq(hcap->hca_hdl, &rcqattr, 1218 &ep->ep_recvcq, NULL); 1219 if (ret != IBT_SUCCESS) { 1220 RDS_DPRINTF2(LABEL, "ibt_alloc_cq for recvCQ " 1221
"failed, size = %d: %d", rcqattr.cq_size, ret); 1222
/* unwind: free the send CQ allocated/held above before failing */
(void) ibt_free_cq(ep->ep_sendcq); 1223 ep->ep_sendcq = NULL; 1224 mutex_exit(&ep->ep_lock); 1225 return (NULL); 1226 } 1227 1228 (void) ibt_set_cq_handler(ep->ep_recvcq, rds_recvcq_handler, 1229 ep); 1230 1231
/*
 * Recv CQ notification is armed unconditionally (unlike the send CQ,
 * which is gated on rds_no_interrupts above), using the rds_wc_signal
 * setting for the notification type.
 */
ret = ibt_enable_cq_notify(ep->ep_recvcq, rds_wc_signal); 1232 if (ret != IBT_SUCCESS) { 1233 RDS_DPRINTF2(LABEL, 1234 "ibt_enable_cq_notify failed: %d", ret); 1235
/* unwind both CQs on failure */
(void) ibt_free_cq(ep->ep_recvcq); 1236 ep->ep_recvcq = NULL; 1237 (void) ibt_free_cq(ep->ep_sendcq); 1238 ep->ep_sendcq = NULL; 1239 mutex_exit(&ep->ep_lock); 1240 return (NULL); 1241 } 1242 } 1243 1244
/* fill in the RC channel arguments and allocate the channel itself */
chanargs.rc_flags = IBT_ALL_SIGNALED; 1245 chanargs.rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR | 1246 IBT_CEP_ATOMIC; 1247 chanargs.rc_hca_port_num = hca_port; 1248 chanargs.rc_scq = ep->ep_sendcq; 1249 chanargs.rc_rcq = ep->ep_recvcq; 1250 chanargs.rc_pd = hcap->hca_pdhdl; 1251 chanargs.rc_srq = NULL; 1252 1253 ret = ibt_alloc_rc_channel(hcap->hca_hdl, 1254 IBT_ACHAN_NO_FLAGS, &chanargs, &chanhdl, NULL); 1255 if (ret != IBT_SUCCESS) { 1256 RDS_DPRINTF2(LABEL, "ibt_alloc_rc_channel fail: %d", 1257 ret); 1258 (void) ibt_free_cq(ep->ep_recvcq); 1259 ep->ep_recvcq = NULL; 1260 (void) ibt_free_cq(ep->ep_sendcq); 1261 ep->ep_sendcq = NULL; 1262 mutex_exit(&ep->ep_lock); 1263 return (NULL); 1264 } 1265 mutex_exit(&ep->ep_lock); 1266 1267 /* Chan private should contain the ep */ 1268 (void) ibt_set_chan_private(chanhdl, ep); 1269 1270 RDS_DPRINTF4("rds_ep_alloc_rc_channel", "Return: 0x%p", chanhdl); 1271 1272 return (chanhdl); 1273 } 1274 1275 1276
/* dead code: compiled out -- candidate for removal */
#if 0 1277 1278 /* Return node guid given a port gid */ 1279 ib_guid_t 1280 rds_gid_to_node_guid(ib_gid_t gid) 1281 { 1282 ibt_node_info_t nodeinfo; 1283 int ret; 1284 1285 RDS_DPRINTF4("rds_gid_to_node_guid", "Enter: gid: %llx:%llx", 1286 gid.gid_prefix, gid.gid_guid); 1287 1288 ret = ibt_gid_to_node_info(gid, &nodeinfo); 1289 if (ret != IBT_SUCCESS) { 1290 RDS_DPRINTF2(LABEL, "ibt_gid_node_info for 
gid: %llx:%llx " 1291 "failed", gid.gid_prefix, gid.gid_guid); 1292 return (0LL); 1293 } 1294 1295 RDS_DPRINTF4("rds_gid_to_node_guid", "Return: Node guid: %llx", 1296 nodeinfo.n_node_guid); 1297 1298 return (nodeinfo.n_node_guid); 1299 } 1300 1301 #endif 1302 1303
/*
 * IBT_EVENT_PORT_UP handler: re-query the HCA's port info, install the
 * fresh info on the matching rds_hca_t, and (re)bind the RDS service to
 * the gid of the port that came up.  No-ops if the RDS service is not
 * registered or the HCA is unknown.  A bind failure is only logged.
 */
static void 1304 rds_handle_portup_event(rds_state_t *statep, ibt_hca_hdl_t hdl, 1305 ibt_async_event_t *event) 1306 { 1307 rds_hca_t *hcap; 1308 ibt_hca_portinfo_t *newpinfop, *oldpinfop; 1309 uint_t newsize, oldsize, nport; 1310 ib_gid_t gid; 1311 int ret; 1312 1313 RDS_DPRINTF2("rds_handle_portup_event", 1314 "Enter: GUID: 0x%llx Statep: %p", event->ev_hca_guid, statep); 1315 1316 /* If RDS service is not registered then no bind is needed */ 1317 if (statep->rds_srvhdl == NULL) { 1318 RDS_DPRINTF2("rds_handle_portup_event", 1319 "RDS Service is not registered, so no action needed"); 1320 return; 1321 } 1322 1323 hcap = rds_get_hcap(statep, event->ev_hca_guid); 1324 if (hcap == NULL) { 1325 RDS_DPRINTF2("rds_handle_portup_event", "HCA: 0x%llx is " 1326 "not in our list", event->ev_hca_guid); 1327 return; 1328 } 1329 1330
/* port 0 queries all ports on the HCA */
ret = ibt_query_hca_ports(hdl, 0, &newpinfop, &nport, &newsize); 1331 if (ret != IBT_SUCCESS) { 1332 RDS_DPRINTF2(LABEL, "ibt_query_hca_ports failed: %d", ret); 1333 return; 1334 } 1335 1336
/* swap in the new port info; the old copy is freed after the bind */
oldpinfop = hcap->hca_pinfop; 1337 oldsize = hcap->hca_pinfo_sz; 1338 hcap->hca_pinfop = newpinfop; 1339 hcap->hca_pinfo_sz = newsize; 1340 1341 /* structure copy */ 1342 gid = newpinfop[event->ev_port - 1].p_sgid_tbl[0]; 1343 1344 /* bind RDS service on the port, pass statep as cm_private */ 1345 ret = ibt_bind_service(statep->rds_srvhdl, gid, NULL, statep, NULL); 1346 if (ret != IBT_SUCCESS) { 1347 RDS_DPRINTF2(LABEL, "Bind service for HCA: 0x%llx Port: %d " 1348 "gid %llx:%llx returned: %d", event->ev_hca_guid, 1349 event->ev_port, gid.gid_prefix, gid.gid_guid, ret); 1350 } 1351 1352 (void) ibt_free_portinfo(oldpinfop, oldsize); 1353 1354 RDS_DPRINTF2("rds_handle_portup_event", "Return: GUID: 
0x%llx", 1355 event->ev_hca_guid); 1356 } 1357 1358
/*
 * IBTF asynchronous event dispatcher registered for this client.
 * 'clntp' is the rds_state_t supplied at registration.  Only
 * IBT_EVENT_PORT_UP is acted upon (delegated to
 * rds_handle_portup_event()); all other codes are merely logged.
 */
static void 1359 rds_async_handler(void *clntp, ibt_hca_hdl_t hdl, ibt_async_code_t code, 1360 ibt_async_event_t *event) 1361 { 1362 rds_state_t *statep; 1363 1364 RDS_DPRINTF2("rds_async_handler", "Async code: %d", code); 1365 1366 switch (code) { 1367 case IBT_EVENT_PORT_UP: 1368 statep = (rds_state_t *)clntp; 1369 rds_handle_portup_event(statep, hdl, event); 1370 break; 1371 1372 default: 1373 RDS_DPRINTF2(LABEL, "Async event: %d not handled", code); 1374 } 1375 1376 RDS_DPRINTF2("rds_async_handler", "Return: code: %d", code); 1377 } 1378