1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 2005 SilverStorm Technologies, Inc. All rights reserved. 27 * 28 * This software is available to you under a choice of one of two 29 * licenses. You may choose to be licensed under the terms of the GNU 30 * General Public License (GPL) Version 2, available from the file 31 * COPYING in the main directory of this source tree, or the 32 * OpenIB.org BSD license below: 33 * 34 * Redistribution and use in source and binary forms, with or 35 * without modification, are permitted provided that the following 36 * conditions are met: 37 * 38 * - Redistributions of source code must retain the above 39 * copyright notice, this list of conditions and the following 40 * disclaimer. 41 * 42 * - Redistributions in binary form must reproduce the above 43 * copyright notice, this list of conditions and the following 44 * disclaimer in the documentation and/or other materials 45 * provided with the distribution. 46 * 47 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 48 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 49 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 50 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 51 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 52 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 53 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 54 * SOFTWARE. 55 * 56 */ 57 /* 58 * Sun elects to include this software in Sun product 59 * under the OpenIB BSD license. 60 * 61 * 62 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 63 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 64 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 65 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 66 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 67 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 68 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 69 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 70 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 71 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 72 * POSSIBILITY OF SUCH DAMAGE. 73 */ 74 75 #include <sys/ib/clients/rds/rdsib_cm.h> 76 #include <sys/ib/clients/rds/rdsib_ib.h> 77 #include <sys/ib/clients/rds/rdsib_buf.h> 78 #include <sys/ib/clients/rds/rdsib_ep.h> 79 80 /* 81 * This file contains CM related work: 82 * 83 * Service registration/deregistration 84 * Path lookup 85 * CM connection callbacks 86 * CM active and passive connection establishment 87 * Connection failover 88 */ 89 90 #define SRCIP src_addr.un.ip4addr 91 #define DSTIP dst_addr.un.ip4addr 92 93 /* 94 * Handle an incoming CM REQ 95 */ 96 /* ARGSUSED */ 97 static ibt_cm_status_t 98 rds_handle_cm_req(rds_state_t *statep, ibt_cm_event_t *evp, 99 ibt_cm_return_args_t *rargsp, void *rcmp, ibt_priv_data_len_t rcmp_len) 100 { 101 ibt_cm_req_rcv_t *reqp; 102 ib_gid_t lgid, rgid; 103 rds_cm_private_data_t cmp; 104 rds_session_t *sp; 105 rds_ep_t *ep; 106 ibt_channel_hdl_t chanhdl; 107 ibt_ip_cm_info_t ipcm_info; 108 int ret; 109 110 RDS_DPRINTF2("rds_handle_cm_req", "Enter"); 111 112 reqp = &evp->cm_event.req; 113 rgid = reqp->req_prim_addr.av_dgid; /* requester gid */ 114 lgid = reqp->req_prim_addr.av_sgid; /* receiver gid */ 115 116 RDS_DPRINTF2(LABEL, "REQ Received: From: %llx:%llx To: %llx:%llx", 117 rgid.gid_prefix, rgid.gid_guid, lgid.gid_prefix, lgid.gid_guid); 118 119 /* 120 * CM private data brings IP information 121 * Private data received is a stream of bytes and may not be properly 122 * aligned. So, bcopy the data onto the stack before accessing it. 123 */ 124 bcopy((uint8_t *)evp->cm_priv_data, &cmp, 125 sizeof (rds_cm_private_data_t)); 126 127 /* extract the CM IP info */ 128 ret = ibt_get_ip_data(evp->cm_priv_data_len, evp->cm_priv_data, 129 &ipcm_info); 130 if (ret != IBT_SUCCESS) { 131 RDS_DPRINTF2("rds_handle_cm_req", "ibt_get_ip_data failed: %d", 132 ret); 133 return (IBT_CM_REJECT); 134 } 135 136 RDS_DPRINTF2("rds_handle_cm_req", 137 "REQ Received: From IP: 0x%x To IP: 0x%x type: %d", 138 ipcm_info.SRCIP, ipcm_info.DSTIP, cmp.cmp_eptype); 139 140 if (cmp.cmp_version != RDS_VERSION) { 141 RDS_DPRINTF2(LABEL, "Version Mismatch: Local version: %d " 142 "Remote version: %d", RDS_VERSION, cmp.cmp_version); 143 return (IBT_CM_REJECT); 144 } 145 146 /* RDS supports V4 addresses only */ 147 if ((ipcm_info.src_addr.family != AF_INET) || 148 (ipcm_info.dst_addr.family != AF_INET)) { 149 RDS_DPRINTF2(LABEL, "Unsupported Address Family: " 150 "src: %d dst: %d", ipcm_info.src_addr.family, 151 ipcm_info.dst_addr.family); 152 return (IBT_CM_REJECT); 153 } 154 155 if (cmp.cmp_arch != RDS_THIS_ARCH) { 156 RDS_DPRINTF2(LABEL, "ARCH does not match (%d != %d)", 157 cmp.cmp_arch, RDS_THIS_ARCH); 158 return (IBT_CM_REJECT); 159 } 160 161 if ((cmp.cmp_eptype != RDS_EP_TYPE_CTRL) && 162 (cmp.cmp_eptype != RDS_EP_TYPE_DATA)) { 163 RDS_DPRINTF2(LABEL, "Unknown Channel type: %d", cmp.cmp_eptype); 164 return (IBT_CM_REJECT); 165 } 166 167 /* user_buffer_size should be same on all nodes */ 168 if (cmp.cmp_user_buffer_size != UserBufferSize) { 169 RDS_DPRINTF2(LABEL, 170 "UserBufferSize Mismatch, this node: %d remote node: %d", 171 UserBufferSize, cmp.cmp_user_buffer_size); 172 return (IBT_CM_REJECT); 173 } 174 175 /* 176 * RDS needs more time to process a failover REQ so send an MRA. 177 * Otherwise, the remote may retry the REQ and fail the connection. 178 */ 179 if ((cmp.cmp_failover) && (cmp.cmp_eptype == RDS_EP_TYPE_DATA)) { 180 RDS_DPRINTF2("rds_handle_cm_req", "Session Failover, send MRA"); 181 (void) ibt_cm_delay(IBT_CM_DELAY_REQ, evp->cm_session_id, 182 10000000 /* 10 sec */, NULL, 0); 183 } 184 185 /* Is there a session to the destination node? */ 186 rw_enter(&statep->rds_sessionlock, RW_READER); 187 sp = rds_session_lkup(statep, ipcm_info.SRCIP, rgid.gid_guid); 188 rw_exit(&statep->rds_sessionlock); 189 190 if (sp == NULL) { 191 /* 192 * currently there is no session to the destination 193 * remote ip in the private data is the local ip and vice 194 * versa 195 */ 196 sp = rds_session_create(statep, ipcm_info.DSTIP, 197 ipcm_info.SRCIP, reqp, RDS_SESSION_PASSIVE); 198 if (sp == NULL) { 199 /* Check the list anyway. */ 200 rw_enter(&statep->rds_sessionlock, RW_READER); 201 sp = rds_session_lkup(statep, ipcm_info.SRCIP, 202 rgid.gid_guid); 203 rw_exit(&statep->rds_sessionlock); 204 if (sp == NULL) { 205 /* 206 * The only way this can fail is due to lack 207 * of kernel resources 208 */ 209 return (IBT_CM_REJECT); 210 } 211 } 212 } 213 214 rw_enter(&sp->session_lock, RW_WRITER); 215 216 /* catch peer-to-peer case as soon as possible */ 217 if ((sp->session_state == RDS_SESSION_STATE_CREATED) || 218 (sp->session_state == RDS_SESSION_STATE_INIT)) { 219 /* Check possible peer-to-peer case here */ 220 if (sp->session_type != RDS_SESSION_PASSIVE) { 221 RDS_DPRINTF2("rds_handle_cm_req", 222 "SP(%p) Peer-peer connection handling", sp); 223 if (lgid.gid_guid > rgid.gid_guid) { 224 /* this node is active so reject this request */ 225 rw_exit(&sp->session_lock); 226 return (IBT_CM_REJECT); 227 } else { 228 /* this node is passive, change the session */ 229 sp->session_type = RDS_SESSION_PASSIVE; 230 sp->session_lgid = lgid; 231 sp->session_rgid = rgid; 232 } 233 } 234 } 235 236 RDS_DPRINTF2(LABEL, "SP(%p) state: %d", sp, sp->session_state); 237 238 switch (sp->session_state) { 239 case RDS_SESSION_STATE_CONNECTED: 240 RDS_DPRINTF2(LABEL, "STALE Session Detected SP(%p)", sp); 241 sp->session_state = RDS_SESSION_STATE_ERROR; 242 RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State " 243 "RDS_SESSION_STATE_ERROR", sp); 244 245 /* FALLTHRU */ 246 case RDS_SESSION_STATE_ERROR: 247 case RDS_SESSION_STATE_PASSIVE_CLOSING: 248 sp->session_type = RDS_SESSION_PASSIVE; 249 rw_exit(&sp->session_lock); 250 251 /* Handling this will take some time, so send an MRA */ 252 (void) ibt_cm_delay(IBT_CM_DELAY_REQ, evp->cm_session_id, 253 10000000 /* 10 sec */, NULL, 0); 254 255 /* 256 * Any pending completions don't get flushed until the channel 257 * is closed. So, passing 0 here will not wait for pending 258 * completions in rds_session_close before closing the channel 259 */ 260 rds_session_close(sp, IBT_NOCALLBACKS, 0); 261 262 /* move the session to init state */ 263 rw_enter(&sp->session_lock, RW_WRITER); 264 ret = rds_session_reinit(sp, lgid); 265 sp->session_myip = ipcm_info.DSTIP; 266 sp->session_lgid = lgid; 267 sp->session_rgid = rgid; 268 if (ret != 0) { 269 rds_session_fini(sp); 270 sp->session_state = RDS_SESSION_STATE_FAILED; 271 RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State " 272 "RDS_SESSION_STATE_FAILED", sp); 273 rw_exit(&sp->session_lock); 274 return (IBT_CM_REJECT); 275 } else { 276 sp->session_state = RDS_SESSION_STATE_INIT; 277 RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State " 278 "RDS_SESSION_STATE_INIT", sp); 279 } 280 281 if (cmp.cmp_eptype == RDS_EP_TYPE_CTRL) { 282 ep = &sp->session_ctrlep; 283 } else { 284 ep = &sp->session_dataep; 285 } 286 break; 287 case RDS_SESSION_STATE_CREATED: 288 case RDS_SESSION_STATE_FAILED: 289 case RDS_SESSION_STATE_FINI: 290 /* 291 * Initialize both channels, we accept this connection 292 * only if both channels are initialized 293 */ 294 sp->session_type = RDS_SESSION_PASSIVE; 295 sp->session_lgid = lgid; 296 sp->session_rgid = rgid; 297 sp->session_state = RDS_SESSION_STATE_CREATED; 298 RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State " 299 "RDS_SESSION_STATE_CREATED", sp); 300 ret = rds_session_init(sp); 301 if (ret != 0) { 302 /* Seems like there are not enough resources */ 303 sp->session_state = RDS_SESSION_STATE_FAILED; 304 RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State " 305 "RDS_SESSION_STATE_FAILED", sp); 306 rw_exit(&sp->session_lock); 307 return (IBT_CM_REJECT); 308 } 309 sp->session_state = RDS_SESSION_STATE_INIT; 310 RDS_DPRINTF3("rds_handle_cm_req", "SP(%p) State " 311 "RDS_SESSION_STATE_INIT", sp); 312 313 /* FALLTHRU */ 314 case RDS_SESSION_STATE_INIT: 315 /* 316 * When re-using an existing session, make sure the 317 * session is still through the same HCA. Otherwise, the 318 * memory registrations have to moved to the new HCA. 319 */ 320 if (cmp.cmp_eptype == RDS_EP_TYPE_DATA) { 321 if (sp->session_lgid.gid_guid != lgid.gid_guid) { 322 RDS_DPRINTF2("rds_handle_cm_req", 323 "Existing Session but different gid " 324 "existing: 0x%llx, new: 0x%llx, " 325 "sending an MRA", 326 sp->session_lgid.gid_guid, lgid.gid_guid); 327 (void) ibt_cm_delay(IBT_CM_DELAY_REQ, 328 evp->cm_session_id, 10000000 /* 10 sec */, 329 NULL, 0); 330 ret = rds_session_reinit(sp, lgid); 331 if (ret != 0) { 332 rds_session_fini(sp); 333 sp->session_state = 334 RDS_SESSION_STATE_FAILED; 335 sp->session_failover = 0; 336 RDS_DPRINTF3("rds_failover_session", 337 "SP(%p) State " 338 "RDS_SESSION_STATE_FAILED", sp); 339 rw_exit(&sp->session_lock); 340 return (IBT_CM_REJECT); 341 } 342 } 343 ep = &sp->session_dataep; 344 } else { 345 ep = &sp->session_ctrlep; 346 } 347 348 break; 349 default: 350 RDS_DPRINTF2(LABEL, "ERROR: SP(%p) is in an unexpected " 351 "state: %d", sp, sp->session_state); 352 rw_exit(&sp->session_lock); 353 return (IBT_CM_REJECT); 354 } 355 356 sp->session_failover = 0; /* reset any previous value */ 357 if (cmp.cmp_failover) { 358 RDS_DPRINTF2("rds_handle_cm_req", 359 "SP(%p) Failover Session (BP %p)", sp, cmp.cmp_last_bufid); 360 sp->session_failover = 1; 361 } 362 363 mutex_enter(&ep->ep_lock); 364 if (ep->ep_state == RDS_EP_STATE_UNCONNECTED) { 365 ep->ep_state = RDS_EP_STATE_PASSIVE_PENDING; 366 sp->session_type = RDS_SESSION_PASSIVE; 367 rw_exit(&sp->session_lock); 368 } else if (ep->ep_state == RDS_EP_STATE_ACTIVE_PENDING) { 369 rw_exit(&sp->session_lock); 370 /* 371 * Peer to peer connection. There is an active 372 * connection pending on this ep. The one with 373 * greater port guid becomes active and the 374 * other becomes passive. 375 */ 376 RDS_DPRINTF2("rds_handle_cm_req", 377 "EP(%p) Peer-peer connection handling", ep); 378 if (lgid.gid_guid > rgid.gid_guid) { 379 /* this node is active so reject this request */ 380 mutex_exit(&ep->ep_lock); 381 RDS_DPRINTF2(LABEL, "SP(%p) EP(%p): " 382 "Rejecting passive in favor of active", sp, ep); 383 return (IBT_CM_REJECT); 384 } else { 385 /* 386 * This session is not the active end, change it 387 * to passive end. 388 */ 389 ep->ep_state = RDS_EP_STATE_PASSIVE_PENDING; 390 391 rw_enter(&sp->session_lock, RW_WRITER); 392 sp->session_type = RDS_SESSION_PASSIVE; 393 sp->session_lgid = lgid; 394 sp->session_rgid = rgid; 395 rw_exit(&sp->session_lock); 396 } 397 } else { 398 rw_exit(&sp->session_lock); 399 } 400 401 ep->ep_lbufid = cmp.cmp_last_bufid; 402 ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_raddr = (ib_vaddr_t)cmp.cmp_ack_addr; 403 ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_rkey = cmp.cmp_ack_rkey; 404 cmp.cmp_last_bufid = ep->ep_rbufid; 405 cmp.cmp_ack_addr = ep->ep_ack_addr; 406 cmp.cmp_ack_rkey = ep->ep_ack_rkey; 407 mutex_exit(&ep->ep_lock); 408 409 /* continue with accepting the connection request for this channel */ 410 chanhdl = rds_ep_alloc_rc_channel(ep, reqp->req_prim_hca_port); 411 if (chanhdl == NULL) { 412 mutex_enter(&ep->ep_lock); 413 ep->ep_state = RDS_EP_STATE_UNCONNECTED; 414 mutex_exit(&ep->ep_lock); 415 return (IBT_CM_REJECT); 416 } 417 418 /* pre-post recv buffers in the RQ */ 419 rds_post_recv_buf((void *)chanhdl); 420 421 rargsp->cm_ret_len = sizeof (rds_cm_private_data_t); 422 bcopy((uint8_t *)&cmp, rcmp, sizeof (rds_cm_private_data_t)); 423 rargsp->cm_ret.rep.cm_channel = chanhdl; 424 rargsp->cm_ret.rep.cm_rdma_ra_out = 4; 425 rargsp->cm_ret.rep.cm_rdma_ra_in = 4; 426 rargsp->cm_ret.rep.cm_rnr_retry_cnt = MinRnrRetry; 427 428 RDS_DPRINTF2("rds_handle_cm_req", "Return: SP(%p) EP(%p) Chan (%p)", 429 sp, ep, chanhdl); 430 431 return (IBT_CM_ACCEPT); 432 } 433 434 /* 435 * Handle an incoming CM REP 436 * Pre-post recv buffers for the QP 437 */ 438 /* ARGSUSED */ 439 static ibt_cm_status_t 440 rds_handle_cm_rep(ibt_cm_event_t *evp, ibt_cm_return_args_t *rargsp, 441 void *rcmp, ibt_priv_data_len_t rcmp_len) 442 { 443 rds_ep_t *ep; 444 rds_cm_private_data_t cmp; 445 446 RDS_DPRINTF2("rds_handle_cm_rep", "Enter"); 447 448 /* pre-post recv buffers in the RQ */ 449 rds_post_recv_buf((void *)evp->cm_channel); 450 451 ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel); 452 bcopy((uint8_t *)evp->cm_priv_data, &cmp, 453 sizeof (rds_cm_private_data_t)); 454 ep->ep_lbufid = cmp.cmp_last_bufid; 455 ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_raddr = (ib_vaddr_t)cmp.cmp_ack_addr; 456 ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_rkey = cmp.cmp_ack_rkey; 457 458 rargsp->cm_ret_len = 0; 459 460 RDS_DPRINTF2("rds_handle_cm_rep", "Return: lbufid: %p", ep->ep_lbufid); 461 462 return (IBT_CM_ACCEPT); 463 } 464 465 /* 466 * Handle CONN EST 467 */ 468 static ibt_cm_status_t 469 rds_handle_cm_conn_est(ibt_cm_event_t *evp) 470 { 471 rds_session_t *sp; 472 rds_ep_t *ep; 473 474 ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel); 475 476 RDS_DPRINTF2("rds_handle_cm_conn_est", "EP(%p) State: %d", ep, 477 ep->ep_state); 478 479 mutex_enter(&ep->ep_lock); 480 ASSERT((ep->ep_state == RDS_EP_STATE_ACTIVE_PENDING) || 481 (ep->ep_state == RDS_EP_STATE_PASSIVE_PENDING)); 482 ep->ep_state = RDS_EP_STATE_CONNECTED; 483 ep->ep_chanhdl = evp->cm_channel; 484 sp = ep->ep_sp; 485 mutex_exit(&ep->ep_lock); 486 487 (void) rds_session_active(sp); 488 489 RDS_DPRINTF2("rds_handle_cm_conn_est", "Return"); 490 return (IBT_CM_ACCEPT); 491 } 492 493 /* 494 * Handle CONN CLOSED 495 */ 496 static ibt_cm_status_t 497 rds_handle_cm_conn_closed(ibt_cm_event_t *evp) 498 { 499 rds_ep_t *ep; 500 rds_session_t *sp; 501 502 /* Catch DREQs but ignore DREPs */ 503 if (evp->cm_event.closed != IBT_CM_CLOSED_DREQ_RCVD) { 504 RDS_DPRINTF2("rds_handle_cm_conn_closed", 505 "Ignoring Event: %d received", evp->cm_event.closed); 506 return (IBT_CM_ACCEPT); 507 } 508 509 ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel); 510 sp = ep->ep_sp; 511 RDS_DPRINTF2("rds_handle_cm_conn_closed", "EP(%p) Chan(%p) Enter", 512 ep, evp->cm_channel); 513 514 mutex_enter(&ep->ep_lock); 515 if (ep->ep_state != RDS_EP_STATE_CONNECTED) { 516 /* Ignore this DREQ */ 517 RDS_DPRINTF2("rds_handle_cm_conn_closed", 518 "EP(%p) not connected, state: %d", ep, ep->ep_state); 519 mutex_exit(&ep->ep_lock); 520 return (IBT_CM_ACCEPT); 521 } 522 ep->ep_state = RDS_EP_STATE_CLOSING; 523 mutex_exit(&ep->ep_lock); 524 525 rw_enter(&sp->session_lock, RW_WRITER); 526 RDS_DPRINTF2("rds_handle_cm_conn_closed", "SP(%p) - state: %d", sp, 527 sp->session_state); 528 529 switch (sp->session_state) { 530 case RDS_SESSION_STATE_CONNECTED: 531 case RDS_SESSION_STATE_HCA_CLOSING: 532 sp->session_state = RDS_SESSION_STATE_PASSIVE_CLOSING; 533 RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State " 534 "RDS_SESSION_STATE_PASSIVE_CLOSING", sp); 535 break; 536 537 case RDS_SESSION_STATE_PASSIVE_CLOSING: 538 sp->session_state = RDS_SESSION_STATE_CLOSED; 539 RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State " 540 "RDS_SESSION_STATE_CLOSED", sp); 541 rds_passive_session_fini(sp); 542 sp->session_state = RDS_SESSION_STATE_FINI; 543 RDS_DPRINTF3("rds_handle_cm_conn_closed", 544 "SP(%p) State RDS_SESSION_STATE_FINI", sp); 545 break; 546 547 case RDS_SESSION_STATE_ACTIVE_CLOSING: 548 case RDS_SESSION_STATE_ERROR: 549 case RDS_SESSION_STATE_CLOSED: 550 break; 551 552 case RDS_SESSION_STATE_INIT: 553 sp->session_state = RDS_SESSION_STATE_ERROR; 554 RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State " 555 "RDS_SESSION_STATE_ERROR", sp); 556 rds_passive_session_fini(sp); 557 sp->session_state = RDS_SESSION_STATE_FAILED; 558 RDS_DPRINTF3("rds_handle_cm_conn_closed", 559 "SP(%p) State RDS_SESSION_STATE_FAILED", sp); 560 break; 561 562 default: 563 RDS_DPRINTF2("rds_handle_cm_conn_closed", 564 "SP(%p) - Unexpected state: %d", sp, sp->session_state); 565 rds_passive_session_fini(sp); 566 sp->session_state = RDS_SESSION_STATE_FAILED; 567 RDS_DPRINTF3("rds_handle_cm_conn_closed", "SP(%p) State " 568 "RDS_SESSION_STATE_FAILED", sp); 569 } 570 rw_exit(&sp->session_lock); 571 572 mutex_enter(&ep->ep_lock); 573 ep->ep_state = RDS_EP_STATE_CLOSED; 574 mutex_exit(&ep->ep_lock); 575 576 RDS_DPRINTF2("rds_handle_cm_conn_closed", "SP(%p) Return", sp); 577 return (IBT_CM_ACCEPT); 578 } 579 580 /* 581 * Handle EVENT FAILURE 582 */ 583 static ibt_cm_status_t 584 rds_handle_cm_event_failure(ibt_cm_event_t *evp) 585 { 586 rds_ep_t *ep; 587 rds_session_t *sp; 588 int ret; 589 590 RDS_DPRINTF2("rds_handle_cm_event_failure", "Enter: Chan hdl: 0x%p " 591 "Code: %d msg: %d reason: %d", evp->cm_channel, 592 evp->cm_event.failed.cf_code, evp->cm_event.failed.cf_msg, 593 evp->cm_event.failed.cf_reason); 594 595 if (evp->cm_event.failed.cf_reason == IBT_CM_INVALID_SID) { 596 RDS_DPRINTF2(LABEL, 597 "Received REJ with reason IBT_CM_INVALID_SID: " 598 "RDS may not be loaded on the remote system"); 599 } 600 601 if (evp->cm_channel == NULL) { 602 return (IBT_CM_ACCEPT); 603 } 604 605 if ((evp->cm_event.failed.cf_code != IBT_CM_FAILURE_STALE) && 606 (evp->cm_event.failed.cf_msg == IBT_CM_FAILURE_REQ)) { 607 /* 608 * This end is active, just ignore, ibt_open_rc_channel() 609 * caller will take care of cleanup. 610 */ 611 RDS_DPRINTF2("rds_handle_cm_event_failure", 612 "Ignoring this event: Chan hdl: 0x%p", evp->cm_channel); 613 return (IBT_CM_ACCEPT); 614 } 615 616 ep = (rds_ep_t *)ibt_get_chan_private(evp->cm_channel); 617 sp = ep->ep_sp; 618 619 rw_enter(&sp->session_lock, RW_WRITER); 620 if (sp->session_type == RDS_SESSION_PASSIVE) { 621 RDS_DPRINTF2("rds_handle_cm_event_failure", 622 "SP(%p) - state: %d", sp, sp->session_state); 623 if ((sp->session_state == RDS_SESSION_STATE_INIT) || 624 (sp->session_state == RDS_SESSION_STATE_CONNECTED)) { 625 sp->session_state = RDS_SESSION_STATE_ERROR; 626 RDS_DPRINTF3("rds_handle_cm_event_failure", 627 "SP(%p) State RDS_SESSION_STATE_ERROR", sp); 628 629 /* 630 * Store the cm_channel for freeing later 631 * Active side frees it on ibt_open_rc_channel 632 * failure 633 */ 634 if (ep->ep_chanhdl == NULL) { 635 ep->ep_chanhdl = evp->cm_channel; 636 } 637 rw_exit(&sp->session_lock); 638 639 /* 640 * rds_passive_session_fini should not be called 641 * directly in the CM handler. It will cause a deadlock. 642 */ 643 ret = ddi_taskq_dispatch(rds_taskq, 644 rds_cleanup_passive_session, (void *)sp, 645 DDI_NOSLEEP); 646 if (ret != DDI_SUCCESS) { 647 RDS_DPRINTF2("rds_handle_cm_event_failure", 648 "SP(%p) TaskQ dispatch FAILED:%d", sp, ret); 649 } 650 return (IBT_CM_ACCEPT); 651 } 652 } 653 rw_exit(&sp->session_lock); 654 655 RDS_DPRINTF2("rds_handle_cm_event_failure", "SP(%p) Return", sp); 656 return (IBT_CM_ACCEPT); 657 } 658 659 /* 660 * CM Handler 661 * 662 * Called by IBCM 663 * The cm_private type differs for active and passive events. 664 */ 665 ibt_cm_status_t 666 rds_cm_handler(void *cm_private, ibt_cm_event_t *eventp, 667 ibt_cm_return_args_t *ret_args, void *ret_priv_data, 668 ibt_priv_data_len_t ret_len_max) 669 { 670 ibt_cm_status_t ret = IBT_CM_ACCEPT; 671 672 RDS_DPRINTF2("rds_cm_handler", "Enter: event: %d", eventp->cm_type); 673 674 switch (eventp->cm_type) { 675 case IBT_CM_EVENT_REQ_RCV: 676 ret = rds_handle_cm_req((rds_state_t *)cm_private, eventp, 677 ret_args, ret_priv_data, ret_len_max); 678 break; 679 case IBT_CM_EVENT_REP_RCV: 680 ret = rds_handle_cm_rep(eventp, ret_args, ret_priv_data, 681 ret_len_max); 682 break; 683 case IBT_CM_EVENT_MRA_RCV: 684 /* Not supported */ 685 break; 686 case IBT_CM_EVENT_CONN_EST: 687 ret = rds_handle_cm_conn_est(eventp); 688 break; 689 case IBT_CM_EVENT_CONN_CLOSED: 690 ret = rds_handle_cm_conn_closed(eventp); 691 break; 692 case IBT_CM_EVENT_FAILURE: 693 ret = rds_handle_cm_event_failure(eventp); 694 break; 695 case IBT_CM_EVENT_LAP_RCV: 696 /* Not supported */ 697 RDS_DPRINTF2(LABEL, "LAP message received"); 698 break; 699 case IBT_CM_EVENT_APR_RCV: 700 /* Not supported */ 701 RDS_DPRINTF2(LABEL, "APR message received"); 702 break; 703 default: 704 break; 705 } 706 707 RDS_DPRINTF2("rds_cm_handler", "Return"); 708 709 return (ret); 710 } 711 712 /* This is based on OFED Linux RDS */ 713 #define RDS_PORT_NUM 6556 714 715 /* 716 * Register the wellknown service with service id: RDS_SERVICE_ID 717 * Incoming connection requests should arrive on this service id. 718 */ 719 ibt_srv_hdl_t 720 rds_register_service(ibt_clnt_hdl_t rds_ibhdl) 721 { 722 ibt_srv_hdl_t srvhdl; 723 ibt_srv_desc_t srvdesc; 724 int ret; 725 726 RDS_DPRINTF2("rds_register_service", "Enter: 0x%p", rds_ibhdl); 727 728 bzero(&srvdesc, sizeof (ibt_srv_desc_t)); 729 srvdesc.sd_handler = rds_cm_handler; 730 srvdesc.sd_flags = IBT_SRV_NO_FLAGS; 731 732 /* 733 * This is the new service id as per: 734 * Annex A11: RDMA IP CM Service 735 */ 736 rdsib_statep->rds_service_id = ibt_get_ip_sid(IPPROTO_TCP, 737 RDS_PORT_NUM); 738 ret = ibt_register_service(rds_ibhdl, &srvdesc, 739 rdsib_statep->rds_service_id, 1, &srvhdl, NULL); 740 if (ret != IBT_SUCCESS) { 741 RDS_DPRINTF2(LABEL, 742 "RDS Service (0x%llx) Registration Failed: %d", 743 rdsib_statep->rds_service_id, ret); 744 return (NULL); 745 } 746 747 RDS_DPRINTF2("rds_register_service", "Return: 0x%p", srvhdl); 748 return (srvhdl); 749 } 750 751 /* Bind the RDS service on all ports */ 752 int 753 rds_bind_service(rds_state_t *statep) 754 { 755 rds_hca_t *hcap; 756 ib_gid_t gid; 757 uint_t jx, nbinds = 0, nports = 0; 758 int ret; 759 760 RDS_DPRINTF2("rds_bind_service", "Enter: 0x%p", statep); 761 762 rw_enter(&statep->rds_hca_lock, RW_READER); 763 764 hcap = statep->rds_hcalistp; 765 while (hcap != NULL) { 766 767 /* skip the HCAs that are not fully online */ 768 if ((hcap->hca_state != RDS_HCA_STATE_OPEN) && 769 (hcap->hca_state != RDS_HCA_STATE_MEM_REGISTERED)) { 770 RDS_DPRINTF2("rds_bind_service", 771 "Skipping HCA: 0x%llx, state: %d", 772 hcap->hca_guid, hcap->hca_state); 773 hcap = hcap->hca_nextp; 774 continue; 775 } 776 777 /* currently, we have space for only 4 bindhdls */ 778 ASSERT(hcap->hca_nports < 4); 779 for (jx = 0; jx < hcap->hca_nports; jx++) { 780 nports++; 781 if (hcap->hca_pinfop[jx].p_linkstate != 782 IBT_PORT_ACTIVE) { 783 /* 784 * service bind will be called in the async 785 * handler when the port comes up. Clear any 786 * stale bind handle. 787 */ 788 hcap->hca_bindhdl[jx] = NULL; 789 continue; 790 } 791 792 gid = hcap->hca_pinfop[jx].p_sgid_tbl[0]; 793 RDS_DPRINTF5(LABEL, "HCA: 0x%llx Port: %d " 794 "gid: %llx:%llx", hcap->hca_guid, 795 hcap->hca_pinfop[jx].p_port_num, gid.gid_prefix, 796 gid.gid_guid); 797 798 /* pass statep as cm_private */ 799 ret = ibt_bind_service(statep->rds_srvhdl, gid, 800 NULL, statep, &hcap->hca_bindhdl[jx]); 801 if (ret != IBT_SUCCESS) { 802 RDS_DPRINTF2(LABEL, "Bind service for " 803 "HCA: 0x%llx Port: %d gid %llx:%llx " 804 "failed: %d", hcap->hca_guid, 805 hcap->hca_pinfop[jx].p_port_num, 806 gid.gid_prefix, gid.gid_guid, ret); 807 continue; 808 } 809 810 nbinds++; 811 } 812 hcap = hcap->hca_nextp; 813 } 814 815 rw_exit(&statep->rds_hca_lock); 816 817 RDS_DPRINTF2(LABEL, "RDS Service available on %d/%d ports", 818 nbinds, nports); 819 820 #if 0 821 if (nbinds == 0) { 822 return (-1); 823 } 824 #endif 825 826 RDS_DPRINTF2("rds_bind_service", "Return"); 827 828 return (0); 829 } 830 831 /* Open an RC connection */ 832 int 833 rds_open_rc_channel(rds_ep_t *ep, ibt_path_info_t *pinfo, 834 ibt_execution_mode_t mode, ibt_channel_hdl_t *chanhdl) 835 { 836 rds_session_t *sp; 837 ibt_chan_open_args_t ocargs; 838 ibt_rc_returns_t ocrets; 839 rds_cm_private_data_t cmp; 840 uint8_t hca_port; 841 ibt_channel_hdl_t hdl; 842 ibt_status_t ret = 0; 843 ibt_ip_cm_info_t ipcm_info; 844 845 RDS_DPRINTF2("rds_open_rc_channel", "Enter: EP(%p) mode: %d", ep, mode); 846 847 sp = ep->ep_sp; 848 849 bzero(&ipcm_info, sizeof (ibt_ip_cm_info_t)); 850 ipcm_info.src_addr.family = AF_INET; 851 ipcm_info.SRCIP = sp->session_myip; 852 ipcm_info.dst_addr.family = AF_INET; 853 ipcm_info.DSTIP = sp->session_remip; 854 ipcm_info.src_port = RDS_PORT_NUM; 855 ret = ibt_format_ip_private_data(&ipcm_info, 856 sizeof (rds_cm_private_data_t), &cmp); 857 if (ret != IBT_SUCCESS) { 858 RDS_DPRINTF2(LABEL, "SP(%p) EP(%p) ibt_format_ip_private_data " 859 "failed: %d", sp, ep, ret); 860 return (-1); 861 } 862 863 hca_port = pinfo->pi_prim_cep_path.cep_hca_port_num; 864 865 hdl = rds_ep_alloc_rc_channel(ep, hca_port); 866 if (hdl == NULL) { 867 return (-1); 868 } 869 870 cmp.cmp_version = RDS_VERSION; 871 cmp.cmp_arch = RDS_THIS_ARCH; 872 cmp.cmp_eptype = ep->ep_type; 873 cmp.cmp_failover = sp->session_failover; 874 cmp.cmp_last_bufid = ep->ep_rbufid; 875 cmp.cmp_user_buffer_size = UserBufferSize; 876 cmp.cmp_ack_addr = ep->ep_ack_addr; 877 cmp.cmp_ack_rkey = ep->ep_ack_rkey; 878 879 bzero(&ocargs, sizeof (ibt_chan_open_args_t)); 880 bzero(&ocrets, sizeof (ibt_rc_returns_t)); 881 ocargs.oc_path = pinfo; 882 ocargs.oc_cm_handler = rds_cm_handler; 883 ocargs.oc_cm_clnt_private = NULL; 884 ocargs.oc_rdma_ra_out = 4; 885 ocargs.oc_rdma_ra_in = 4; 886 ocargs.oc_priv_data_len = sizeof (rds_cm_private_data_t); 887 ocargs.oc_priv_data = &cmp; 888 ocargs.oc_path_retry_cnt = IBPathRetryCount; 889 ocargs.oc_path_rnr_retry_cnt = MinRnrRetry; 890 ret = ibt_open_rc_channel(hdl, IBT_OCHAN_NO_FLAGS, 891 mode, &ocargs, &ocrets); 892 if (ret != IBT_SUCCESS) { 893 RDS_DPRINTF2(LABEL, "SP(%p) EP(%p) ibt_open_rc_channel " 894 "failed: %d", sp, ep, ret); 895 (void) ibt_flush_channel(hdl); 896 (void) ibt_free_channel(hdl); 897 898 mutex_enter(&ep->ep_lock); 899 /* don't cleanup if this failure is due to peer-peer race */ 900 if (ep->ep_state == RDS_EP_STATE_ACTIVE_PENDING) { 901 /* cleanup stuff allocated in rds_ep_alloc_rc_channel */ 902 ep->ep_state = RDS_EP_STATE_ERROR; 903 rds_ep_free_rc_channel(ep); 904 } 905 mutex_exit(&ep->ep_lock); 906 907 return (-1); 908 } 909 910 *chanhdl = hdl; 911 912 RDS_DPRINTF2("rds_open_rc_channel", "Return: EP(%p) Chan: %p", ep, 913 *chanhdl); 914 915 return (0); 916 } 917 918 int 919 rds_close_rc_channel(ibt_channel_hdl_t chanhdl, ibt_execution_mode_t mode) 920 { 921 int ret; 922 923 RDS_DPRINTF2("rds_close_rc_channel", "Enter: Chan(%p) Mode(%d)", 924 chanhdl, mode); 925 926 ret = ibt_close_rc_channel(chanhdl, mode, NULL, 0, NULL, NULL, 0); 927 928 RDS_DPRINTF2("rds_close_rc_channel", "Return Chan(%p)", chanhdl); 929 930 return (ret); 931 } 932