1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 2005 SilverStorm Technologies, Inc. All rights reserved. 27 * 28 * This software is available to you under a choice of one of two 29 * licenses. You may choose to be licensed under the terms of the GNU 30 * General Public License (GPL) Version 2, available from the file 31 * COPYING in the main directory of this source tree, or the 32 * OpenIB.org BSD license below: 33 * 34 * Redistribution and use in source and binary forms, with or 35 * without modification, are permitted provided that the following 36 * conditions are met: 37 * 38 * - Redistributions of source code must retain the above 39 * copyright notice, this list of conditions and the following 40 * disclaimer. 41 * 42 * - Redistributions in binary form must reproduce the above 43 * copyright notice, this list of conditions and the following 44 * disclaimer in the documentation and/or other materials 45 * provided with the distribution. 46 * 47 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 48 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 49 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 50 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 51 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 52 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 53 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 54 * SOFTWARE. 55 * 56 */ 57 /* 58 * Sun elects to include this software in Sun product 59 * under the OpenIB BSD license. 60 * 61 * 62 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 63 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 64 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 65 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 66 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 67 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 68 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 69 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 70 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 71 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 72 * POSSIBILITY OF SUCH DAMAGE. 73 */ 74 75 #pragma ident "%Z%%M% %I% %E% SMI" 76 77 #include <sys/ib/clients/rds/rdsib_cm.h> 78 #include <sys/ib/clients/rds/rdsib_ib.h> 79 #include <sys/ib/clients/rds/rdsib_buf.h> 80 #include <sys/ib/clients/rds/rdsib_ep.h> 81 #include <sys/ib/clients/rds/rds_kstat.h> 82 83 /* 84 * This File contains the buffer management code 85 */ 86 87 #define DUMP_USER_PARAMS() \ 88 RDS_DPRINTF3(LABEL, "UserBufferSize = %d", UserBufferSize); \ 89 RDS_DPRINTF3(LABEL, "RdsPktSize = %d", RdsPktSize); \ 90 RDS_DPRINTF3(LABEL, "MaxRecvMemory = %d", MaxRecvMemory); \ 91 RDS_DPRINTF3(LABEL, "MaxDataSendBuffers = %d", MaxDataSendBuffers); \ 92 RDS_DPRINTF3(LABEL, "MaxDataRecvBuffers = %d", MaxDataRecvBuffers); \ 93 RDS_DPRINTF3(LABEL, "MaxCtrlSendBuffers = %d", MaxCtrlSendBuffers); \ 94 RDS_DPRINTF3(LABEL, "MaxCtrlRecvBuffers = %d", MaxCtrlRecvBuffers); \ 95 RDS_DPRINTF3(LABEL, "DataRecvBufferLWM = %d", DataRecvBufferLWM); \ 96 RDS_DPRINTF3(LABEL, "PendingRxPktsHWM = %d", PendingRxPktsHWM); \ 97 RDS_DPRINTF3(LABEL, "MinRnrRetry = %d", MinRnrRetry) 98 99 static void 100 rds_free_mblk(char *arg) 101 { 102 rds_buf_t *bp = (rds_buf_t *)(uintptr_t)arg; 103 104 /* Free the recv buffer */ 105 RDS_DPRINTF4("rds_free_mblk", "Enter: BP(%p)", bp); 106 ASSERT(bp->buf_state == RDS_RCVBUF_ONSOCKQ); 107 rds_free_recv_buf(bp, 1); 108 RDS_DECR_RXPKTS_PEND(1); 109 RDS_DPRINTF4("rds_free_mblk", "Return: BP(%p)", bp); 110 } 111 112 void 113 rds_free_recv_caches(rds_state_t *statep) 114 { 115 rds_hca_t *hcap; 116 int ret; 117 118 RDS_DPRINTF4("rds_free_recv_caches", "Enter"); 119 120 mutex_enter(&rds_dpool.pool_lock); 121 if (rds_dpool.pool_memp == NULL) { 122 RDS_DPRINTF2("rds_free_recv_caches", "Caches are empty"); 123 mutex_exit(&rds_dpool.pool_lock); 124 return; 125 } 126 127 /* 128 * All buffers must have been freed as all sessions are closed 129 * and destroyed 130 */ 131 ASSERT(rds_dpool.pool_nbusy == 0); 132 RDS_DPRINTF2("rds_free_recv_caches", "Data Pool has " 133 "pending buffers: %d", rds_dpool.pool_nbusy); 134 while (rds_dpool.pool_nbusy != 0) { 135 mutex_exit(&rds_dpool.pool_lock); 136 delay(drv_usectohz(1000000)); 137 mutex_enter(&rds_dpool.pool_lock); 138 } 139 140 hcap = statep->rds_hcalistp; 141 while (hcap != NULL) { 142 if (hcap->hca_mrhdl != NULL) { 143 ret = ibt_deregister_mr(hcap->hca_hdl, 144 hcap->hca_mrhdl); 145 if (ret == IBT_SUCCESS) { 146 hcap->hca_mrhdl = NULL; 147 hcap->hca_lkey = 0; 148 hcap->hca_rkey = 0; 149 } else { 150 RDS_DPRINTF2(LABEL, "ibt_deregister_mr " 151 "failed: %d, mrhdl: 0x%p", ret, 152 hcap->hca_mrhdl); 153 } 154 } 155 hcap = hcap->hca_nextp; 156 } 157 158 kmem_free(rds_dpool.pool_bufmemp, (rds_dpool.pool_nbuffers + 159 rds_cpool.pool_nbuffers) * sizeof (rds_buf_t)); 160 rds_dpool.pool_bufmemp = NULL; 161 162 kmem_free(rds_dpool.pool_memp, rds_dpool.pool_memsize); 163 rds_dpool.pool_memp = NULL; 164 165 mutex_exit(&rds_dpool.pool_lock); 166 167 RDS_DPRINTF4("rds_free_recv_caches", "Return"); 168 } 169 170 int 171 rds_init_recv_caches(rds_state_t *statep) 172 { 173 uint8_t *mp; 174 rds_buf_t *bp; 175 rds_hca_t *hcap; 176 uint32_t nsessions; 177 uint_t ix; 178 uint_t ndatarx, nctrlrx; 179 uint8_t *memp; 180 uint_t memsize, nbuf; 181 rds_buf_t *bufmemp; 182 ibt_mr_attr_t mem_attr; 183 ibt_mr_desc_t mem_desc; 184 int ret; 185 186 RDS_DPRINTF4("rds_init_recv_caches", "Enter"); 187 188 DUMP_USER_PARAMS(); 189 190 mutex_enter(&rds_dpool.pool_lock); 191 if (rds_dpool.pool_memp != NULL) { 192 RDS_DPRINTF2("rds_init_recv_caches", "Pools are already " 193 "initialized"); 194 mutex_exit(&rds_dpool.pool_lock); 195 return (0); 196 } 197 198 /* Max number of receive buffers on the system */ 199 ndatarx = (MaxRecvMemory * 1024)/UserBufferSize; 200 201 /* 202 * High water mark for the receive buffers in the system. If the 203 * number of buffers used crosses this mark then all sockets in 204 * would be stalled. The port quota for the sockets is set based 205 * on this limit. 206 */ 207 rds_rx_pkts_pending_hwm = (PendingRxPktsHWM * ndatarx)/100; 208 209 /* nsessions can never be less than 1 */ 210 nsessions = ndatarx/MaxDataRecvBuffers; 211 nctrlrx = (nsessions + 1) * MaxCtrlRecvBuffers; 212 213 RDS_DPRINTF3(LABEL, "Number of Possible Sessions: %d", nsessions); 214 215 /* Add the hdr */ 216 RdsPktSize = UserBufferSize + RDS_DATA_HDR_SZ; 217 218 memsize = (ndatarx * RdsPktSize) + (nctrlrx * RDS_CTRLPKT_SIZE); 219 nbuf = ndatarx + nctrlrx; 220 RDS_DPRINTF3(LABEL, "RDS Buffer Pool Memory: %lld", memsize); 221 RDS_DPRINTF3(LABEL, "Total Buffers: %d", nbuf); 222 223 memp = (uint8_t *)kmem_zalloc(memsize, KM_NOSLEEP); 224 if (memp == NULL) { 225 RDS_DPRINTF1(LABEL, "RDS Memory allocation failed"); 226 mutex_exit(&rds_dpool.pool_lock); 227 return (-1); 228 } 229 230 RDS_DPRINTF3(LABEL, "RDS Buffer Entries Memory: %lld", 231 nbuf * sizeof (rds_buf_t)); 232 233 /* allocate memory for buffer entries */ 234 bufmemp = (rds_buf_t *)kmem_zalloc(nbuf * sizeof (rds_buf_t), 235 KM_SLEEP); 236 237 /* register the memory with all HCAs */ 238 mem_attr.mr_vaddr = (ib_vaddr_t)(uintptr_t)memp; 239 mem_attr.mr_len = memsize; 240 mem_attr.mr_as = NULL; 241 mem_attr.mr_flags = IBT_MR_ENABLE_LOCAL_WRITE; 242 243 hcap = statep->rds_hcalistp; 244 while (hcap != NULL) { 245 ret = ibt_register_mr(hcap->hca_hdl, hcap->hca_pdhdl, 246 &mem_attr, &hcap->hca_mrhdl, &mem_desc); 247 if (ret != IBT_SUCCESS) { 248 RDS_DPRINTF2(LABEL, "ibt_register_mr failed: %d", ret); 249 return (-1); 250 } 251 252 hcap->hca_lkey = mem_desc.md_lkey; 253 hcap->hca_rkey = mem_desc.md_rkey; 254 255 hcap = hcap->hca_nextp; 256 } 257 258 /* Initialize data pool */ 259 rds_dpool.pool_memp = memp; 260 rds_dpool.pool_memsize = memsize; 261 rds_dpool.pool_bufmemp = bufmemp; 262 rds_dpool.pool_nbuffers = ndatarx; 263 rds_dpool.pool_nbusy = 0; 264 rds_dpool.pool_nfree = ndatarx; 265 266 /* chain the buffers */ 267 mp = memp; 268 bp = bufmemp; 269 for (ix = 0; ix < ndatarx; ix++) { 270 bp[ix].buf_nextp = &bp[ix + 1]; 271 bp[ix].buf_ds.ds_va = (ib_vaddr_t)(uintptr_t)mp; 272 bp[ix].buf_state = RDS_RCVBUF_FREE; 273 bp[ix].buf_frtn.free_func = rds_free_mblk; 274 bp[ix].buf_frtn.free_arg = (char *)&bp[ix]; 275 mp = mp + RdsPktSize; 276 } 277 bp[ndatarx - 1].buf_nextp = NULL; 278 rds_dpool.pool_headp = &bp[0]; 279 rds_dpool.pool_tailp = &bp[ndatarx - 1]; 280 281 /* Initialize ctrl pool */ 282 rds_cpool.pool_nbuffers = nctrlrx; 283 rds_cpool.pool_nbusy = 0; 284 rds_cpool.pool_nfree = nctrlrx; 285 286 /* chain the buffers */ 287 for (ix = ndatarx; ix < nbuf - 1; ix++) { 288 bp[ix].buf_nextp = &bp[ix + 1]; 289 bp[ix].buf_ds.ds_va = (ib_vaddr_t)(uintptr_t)mp; 290 mp = mp + RDS_CTRLPKT_SIZE; 291 } 292 bp[nbuf - 1].buf_ds.ds_va = (ib_vaddr_t)(uintptr_t)mp; 293 bp[nbuf - 1].buf_nextp = NULL; 294 rds_cpool.pool_headp = &bp[ndatarx]; 295 rds_cpool.pool_tailp = &bp[nbuf - 1]; 296 297 mutex_exit(&rds_dpool.pool_lock); 298 299 RDS_DPRINTF3(LABEL, "rdsmemp start: %p end: %p", memp, mp); 300 RDS_DPRINTF4("rds_init_recv_caches", "Return"); 301 return (0); 302 } 303 304 void 305 rds_free_send_pool(rds_ep_t *ep) 306 { 307 rds_bufpool_t *pool; 308 rds_hca_t *hcap; 309 int ret; 310 311 pool = &ep->ep_sndpool; 312 313 mutex_enter(&pool->pool_lock); 314 if (pool->pool_memp == NULL) { 315 mutex_exit(&pool->pool_lock); 316 RDS_DPRINTF2("rds_free_send_pool", 317 "EP(%p) DOUBLE Free on Send Pool", ep); 318 return; 319 } 320 321 /* get the hcap for the HCA hosting this channel */ 322 hcap = rds_get_hcap(rdsib_statep, ep->ep_hca_guid); 323 if (hcap == NULL) { 324 RDS_DPRINTF2("rds_free_send_pool", "HCA (0x%llx) not found", 325 ep->ep_hca_guid); 326 } else { 327 ret = ibt_deregister_mr(hcap->hca_hdl, ep->ep_snd_mrhdl); 328 if (ret != IBT_SUCCESS) { 329 RDS_DPRINTF2(LABEL, 330 "ibt_deregister_mr failed: %d, mrhdl: 0x%p", 331 ret, ep->ep_snd_mrhdl); 332 } 333 334 if (ep->ep_ack_addr) { 335 ret = ibt_deregister_mr(hcap->hca_hdl, ep->ep_ackhdl); 336 if (ret != IBT_SUCCESS) { 337 RDS_DPRINTF2(LABEL, 338 "ibt_deregister_mr ackhdl failed: %d, " 339 "mrhdl: 0x%p", ret, ep->ep_ackhdl); 340 } 341 342 kmem_free((void *)ep->ep_ack_addr, sizeof (uintptr_t)); 343 ep->ep_ack_addr = NULL; 344 } 345 } 346 347 kmem_free(pool->pool_memp, pool->pool_memsize); 348 kmem_free(pool->pool_bufmemp, 349 pool->pool_nbuffers * sizeof (rds_buf_t)); 350 pool->pool_memp = NULL; 351 pool->pool_bufmemp = NULL; 352 mutex_exit(&pool->pool_lock); 353 } 354 355 int 356 rds_init_send_pool(rds_ep_t *ep) 357 { 358 uint8_t *mp; 359 rds_buf_t *bp; 360 rds_hca_t *hcap; 361 uint_t ix, rcv_len; 362 ibt_mr_attr_t mem_attr; 363 ibt_mr_desc_t mem_desc; 364 uint8_t *memp; 365 rds_buf_t *bufmemp; 366 uintptr_t ack_addr = NULL; 367 uint_t memsize; 368 uint_t nbuf; 369 rds_bufpool_t *spool; 370 rds_data_hdr_t *pktp; 371 int ret; 372 373 RDS_DPRINTF2("rds_init_send_pool", "Enter"); 374 375 spool = &ep->ep_sndpool; 376 377 ASSERT(spool->pool_memp == NULL); 378 379 /* get the hcap for the HCA hosting this channel */ 380 hcap = rds_get_hcap(rdsib_statep, ep->ep_hca_guid); 381 if (hcap == NULL) { 382 RDS_DPRINTF2("rds_init_send_pool", "HCA (0x%llx) not found", 383 ep->ep_hca_guid); 384 return (-1); 385 } 386 387 if (ep->ep_type == RDS_EP_TYPE_DATA) { 388 spool->pool_nbuffers = MaxDataSendBuffers; 389 spool->pool_nbusy = 0; 390 spool->pool_nfree = MaxDataSendBuffers; 391 memsize = (MaxDataSendBuffers * RdsPktSize) + 392 sizeof (uintptr_t); 393 rcv_len = RdsPktSize; 394 } else { 395 spool->pool_nbuffers = MaxCtrlSendBuffers; 396 spool->pool_nbusy = 0; 397 spool->pool_nfree = MaxCtrlSendBuffers; 398 memsize = MaxCtrlSendBuffers * RDS_CTRLPKT_SIZE; 399 rcv_len = RDS_CTRLPKT_SIZE; 400 } 401 nbuf = spool->pool_nbuffers; 402 403 RDS_DPRINTF3(LABEL, "RDS Send Pool Memory: %lld", memsize); 404 405 memp = (uint8_t *)kmem_zalloc(memsize, KM_NOSLEEP); 406 if (memp == NULL) { 407 RDS_DPRINTF1(LABEL, "RDS Send Memory allocation failed"); 408 return (-1); 409 } 410 411 RDS_DPRINTF3(LABEL, "RDS Buffer Entries Memory: %lld", 412 nbuf * sizeof (rds_buf_t)); 413 414 /* allocate memory for buffer entries */ 415 bufmemp = (rds_buf_t *)kmem_zalloc(nbuf * sizeof (rds_buf_t), 416 KM_SLEEP); 417 418 if (ep->ep_type == RDS_EP_TYPE_DATA) { 419 ack_addr = (uintptr_t)kmem_zalloc(sizeof (uintptr_t), KM_SLEEP); 420 421 /* register the memory with the HCA for this channel */ 422 mem_attr.mr_vaddr = (ib_vaddr_t)ack_addr; 423 mem_attr.mr_len = sizeof (uintptr_t); 424 mem_attr.mr_as = NULL; 425 mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE | 426 IBT_MR_ENABLE_REMOTE_WRITE; 427 428 ret = ibt_register_mr(hcap->hca_hdl, hcap->hca_pdhdl, 429 &mem_attr, &ep->ep_ackhdl, &mem_desc); 430 if (ret != IBT_SUCCESS) { 431 RDS_DPRINTF2("rds_init_send_pool", 432 "EP(%p): ibt_register_mr for ack failed: %d", 433 ep, ret); 434 kmem_free(memp, memsize); 435 kmem_free(bufmemp, nbuf * sizeof (rds_buf_t)); 436 kmem_free((void *)ack_addr, sizeof (uintptr_t)); 437 return (-1); 438 } 439 ep->ep_ack_rkey = mem_desc.md_rkey; 440 ep->ep_ack_addr = ack_addr; 441 } 442 443 /* register the memory with the HCA for this channel */ 444 mem_attr.mr_vaddr = (ib_vaddr_t)(uintptr_t)memp; 445 mem_attr.mr_len = memsize; 446 mem_attr.mr_as = NULL; 447 mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE; 448 449 ret = ibt_register_mr(hcap->hca_hdl, hcap->hca_pdhdl, 450 &mem_attr, &ep->ep_snd_mrhdl, &mem_desc); 451 if (ret != IBT_SUCCESS) { 452 RDS_DPRINTF2("rds_init_send_pool", "EP(%p): ibt_register_mr " 453 "failed: %d", ep, ret); 454 kmem_free(memp, memsize); 455 kmem_free(bufmemp, nbuf * sizeof (rds_buf_t)); 456 if (ack_addr != NULL) 457 kmem_free((void *)ack_addr, sizeof (uintptr_t)); 458 return (-1); 459 } 460 ep->ep_snd_lkey = mem_desc.md_lkey; 461 462 463 /* Initialize the pool */ 464 spool->pool_memp = memp; 465 spool->pool_memsize = memsize; 466 spool->pool_bufmemp = bufmemp; 467 spool->pool_sqpoll_pending = B_FALSE; 468 469 /* chain the buffers and initialize them */ 470 mp = memp; 471 bp = bufmemp; 472 473 if (ep->ep_type == RDS_EP_TYPE_DATA) { 474 for (ix = 0; ix < nbuf - 1; ix++) { 475 bp[ix].buf_nextp = &bp[ix + 1]; 476 bp[ix].buf_ep = ep; 477 bp[ix].buf_ds.ds_va = (ib_vaddr_t)(uintptr_t)mp; 478 bp[ix].buf_ds.ds_key = ep->ep_snd_lkey; 479 bp[ix].buf_state = RDS_SNDBUF_FREE; 480 pktp = (rds_data_hdr_t *)(uintptr_t)mp; 481 pktp->dh_bufid = (uintptr_t)&bp[ix]; 482 mp = mp + rcv_len; 483 } 484 bp[nbuf - 1].buf_nextp = NULL; 485 bp[nbuf - 1].buf_ep = ep; 486 bp[nbuf - 1].buf_ds.ds_va = (ib_vaddr_t)(uintptr_t)mp; 487 bp[nbuf - 1].buf_ds.ds_key = ep->ep_snd_lkey; 488 bp[nbuf - 1].buf_state = RDS_SNDBUF_FREE; 489 pktp = (rds_data_hdr_t *)(uintptr_t)mp; 490 pktp->dh_bufid = (uintptr_t)&bp[nbuf - 1]; 491 492 spool->pool_headp = &bp[0]; 493 spool->pool_tailp = &bp[nbuf - 1]; 494 495 mp = mp + rcv_len; 496 ep->ep_ackds.ds_va = (ib_vaddr_t)(uintptr_t)mp; 497 ep->ep_ackds.ds_key = ep->ep_snd_lkey; 498 ep->ep_ackds.ds_len = sizeof (uintptr_t); 499 500 *(uintptr_t *)ep->ep_ack_addr = (uintptr_t)spool->pool_tailp; 501 } else { 502 /* control send pool */ 503 for (ix = 0; ix < nbuf - 1; ix++) { 504 bp[ix].buf_nextp = &bp[ix + 1]; 505 bp[ix].buf_ep = ep; 506 bp[ix].buf_ds.ds_va = (ib_vaddr_t)(uintptr_t)mp; 507 bp[ix].buf_ds.ds_key = ep->ep_snd_lkey; 508 bp[ix].buf_state = RDS_SNDBUF_FREE; 509 mp = mp + rcv_len; 510 } 511 bp[nbuf - 1].buf_nextp = NULL; 512 bp[nbuf - 1].buf_ep = ep; 513 bp[nbuf - 1].buf_ds.ds_va = (ib_vaddr_t)(uintptr_t)mp; 514 bp[nbuf - 1].buf_ds.ds_key = ep->ep_snd_lkey; 515 bp[nbuf - 1].buf_state = RDS_SNDBUF_FREE; 516 spool->pool_headp = &bp[0]; 517 spool->pool_tailp = &bp[nbuf - 1]; 518 } 519 520 RDS_DPRINTF3(LABEL, "rdsmemp start: %p end: %p", memp, mp); 521 RDS_DPRINTF2("rds_init_send_pool", "Return"); 522 523 return (0); 524 } 525 526 int 527 rds_reinit_send_pool(rds_ep_t *ep, ib_guid_t hca_guid) 528 { 529 rds_buf_t *bp; 530 rds_hca_t *hcap; 531 ibt_mr_attr_t mem_attr; 532 ibt_mr_desc_t mem_desc; 533 rds_bufpool_t *spool; 534 int ret; 535 536 RDS_DPRINTF2("rds_reinit_send_pool", "Enter: EP(%p)", ep); 537 538 spool = &ep->ep_sndpool; 539 ASSERT(spool->pool_memp != NULL); 540 541 /* deregister the send pool memory from the previous HCA */ 542 hcap = rds_get_hcap(rdsib_statep, ep->ep_hca_guid); 543 if (hcap == NULL) { 544 RDS_DPRINTF2("rds_reinit_send_pool", "HCA (0x%llx) not found", 545 ep->ep_hca_guid); 546 } else { 547 if (ep->ep_snd_mrhdl != NULL) { 548 (void) ibt_deregister_mr(hcap->hca_hdl, 549 ep->ep_snd_mrhdl); 550 ep->ep_snd_mrhdl = NULL; 551 ep->ep_snd_lkey = 0; 552 } 553 554 if ((ep->ep_type == RDS_EP_TYPE_DATA) && 555 (ep->ep_ackhdl != NULL)) { 556 (void) ibt_deregister_mr(hcap->hca_hdl, ep->ep_ackhdl); 557 ep->ep_ackhdl = NULL; 558 ep->ep_ack_rkey = 0; 559 } 560 561 ep->ep_hca_guid = NULL; 562 } 563 564 /* get the hcap for the new HCA */ 565 hcap = rds_get_hcap(rdsib_statep, hca_guid); 566 if (hcap == NULL) { 567 RDS_DPRINTF2("rds_reinit_send_pool", "HCA (0x%llx) not found", 568 hca_guid); 569 return (-1); 570 } 571 572 /* register the send memory */ 573 mem_attr.mr_vaddr = (ib_vaddr_t)(uintptr_t)spool->pool_memp; 574 mem_attr.mr_len = spool->pool_memsize; 575 mem_attr.mr_as = NULL; 576 mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE; 577 578 ret = ibt_register_mr(hcap->hca_hdl, hcap->hca_pdhdl, 579 &mem_attr, &ep->ep_snd_mrhdl, &mem_desc); 580 if (ret != IBT_SUCCESS) { 581 RDS_DPRINTF2("rds_reinit_send_pool", 582 "EP(%p): ibt_register_mr failed: %d", ep, ret); 583 return (-1); 584 } 585 ep->ep_snd_lkey = mem_desc.md_lkey; 586 587 /* register the acknowledgement space */ 588 if (ep->ep_type == RDS_EP_TYPE_DATA) { 589 mem_attr.mr_vaddr = (ib_vaddr_t)ep->ep_ack_addr; 590 mem_attr.mr_len = sizeof (uintptr_t); 591 mem_attr.mr_as = NULL; 592 mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE | 593 IBT_MR_ENABLE_REMOTE_WRITE; 594 595 ret = ibt_register_mr(hcap->hca_hdl, hcap->hca_pdhdl, 596 &mem_attr, &ep->ep_ackhdl, &mem_desc); 597 if (ret != IBT_SUCCESS) { 598 RDS_DPRINTF2("rds_reinit_send_pool", 599 "EP(%p): ibt_register_mr for ack failed: %d", 600 ep, ret); 601 (void) ibt_deregister_mr(hcap->hca_hdl, 602 ep->ep_snd_mrhdl); 603 ep->ep_snd_mrhdl = NULL; 604 ep->ep_snd_lkey = 0; 605 return (-1); 606 } 607 ep->ep_ack_rkey = mem_desc.md_rkey; 608 609 /* update the LKEY in the acknowledgement WR */ 610 ep->ep_ackds.ds_key = ep->ep_snd_lkey; 611 } 612 613 /* update the LKEY in each buffer */ 614 bp = spool->pool_headp; 615 while (bp) { 616 bp->buf_ds.ds_key = ep->ep_snd_lkey; 617 bp = bp->buf_nextp; 618 } 619 620 ep->ep_hca_guid = hca_guid; 621 622 RDS_DPRINTF2("rds_reinit_send_pool", "Return: EP(%p)", ep); 623 624 return (0); 625 } 626 627 void 628 rds_free_recv_pool(rds_ep_t *ep) 629 { 630 rds_bufpool_t *pool; 631 632 if (ep->ep_type == RDS_EP_TYPE_DATA) { 633 pool = &rds_dpool; 634 } else { 635 pool = &rds_cpool; 636 } 637 638 mutex_enter(&ep->ep_rcvpool.pool_lock); 639 if (ep->ep_rcvpool.pool_nfree != 0) { 640 rds_free_buf(pool, ep->ep_rcvpool.pool_headp, 641 ep->ep_rcvpool.pool_nfree); 642 ep->ep_rcvpool.pool_nfree = 0; 643 ep->ep_rcvpool.pool_headp = NULL; 644 ep->ep_rcvpool.pool_tailp = NULL; 645 } 646 mutex_exit(&ep->ep_rcvpool.pool_lock); 647 } 648 649 int 650 rds_init_recv_pool(rds_ep_t *ep) 651 { 652 rds_bufpool_t *rpool; 653 rds_qp_t *recvqp; 654 655 recvqp = &ep->ep_recvqp; 656 rpool = &ep->ep_rcvpool; 657 if (ep->ep_type == RDS_EP_TYPE_DATA) { 658 recvqp->qp_depth = MaxDataRecvBuffers; 659 recvqp->qp_level = 0; 660 recvqp->qp_lwm = (DataRecvBufferLWM * MaxDataRecvBuffers)/100; 661 recvqp->qp_taskqpending = B_FALSE; 662 663 rpool->pool_nbuffers = MaxDataRecvBuffers; 664 rpool->pool_nbusy = 0; 665 rpool->pool_nfree = 0; 666 } else { 667 recvqp->qp_depth = MaxCtrlRecvBuffers; 668 recvqp->qp_level = 0; 669 recvqp->qp_lwm = (CtrlRecvBufferLWM * MaxCtrlRecvBuffers)/100; 670 recvqp->qp_taskqpending = B_FALSE; 671 672 rpool->pool_nbuffers = MaxCtrlRecvBuffers; 673 rpool->pool_nbusy = 0; 674 rpool->pool_nfree = 0; 675 } 676 677 return (0); 678 } 679 680 /* Free buffers to the global pool, either cpool or dpool */ 681 void 682 rds_free_buf(rds_bufpool_t *pool, rds_buf_t *bp, uint_t nbuf) 683 { 684 uint_t ix; 685 686 RDS_DPRINTF4("rds_free_buf", "Enter"); 687 688 ASSERT(nbuf != 0); 689 690 mutex_enter(&pool->pool_lock); 691 692 if (pool->pool_nfree != 0) { 693 pool->pool_tailp->buf_nextp = bp; 694 } else { 695 pool->pool_headp = bp; 696 } 697 698 if (nbuf == 1) { 699 ASSERT(bp->buf_state == RDS_RCVBUF_FREE); 700 bp->buf_ep = NULL; 701 bp->buf_nextp = NULL; 702 pool->pool_tailp = bp; 703 } else { 704 for (ix = 1; ix < nbuf; ix++) { 705 ASSERT(bp->buf_state == RDS_RCVBUF_FREE); 706 bp->buf_ep = NULL; 707 bp = bp->buf_nextp; 708 } 709 ASSERT(bp->buf_state == RDS_RCVBUF_FREE); 710 bp->buf_ep = NULL; 711 bp->buf_nextp = NULL; 712 pool->pool_tailp = bp; 713 } 714 /* tail is always the last buffer */ 715 pool->pool_tailp->buf_nextp = NULL; 716 717 pool->pool_nfree += nbuf; 718 pool->pool_nbusy -= nbuf; 719 720 mutex_exit(&pool->pool_lock); 721 722 RDS_DPRINTF4("rds_free_buf", "Return"); 723 } 724 725 /* Get buffers from the global pools, either cpool or dpool */ 726 rds_buf_t * 727 rds_get_buf(rds_bufpool_t *pool, uint_t nbuf, uint_t *nret) 728 { 729 rds_buf_t *bp = NULL, *bp1; 730 uint_t ix; 731 732 RDS_DPRINTF4("rds_get_buf", "Enter"); 733 734 mutex_enter(&pool->pool_lock); 735 736 RDS_DPRINTF3("rds_get_buf", "Available: %d Needed: %d", 737 pool->pool_nfree, nbuf); 738 739 if (nbuf < pool->pool_nfree) { 740 *nret = nbuf; 741 742 bp1 = pool->pool_headp; 743 for (ix = 1; ix < nbuf; ix++) { 744 bp1 = bp1->buf_nextp; 745 } 746 747 bp = pool->pool_headp; 748 pool->pool_headp = bp1->buf_nextp; 749 bp1->buf_nextp = NULL; 750 751 pool->pool_nfree -= nbuf; 752 pool->pool_nbusy += nbuf; 753 } else if (nbuf >= pool->pool_nfree) { 754 *nret = pool->pool_nfree; 755 756 bp = pool->pool_headp; 757 758 pool->pool_headp = NULL; 759 pool->pool_tailp = NULL; 760 761 pool->pool_nbusy += pool->pool_nfree; 762 pool->pool_nfree = 0; 763 } 764 765 mutex_exit(&pool->pool_lock); 766 767 RDS_DPRINTF4("rds_get_buf", "Return"); 768 769 return (bp); 770 } 771 772 boolean_t 773 rds_is_recvq_empty(rds_ep_t *ep, boolean_t wait) 774 { 775 rds_qp_t *recvqp; 776 rds_bufpool_t *rpool; 777 boolean_t ret = B_TRUE; 778 779 recvqp = &ep->ep_recvqp; 780 mutex_enter(&recvqp->qp_lock); 781 RDS_DPRINTF2("rds_is_recvq_empty", "EP(%p): QP has %d WRs", 782 ep, recvqp->qp_level); 783 if (wait) { 784 /* wait until the RQ is empty */ 785 while (recvqp->qp_level != 0) { 786 /* wait one second and try again */ 787 mutex_exit(&recvqp->qp_lock); 788 delay(drv_usectohz(1000000)); 789 mutex_enter(&recvqp->qp_lock); 790 } 791 } else if (recvqp->qp_level != 0) { 792 ret = B_FALSE; 793 } 794 mutex_exit(&recvqp->qp_lock); 795 796 rpool = &ep->ep_rcvpool; 797 mutex_enter(&rpool->pool_lock); 798 RDS_DPRINTF2("rds_is_recvq_empty", "EP(%p): " 799 "There are %d pending buffers on sockqs", ep, rpool->pool_nbusy); 800 if (wait) { 801 /* Wait for all buffers to be freed by sockfs */ 802 while (rpool->pool_nbusy != 0) { 803 /* wait one second and try again */ 804 mutex_exit(&rpool->pool_lock); 805 delay(drv_usectohz(1000000)); 806 mutex_enter(&rpool->pool_lock); 807 } 808 } else if (rpool->pool_nbusy != 0) { 809 ret = B_FALSE; 810 } 811 mutex_exit(&rpool->pool_lock); 812 813 return (ret); 814 } 815 816 boolean_t 817 rds_is_sendq_empty(rds_ep_t *ep, uint_t wait) 818 { 819 rds_bufpool_t *spool; 820 rds_buf_t *bp; 821 boolean_t ret1 = B_TRUE; 822 823 /* check if all the sends completed */ 824 spool = &ep->ep_sndpool; 825 mutex_enter(&spool->pool_lock); 826 RDS_DPRINTF2("rds_is_sendq_empty", "EP(%p): " 827 "Send Pool contains: %d", ep, spool->pool_nbusy); 828 if (wait) { 829 while (spool->pool_nbusy != 0) { 830 if (rds_no_interrupts) { 831 /* wait one second and try again */ 832 delay(drv_usectohz(1000000)); 833 rds_poll_send_completions(ep->ep_sendcq, ep, 834 B_TRUE); 835 } else { 836 /* wait one second and try again */ 837 mutex_exit(&spool->pool_lock); 838 delay(drv_usectohz(1000000)); 839 mutex_enter(&spool->pool_lock); 840 } 841 } 842 843 if ((wait == 2) && (ep->ep_type == RDS_EP_TYPE_DATA)) { 844 rds_buf_t *ackbp; 845 846 /* 847 * If the last one is acknowledged then everything 848 * is acknowledged 849 */ 850 bp = spool->pool_tailp; 851 ackbp = *(rds_buf_t **)ep->ep_ack_addr; 852 RDS_DPRINTF2("rds_is_sendq_empty", "EP(%p): " 853 "Checking for acknowledgements", ep); 854 while (bp != ackbp) { 855 RDS_DPRINTF2("rds_is_sendq_empty", 856 "EP(%p) BP(0x%p/0x%p) last " 857 "sent/acknowledged", ep, bp, ackbp); 858 mutex_exit(&spool->pool_lock); 859 delay(drv_usectohz(1000000)); 860 mutex_enter(&spool->pool_lock); 861 862 bp = spool->pool_tailp; 863 ackbp = *(rds_buf_t **)ep->ep_ack_addr; 864 } 865 } 866 } else if (spool->pool_nbusy != 0) { 867 ret1 = B_FALSE; 868 } 869 mutex_exit(&spool->pool_lock); 870 871 /* check if all the rdma acks completed */ 872 mutex_enter(&ep->ep_lock); 873 RDS_DPRINTF2("rds_is_sendq_empty", "EP(%p): " 874 "Outstanding RDMA Acks: %d", ep, ep->ep_rdmacnt); 875 if (wait) { 876 while (ep->ep_rdmacnt != 0) { 877 if (rds_no_interrupts) { 878 /* wait one second and try again */ 879 delay(drv_usectohz(1000000)); 880 rds_poll_send_completions(ep->ep_sendcq, ep, 881 B_FALSE); 882 } else { 883 /* wait one second and try again */ 884 mutex_exit(&ep->ep_lock); 885 delay(drv_usectohz(1000000)); 886 mutex_enter(&ep->ep_lock); 887 } 888 } 889 } else if (ep->ep_rdmacnt != 0) { 890 ret1 = B_FALSE; 891 } 892 mutex_exit(&ep->ep_lock); 893 894 return (ret1); 895 } 896 897 /* Get buffers from the send pool */ 898 rds_buf_t * 899 rds_get_send_buf(rds_ep_t *ep, uint_t nbuf) 900 { 901 rds_buf_t *bp = NULL, *bp1; 902 rds_bufpool_t *spool; 903 uint_t waittime = rds_waittime_ms * 1000; 904 uint_t ix; 905 int ret; 906 907 RDS_DPRINTF4("rds_get_send_buf", "Enter: EP(%p) Buffers requested: %d", 908 ep, nbuf); 909 910 spool = &ep->ep_sndpool; 911 mutex_enter(&spool->pool_lock); 912 913 if (rds_no_interrupts) { 914 if ((spool->pool_sqpoll_pending == B_FALSE) && 915 (spool->pool_nbusy > 916 (spool->pool_nbuffers * rds_poll_percent_full)/100)) { 917 spool->pool_sqpoll_pending = B_TRUE; 918 mutex_exit(&spool->pool_lock); 919 rds_poll_send_completions(ep->ep_sendcq, ep, B_FALSE); 920 mutex_enter(&spool->pool_lock); 921 spool->pool_sqpoll_pending = B_FALSE; 922 } 923 } 924 925 if (spool->pool_nfree < nbuf) { 926 /* wait for buffers to become available */ 927 spool->pool_cv_count += nbuf; 928 ret = cv_timedwait_sig(&spool->pool_cv, &spool->pool_lock, 929 ddi_get_lbolt() + drv_usectohz(waittime)); 930 /* ret = cv_wait_sig(&spool->pool_cv, &spool->pool_lock); */ 931 if (ret == 0) { 932 /* signal pending */ 933 spool->pool_cv_count -= nbuf; 934 mutex_exit(&spool->pool_lock); 935 return (NULL); 936 } 937 938 spool->pool_cv_count -= nbuf; 939 } 940 941 /* Have the number of buffers needed */ 942 if (spool->pool_nfree > nbuf) { 943 bp = spool->pool_headp; 944 945 if (ep->ep_type == RDS_EP_TYPE_DATA) { 946 rds_buf_t *ackbp; 947 ackbp = *(rds_buf_t **)ep->ep_ack_addr; 948 949 /* check if all the needed buffers are acknowledged */ 950 bp1 = bp; 951 for (ix = 0; ix < nbuf; ix++) { 952 if ((bp1 == ackbp) || 953 (bp1->buf_state != RDS_SNDBUF_FREE)) { 954 /* 955 * The buffer is not yet signalled or 956 * is not yet acknowledged 957 */ 958 RDS_DPRINTF5("rds_get_send_buf", 959 "EP(%p) Buffer (%p) not yet " 960 "acked/completed", ep, bp1); 961 mutex_exit(&spool->pool_lock); 962 return (NULL); 963 } 964 965 bp1 = bp1->buf_nextp; 966 } 967 } 968 969 /* mark the buffers as pending */ 970 bp1 = bp; 971 for (ix = 1; ix < nbuf; ix++) { 972 ASSERT(bp1->buf_state == RDS_SNDBUF_FREE); 973 bp1->buf_state = RDS_SNDBUF_PENDING; 974 bp1 = bp1->buf_nextp; 975 } 976 ASSERT(bp1->buf_state == RDS_SNDBUF_FREE); 977 bp1->buf_state = RDS_SNDBUF_PENDING; 978 979 spool->pool_headp = bp1->buf_nextp; 980 bp1->buf_nextp = NULL; 981 if (spool->pool_headp == NULL) 982 spool->pool_tailp = NULL; 983 spool->pool_nfree -= nbuf; 984 spool->pool_nbusy += nbuf; 985 } 986 mutex_exit(&spool->pool_lock); 987 988 RDS_DPRINTF4("rds_get_send_buf", "Return: EP(%p) Buffers requested: %d", 989 ep, nbuf); 990 991 return (bp); 992 } 993 994 #define RDS_MIN_BUF_TO_WAKE_THREADS 10 995 996 void 997 rds_free_send_buf(rds_ep_t *ep, rds_buf_t *headp, rds_buf_t *tailp, uint_t nbuf, 998 boolean_t lock) 999 { 1000 rds_bufpool_t *spool; 1001 rds_buf_t *tmp; 1002 1003 RDS_DPRINTF4("rds_free_send_buf", "Enter"); 1004 1005 ASSERT(nbuf != 0); 1006 1007 if (tailp == NULL) { 1008 if (nbuf > 1) { 1009 tmp = headp; 1010 while (tmp->buf_nextp) { 1011 tmp = tmp->buf_nextp; 1012 } 1013 tailp = tmp; 1014 } else { 1015 tailp = headp; 1016 } 1017 } 1018 1019 spool = &ep->ep_sndpool; 1020 1021 if (lock == B_FALSE) { 1022 /* lock is not held outside */ 1023 mutex_enter(&spool->pool_lock); 1024 } 1025 1026 if (spool->pool_nfree) { 1027 spool->pool_tailp->buf_nextp = headp; 1028 } else { 1029 spool->pool_headp = headp; 1030 } 1031 spool->pool_tailp = tailp; 1032 1033 spool->pool_nfree += nbuf; 1034 spool->pool_nbusy -= nbuf; 1035 1036 if ((spool->pool_cv_count > 0) && 1037 (spool->pool_nfree > RDS_MIN_BUF_TO_WAKE_THREADS)) { 1038 if (spool->pool_nfree >= spool->pool_cv_count) 1039 cv_broadcast(&spool->pool_cv); 1040 else 1041 cv_signal(&spool->pool_cv); 1042 } 1043 1044 if (lock == B_FALSE) { 1045 mutex_exit(&spool->pool_lock); 1046 } 1047 1048 RDS_DPRINTF4("rds_free_send_buf", "Return"); 1049 } 1050 1051 #define RDS_NBUFFERS_TO_PUTBACK 100 1052 void 1053 rds_free_recv_buf(rds_buf_t *bp, uint_t nbuf) 1054 { 1055 rds_ep_t *ep; 1056 rds_bufpool_t *rpool; 1057 rds_buf_t *bp1; 1058 uint_t ix; 1059 1060 RDS_DPRINTF4("rds_free_recv_buf", "Enter"); 1061 1062 ASSERT(nbuf != 0); 1063 1064 ep = bp->buf_ep; 1065 rpool = &ep->ep_rcvpool; 1066 1067 mutex_enter(&rpool->pool_lock); 1068 1069 /* Add the buffers to the local pool */ 1070 if (rpool->pool_tailp == NULL) { 1071 ASSERT(rpool->pool_headp == NULL); 1072 ASSERT(rpool->pool_nfree == 0); 1073 rpool->pool_headp = bp; 1074 bp1 = bp; 1075 for (ix = 1; ix < nbuf; ix++) { 1076 if (bp1->buf_state == RDS_RCVBUF_ONSOCKQ) { 1077 rpool->pool_nbusy--; 1078 } 1079 bp1->buf_state = RDS_RCVBUF_FREE; 1080 bp1 = bp1->buf_nextp; 1081 } 1082 bp1->buf_nextp = NULL; 1083 if (bp->buf_state == RDS_RCVBUF_ONSOCKQ) { 1084 rpool->pool_nbusy--; 1085 } 1086 bp->buf_state = RDS_RCVBUF_FREE; 1087 rpool->pool_tailp = bp1; 1088 rpool->pool_nfree += nbuf; 1089 } else { 1090 bp1 = bp; 1091 for (ix = 1; ix < nbuf; ix++) { 1092 if (bp1->buf_state == RDS_RCVBUF_ONSOCKQ) { 1093 rpool->pool_nbusy--; 1094 } 1095 bp1->buf_state = RDS_RCVBUF_FREE; 1096 bp1 = bp1->buf_nextp; 1097 } 1098 bp1->buf_nextp = NULL; 1099 if (bp->buf_state == RDS_RCVBUF_ONSOCKQ) { 1100 rpool->pool_nbusy--; 1101 } 1102 bp->buf_state = RDS_RCVBUF_FREE; 1103 rpool->pool_tailp->buf_nextp = bp; 1104 rpool->pool_tailp = bp1; 1105 rpool->pool_nfree += nbuf; 1106 } 1107 1108 if (rpool->pool_nfree >= RDS_NBUFFERS_TO_PUTBACK) { 1109 bp = rpool->pool_headp; 1110 nbuf = rpool->pool_nfree; 1111 rpool->pool_headp = NULL; 1112 rpool->pool_tailp = NULL; 1113 rpool->pool_nfree = 0; 1114 mutex_exit(&rpool->pool_lock); 1115 1116 /* Free the buffers to the global pool */ 1117 if (ep->ep_type == RDS_EP_TYPE_DATA) { 1118 rds_free_buf(&rds_dpool, bp, nbuf); 1119 } else { 1120 rds_free_buf(&rds_cpool, bp, nbuf); 1121 } 1122 1123 return; 1124 } 1125 mutex_exit(&rpool->pool_lock); 1126 1127 RDS_DPRINTF4("rds_free_recv_buf", "Return"); 1128 } 1129