1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 3 * 4 * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. 5 * Copyright (c) 2004 Infinicon Corporation. All rights reserved. 6 * Copyright (c) 2004 Intel Corporation. All rights reserved. 7 * Copyright (c) 2004 Topspin Corporation. All rights reserved. 8 * Copyright (c) 2004 Voltaire Corporation. All rights reserved. 9 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 10 * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. 11 * 12 * This software is available to you under a choice of one of two 13 * licenses. You may choose to be licensed under the terms of the GNU 14 * General Public License (GPL) Version 2, available from the file 15 * COPYING in the main directory of this source tree, or the 16 * OpenIB.org BSD license below: 17 * 18 * Redistribution and use in source and binary forms, with or 19 * without modification, are permitted provided that the following 20 * conditions are met: 21 * 22 * - Redistributions of source code must retain the above 23 * copyright notice, this list of conditions and the following 24 * disclaimer. 25 * 26 * - Redistributions in binary form must reproduce the above 27 * copyright notice, this list of conditions and the following 28 * disclaimer in the documentation and/or other materials 29 * provided with the distribution. 30 * 31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 35 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 36 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 37 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 38 * SOFTWARE. 39 */ 40 41 #include <sys/cdefs.h> 42 __FBSDID("$FreeBSD$"); 43 44 #include <linux/errno.h> 45 #include <linux/err.h> 46 #include <linux/string.h> 47 #include <linux/slab.h> 48 #include <linux/in.h> 49 #include <linux/in6.h> 50 #include <linux/wait.h> 51 52 #include <rdma/ib_verbs.h> 53 #include <rdma/ib_cache.h> 54 #include <rdma/ib_addr.h> 55 56 #include <netinet/ip.h> 57 #include <netinet/ip6.h> 58 59 #include <machine/in_cksum.h> 60 61 #include "core_priv.h" 62 63 static const char * const ib_events[] = { 64 [IB_EVENT_CQ_ERR] = "CQ error", 65 [IB_EVENT_QP_FATAL] = "QP fatal error", 66 [IB_EVENT_QP_REQ_ERR] = "QP request error", 67 [IB_EVENT_QP_ACCESS_ERR] = "QP access error", 68 [IB_EVENT_COMM_EST] = "communication established", 69 [IB_EVENT_SQ_DRAINED] = "send queue drained", 70 [IB_EVENT_PATH_MIG] = "path migration successful", 71 [IB_EVENT_PATH_MIG_ERR] = "path migration error", 72 [IB_EVENT_DEVICE_FATAL] = "device fatal error", 73 [IB_EVENT_PORT_ACTIVE] = "port active", 74 [IB_EVENT_PORT_ERR] = "port error", 75 [IB_EVENT_LID_CHANGE] = "LID change", 76 [IB_EVENT_PKEY_CHANGE] = "P_key change", 77 [IB_EVENT_SM_CHANGE] = "SM change", 78 [IB_EVENT_SRQ_ERR] = "SRQ error", 79 [IB_EVENT_SRQ_LIMIT_REACHED] = "SRQ limit reached", 80 [IB_EVENT_QP_LAST_WQE_REACHED] = "last WQE reached", 81 [IB_EVENT_CLIENT_REREGISTER] = "client reregister", 82 [IB_EVENT_GID_CHANGE] = "GID changed", 83 }; 84 85 const char *__attribute_const__ ib_event_msg(enum ib_event_type event) 86 { 87 size_t index = event; 88 89 return (index < ARRAY_SIZE(ib_events) && ib_events[index]) ? 
90 ib_events[index] : "unrecognized event"; 91 } 92 EXPORT_SYMBOL(ib_event_msg); 93 94 static const char * const wc_statuses[] = { 95 [IB_WC_SUCCESS] = "success", 96 [IB_WC_LOC_LEN_ERR] = "local length error", 97 [IB_WC_LOC_QP_OP_ERR] = "local QP operation error", 98 [IB_WC_LOC_EEC_OP_ERR] = "local EE context operation error", 99 [IB_WC_LOC_PROT_ERR] = "local protection error", 100 [IB_WC_WR_FLUSH_ERR] = "WR flushed", 101 [IB_WC_MW_BIND_ERR] = "memory management operation error", 102 [IB_WC_BAD_RESP_ERR] = "bad response error", 103 [IB_WC_LOC_ACCESS_ERR] = "local access error", 104 [IB_WC_REM_INV_REQ_ERR] = "invalid request error", 105 [IB_WC_REM_ACCESS_ERR] = "remote access error", 106 [IB_WC_REM_OP_ERR] = "remote operation error", 107 [IB_WC_RETRY_EXC_ERR] = "transport retry counter exceeded", 108 [IB_WC_RNR_RETRY_EXC_ERR] = "RNR retry counter exceeded", 109 [IB_WC_LOC_RDD_VIOL_ERR] = "local RDD violation error", 110 [IB_WC_REM_INV_RD_REQ_ERR] = "remote invalid RD request", 111 [IB_WC_REM_ABORT_ERR] = "operation aborted", 112 [IB_WC_INV_EECN_ERR] = "invalid EE context number", 113 [IB_WC_INV_EEC_STATE_ERR] = "invalid EE context state", 114 [IB_WC_FATAL_ERR] = "fatal error", 115 [IB_WC_RESP_TIMEOUT_ERR] = "response timeout error", 116 [IB_WC_GENERAL_ERR] = "general error", 117 }; 118 119 const char *__attribute_const__ ib_wc_status_msg(enum ib_wc_status status) 120 { 121 size_t index = status; 122 123 return (index < ARRAY_SIZE(wc_statuses) && wc_statuses[index]) ? 124 wc_statuses[index] : "unrecognized status"; 125 } 126 EXPORT_SYMBOL(ib_wc_status_msg); 127 128 __attribute_const__ int ib_rate_to_mult(enum ib_rate rate) 129 { 130 switch (rate) { 131 case IB_RATE_2_5_GBPS: return 1; 132 case IB_RATE_5_GBPS: return 2; 133 case IB_RATE_10_GBPS: return 4; 134 case IB_RATE_20_GBPS: return 8; 135 case IB_RATE_30_GBPS: return 12; 136 case IB_RATE_40_GBPS: return 16; 137 case IB_RATE_60_GBPS: return 24; 138 case IB_RATE_80_GBPS: return 32; 139 case IB_RATE_120_GBPS: return 48; 140 case IB_RATE_14_GBPS: return 6; 141 case IB_RATE_56_GBPS: return 22; 142 case IB_RATE_112_GBPS: return 45; 143 case IB_RATE_168_GBPS: return 67; 144 case IB_RATE_25_GBPS: return 10; 145 case IB_RATE_100_GBPS: return 40; 146 case IB_RATE_200_GBPS: return 80; 147 case IB_RATE_300_GBPS: return 120; 148 case IB_RATE_28_GBPS: return 11; 149 case IB_RATE_50_GBPS: return 20; 150 case IB_RATE_400_GBPS: return 160; 151 case IB_RATE_600_GBPS: return 240; 152 default: return -1; 153 } 154 } 155 EXPORT_SYMBOL(ib_rate_to_mult); 156 157 __attribute_const__ enum ib_rate mult_to_ib_rate(int mult) 158 { 159 switch (mult) { 160 case 1: return IB_RATE_2_5_GBPS; 161 case 2: return IB_RATE_5_GBPS; 162 case 4: return IB_RATE_10_GBPS; 163 case 8: return IB_RATE_20_GBPS; 164 case 12: return IB_RATE_30_GBPS; 165 case 16: return IB_RATE_40_GBPS; 166 case 24: return IB_RATE_60_GBPS; 167 case 32: return IB_RATE_80_GBPS; 168 case 48: return IB_RATE_120_GBPS; 169 case 6: return IB_RATE_14_GBPS; 170 case 22: return IB_RATE_56_GBPS; 171 case 45: return IB_RATE_112_GBPS; 172 case 67: return IB_RATE_168_GBPS; 173 case 10: return IB_RATE_25_GBPS; 174 case 40: return IB_RATE_100_GBPS; 175 case 80: return IB_RATE_200_GBPS; 176 case 120: return IB_RATE_300_GBPS; 177 case 11: return IB_RATE_28_GBPS; 178 case 20: return IB_RATE_50_GBPS; 179 case 160: return IB_RATE_400_GBPS; 180 case 240: return IB_RATE_600_GBPS; 181 default: return IB_RATE_PORT_CURRENT; 182 } 183 } 184 EXPORT_SYMBOL(mult_to_ib_rate); 185 186 __attribute_const__ int ib_rate_to_mbps(enum 
ib_rate rate) 187 { 188 switch (rate) { 189 case IB_RATE_2_5_GBPS: return 2500; 190 case IB_RATE_5_GBPS: return 5000; 191 case IB_RATE_10_GBPS: return 10000; 192 case IB_RATE_20_GBPS: return 20000; 193 case IB_RATE_30_GBPS: return 30000; 194 case IB_RATE_40_GBPS: return 40000; 195 case IB_RATE_60_GBPS: return 60000; 196 case IB_RATE_80_GBPS: return 80000; 197 case IB_RATE_120_GBPS: return 120000; 198 case IB_RATE_14_GBPS: return 14062; 199 case IB_RATE_56_GBPS: return 56250; 200 case IB_RATE_112_GBPS: return 112500; 201 case IB_RATE_168_GBPS: return 168750; 202 case IB_RATE_25_GBPS: return 25781; 203 case IB_RATE_100_GBPS: return 103125; 204 case IB_RATE_200_GBPS: return 206250; 205 case IB_RATE_300_GBPS: return 309375; 206 case IB_RATE_28_GBPS: return 28125; 207 case IB_RATE_50_GBPS: return 53125; 208 case IB_RATE_400_GBPS: return 425000; 209 case IB_RATE_600_GBPS: return 637500; 210 default: return -1; 211 } 212 } 213 EXPORT_SYMBOL(ib_rate_to_mbps); 214 215 __attribute_const__ enum rdma_transport_type 216 rdma_node_get_transport(enum rdma_node_type node_type) 217 { 218 switch (node_type) { 219 case RDMA_NODE_IB_CA: 220 case RDMA_NODE_IB_SWITCH: 221 case RDMA_NODE_IB_ROUTER: 222 return RDMA_TRANSPORT_IB; 223 case RDMA_NODE_RNIC: 224 return RDMA_TRANSPORT_IWARP; 225 case RDMA_NODE_USNIC: 226 return RDMA_TRANSPORT_USNIC; 227 case RDMA_NODE_USNIC_UDP: 228 return RDMA_TRANSPORT_USNIC_UDP; 229 default: 230 BUG(); 231 return 0; 232 } 233 } 234 EXPORT_SYMBOL(rdma_node_get_transport); 235 236 enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, u8 port_num) 237 { 238 if (device->get_link_layer) 239 return device->get_link_layer(device, port_num); 240 241 switch (rdma_node_get_transport(device->node_type)) { 242 case RDMA_TRANSPORT_IB: 243 return IB_LINK_LAYER_INFINIBAND; 244 case RDMA_TRANSPORT_IWARP: 245 case RDMA_TRANSPORT_USNIC: 246 case RDMA_TRANSPORT_USNIC_UDP: 247 return IB_LINK_LAYER_ETHERNET; 248 default: 249 return IB_LINK_LAYER_UNSPECIFIED; 250 } 251 } 252 EXPORT_SYMBOL(rdma_port_get_link_layer); 253 254 /* Protection domains */ 255 256 /** 257 * ib_alloc_pd - Allocates an unused protection domain. 258 * @device: The device on which to allocate the protection domain. 259 * 260 * A protection domain object provides an association between QPs, shared 261 * receive queues, address handles, memory regions, and memory windows. 262 * 263 * Every PD has a local_dma_lkey which can be used as the lkey value for local 264 * memory operations. 
265 */ 266 struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, 267 const char *caller) 268 { 269 struct ib_pd *pd; 270 int mr_access_flags = 0; 271 int ret; 272 273 pd = rdma_zalloc_drv_obj(device, ib_pd); 274 if (!pd) 275 return ERR_PTR(-ENOMEM); 276 277 pd->device = device; 278 pd->uobject = NULL; 279 pd->__internal_mr = NULL; 280 atomic_set(&pd->usecnt, 0); 281 pd->flags = flags; 282 283 ret = device->alloc_pd(pd, NULL); 284 if (ret) { 285 kfree(pd); 286 return ERR_PTR(ret); 287 } 288 289 if (device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) 290 pd->local_dma_lkey = device->local_dma_lkey; 291 else 292 mr_access_flags |= IB_ACCESS_LOCAL_WRITE; 293 294 if (flags & IB_PD_UNSAFE_GLOBAL_RKEY) { 295 pr_warn("%s: enabling unsafe global rkey\n", caller); 296 mr_access_flags |= IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_WRITE; 297 } 298 299 if (mr_access_flags) { 300 struct ib_mr *mr; 301 302 mr = pd->device->get_dma_mr(pd, mr_access_flags); 303 if (IS_ERR(mr)) { 304 ib_dealloc_pd(pd); 305 return ERR_CAST(mr); 306 } 307 308 mr->device = pd->device; 309 mr->pd = pd; 310 mr->type = IB_MR_TYPE_DMA; 311 mr->uobject = NULL; 312 mr->need_inval = false; 313 314 pd->__internal_mr = mr; 315 316 if (!(device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) 317 pd->local_dma_lkey = pd->__internal_mr->lkey; 318 319 if (flags & IB_PD_UNSAFE_GLOBAL_RKEY) 320 pd->unsafe_global_rkey = pd->__internal_mr->rkey; 321 } 322 323 return pd; 324 } 325 EXPORT_SYMBOL(__ib_alloc_pd); 326 327 /** 328 * ib_dealloc_pd_user - Deallocates a protection domain. 329 * @pd: The protection domain to deallocate. 330 * @udata: Valid user data or NULL for kernel object 331 * 332 * It is an error to call this function while any resources in the pd still 333 * exist. The caller is responsible to synchronously destroy them and 334 * guarantee no new allocations will happen. 335 */ 336 void ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata) 337 { 338 int ret; 339 340 if (pd->__internal_mr) { 341 ret = pd->device->dereg_mr(pd->__internal_mr, NULL); 342 WARN_ON(ret); 343 pd->__internal_mr = NULL; 344 } 345 346 /* uverbs manipulates usecnt with proper locking, while the kabi 347 requires the caller to guarantee we can't race here. */ 348 WARN_ON(atomic_read(&pd->usecnt)); 349 350 pd->device->dealloc_pd(pd, udata); 351 kfree(pd); 352 } 353 EXPORT_SYMBOL(ib_dealloc_pd_user); 354 355 /* Address handles */ 356 357 static struct ib_ah *_ib_create_ah(struct ib_pd *pd, 358 struct ib_ah_attr *ah_attr, 359 u32 flags, 360 struct ib_udata *udata) 361 { 362 struct ib_device *device = pd->device; 363 struct ib_ah *ah; 364 int ret; 365 366 might_sleep_if(flags & RDMA_CREATE_AH_SLEEPABLE); 367 368 if (!device->create_ah) 369 return ERR_PTR(-EOPNOTSUPP); 370 371 ah = rdma_zalloc_drv_obj_gfp( 372 device, ib_ah, 373 (flags & RDMA_CREATE_AH_SLEEPABLE) ? GFP_KERNEL : GFP_ATOMIC); 374 if (!ah) 375 return ERR_PTR(-ENOMEM); 376 377 ah->device = device; 378 ah->pd = pd; 379 380 ret = device->create_ah(ah, ah_attr, flags, udata); 381 if (ret) { 382 kfree(ah); 383 return ERR_PTR(ret); 384 } 385 386 atomic_inc(&pd->usecnt); 387 return ah; 388 } 389 390 /** 391 * rdma_create_ah - Creates an address handle for the 392 * given address vector. 393 * @pd: The protection domain associated with the address handle. 394 * @ah_attr: The attributes of the address vector. 395 * @flags: Create address handle flags (see enum rdma_create_ah_flags). 396 * 397 * It returns 0 on success and returns appropriate error code on error. 
398 * The address handle is used to reference a local or global destination 399 * in all UD QP post sends. 400 */ 401 struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, 402 u32 flags) 403 { 404 struct ib_ah *ah; 405 406 ah = _ib_create_ah(pd, ah_attr, flags, NULL); 407 408 return ah; 409 } 410 EXPORT_SYMBOL(ib_create_ah); 411 412 /** 413 * ib_create_user_ah - Creates an address handle for the 414 * given address vector. 415 * It resolves destination mac address for ah attribute of RoCE type. 416 * @pd: The protection domain associated with the address handle. 417 * @ah_attr: The attributes of the address vector. 418 * @udata: pointer to user's input output buffer information need by 419 * provider driver. 420 * 421 * It returns a valid address handle pointer on success and 422 * returns appropriate error code on error. 423 * The address handle is used to reference a local or global destination 424 * in all UD QP post sends. 425 */ 426 struct ib_ah *ib_create_user_ah(struct ib_pd *pd, 427 struct ib_ah_attr *ah_attr, 428 struct ib_udata *udata) 429 { 430 int err; 431 432 if (rdma_protocol_roce(pd->device, ah_attr->port_num)) { 433 err = ib_resolve_eth_dmac(pd->device, ah_attr); 434 if (err) 435 return ERR_PTR(err); 436 } 437 438 return _ib_create_ah(pd, ah_attr, RDMA_CREATE_AH_SLEEPABLE, udata); 439 } 440 EXPORT_SYMBOL(ib_create_user_ah); 441 442 static int ib_get_header_version(const union rdma_network_hdr *hdr) 443 { 444 const struct ip *ip4h = (const struct ip *)&hdr->roce4grh; 445 struct ip ip4h_checked; 446 const struct ip6_hdr *ip6h = (const struct ip6_hdr *)&hdr->ibgrh; 447 448 /* If it's IPv6, the version must be 6, otherwise, the first 449 * 20 bytes (before the IPv4 header) are garbled. 450 */ 451 if ((ip6h->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) 452 return (ip4h->ip_v == 4) ? 4 : 0; 453 /* version may be 6 or 4 because the first 20 bytes could be garbled */ 454 455 /* RoCE v2 requires no options, thus header length 456 * must be 5 words 457 */ 458 if (ip4h->ip_hl != 5) 459 return 6; 460 461 /* Verify checksum. 462 * We can't write on scattered buffers so we need to copy to 463 * temp buffer. 
464 */ 465 memcpy(&ip4h_checked, ip4h, sizeof(ip4h_checked)); 466 ip4h_checked.ip_sum = 0; 467 #if defined(INET) || defined(INET6) 468 ip4h_checked.ip_sum = in_cksum_hdr(&ip4h_checked); 469 #endif 470 /* if IPv4 header checksum is OK, believe it */ 471 if (ip4h->ip_sum == ip4h_checked.ip_sum) 472 return 4; 473 return 6; 474 } 475 476 static enum rdma_network_type ib_get_net_type_by_grh(struct ib_device *device, 477 u8 port_num, 478 const struct ib_grh *grh) 479 { 480 int grh_version; 481 482 if (rdma_protocol_ib(device, port_num)) 483 return RDMA_NETWORK_IB; 484 485 grh_version = ib_get_header_version((const union rdma_network_hdr *)grh); 486 487 if (grh_version == 4) 488 return RDMA_NETWORK_IPV4; 489 490 if (grh->next_hdr == IPPROTO_UDP) 491 return RDMA_NETWORK_IPV6; 492 493 return RDMA_NETWORK_ROCE_V1; 494 } 495 496 struct find_gid_index_context { 497 u16 vlan_id; 498 enum ib_gid_type gid_type; 499 }; 500 501 502 /* 503 * This function will return true only if a inspected GID index 504 * matches the request based on the GID type and VLAN configuration 505 */ 506 static bool find_gid_index(const union ib_gid *gid, 507 const struct ib_gid_attr *gid_attr, 508 void *context) 509 { 510 u16 vlan_diff; 511 struct find_gid_index_context *ctx = 512 (struct find_gid_index_context *)context; 513 514 if (ctx->gid_type != gid_attr->gid_type) 515 return false; 516 517 /* 518 * The following will verify: 519 * 1. VLAN ID matching for VLAN tagged requests. 520 * 2. prio-tagged/untagged to prio-tagged/untagged matching. 521 * 522 * This XOR is valid, since 0x0 < vlan_id < 0x0FFF. 523 */ 524 vlan_diff = rdma_vlan_dev_vlan_id(gid_attr->ndev) ^ ctx->vlan_id; 525 526 return (vlan_diff == 0x0000 || vlan_diff == 0xFFFF); 527 } 528 529 static int get_sgid_index_from_eth(struct ib_device *device, u8 port_num, 530 u16 vlan_id, const union ib_gid *sgid, 531 enum ib_gid_type gid_type, 532 u16 *gid_index) 533 { 534 struct find_gid_index_context context = {.vlan_id = vlan_id, 535 .gid_type = gid_type}; 536 537 return ib_find_gid_by_filter(device, sgid, port_num, find_gid_index, 538 &context, gid_index); 539 } 540 541 static int get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr, 542 enum rdma_network_type net_type, 543 union ib_gid *sgid, union ib_gid *dgid) 544 { 545 struct sockaddr_in src_in; 546 struct sockaddr_in dst_in; 547 __be32 src_saddr, dst_saddr; 548 549 if (!sgid || !dgid) 550 return -EINVAL; 551 552 if (net_type == RDMA_NETWORK_IPV4) { 553 memcpy(&src_in.sin_addr.s_addr, 554 &hdr->roce4grh.ip_src, 4); 555 memcpy(&dst_in.sin_addr.s_addr, 556 &hdr->roce4grh.ip_dst, 4); 557 src_saddr = src_in.sin_addr.s_addr; 558 dst_saddr = dst_in.sin_addr.s_addr; 559 ipv6_addr_set_v4mapped(src_saddr, 560 (struct in6_addr *)sgid); 561 ipv6_addr_set_v4mapped(dst_saddr, 562 (struct in6_addr *)dgid); 563 return 0; 564 } else if (net_type == RDMA_NETWORK_IPV6 || 565 net_type == RDMA_NETWORK_IB) { 566 *dgid = hdr->ibgrh.dgid; 567 *sgid = hdr->ibgrh.sgid; 568 return 0; 569 } else { 570 return -EINVAL; 571 } 572 } 573 574 int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, 575 const struct ib_wc *wc, const struct ib_grh *grh, 576 struct ib_ah_attr *ah_attr) 577 { 578 u32 flow_class; 579 u16 gid_index = 0; 580 int ret; 581 enum rdma_network_type net_type = RDMA_NETWORK_IB; 582 enum ib_gid_type gid_type = IB_GID_TYPE_IB; 583 int hoplimit = 0xff; 584 union ib_gid dgid; 585 union ib_gid sgid; 586 587 memset(ah_attr, 0, sizeof *ah_attr); 588 if (rdma_cap_eth_ah(device, port_num)) { 589 if (wc->wc_flags & 
IB_WC_WITH_NETWORK_HDR_TYPE) 590 net_type = wc->network_hdr_type; 591 else 592 net_type = ib_get_net_type_by_grh(device, port_num, grh); 593 gid_type = ib_network_to_gid_type(net_type); 594 } 595 ret = get_gids_from_rdma_hdr((const union rdma_network_hdr *)grh, net_type, 596 &sgid, &dgid); 597 if (ret) 598 return ret; 599 600 if (rdma_protocol_roce(device, port_num)) { 601 struct ib_gid_attr dgid_attr; 602 const u16 vlan_id = (wc->wc_flags & IB_WC_WITH_VLAN) ? 603 wc->vlan_id : 0xffff; 604 605 if (!(wc->wc_flags & IB_WC_GRH)) 606 return -EPROTOTYPE; 607 608 ret = get_sgid_index_from_eth(device, port_num, vlan_id, 609 &dgid, gid_type, &gid_index); 610 if (ret) 611 return ret; 612 613 ret = ib_get_cached_gid(device, port_num, gid_index, &dgid, &dgid_attr); 614 if (ret) 615 return ret; 616 617 if (dgid_attr.ndev == NULL) 618 return -ENODEV; 619 620 ret = rdma_addr_find_l2_eth_by_grh(&dgid, &sgid, ah_attr->dmac, 621 dgid_attr.ndev, &hoplimit); 622 623 dev_put(dgid_attr.ndev); 624 if (ret) 625 return ret; 626 } 627 628 ah_attr->dlid = wc->slid; 629 ah_attr->sl = wc->sl; 630 ah_attr->src_path_bits = wc->dlid_path_bits; 631 ah_attr->port_num = port_num; 632 633 if (wc->wc_flags & IB_WC_GRH) { 634 ah_attr->ah_flags = IB_AH_GRH; 635 ah_attr->grh.dgid = sgid; 636 637 if (!rdma_cap_eth_ah(device, port_num)) { 638 if (dgid.global.interface_id != cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) { 639 ret = ib_find_cached_gid_by_port(device, &dgid, 640 IB_GID_TYPE_IB, 641 port_num, NULL, 642 &gid_index); 643 if (ret) 644 return ret; 645 } 646 } 647 648 ah_attr->grh.sgid_index = (u8) gid_index; 649 flow_class = be32_to_cpu(grh->version_tclass_flow); 650 ah_attr->grh.flow_label = flow_class & 0xFFFFF; 651 ah_attr->grh.hop_limit = hoplimit; 652 ah_attr->grh.traffic_class = (flow_class >> 20) & 0xFF; 653 } 654 return 0; 655 } 656 EXPORT_SYMBOL(ib_init_ah_from_wc); 657 658 struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc, 659 const struct ib_grh *grh, u8 port_num) 660 { 661 struct ib_ah_attr ah_attr; 662 int ret; 663 664 ret = ib_init_ah_from_wc(pd->device, port_num, wc, grh, &ah_attr); 665 if (ret) 666 return ERR_PTR(ret); 667 668 return ib_create_ah(pd, &ah_attr, RDMA_CREATE_AH_SLEEPABLE); 669 } 670 EXPORT_SYMBOL(ib_create_ah_from_wc); 671 672 int ib_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) 673 { 674 return ah->device->modify_ah ? 675 ah->device->modify_ah(ah, ah_attr) : 676 -ENOSYS; 677 } 678 EXPORT_SYMBOL(ib_modify_ah); 679 680 int ib_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr) 681 { 682 return ah->device->query_ah ? 
683 ah->device->query_ah(ah, ah_attr) : 684 -ENOSYS; 685 } 686 EXPORT_SYMBOL(ib_query_ah); 687 688 int ib_destroy_ah_user(struct ib_ah *ah, u32 flags, struct ib_udata *udata) 689 { 690 struct ib_pd *pd; 691 692 might_sleep_if(flags & RDMA_DESTROY_AH_SLEEPABLE); 693 694 pd = ah->pd; 695 ah->device->destroy_ah(ah, flags); 696 atomic_dec(&pd->usecnt); 697 698 kfree(ah); 699 return 0; 700 } 701 EXPORT_SYMBOL(ib_destroy_ah_user); 702 703 /* Shared receive queues */ 704 705 struct ib_srq *ib_create_srq(struct ib_pd *pd, 706 struct ib_srq_init_attr *srq_init_attr) 707 { 708 struct ib_srq *srq; 709 int ret; 710 711 if (!pd->device->create_srq) 712 return ERR_PTR(-EOPNOTSUPP); 713 714 srq = rdma_zalloc_drv_obj(pd->device, ib_srq); 715 if (!srq) 716 return ERR_PTR(-ENOMEM); 717 718 srq->device = pd->device; 719 srq->pd = pd; 720 srq->event_handler = srq_init_attr->event_handler; 721 srq->srq_context = srq_init_attr->srq_context; 722 srq->srq_type = srq_init_attr->srq_type; 723 724 if (ib_srq_has_cq(srq->srq_type)) { 725 srq->ext.cq = srq_init_attr->ext.cq; 726 atomic_inc(&srq->ext.cq->usecnt); 727 } 728 if (srq->srq_type == IB_SRQT_XRC) { 729 srq->ext.xrc.xrcd = srq_init_attr->ext.xrc.xrcd; 730 atomic_inc(&srq->ext.xrc.xrcd->usecnt); 731 } 732 atomic_inc(&pd->usecnt); 733 734 ret = pd->device->create_srq(srq, srq_init_attr, NULL); 735 if (ret) { 736 atomic_dec(&srq->pd->usecnt); 737 if (srq->srq_type == IB_SRQT_XRC) 738 atomic_dec(&srq->ext.xrc.xrcd->usecnt); 739 if (ib_srq_has_cq(srq->srq_type)) 740 atomic_dec(&srq->ext.cq->usecnt); 741 kfree(srq); 742 return ERR_PTR(ret); 743 } 744 745 return srq; 746 } 747 EXPORT_SYMBOL(ib_create_srq); 748 749 int ib_modify_srq(struct ib_srq *srq, 750 struct ib_srq_attr *srq_attr, 751 enum ib_srq_attr_mask srq_attr_mask) 752 { 753 return srq->device->modify_srq ? 754 srq->device->modify_srq(srq, srq_attr, srq_attr_mask, NULL) : 755 -ENOSYS; 756 } 757 EXPORT_SYMBOL(ib_modify_srq); 758 759 int ib_query_srq(struct ib_srq *srq, 760 struct ib_srq_attr *srq_attr) 761 { 762 return srq->device->query_srq ? 
763 srq->device->query_srq(srq, srq_attr) : -ENOSYS; 764 } 765 EXPORT_SYMBOL(ib_query_srq); 766 767 int ib_destroy_srq_user(struct ib_srq *srq, struct ib_udata *udata) 768 { 769 if (atomic_read(&srq->usecnt)) 770 return -EBUSY; 771 772 srq->device->destroy_srq(srq, udata); 773 774 atomic_dec(&srq->pd->usecnt); 775 if (srq->srq_type == IB_SRQT_XRC) 776 atomic_dec(&srq->ext.xrc.xrcd->usecnt); 777 if (ib_srq_has_cq(srq->srq_type)) 778 atomic_dec(&srq->ext.cq->usecnt); 779 kfree(srq); 780 781 return 0; 782 } 783 EXPORT_SYMBOL(ib_destroy_srq_user); 784 785 /* Queue pairs */ 786 787 static void __ib_shared_qp_event_handler(struct ib_event *event, void *context) 788 { 789 struct ib_qp *qp = context; 790 unsigned long flags; 791 792 spin_lock_irqsave(&qp->device->event_handler_lock, flags); 793 list_for_each_entry(event->element.qp, &qp->open_list, open_list) 794 if (event->element.qp->event_handler) 795 event->element.qp->event_handler(event, event->element.qp->qp_context); 796 spin_unlock_irqrestore(&qp->device->event_handler_lock, flags); 797 } 798 799 static void __ib_insert_xrcd_qp(struct ib_xrcd *xrcd, struct ib_qp *qp) 800 { 801 mutex_lock(&xrcd->tgt_qp_mutex); 802 list_add(&qp->xrcd_list, &xrcd->tgt_qp_list); 803 mutex_unlock(&xrcd->tgt_qp_mutex); 804 } 805 806 static struct ib_qp *__ib_open_qp(struct ib_qp *real_qp, 807 void (*event_handler)(struct ib_event *, void *), 808 void *qp_context) 809 { 810 struct ib_qp *qp; 811 unsigned long flags; 812 813 qp = kzalloc(sizeof *qp, GFP_KERNEL); 814 if (!qp) 815 return ERR_PTR(-ENOMEM); 816 817 qp->real_qp = real_qp; 818 atomic_inc(&real_qp->usecnt); 819 qp->device = real_qp->device; 820 qp->event_handler = event_handler; 821 qp->qp_context = qp_context; 822 qp->qp_num = real_qp->qp_num; 823 qp->qp_type = real_qp->qp_type; 824 825 spin_lock_irqsave(&real_qp->device->event_handler_lock, flags); 826 list_add(&qp->open_list, &real_qp->open_list); 827 spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags); 828 829 return qp; 830 } 831 832 struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd, 833 struct ib_qp_open_attr *qp_open_attr) 834 { 835 struct ib_qp *qp, *real_qp; 836 837 if (qp_open_attr->qp_type != IB_QPT_XRC_TGT) 838 return ERR_PTR(-EINVAL); 839 840 qp = ERR_PTR(-EINVAL); 841 mutex_lock(&xrcd->tgt_qp_mutex); 842 list_for_each_entry(real_qp, &xrcd->tgt_qp_list, xrcd_list) { 843 if (real_qp->qp_num == qp_open_attr->qp_num) { 844 qp = __ib_open_qp(real_qp, qp_open_attr->event_handler, 845 qp_open_attr->qp_context); 846 break; 847 } 848 } 849 mutex_unlock(&xrcd->tgt_qp_mutex); 850 return qp; 851 } 852 EXPORT_SYMBOL(ib_open_qp); 853 854 static struct ib_qp *ib_create_xrc_qp(struct ib_qp *qp, 855 struct ib_qp_init_attr *qp_init_attr) 856 { 857 struct ib_qp *real_qp = qp; 858 859 qp->event_handler = __ib_shared_qp_event_handler; 860 qp->qp_context = qp; 861 qp->pd = NULL; 862 qp->send_cq = qp->recv_cq = NULL; 863 qp->srq = NULL; 864 qp->xrcd = qp_init_attr->xrcd; 865 atomic_inc(&qp_init_attr->xrcd->usecnt); 866 INIT_LIST_HEAD(&qp->open_list); 867 868 qp = __ib_open_qp(real_qp, qp_init_attr->event_handler, 869 qp_init_attr->qp_context); 870 if (!IS_ERR(qp)) 871 __ib_insert_xrcd_qp(qp_init_attr->xrcd, real_qp); 872 else 873 real_qp->device->destroy_qp(real_qp, NULL); 874 return qp; 875 } 876 877 struct ib_qp *ib_create_qp(struct ib_pd *pd, 878 struct ib_qp_init_attr *qp_init_attr) 879 { 880 struct ib_device *device = pd ? 
pd->device : qp_init_attr->xrcd->device; 881 struct ib_qp *qp; 882 883 if (qp_init_attr->rwq_ind_tbl && 884 (qp_init_attr->recv_cq || 885 qp_init_attr->srq || qp_init_attr->cap.max_recv_wr || 886 qp_init_attr->cap.max_recv_sge)) 887 return ERR_PTR(-EINVAL); 888 889 qp = _ib_create_qp(device, pd, qp_init_attr, NULL, NULL); 890 if (IS_ERR(qp)) 891 return qp; 892 893 qp->device = device; 894 qp->real_qp = qp; 895 qp->uobject = NULL; 896 qp->qp_type = qp_init_attr->qp_type; 897 qp->rwq_ind_tbl = qp_init_attr->rwq_ind_tbl; 898 899 atomic_set(&qp->usecnt, 0); 900 spin_lock_init(&qp->mr_lock); 901 902 if (qp_init_attr->qp_type == IB_QPT_XRC_TGT) 903 return ib_create_xrc_qp(qp, qp_init_attr); 904 905 qp->event_handler = qp_init_attr->event_handler; 906 qp->qp_context = qp_init_attr->qp_context; 907 if (qp_init_attr->qp_type == IB_QPT_XRC_INI) { 908 qp->recv_cq = NULL; 909 qp->srq = NULL; 910 } else { 911 qp->recv_cq = qp_init_attr->recv_cq; 912 if (qp_init_attr->recv_cq) 913 atomic_inc(&qp_init_attr->recv_cq->usecnt); 914 qp->srq = qp_init_attr->srq; 915 if (qp->srq) 916 atomic_inc(&qp_init_attr->srq->usecnt); 917 } 918 919 qp->pd = pd; 920 qp->send_cq = qp_init_attr->send_cq; 921 qp->xrcd = NULL; 922 923 atomic_inc(&pd->usecnt); 924 if (qp_init_attr->send_cq) 925 atomic_inc(&qp_init_attr->send_cq->usecnt); 926 if (qp_init_attr->rwq_ind_tbl) 927 atomic_inc(&qp->rwq_ind_tbl->usecnt); 928 929 /* 930 * Note: all hw drivers guarantee that max_send_sge is lower than 931 * the device RDMA WRITE SGE limit but not all hw drivers ensure that 932 * max_send_sge <= max_sge_rd. 933 */ 934 qp->max_write_sge = qp_init_attr->cap.max_send_sge; 935 qp->max_read_sge = min_t(u32, qp_init_attr->cap.max_send_sge, 936 device->attrs.max_sge_rd); 937 938 return qp; 939 } 940 EXPORT_SYMBOL(ib_create_qp); 941 942 static const struct { 943 int valid; 944 enum ib_qp_attr_mask req_param[IB_QPT_MAX]; 945 enum ib_qp_attr_mask opt_param[IB_QPT_MAX]; 946 } qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = { 947 [IB_QPS_RESET] = { 948 [IB_QPS_RESET] = { .valid = 1 }, 949 [IB_QPS_INIT] = { 950 .valid = 1, 951 .req_param = { 952 [IB_QPT_UD] = (IB_QP_PKEY_INDEX | 953 IB_QP_PORT | 954 IB_QP_QKEY), 955 [IB_QPT_RAW_PACKET] = IB_QP_PORT, 956 [IB_QPT_UC] = (IB_QP_PKEY_INDEX | 957 IB_QP_PORT | 958 IB_QP_ACCESS_FLAGS), 959 [IB_QPT_RC] = (IB_QP_PKEY_INDEX | 960 IB_QP_PORT | 961 IB_QP_ACCESS_FLAGS), 962 [IB_QPT_XRC_INI] = (IB_QP_PKEY_INDEX | 963 IB_QP_PORT | 964 IB_QP_ACCESS_FLAGS), 965 [IB_QPT_XRC_TGT] = (IB_QP_PKEY_INDEX | 966 IB_QP_PORT | 967 IB_QP_ACCESS_FLAGS), 968 [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | 969 IB_QP_QKEY), 970 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | 971 IB_QP_QKEY), 972 } 973 }, 974 }, 975 [IB_QPS_INIT] = { 976 [IB_QPS_RESET] = { .valid = 1 }, 977 [IB_QPS_ERR] = { .valid = 1 }, 978 [IB_QPS_INIT] = { 979 .valid = 1, 980 .opt_param = { 981 [IB_QPT_UD] = (IB_QP_PKEY_INDEX | 982 IB_QP_PORT | 983 IB_QP_QKEY), 984 [IB_QPT_UC] = (IB_QP_PKEY_INDEX | 985 IB_QP_PORT | 986 IB_QP_ACCESS_FLAGS), 987 [IB_QPT_RC] = (IB_QP_PKEY_INDEX | 988 IB_QP_PORT | 989 IB_QP_ACCESS_FLAGS), 990 [IB_QPT_XRC_INI] = (IB_QP_PKEY_INDEX | 991 IB_QP_PORT | 992 IB_QP_ACCESS_FLAGS), 993 [IB_QPT_XRC_TGT] = (IB_QP_PKEY_INDEX | 994 IB_QP_PORT | 995 IB_QP_ACCESS_FLAGS), 996 [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | 997 IB_QP_QKEY), 998 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | 999 IB_QP_QKEY), 1000 } 1001 }, 1002 [IB_QPS_RTR] = { 1003 .valid = 1, 1004 .req_param = { 1005 [IB_QPT_UC] = (IB_QP_AV | 1006 IB_QP_PATH_MTU | 1007 IB_QP_DEST_QPN | 1008 IB_QP_RQ_PSN), 1009 [IB_QPT_RC] = (IB_QP_AV | 
1010 IB_QP_PATH_MTU | 1011 IB_QP_DEST_QPN | 1012 IB_QP_RQ_PSN | 1013 IB_QP_MAX_DEST_RD_ATOMIC | 1014 IB_QP_MIN_RNR_TIMER), 1015 [IB_QPT_XRC_INI] = (IB_QP_AV | 1016 IB_QP_PATH_MTU | 1017 IB_QP_DEST_QPN | 1018 IB_QP_RQ_PSN), 1019 [IB_QPT_XRC_TGT] = (IB_QP_AV | 1020 IB_QP_PATH_MTU | 1021 IB_QP_DEST_QPN | 1022 IB_QP_RQ_PSN | 1023 IB_QP_MAX_DEST_RD_ATOMIC | 1024 IB_QP_MIN_RNR_TIMER), 1025 }, 1026 .opt_param = { 1027 [IB_QPT_UD] = (IB_QP_PKEY_INDEX | 1028 IB_QP_QKEY), 1029 [IB_QPT_UC] = (IB_QP_ALT_PATH | 1030 IB_QP_ACCESS_FLAGS | 1031 IB_QP_PKEY_INDEX), 1032 [IB_QPT_RC] = (IB_QP_ALT_PATH | 1033 IB_QP_ACCESS_FLAGS | 1034 IB_QP_PKEY_INDEX), 1035 [IB_QPT_XRC_INI] = (IB_QP_ALT_PATH | 1036 IB_QP_ACCESS_FLAGS | 1037 IB_QP_PKEY_INDEX), 1038 [IB_QPT_XRC_TGT] = (IB_QP_ALT_PATH | 1039 IB_QP_ACCESS_FLAGS | 1040 IB_QP_PKEY_INDEX), 1041 [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | 1042 IB_QP_QKEY), 1043 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | 1044 IB_QP_QKEY), 1045 }, 1046 }, 1047 }, 1048 [IB_QPS_RTR] = { 1049 [IB_QPS_RESET] = { .valid = 1 }, 1050 [IB_QPS_ERR] = { .valid = 1 }, 1051 [IB_QPS_RTS] = { 1052 .valid = 1, 1053 .req_param = { 1054 [IB_QPT_UD] = IB_QP_SQ_PSN, 1055 [IB_QPT_UC] = IB_QP_SQ_PSN, 1056 [IB_QPT_RC] = (IB_QP_TIMEOUT | 1057 IB_QP_RETRY_CNT | 1058 IB_QP_RNR_RETRY | 1059 IB_QP_SQ_PSN | 1060 IB_QP_MAX_QP_RD_ATOMIC), 1061 [IB_QPT_XRC_INI] = (IB_QP_TIMEOUT | 1062 IB_QP_RETRY_CNT | 1063 IB_QP_RNR_RETRY | 1064 IB_QP_SQ_PSN | 1065 IB_QP_MAX_QP_RD_ATOMIC), 1066 [IB_QPT_XRC_TGT] = (IB_QP_TIMEOUT | 1067 IB_QP_SQ_PSN), 1068 [IB_QPT_SMI] = IB_QP_SQ_PSN, 1069 [IB_QPT_GSI] = IB_QP_SQ_PSN, 1070 }, 1071 .opt_param = { 1072 [IB_QPT_UD] = (IB_QP_CUR_STATE | 1073 IB_QP_QKEY), 1074 [IB_QPT_UC] = (IB_QP_CUR_STATE | 1075 IB_QP_ALT_PATH | 1076 IB_QP_ACCESS_FLAGS | 1077 IB_QP_PATH_MIG_STATE), 1078 [IB_QPT_RC] = (IB_QP_CUR_STATE | 1079 IB_QP_ALT_PATH | 1080 IB_QP_ACCESS_FLAGS | 1081 IB_QP_MIN_RNR_TIMER | 1082 IB_QP_PATH_MIG_STATE), 1083 [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE | 1084 IB_QP_ALT_PATH | 1085 IB_QP_ACCESS_FLAGS | 1086 IB_QP_PATH_MIG_STATE), 1087 [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE | 1088 IB_QP_ALT_PATH | 1089 IB_QP_ACCESS_FLAGS | 1090 IB_QP_MIN_RNR_TIMER | 1091 IB_QP_PATH_MIG_STATE), 1092 [IB_QPT_SMI] = (IB_QP_CUR_STATE | 1093 IB_QP_QKEY), 1094 [IB_QPT_GSI] = (IB_QP_CUR_STATE | 1095 IB_QP_QKEY), 1096 [IB_QPT_RAW_PACKET] = IB_QP_RATE_LIMIT, 1097 } 1098 } 1099 }, 1100 [IB_QPS_RTS] = { 1101 [IB_QPS_RESET] = { .valid = 1 }, 1102 [IB_QPS_ERR] = { .valid = 1 }, 1103 [IB_QPS_RTS] = { 1104 .valid = 1, 1105 .opt_param = { 1106 [IB_QPT_UD] = (IB_QP_CUR_STATE | 1107 IB_QP_QKEY), 1108 [IB_QPT_UC] = (IB_QP_CUR_STATE | 1109 IB_QP_ACCESS_FLAGS | 1110 IB_QP_ALT_PATH | 1111 IB_QP_PATH_MIG_STATE), 1112 [IB_QPT_RC] = (IB_QP_CUR_STATE | 1113 IB_QP_ACCESS_FLAGS | 1114 IB_QP_ALT_PATH | 1115 IB_QP_PATH_MIG_STATE | 1116 IB_QP_MIN_RNR_TIMER), 1117 [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE | 1118 IB_QP_ACCESS_FLAGS | 1119 IB_QP_ALT_PATH | 1120 IB_QP_PATH_MIG_STATE), 1121 [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE | 1122 IB_QP_ACCESS_FLAGS | 1123 IB_QP_ALT_PATH | 1124 IB_QP_PATH_MIG_STATE | 1125 IB_QP_MIN_RNR_TIMER), 1126 [IB_QPT_SMI] = (IB_QP_CUR_STATE | 1127 IB_QP_QKEY), 1128 [IB_QPT_GSI] = (IB_QP_CUR_STATE | 1129 IB_QP_QKEY), 1130 [IB_QPT_RAW_PACKET] = IB_QP_RATE_LIMIT, 1131 } 1132 }, 1133 [IB_QPS_SQD] = { 1134 .valid = 1, 1135 .opt_param = { 1136 [IB_QPT_UD] = IB_QP_EN_SQD_ASYNC_NOTIFY, 1137 [IB_QPT_UC] = IB_QP_EN_SQD_ASYNC_NOTIFY, 1138 [IB_QPT_RC] = IB_QP_EN_SQD_ASYNC_NOTIFY, 1139 [IB_QPT_XRC_INI] = IB_QP_EN_SQD_ASYNC_NOTIFY, 1140 [IB_QPT_XRC_TGT] = 
IB_QP_EN_SQD_ASYNC_NOTIFY, /* ??? */ 1141 [IB_QPT_SMI] = IB_QP_EN_SQD_ASYNC_NOTIFY, 1142 [IB_QPT_GSI] = IB_QP_EN_SQD_ASYNC_NOTIFY 1143 } 1144 }, 1145 }, 1146 [IB_QPS_SQD] = { 1147 [IB_QPS_RESET] = { .valid = 1 }, 1148 [IB_QPS_ERR] = { .valid = 1 }, 1149 [IB_QPS_RTS] = { 1150 .valid = 1, 1151 .opt_param = { 1152 [IB_QPT_UD] = (IB_QP_CUR_STATE | 1153 IB_QP_QKEY), 1154 [IB_QPT_UC] = (IB_QP_CUR_STATE | 1155 IB_QP_ALT_PATH | 1156 IB_QP_ACCESS_FLAGS | 1157 IB_QP_PATH_MIG_STATE), 1158 [IB_QPT_RC] = (IB_QP_CUR_STATE | 1159 IB_QP_ALT_PATH | 1160 IB_QP_ACCESS_FLAGS | 1161 IB_QP_MIN_RNR_TIMER | 1162 IB_QP_PATH_MIG_STATE), 1163 [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE | 1164 IB_QP_ALT_PATH | 1165 IB_QP_ACCESS_FLAGS | 1166 IB_QP_PATH_MIG_STATE), 1167 [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE | 1168 IB_QP_ALT_PATH | 1169 IB_QP_ACCESS_FLAGS | 1170 IB_QP_MIN_RNR_TIMER | 1171 IB_QP_PATH_MIG_STATE), 1172 [IB_QPT_SMI] = (IB_QP_CUR_STATE | 1173 IB_QP_QKEY), 1174 [IB_QPT_GSI] = (IB_QP_CUR_STATE | 1175 IB_QP_QKEY), 1176 } 1177 }, 1178 [IB_QPS_SQD] = { 1179 .valid = 1, 1180 .opt_param = { 1181 [IB_QPT_UD] = (IB_QP_PKEY_INDEX | 1182 IB_QP_QKEY), 1183 [IB_QPT_UC] = (IB_QP_AV | 1184 IB_QP_ALT_PATH | 1185 IB_QP_ACCESS_FLAGS | 1186 IB_QP_PKEY_INDEX | 1187 IB_QP_PATH_MIG_STATE), 1188 [IB_QPT_RC] = (IB_QP_PORT | 1189 IB_QP_AV | 1190 IB_QP_TIMEOUT | 1191 IB_QP_RETRY_CNT | 1192 IB_QP_RNR_RETRY | 1193 IB_QP_MAX_QP_RD_ATOMIC | 1194 IB_QP_MAX_DEST_RD_ATOMIC | 1195 IB_QP_ALT_PATH | 1196 IB_QP_ACCESS_FLAGS | 1197 IB_QP_PKEY_INDEX | 1198 IB_QP_MIN_RNR_TIMER | 1199 IB_QP_PATH_MIG_STATE), 1200 [IB_QPT_XRC_INI] = (IB_QP_PORT | 1201 IB_QP_AV | 1202 IB_QP_TIMEOUT | 1203 IB_QP_RETRY_CNT | 1204 IB_QP_RNR_RETRY | 1205 IB_QP_MAX_QP_RD_ATOMIC | 1206 IB_QP_ALT_PATH | 1207 IB_QP_ACCESS_FLAGS | 1208 IB_QP_PKEY_INDEX | 1209 IB_QP_PATH_MIG_STATE), 1210 [IB_QPT_XRC_TGT] = (IB_QP_PORT | 1211 IB_QP_AV | 1212 IB_QP_TIMEOUT | 1213 IB_QP_MAX_DEST_RD_ATOMIC | 1214 IB_QP_ALT_PATH | 1215 IB_QP_ACCESS_FLAGS | 1216 IB_QP_PKEY_INDEX | 1217 IB_QP_MIN_RNR_TIMER | 1218 IB_QP_PATH_MIG_STATE), 1219 [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | 1220 IB_QP_QKEY), 1221 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | 1222 IB_QP_QKEY), 1223 } 1224 } 1225 }, 1226 [IB_QPS_SQE] = { 1227 [IB_QPS_RESET] = { .valid = 1 }, 1228 [IB_QPS_ERR] = { .valid = 1 }, 1229 [IB_QPS_RTS] = { 1230 .valid = 1, 1231 .opt_param = { 1232 [IB_QPT_UD] = (IB_QP_CUR_STATE | 1233 IB_QP_QKEY), 1234 [IB_QPT_UC] = (IB_QP_CUR_STATE | 1235 IB_QP_ACCESS_FLAGS), 1236 [IB_QPT_SMI] = (IB_QP_CUR_STATE | 1237 IB_QP_QKEY), 1238 [IB_QPT_GSI] = (IB_QP_CUR_STATE | 1239 IB_QP_QKEY), 1240 } 1241 } 1242 }, 1243 [IB_QPS_ERR] = { 1244 [IB_QPS_RESET] = { .valid = 1 }, 1245 [IB_QPS_ERR] = { .valid = 1 } 1246 } 1247 }; 1248 1249 bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, 1250 enum ib_qp_type type, enum ib_qp_attr_mask mask) 1251 { 1252 enum ib_qp_attr_mask req_param, opt_param; 1253 1254 if (mask & IB_QP_CUR_STATE && 1255 cur_state != IB_QPS_RTR && cur_state != IB_QPS_RTS && 1256 cur_state != IB_QPS_SQD && cur_state != IB_QPS_SQE) 1257 return false; 1258 1259 if (!qp_state_table[cur_state][next_state].valid) 1260 return false; 1261 1262 req_param = qp_state_table[cur_state][next_state].req_param[type]; 1263 opt_param = qp_state_table[cur_state][next_state].opt_param[type]; 1264 1265 if ((mask & req_param) != req_param) 1266 return false; 1267 1268 if (mask & ~(req_param | opt_param | IB_QP_STATE)) 1269 return false; 1270 1271 return true; 1272 } 1273 EXPORT_SYMBOL(ib_modify_qp_is_ok); 1274 1275 int 
ib_resolve_eth_dmac(struct ib_device *device, 1276 struct ib_ah_attr *ah_attr) 1277 { 1278 struct ib_gid_attr sgid_attr; 1279 union ib_gid sgid; 1280 int hop_limit; 1281 int ret; 1282 1283 if (ah_attr->port_num < rdma_start_port(device) || 1284 ah_attr->port_num > rdma_end_port(device)) 1285 return -EINVAL; 1286 1287 if (!rdma_cap_eth_ah(device, ah_attr->port_num)) 1288 return 0; 1289 1290 if (rdma_is_multicast_addr((struct in6_addr *)ah_attr->grh.dgid.raw)) { 1291 if (ipv6_addr_v4mapped((struct in6_addr *)ah_attr->grh.dgid.raw)) { 1292 __be32 addr = 0; 1293 1294 memcpy(&addr, ah_attr->grh.dgid.raw + 12, 4); 1295 ip_eth_mc_map(addr, (char *)ah_attr->dmac); 1296 } else { 1297 ipv6_eth_mc_map((struct in6_addr *)ah_attr->grh.dgid.raw, 1298 (char *)ah_attr->dmac); 1299 } 1300 return 0; 1301 } 1302 1303 ret = ib_query_gid(device, 1304 ah_attr->port_num, 1305 ah_attr->grh.sgid_index, 1306 &sgid, &sgid_attr); 1307 if (ret != 0) 1308 return (ret); 1309 if (!sgid_attr.ndev) 1310 return -ENXIO; 1311 1312 ret = rdma_addr_find_l2_eth_by_grh(&sgid, 1313 &ah_attr->grh.dgid, 1314 ah_attr->dmac, 1315 sgid_attr.ndev, &hop_limit); 1316 dev_put(sgid_attr.ndev); 1317 1318 ah_attr->grh.hop_limit = hop_limit; 1319 return ret; 1320 } 1321 EXPORT_SYMBOL(ib_resolve_eth_dmac); 1322 1323 static bool is_qp_type_connected(const struct ib_qp *qp) 1324 { 1325 return (qp->qp_type == IB_QPT_UC || 1326 qp->qp_type == IB_QPT_RC || 1327 qp->qp_type == IB_QPT_XRC_INI || 1328 qp->qp_type == IB_QPT_XRC_TGT); 1329 } 1330 1331 /** 1332 * IB core internal function to perform QP attributes modification. 1333 */ 1334 static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, 1335 int attr_mask, struct ib_udata *udata) 1336 { 1337 u8 port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; 1338 int ret; 1339 1340 if (port < rdma_start_port(qp->device) || 1341 port > rdma_end_port(qp->device)) 1342 return -EINVAL; 1343 1344 if (attr_mask & IB_QP_ALT_PATH) { 1345 /* 1346 * Today the core code can only handle alternate paths and APM 1347 * for IB. Ban them in roce mode. 1348 */ 1349 if (!(rdma_protocol_ib(qp->device, 1350 attr->alt_ah_attr.port_num) && 1351 rdma_protocol_ib(qp->device, port))) { 1352 ret = EINVAL; 1353 goto out; 1354 } 1355 } 1356 1357 /* 1358 * If the user provided the qp_attr then we have to resolve it. Kernel 1359 * users have to provide already resolved rdma_ah_attr's 1360 */ 1361 if (udata && (attr_mask & IB_QP_AV) && 1362 rdma_protocol_roce(qp->device, port) && 1363 is_qp_type_connected(qp)) { 1364 ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr); 1365 if (ret) 1366 goto out; 1367 } 1368 1369 if (rdma_ib_or_roce(qp->device, port)) { 1370 if (attr_mask & IB_QP_RQ_PSN && attr->rq_psn & ~0xffffff) { 1371 dev_warn(&qp->device->dev, 1372 "%s rq_psn overflow, masking to 24 bits\n", 1373 __func__); 1374 attr->rq_psn &= 0xffffff; 1375 } 1376 1377 if (attr_mask & IB_QP_SQ_PSN && attr->sq_psn & ~0xffffff) { 1378 dev_warn(&qp->device->dev, 1379 " %s sq_psn overflow, masking to 24 bits\n", 1380 __func__); 1381 attr->sq_psn &= 0xffffff; 1382 } 1383 } 1384 1385 ret = qp->device->modify_qp(qp, attr, attr_mask, udata); 1386 if (ret) 1387 goto out; 1388 1389 if (attr_mask & IB_QP_PORT) 1390 qp->port = attr->port_num; 1391 out: 1392 return ret; 1393 } 1394 1395 /** 1396 * ib_modify_qp_with_udata - Modifies the attributes for the specified QP. 1397 * @ib_qp: The QP to modify. 1398 * @attr: On input, specifies the QP attributes to modify. On output, 1399 * the current values of selected QP attributes are returned. 
1400 * @attr_mask: A bit-mask used to specify which attributes of the QP 1401 * are being modified. 1402 * @udata: pointer to user's input output buffer information 1403 * are being modified. 1404 * It returns 0 on success and returns appropriate error code on error. 1405 */ 1406 int ib_modify_qp_with_udata(struct ib_qp *ib_qp, struct ib_qp_attr *attr, 1407 int attr_mask, struct ib_udata *udata) 1408 { 1409 return _ib_modify_qp(ib_qp->real_qp, attr, attr_mask, udata); 1410 } 1411 EXPORT_SYMBOL(ib_modify_qp_with_udata); 1412 1413 int ib_modify_qp(struct ib_qp *qp, 1414 struct ib_qp_attr *qp_attr, 1415 int qp_attr_mask) 1416 { 1417 if (qp_attr_mask & IB_QP_AV) { 1418 int ret; 1419 1420 ret = ib_resolve_eth_dmac(qp->device, &qp_attr->ah_attr); 1421 if (ret) 1422 return ret; 1423 } 1424 1425 return qp->device->modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL); 1426 } 1427 EXPORT_SYMBOL(ib_modify_qp); 1428 1429 int ib_query_qp(struct ib_qp *qp, 1430 struct ib_qp_attr *qp_attr, 1431 int qp_attr_mask, 1432 struct ib_qp_init_attr *qp_init_attr) 1433 { 1434 return qp->device->query_qp ? 1435 qp->device->query_qp(qp->real_qp, qp_attr, qp_attr_mask, qp_init_attr) : 1436 -ENOSYS; 1437 } 1438 EXPORT_SYMBOL(ib_query_qp); 1439 1440 int ib_close_qp(struct ib_qp *qp) 1441 { 1442 struct ib_qp *real_qp; 1443 unsigned long flags; 1444 1445 real_qp = qp->real_qp; 1446 if (real_qp == qp) 1447 return -EINVAL; 1448 1449 spin_lock_irqsave(&real_qp->device->event_handler_lock, flags); 1450 list_del(&qp->open_list); 1451 spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags); 1452 1453 atomic_dec(&real_qp->usecnt); 1454 kfree(qp); 1455 1456 return 0; 1457 } 1458 EXPORT_SYMBOL(ib_close_qp); 1459 1460 static int __ib_destroy_shared_qp(struct ib_qp *qp) 1461 { 1462 struct ib_xrcd *xrcd; 1463 struct ib_qp *real_qp; 1464 int ret; 1465 1466 real_qp = qp->real_qp; 1467 xrcd = real_qp->xrcd; 1468 1469 mutex_lock(&xrcd->tgt_qp_mutex); 1470 ib_close_qp(qp); 1471 if (atomic_read(&real_qp->usecnt) == 0) 1472 list_del(&real_qp->xrcd_list); 1473 else 1474 real_qp = NULL; 1475 mutex_unlock(&xrcd->tgt_qp_mutex); 1476 1477 if (real_qp) { 1478 ret = ib_destroy_qp(real_qp); 1479 if (!ret) 1480 atomic_dec(&xrcd->usecnt); 1481 else 1482 __ib_insert_xrcd_qp(xrcd, real_qp); 1483 } 1484 1485 return 0; 1486 } 1487 1488 int ib_destroy_qp_user(struct ib_qp *qp, struct ib_udata *udata) 1489 { 1490 struct ib_pd *pd; 1491 struct ib_cq *scq, *rcq; 1492 struct ib_srq *srq; 1493 struct ib_rwq_ind_table *ind_tbl; 1494 int ret; 1495 1496 if (atomic_read(&qp->usecnt)) 1497 return -EBUSY; 1498 1499 if (qp->real_qp != qp) 1500 return __ib_destroy_shared_qp(qp); 1501 1502 pd = qp->pd; 1503 scq = qp->send_cq; 1504 rcq = qp->recv_cq; 1505 srq = qp->srq; 1506 ind_tbl = qp->rwq_ind_tbl; 1507 1508 ret = qp->device->destroy_qp(qp, udata); 1509 if (!ret) { 1510 if (pd) 1511 atomic_dec(&pd->usecnt); 1512 if (scq) 1513 atomic_dec(&scq->usecnt); 1514 if (rcq) 1515 atomic_dec(&rcq->usecnt); 1516 if (srq) 1517 atomic_dec(&srq->usecnt); 1518 if (ind_tbl) 1519 atomic_dec(&ind_tbl->usecnt); 1520 } 1521 1522 return ret; 1523 } 1524 EXPORT_SYMBOL(ib_destroy_qp_user); 1525 1526 /* Completion queues */ 1527 1528 struct ib_cq *__ib_create_cq(struct ib_device *device, 1529 ib_comp_handler comp_handler, 1530 void (*event_handler)(struct ib_event *, void *), 1531 void *cq_context, 1532 const struct ib_cq_init_attr *cq_attr, 1533 const char *caller) 1534 { 1535 struct ib_cq *cq; 1536 int ret; 1537 1538 cq = rdma_zalloc_drv_obj(device, ib_cq); 1539 if (!cq) 1540 
return ERR_PTR(-ENOMEM); 1541 1542 cq->device = device; 1543 cq->uobject = NULL; 1544 cq->comp_handler = comp_handler; 1545 cq->event_handler = event_handler; 1546 cq->cq_context = cq_context; 1547 atomic_set(&cq->usecnt, 0); 1548 1549 ret = device->create_cq(cq, cq_attr, NULL); 1550 if (ret) { 1551 kfree(cq); 1552 return ERR_PTR(ret); 1553 } 1554 1555 return cq; 1556 } 1557 EXPORT_SYMBOL(__ib_create_cq); 1558 1559 int ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) 1560 { 1561 return cq->device->modify_cq ? 1562 cq->device->modify_cq(cq, cq_count, cq_period) : -ENOSYS; 1563 } 1564 EXPORT_SYMBOL(ib_modify_cq); 1565 1566 int ib_destroy_cq_user(struct ib_cq *cq, struct ib_udata *udata) 1567 { 1568 if (atomic_read(&cq->usecnt)) 1569 return -EBUSY; 1570 1571 cq->device->destroy_cq(cq, udata); 1572 kfree(cq); 1573 return 0; 1574 } 1575 EXPORT_SYMBOL(ib_destroy_cq_user); 1576 1577 int ib_resize_cq(struct ib_cq *cq, int cqe) 1578 { 1579 return cq->device->resize_cq ? 1580 cq->device->resize_cq(cq, cqe, NULL) : -ENOSYS; 1581 } 1582 EXPORT_SYMBOL(ib_resize_cq); 1583 1584 /* Memory regions */ 1585 1586 int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata) 1587 { 1588 struct ib_pd *pd = mr->pd; 1589 struct ib_dm *dm = mr->dm; 1590 struct ib_sig_attrs *sig_attrs = mr->sig_attrs; 1591 int ret; 1592 1593 ret = mr->device->dereg_mr(mr, udata); 1594 if (!ret) { 1595 atomic_dec(&pd->usecnt); 1596 if (dm) 1597 atomic_dec(&dm->usecnt); 1598 kfree(sig_attrs); 1599 } 1600 1601 return ret; 1602 } 1603 EXPORT_SYMBOL(ib_dereg_mr_user); 1604 1605 /** 1606 * ib_alloc_mr_user() - Allocates a memory region 1607 * @pd: protection domain associated with the region 1608 * @mr_type: memory region type 1609 * @max_num_sg: maximum sg entries available for registration. 1610 * @udata: user data or null for kernel objects 1611 * 1612 * Notes: 1613 * Memory registeration page/sg lists must not exceed max_num_sg. 1614 * For mr_type IB_MR_TYPE_MEM_REG, the total length cannot exceed 1615 * max_num_sg * used_page_size. 
1616 * 1617 */ 1618 struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type, 1619 u32 max_num_sg, struct ib_udata *udata) 1620 { 1621 struct ib_mr *mr; 1622 1623 if (!pd->device->alloc_mr) { 1624 mr = ERR_PTR(-EOPNOTSUPP); 1625 goto out; 1626 } 1627 1628 if (mr_type == IB_MR_TYPE_INTEGRITY) { 1629 WARN_ON_ONCE(1); 1630 mr = ERR_PTR(-EINVAL); 1631 goto out; 1632 } 1633 1634 mr = pd->device->alloc_mr(pd, mr_type, max_num_sg, udata); 1635 if (!IS_ERR(mr)) { 1636 mr->device = pd->device; 1637 mr->pd = pd; 1638 mr->dm = NULL; 1639 mr->uobject = NULL; 1640 atomic_inc(&pd->usecnt); 1641 mr->need_inval = false; 1642 mr->type = mr_type; 1643 mr->sig_attrs = NULL; 1644 } 1645 1646 out: 1647 return mr; 1648 } 1649 EXPORT_SYMBOL(ib_alloc_mr_user); 1650 1651 /* "Fast" memory regions */ 1652 1653 struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd, 1654 int mr_access_flags, 1655 struct ib_fmr_attr *fmr_attr) 1656 { 1657 struct ib_fmr *fmr; 1658 1659 if (!pd->device->alloc_fmr) 1660 return ERR_PTR(-ENOSYS); 1661 1662 fmr = pd->device->alloc_fmr(pd, mr_access_flags, fmr_attr); 1663 if (!IS_ERR(fmr)) { 1664 fmr->device = pd->device; 1665 fmr->pd = pd; 1666 atomic_inc(&pd->usecnt); 1667 } 1668 1669 return fmr; 1670 } 1671 EXPORT_SYMBOL(ib_alloc_fmr); 1672 1673 int ib_unmap_fmr(struct list_head *fmr_list) 1674 { 1675 struct ib_fmr *fmr; 1676 1677 if (list_empty(fmr_list)) 1678 return 0; 1679 1680 fmr = list_entry(fmr_list->next, struct ib_fmr, list); 1681 return fmr->device->unmap_fmr(fmr_list); 1682 } 1683 EXPORT_SYMBOL(ib_unmap_fmr); 1684 1685 int ib_dealloc_fmr(struct ib_fmr *fmr) 1686 { 1687 struct ib_pd *pd; 1688 int ret; 1689 1690 pd = fmr->pd; 1691 ret = fmr->device->dealloc_fmr(fmr); 1692 if (!ret) 1693 atomic_dec(&pd->usecnt); 1694 1695 return ret; 1696 } 1697 EXPORT_SYMBOL(ib_dealloc_fmr); 1698 1699 /* Multicast groups */ 1700 1701 static bool is_valid_mcast_lid(struct ib_qp *qp, u16 lid) 1702 { 1703 struct ib_qp_init_attr init_attr = {}; 1704 struct ib_qp_attr attr = {}; 1705 int num_eth_ports = 0; 1706 int port; 1707 1708 /* If QP state >= init, it is assigned to a port and we can check this 1709 * port only. 1710 */ 1711 if (!ib_query_qp(qp, &attr, IB_QP_STATE | IB_QP_PORT, &init_attr)) { 1712 if (attr.qp_state >= IB_QPS_INIT) { 1713 if (rdma_port_get_link_layer(qp->device, attr.port_num) != 1714 IB_LINK_LAYER_INFINIBAND) 1715 return true; 1716 goto lid_check; 1717 } 1718 } 1719 1720 /* Can't get a quick answer, iterate over all ports */ 1721 for (port = 0; port < qp->device->phys_port_cnt; port++) 1722 if (rdma_port_get_link_layer(qp->device, port) != 1723 IB_LINK_LAYER_INFINIBAND) 1724 num_eth_ports++; 1725 1726 /* If we have at lease one Ethernet port, RoCE annex declares that 1727 * multicast LID should be ignored. We can't tell at this step if the 1728 * QP belongs to an IB or Ethernet port. 1729 */ 1730 if (num_eth_ports) 1731 return true; 1732 1733 /* If all the ports are IB, we can check according to IB spec. 
*/ 1734 lid_check: 1735 return !(lid < be16_to_cpu(IB_MULTICAST_LID_BASE) || 1736 lid == be16_to_cpu(IB_LID_PERMISSIVE)); 1737 } 1738 1739 int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) 1740 { 1741 int ret; 1742 1743 if (!qp->device->attach_mcast) 1744 return -ENOSYS; 1745 1746 if (!rdma_is_multicast_addr((struct in6_addr *)gid->raw) || 1747 qp->qp_type != IB_QPT_UD || !is_valid_mcast_lid(qp, lid)) 1748 return -EINVAL; 1749 1750 ret = qp->device->attach_mcast(qp, gid, lid); 1751 if (!ret) 1752 atomic_inc(&qp->usecnt); 1753 return ret; 1754 } 1755 EXPORT_SYMBOL(ib_attach_mcast); 1756 1757 int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) 1758 { 1759 int ret; 1760 1761 if (!qp->device->detach_mcast) 1762 return -ENOSYS; 1763 1764 if (!rdma_is_multicast_addr((struct in6_addr *)gid->raw) || 1765 qp->qp_type != IB_QPT_UD || !is_valid_mcast_lid(qp, lid)) 1766 return -EINVAL; 1767 1768 ret = qp->device->detach_mcast(qp, gid, lid); 1769 if (!ret) 1770 atomic_dec(&qp->usecnt); 1771 return ret; 1772 } 1773 EXPORT_SYMBOL(ib_detach_mcast); 1774 1775 struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller) 1776 { 1777 struct ib_xrcd *xrcd; 1778 1779 if (!device->alloc_xrcd) 1780 return ERR_PTR(-EOPNOTSUPP); 1781 1782 xrcd = device->alloc_xrcd(device, NULL); 1783 if (!IS_ERR(xrcd)) { 1784 xrcd->device = device; 1785 xrcd->inode = NULL; 1786 atomic_set(&xrcd->usecnt, 0); 1787 mutex_init(&xrcd->tgt_qp_mutex); 1788 INIT_LIST_HEAD(&xrcd->tgt_qp_list); 1789 } 1790 1791 return xrcd; 1792 } 1793 EXPORT_SYMBOL(__ib_alloc_xrcd); 1794 1795 int ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata) 1796 { 1797 struct ib_qp *qp; 1798 int ret; 1799 1800 if (atomic_read(&xrcd->usecnt)) 1801 return -EBUSY; 1802 1803 while (!list_empty(&xrcd->tgt_qp_list)) { 1804 qp = list_entry(xrcd->tgt_qp_list.next, struct ib_qp, xrcd_list); 1805 ret = ib_destroy_qp(qp); 1806 if (ret) 1807 return ret; 1808 } 1809 mutex_destroy(&xrcd->tgt_qp_mutex); 1810 1811 return xrcd->device->dealloc_xrcd(xrcd, udata); 1812 } 1813 EXPORT_SYMBOL(ib_dealloc_xrcd); 1814 1815 /** 1816 * ib_create_wq - Creates a WQ associated with the specified protection 1817 * domain. 1818 * @pd: The protection domain associated with the WQ. 1819 * @wq_init_attr: A list of initial attributes required to create the 1820 * WQ. If WQ creation succeeds, then the attributes are updated to 1821 * the actual capabilities of the created WQ. 1822 * 1823 * wq_init_attr->max_wr and wq_init_attr->max_sge determine 1824 * the requested size of the WQ, and set to the actual values allocated 1825 * on return. 1826 * If ib_create_wq() succeeds, then max_wr and max_sge will always be 1827 * at least as large as the requested values. 1828 */ 1829 struct ib_wq *ib_create_wq(struct ib_pd *pd, 1830 struct ib_wq_init_attr *wq_attr) 1831 { 1832 struct ib_wq *wq; 1833 1834 if (!pd->device->create_wq) 1835 return ERR_PTR(-ENOSYS); 1836 1837 wq = pd->device->create_wq(pd, wq_attr, NULL); 1838 if (!IS_ERR(wq)) { 1839 wq->event_handler = wq_attr->event_handler; 1840 wq->wq_context = wq_attr->wq_context; 1841 wq->wq_type = wq_attr->wq_type; 1842 wq->cq = wq_attr->cq; 1843 wq->device = pd->device; 1844 wq->pd = pd; 1845 wq->uobject = NULL; 1846 atomic_inc(&pd->usecnt); 1847 atomic_inc(&wq_attr->cq->usecnt); 1848 atomic_set(&wq->usecnt, 0); 1849 } 1850 return wq; 1851 } 1852 EXPORT_SYMBOL(ib_create_wq); 1853 1854 /** 1855 * ib_destroy_wq - Destroys the specified user WQ. 1856 * @wq: The WQ to destroy. 
1857 * @udata: Valid user data 1858 */ 1859 int ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata) 1860 { 1861 struct ib_cq *cq = wq->cq; 1862 struct ib_pd *pd = wq->pd; 1863 1864 if (atomic_read(&wq->usecnt)) 1865 return -EBUSY; 1866 1867 wq->device->destroy_wq(wq, udata); 1868 atomic_dec(&pd->usecnt); 1869 atomic_dec(&cq->usecnt); 1870 1871 return 0; 1872 } 1873 EXPORT_SYMBOL(ib_destroy_wq); 1874 1875 /** 1876 * ib_modify_wq - Modifies the specified WQ. 1877 * @wq: The WQ to modify. 1878 * @wq_attr: On input, specifies the WQ attributes to modify. 1879 * @wq_attr_mask: A bit-mask used to specify which attributes of the WQ 1880 * are being modified. 1881 * On output, the current values of selected WQ attributes are returned. 1882 */ 1883 int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, 1884 u32 wq_attr_mask) 1885 { 1886 int err; 1887 1888 if (!wq->device->modify_wq) 1889 return -ENOSYS; 1890 1891 err = wq->device->modify_wq(wq, wq_attr, wq_attr_mask, NULL); 1892 return err; 1893 } 1894 EXPORT_SYMBOL(ib_modify_wq); 1895 1896 /* 1897 * ib_create_rwq_ind_table - Creates a RQ Indirection Table. 1898 * @device: The device on which to create the rwq indirection table. 1899 * @ib_rwq_ind_table_init_attr: A list of initial attributes required to 1900 * create the Indirection Table. 1901 * 1902 * Note: The life time of ib_rwq_ind_table_init_attr->ind_tbl is not less 1903 * than the created ib_rwq_ind_table object and the caller is responsible 1904 * for its memory allocation/free. 1905 */ 1906 struct ib_rwq_ind_table *ib_create_rwq_ind_table(struct ib_device *device, 1907 struct ib_rwq_ind_table_init_attr *init_attr) 1908 { 1909 struct ib_rwq_ind_table *rwq_ind_table; 1910 int i; 1911 u32 table_size; 1912 1913 if (!device->create_rwq_ind_table) 1914 return ERR_PTR(-ENOSYS); 1915 1916 table_size = (1 << init_attr->log_ind_tbl_size); 1917 rwq_ind_table = device->create_rwq_ind_table(device, 1918 init_attr, NULL); 1919 if (IS_ERR(rwq_ind_table)) 1920 return rwq_ind_table; 1921 1922 rwq_ind_table->ind_tbl = init_attr->ind_tbl; 1923 rwq_ind_table->log_ind_tbl_size = init_attr->log_ind_tbl_size; 1924 rwq_ind_table->device = device; 1925 rwq_ind_table->uobject = NULL; 1926 atomic_set(&rwq_ind_table->usecnt, 0); 1927 1928 for (i = 0; i < table_size; i++) 1929 atomic_inc(&rwq_ind_table->ind_tbl[i]->usecnt); 1930 1931 return rwq_ind_table; 1932 } 1933 EXPORT_SYMBOL(ib_create_rwq_ind_table); 1934 1935 /* 1936 * ib_destroy_rwq_ind_table - Destroys the specified Indirection Table. 1937 * @wq_ind_table: The Indirection Table to destroy. 1938 */ 1939 int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *rwq_ind_table) 1940 { 1941 int err, i; 1942 u32 table_size = (1 << rwq_ind_table->log_ind_tbl_size); 1943 struct ib_wq **ind_tbl = rwq_ind_table->ind_tbl; 1944 1945 if (atomic_read(&rwq_ind_table->usecnt)) 1946 return -EBUSY; 1947 1948 err = rwq_ind_table->device->destroy_rwq_ind_table(rwq_ind_table); 1949 if (!err) { 1950 for (i = 0; i < table_size; i++) 1951 atomic_dec(&ind_tbl[i]->usecnt); 1952 } 1953 1954 return err; 1955 } 1956 EXPORT_SYMBOL(ib_destroy_rwq_ind_table); 1957 1958 int ib_check_mr_status(struct ib_mr *mr, u32 check_mask, 1959 struct ib_mr_status *mr_status) 1960 { 1961 return mr->device->check_mr_status ? 
1962 mr->device->check_mr_status(mr, check_mask, mr_status) : -ENOSYS; 1963 } 1964 EXPORT_SYMBOL(ib_check_mr_status); 1965 1966 int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port, 1967 int state) 1968 { 1969 if (!device->set_vf_link_state) 1970 return -ENOSYS; 1971 1972 return device->set_vf_link_state(device, vf, port, state); 1973 } 1974 EXPORT_SYMBOL(ib_set_vf_link_state); 1975 1976 int ib_get_vf_config(struct ib_device *device, int vf, u8 port, 1977 struct ifla_vf_info *info) 1978 { 1979 if (!device->get_vf_config) 1980 return -ENOSYS; 1981 1982 return device->get_vf_config(device, vf, port, info); 1983 } 1984 EXPORT_SYMBOL(ib_get_vf_config); 1985 1986 int ib_get_vf_stats(struct ib_device *device, int vf, u8 port, 1987 struct ifla_vf_stats *stats) 1988 { 1989 if (!device->get_vf_stats) 1990 return -ENOSYS; 1991 1992 return device->get_vf_stats(device, vf, port, stats); 1993 } 1994 EXPORT_SYMBOL(ib_get_vf_stats); 1995 1996 int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid, 1997 int type) 1998 { 1999 if (!device->set_vf_guid) 2000 return -ENOSYS; 2001 2002 return device->set_vf_guid(device, vf, port, guid, type); 2003 } 2004 EXPORT_SYMBOL(ib_set_vf_guid); 2005 2006 /** 2007 * ib_map_mr_sg() - Map the largest prefix of a dma mapped SG list 2008 * and set it the memory region. 2009 * @mr: memory region 2010 * @sg: dma mapped scatterlist 2011 * @sg_nents: number of entries in sg 2012 * @sg_offset: offset in bytes into sg 2013 * @page_size: page vector desired page size 2014 * 2015 * Constraints: 2016 * - The first sg element is allowed to have an offset. 2017 * - Each sg element must either be aligned to page_size or virtually 2018 * contiguous to the previous element. In case an sg element has a 2019 * non-contiguous offset, the mapping prefix will not include it. 2020 * - The last sg element is allowed to have length less than page_size. 2021 * - If sg_nents total byte length exceeds the mr max_num_sge * page_size 2022 * then only max_num_sg entries will be mapped. 2023 * - If the MR was allocated with type IB_MR_TYPE_SG_GAPS, none of these 2024 * constraints holds and the page_size argument is ignored. 2025 * 2026 * Returns the number of sg elements that were mapped to the memory region. 2027 * 2028 * After this completes successfully, the memory region 2029 * is ready for registration. 2030 */ 2031 int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents, 2032 unsigned int *sg_offset, unsigned int page_size) 2033 { 2034 if (unlikely(!mr->device->map_mr_sg)) 2035 return -ENOSYS; 2036 2037 mr->page_size = page_size; 2038 2039 return mr->device->map_mr_sg(mr, sg, sg_nents, sg_offset); 2040 } 2041 EXPORT_SYMBOL(ib_map_mr_sg); 2042 2043 /** 2044 * ib_sg_to_pages() - Convert the largest prefix of a sg list 2045 * to a page vector 2046 * @mr: memory region 2047 * @sgl: dma mapped scatterlist 2048 * @sg_nents: number of entries in sg 2049 * @sg_offset_p: IN: start offset in bytes into sg 2050 * OUT: offset in bytes for element n of the sg of the first 2051 * byte that has not been processed where n is the return 2052 * value of this function. 2053 * @set_page: driver page assignment function pointer 2054 * 2055 * Core service helper for drivers to convert the largest 2056 * prefix of given sg list to a page vector. The sg list 2057 * prefix converted is the prefix that meet the requirements 2058 * of ib_map_mr_sg. 2059 * 2060 * Returns the number of sg elements that were assigned to 2061 * a page vector. 
/**
 * ib_sg_to_pages() - Convert the largest prefix of a sg list
 *     to a page vector
 * @mr:          memory region
 * @sgl:         dma mapped scatterlist
 * @sg_nents:    number of entries in sg
 * @sg_offset_p: IN:  start offset in bytes into sg
 *               OUT: offset in bytes into element n of the sg list of the
 *                    first byte that has not been processed, where n is the
 *                    return value of this function.
 * @set_page:    driver page assignment function pointer
 *
 * Core service helper for drivers to convert the largest
 * prefix of the given sg list to a page vector.  The sg list
 * prefix converted is the prefix that meets the requirements
 * of ib_map_mr_sg.
 *
 * Returns the number of sg elements that were assigned to
 * a page vector.
 */
int ib_sg_to_pages(struct ib_mr *mr, struct scatterlist *sgl, int sg_nents,
		   unsigned int *sg_offset_p, int (*set_page)(struct ib_mr *, u64))
{
	struct scatterlist *sg;
	u64 last_end_dma_addr = 0;
	unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
	unsigned int last_page_off = 0;
	u64 page_mask = ~((u64)mr->page_size - 1);
	int i, ret;

	if (unlikely(sg_nents <= 0 || sg_offset > sg_dma_len(&sgl[0])))
		return -EINVAL;

	mr->iova = sg_dma_address(&sgl[0]) + sg_offset;
	mr->length = 0;

	for_each_sg(sgl, sg, sg_nents, i) {
		u64 dma_addr = sg_dma_address(sg) + sg_offset;
		u64 prev_addr = dma_addr;
		unsigned int dma_len = sg_dma_len(sg) - sg_offset;
		u64 end_dma_addr = dma_addr + dma_len;
		u64 page_addr = dma_addr & page_mask;

		/*
		 * For the second and later elements, check whether either the
		 * end of element i - 1 or the start of element i is not aligned
		 * on a page boundary.
		 */
		if (i && (last_page_off != 0 || page_addr != dma_addr)) {
			/* Stop mapping if there is a gap. */
			if (last_end_dma_addr != dma_addr)
				break;

			/*
			 * Coalesce this element with the last.  If it is small
			 * enough just update mr->length.  Otherwise start
			 * mapping from the next page.
			 */
			goto next_page;
		}

		do {
			ret = set_page(mr, page_addr);
			if (unlikely(ret < 0)) {
				sg_offset = prev_addr - sg_dma_address(sg);
				mr->length += prev_addr - dma_addr;
				if (sg_offset_p)
					*sg_offset_p = sg_offset;
				return i || sg_offset ? i : ret;
			}
			prev_addr = page_addr;
next_page:
			page_addr += mr->page_size;
		} while (page_addr < end_dma_addr);

		mr->length += dma_len;
		last_end_dma_addr = end_dma_addr;
		last_page_off = end_dma_addr & ~page_mask;

		sg_offset = 0;
	}

	if (sg_offset_p)
		*sg_offset_p = 0;
	return i;
}
EXPORT_SYMBOL(ib_sg_to_pages);
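/*
 * Illustrative sketch only: how a provider driver might build its map_mr_sg
 * callback on top of ib_sg_to_pages(), collecting each page DMA address into
 * a driver-owned page list.  struct example_mr, example_set_page() and
 * example_map_mr_sg() are hypothetical driver internals, not part of this
 * file.
 *
 *	struct example_mr {
 *		struct ib_mr	ibmr;
 *		u64		*pages;
 *		u32		npages;
 *		u32		max_pages;
 *	};
 *
 *	static int example_set_page(struct ib_mr *ibmr, u64 addr)
 *	{
 *		struct example_mr *mr =
 *			container_of(ibmr, struct example_mr, ibmr);
 *
 *		if (mr->npages == mr->max_pages)
 *			return -ENOMEM;
 *		mr->pages[mr->npages++] = addr;
 *		return 0;
 *	}
 *
 *	static int example_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
 *				     int sg_nents, unsigned int *sg_offset)
 *	{
 *		struct example_mr *mr =
 *			container_of(ibmr, struct example_mr, ibmr);
 *
 *		mr->npages = 0;
 *		return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
 *				      example_set_page);
 *	}
 */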
struct ib_drain_cqe {
	struct ib_cqe		cqe;
	struct completion	done;
};

static void ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct ib_drain_cqe *cqe = container_of(wc->wr_cqe, struct ib_drain_cqe,
						cqe);

	complete(&cqe->done);
}

/*
 * Post a WR and block until its completion is reaped for the SQ.
 */
static void __ib_drain_sq(struct ib_qp *qp)
{
	struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
	struct ib_drain_cqe sdrain;
	const struct ib_send_wr *bad_swr;
	struct ib_rdma_wr swr = {
		.wr = {
			.opcode	= IB_WR_RDMA_WRITE,
			.wr_cqe	= &sdrain.cqe,
		},
	};
	int ret;

	if (qp->send_cq->poll_ctx == IB_POLL_DIRECT) {
		WARN_ONCE(qp->send_cq->poll_ctx == IB_POLL_DIRECT,
			  "IB_POLL_DIRECT poll_ctx not supported for drain\n");
		return;
	}

	sdrain.cqe.done = ib_drain_qp_done;
	init_completion(&sdrain.done);

	ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
	if (ret) {
		WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
		return;
	}

	ret = ib_post_send(qp, &swr.wr, &bad_swr);
	if (ret) {
		WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
		return;
	}

	wait_for_completion(&sdrain.done);
}

/*
 * Post a WR and block until its completion is reaped for the RQ.
 */
static void __ib_drain_rq(struct ib_qp *qp)
{
	struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
	struct ib_drain_cqe rdrain;
	struct ib_recv_wr rwr = {};
	const struct ib_recv_wr *bad_rwr;
	int ret;

	if (qp->recv_cq->poll_ctx == IB_POLL_DIRECT) {
		WARN_ONCE(qp->recv_cq->poll_ctx == IB_POLL_DIRECT,
			  "IB_POLL_DIRECT poll_ctx not supported for drain\n");
		return;
	}

	rwr.wr_cqe = &rdrain.cqe;
	rdrain.cqe.done = ib_drain_qp_done;
	init_completion(&rdrain.done);

	ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
	if (ret) {
		WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
		return;
	}

	ret = ib_post_recv(qp, &rwr, &bad_rwr);
	if (ret) {
		WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
		return;
	}

	wait_for_completion(&rdrain.done);
}

/**
 * ib_drain_sq() - Block until all SQ CQEs have been consumed by the
 *		   application.
 * @qp: queue pair to drain
 *
 * If the device has a provider-specific drain function, then
 * call that.  Otherwise call the generic drain function
 * __ib_drain_sq().
 *
 * The caller must:
 *
 * ensure there is room in the CQ and SQ for the drain work request and
 * completion;
 *
 * allocate the CQ using ib_alloc_cq(); the CQ poll context cannot be
 * IB_POLL_DIRECT;
 *
 * ensure that there are no other contexts posting WRs concurrently.
 * Otherwise the drain is not guaranteed.
 */
void ib_drain_sq(struct ib_qp *qp)
{
	if (qp->device->drain_sq)
		qp->device->drain_sq(qp);
	else
		__ib_drain_sq(qp);
}
EXPORT_SYMBOL(ib_drain_sq);

/**
 * ib_drain_rq() - Block until all RQ CQEs have been consumed by the
 *		   application.
 * @qp: queue pair to drain
 *
 * If the device has a provider-specific drain function, then
 * call that.  Otherwise call the generic drain function
 * __ib_drain_rq().
 *
 * The caller must:
 *
 * ensure there is room in the CQ and RQ for the drain work request and
 * completion;
 *
 * allocate the CQ using ib_alloc_cq(); the CQ poll context cannot be
 * IB_POLL_DIRECT;
 *
 * ensure that there are no other contexts posting WRs concurrently.
 * Otherwise the drain is not guaranteed.
 */
void ib_drain_rq(struct ib_qp *qp)
{
	if (qp->device->drain_rq)
		qp->device->drain_rq(qp);
	else
		__ib_drain_rq(qp);
}
EXPORT_SYMBOL(ib_drain_rq);

/**
 * ib_drain_qp() - Block until all CQEs have been consumed by the
 *		   application on both the RQ and SQ.
 * @qp: queue pair to drain
 *
 * The caller must:
 *
 * ensure there is room in the CQ(s), SQ, and RQ for the drain work requests
 * and completions;
 *
 * allocate the CQs using ib_alloc_cq(); the CQ poll context cannot be
 * IB_POLL_DIRECT;
 *
 * ensure that there are no other contexts posting WRs concurrently.
 * Otherwise the drain is not guaranteed.
 */
void ib_drain_qp(struct ib_qp *qp)
{
	ib_drain_sq(qp);
	if (!qp->srq)
		ib_drain_rq(qp);
}
EXPORT_SYMBOL(ib_drain_qp);
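/*
 * Illustrative sketch only: typical QP teardown in a ULP whose CQs were
 * allocated with ib_alloc_cq() using a poll context other than
 * IB_POLL_DIRECT.  The ULP is assumed to have stopped posting new WRs on
 * this QP before calling it; example_teardown_qp() is hypothetical and the
 * call to ib_destroy_qp() assumes that API is available to the ULP.
 *
 *	static void example_teardown_qp(struct ib_qp *qp)
 *	{
 *		ib_drain_qp(qp);
 *		ib_destroy_qp(qp);
 *	}
 */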