1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 3 * 4 * Copyright (c) 2005 Voltaire Inc. All rights reserved. 5 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved. 6 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. 7 * Copyright (c) 2005-2006 Intel Corporation. All rights reserved. 8 * 9 * This software is available to you under a choice of one of two 10 * licenses. You may choose to be licensed under the terms of the GNU 11 * General Public License (GPL) Version 2, available from the file 12 * COPYING in the main directory of this source tree, or the 13 * OpenIB.org BSD license below: 14 * 15 * Redistribution and use in source and binary forms, with or 16 * without modification, are permitted provided that the following 17 * conditions are met: 18 * 19 * - Redistributions of source code must retain the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer. 22 * 23 * - Redistributions in binary form must reproduce the above 24 * copyright notice, this list of conditions and the following 25 * disclaimer in the documentation and/or other materials 26 * provided with the distribution. 27 * 28 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 29 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 30 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 31 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 32 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 33 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 34 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 35 * SOFTWARE. 36 * 37 * $FreeBSD$ 38 */ 39 40 #define LINUXKPI_PARAM_PREFIX ibcore_ 41 42 #include <linux/completion.h> 43 #include <linux/in.h> 44 #include <linux/in6.h> 45 #include <linux/mutex.h> 46 #include <linux/random.h> 47 #include <linux/idr.h> 48 #include <linux/inetdevice.h> 49 #include <linux/slab.h> 50 #include <linux/module.h> 51 #include <net/route.h> 52 53 #include <net/tcp.h> 54 #include <net/ipv6.h> 55 56 #include <netinet6/scope6_var.h> 57 #include <netinet6/ip6_var.h> 58 59 #include <rdma/rdma_cm.h> 60 #include <rdma/rdma_cm_ib.h> 61 #include <rdma/ib.h> 62 #include <rdma/ib_addr.h> 63 #include <rdma/ib_cache.h> 64 #include <rdma/ib_cm.h> 65 #include <rdma/ib_sa.h> 66 #include <rdma/iw_cm.h> 67 68 #include <sys/priv.h> 69 70 #include "core_priv.h" 71 72 MODULE_AUTHOR("Sean Hefty"); 73 MODULE_DESCRIPTION("Generic RDMA CM Agent"); 74 MODULE_LICENSE("Dual BSD/GPL"); 75 76 #define CMA_CM_RESPONSE_TIMEOUT 20 77 #define CMA_QUERY_CLASSPORT_INFO_TIMEOUT 3000 78 #define CMA_MAX_CM_RETRIES 15 79 #define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24) 80 #define CMA_IBOE_PACKET_LIFETIME 18 81 82 static const char * const cma_events[] = { 83 [RDMA_CM_EVENT_ADDR_RESOLVED] = "address resolved", 84 [RDMA_CM_EVENT_ADDR_ERROR] = "address error", 85 [RDMA_CM_EVENT_ROUTE_RESOLVED] = "route resolved ", 86 [RDMA_CM_EVENT_ROUTE_ERROR] = "route error", 87 [RDMA_CM_EVENT_CONNECT_REQUEST] = "connect request", 88 [RDMA_CM_EVENT_CONNECT_RESPONSE] = "connect response", 89 [RDMA_CM_EVENT_CONNECT_ERROR] = "connect error", 90 [RDMA_CM_EVENT_UNREACHABLE] = "unreachable", 91 [RDMA_CM_EVENT_REJECTED] = "rejected", 92 [RDMA_CM_EVENT_ESTABLISHED] = "established", 93 [RDMA_CM_EVENT_DISCONNECTED] = "disconnected", 94 [RDMA_CM_EVENT_DEVICE_REMOVAL] = "device removal", 95 [RDMA_CM_EVENT_MULTICAST_JOIN] = "multicast join", 96 [RDMA_CM_EVENT_MULTICAST_ERROR] = "multicast error", 97 [RDMA_CM_EVENT_ADDR_CHANGE] = "address change", 98 [RDMA_CM_EVENT_TIMEWAIT_EXIT] = "timewait exit", 99 }; 100 101 const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event) 102 { 103 size_t index = event; 104 105 return (index < ARRAY_SIZE(cma_events) && cma_events[index]) ? 106 cma_events[index] : "unrecognized event"; 107 } 108 EXPORT_SYMBOL(rdma_event_msg); 109 110 static int cma_check_linklocal(struct rdma_dev_addr *, struct sockaddr *); 111 static void cma_add_one(struct ib_device *device); 112 static void cma_remove_one(struct ib_device *device, void *client_data); 113 114 static struct ib_client cma_client = { 115 .name = "cma", 116 .add = cma_add_one, 117 .remove = cma_remove_one 118 }; 119 120 static struct ib_sa_client sa_client; 121 static struct rdma_addr_client addr_client; 122 static LIST_HEAD(dev_list); 123 static LIST_HEAD(listen_any_list); 124 static DEFINE_MUTEX(lock); 125 static struct workqueue_struct *cma_wq; 126 127 struct cma_pernet { 128 struct idr tcp_ps; 129 struct idr udp_ps; 130 struct idr ipoib_ps; 131 struct idr ib_ps; 132 }; 133 134 VNET_DEFINE(struct cma_pernet, cma_pernet); 135 136 static struct cma_pernet *cma_pernet_ptr(struct vnet *vnet) 137 { 138 struct cma_pernet *retval; 139 140 CURVNET_SET_QUIET(vnet); 141 retval = &VNET(cma_pernet); 142 CURVNET_RESTORE(); 143 144 return (retval); 145 } 146 147 static struct idr *cma_pernet_idr(struct vnet *net, enum rdma_port_space ps) 148 { 149 struct cma_pernet *pernet = cma_pernet_ptr(net); 150 151 switch (ps) { 152 case RDMA_PS_TCP: 153 return &pernet->tcp_ps; 154 case RDMA_PS_UDP: 155 return &pernet->udp_ps; 156 case RDMA_PS_IPOIB: 157 return &pernet->ipoib_ps; 158 case RDMA_PS_IB: 159 return &pernet->ib_ps; 160 default: 161 return NULL; 162 } 163 } 164 165 struct cma_device { 166 struct list_head list; 167 struct ib_device *device; 168 struct completion comp; 169 atomic_t refcount; 170 struct list_head id_list; 171 struct sysctl_ctx_list sysctl_ctx; 172 enum ib_gid_type *default_gid_type; 173 }; 174 175 struct rdma_bind_list { 176 enum rdma_port_space ps; 177 struct hlist_head owners; 178 unsigned short port; 179 }; 180 181 struct class_port_info_context { 182 struct ib_class_port_info *class_port_info; 183 struct ib_device *device; 184 struct completion done; 185 struct ib_sa_query *sa_query; 186 u8 port_num; 187 }; 188 189 static int cma_ps_alloc(struct vnet *vnet, enum rdma_port_space ps, 190 struct rdma_bind_list *bind_list, int snum) 191 { 192 struct idr *idr = cma_pernet_idr(vnet, ps); 193 194 return idr_alloc(idr, bind_list, snum, snum + 1, GFP_KERNEL); 195 } 196 197 static struct rdma_bind_list *cma_ps_find(struct vnet *net, 198 enum rdma_port_space ps, int snum) 199 { 200 struct idr *idr = cma_pernet_idr(net, ps); 201 202 return idr_find(idr, snum); 203 } 204 205 static void cma_ps_remove(struct vnet *net, enum rdma_port_space ps, int snum) 206 { 207 struct idr *idr = cma_pernet_idr(net, ps); 208 209 idr_remove(idr, snum); 210 } 211 212 enum { 213 CMA_OPTION_AFONLY, 214 }; 215 216 void cma_ref_dev(struct cma_device *cma_dev) 217 { 218 atomic_inc(&cma_dev->refcount); 219 } 220 221 struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter, 222 void *cookie) 223 { 224 struct cma_device *cma_dev; 225 struct cma_device *found_cma_dev = NULL; 226 227 mutex_lock(&lock); 228 229 list_for_each_entry(cma_dev, &dev_list, list) 230 if (filter(cma_dev->device, cookie)) { 231 found_cma_dev = cma_dev; 232 break; 233 } 234 235 if (found_cma_dev) 236 cma_ref_dev(found_cma_dev); 237 mutex_unlock(&lock); 238 return found_cma_dev; 239 } 240 241 int cma_get_default_gid_type(struct cma_device *cma_dev, 242 unsigned int port) 243 { 244 if (port < rdma_start_port(cma_dev->device) || 245 port > rdma_end_port(cma_dev->device)) 246 return -EINVAL; 247 248 return cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)]; 249 } 250 251 int cma_set_default_gid_type(struct cma_device *cma_dev, 252 unsigned int port, 253 enum ib_gid_type default_gid_type) 254 { 255 unsigned long supported_gids; 256 257 if (port < rdma_start_port(cma_dev->device) || 258 port > rdma_end_port(cma_dev->device)) 259 return -EINVAL; 260 261 supported_gids = roce_gid_type_mask_support(cma_dev->device, port); 262 263 if (!(supported_gids & 1 << default_gid_type)) 264 return -EINVAL; 265 266 cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)] = 267 default_gid_type; 268 269 return 0; 270 } 271 272 struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev) 273 { 274 return cma_dev->device; 275 } 276 277 /* 278 * Device removal can occur at anytime, so we need extra handling to 279 * serialize notifying the user of device removal with other callbacks. 280 * We do this by disabling removal notification while a callback is in process, 281 * and reporting it after the callback completes. 282 */ 283 struct rdma_id_private { 284 struct rdma_cm_id id; 285 286 struct rdma_bind_list *bind_list; 287 struct hlist_node node; 288 struct list_head list; /* listen_any_list or cma_device.list */ 289 struct list_head listen_list; /* per device listens */ 290 struct cma_device *cma_dev; 291 struct list_head mc_list; 292 293 int internal_id; 294 enum rdma_cm_state state; 295 spinlock_t lock; 296 struct mutex qp_mutex; 297 298 struct completion comp; 299 atomic_t refcount; 300 struct mutex handler_mutex; 301 302 int backlog; 303 int timeout_ms; 304 struct ib_sa_query *query; 305 int query_id; 306 union { 307 struct ib_cm_id *ib; 308 struct iw_cm_id *iw; 309 } cm_id; 310 311 u32 seq_num; 312 u32 qkey; 313 u32 qp_num; 314 pid_t owner; 315 u32 options; 316 u8 srq; 317 u8 tos; 318 u8 reuseaddr; 319 u8 afonly; 320 enum ib_gid_type gid_type; 321 }; 322 323 struct cma_multicast { 324 struct rdma_id_private *id_priv; 325 union { 326 struct ib_sa_multicast *ib; 327 } multicast; 328 struct list_head list; 329 void *context; 330 struct sockaddr_storage addr; 331 struct kref mcref; 332 bool igmp_joined; 333 u8 join_state; 334 }; 335 336 struct cma_work { 337 struct work_struct work; 338 struct rdma_id_private *id; 339 enum rdma_cm_state old_state; 340 enum rdma_cm_state new_state; 341 struct rdma_cm_event event; 342 }; 343 344 struct cma_ndev_work { 345 struct work_struct work; 346 struct rdma_id_private *id; 347 struct rdma_cm_event event; 348 }; 349 350 struct iboe_mcast_work { 351 struct work_struct work; 352 struct rdma_id_private *id; 353 struct cma_multicast *mc; 354 }; 355 356 union cma_ip_addr { 357 struct in6_addr ip6; 358 struct { 359 __be32 pad[3]; 360 __be32 addr; 361 } ip4; 362 }; 363 364 struct cma_hdr { 365 u8 cma_version; 366 u8 ip_version; /* IP version: 7:4 */ 367 __be16 port; 368 union cma_ip_addr src_addr; 369 union cma_ip_addr dst_addr; 370 }; 371 372 #define CMA_VERSION 0x00 373 374 struct cma_req_info { 375 struct ib_device *device; 376 int port; 377 union ib_gid local_gid; 378 __be64 service_id; 379 u16 pkey; 380 bool has_gid:1; 381 }; 382 383 static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp) 384 { 385 unsigned long flags; 386 int ret; 387 388 spin_lock_irqsave(&id_priv->lock, flags); 389 ret = (id_priv->state == comp); 390 spin_unlock_irqrestore(&id_priv->lock, flags); 391 return ret; 392 } 393 394 static int cma_comp_exch(struct rdma_id_private *id_priv, 395 enum rdma_cm_state comp, enum rdma_cm_state exch) 396 { 397 unsigned long flags; 398 int ret; 399 400 spin_lock_irqsave(&id_priv->lock, flags); 401 if ((ret = (id_priv->state == comp))) 402 id_priv->state = exch; 403 spin_unlock_irqrestore(&id_priv->lock, flags); 404 return ret; 405 } 406 407 static enum rdma_cm_state cma_exch(struct rdma_id_private *id_priv, 408 enum rdma_cm_state exch) 409 { 410 unsigned long flags; 411 enum rdma_cm_state old; 412 413 spin_lock_irqsave(&id_priv->lock, flags); 414 old = id_priv->state; 415 id_priv->state = exch; 416 spin_unlock_irqrestore(&id_priv->lock, flags); 417 return old; 418 } 419 420 static inline u8 cma_get_ip_ver(const struct cma_hdr *hdr) 421 { 422 return hdr->ip_version >> 4; 423 } 424 425 static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver) 426 { 427 hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF); 428 } 429 430 static void _cma_attach_to_dev(struct rdma_id_private *id_priv, 431 struct cma_device *cma_dev) 432 { 433 cma_ref_dev(cma_dev); 434 id_priv->cma_dev = cma_dev; 435 id_priv->gid_type = 0; 436 id_priv->id.device = cma_dev->device; 437 id_priv->id.route.addr.dev_addr.transport = 438 rdma_node_get_transport(cma_dev->device->node_type); 439 list_add_tail(&id_priv->list, &cma_dev->id_list); 440 } 441 442 static void cma_attach_to_dev(struct rdma_id_private *id_priv, 443 struct cma_device *cma_dev) 444 { 445 _cma_attach_to_dev(id_priv, cma_dev); 446 id_priv->gid_type = 447 cma_dev->default_gid_type[id_priv->id.port_num - 448 rdma_start_port(cma_dev->device)]; 449 } 450 451 void cma_deref_dev(struct cma_device *cma_dev) 452 { 453 if (atomic_dec_and_test(&cma_dev->refcount)) 454 complete(&cma_dev->comp); 455 } 456 457 static inline void release_mc(struct kref *kref) 458 { 459 struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref); 460 461 kfree(mc->multicast.ib); 462 kfree(mc); 463 } 464 465 static void cma_release_dev(struct rdma_id_private *id_priv) 466 { 467 mutex_lock(&lock); 468 list_del(&id_priv->list); 469 cma_deref_dev(id_priv->cma_dev); 470 id_priv->cma_dev = NULL; 471 mutex_unlock(&lock); 472 } 473 474 static inline struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv) 475 { 476 return (struct sockaddr *) &id_priv->id.route.addr.src_addr; 477 } 478 479 static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv) 480 { 481 return (struct sockaddr *) &id_priv->id.route.addr.dst_addr; 482 } 483 484 static inline unsigned short cma_family(struct rdma_id_private *id_priv) 485 { 486 return id_priv->id.route.addr.src_addr.ss_family; 487 } 488 489 static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey) 490 { 491 struct ib_sa_mcmember_rec rec; 492 int ret = 0; 493 494 if (id_priv->qkey) { 495 if (qkey && id_priv->qkey != qkey) 496 return -EINVAL; 497 return 0; 498 } 499 500 if (qkey) { 501 id_priv->qkey = qkey; 502 return 0; 503 } 504 505 switch (id_priv->id.ps) { 506 case RDMA_PS_UDP: 507 case RDMA_PS_IB: 508 id_priv->qkey = RDMA_UDP_QKEY; 509 break; 510 case RDMA_PS_IPOIB: 511 ib_addr_get_mgid(&id_priv->id.route.addr.dev_addr, &rec.mgid); 512 ret = ib_sa_get_mcmember_rec(id_priv->id.device, 513 id_priv->id.port_num, &rec.mgid, 514 &rec); 515 if (!ret) 516 id_priv->qkey = be32_to_cpu(rec.qkey); 517 break; 518 default: 519 break; 520 } 521 return ret; 522 } 523 524 static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr) 525 { 526 dev_addr->dev_type = ARPHRD_INFINIBAND; 527 rdma_addr_set_sgid(dev_addr, (union ib_gid *) &sib->sib_addr); 528 ib_addr_set_pkey(dev_addr, ntohs(sib->sib_pkey)); 529 } 530 531 static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) 532 { 533 int ret; 534 535 if (addr->sa_family != AF_IB) { 536 ret = rdma_translate_ip(addr, dev_addr); 537 } else { 538 cma_translate_ib((struct sockaddr_ib *) addr, dev_addr); 539 ret = 0; 540 } 541 542 return ret; 543 } 544 545 static inline int cma_validate_port(struct ib_device *device, u8 port, 546 enum ib_gid_type gid_type, 547 union ib_gid *gid, int dev_type, 548 struct vnet *net, 549 int bound_if_index) 550 { 551 int ret = -ENODEV; 552 struct net_device *ndev = NULL; 553 554 if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port)) 555 return ret; 556 557 if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port)) 558 return ret; 559 560 if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) { 561 ndev = dev_get_by_index(net, bound_if_index); 562 if (ndev && ndev->if_flags & IFF_LOOPBACK) { 563 pr_info("detected loopback device\n"); 564 dev_put(ndev); 565 566 if (!device->get_netdev) 567 return -EOPNOTSUPP; 568 569 ndev = device->get_netdev(device, port); 570 if (!ndev) 571 return -ENODEV; 572 } 573 } else { 574 gid_type = IB_GID_TYPE_IB; 575 } 576 577 ret = ib_find_cached_gid_by_port(device, gid, gid_type, port, 578 ndev, NULL); 579 580 if (ndev) 581 dev_put(ndev); 582 583 return ret; 584 } 585 586 static int cma_acquire_dev(struct rdma_id_private *id_priv, 587 struct rdma_id_private *listen_id_priv) 588 { 589 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 590 struct cma_device *cma_dev; 591 union ib_gid gid, iboe_gid, *gidp; 592 int ret = -ENODEV; 593 u8 port; 594 595 if (dev_addr->dev_type != ARPHRD_INFINIBAND && 596 id_priv->id.ps == RDMA_PS_IPOIB) 597 return -EINVAL; 598 599 mutex_lock(&lock); 600 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, 601 &iboe_gid); 602 603 memcpy(&gid, dev_addr->src_dev_addr + 604 rdma_addr_gid_offset(dev_addr), sizeof gid); 605 606 if (listen_id_priv) { 607 cma_dev = listen_id_priv->cma_dev; 608 port = listen_id_priv->id.port_num; 609 gidp = rdma_protocol_roce(cma_dev->device, port) ? 610 &iboe_gid : &gid; 611 612 ret = cma_validate_port(cma_dev->device, port, 613 rdma_protocol_ib(cma_dev->device, port) ? 614 IB_GID_TYPE_IB : 615 listen_id_priv->gid_type, gidp, 616 dev_addr->dev_type, 617 dev_addr->net, 618 dev_addr->bound_dev_if); 619 if (!ret) { 620 id_priv->id.port_num = port; 621 goto out; 622 } 623 } 624 625 list_for_each_entry(cma_dev, &dev_list, list) { 626 for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) { 627 if (listen_id_priv && 628 listen_id_priv->cma_dev == cma_dev && 629 listen_id_priv->id.port_num == port) 630 continue; 631 632 gidp = rdma_protocol_roce(cma_dev->device, port) ? 633 &iboe_gid : &gid; 634 635 ret = cma_validate_port(cma_dev->device, port, 636 rdma_protocol_ib(cma_dev->device, port) ? 637 IB_GID_TYPE_IB : 638 cma_dev->default_gid_type[port - 1], 639 gidp, dev_addr->dev_type, 640 dev_addr->net, 641 dev_addr->bound_dev_if); 642 if (!ret) { 643 id_priv->id.port_num = port; 644 goto out; 645 } 646 } 647 } 648 649 out: 650 if (!ret) 651 cma_attach_to_dev(id_priv, cma_dev); 652 653 mutex_unlock(&lock); 654 return ret; 655 } 656 657 /* 658 * Select the source IB device and address to reach the destination IB address. 659 */ 660 static int cma_resolve_ib_dev(struct rdma_id_private *id_priv) 661 { 662 struct cma_device *cma_dev, *cur_dev; 663 struct sockaddr_ib *addr; 664 union ib_gid gid, sgid, *dgid; 665 u16 pkey, index; 666 u8 p; 667 int i; 668 669 cma_dev = NULL; 670 addr = (struct sockaddr_ib *) cma_dst_addr(id_priv); 671 dgid = (union ib_gid *) &addr->sib_addr; 672 pkey = ntohs(addr->sib_pkey); 673 674 list_for_each_entry(cur_dev, &dev_list, list) { 675 for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) { 676 if (!rdma_cap_af_ib(cur_dev->device, p)) 677 continue; 678 679 if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index)) 680 continue; 681 682 for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i, 683 &gid, NULL); 684 i++) { 685 if (!memcmp(&gid, dgid, sizeof(gid))) { 686 cma_dev = cur_dev; 687 sgid = gid; 688 id_priv->id.port_num = p; 689 goto found; 690 } 691 692 if (!cma_dev && (gid.global.subnet_prefix == 693 dgid->global.subnet_prefix)) { 694 cma_dev = cur_dev; 695 sgid = gid; 696 id_priv->id.port_num = p; 697 } 698 } 699 } 700 } 701 702 if (!cma_dev) 703 return -ENODEV; 704 705 found: 706 cma_attach_to_dev(id_priv, cma_dev); 707 addr = (struct sockaddr_ib *) cma_src_addr(id_priv); 708 memcpy(&addr->sib_addr, &sgid, sizeof sgid); 709 cma_translate_ib(addr, &id_priv->id.route.addr.dev_addr); 710 return 0; 711 } 712 713 static void cma_deref_id(struct rdma_id_private *id_priv) 714 { 715 if (atomic_dec_and_test(&id_priv->refcount)) 716 complete(&id_priv->comp); 717 } 718 719 struct rdma_cm_id *rdma_create_id(struct vnet *net, 720 rdma_cm_event_handler event_handler, 721 void *context, enum rdma_port_space ps, 722 enum ib_qp_type qp_type) 723 { 724 struct rdma_id_private *id_priv; 725 726 id_priv = kzalloc(sizeof *id_priv, GFP_KERNEL); 727 if (!id_priv) 728 return ERR_PTR(-ENOMEM); 729 730 id_priv->owner = task_pid_nr(current); 731 id_priv->state = RDMA_CM_IDLE; 732 id_priv->id.context = context; 733 id_priv->id.event_handler = event_handler; 734 id_priv->id.ps = ps; 735 id_priv->id.qp_type = qp_type; 736 spin_lock_init(&id_priv->lock); 737 mutex_init(&id_priv->qp_mutex); 738 init_completion(&id_priv->comp); 739 atomic_set(&id_priv->refcount, 1); 740 mutex_init(&id_priv->handler_mutex); 741 INIT_LIST_HEAD(&id_priv->listen_list); 742 INIT_LIST_HEAD(&id_priv->mc_list); 743 get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num); 744 id_priv->id.route.addr.dev_addr.net = TD_TO_VNET(curthread); 745 746 return &id_priv->id; 747 } 748 EXPORT_SYMBOL(rdma_create_id); 749 750 static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) 751 { 752 struct ib_qp_attr qp_attr; 753 int qp_attr_mask, ret; 754 755 qp_attr.qp_state = IB_QPS_INIT; 756 ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); 757 if (ret) 758 return ret; 759 760 ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask); 761 if (ret) 762 return ret; 763 764 qp_attr.qp_state = IB_QPS_RTR; 765 ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE); 766 if (ret) 767 return ret; 768 769 qp_attr.qp_state = IB_QPS_RTS; 770 qp_attr.sq_psn = 0; 771 ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN); 772 773 return ret; 774 } 775 776 static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) 777 { 778 struct ib_qp_attr qp_attr; 779 int qp_attr_mask, ret; 780 781 qp_attr.qp_state = IB_QPS_INIT; 782 ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); 783 if (ret) 784 return ret; 785 786 return ib_modify_qp(qp, &qp_attr, qp_attr_mask); 787 } 788 789 int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd, 790 struct ib_qp_init_attr *qp_init_attr) 791 { 792 struct rdma_id_private *id_priv; 793 struct ib_qp *qp; 794 int ret; 795 796 id_priv = container_of(id, struct rdma_id_private, id); 797 if (id->device != pd->device) 798 return -EINVAL; 799 800 qp_init_attr->port_num = id->port_num; 801 qp = ib_create_qp(pd, qp_init_attr); 802 if (IS_ERR(qp)) 803 return PTR_ERR(qp); 804 805 if (id->qp_type == IB_QPT_UD) 806 ret = cma_init_ud_qp(id_priv, qp); 807 else 808 ret = cma_init_conn_qp(id_priv, qp); 809 if (ret) 810 goto err; 811 812 id->qp = qp; 813 id_priv->qp_num = qp->qp_num; 814 id_priv->srq = (qp->srq != NULL); 815 return 0; 816 err: 817 ib_destroy_qp(qp); 818 return ret; 819 } 820 EXPORT_SYMBOL(rdma_create_qp); 821 822 void rdma_destroy_qp(struct rdma_cm_id *id) 823 { 824 struct rdma_id_private *id_priv; 825 826 id_priv = container_of(id, struct rdma_id_private, id); 827 mutex_lock(&id_priv->qp_mutex); 828 ib_destroy_qp(id_priv->id.qp); 829 id_priv->id.qp = NULL; 830 mutex_unlock(&id_priv->qp_mutex); 831 } 832 EXPORT_SYMBOL(rdma_destroy_qp); 833 834 static int cma_modify_qp_rtr(struct rdma_id_private *id_priv, 835 struct rdma_conn_param *conn_param) 836 { 837 struct ib_qp_attr qp_attr; 838 int qp_attr_mask, ret; 839 union ib_gid sgid; 840 841 mutex_lock(&id_priv->qp_mutex); 842 if (!id_priv->id.qp) { 843 ret = 0; 844 goto out; 845 } 846 847 /* Need to update QP attributes from default values. */ 848 qp_attr.qp_state = IB_QPS_INIT; 849 ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); 850 if (ret) 851 goto out; 852 853 ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); 854 if (ret) 855 goto out; 856 857 qp_attr.qp_state = IB_QPS_RTR; 858 ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); 859 if (ret) 860 goto out; 861 862 ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num, 863 qp_attr.ah_attr.grh.sgid_index, &sgid, NULL); 864 if (ret) 865 goto out; 866 867 BUG_ON(id_priv->cma_dev->device != id_priv->id.device); 868 869 if (conn_param) 870 qp_attr.max_dest_rd_atomic = conn_param->responder_resources; 871 ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); 872 out: 873 mutex_unlock(&id_priv->qp_mutex); 874 return ret; 875 } 876 877 static int cma_modify_qp_rts(struct rdma_id_private *id_priv, 878 struct rdma_conn_param *conn_param) 879 { 880 struct ib_qp_attr qp_attr; 881 int qp_attr_mask, ret; 882 883 mutex_lock(&id_priv->qp_mutex); 884 if (!id_priv->id.qp) { 885 ret = 0; 886 goto out; 887 } 888 889 qp_attr.qp_state = IB_QPS_RTS; 890 ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); 891 if (ret) 892 goto out; 893 894 if (conn_param) 895 qp_attr.max_rd_atomic = conn_param->initiator_depth; 896 ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); 897 out: 898 mutex_unlock(&id_priv->qp_mutex); 899 return ret; 900 } 901 902 static int cma_modify_qp_err(struct rdma_id_private *id_priv) 903 { 904 struct ib_qp_attr qp_attr; 905 int ret; 906 907 mutex_lock(&id_priv->qp_mutex); 908 if (!id_priv->id.qp) { 909 ret = 0; 910 goto out; 911 } 912 913 qp_attr.qp_state = IB_QPS_ERR; 914 ret = ib_modify_qp(id_priv->id.qp, &qp_attr, IB_QP_STATE); 915 out: 916 mutex_unlock(&id_priv->qp_mutex); 917 return ret; 918 } 919 920 static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv, 921 struct ib_qp_attr *qp_attr, int *qp_attr_mask) 922 { 923 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 924 int ret; 925 u16 pkey; 926 927 if (rdma_cap_eth_ah(id_priv->id.device, id_priv->id.port_num)) 928 pkey = 0xffff; 929 else 930 pkey = ib_addr_get_pkey(dev_addr); 931 932 ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num, 933 pkey, &qp_attr->pkey_index); 934 if (ret) 935 return ret; 936 937 qp_attr->port_num = id_priv->id.port_num; 938 *qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT; 939 940 if (id_priv->id.qp_type == IB_QPT_UD) { 941 ret = cma_set_qkey(id_priv, 0); 942 if (ret) 943 return ret; 944 945 qp_attr->qkey = id_priv->qkey; 946 *qp_attr_mask |= IB_QP_QKEY; 947 } else { 948 qp_attr->qp_access_flags = 0; 949 *qp_attr_mask |= IB_QP_ACCESS_FLAGS; 950 } 951 return 0; 952 } 953 954 int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, 955 int *qp_attr_mask) 956 { 957 struct rdma_id_private *id_priv; 958 int ret = 0; 959 960 id_priv = container_of(id, struct rdma_id_private, id); 961 if (rdma_cap_ib_cm(id->device, id->port_num)) { 962 if (!id_priv->cm_id.ib || (id_priv->id.qp_type == IB_QPT_UD)) 963 ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask); 964 else 965 ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr, 966 qp_attr_mask); 967 968 if (qp_attr->qp_state == IB_QPS_RTR) 969 qp_attr->rq_psn = id_priv->seq_num; 970 } else if (rdma_cap_iw_cm(id->device, id->port_num)) { 971 if (!id_priv->cm_id.iw) { 972 qp_attr->qp_access_flags = 0; 973 *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; 974 } else 975 ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr, 976 qp_attr_mask); 977 } else 978 ret = -ENOSYS; 979 980 return ret; 981 } 982 EXPORT_SYMBOL(rdma_init_qp_attr); 983 984 static inline int cma_zero_addr(struct sockaddr *addr) 985 { 986 switch (addr->sa_family) { 987 case AF_INET: 988 return ipv4_is_zeronet(((struct sockaddr_in *)addr)->sin_addr.s_addr); 989 case AF_INET6: 990 return ipv6_addr_any(&((struct sockaddr_in6 *) addr)->sin6_addr); 991 case AF_IB: 992 return ib_addr_any(&((struct sockaddr_ib *) addr)->sib_addr); 993 default: 994 return 0; 995 } 996 } 997 998 static inline int cma_loopback_addr(struct sockaddr *addr) 999 { 1000 switch (addr->sa_family) { 1001 case AF_INET: 1002 return ipv4_is_loopback(((struct sockaddr_in *) addr)->sin_addr.s_addr); 1003 case AF_INET6: 1004 return ipv6_addr_loopback(&((struct sockaddr_in6 *) addr)->sin6_addr); 1005 case AF_IB: 1006 return ib_addr_loopback(&((struct sockaddr_ib *) addr)->sib_addr); 1007 default: 1008 return 0; 1009 } 1010 } 1011 1012 static inline int cma_any_addr(struct sockaddr *addr) 1013 { 1014 return cma_zero_addr(addr) || cma_loopback_addr(addr); 1015 } 1016 1017 static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst) 1018 { 1019 if (src->sa_family != dst->sa_family) 1020 return -1; 1021 1022 switch (src->sa_family) { 1023 case AF_INET: 1024 return ((struct sockaddr_in *) src)->sin_addr.s_addr != 1025 ((struct sockaddr_in *) dst)->sin_addr.s_addr; 1026 case AF_INET6: 1027 return ipv6_addr_cmp(&((struct sockaddr_in6 *) src)->sin6_addr, 1028 &((struct sockaddr_in6 *) dst)->sin6_addr); 1029 default: 1030 return ib_addr_cmp(&((struct sockaddr_ib *) src)->sib_addr, 1031 &((struct sockaddr_ib *) dst)->sib_addr); 1032 } 1033 } 1034 1035 static __be16 cma_port(struct sockaddr *addr) 1036 { 1037 struct sockaddr_ib *sib; 1038 1039 switch (addr->sa_family) { 1040 case AF_INET: 1041 return ((struct sockaddr_in *) addr)->sin_port; 1042 case AF_INET6: 1043 return ((struct sockaddr_in6 *) addr)->sin6_port; 1044 case AF_IB: 1045 sib = (struct sockaddr_ib *) addr; 1046 return htons((u16) (be64_to_cpu(sib->sib_sid) & 1047 be64_to_cpu(sib->sib_sid_mask))); 1048 default: 1049 return 0; 1050 } 1051 } 1052 1053 static inline int cma_any_port(struct sockaddr *addr) 1054 { 1055 return !cma_port(addr); 1056 } 1057 1058 static void cma_save_ib_info(struct sockaddr *src_addr, 1059 struct sockaddr *dst_addr, 1060 struct rdma_cm_id *listen_id, 1061 struct ib_sa_path_rec *path) 1062 { 1063 struct sockaddr_ib *listen_ib, *ib; 1064 1065 listen_ib = (struct sockaddr_ib *) &listen_id->route.addr.src_addr; 1066 if (src_addr) { 1067 ib = (struct sockaddr_ib *)src_addr; 1068 ib->sib_family = AF_IB; 1069 if (path) { 1070 ib->sib_pkey = path->pkey; 1071 ib->sib_flowinfo = path->flow_label; 1072 memcpy(&ib->sib_addr, &path->sgid, 16); 1073 ib->sib_sid = path->service_id; 1074 ib->sib_scope_id = 0; 1075 } else { 1076 ib->sib_pkey = listen_ib->sib_pkey; 1077 ib->sib_flowinfo = listen_ib->sib_flowinfo; 1078 ib->sib_addr = listen_ib->sib_addr; 1079 ib->sib_sid = listen_ib->sib_sid; 1080 ib->sib_scope_id = listen_ib->sib_scope_id; 1081 } 1082 ib->sib_sid_mask = cpu_to_be64(0xffffffffffffffffULL); 1083 } 1084 if (dst_addr) { 1085 ib = (struct sockaddr_ib *)dst_addr; 1086 ib->sib_family = AF_IB; 1087 if (path) { 1088 ib->sib_pkey = path->pkey; 1089 ib->sib_flowinfo = path->flow_label; 1090 memcpy(&ib->sib_addr, &path->dgid, 16); 1091 } 1092 } 1093 } 1094 1095 static void cma_save_ip4_info(struct sockaddr_in *src_addr, 1096 struct sockaddr_in *dst_addr, 1097 struct cma_hdr *hdr, 1098 __be16 local_port) 1099 { 1100 if (src_addr) { 1101 *src_addr = (struct sockaddr_in) { 1102 .sin_len = sizeof(struct sockaddr_in), 1103 .sin_family = AF_INET, 1104 .sin_addr.s_addr = hdr->dst_addr.ip4.addr, 1105 .sin_port = local_port, 1106 }; 1107 } 1108 1109 if (dst_addr) { 1110 *dst_addr = (struct sockaddr_in) { 1111 .sin_len = sizeof(struct sockaddr_in), 1112 .sin_family = AF_INET, 1113 .sin_addr.s_addr = hdr->src_addr.ip4.addr, 1114 .sin_port = hdr->port, 1115 }; 1116 } 1117 } 1118 1119 static void cma_ip6_clear_scope_id(struct in6_addr *addr) 1120 { 1121 /* make sure link local scope ID gets zeroed */ 1122 if (IN6_IS_SCOPE_LINKLOCAL(addr) || 1123 IN6_IS_ADDR_MC_INTFACELOCAL(addr)) { 1124 /* use byte-access to be alignment safe */ 1125 addr->s6_addr[2] = 0; 1126 addr->s6_addr[3] = 0; 1127 } 1128 } 1129 1130 static void cma_save_ip6_info(struct sockaddr_in6 *src_addr, 1131 struct sockaddr_in6 *dst_addr, 1132 struct cma_hdr *hdr, 1133 __be16 local_port) 1134 { 1135 if (src_addr) { 1136 *src_addr = (struct sockaddr_in6) { 1137 .sin6_len = sizeof(struct sockaddr_in6), 1138 .sin6_family = AF_INET6, 1139 .sin6_addr = hdr->dst_addr.ip6, 1140 .sin6_port = local_port, 1141 }; 1142 cma_ip6_clear_scope_id(&src_addr->sin6_addr); 1143 } 1144 1145 if (dst_addr) { 1146 *dst_addr = (struct sockaddr_in6) { 1147 .sin6_len = sizeof(struct sockaddr_in6), 1148 .sin6_family = AF_INET6, 1149 .sin6_addr = hdr->src_addr.ip6, 1150 .sin6_port = hdr->port, 1151 }; 1152 cma_ip6_clear_scope_id(&dst_addr->sin6_addr); 1153 } 1154 } 1155 1156 static u16 cma_port_from_service_id(__be64 service_id) 1157 { 1158 return (u16)be64_to_cpu(service_id); 1159 } 1160 1161 static int cma_save_ip_info(struct sockaddr *src_addr, 1162 struct sockaddr *dst_addr, 1163 struct ib_cm_event *ib_event, 1164 __be64 service_id) 1165 { 1166 struct cma_hdr *hdr; 1167 __be16 port; 1168 1169 hdr = ib_event->private_data; 1170 if (hdr->cma_version != CMA_VERSION) 1171 return -EINVAL; 1172 1173 port = htons(cma_port_from_service_id(service_id)); 1174 1175 switch (cma_get_ip_ver(hdr)) { 1176 case 4: 1177 cma_save_ip4_info((struct sockaddr_in *)src_addr, 1178 (struct sockaddr_in *)dst_addr, hdr, port); 1179 break; 1180 case 6: 1181 cma_save_ip6_info((struct sockaddr_in6 *)src_addr, 1182 (struct sockaddr_in6 *)dst_addr, hdr, port); 1183 break; 1184 default: 1185 return -EAFNOSUPPORT; 1186 } 1187 1188 return 0; 1189 } 1190 1191 static int cma_save_net_info(struct sockaddr *src_addr, 1192 struct sockaddr *dst_addr, 1193 struct rdma_cm_id *listen_id, 1194 struct ib_cm_event *ib_event, 1195 sa_family_t sa_family, __be64 service_id) 1196 { 1197 if (sa_family == AF_IB) { 1198 if (ib_event->event == IB_CM_REQ_RECEIVED) 1199 cma_save_ib_info(src_addr, dst_addr, listen_id, 1200 ib_event->param.req_rcvd.primary_path); 1201 else if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) 1202 cma_save_ib_info(src_addr, dst_addr, listen_id, NULL); 1203 return 0; 1204 } 1205 1206 return cma_save_ip_info(src_addr, dst_addr, ib_event, service_id); 1207 } 1208 1209 static int cma_save_req_info(const struct ib_cm_event *ib_event, 1210 struct cma_req_info *req) 1211 { 1212 const struct ib_cm_req_event_param *req_param = 1213 &ib_event->param.req_rcvd; 1214 const struct ib_cm_sidr_req_event_param *sidr_param = 1215 &ib_event->param.sidr_req_rcvd; 1216 1217 switch (ib_event->event) { 1218 case IB_CM_REQ_RECEIVED: 1219 req->device = req_param->listen_id->device; 1220 req->port = req_param->port; 1221 memcpy(&req->local_gid, &req_param->primary_path->sgid, 1222 sizeof(req->local_gid)); 1223 req->has_gid = true; 1224 req->service_id = req_param->primary_path->service_id; 1225 req->pkey = be16_to_cpu(req_param->primary_path->pkey); 1226 if (req->pkey != req_param->bth_pkey) 1227 pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and primary path P_Key (0x%x)\n" 1228 "RDMA CMA: in the future this may cause the request to be dropped\n", 1229 req_param->bth_pkey, req->pkey); 1230 break; 1231 case IB_CM_SIDR_REQ_RECEIVED: 1232 req->device = sidr_param->listen_id->device; 1233 req->port = sidr_param->port; 1234 req->has_gid = false; 1235 req->service_id = sidr_param->service_id; 1236 req->pkey = sidr_param->pkey; 1237 if (req->pkey != sidr_param->bth_pkey) 1238 pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and SIDR request payload P_Key (0x%x)\n" 1239 "RDMA CMA: in the future this may cause the request to be dropped\n", 1240 sidr_param->bth_pkey, req->pkey); 1241 break; 1242 default: 1243 return -EINVAL; 1244 } 1245 1246 return 0; 1247 } 1248 1249 static bool validate_ipv4_net_dev(struct net_device *net_dev, 1250 const struct sockaddr_in *dst_addr, 1251 const struct sockaddr_in *src_addr) 1252 { 1253 #ifdef INET 1254 struct sockaddr_in dst_tmp = *dst_addr; 1255 __be32 daddr = dst_addr->sin_addr.s_addr, 1256 saddr = src_addr->sin_addr.s_addr; 1257 struct net_device *src_dev; 1258 struct rtentry *rte; 1259 bool ret; 1260 1261 if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) || 1262 ipv4_is_lbcast(daddr) || ipv4_is_zeronet(saddr) || 1263 ipv4_is_zeronet(daddr) || ipv4_is_loopback(daddr) || 1264 ipv4_is_loopback(saddr)) 1265 return false; 1266 1267 src_dev = ip_dev_find(net_dev->if_vnet, saddr); 1268 if (src_dev != net_dev) { 1269 if (src_dev != NULL) 1270 dev_put(src_dev); 1271 return false; 1272 } 1273 dev_put(src_dev); 1274 1275 /* 1276 * Make sure the socket address length field 1277 * is set, else rtalloc1() will fail. 1278 */ 1279 dst_tmp.sin_len = sizeof(dst_tmp); 1280 1281 CURVNET_SET(net_dev->if_vnet); 1282 rte = rtalloc1((struct sockaddr *)&dst_tmp, 1, 0); 1283 CURVNET_RESTORE(); 1284 if (rte != NULL) { 1285 ret = (rte->rt_ifp == net_dev); 1286 RTFREE_LOCKED(rte); 1287 } else { 1288 ret = false; 1289 } 1290 return ret; 1291 #else 1292 return false; 1293 #endif 1294 } 1295 1296 static bool validate_ipv6_net_dev(struct net_device *net_dev, 1297 const struct sockaddr_in6 *dst_addr, 1298 const struct sockaddr_in6 *src_addr) 1299 { 1300 #ifdef INET6 1301 struct sockaddr_in6 dst_tmp = *dst_addr; 1302 struct in6_addr in6_addr = src_addr->sin6_addr; 1303 struct net_device *src_dev; 1304 struct rtentry *rte; 1305 bool ret; 1306 1307 src_dev = ip6_dev_find(net_dev->if_vnet, in6_addr); 1308 if (src_dev != net_dev) 1309 return false; 1310 1311 /* 1312 * Make sure the socket address length field 1313 * is set, else rtalloc1() will fail. 1314 */ 1315 dst_tmp.sin6_len = sizeof(dst_tmp); 1316 1317 CURVNET_SET(net_dev->if_vnet); 1318 rte = rtalloc1((struct sockaddr *)&dst_tmp, 1, 0); 1319 CURVNET_RESTORE(); 1320 if (rte != NULL) { 1321 ret = (rte->rt_ifp == net_dev); 1322 RTFREE_LOCKED(rte); 1323 } else { 1324 ret = false; 1325 } 1326 return ret; 1327 #else 1328 return false; 1329 #endif 1330 } 1331 1332 static bool validate_net_dev(struct net_device *net_dev, 1333 const struct sockaddr *daddr, 1334 const struct sockaddr *saddr) 1335 { 1336 const struct sockaddr_in *daddr4 = (const struct sockaddr_in *)daddr; 1337 const struct sockaddr_in *saddr4 = (const struct sockaddr_in *)saddr; 1338 const struct sockaddr_in6 *daddr6 = (const struct sockaddr_in6 *)daddr; 1339 const struct sockaddr_in6 *saddr6 = (const struct sockaddr_in6 *)saddr; 1340 1341 switch (daddr->sa_family) { 1342 case AF_INET: 1343 return saddr->sa_family == AF_INET && 1344 validate_ipv4_net_dev(net_dev, daddr4, saddr4); 1345 1346 case AF_INET6: 1347 return saddr->sa_family == AF_INET6 && 1348 validate_ipv6_net_dev(net_dev, daddr6, saddr6); 1349 1350 default: 1351 return false; 1352 } 1353 } 1354 1355 static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event, 1356 const struct cma_req_info *req) 1357 { 1358 struct sockaddr_storage listen_addr_storage, src_addr_storage; 1359 struct sockaddr *listen_addr = (struct sockaddr *)&listen_addr_storage, 1360 *src_addr = (struct sockaddr *)&src_addr_storage; 1361 struct net_device *net_dev; 1362 const union ib_gid *gid = req->has_gid ? &req->local_gid : NULL; 1363 int err; 1364 1365 err = cma_save_ip_info(listen_addr, src_addr, ib_event, 1366 req->service_id); 1367 if (err) 1368 return ERR_PTR(err); 1369 1370 net_dev = ib_get_net_dev_by_params(req->device, req->port, req->pkey, 1371 gid, listen_addr); 1372 if (!net_dev) 1373 return ERR_PTR(-ENODEV); 1374 1375 if (!validate_net_dev(net_dev, listen_addr, src_addr)) { 1376 dev_put(net_dev); 1377 return ERR_PTR(-EHOSTUNREACH); 1378 } 1379 1380 return net_dev; 1381 } 1382 1383 static enum rdma_port_space rdma_ps_from_service_id(__be64 service_id) 1384 { 1385 return (be64_to_cpu(service_id) >> 16) & 0xffff; 1386 } 1387 1388 static bool cma_match_private_data(struct rdma_id_private *id_priv, 1389 const struct cma_hdr *hdr) 1390 { 1391 struct sockaddr *addr = cma_src_addr(id_priv); 1392 __be32 ip4_addr; 1393 struct in6_addr ip6_addr; 1394 1395 if (cma_any_addr(addr) && !id_priv->afonly) 1396 return true; 1397 1398 switch (addr->sa_family) { 1399 case AF_INET: 1400 ip4_addr = ((struct sockaddr_in *)addr)->sin_addr.s_addr; 1401 if (cma_get_ip_ver(hdr) != 4) 1402 return false; 1403 if (!cma_any_addr(addr) && 1404 hdr->dst_addr.ip4.addr != ip4_addr) 1405 return false; 1406 break; 1407 case AF_INET6: 1408 ip6_addr = ((struct sockaddr_in6 *)addr)->sin6_addr; 1409 if (cma_get_ip_ver(hdr) != 6) 1410 return false; 1411 cma_ip6_clear_scope_id(&ip6_addr); 1412 if (!cma_any_addr(addr) && 1413 memcmp(&hdr->dst_addr.ip6, &ip6_addr, sizeof(ip6_addr))) 1414 return false; 1415 break; 1416 case AF_IB: 1417 return true; 1418 default: 1419 return false; 1420 } 1421 1422 return true; 1423 } 1424 1425 static bool cma_protocol_roce_dev_port(struct ib_device *device, int port_num) 1426 { 1427 enum rdma_link_layer ll = rdma_port_get_link_layer(device, port_num); 1428 enum rdma_transport_type transport = 1429 rdma_node_get_transport(device->node_type); 1430 1431 return ll == IB_LINK_LAYER_ETHERNET && transport == RDMA_TRANSPORT_IB; 1432 } 1433 1434 static bool cma_protocol_roce(const struct rdma_cm_id *id) 1435 { 1436 struct ib_device *device = id->device; 1437 const int port_num = id->port_num ?: rdma_start_port(device); 1438 1439 return cma_protocol_roce_dev_port(device, port_num); 1440 } 1441 1442 static bool cma_match_net_dev(const struct rdma_cm_id *id, 1443 const struct net_device *net_dev, 1444 u8 port_num) 1445 { 1446 const struct rdma_addr *addr = &id->route.addr; 1447 1448 if (!net_dev) 1449 /* This request is an AF_IB request or a RoCE request */ 1450 return (!id->port_num || id->port_num == port_num) && 1451 (addr->src_addr.ss_family == AF_IB || 1452 cma_protocol_roce_dev_port(id->device, port_num)); 1453 1454 return !addr->dev_addr.bound_dev_if || 1455 (net_eq(dev_net(net_dev), addr->dev_addr.net) && 1456 addr->dev_addr.bound_dev_if == net_dev->if_index); 1457 } 1458 1459 static struct rdma_id_private *cma_find_listener( 1460 const struct rdma_bind_list *bind_list, 1461 const struct ib_cm_id *cm_id, 1462 const struct ib_cm_event *ib_event, 1463 const struct cma_req_info *req, 1464 const struct net_device *net_dev) 1465 { 1466 struct rdma_id_private *id_priv, *id_priv_dev; 1467 1468 if (!bind_list) 1469 return ERR_PTR(-EINVAL); 1470 1471 hlist_for_each_entry(id_priv, &bind_list->owners, node) { 1472 if (cma_match_private_data(id_priv, ib_event->private_data)) { 1473 if (id_priv->id.device == cm_id->device && 1474 cma_match_net_dev(&id_priv->id, net_dev, req->port)) 1475 return id_priv; 1476 list_for_each_entry(id_priv_dev, 1477 &id_priv->listen_list, 1478 listen_list) { 1479 if (id_priv_dev->id.device == cm_id->device && 1480 cma_match_net_dev(&id_priv_dev->id, net_dev, req->port)) 1481 return id_priv_dev; 1482 } 1483 } 1484 } 1485 1486 return ERR_PTR(-EINVAL); 1487 } 1488 1489 static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id, 1490 struct ib_cm_event *ib_event, 1491 struct net_device **net_dev) 1492 { 1493 struct cma_req_info req; 1494 struct rdma_bind_list *bind_list; 1495 struct rdma_id_private *id_priv; 1496 int err; 1497 1498 err = cma_save_req_info(ib_event, &req); 1499 if (err) 1500 return ERR_PTR(err); 1501 1502 *net_dev = cma_get_net_dev(ib_event, &req); 1503 if (IS_ERR(*net_dev)) { 1504 if (PTR_ERR(*net_dev) == -EAFNOSUPPORT) { 1505 /* Assuming the protocol is AF_IB */ 1506 *net_dev = NULL; 1507 } else if (cma_protocol_roce_dev_port(req.device, req.port)) { 1508 /* TODO find the net dev matching the request parameters 1509 * through the RoCE GID table */ 1510 *net_dev = NULL; 1511 } else { 1512 return ERR_CAST(*net_dev); 1513 } 1514 } 1515 1516 bind_list = cma_ps_find(*net_dev ? dev_net(*net_dev) : &init_net, 1517 rdma_ps_from_service_id(req.service_id), 1518 cma_port_from_service_id(req.service_id)); 1519 id_priv = cma_find_listener(bind_list, cm_id, ib_event, &req, *net_dev); 1520 if (IS_ERR(id_priv) && *net_dev) { 1521 dev_put(*net_dev); 1522 *net_dev = NULL; 1523 } 1524 1525 return id_priv; 1526 } 1527 1528 static inline int cma_user_data_offset(struct rdma_id_private *id_priv) 1529 { 1530 return cma_family(id_priv) == AF_IB ? 0 : sizeof(struct cma_hdr); 1531 } 1532 1533 static void cma_cancel_route(struct rdma_id_private *id_priv) 1534 { 1535 if (rdma_cap_ib_sa(id_priv->id.device, id_priv->id.port_num)) { 1536 if (id_priv->query) 1537 ib_sa_cancel_query(id_priv->query_id, id_priv->query); 1538 } 1539 } 1540 1541 static void cma_cancel_listens(struct rdma_id_private *id_priv) 1542 { 1543 struct rdma_id_private *dev_id_priv; 1544 1545 /* 1546 * Remove from listen_any_list to prevent added devices from spawning 1547 * additional listen requests. 1548 */ 1549 mutex_lock(&lock); 1550 list_del(&id_priv->list); 1551 1552 while (!list_empty(&id_priv->listen_list)) { 1553 dev_id_priv = list_entry(id_priv->listen_list.next, 1554 struct rdma_id_private, listen_list); 1555 /* sync with device removal to avoid duplicate destruction */ 1556 list_del_init(&dev_id_priv->list); 1557 list_del(&dev_id_priv->listen_list); 1558 mutex_unlock(&lock); 1559 1560 rdma_destroy_id(&dev_id_priv->id); 1561 mutex_lock(&lock); 1562 } 1563 mutex_unlock(&lock); 1564 } 1565 1566 static void cma_cancel_operation(struct rdma_id_private *id_priv, 1567 enum rdma_cm_state state) 1568 { 1569 switch (state) { 1570 case RDMA_CM_ADDR_QUERY: 1571 rdma_addr_cancel(&id_priv->id.route.addr.dev_addr); 1572 break; 1573 case RDMA_CM_ROUTE_QUERY: 1574 cma_cancel_route(id_priv); 1575 break; 1576 case RDMA_CM_LISTEN: 1577 if (cma_any_addr(cma_src_addr(id_priv)) && !id_priv->cma_dev) 1578 cma_cancel_listens(id_priv); 1579 break; 1580 default: 1581 break; 1582 } 1583 } 1584 1585 static void cma_release_port(struct rdma_id_private *id_priv) 1586 { 1587 struct rdma_bind_list *bind_list = id_priv->bind_list; 1588 struct vnet *net = id_priv->id.route.addr.dev_addr.net; 1589 1590 if (!bind_list) 1591 return; 1592 1593 mutex_lock(&lock); 1594 hlist_del(&id_priv->node); 1595 if (hlist_empty(&bind_list->owners)) { 1596 cma_ps_remove(net, bind_list->ps, bind_list->port); 1597 kfree(bind_list); 1598 } 1599 mutex_unlock(&lock); 1600 } 1601 1602 static void cma_leave_mc_groups(struct rdma_id_private *id_priv) 1603 { 1604 struct cma_multicast *mc; 1605 1606 while (!list_empty(&id_priv->mc_list)) { 1607 mc = container_of(id_priv->mc_list.next, 1608 struct cma_multicast, list); 1609 list_del(&mc->list); 1610 if (rdma_cap_ib_mcast(id_priv->cma_dev->device, 1611 id_priv->id.port_num)) { 1612 ib_sa_free_multicast(mc->multicast.ib); 1613 kfree(mc); 1614 } else { 1615 if (mc->igmp_joined) { 1616 struct rdma_dev_addr *dev_addr = 1617 &id_priv->id.route.addr.dev_addr; 1618 struct net_device *ndev = NULL; 1619 1620 if (dev_addr->bound_dev_if) 1621 ndev = dev_get_by_index(dev_addr->net, 1622 dev_addr->bound_dev_if); 1623 if (ndev) { 1624 dev_put(ndev); 1625 } 1626 } 1627 kref_put(&mc->mcref, release_mc); 1628 } 1629 } 1630 } 1631 1632 void rdma_destroy_id(struct rdma_cm_id *id) 1633 { 1634 struct rdma_id_private *id_priv; 1635 enum rdma_cm_state state; 1636 1637 id_priv = container_of(id, struct rdma_id_private, id); 1638 state = cma_exch(id_priv, RDMA_CM_DESTROYING); 1639 cma_cancel_operation(id_priv, state); 1640 1641 /* 1642 * Wait for any active callback to finish. New callbacks will find 1643 * the id_priv state set to destroying and abort. 1644 */ 1645 mutex_lock(&id_priv->handler_mutex); 1646 mutex_unlock(&id_priv->handler_mutex); 1647 1648 if (id_priv->cma_dev) { 1649 if (rdma_cap_ib_cm(id_priv->id.device, 1)) { 1650 if (id_priv->cm_id.ib) 1651 ib_destroy_cm_id(id_priv->cm_id.ib); 1652 } else if (rdma_cap_iw_cm(id_priv->id.device, 1)) { 1653 if (id_priv->cm_id.iw) 1654 iw_destroy_cm_id(id_priv->cm_id.iw); 1655 } 1656 cma_leave_mc_groups(id_priv); 1657 cma_release_dev(id_priv); 1658 } 1659 1660 cma_release_port(id_priv); 1661 cma_deref_id(id_priv); 1662 wait_for_completion(&id_priv->comp); 1663 1664 if (id_priv->internal_id) 1665 cma_deref_id(id_priv->id.context); 1666 1667 kfree(id_priv->id.route.path_rec); 1668 kfree(id_priv); 1669 } 1670 EXPORT_SYMBOL(rdma_destroy_id); 1671 1672 static int cma_rep_recv(struct rdma_id_private *id_priv) 1673 { 1674 int ret; 1675 1676 ret = cma_modify_qp_rtr(id_priv, NULL); 1677 if (ret) 1678 goto reject; 1679 1680 ret = cma_modify_qp_rts(id_priv, NULL); 1681 if (ret) 1682 goto reject; 1683 1684 ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0); 1685 if (ret) 1686 goto reject; 1687 1688 return 0; 1689 reject: 1690 cma_modify_qp_err(id_priv); 1691 ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED, 1692 NULL, 0, NULL, 0); 1693 return ret; 1694 } 1695 1696 static void cma_set_rep_event_data(struct rdma_cm_event *event, 1697 struct ib_cm_rep_event_param *rep_data, 1698 void *private_data) 1699 { 1700 event->param.conn.private_data = private_data; 1701 event->param.conn.private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE; 1702 event->param.conn.responder_resources = rep_data->responder_resources; 1703 event->param.conn.initiator_depth = rep_data->initiator_depth; 1704 event->param.conn.flow_control = rep_data->flow_control; 1705 event->param.conn.rnr_retry_count = rep_data->rnr_retry_count; 1706 event->param.conn.srq = rep_data->srq; 1707 event->param.conn.qp_num = rep_data->remote_qpn; 1708 } 1709 1710 static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) 1711 { 1712 struct rdma_id_private *id_priv = cm_id->context; 1713 struct rdma_cm_event event; 1714 int ret = 0; 1715 1716 mutex_lock(&id_priv->handler_mutex); 1717 if ((ib_event->event != IB_CM_TIMEWAIT_EXIT && 1718 id_priv->state != RDMA_CM_CONNECT) || 1719 (ib_event->event == IB_CM_TIMEWAIT_EXIT && 1720 id_priv->state != RDMA_CM_DISCONNECT)) 1721 goto out; 1722 1723 memset(&event, 0, sizeof event); 1724 switch (ib_event->event) { 1725 case IB_CM_REQ_ERROR: 1726 case IB_CM_REP_ERROR: 1727 event.event = RDMA_CM_EVENT_UNREACHABLE; 1728 event.status = -ETIMEDOUT; 1729 break; 1730 case IB_CM_REP_RECEIVED: 1731 if (id_priv->id.qp) { 1732 event.status = cma_rep_recv(id_priv); 1733 event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR : 1734 RDMA_CM_EVENT_ESTABLISHED; 1735 } else { 1736 event.event = RDMA_CM_EVENT_CONNECT_RESPONSE; 1737 } 1738 cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd, 1739 ib_event->private_data); 1740 break; 1741 case IB_CM_RTU_RECEIVED: 1742 case IB_CM_USER_ESTABLISHED: 1743 event.event = RDMA_CM_EVENT_ESTABLISHED; 1744 break; 1745 case IB_CM_DREQ_ERROR: 1746 event.status = -ETIMEDOUT; /* fall through */ 1747 case IB_CM_DREQ_RECEIVED: 1748 case IB_CM_DREP_RECEIVED: 1749 if (!cma_comp_exch(id_priv, RDMA_CM_CONNECT, 1750 RDMA_CM_DISCONNECT)) 1751 goto out; 1752 event.event = RDMA_CM_EVENT_DISCONNECTED; 1753 break; 1754 case IB_CM_TIMEWAIT_EXIT: 1755 event.event = RDMA_CM_EVENT_TIMEWAIT_EXIT; 1756 break; 1757 case IB_CM_MRA_RECEIVED: 1758 /* ignore event */ 1759 goto out; 1760 case IB_CM_REJ_RECEIVED: 1761 cma_modify_qp_err(id_priv); 1762 event.status = ib_event->param.rej_rcvd.reason; 1763 event.event = RDMA_CM_EVENT_REJECTED; 1764 event.param.conn.private_data = ib_event->private_data; 1765 event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE; 1766 break; 1767 default: 1768 pr_err("RDMA CMA: unexpected IB CM event: %d\n", 1769 ib_event->event); 1770 goto out; 1771 } 1772 1773 ret = id_priv->id.event_handler(&id_priv->id, &event); 1774 if (ret) { 1775 /* Destroy the CM ID by returning a non-zero value. */ 1776 id_priv->cm_id.ib = NULL; 1777 cma_exch(id_priv, RDMA_CM_DESTROYING); 1778 mutex_unlock(&id_priv->handler_mutex); 1779 rdma_destroy_id(&id_priv->id); 1780 return ret; 1781 } 1782 out: 1783 mutex_unlock(&id_priv->handler_mutex); 1784 return ret; 1785 } 1786 1787 static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, 1788 struct ib_cm_event *ib_event, 1789 struct net_device *net_dev) 1790 { 1791 struct rdma_id_private *id_priv; 1792 struct rdma_cm_id *id; 1793 struct rdma_route *rt; 1794 const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family; 1795 const __be64 service_id = 1796 ib_event->param.req_rcvd.primary_path->service_id; 1797 int ret; 1798 1799 id = rdma_create_id(listen_id->route.addr.dev_addr.net, 1800 listen_id->event_handler, listen_id->context, 1801 listen_id->ps, ib_event->param.req_rcvd.qp_type); 1802 if (IS_ERR(id)) 1803 return NULL; 1804 1805 id_priv = container_of(id, struct rdma_id_private, id); 1806 if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr, 1807 (struct sockaddr *)&id->route.addr.dst_addr, 1808 listen_id, ib_event, ss_family, service_id)) 1809 goto err; 1810 1811 rt = &id->route; 1812 rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1; 1813 rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths, 1814 GFP_KERNEL); 1815 if (!rt->path_rec) 1816 goto err; 1817 1818 rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path; 1819 if (rt->num_paths == 2) 1820 rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path; 1821 1822 if (net_dev) { 1823 ret = rdma_copy_addr(&rt->addr.dev_addr, net_dev, NULL); 1824 if (ret) 1825 goto err; 1826 } else { 1827 if (!cma_protocol_roce(listen_id) && 1828 cma_any_addr(cma_src_addr(id_priv))) { 1829 rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND; 1830 rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid); 1831 ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey)); 1832 } else if (!cma_any_addr(cma_src_addr(id_priv))) { 1833 ret = cma_translate_addr(cma_src_addr(id_priv), &rt->addr.dev_addr); 1834 if (ret) 1835 goto err; 1836 } 1837 } 1838 rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid); 1839 1840 id_priv->state = RDMA_CM_CONNECT; 1841 return id_priv; 1842 1843 err: 1844 rdma_destroy_id(id); 1845 return NULL; 1846 } 1847 1848 static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id, 1849 struct ib_cm_event *ib_event, 1850 struct net_device *net_dev) 1851 { 1852 struct rdma_id_private *id_priv; 1853 struct rdma_cm_id *id; 1854 const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family; 1855 struct vnet *net = listen_id->route.addr.dev_addr.net; 1856 int ret; 1857 1858 id = rdma_create_id(net, listen_id->event_handler, listen_id->context, 1859 listen_id->ps, IB_QPT_UD); 1860 if (IS_ERR(id)) 1861 return NULL; 1862 1863 id_priv = container_of(id, struct rdma_id_private, id); 1864 if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr, 1865 (struct sockaddr *)&id->route.addr.dst_addr, 1866 listen_id, ib_event, ss_family, 1867 ib_event->param.sidr_req_rcvd.service_id)) 1868 goto err; 1869 1870 if (net_dev) { 1871 ret = rdma_copy_addr(&id->route.addr.dev_addr, net_dev, NULL); 1872 if (ret) 1873 goto err; 1874 } else { 1875 if (!cma_any_addr(cma_src_addr(id_priv))) { 1876 ret = cma_translate_addr(cma_src_addr(id_priv), 1877 &id->route.addr.dev_addr); 1878 if (ret) 1879 goto err; 1880 } 1881 } 1882 1883 id_priv->state = RDMA_CM_CONNECT; 1884 return id_priv; 1885 err: 1886 rdma_destroy_id(id); 1887 return NULL; 1888 } 1889 1890 static void cma_set_req_event_data(struct rdma_cm_event *event, 1891 struct ib_cm_req_event_param *req_data, 1892 void *private_data, int offset) 1893 { 1894 event->param.conn.private_data = (char *)private_data + offset; 1895 event->param.conn.private_data_len = IB_CM_REQ_PRIVATE_DATA_SIZE - offset; 1896 event->param.conn.responder_resources = req_data->responder_resources; 1897 event->param.conn.initiator_depth = req_data->initiator_depth; 1898 event->param.conn.flow_control = req_data->flow_control; 1899 event->param.conn.retry_count = req_data->retry_count; 1900 event->param.conn.rnr_retry_count = req_data->rnr_retry_count; 1901 event->param.conn.srq = req_data->srq; 1902 event->param.conn.qp_num = req_data->remote_qpn; 1903 } 1904 1905 static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_event) 1906 { 1907 return (((ib_event->event == IB_CM_REQ_RECEIVED) && 1908 (ib_event->param.req_rcvd.qp_type == id->qp_type)) || 1909 ((ib_event->event == IB_CM_SIDR_REQ_RECEIVED) && 1910 (id->qp_type == IB_QPT_UD)) || 1911 (!id->qp_type)); 1912 } 1913 1914 static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) 1915 { 1916 struct rdma_id_private *listen_id, *conn_id = NULL; 1917 struct rdma_cm_event event; 1918 struct net_device *net_dev; 1919 int offset, ret; 1920 1921 listen_id = cma_id_from_event(cm_id, ib_event, &net_dev); 1922 if (IS_ERR(listen_id)) 1923 return PTR_ERR(listen_id); 1924 1925 if (!cma_check_req_qp_type(&listen_id->id, ib_event)) { 1926 ret = -EINVAL; 1927 goto net_dev_put; 1928 } 1929 1930 mutex_lock(&listen_id->handler_mutex); 1931 if (listen_id->state != RDMA_CM_LISTEN) { 1932 ret = -ECONNABORTED; 1933 goto err1; 1934 } 1935 1936 memset(&event, 0, sizeof event); 1937 offset = cma_user_data_offset(listen_id); 1938 event.event = RDMA_CM_EVENT_CONNECT_REQUEST; 1939 if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) { 1940 conn_id = cma_new_udp_id(&listen_id->id, ib_event, net_dev); 1941 event.param.ud.private_data = (char *)ib_event->private_data + offset; 1942 event.param.ud.private_data_len = 1943 IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset; 1944 } else { 1945 conn_id = cma_new_conn_id(&listen_id->id, ib_event, net_dev); 1946 cma_set_req_event_data(&event, &ib_event->param.req_rcvd, 1947 ib_event->private_data, offset); 1948 } 1949 if (!conn_id) { 1950 ret = -ENOMEM; 1951 goto err1; 1952 } 1953 1954 mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); 1955 ret = cma_acquire_dev(conn_id, listen_id); 1956 if (ret) 1957 goto err2; 1958 1959 conn_id->cm_id.ib = cm_id; 1960 cm_id->context = conn_id; 1961 cm_id->cm_handler = cma_ib_handler; 1962 1963 /* 1964 * Protect against the user destroying conn_id from another thread 1965 * until we're done accessing it. 1966 */ 1967 atomic_inc(&conn_id->refcount); 1968 ret = conn_id->id.event_handler(&conn_id->id, &event); 1969 if (ret) 1970 goto err3; 1971 /* 1972 * Acquire mutex to prevent user executing rdma_destroy_id() 1973 * while we're accessing the cm_id. 1974 */ 1975 mutex_lock(&lock); 1976 if (cma_comp(conn_id, RDMA_CM_CONNECT) && 1977 (conn_id->id.qp_type != IB_QPT_UD)) 1978 ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); 1979 mutex_unlock(&lock); 1980 mutex_unlock(&conn_id->handler_mutex); 1981 mutex_unlock(&listen_id->handler_mutex); 1982 cma_deref_id(conn_id); 1983 if (net_dev) 1984 dev_put(net_dev); 1985 return 0; 1986 1987 err3: 1988 cma_deref_id(conn_id); 1989 /* Destroy the CM ID by returning a non-zero value. */ 1990 conn_id->cm_id.ib = NULL; 1991 err2: 1992 cma_exch(conn_id, RDMA_CM_DESTROYING); 1993 mutex_unlock(&conn_id->handler_mutex); 1994 err1: 1995 mutex_unlock(&listen_id->handler_mutex); 1996 if (conn_id) 1997 rdma_destroy_id(&conn_id->id); 1998 1999 net_dev_put: 2000 if (net_dev) 2001 dev_put(net_dev); 2002 2003 return ret; 2004 } 2005 2006 __be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr) 2007 { 2008 if (addr->sa_family == AF_IB) 2009 return ((struct sockaddr_ib *) addr)->sib_sid; 2010 2011 return cpu_to_be64(((u64)id->ps << 16) + be16_to_cpu(cma_port(addr))); 2012 } 2013 EXPORT_SYMBOL(rdma_get_service_id); 2014 2015 static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) 2016 { 2017 struct rdma_id_private *id_priv = iw_id->context; 2018 struct rdma_cm_event event; 2019 int ret = 0; 2020 struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; 2021 struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; 2022 2023 mutex_lock(&id_priv->handler_mutex); 2024 if (id_priv->state != RDMA_CM_CONNECT) 2025 goto out; 2026 2027 memset(&event, 0, sizeof event); 2028 switch (iw_event->event) { 2029 case IW_CM_EVENT_CLOSE: 2030 event.event = RDMA_CM_EVENT_DISCONNECTED; 2031 break; 2032 case IW_CM_EVENT_CONNECT_REPLY: 2033 memcpy(cma_src_addr(id_priv), laddr, 2034 rdma_addr_size(laddr)); 2035 memcpy(cma_dst_addr(id_priv), raddr, 2036 rdma_addr_size(raddr)); 2037 switch (iw_event->status) { 2038 case 0: 2039 event.event = RDMA_CM_EVENT_ESTABLISHED; 2040 event.param.conn.initiator_depth = iw_event->ird; 2041 event.param.conn.responder_resources = iw_event->ord; 2042 break; 2043 case -ECONNRESET: 2044 case -ECONNREFUSED: 2045 event.event = RDMA_CM_EVENT_REJECTED; 2046 break; 2047 case -ETIMEDOUT: 2048 event.event = RDMA_CM_EVENT_UNREACHABLE; 2049 break; 2050 default: 2051 event.event = RDMA_CM_EVENT_CONNECT_ERROR; 2052 break; 2053 } 2054 break; 2055 case IW_CM_EVENT_ESTABLISHED: 2056 event.event = RDMA_CM_EVENT_ESTABLISHED; 2057 event.param.conn.initiator_depth = iw_event->ird; 2058 event.param.conn.responder_resources = iw_event->ord; 2059 break; 2060 default: 2061 BUG_ON(1); 2062 } 2063 2064 event.status = iw_event->status; 2065 event.param.conn.private_data = iw_event->private_data; 2066 event.param.conn.private_data_len = iw_event->private_data_len; 2067 ret = id_priv->id.event_handler(&id_priv->id, &event); 2068 if (ret) { 2069 /* Destroy the CM ID by returning a non-zero value. */ 2070 id_priv->cm_id.iw = NULL; 2071 cma_exch(id_priv, RDMA_CM_DESTROYING); 2072 mutex_unlock(&id_priv->handler_mutex); 2073 rdma_destroy_id(&id_priv->id); 2074 return ret; 2075 } 2076 2077 out: 2078 mutex_unlock(&id_priv->handler_mutex); 2079 return ret; 2080 } 2081 2082 static int iw_conn_req_handler(struct iw_cm_id *cm_id, 2083 struct iw_cm_event *iw_event) 2084 { 2085 struct rdma_cm_id *new_cm_id; 2086 struct rdma_id_private *listen_id, *conn_id; 2087 struct rdma_cm_event event; 2088 int ret = -ECONNABORTED; 2089 struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; 2090 struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; 2091 2092 listen_id = cm_id->context; 2093 2094 mutex_lock(&listen_id->handler_mutex); 2095 if (listen_id->state != RDMA_CM_LISTEN) 2096 goto out; 2097 2098 /* Create a new RDMA id for the new IW CM ID */ 2099 new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net, 2100 listen_id->id.event_handler, 2101 listen_id->id.context, 2102 RDMA_PS_TCP, IB_QPT_RC); 2103 if (IS_ERR(new_cm_id)) { 2104 ret = -ENOMEM; 2105 goto out; 2106 } 2107 conn_id = container_of(new_cm_id, struct rdma_id_private, id); 2108 mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); 2109 conn_id->state = RDMA_CM_CONNECT; 2110 2111 ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr); 2112 if (ret) { 2113 mutex_unlock(&conn_id->handler_mutex); 2114 rdma_destroy_id(new_cm_id); 2115 goto out; 2116 } 2117 2118 ret = cma_acquire_dev(conn_id, listen_id); 2119 if (ret) { 2120 mutex_unlock(&conn_id->handler_mutex); 2121 rdma_destroy_id(new_cm_id); 2122 goto out; 2123 } 2124 2125 conn_id->cm_id.iw = cm_id; 2126 cm_id->context = conn_id; 2127 cm_id->cm_handler = cma_iw_handler; 2128 2129 memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr)); 2130 memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr)); 2131 2132 memset(&event, 0, sizeof event); 2133 event.event = RDMA_CM_EVENT_CONNECT_REQUEST; 2134 event.param.conn.private_data = iw_event->private_data; 2135 event.param.conn.private_data_len = iw_event->private_data_len; 2136 event.param.conn.initiator_depth = iw_event->ird; 2137 event.param.conn.responder_resources = iw_event->ord; 2138 2139 /* 2140 * Protect against the user destroying conn_id from another thread 2141 * until we're done accessing it. 2142 */ 2143 atomic_inc(&conn_id->refcount); 2144 ret = conn_id->id.event_handler(&conn_id->id, &event); 2145 if (ret) { 2146 /* User wants to destroy the CM ID */ 2147 conn_id->cm_id.iw = NULL; 2148 cma_exch(conn_id, RDMA_CM_DESTROYING); 2149 mutex_unlock(&conn_id->handler_mutex); 2150 cma_deref_id(conn_id); 2151 rdma_destroy_id(&conn_id->id); 2152 goto out; 2153 } 2154 2155 mutex_unlock(&conn_id->handler_mutex); 2156 cma_deref_id(conn_id); 2157 2158 out: 2159 mutex_unlock(&listen_id->handler_mutex); 2160 return ret; 2161 } 2162 2163 static int cma_ib_listen(struct rdma_id_private *id_priv) 2164 { 2165 struct sockaddr *addr; 2166 struct ib_cm_id *id; 2167 __be64 svc_id; 2168 2169 addr = cma_src_addr(id_priv); 2170 svc_id = rdma_get_service_id(&id_priv->id, addr); 2171 id = ib_cm_insert_listen(id_priv->id.device, cma_req_handler, svc_id); 2172 if (IS_ERR(id)) 2173 return PTR_ERR(id); 2174 id_priv->cm_id.ib = id; 2175 2176 return 0; 2177 } 2178 2179 static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog) 2180 { 2181 int ret; 2182 struct iw_cm_id *id; 2183 2184 id = iw_create_cm_id(id_priv->id.device, 2185 iw_conn_req_handler, 2186 id_priv); 2187 if (IS_ERR(id)) 2188 return PTR_ERR(id); 2189 2190 id->tos = id_priv->tos; 2191 id_priv->cm_id.iw = id; 2192 2193 memcpy(&id_priv->cm_id.iw->local_addr, cma_src_addr(id_priv), 2194 rdma_addr_size(cma_src_addr(id_priv))); 2195 2196 ret = iw_cm_listen(id_priv->cm_id.iw, backlog); 2197 2198 if (ret) { 2199 iw_destroy_cm_id(id_priv->cm_id.iw); 2200 id_priv->cm_id.iw = NULL; 2201 } 2202 2203 return ret; 2204 } 2205 2206 static int cma_listen_handler(struct rdma_cm_id *id, 2207 struct rdma_cm_event *event) 2208 { 2209 struct rdma_id_private *id_priv = id->context; 2210 2211 id->context = id_priv->id.context; 2212 id->event_handler = id_priv->id.event_handler; 2213 return id_priv->id.event_handler(id, event); 2214 } 2215 2216 static void cma_listen_on_dev(struct rdma_id_private *id_priv, 2217 struct cma_device *cma_dev) 2218 { 2219 struct rdma_id_private *dev_id_priv; 2220 struct rdma_cm_id *id; 2221 struct vnet *net = id_priv->id.route.addr.dev_addr.net; 2222 int ret; 2223 2224 if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1)) 2225 return; 2226 2227 id = rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps, 2228 id_priv->id.qp_type); 2229 if (IS_ERR(id)) 2230 return; 2231 2232 dev_id_priv = container_of(id, struct rdma_id_private, id); 2233 2234 dev_id_priv->state = RDMA_CM_ADDR_BOUND; 2235 memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv), 2236 rdma_addr_size(cma_src_addr(id_priv))); 2237 2238 _cma_attach_to_dev(dev_id_priv, cma_dev); 2239 list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); 2240 atomic_inc(&id_priv->refcount); 2241 dev_id_priv->internal_id = 1; 2242 dev_id_priv->afonly = id_priv->afonly; 2243 2244 ret = rdma_listen(id, id_priv->backlog); 2245 if (ret) 2246 pr_warn("RDMA CMA: cma_listen_on_dev, error %d, listening on device %s\n", 2247 ret, cma_dev->device->name); 2248 } 2249 2250 static void cma_listen_on_all(struct rdma_id_private *id_priv) 2251 { 2252 struct cma_device *cma_dev; 2253 2254 mutex_lock(&lock); 2255 list_add_tail(&id_priv->list, &listen_any_list); 2256 list_for_each_entry(cma_dev, &dev_list, list) 2257 cma_listen_on_dev(id_priv, cma_dev); 2258 mutex_unlock(&lock); 2259 } 2260 2261 void rdma_set_service_type(struct rdma_cm_id *id, int tos) 2262 { 2263 struct rdma_id_private *id_priv; 2264 2265 id_priv = container_of(id, struct rdma_id_private, id); 2266 id_priv->tos = (u8) tos; 2267 } 2268 EXPORT_SYMBOL(rdma_set_service_type); 2269 2270 static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec, 2271 void *context) 2272 { 2273 struct cma_work *work = context; 2274 struct rdma_route *route; 2275 2276 route = &work->id->id.route; 2277 2278 if (!status) { 2279 route->num_paths = 1; 2280 *route->path_rec = *path_rec; 2281 } else { 2282 work->old_state = RDMA_CM_ROUTE_QUERY; 2283 work->new_state = RDMA_CM_ADDR_RESOLVED; 2284 work->event.event = RDMA_CM_EVENT_ROUTE_ERROR; 2285 work->event.status = status; 2286 } 2287 2288 queue_work(cma_wq, &work->work); 2289 } 2290 2291 static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms, 2292 struct cma_work *work) 2293 { 2294 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 2295 struct ib_sa_path_rec path_rec; 2296 ib_sa_comp_mask comp_mask; 2297 struct sockaddr_in6 *sin6; 2298 struct sockaddr_ib *sib; 2299 2300 memset(&path_rec, 0, sizeof path_rec); 2301 rdma_addr_get_sgid(dev_addr, &path_rec.sgid); 2302 rdma_addr_get_dgid(dev_addr, &path_rec.dgid); 2303 path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); 2304 path_rec.numb_path = 1; 2305 path_rec.reversible = 1; 2306 path_rec.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); 2307 2308 comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | 2309 IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH | 2310 IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID; 2311 2312 switch (cma_family(id_priv)) { 2313 case AF_INET: 2314 path_rec.qos_class = cpu_to_be16((u16) id_priv->tos); 2315 comp_mask |= IB_SA_PATH_REC_QOS_CLASS; 2316 break; 2317 case AF_INET6: 2318 sin6 = (struct sockaddr_in6 *) cma_src_addr(id_priv); 2319 path_rec.traffic_class = (u8) (be32_to_cpu(sin6->sin6_flowinfo) >> 20); 2320 comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS; 2321 break; 2322 case AF_IB: 2323 sib = (struct sockaddr_ib *) cma_src_addr(id_priv); 2324 path_rec.traffic_class = (u8) (be32_to_cpu(sib->sib_flowinfo) >> 20); 2325 comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS; 2326 break; 2327 } 2328 2329 id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device, 2330 id_priv->id.port_num, &path_rec, 2331 comp_mask, timeout_ms, 2332 GFP_KERNEL, cma_query_handler, 2333 work, &id_priv->query); 2334 2335 return (id_priv->query_id < 0) ? id_priv->query_id : 0; 2336 } 2337 2338 static void cma_work_handler(struct work_struct *_work) 2339 { 2340 struct cma_work *work = container_of(_work, struct cma_work, work); 2341 struct rdma_id_private *id_priv = work->id; 2342 int destroy = 0; 2343 2344 mutex_lock(&id_priv->handler_mutex); 2345 if (!cma_comp_exch(id_priv, work->old_state, work->new_state)) 2346 goto out; 2347 2348 if (id_priv->id.event_handler(&id_priv->id, &work->event)) { 2349 cma_exch(id_priv, RDMA_CM_DESTROYING); 2350 destroy = 1; 2351 } 2352 out: 2353 mutex_unlock(&id_priv->handler_mutex); 2354 cma_deref_id(id_priv); 2355 if (destroy) 2356 rdma_destroy_id(&id_priv->id); 2357 kfree(work); 2358 } 2359 2360 static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms) 2361 { 2362 struct rdma_route *route = &id_priv->id.route; 2363 struct cma_work *work; 2364 int ret; 2365 2366 work = kzalloc(sizeof *work, GFP_KERNEL); 2367 if (!work) 2368 return -ENOMEM; 2369 2370 work->id = id_priv; 2371 INIT_WORK(&work->work, cma_work_handler); 2372 work->old_state = RDMA_CM_ROUTE_QUERY; 2373 work->new_state = RDMA_CM_ROUTE_RESOLVED; 2374 work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; 2375 2376 route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL); 2377 if (!route->path_rec) { 2378 ret = -ENOMEM; 2379 goto err1; 2380 } 2381 2382 ret = cma_query_ib_route(id_priv, timeout_ms, work); 2383 if (ret) 2384 goto err2; 2385 2386 return 0; 2387 err2: 2388 kfree(route->path_rec); 2389 route->path_rec = NULL; 2390 err1: 2391 kfree(work); 2392 return ret; 2393 } 2394 2395 int rdma_set_ib_paths(struct rdma_cm_id *id, 2396 struct ib_sa_path_rec *path_rec, int num_paths) 2397 { 2398 struct rdma_id_private *id_priv; 2399 int ret; 2400 2401 id_priv = container_of(id, struct rdma_id_private, id); 2402 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, 2403 RDMA_CM_ROUTE_RESOLVED)) 2404 return -EINVAL; 2405 2406 id->route.path_rec = kmemdup(path_rec, sizeof *path_rec * num_paths, 2407 GFP_KERNEL); 2408 if (!id->route.path_rec) { 2409 ret = -ENOMEM; 2410 goto err; 2411 } 2412 2413 id->route.num_paths = num_paths; 2414 return 0; 2415 err: 2416 cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_ADDR_RESOLVED); 2417 return ret; 2418 } 2419 EXPORT_SYMBOL(rdma_set_ib_paths); 2420 2421 static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms) 2422 { 2423 struct cma_work *work; 2424 2425 work = kzalloc(sizeof *work, GFP_KERNEL); 2426 if (!work) 2427 return -ENOMEM; 2428 2429 work->id = id_priv; 2430 INIT_WORK(&work->work, cma_work_handler); 2431 work->old_state = RDMA_CM_ROUTE_QUERY; 2432 work->new_state = RDMA_CM_ROUTE_RESOLVED; 2433 work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; 2434 queue_work(cma_wq, &work->work); 2435 return 0; 2436 } 2437 2438 static int iboe_tos_to_sl(struct net_device *ndev, int tos) 2439 { 2440 /* get service level, SL, from type of service, TOS */ 2441 int sl = tos; 2442 2443 /* range check input argument and map 1:1 */ 2444 if (sl > 255) 2445 sl = 255; 2446 else if (sl < 0) 2447 sl = 0; 2448 2449 /* final mappings are done by the vendor specific drivers */ 2450 return sl; 2451 } 2452 2453 static enum ib_gid_type cma_route_gid_type(enum rdma_network_type network_type, 2454 unsigned long supported_gids, 2455 enum ib_gid_type default_gid) 2456 { 2457 if ((network_type == RDMA_NETWORK_IPV4 || 2458 network_type == RDMA_NETWORK_IPV6) && 2459 test_bit(IB_GID_TYPE_ROCE_UDP_ENCAP, &supported_gids)) 2460 return IB_GID_TYPE_ROCE_UDP_ENCAP; 2461 2462 return default_gid; 2463 } 2464 2465 static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) 2466 { 2467 struct rdma_route *route = &id_priv->id.route; 2468 struct rdma_addr *addr = &route->addr; 2469 struct cma_work *work; 2470 int ret; 2471 struct net_device *ndev = NULL; 2472 2473 2474 work = kzalloc(sizeof *work, GFP_KERNEL); 2475 if (!work) 2476 return -ENOMEM; 2477 2478 work->id = id_priv; 2479 INIT_WORK(&work->work, cma_work_handler); 2480 2481 route->path_rec = kzalloc(sizeof *route->path_rec, GFP_KERNEL); 2482 if (!route->path_rec) { 2483 ret = -ENOMEM; 2484 goto err1; 2485 } 2486 2487 route->num_paths = 1; 2488 2489 if (addr->dev_addr.bound_dev_if) { 2490 unsigned long supported_gids; 2491 2492 ndev = dev_get_by_index(addr->dev_addr.net, 2493 addr->dev_addr.bound_dev_if); 2494 if (!ndev) { 2495 ret = -ENODEV; 2496 goto err2; 2497 } 2498 2499 if (ndev->if_flags & IFF_LOOPBACK) { 2500 dev_put(ndev); 2501 if (!id_priv->id.device->get_netdev) { 2502 ret = -EOPNOTSUPP; 2503 goto err2; 2504 } 2505 2506 ndev = id_priv->id.device->get_netdev(id_priv->id.device, 2507 id_priv->id.port_num); 2508 if (!ndev) { 2509 ret = -ENODEV; 2510 goto err2; 2511 } 2512 } 2513 2514 route->path_rec->net = ndev->if_vnet; 2515 route->path_rec->ifindex = ndev->if_index; 2516 supported_gids = roce_gid_type_mask_support(id_priv->id.device, 2517 id_priv->id.port_num); 2518 route->path_rec->gid_type = 2519 cma_route_gid_type(addr->dev_addr.network, 2520 supported_gids, 2521 id_priv->gid_type); 2522 } 2523 if (!ndev) { 2524 ret = -ENODEV; 2525 goto err2; 2526 } 2527 2528 memcpy(route->path_rec->dmac, addr->dev_addr.dst_dev_addr, ETH_ALEN); 2529 2530 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, 2531 &route->path_rec->sgid); 2532 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr, 2533 &route->path_rec->dgid); 2534 2535 /* Use the hint from IP Stack to select GID Type */ 2536 if (route->path_rec->gid_type < ib_network_to_gid_type(addr->dev_addr.network)) 2537 route->path_rec->gid_type = ib_network_to_gid_type(addr->dev_addr.network); 2538 if (((struct sockaddr *)&id_priv->id.route.addr.dst_addr)->sa_family != AF_IB) 2539 /* TODO: get the hoplimit from the inet/inet6 device */ 2540 route->path_rec->hop_limit = addr->dev_addr.hoplimit; 2541 else 2542 route->path_rec->hop_limit = 1; 2543 route->path_rec->reversible = 1; 2544 route->path_rec->pkey = cpu_to_be16(0xffff); 2545 route->path_rec->mtu_selector = IB_SA_EQ; 2546 route->path_rec->sl = iboe_tos_to_sl(ndev, id_priv->tos); 2547 route->path_rec->mtu = iboe_get_mtu(ndev->if_mtu); 2548 route->path_rec->rate_selector = IB_SA_EQ; 2549 route->path_rec->rate = iboe_get_rate(ndev); 2550 dev_put(ndev); 2551 route->path_rec->packet_life_time_selector = IB_SA_EQ; 2552 route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME; 2553 if (!route->path_rec->mtu) { 2554 ret = -EINVAL; 2555 goto err2; 2556 } 2557 2558 work->old_state = RDMA_CM_ROUTE_QUERY; 2559 work->new_state = RDMA_CM_ROUTE_RESOLVED; 2560 work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; 2561 work->event.status = 0; 2562 2563 queue_work(cma_wq, &work->work); 2564 2565 return 0; 2566 2567 err2: 2568 kfree(route->path_rec); 2569 route->path_rec = NULL; 2570 err1: 2571 kfree(work); 2572 return ret; 2573 } 2574 2575 int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms) 2576 { 2577 struct rdma_id_private *id_priv; 2578 int ret; 2579 2580 id_priv = container_of(id, struct rdma_id_private, id); 2581 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_QUERY)) 2582 return -EINVAL; 2583 2584 atomic_inc(&id_priv->refcount); 2585 if (rdma_cap_ib_sa(id->device, id->port_num)) 2586 ret = cma_resolve_ib_route(id_priv, timeout_ms); 2587 else if (rdma_protocol_roce(id->device, id->port_num)) 2588 ret = cma_resolve_iboe_route(id_priv); 2589 else if (rdma_protocol_iwarp(id->device, id->port_num)) 2590 ret = cma_resolve_iw_route(id_priv, timeout_ms); 2591 else 2592 ret = -ENOSYS; 2593 2594 if (ret) 2595 goto err; 2596 2597 return 0; 2598 err: 2599 cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, RDMA_CM_ADDR_RESOLVED); 2600 cma_deref_id(id_priv); 2601 return ret; 2602 } 2603 EXPORT_SYMBOL(rdma_resolve_route); 2604 2605 static void cma_set_loopback(struct sockaddr *addr) 2606 { 2607 switch (addr->sa_family) { 2608 case AF_INET: 2609 ((struct sockaddr_in *) addr)->sin_addr.s_addr = htonl(INADDR_LOOPBACK); 2610 break; 2611 case AF_INET6: 2612 ipv6_addr_set(&((struct sockaddr_in6 *) addr)->sin6_addr, 2613 0, 0, 0, htonl(1)); 2614 break; 2615 default: 2616 ib_addr_set(&((struct sockaddr_ib *) addr)->sib_addr, 2617 0, 0, 0, htonl(1)); 2618 break; 2619 } 2620 } 2621 2622 static int cma_bind_loopback(struct rdma_id_private *id_priv) 2623 { 2624 struct cma_device *cma_dev, *cur_dev; 2625 struct ib_port_attr port_attr; 2626 union ib_gid gid; 2627 u16 pkey; 2628 int ret; 2629 u8 p; 2630 2631 cma_dev = NULL; 2632 mutex_lock(&lock); 2633 list_for_each_entry(cur_dev, &dev_list, list) { 2634 if (cma_family(id_priv) == AF_IB && 2635 !rdma_cap_ib_cm(cur_dev->device, 1)) 2636 continue; 2637 2638 if (!cma_dev) 2639 cma_dev = cur_dev; 2640 2641 for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) { 2642 if (!ib_query_port(cur_dev->device, p, &port_attr) && 2643 port_attr.state == IB_PORT_ACTIVE) { 2644 cma_dev = cur_dev; 2645 goto port_found; 2646 } 2647 } 2648 } 2649 2650 if (!cma_dev) { 2651 ret = -ENODEV; 2652 goto out; 2653 } 2654 2655 p = 1; 2656 2657 port_found: 2658 ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid, NULL); 2659 if (ret) 2660 goto out; 2661 2662 ret = ib_get_cached_pkey(cma_dev->device, p, 0, &pkey); 2663 if (ret) 2664 goto out; 2665 2666 id_priv->id.route.addr.dev_addr.dev_type = 2667 (rdma_protocol_ib(cma_dev->device, p)) ? 2668 ARPHRD_INFINIBAND : ARPHRD_ETHER; 2669 2670 rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid); 2671 ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey); 2672 id_priv->id.port_num = p; 2673 cma_attach_to_dev(id_priv, cma_dev); 2674 cma_set_loopback(cma_src_addr(id_priv)); 2675 out: 2676 mutex_unlock(&lock); 2677 return ret; 2678 } 2679 2680 static void addr_handler(int status, struct sockaddr *src_addr, 2681 struct rdma_dev_addr *dev_addr, void *context) 2682 { 2683 struct rdma_id_private *id_priv = context; 2684 struct rdma_cm_event event; 2685 2686 memset(&event, 0, sizeof event); 2687 mutex_lock(&id_priv->handler_mutex); 2688 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, 2689 RDMA_CM_ADDR_RESOLVED)) 2690 goto out; 2691 2692 memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr)); 2693 if (!status && !id_priv->cma_dev) 2694 status = cma_acquire_dev(id_priv, NULL); 2695 2696 if (status) { 2697 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, 2698 RDMA_CM_ADDR_BOUND)) 2699 goto out; 2700 event.event = RDMA_CM_EVENT_ADDR_ERROR; 2701 event.status = status; 2702 } else 2703 event.event = RDMA_CM_EVENT_ADDR_RESOLVED; 2704 2705 if (id_priv->id.event_handler(&id_priv->id, &event)) { 2706 cma_exch(id_priv, RDMA_CM_DESTROYING); 2707 mutex_unlock(&id_priv->handler_mutex); 2708 cma_deref_id(id_priv); 2709 rdma_destroy_id(&id_priv->id); 2710 return; 2711 } 2712 out: 2713 mutex_unlock(&id_priv->handler_mutex); 2714 cma_deref_id(id_priv); 2715 } 2716 2717 static int cma_resolve_loopback(struct rdma_id_private *id_priv) 2718 { 2719 struct cma_work *work; 2720 union ib_gid gid; 2721 int ret; 2722 2723 work = kzalloc(sizeof *work, GFP_KERNEL); 2724 if (!work) 2725 return -ENOMEM; 2726 2727 if (!id_priv->cma_dev) { 2728 ret = cma_bind_loopback(id_priv); 2729 if (ret) 2730 goto err; 2731 } 2732 2733 rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); 2734 rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid); 2735 2736 work->id = id_priv; 2737 INIT_WORK(&work->work, cma_work_handler); 2738 work->old_state = RDMA_CM_ADDR_QUERY; 2739 work->new_state = RDMA_CM_ADDR_RESOLVED; 2740 work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED; 2741 queue_work(cma_wq, &work->work); 2742 return 0; 2743 err: 2744 kfree(work); 2745 return ret; 2746 } 2747 2748 static int cma_resolve_ib_addr(struct rdma_id_private *id_priv) 2749 { 2750 struct cma_work *work; 2751 int ret; 2752 2753 work = kzalloc(sizeof *work, GFP_KERNEL); 2754 if (!work) 2755 return -ENOMEM; 2756 2757 if (!id_priv->cma_dev) { 2758 ret = cma_resolve_ib_dev(id_priv); 2759 if (ret) 2760 goto err; 2761 } 2762 2763 rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *) 2764 &(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr)); 2765 2766 work->id = id_priv; 2767 INIT_WORK(&work->work, cma_work_handler); 2768 work->old_state = RDMA_CM_ADDR_QUERY; 2769 work->new_state = RDMA_CM_ADDR_RESOLVED; 2770 work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED; 2771 queue_work(cma_wq, &work->work); 2772 return 0; 2773 err: 2774 kfree(work); 2775 return ret; 2776 } 2777 2778 static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, 2779 struct sockaddr *dst_addr) 2780 { 2781 if (!src_addr || !src_addr->sa_family) { 2782 src_addr = (struct sockaddr *) &id->route.addr.src_addr; 2783 src_addr->sa_family = dst_addr->sa_family; 2784 if (dst_addr->sa_family == AF_INET6) { 2785 struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *) src_addr; 2786 struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *) dst_addr; 2787 src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id; 2788 if (IN6_IS_SCOPE_LINKLOCAL(&dst_addr6->sin6_addr) || 2789 IN6_IS_ADDR_MC_INTFACELOCAL(&dst_addr6->sin6_addr)) 2790 id->route.addr.dev_addr.bound_dev_if = dst_addr6->sin6_scope_id; 2791 } else if (dst_addr->sa_family == AF_IB) { 2792 ((struct sockaddr_ib *) src_addr)->sib_pkey = 2793 ((struct sockaddr_ib *) dst_addr)->sib_pkey; 2794 } 2795 } 2796 return rdma_bind_addr(id, src_addr); 2797 } 2798 2799 int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, 2800 struct sockaddr *dst_addr, int timeout_ms) 2801 { 2802 struct rdma_id_private *id_priv; 2803 int ret; 2804 2805 id_priv = container_of(id, struct rdma_id_private, id); 2806 if (id_priv->state == RDMA_CM_IDLE) { 2807 ret = cma_bind_addr(id, src_addr, dst_addr); 2808 if (ret) 2809 return ret; 2810 } 2811 2812 if (cma_family(id_priv) != dst_addr->sa_family) 2813 return -EINVAL; 2814 2815 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) 2816 return -EINVAL; 2817 2818 atomic_inc(&id_priv->refcount); 2819 memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); 2820 if (cma_any_addr(dst_addr)) { 2821 ret = cma_resolve_loopback(id_priv); 2822 } else { 2823 if (dst_addr->sa_family == AF_IB) { 2824 ret = cma_resolve_ib_addr(id_priv); 2825 } else { 2826 ret = cma_check_linklocal(&id->route.addr.dev_addr, dst_addr); 2827 if (ret) 2828 goto err; 2829 2830 ret = rdma_resolve_ip(&addr_client, cma_src_addr(id_priv), 2831 dst_addr, &id->route.addr.dev_addr, 2832 timeout_ms, addr_handler, id_priv); 2833 } 2834 } 2835 if (ret) 2836 goto err; 2837 2838 return 0; 2839 err: 2840 cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); 2841 cma_deref_id(id_priv); 2842 return ret; 2843 } 2844 EXPORT_SYMBOL(rdma_resolve_addr); 2845 2846 int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse) 2847 { 2848 struct rdma_id_private *id_priv; 2849 unsigned long flags; 2850 int ret; 2851 2852 id_priv = container_of(id, struct rdma_id_private, id); 2853 spin_lock_irqsave(&id_priv->lock, flags); 2854 if (reuse || id_priv->state == RDMA_CM_IDLE) { 2855 id_priv->reuseaddr = reuse; 2856 ret = 0; 2857 } else { 2858 ret = -EINVAL; 2859 } 2860 spin_unlock_irqrestore(&id_priv->lock, flags); 2861 return ret; 2862 } 2863 EXPORT_SYMBOL(rdma_set_reuseaddr); 2864 2865 int rdma_set_afonly(struct rdma_cm_id *id, int afonly) 2866 { 2867 struct rdma_id_private *id_priv; 2868 unsigned long flags; 2869 int ret; 2870 2871 id_priv = container_of(id, struct rdma_id_private, id); 2872 spin_lock_irqsave(&id_priv->lock, flags); 2873 if (id_priv->state == RDMA_CM_IDLE || id_priv->state == RDMA_CM_ADDR_BOUND) { 2874 id_priv->options |= (1 << CMA_OPTION_AFONLY); 2875 id_priv->afonly = afonly; 2876 ret = 0; 2877 } else { 2878 ret = -EINVAL; 2879 } 2880 spin_unlock_irqrestore(&id_priv->lock, flags); 2881 return ret; 2882 } 2883 EXPORT_SYMBOL(rdma_set_afonly); 2884 2885 static void cma_bind_port(struct rdma_bind_list *bind_list, 2886 struct rdma_id_private *id_priv) 2887 { 2888 struct sockaddr *addr; 2889 struct sockaddr_ib *sib; 2890 u64 sid, mask; 2891 __be16 port; 2892 2893 addr = cma_src_addr(id_priv); 2894 port = htons(bind_list->port); 2895 2896 switch (addr->sa_family) { 2897 case AF_INET: 2898 ((struct sockaddr_in *) addr)->sin_port = port; 2899 break; 2900 case AF_INET6: 2901 ((struct sockaddr_in6 *) addr)->sin6_port = port; 2902 break; 2903 case AF_IB: 2904 sib = (struct sockaddr_ib *) addr; 2905 sid = be64_to_cpu(sib->sib_sid); 2906 mask = be64_to_cpu(sib->sib_sid_mask); 2907 sib->sib_sid = cpu_to_be64((sid & mask) | (u64) ntohs(port)); 2908 sib->sib_sid_mask = cpu_to_be64(~0ULL); 2909 break; 2910 } 2911 id_priv->bind_list = bind_list; 2912 hlist_add_head(&id_priv->node, &bind_list->owners); 2913 } 2914 2915 static int cma_alloc_port(enum rdma_port_space ps, 2916 struct rdma_id_private *id_priv, unsigned short snum) 2917 { 2918 struct rdma_bind_list *bind_list; 2919 int ret; 2920 2921 bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL); 2922 if (!bind_list) 2923 return -ENOMEM; 2924 2925 ret = cma_ps_alloc(id_priv->id.route.addr.dev_addr.net, ps, bind_list, 2926 snum); 2927 if (ret < 0) 2928 goto err; 2929 2930 bind_list->ps = ps; 2931 bind_list->port = (unsigned short)ret; 2932 cma_bind_port(bind_list, id_priv); 2933 return 0; 2934 err: 2935 kfree(bind_list); 2936 return ret == -ENOSPC ? -EADDRNOTAVAIL : ret; 2937 } 2938 2939 static int cma_alloc_any_port(enum rdma_port_space ps, 2940 struct rdma_id_private *id_priv) 2941 { 2942 static unsigned int last_used_port; 2943 int low, high, remaining; 2944 unsigned int rover; 2945 struct vnet *net = id_priv->id.route.addr.dev_addr.net; 2946 u32 rand; 2947 2948 inet_get_local_port_range(net, &low, &high); 2949 remaining = (high - low) + 1; 2950 get_random_bytes(&rand, sizeof(rand)); 2951 rover = rand % remaining + low; 2952 retry: 2953 if (last_used_port != rover && 2954 !cma_ps_find(net, ps, (unsigned short)rover)) { 2955 int ret = cma_alloc_port(ps, id_priv, rover); 2956 /* 2957 * Remember previously used port number in order to avoid 2958 * re-using same port immediately after it is closed. 2959 */ 2960 if (!ret) 2961 last_used_port = rover; 2962 if (ret != -EADDRNOTAVAIL) 2963 return ret; 2964 } 2965 if (--remaining) { 2966 rover++; 2967 if ((rover < low) || (rover > high)) 2968 rover = low; 2969 goto retry; 2970 } 2971 return -EADDRNOTAVAIL; 2972 } 2973 2974 /* 2975 * Check that the requested port is available. This is called when trying to 2976 * bind to a specific port, or when trying to listen on a bound port. In 2977 * the latter case, the provided id_priv may already be on the bind_list, but 2978 * we still need to check that it's okay to start listening. 2979 */ 2980 static int cma_check_port(struct rdma_bind_list *bind_list, 2981 struct rdma_id_private *id_priv, uint8_t reuseaddr) 2982 { 2983 struct rdma_id_private *cur_id; 2984 struct sockaddr *addr, *cur_addr; 2985 2986 addr = cma_src_addr(id_priv); 2987 hlist_for_each_entry(cur_id, &bind_list->owners, node) { 2988 if (id_priv == cur_id) 2989 continue; 2990 2991 if ((cur_id->state != RDMA_CM_LISTEN) && reuseaddr && 2992 cur_id->reuseaddr) 2993 continue; 2994 2995 cur_addr = cma_src_addr(cur_id); 2996 if (id_priv->afonly && cur_id->afonly && 2997 (addr->sa_family != cur_addr->sa_family)) 2998 continue; 2999 3000 if (cma_any_addr(addr) || cma_any_addr(cur_addr)) 3001 return -EADDRNOTAVAIL; 3002 3003 if (!cma_addr_cmp(addr, cur_addr)) 3004 return -EADDRINUSE; 3005 } 3006 return 0; 3007 } 3008 3009 static int cma_use_port(enum rdma_port_space ps, 3010 struct rdma_id_private *id_priv) 3011 { 3012 struct rdma_bind_list *bind_list; 3013 unsigned short snum; 3014 int ret; 3015 3016 snum = ntohs(cma_port(cma_src_addr(id_priv))); 3017 if (snum < IPPORT_RESERVED && 3018 priv_check(curthread, PRIV_NETINET_BINDANY) != 0) 3019 return -EACCES; 3020 3021 bind_list = cma_ps_find(id_priv->id.route.addr.dev_addr.net, ps, snum); 3022 if (!bind_list) { 3023 ret = cma_alloc_port(ps, id_priv, snum); 3024 } else { 3025 ret = cma_check_port(bind_list, id_priv, id_priv->reuseaddr); 3026 if (!ret) 3027 cma_bind_port(bind_list, id_priv); 3028 } 3029 return ret; 3030 } 3031 3032 static int cma_bind_listen(struct rdma_id_private *id_priv) 3033 { 3034 struct rdma_bind_list *bind_list = id_priv->bind_list; 3035 int ret = 0; 3036 3037 mutex_lock(&lock); 3038 if (bind_list->owners.first->next) 3039 ret = cma_check_port(bind_list, id_priv, 0); 3040 mutex_unlock(&lock); 3041 return ret; 3042 } 3043 3044 static enum rdma_port_space cma_select_inet_ps( 3045 struct rdma_id_private *id_priv) 3046 { 3047 switch (id_priv->id.ps) { 3048 case RDMA_PS_TCP: 3049 case RDMA_PS_UDP: 3050 case RDMA_PS_IPOIB: 3051 case RDMA_PS_IB: 3052 return id_priv->id.ps; 3053 default: 3054 3055 return 0; 3056 } 3057 } 3058 3059 static enum rdma_port_space cma_select_ib_ps(struct rdma_id_private *id_priv) 3060 { 3061 enum rdma_port_space ps = 0; 3062 struct sockaddr_ib *sib; 3063 u64 sid_ps, mask, sid; 3064 3065 sib = (struct sockaddr_ib *) cma_src_addr(id_priv); 3066 mask = be64_to_cpu(sib->sib_sid_mask) & RDMA_IB_IP_PS_MASK; 3067 sid = be64_to_cpu(sib->sib_sid) & mask; 3068 3069 if ((id_priv->id.ps == RDMA_PS_IB) && (sid == (RDMA_IB_IP_PS_IB & mask))) { 3070 sid_ps = RDMA_IB_IP_PS_IB; 3071 ps = RDMA_PS_IB; 3072 } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_TCP)) && 3073 (sid == (RDMA_IB_IP_PS_TCP & mask))) { 3074 sid_ps = RDMA_IB_IP_PS_TCP; 3075 ps = RDMA_PS_TCP; 3076 } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_UDP)) && 3077 (sid == (RDMA_IB_IP_PS_UDP & mask))) { 3078 sid_ps = RDMA_IB_IP_PS_UDP; 3079 ps = RDMA_PS_UDP; 3080 } 3081 3082 if (ps) { 3083 sib->sib_sid = cpu_to_be64(sid_ps | ntohs(cma_port((struct sockaddr *) sib))); 3084 sib->sib_sid_mask = cpu_to_be64(RDMA_IB_IP_PS_MASK | 3085 be64_to_cpu(sib->sib_sid_mask)); 3086 } 3087 return ps; 3088 } 3089 3090 static int cma_get_port(struct rdma_id_private *id_priv) 3091 { 3092 enum rdma_port_space ps; 3093 int ret; 3094 3095 if (cma_family(id_priv) != AF_IB) 3096 ps = cma_select_inet_ps(id_priv); 3097 else 3098 ps = cma_select_ib_ps(id_priv); 3099 if (!ps) 3100 return -EPROTONOSUPPORT; 3101 3102 mutex_lock(&lock); 3103 if (cma_any_port(cma_src_addr(id_priv))) 3104 ret = cma_alloc_any_port(ps, id_priv); 3105 else 3106 ret = cma_use_port(ps, id_priv); 3107 mutex_unlock(&lock); 3108 3109 return ret; 3110 } 3111 3112 static int cma_check_linklocal(struct rdma_dev_addr *dev_addr, 3113 struct sockaddr *addr) 3114 { 3115 #ifdef INET6 3116 struct sockaddr_in6 sin6; 3117 3118 if (addr->sa_family != AF_INET6) 3119 return 0; 3120 3121 sin6 = *(struct sockaddr_in6 *)addr; 3122 3123 if (IN6_IS_SCOPE_LINKLOCAL(&sin6.sin6_addr) || 3124 IN6_IS_ADDR_MC_INTFACELOCAL(&sin6.sin6_addr)) { 3125 bool failure; 3126 3127 CURVNET_SET_QUIET(dev_addr->net); 3128 failure = sa6_recoverscope(&sin6) || sin6.sin6_scope_id == 0; 3129 CURVNET_RESTORE(); 3130 3131 /* check if IPv6 scope ID is not set */ 3132 if (failure) 3133 return -EINVAL; 3134 dev_addr->bound_dev_if = sin6.sin6_scope_id; 3135 } 3136 #endif 3137 return 0; 3138 } 3139 3140 int rdma_listen(struct rdma_cm_id *id, int backlog) 3141 { 3142 struct rdma_id_private *id_priv; 3143 int ret; 3144 3145 id_priv = container_of(id, struct rdma_id_private, id); 3146 if (id_priv->state == RDMA_CM_IDLE) { 3147 id->route.addr.src_addr.ss_family = AF_INET; 3148 ret = rdma_bind_addr(id, cma_src_addr(id_priv)); 3149 if (ret) 3150 return ret; 3151 } 3152 3153 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN)) 3154 return -EINVAL; 3155 3156 if (id_priv->reuseaddr) { 3157 ret = cma_bind_listen(id_priv); 3158 if (ret) 3159 goto err; 3160 } 3161 3162 id_priv->backlog = backlog; 3163 if (id->device) { 3164 if (rdma_cap_ib_cm(id->device, 1)) { 3165 ret = cma_ib_listen(id_priv); 3166 if (ret) 3167 goto err; 3168 } else if (rdma_cap_iw_cm(id->device, 1)) { 3169 ret = cma_iw_listen(id_priv, backlog); 3170 if (ret) 3171 goto err; 3172 } else { 3173 ret = -ENOSYS; 3174 goto err; 3175 } 3176 } else 3177 cma_listen_on_all(id_priv); 3178 3179 return 0; 3180 err: 3181 id_priv->backlog = 0; 3182 cma_comp_exch(id_priv, RDMA_CM_LISTEN, RDMA_CM_ADDR_BOUND); 3183 return ret; 3184 } 3185 EXPORT_SYMBOL(rdma_listen); 3186 3187 int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) 3188 { 3189 struct rdma_id_private *id_priv; 3190 int ret; 3191 3192 if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 && 3193 addr->sa_family != AF_IB) 3194 return -EAFNOSUPPORT; 3195 3196 id_priv = container_of(id, struct rdma_id_private, id); 3197 if (!cma_comp_exch(id_priv, RDMA_CM_IDLE, RDMA_CM_ADDR_BOUND)) 3198 return -EINVAL; 3199 3200 ret = cma_check_linklocal(&id->route.addr.dev_addr, addr); 3201 if (ret) 3202 goto err1; 3203 3204 memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr)); 3205 if (!cma_any_addr(addr)) { 3206 ret = cma_translate_addr(addr, &id->route.addr.dev_addr); 3207 if (ret) 3208 goto err1; 3209 3210 ret = cma_acquire_dev(id_priv, NULL); 3211 if (ret) 3212 goto err1; 3213 } 3214 3215 if (!(id_priv->options & (1 << CMA_OPTION_AFONLY))) { 3216 if (addr->sa_family == AF_INET) 3217 id_priv->afonly = 1; 3218 #ifdef INET6 3219 else if (addr->sa_family == AF_INET6) { 3220 CURVNET_SET_QUIET(id_priv->id.route.addr.dev_addr.net); 3221 id_priv->afonly = V_ip6_v6only; 3222 CURVNET_RESTORE(); 3223 } 3224 #endif 3225 } 3226 ret = cma_get_port(id_priv); 3227 if (ret) 3228 goto err2; 3229 3230 return 0; 3231 err2: 3232 if (id_priv->cma_dev) 3233 cma_release_dev(id_priv); 3234 err1: 3235 cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE); 3236 return ret; 3237 } 3238 EXPORT_SYMBOL(rdma_bind_addr); 3239 3240 static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv) 3241 { 3242 struct cma_hdr *cma_hdr; 3243 3244 cma_hdr = hdr; 3245 cma_hdr->cma_version = CMA_VERSION; 3246 if (cma_family(id_priv) == AF_INET) { 3247 struct sockaddr_in *src4, *dst4; 3248 3249 src4 = (struct sockaddr_in *) cma_src_addr(id_priv); 3250 dst4 = (struct sockaddr_in *) cma_dst_addr(id_priv); 3251 3252 cma_set_ip_ver(cma_hdr, 4); 3253 cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr; 3254 cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr; 3255 cma_hdr->port = src4->sin_port; 3256 } else if (cma_family(id_priv) == AF_INET6) { 3257 struct sockaddr_in6 *src6, *dst6; 3258 3259 src6 = (struct sockaddr_in6 *) cma_src_addr(id_priv); 3260 dst6 = (struct sockaddr_in6 *) cma_dst_addr(id_priv); 3261 3262 cma_set_ip_ver(cma_hdr, 6); 3263 cma_hdr->src_addr.ip6 = src6->sin6_addr; 3264 cma_hdr->dst_addr.ip6 = dst6->sin6_addr; 3265 cma_hdr->port = src6->sin6_port; 3266 cma_ip6_clear_scope_id(&cma_hdr->src_addr.ip6); 3267 cma_ip6_clear_scope_id(&cma_hdr->dst_addr.ip6); 3268 } 3269 return 0; 3270 } 3271 3272 static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, 3273 struct ib_cm_event *ib_event) 3274 { 3275 struct rdma_id_private *id_priv = cm_id->context; 3276 struct rdma_cm_event event; 3277 struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd; 3278 int ret = 0; 3279 3280 mutex_lock(&id_priv->handler_mutex); 3281 if (id_priv->state != RDMA_CM_CONNECT) 3282 goto out; 3283 3284 memset(&event, 0, sizeof event); 3285 switch (ib_event->event) { 3286 case IB_CM_SIDR_REQ_ERROR: 3287 event.event = RDMA_CM_EVENT_UNREACHABLE; 3288 event.status = -ETIMEDOUT; 3289 break; 3290 case IB_CM_SIDR_REP_RECEIVED: 3291 event.param.ud.private_data = ib_event->private_data; 3292 event.param.ud.private_data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE; 3293 if (rep->status != IB_SIDR_SUCCESS) { 3294 event.event = RDMA_CM_EVENT_UNREACHABLE; 3295 event.status = ib_event->param.sidr_rep_rcvd.status; 3296 break; 3297 } 3298 ret = cma_set_qkey(id_priv, rep->qkey); 3299 if (ret) { 3300 event.event = RDMA_CM_EVENT_ADDR_ERROR; 3301 event.status = ret; 3302 break; 3303 } 3304 ret = ib_init_ah_from_path(id_priv->id.device, 3305 id_priv->id.port_num, 3306 id_priv->id.route.path_rec, 3307 &event.param.ud.ah_attr); 3308 if (ret) { 3309 event.event = RDMA_CM_EVENT_ADDR_ERROR; 3310 event.status = ret; 3311 break; 3312 } 3313 event.param.ud.qp_num = rep->qpn; 3314 event.param.ud.qkey = rep->qkey; 3315 event.event = RDMA_CM_EVENT_ESTABLISHED; 3316 event.status = 0; 3317 break; 3318 default: 3319 pr_err("RDMA CMA: unexpected IB CM event: %d\n", 3320 ib_event->event); 3321 goto out; 3322 } 3323 3324 ret = id_priv->id.event_handler(&id_priv->id, &event); 3325 if (ret) { 3326 /* Destroy the CM ID by returning a non-zero value. */ 3327 id_priv->cm_id.ib = NULL; 3328 cma_exch(id_priv, RDMA_CM_DESTROYING); 3329 mutex_unlock(&id_priv->handler_mutex); 3330 rdma_destroy_id(&id_priv->id); 3331 return ret; 3332 } 3333 out: 3334 mutex_unlock(&id_priv->handler_mutex); 3335 return ret; 3336 } 3337 3338 static int cma_resolve_ib_udp(struct rdma_id_private *id_priv, 3339 struct rdma_conn_param *conn_param) 3340 { 3341 struct ib_cm_sidr_req_param req; 3342 struct ib_cm_id *id; 3343 void *private_data; 3344 int offset, ret; 3345 3346 memset(&req, 0, sizeof req); 3347 offset = cma_user_data_offset(id_priv); 3348 req.private_data_len = offset + conn_param->private_data_len; 3349 if (req.private_data_len < conn_param->private_data_len) 3350 return -EINVAL; 3351 3352 if (req.private_data_len) { 3353 private_data = kzalloc(req.private_data_len, GFP_ATOMIC); 3354 if (!private_data) 3355 return -ENOMEM; 3356 } else { 3357 private_data = NULL; 3358 } 3359 3360 if (conn_param->private_data && conn_param->private_data_len) 3361 memcpy((char *)private_data + offset, conn_param->private_data, 3362 conn_param->private_data_len); 3363 3364 if (private_data) { 3365 ret = cma_format_hdr(private_data, id_priv); 3366 if (ret) 3367 goto out; 3368 req.private_data = private_data; 3369 } 3370 3371 id = ib_create_cm_id(id_priv->id.device, cma_sidr_rep_handler, 3372 id_priv); 3373 if (IS_ERR(id)) { 3374 ret = PTR_ERR(id); 3375 goto out; 3376 } 3377 id_priv->cm_id.ib = id; 3378 3379 req.path = id_priv->id.route.path_rec; 3380 req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); 3381 req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8); 3382 req.max_cm_retries = CMA_MAX_CM_RETRIES; 3383 3384 ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req); 3385 if (ret) { 3386 ib_destroy_cm_id(id_priv->cm_id.ib); 3387 id_priv->cm_id.ib = NULL; 3388 } 3389 out: 3390 kfree(private_data); 3391 return ret; 3392 } 3393 3394 static int cma_connect_ib(struct rdma_id_private *id_priv, 3395 struct rdma_conn_param *conn_param) 3396 { 3397 struct ib_cm_req_param req; 3398 struct rdma_route *route; 3399 void *private_data; 3400 struct ib_cm_id *id; 3401 int offset, ret; 3402 3403 memset(&req, 0, sizeof req); 3404 offset = cma_user_data_offset(id_priv); 3405 req.private_data_len = offset + conn_param->private_data_len; 3406 if (req.private_data_len < conn_param->private_data_len) 3407 return -EINVAL; 3408 3409 if (req.private_data_len) { 3410 private_data = kzalloc(req.private_data_len, GFP_ATOMIC); 3411 if (!private_data) 3412 return -ENOMEM; 3413 } else { 3414 private_data = NULL; 3415 } 3416 3417 if (conn_param->private_data && conn_param->private_data_len) 3418 memcpy((char *)private_data + offset, conn_param->private_data, 3419 conn_param->private_data_len); 3420 3421 id = ib_create_cm_id(id_priv->id.device, cma_ib_handler, id_priv); 3422 if (IS_ERR(id)) { 3423 ret = PTR_ERR(id); 3424 goto out; 3425 } 3426 id_priv->cm_id.ib = id; 3427 3428 route = &id_priv->id.route; 3429 if (private_data) { 3430 ret = cma_format_hdr(private_data, id_priv); 3431 if (ret) 3432 goto out; 3433 req.private_data = private_data; 3434 } 3435 3436 req.primary_path = &route->path_rec[0]; 3437 if (route->num_paths == 2) 3438 req.alternate_path = &route->path_rec[1]; 3439 3440 req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); 3441 req.qp_num = id_priv->qp_num; 3442 req.qp_type = id_priv->id.qp_type; 3443 req.starting_psn = id_priv->seq_num; 3444 req.responder_resources = conn_param->responder_resources; 3445 req.initiator_depth = conn_param->initiator_depth; 3446 req.flow_control = conn_param->flow_control; 3447 req.retry_count = min_t(u8, 7, conn_param->retry_count); 3448 req.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count); 3449 req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT; 3450 req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT; 3451 req.max_cm_retries = CMA_MAX_CM_RETRIES; 3452 req.srq = id_priv->srq ? 1 : 0; 3453 3454 ret = ib_send_cm_req(id_priv->cm_id.ib, &req); 3455 out: 3456 if (ret && !IS_ERR(id)) { 3457 ib_destroy_cm_id(id); 3458 id_priv->cm_id.ib = NULL; 3459 } 3460 3461 kfree(private_data); 3462 return ret; 3463 } 3464 3465 static int cma_connect_iw(struct rdma_id_private *id_priv, 3466 struct rdma_conn_param *conn_param) 3467 { 3468 struct iw_cm_id *cm_id; 3469 int ret; 3470 struct iw_cm_conn_param iw_param; 3471 3472 cm_id = iw_create_cm_id(id_priv->id.device, cma_iw_handler, id_priv); 3473 if (IS_ERR(cm_id)) 3474 return PTR_ERR(cm_id); 3475 3476 cm_id->tos = id_priv->tos; 3477 id_priv->cm_id.iw = cm_id; 3478 3479 memcpy(&cm_id->local_addr, cma_src_addr(id_priv), 3480 rdma_addr_size(cma_src_addr(id_priv))); 3481 memcpy(&cm_id->remote_addr, cma_dst_addr(id_priv), 3482 rdma_addr_size(cma_dst_addr(id_priv))); 3483 3484 ret = cma_modify_qp_rtr(id_priv, conn_param); 3485 if (ret) 3486 goto out; 3487 3488 if (conn_param) { 3489 iw_param.ord = conn_param->initiator_depth; 3490 iw_param.ird = conn_param->responder_resources; 3491 iw_param.private_data = conn_param->private_data; 3492 iw_param.private_data_len = conn_param->private_data_len; 3493 iw_param.qpn = id_priv->id.qp ? id_priv->qp_num : conn_param->qp_num; 3494 } else { 3495 memset(&iw_param, 0, sizeof iw_param); 3496 iw_param.qpn = id_priv->qp_num; 3497 } 3498 ret = iw_cm_connect(cm_id, &iw_param); 3499 out: 3500 if (ret) { 3501 iw_destroy_cm_id(cm_id); 3502 id_priv->cm_id.iw = NULL; 3503 } 3504 return ret; 3505 } 3506 3507 int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) 3508 { 3509 struct rdma_id_private *id_priv; 3510 int ret; 3511 3512 id_priv = container_of(id, struct rdma_id_private, id); 3513 if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT)) 3514 return -EINVAL; 3515 3516 if (!id->qp) { 3517 id_priv->qp_num = conn_param->qp_num; 3518 id_priv->srq = conn_param->srq; 3519 } 3520 3521 if (rdma_cap_ib_cm(id->device, id->port_num)) { 3522 if (id->qp_type == IB_QPT_UD) 3523 ret = cma_resolve_ib_udp(id_priv, conn_param); 3524 else 3525 ret = cma_connect_ib(id_priv, conn_param); 3526 } else if (rdma_cap_iw_cm(id->device, id->port_num)) 3527 ret = cma_connect_iw(id_priv, conn_param); 3528 else 3529 ret = -ENOSYS; 3530 if (ret) 3531 goto err; 3532 3533 return 0; 3534 err: 3535 cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_ROUTE_RESOLVED); 3536 return ret; 3537 } 3538 EXPORT_SYMBOL(rdma_connect); 3539 3540 static int cma_accept_ib(struct rdma_id_private *id_priv, 3541 struct rdma_conn_param *conn_param) 3542 { 3543 struct ib_cm_rep_param rep; 3544 int ret; 3545 3546 ret = cma_modify_qp_rtr(id_priv, conn_param); 3547 if (ret) 3548 goto out; 3549 3550 ret = cma_modify_qp_rts(id_priv, conn_param); 3551 if (ret) 3552 goto out; 3553 3554 memset(&rep, 0, sizeof rep); 3555 rep.qp_num = id_priv->qp_num; 3556 rep.starting_psn = id_priv->seq_num; 3557 rep.private_data = conn_param->private_data; 3558 rep.private_data_len = conn_param->private_data_len; 3559 rep.responder_resources = conn_param->responder_resources; 3560 rep.initiator_depth = conn_param->initiator_depth; 3561 rep.failover_accepted = 0; 3562 rep.flow_control = conn_param->flow_control; 3563 rep.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count); 3564 rep.srq = id_priv->srq ? 1 : 0; 3565 3566 ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep); 3567 out: 3568 return ret; 3569 } 3570 3571 static int cma_accept_iw(struct rdma_id_private *id_priv, 3572 struct rdma_conn_param *conn_param) 3573 { 3574 struct iw_cm_conn_param iw_param; 3575 int ret; 3576 3577 ret = cma_modify_qp_rtr(id_priv, conn_param); 3578 if (ret) 3579 return ret; 3580 3581 iw_param.ord = conn_param->initiator_depth; 3582 iw_param.ird = conn_param->responder_resources; 3583 iw_param.private_data = conn_param->private_data; 3584 iw_param.private_data_len = conn_param->private_data_len; 3585 if (id_priv->id.qp) { 3586 iw_param.qpn = id_priv->qp_num; 3587 } else 3588 iw_param.qpn = conn_param->qp_num; 3589 3590 return iw_cm_accept(id_priv->cm_id.iw, &iw_param); 3591 } 3592 3593 static int cma_send_sidr_rep(struct rdma_id_private *id_priv, 3594 enum ib_cm_sidr_status status, u32 qkey, 3595 const void *private_data, int private_data_len) 3596 { 3597 struct ib_cm_sidr_rep_param rep; 3598 int ret; 3599 3600 memset(&rep, 0, sizeof rep); 3601 rep.status = status; 3602 if (status == IB_SIDR_SUCCESS) { 3603 ret = cma_set_qkey(id_priv, qkey); 3604 if (ret) 3605 return ret; 3606 rep.qp_num = id_priv->qp_num; 3607 rep.qkey = id_priv->qkey; 3608 } 3609 rep.private_data = private_data; 3610 rep.private_data_len = private_data_len; 3611 3612 return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep); 3613 } 3614 3615 int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) 3616 { 3617 struct rdma_id_private *id_priv; 3618 int ret; 3619 3620 id_priv = container_of(id, struct rdma_id_private, id); 3621 3622 id_priv->owner = task_pid_nr(current); 3623 3624 if (!cma_comp(id_priv, RDMA_CM_CONNECT)) 3625 return -EINVAL; 3626 3627 if (!id->qp && conn_param) { 3628 id_priv->qp_num = conn_param->qp_num; 3629 id_priv->srq = conn_param->srq; 3630 } 3631 3632 if (rdma_cap_ib_cm(id->device, id->port_num)) { 3633 if (id->qp_type == IB_QPT_UD) { 3634 if (conn_param) 3635 ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, 3636 conn_param->qkey, 3637 conn_param->private_data, 3638 conn_param->private_data_len); 3639 else 3640 ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, 3641 0, NULL, 0); 3642 } else { 3643 if (conn_param) 3644 ret = cma_accept_ib(id_priv, conn_param); 3645 else 3646 ret = cma_rep_recv(id_priv); 3647 } 3648 } else if (rdma_cap_iw_cm(id->device, id->port_num)) 3649 ret = cma_accept_iw(id_priv, conn_param); 3650 else 3651 ret = -ENOSYS; 3652 3653 if (ret) 3654 goto reject; 3655 3656 return 0; 3657 reject: 3658 cma_modify_qp_err(id_priv); 3659 rdma_reject(id, NULL, 0); 3660 return ret; 3661 } 3662 EXPORT_SYMBOL(rdma_accept); 3663 3664 int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event) 3665 { 3666 struct rdma_id_private *id_priv; 3667 int ret; 3668 3669 id_priv = container_of(id, struct rdma_id_private, id); 3670 if (!id_priv->cm_id.ib) 3671 return -EINVAL; 3672 3673 switch (id->device->node_type) { 3674 case RDMA_NODE_IB_CA: 3675 ret = ib_cm_notify(id_priv->cm_id.ib, event); 3676 break; 3677 default: 3678 ret = 0; 3679 break; 3680 } 3681 return ret; 3682 } 3683 EXPORT_SYMBOL(rdma_notify); 3684 3685 int rdma_reject(struct rdma_cm_id *id, const void *private_data, 3686 u8 private_data_len) 3687 { 3688 struct rdma_id_private *id_priv; 3689 int ret; 3690 3691 id_priv = container_of(id, struct rdma_id_private, id); 3692 if (!id_priv->cm_id.ib) 3693 return -EINVAL; 3694 3695 if (rdma_cap_ib_cm(id->device, id->port_num)) { 3696 if (id->qp_type == IB_QPT_UD) 3697 ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, 0, 3698 private_data, private_data_len); 3699 else 3700 ret = ib_send_cm_rej(id_priv->cm_id.ib, 3701 IB_CM_REJ_CONSUMER_DEFINED, NULL, 3702 0, private_data, private_data_len); 3703 } else if (rdma_cap_iw_cm(id->device, id->port_num)) { 3704 ret = iw_cm_reject(id_priv->cm_id.iw, 3705 private_data, private_data_len); 3706 } else 3707 ret = -ENOSYS; 3708 3709 return ret; 3710 } 3711 EXPORT_SYMBOL(rdma_reject); 3712 3713 int rdma_disconnect(struct rdma_cm_id *id) 3714 { 3715 struct rdma_id_private *id_priv; 3716 int ret; 3717 3718 id_priv = container_of(id, struct rdma_id_private, id); 3719 if (!id_priv->cm_id.ib) 3720 return -EINVAL; 3721 3722 if (rdma_cap_ib_cm(id->device, id->port_num)) { 3723 ret = cma_modify_qp_err(id_priv); 3724 if (ret) 3725 goto out; 3726 /* Initiate or respond to a disconnect. */ 3727 if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0)) 3728 ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0); 3729 } else if (rdma_cap_iw_cm(id->device, id->port_num)) { 3730 ret = iw_cm_disconnect(id_priv->cm_id.iw, 0); 3731 } else 3732 ret = -EINVAL; 3733 3734 out: 3735 return ret; 3736 } 3737 EXPORT_SYMBOL(rdma_disconnect); 3738 3739 static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) 3740 { 3741 struct rdma_id_private *id_priv; 3742 struct cma_multicast *mc = multicast->context; 3743 struct rdma_cm_event event; 3744 int ret = 0; 3745 3746 id_priv = mc->id_priv; 3747 mutex_lock(&id_priv->handler_mutex); 3748 if (id_priv->state != RDMA_CM_ADDR_BOUND && 3749 id_priv->state != RDMA_CM_ADDR_RESOLVED) 3750 goto out; 3751 3752 if (!status) 3753 status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey)); 3754 mutex_lock(&id_priv->qp_mutex); 3755 if (!status && id_priv->id.qp) 3756 status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid, 3757 be16_to_cpu(multicast->rec.mlid)); 3758 mutex_unlock(&id_priv->qp_mutex); 3759 3760 memset(&event, 0, sizeof event); 3761 event.status = status; 3762 event.param.ud.private_data = mc->context; 3763 if (!status) { 3764 struct rdma_dev_addr *dev_addr = 3765 &id_priv->id.route.addr.dev_addr; 3766 struct net_device *ndev = 3767 dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); 3768 enum ib_gid_type gid_type = 3769 id_priv->cma_dev->default_gid_type[id_priv->id.port_num - 3770 rdma_start_port(id_priv->cma_dev->device)]; 3771 3772 event.event = RDMA_CM_EVENT_MULTICAST_JOIN; 3773 ib_init_ah_from_mcmember(id_priv->id.device, 3774 id_priv->id.port_num, &multicast->rec, 3775 ndev, gid_type, 3776 &event.param.ud.ah_attr); 3777 event.param.ud.qp_num = 0xFFFFFF; 3778 event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey); 3779 if (ndev) 3780 dev_put(ndev); 3781 } else 3782 event.event = RDMA_CM_EVENT_MULTICAST_ERROR; 3783 3784 ret = id_priv->id.event_handler(&id_priv->id, &event); 3785 if (ret) { 3786 cma_exch(id_priv, RDMA_CM_DESTROYING); 3787 mutex_unlock(&id_priv->handler_mutex); 3788 rdma_destroy_id(&id_priv->id); 3789 return 0; 3790 } 3791 3792 out: 3793 mutex_unlock(&id_priv->handler_mutex); 3794 return 0; 3795 } 3796 3797 static void cma_set_mgid(struct rdma_id_private *id_priv, 3798 struct sockaddr *addr, union ib_gid *mgid) 3799 { 3800 unsigned char mc_map[MAX_ADDR_LEN]; 3801 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 3802 struct sockaddr_in *sin = (struct sockaddr_in *) addr; 3803 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr; 3804 3805 if (cma_any_addr(addr)) { 3806 memset(mgid, 0, sizeof *mgid); 3807 } else if ((addr->sa_family == AF_INET6) && 3808 ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFFF0FFFF) == 3809 0xFF10A01B)) { 3810 /* IPv6 address is an SA assigned MGID. */ 3811 memcpy(mgid, &sin6->sin6_addr, sizeof *mgid); 3812 } else if (addr->sa_family == AF_IB) { 3813 memcpy(mgid, &((struct sockaddr_ib *) addr)->sib_addr, sizeof *mgid); 3814 } else if (addr->sa_family == AF_INET6) { 3815 ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map); 3816 if (id_priv->id.ps == RDMA_PS_UDP) 3817 mc_map[7] = 0x01; /* Use RDMA CM signature */ 3818 *mgid = *(union ib_gid *) (mc_map + 4); 3819 } else { 3820 ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map); 3821 if (id_priv->id.ps == RDMA_PS_UDP) 3822 mc_map[7] = 0x01; /* Use RDMA CM signature */ 3823 *mgid = *(union ib_gid *) (mc_map + 4); 3824 } 3825 } 3826 3827 static void cma_query_sa_classport_info_cb(int status, 3828 struct ib_class_port_info *rec, 3829 void *context) 3830 { 3831 struct class_port_info_context *cb_ctx = context; 3832 3833 WARN_ON(!context); 3834 3835 if (status || !rec) { 3836 pr_debug("RDMA CM: %s port %u failed query ClassPortInfo status: %d\n", 3837 cb_ctx->device->name, cb_ctx->port_num, status); 3838 goto out; 3839 } 3840 3841 memcpy(cb_ctx->class_port_info, rec, sizeof(struct ib_class_port_info)); 3842 3843 out: 3844 complete(&cb_ctx->done); 3845 } 3846 3847 static int cma_query_sa_classport_info(struct ib_device *device, u8 port_num, 3848 struct ib_class_port_info *class_port_info) 3849 { 3850 struct class_port_info_context *cb_ctx; 3851 int ret; 3852 3853 cb_ctx = kmalloc(sizeof(*cb_ctx), GFP_KERNEL); 3854 if (!cb_ctx) 3855 return -ENOMEM; 3856 3857 cb_ctx->device = device; 3858 cb_ctx->class_port_info = class_port_info; 3859 cb_ctx->port_num = port_num; 3860 init_completion(&cb_ctx->done); 3861 3862 ret = ib_sa_classport_info_rec_query(&sa_client, device, port_num, 3863 CMA_QUERY_CLASSPORT_INFO_TIMEOUT, 3864 GFP_KERNEL, cma_query_sa_classport_info_cb, 3865 cb_ctx, &cb_ctx->sa_query); 3866 if (ret < 0) { 3867 pr_err("RDMA CM: %s port %u failed to send ClassPortInfo query, ret: %d\n", 3868 device->name, port_num, ret); 3869 goto out; 3870 } 3871 3872 wait_for_completion(&cb_ctx->done); 3873 3874 out: 3875 kfree(cb_ctx); 3876 return ret; 3877 } 3878 3879 static int cma_join_ib_multicast(struct rdma_id_private *id_priv, 3880 struct cma_multicast *mc) 3881 { 3882 struct ib_sa_mcmember_rec rec; 3883 struct ib_class_port_info class_port_info; 3884 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 3885 ib_sa_comp_mask comp_mask; 3886 int ret; 3887 3888 ib_addr_get_mgid(dev_addr, &rec.mgid); 3889 ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num, 3890 &rec.mgid, &rec); 3891 if (ret) 3892 return ret; 3893 3894 ret = cma_set_qkey(id_priv, 0); 3895 if (ret) 3896 return ret; 3897 3898 cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid); 3899 rec.qkey = cpu_to_be32(id_priv->qkey); 3900 rdma_addr_get_sgid(dev_addr, &rec.port_gid); 3901 rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); 3902 rec.join_state = mc->join_state; 3903 3904 if (rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) { 3905 ret = cma_query_sa_classport_info(id_priv->id.device, 3906 id_priv->id.port_num, 3907 &class_port_info); 3908 3909 if (ret) 3910 return ret; 3911 3912 if (!(ib_get_cpi_capmask2(&class_port_info) & 3913 IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT)) { 3914 pr_warn("RDMA CM: %s port %u Unable to multicast join\n" 3915 "RDMA CM: SM doesn't support Send Only Full Member option\n", 3916 id_priv->id.device->name, id_priv->id.port_num); 3917 return -EOPNOTSUPP; 3918 } 3919 } 3920 3921 comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID | 3922 IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE | 3923 IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL | 3924 IB_SA_MCMEMBER_REC_FLOW_LABEL | 3925 IB_SA_MCMEMBER_REC_TRAFFIC_CLASS; 3926 3927 if (id_priv->id.ps == RDMA_PS_IPOIB) 3928 comp_mask |= IB_SA_MCMEMBER_REC_RATE | 3929 IB_SA_MCMEMBER_REC_RATE_SELECTOR | 3930 IB_SA_MCMEMBER_REC_MTU_SELECTOR | 3931 IB_SA_MCMEMBER_REC_MTU | 3932 IB_SA_MCMEMBER_REC_HOP_LIMIT; 3933 3934 mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device, 3935 id_priv->id.port_num, &rec, 3936 comp_mask, GFP_KERNEL, 3937 cma_ib_mc_handler, mc); 3938 return PTR_ERR_OR_ZERO(mc->multicast.ib); 3939 } 3940 3941 static void iboe_mcast_work_handler(struct work_struct *work) 3942 { 3943 struct iboe_mcast_work *mw = container_of(work, struct iboe_mcast_work, work); 3944 struct cma_multicast *mc = mw->mc; 3945 struct ib_sa_multicast *m = mc->multicast.ib; 3946 3947 mc->multicast.ib->context = mc; 3948 cma_ib_mc_handler(0, m); 3949 kref_put(&mc->mcref, release_mc); 3950 kfree(mw); 3951 } 3952 3953 static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid) 3954 { 3955 struct sockaddr_in *sin = (struct sockaddr_in *)addr; 3956 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr; 3957 3958 if (cma_any_addr(addr)) { 3959 memset(mgid, 0, sizeof *mgid); 3960 } else if (addr->sa_family == AF_INET6) { 3961 memcpy(mgid, &sin6->sin6_addr, sizeof *mgid); 3962 } else { 3963 mgid->raw[0] = 0xff; 3964 mgid->raw[1] = 0x0e; 3965 mgid->raw[2] = 0; 3966 mgid->raw[3] = 0; 3967 mgid->raw[4] = 0; 3968 mgid->raw[5] = 0; 3969 mgid->raw[6] = 0; 3970 mgid->raw[7] = 0; 3971 mgid->raw[8] = 0; 3972 mgid->raw[9] = 0; 3973 mgid->raw[10] = 0xff; 3974 mgid->raw[11] = 0xff; 3975 *(__be32 *)(&mgid->raw[12]) = sin->sin_addr.s_addr; 3976 } 3977 } 3978 3979 static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, 3980 struct cma_multicast *mc) 3981 { 3982 struct iboe_mcast_work *work; 3983 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 3984 int err = 0; 3985 struct sockaddr *addr = (struct sockaddr *)&mc->addr; 3986 struct net_device *ndev = NULL; 3987 enum ib_gid_type gid_type; 3988 bool send_only; 3989 3990 send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN); 3991 3992 if (cma_zero_addr((struct sockaddr *)&mc->addr)) 3993 return -EINVAL; 3994 3995 work = kzalloc(sizeof *work, GFP_KERNEL); 3996 if (!work) 3997 return -ENOMEM; 3998 3999 mc->multicast.ib = kzalloc(sizeof(struct ib_sa_multicast), GFP_KERNEL); 4000 if (!mc->multicast.ib) { 4001 err = -ENOMEM; 4002 goto out1; 4003 } 4004 4005 cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid); 4006 4007 mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff); 4008 if (id_priv->id.ps == RDMA_PS_UDP) 4009 mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY); 4010 4011 if (dev_addr->bound_dev_if) 4012 ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); 4013 if (!ndev) { 4014 err = -ENODEV; 4015 goto out2; 4016 } 4017 mc->multicast.ib->rec.rate = iboe_get_rate(ndev); 4018 mc->multicast.ib->rec.hop_limit = 1; 4019 mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->if_mtu); 4020 4021 gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num - 4022 rdma_start_port(id_priv->cma_dev->device)]; 4023 if (addr->sa_family == AF_INET) { 4024 if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) { 4025 mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT; 4026 if (!send_only) { 4027 mc->igmp_joined = true; 4028 } 4029 } 4030 } else { 4031 if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) 4032 err = -ENOTSUPP; 4033 } 4034 dev_put(ndev); 4035 if (err || !mc->multicast.ib->rec.mtu) { 4036 if (!err) 4037 err = -EINVAL; 4038 goto out2; 4039 } 4040 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, 4041 &mc->multicast.ib->rec.port_gid); 4042 work->id = id_priv; 4043 work->mc = mc; 4044 INIT_WORK(&work->work, iboe_mcast_work_handler); 4045 kref_get(&mc->mcref); 4046 queue_work(cma_wq, &work->work); 4047 4048 return 0; 4049 4050 out2: 4051 kfree(mc->multicast.ib); 4052 out1: 4053 kfree(work); 4054 return err; 4055 } 4056 4057 int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, 4058 u8 join_state, void *context) 4059 { 4060 struct rdma_id_private *id_priv; 4061 struct cma_multicast *mc; 4062 int ret; 4063 4064 id_priv = container_of(id, struct rdma_id_private, id); 4065 if (!cma_comp(id_priv, RDMA_CM_ADDR_BOUND) && 4066 !cma_comp(id_priv, RDMA_CM_ADDR_RESOLVED)) 4067 return -EINVAL; 4068 4069 mc = kmalloc(sizeof *mc, GFP_KERNEL); 4070 if (!mc) 4071 return -ENOMEM; 4072 4073 memcpy(&mc->addr, addr, rdma_addr_size(addr)); 4074 mc->context = context; 4075 mc->id_priv = id_priv; 4076 mc->igmp_joined = false; 4077 mc->join_state = join_state; 4078 spin_lock(&id_priv->lock); 4079 list_add(&mc->list, &id_priv->mc_list); 4080 spin_unlock(&id_priv->lock); 4081 4082 if (rdma_protocol_roce(id->device, id->port_num)) { 4083 kref_init(&mc->mcref); 4084 ret = cma_iboe_join_multicast(id_priv, mc); 4085 } else if (rdma_cap_ib_mcast(id->device, id->port_num)) 4086 ret = cma_join_ib_multicast(id_priv, mc); 4087 else 4088 ret = -ENOSYS; 4089 4090 if (ret) { 4091 spin_lock_irq(&id_priv->lock); 4092 list_del(&mc->list); 4093 spin_unlock_irq(&id_priv->lock); 4094 kfree(mc); 4095 } 4096 return ret; 4097 } 4098 EXPORT_SYMBOL(rdma_join_multicast); 4099 4100 void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr) 4101 { 4102 struct rdma_id_private *id_priv; 4103 struct cma_multicast *mc; 4104 4105 id_priv = container_of(id, struct rdma_id_private, id); 4106 spin_lock_irq(&id_priv->lock); 4107 list_for_each_entry(mc, &id_priv->mc_list, list) { 4108 if (!memcmp(&mc->addr, addr, rdma_addr_size(addr))) { 4109 list_del(&mc->list); 4110 spin_unlock_irq(&id_priv->lock); 4111 4112 if (id->qp) 4113 ib_detach_mcast(id->qp, 4114 &mc->multicast.ib->rec.mgid, 4115 be16_to_cpu(mc->multicast.ib->rec.mlid)); 4116 4117 BUG_ON(id_priv->cma_dev->device != id->device); 4118 4119 if (rdma_cap_ib_mcast(id->device, id->port_num)) { 4120 ib_sa_free_multicast(mc->multicast.ib); 4121 kfree(mc); 4122 } else if (rdma_protocol_roce(id->device, id->port_num)) { 4123 if (mc->igmp_joined) { 4124 struct rdma_dev_addr *dev_addr = 4125 &id->route.addr.dev_addr; 4126 struct net_device *ndev = NULL; 4127 4128 if (dev_addr->bound_dev_if) 4129 ndev = dev_get_by_index(dev_addr->net, 4130 dev_addr->bound_dev_if); 4131 if (ndev) { 4132 dev_put(ndev); 4133 } 4134 mc->igmp_joined = false; 4135 } 4136 kref_put(&mc->mcref, release_mc); 4137 } 4138 return; 4139 } 4140 } 4141 spin_unlock_irq(&id_priv->lock); 4142 } 4143 EXPORT_SYMBOL(rdma_leave_multicast); 4144 4145 static int 4146 sysctl_cma_default_roce_mode(SYSCTL_HANDLER_ARGS) 4147 { 4148 struct cma_device *cma_dev = arg1; 4149 const int port = arg2; 4150 char buf[64]; 4151 int error; 4152 4153 strlcpy(buf, ib_cache_gid_type_str( 4154 cma_get_default_gid_type(cma_dev, port)), sizeof(buf)); 4155 4156 error = sysctl_handle_string(oidp, buf, sizeof(buf), req); 4157 if (error != 0 || req->newptr == NULL) 4158 goto done; 4159 4160 error = ib_cache_gid_parse_type_str(buf); 4161 if (error < 0) { 4162 error = EINVAL; 4163 goto done; 4164 } 4165 4166 cma_set_default_gid_type(cma_dev, port, error); 4167 error = 0; 4168 done: 4169 return (error); 4170 } 4171 4172 static void cma_add_one(struct ib_device *device) 4173 { 4174 struct cma_device *cma_dev; 4175 struct rdma_id_private *id_priv; 4176 unsigned int i; 4177 4178 cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL); 4179 if (!cma_dev) 4180 return; 4181 4182 sysctl_ctx_init(&cma_dev->sysctl_ctx); 4183 4184 cma_dev->device = device; 4185 cma_dev->default_gid_type = kcalloc(device->phys_port_cnt, 4186 sizeof(*cma_dev->default_gid_type), 4187 GFP_KERNEL); 4188 if (!cma_dev->default_gid_type) { 4189 kfree(cma_dev); 4190 return; 4191 } 4192 for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) { 4193 unsigned long supported_gids; 4194 unsigned int default_gid_type; 4195 4196 supported_gids = roce_gid_type_mask_support(device, i); 4197 4198 if (WARN_ON(!supported_gids)) { 4199 /* set something valid */ 4200 default_gid_type = 0; 4201 } else if (test_bit(IB_GID_TYPE_ROCE_UDP_ENCAP, &supported_gids)) { 4202 /* prefer RoCEv2, if supported */ 4203 default_gid_type = IB_GID_TYPE_ROCE_UDP_ENCAP; 4204 } else { 4205 default_gid_type = find_first_bit(&supported_gids, 4206 BITS_PER_LONG); 4207 } 4208 cma_dev->default_gid_type[i - rdma_start_port(device)] = 4209 default_gid_type; 4210 } 4211 4212 init_completion(&cma_dev->comp); 4213 atomic_set(&cma_dev->refcount, 1); 4214 INIT_LIST_HEAD(&cma_dev->id_list); 4215 ib_set_client_data(device, &cma_client, cma_dev); 4216 4217 mutex_lock(&lock); 4218 list_add_tail(&cma_dev->list, &dev_list); 4219 list_for_each_entry(id_priv, &listen_any_list, list) 4220 cma_listen_on_dev(id_priv, cma_dev); 4221 mutex_unlock(&lock); 4222 4223 for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) { 4224 char buf[64]; 4225 4226 snprintf(buf, sizeof(buf), "default_roce_mode_port%d", i); 4227 4228 (void) SYSCTL_ADD_PROC(&cma_dev->sysctl_ctx, 4229 SYSCTL_CHILDREN(device->ports_parent->parent->oidp), 4230 OID_AUTO, buf, CTLTYPE_STRING | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 4231 cma_dev, i, &sysctl_cma_default_roce_mode, "A", 4232 "Default RoCE mode. Valid values: IB/RoCE v1 and RoCE v2"); 4233 } 4234 } 4235 4236 static int cma_remove_id_dev(struct rdma_id_private *id_priv) 4237 { 4238 struct rdma_cm_event event; 4239 enum rdma_cm_state state; 4240 int ret = 0; 4241 4242 /* Record that we want to remove the device */ 4243 state = cma_exch(id_priv, RDMA_CM_DEVICE_REMOVAL); 4244 if (state == RDMA_CM_DESTROYING) 4245 return 0; 4246 4247 cma_cancel_operation(id_priv, state); 4248 mutex_lock(&id_priv->handler_mutex); 4249 4250 /* Check for destruction from another callback. */ 4251 if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL)) 4252 goto out; 4253 4254 memset(&event, 0, sizeof event); 4255 event.event = RDMA_CM_EVENT_DEVICE_REMOVAL; 4256 ret = id_priv->id.event_handler(&id_priv->id, &event); 4257 out: 4258 mutex_unlock(&id_priv->handler_mutex); 4259 return ret; 4260 } 4261 4262 static void cma_process_remove(struct cma_device *cma_dev) 4263 { 4264 struct rdma_id_private *id_priv; 4265 int ret; 4266 4267 mutex_lock(&lock); 4268 while (!list_empty(&cma_dev->id_list)) { 4269 id_priv = list_entry(cma_dev->id_list.next, 4270 struct rdma_id_private, list); 4271 4272 list_del(&id_priv->listen_list); 4273 list_del_init(&id_priv->list); 4274 atomic_inc(&id_priv->refcount); 4275 mutex_unlock(&lock); 4276 4277 ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv); 4278 cma_deref_id(id_priv); 4279 if (ret) 4280 rdma_destroy_id(&id_priv->id); 4281 4282 mutex_lock(&lock); 4283 } 4284 mutex_unlock(&lock); 4285 4286 cma_deref_dev(cma_dev); 4287 wait_for_completion(&cma_dev->comp); 4288 } 4289 4290 static void cma_remove_one(struct ib_device *device, void *client_data) 4291 { 4292 struct cma_device *cma_dev = client_data; 4293 4294 if (!cma_dev) 4295 return; 4296 4297 mutex_lock(&lock); 4298 list_del(&cma_dev->list); 4299 mutex_unlock(&lock); 4300 4301 cma_process_remove(cma_dev); 4302 sysctl_ctx_free(&cma_dev->sysctl_ctx); 4303 kfree(cma_dev->default_gid_type); 4304 kfree(cma_dev); 4305 } 4306 4307 static void cma_init_vnet(void *arg) 4308 { 4309 struct cma_pernet *pernet = &VNET(cma_pernet); 4310 4311 idr_init(&pernet->tcp_ps); 4312 idr_init(&pernet->udp_ps); 4313 idr_init(&pernet->ipoib_ps); 4314 idr_init(&pernet->ib_ps); 4315 } 4316 VNET_SYSINIT(cma_init_vnet, SI_SUB_OFED_MODINIT - 1, SI_ORDER_FIRST, cma_init_vnet, NULL); 4317 4318 static void cma_destroy_vnet(void *arg) 4319 { 4320 struct cma_pernet *pernet = &VNET(cma_pernet); 4321 4322 idr_destroy(&pernet->tcp_ps); 4323 idr_destroy(&pernet->udp_ps); 4324 idr_destroy(&pernet->ipoib_ps); 4325 idr_destroy(&pernet->ib_ps); 4326 } 4327 VNET_SYSUNINIT(cma_destroy_vnet, SI_SUB_OFED_MODINIT - 1, SI_ORDER_SECOND, cma_destroy_vnet, NULL); 4328 4329 static int __init cma_init(void) 4330 { 4331 int ret; 4332 4333 cma_wq = alloc_ordered_workqueue("rdma_cm", WQ_MEM_RECLAIM); 4334 if (!cma_wq) 4335 return -ENOMEM; 4336 4337 ib_sa_register_client(&sa_client); 4338 rdma_addr_register_client(&addr_client); 4339 4340 ret = ib_register_client(&cma_client); 4341 if (ret) 4342 goto err; 4343 4344 cma_configfs_init(); 4345 4346 return 0; 4347 4348 err: 4349 rdma_addr_unregister_client(&addr_client); 4350 ib_sa_unregister_client(&sa_client); 4351 destroy_workqueue(cma_wq); 4352 return ret; 4353 } 4354 4355 static void __exit cma_cleanup(void) 4356 { 4357 cma_configfs_exit(); 4358 ib_unregister_client(&cma_client); 4359 rdma_addr_unregister_client(&addr_client); 4360 ib_sa_unregister_client(&sa_client); 4361 destroy_workqueue(cma_wq); 4362 } 4363 4364 module_init(cma_init); 4365 module_exit(cma_cleanup); 4366