1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 3 * 4 * Copyright (c) 2005 Voltaire Inc. All rights reserved. 5 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved. 6 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. 7 * Copyright (c) 2005-2006 Intel Corporation. All rights reserved. 8 * 9 * This software is available to you under a choice of one of two 10 * licenses. You may choose to be licensed under the terms of the GNU 11 * General Public License (GPL) Version 2, available from the file 12 * COPYING in the main directory of this source tree, or the 13 * OpenIB.org BSD license below: 14 * 15 * Redistribution and use in source and binary forms, with or 16 * without modification, are permitted provided that the following 17 * conditions are met: 18 * 19 * - Redistributions of source code must retain the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer. 22 * 23 * - Redistributions in binary form must reproduce the above 24 * copyright notice, this list of conditions and the following 25 * disclaimer in the documentation and/or other materials 26 * provided with the distribution. 27 * 28 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 29 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 30 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 31 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 32 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 33 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 34 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 35 * SOFTWARE. 36 */ 37 38 #include <sys/cdefs.h> 39 __FBSDID("$FreeBSD$"); 40 41 #define LINUXKPI_PARAM_PREFIX ibcore_ 42 43 #include <linux/completion.h> 44 #include <linux/in.h> 45 #include <linux/in6.h> 46 #include <linux/mutex.h> 47 #include <linux/random.h> 48 #include <linux/idr.h> 49 #include <linux/inetdevice.h> 50 #include <linux/slab.h> 51 #include <linux/module.h> 52 #include <net/route.h> 53 #include <net/route/nhop.h> 54 55 #include <net/tcp.h> 56 #include <net/ipv6.h> 57 58 #include <netinet/in_fib.h> 59 60 #include <netinet6/in6_fib.h> 61 #include <netinet6/scope6_var.h> 62 #include <netinet6/ip6_var.h> 63 64 #include <rdma/rdma_cm.h> 65 #include <rdma/rdma_cm_ib.h> 66 #include <rdma/rdma_sdp.h> 67 #include <rdma/ib.h> 68 #include <rdma/ib_addr.h> 69 #include <rdma/ib_cache.h> 70 #include <rdma/ib_cm.h> 71 #include <rdma/ib_sa.h> 72 #include <rdma/iw_cm.h> 73 74 #include <sys/priv.h> 75 76 #include "core_priv.h" 77 78 MODULE_AUTHOR("Sean Hefty"); 79 MODULE_DESCRIPTION("Generic RDMA CM Agent"); 80 MODULE_LICENSE("Dual BSD/GPL"); 81 82 #define CMA_CM_RESPONSE_TIMEOUT 20 83 #define CMA_QUERY_CLASSPORT_INFO_TIMEOUT 3000 84 #define CMA_MAX_CM_RETRIES 15 85 #define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24) 86 #define CMA_IBOE_PACKET_LIFETIME 18 87 88 static const char * const cma_events[] = { 89 [RDMA_CM_EVENT_ADDR_RESOLVED] = "address resolved", 90 [RDMA_CM_EVENT_ADDR_ERROR] = "address error", 91 [RDMA_CM_EVENT_ROUTE_RESOLVED] = "route resolved ", 92 [RDMA_CM_EVENT_ROUTE_ERROR] = "route error", 93 [RDMA_CM_EVENT_CONNECT_REQUEST] = "connect request", 94 [RDMA_CM_EVENT_CONNECT_RESPONSE] = "connect response", 95 [RDMA_CM_EVENT_CONNECT_ERROR] = "connect error", 96 [RDMA_CM_EVENT_UNREACHABLE] = "unreachable", 97 [RDMA_CM_EVENT_REJECTED] = "rejected", 98 [RDMA_CM_EVENT_ESTABLISHED] = "established", 99 [RDMA_CM_EVENT_DISCONNECTED] = "disconnected", 100 [RDMA_CM_EVENT_DEVICE_REMOVAL] = "device removal", 101 [RDMA_CM_EVENT_MULTICAST_JOIN] = "multicast join", 102 [RDMA_CM_EVENT_MULTICAST_ERROR] = "multicast error", 103 [RDMA_CM_EVENT_ADDR_CHANGE] = "address change", 104 [RDMA_CM_EVENT_TIMEWAIT_EXIT] = "timewait exit", 105 }; 106 107 const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event) 108 { 109 size_t index = event; 110 111 return (index < ARRAY_SIZE(cma_events) && cma_events[index]) ? 112 cma_events[index] : "unrecognized event"; 113 } 114 EXPORT_SYMBOL(rdma_event_msg); 115 116 static int cma_check_linklocal(struct rdma_dev_addr *, struct sockaddr *); 117 static void cma_add_one(struct ib_device *device); 118 static void cma_remove_one(struct ib_device *device, void *client_data); 119 static enum rdma_port_space rdma_ps_from_service_id(__be64 service_id); 120 121 static struct ib_client cma_client = { 122 .name = "cma", 123 .add = cma_add_one, 124 .remove = cma_remove_one 125 }; 126 127 static struct ib_sa_client sa_client; 128 static struct rdma_addr_client addr_client; 129 static LIST_HEAD(dev_list); 130 static LIST_HEAD(listen_any_list); 131 static DEFINE_MUTEX(lock); 132 static struct workqueue_struct *cma_wq; 133 134 struct cma_pernet { 135 struct idr tcp_ps; 136 struct idr udp_ps; 137 struct idr ipoib_ps; 138 struct idr ib_ps; 139 struct idr sdp_ps; 140 }; 141 142 VNET_DEFINE(struct cma_pernet, cma_pernet); 143 144 static struct cma_pernet *cma_pernet_ptr(struct vnet *vnet) 145 { 146 struct cma_pernet *retval; 147 148 CURVNET_SET_QUIET(vnet); 149 retval = &VNET(cma_pernet); 150 CURVNET_RESTORE(); 151 152 return (retval); 153 } 154 155 static struct idr *cma_pernet_idr(struct vnet *net, enum rdma_port_space ps) 156 { 157 struct cma_pernet *pernet = cma_pernet_ptr(net); 158 159 switch (ps) { 160 case RDMA_PS_TCP: 161 return &pernet->tcp_ps; 162 case RDMA_PS_UDP: 163 return &pernet->udp_ps; 164 case RDMA_PS_IPOIB: 165 return &pernet->ipoib_ps; 166 case RDMA_PS_IB: 167 return &pernet->ib_ps; 168 case RDMA_PS_SDP: 169 return &pernet->sdp_ps; 170 default: 171 return NULL; 172 } 173 } 174 175 struct cma_device { 176 struct list_head list; 177 struct ib_device *device; 178 struct completion comp; 179 atomic_t refcount; 180 struct list_head id_list; 181 struct sysctl_ctx_list sysctl_ctx; 182 enum ib_gid_type *default_gid_type; 183 }; 184 185 struct rdma_bind_list { 186 enum rdma_port_space ps; 187 struct hlist_head owners; 188 unsigned short port; 189 }; 190 191 struct class_port_info_context { 192 struct ib_class_port_info *class_port_info; 193 struct ib_device *device; 194 struct completion done; 195 struct ib_sa_query *sa_query; 196 u8 port_num; 197 }; 198 199 static int cma_ps_alloc(struct vnet *vnet, enum rdma_port_space ps, 200 struct rdma_bind_list *bind_list, int snum) 201 { 202 struct idr *idr = cma_pernet_idr(vnet, ps); 203 204 return idr_alloc(idr, bind_list, snum, snum + 1, GFP_KERNEL); 205 } 206 207 static struct rdma_bind_list *cma_ps_find(struct vnet *net, 208 enum rdma_port_space ps, int snum) 209 { 210 struct idr *idr = cma_pernet_idr(net, ps); 211 212 return idr_find(idr, snum); 213 } 214 215 static void cma_ps_remove(struct vnet *net, enum rdma_port_space ps, int snum) 216 { 217 struct idr *idr = cma_pernet_idr(net, ps); 218 219 idr_remove(idr, snum); 220 } 221 222 enum { 223 CMA_OPTION_AFONLY, 224 }; 225 226 void cma_ref_dev(struct cma_device *cma_dev) 227 { 228 atomic_inc(&cma_dev->refcount); 229 } 230 231 struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter, 232 void *cookie) 233 { 234 struct cma_device *cma_dev; 235 struct cma_device *found_cma_dev = NULL; 236 237 mutex_lock(&lock); 238 239 list_for_each_entry(cma_dev, &dev_list, list) 240 if (filter(cma_dev->device, cookie)) { 241 found_cma_dev = cma_dev; 242 break; 243 } 244 245 if (found_cma_dev) 246 cma_ref_dev(found_cma_dev); 247 mutex_unlock(&lock); 248 return found_cma_dev; 249 } 250 251 int cma_get_default_gid_type(struct cma_device *cma_dev, 252 unsigned int port) 253 { 254 if (port < rdma_start_port(cma_dev->device) || 255 port > rdma_end_port(cma_dev->device)) 256 return -EINVAL; 257 258 return cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)]; 259 } 260 261 int cma_set_default_gid_type(struct cma_device *cma_dev, 262 unsigned int port, 263 enum ib_gid_type default_gid_type) 264 { 265 unsigned long supported_gids; 266 267 if (port < rdma_start_port(cma_dev->device) || 268 port > rdma_end_port(cma_dev->device)) 269 return -EINVAL; 270 271 supported_gids = roce_gid_type_mask_support(cma_dev->device, port); 272 273 if (!(supported_gids & 1 << default_gid_type)) 274 return -EINVAL; 275 276 cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)] = 277 default_gid_type; 278 279 return 0; 280 } 281 282 struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev) 283 { 284 return cma_dev->device; 285 } 286 287 /* 288 * Device removal can occur at anytime, so we need extra handling to 289 * serialize notifying the user of device removal with other callbacks. 290 * We do this by disabling removal notification while a callback is in process, 291 * and reporting it after the callback completes. 292 */ 293 struct rdma_id_private { 294 struct rdma_cm_id id; 295 296 struct rdma_bind_list *bind_list; 297 struct hlist_node node; 298 struct list_head list; /* listen_any_list or cma_device.list */ 299 struct list_head listen_list; /* per device listens */ 300 struct cma_device *cma_dev; 301 struct list_head mc_list; 302 303 int internal_id; 304 enum rdma_cm_state state; 305 spinlock_t lock; 306 struct mutex qp_mutex; 307 308 struct completion comp; 309 atomic_t refcount; 310 struct mutex handler_mutex; 311 312 int backlog; 313 int timeout_ms; 314 struct ib_sa_query *query; 315 int query_id; 316 union { 317 struct ib_cm_id *ib; 318 struct iw_cm_id *iw; 319 } cm_id; 320 321 u32 seq_num; 322 u32 qkey; 323 u32 qp_num; 324 pid_t owner; 325 u32 options; 326 u8 srq; 327 u8 tos; 328 u8 reuseaddr; 329 u8 afonly; 330 enum ib_gid_type gid_type; 331 }; 332 333 struct cma_multicast { 334 struct rdma_id_private *id_priv; 335 union { 336 struct ib_sa_multicast *ib; 337 } multicast; 338 struct list_head list; 339 void *context; 340 struct sockaddr_storage addr; 341 struct kref mcref; 342 bool igmp_joined; 343 u8 join_state; 344 }; 345 346 struct cma_work { 347 struct work_struct work; 348 struct rdma_id_private *id; 349 enum rdma_cm_state old_state; 350 enum rdma_cm_state new_state; 351 struct rdma_cm_event event; 352 }; 353 354 struct cma_ndev_work { 355 struct work_struct work; 356 struct rdma_id_private *id; 357 struct rdma_cm_event event; 358 }; 359 360 struct iboe_mcast_work { 361 struct work_struct work; 362 struct rdma_id_private *id; 363 struct cma_multicast *mc; 364 }; 365 366 struct cma_hdr { 367 u8 cma_version; 368 u8 ip_version; /* IP version: 7:4 */ 369 __be16 port; 370 union cma_ip_addr src_addr; 371 union cma_ip_addr dst_addr; 372 }; 373 374 #define CMA_VERSION 0x00 375 #define SDP_MAJ_VERSION 0x2 376 377 struct cma_req_info { 378 struct ib_device *device; 379 int port; 380 union ib_gid local_gid; 381 __be64 service_id; 382 u16 pkey; 383 bool has_gid:1; 384 }; 385 386 static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp) 387 { 388 unsigned long flags; 389 int ret; 390 391 spin_lock_irqsave(&id_priv->lock, flags); 392 ret = (id_priv->state == comp); 393 spin_unlock_irqrestore(&id_priv->lock, flags); 394 return ret; 395 } 396 397 static int cma_comp_exch(struct rdma_id_private *id_priv, 398 enum rdma_cm_state comp, enum rdma_cm_state exch) 399 { 400 unsigned long flags; 401 int ret; 402 403 spin_lock_irqsave(&id_priv->lock, flags); 404 if ((ret = (id_priv->state == comp))) 405 id_priv->state = exch; 406 spin_unlock_irqrestore(&id_priv->lock, flags); 407 return ret; 408 } 409 410 static enum rdma_cm_state cma_exch(struct rdma_id_private *id_priv, 411 enum rdma_cm_state exch) 412 { 413 unsigned long flags; 414 enum rdma_cm_state old; 415 416 spin_lock_irqsave(&id_priv->lock, flags); 417 old = id_priv->state; 418 id_priv->state = exch; 419 spin_unlock_irqrestore(&id_priv->lock, flags); 420 return old; 421 } 422 423 static inline u8 cma_get_ip_ver(const struct cma_hdr *hdr) 424 { 425 return hdr->ip_version >> 4; 426 } 427 428 static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver) 429 { 430 hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF); 431 } 432 433 static inline u8 sdp_get_majv(u8 sdp_version) 434 { 435 return sdp_version >> 4; 436 } 437 438 static inline u8 sdp_get_ip_ver(const struct sdp_hh *hh) 439 { 440 return hh->ipv_cap >> 4; 441 } 442 443 static inline void sdp_set_ip_ver(struct sdp_hh *hh, u8 ip_ver) 444 { 445 hh->ipv_cap = (ip_ver << 4) | (hh->ipv_cap & 0xF); 446 } 447 448 static int cma_igmp_send(struct net_device *ndev, const union ib_gid *mgid, bool join) 449 { 450 int retval; 451 452 if (ndev) { 453 union { 454 struct sockaddr sock; 455 struct sockaddr_storage storage; 456 } addr; 457 458 rdma_gid2ip(&addr.sock, mgid); 459 460 CURVNET_SET_QUIET(ndev->if_vnet); 461 if (join) 462 retval = -if_addmulti(ndev, &addr.sock, NULL); 463 else 464 retval = -if_delmulti(ndev, &addr.sock); 465 CURVNET_RESTORE(); 466 } else { 467 retval = -ENODEV; 468 } 469 return retval; 470 } 471 472 static void _cma_attach_to_dev(struct rdma_id_private *id_priv, 473 struct cma_device *cma_dev) 474 { 475 cma_ref_dev(cma_dev); 476 id_priv->cma_dev = cma_dev; 477 id_priv->gid_type = 0; 478 id_priv->id.device = cma_dev->device; 479 id_priv->id.route.addr.dev_addr.transport = 480 rdma_node_get_transport(cma_dev->device->node_type); 481 list_add_tail(&id_priv->list, &cma_dev->id_list); 482 } 483 484 static void cma_attach_to_dev(struct rdma_id_private *id_priv, 485 struct cma_device *cma_dev) 486 { 487 _cma_attach_to_dev(id_priv, cma_dev); 488 id_priv->gid_type = 489 cma_dev->default_gid_type[id_priv->id.port_num - 490 rdma_start_port(cma_dev->device)]; 491 } 492 493 void cma_deref_dev(struct cma_device *cma_dev) 494 { 495 if (atomic_dec_and_test(&cma_dev->refcount)) 496 complete(&cma_dev->comp); 497 } 498 499 static inline void release_mc(struct kref *kref) 500 { 501 struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref); 502 503 kfree(mc->multicast.ib); 504 kfree(mc); 505 } 506 507 static void cma_release_dev(struct rdma_id_private *id_priv) 508 { 509 mutex_lock(&lock); 510 list_del(&id_priv->list); 511 cma_deref_dev(id_priv->cma_dev); 512 id_priv->cma_dev = NULL; 513 mutex_unlock(&lock); 514 } 515 516 static inline struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv) 517 { 518 return (struct sockaddr *) &id_priv->id.route.addr.src_addr; 519 } 520 521 static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv) 522 { 523 return (struct sockaddr *) &id_priv->id.route.addr.dst_addr; 524 } 525 526 static inline unsigned short cma_family(struct rdma_id_private *id_priv) 527 { 528 return id_priv->id.route.addr.src_addr.ss_family; 529 } 530 531 static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey) 532 { 533 struct ib_sa_mcmember_rec rec; 534 int ret = 0; 535 536 if (id_priv->qkey) { 537 if (qkey && id_priv->qkey != qkey) 538 return -EINVAL; 539 return 0; 540 } 541 542 if (qkey) { 543 id_priv->qkey = qkey; 544 return 0; 545 } 546 547 switch (id_priv->id.ps) { 548 case RDMA_PS_UDP: 549 case RDMA_PS_IB: 550 id_priv->qkey = RDMA_UDP_QKEY; 551 break; 552 case RDMA_PS_IPOIB: 553 ib_addr_get_mgid(&id_priv->id.route.addr.dev_addr, &rec.mgid); 554 ret = ib_sa_get_mcmember_rec(id_priv->id.device, 555 id_priv->id.port_num, &rec.mgid, 556 &rec); 557 if (!ret) 558 id_priv->qkey = be32_to_cpu(rec.qkey); 559 break; 560 default: 561 break; 562 } 563 return ret; 564 } 565 566 static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr) 567 { 568 dev_addr->dev_type = ARPHRD_INFINIBAND; 569 rdma_addr_set_sgid(dev_addr, (union ib_gid *) &sib->sib_addr); 570 ib_addr_set_pkey(dev_addr, ntohs(sib->sib_pkey)); 571 } 572 573 static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) 574 { 575 int ret; 576 577 if (addr->sa_family != AF_IB) { 578 ret = rdma_translate_ip(addr, dev_addr); 579 } else { 580 cma_translate_ib((struct sockaddr_ib *) addr, dev_addr); 581 ret = 0; 582 } 583 584 return ret; 585 } 586 587 static inline int cma_validate_port(struct ib_device *device, u8 port, 588 enum ib_gid_type gid_type, 589 union ib_gid *gid, 590 const struct rdma_dev_addr *dev_addr) 591 { 592 const int dev_type = dev_addr->dev_type; 593 struct net_device *ndev; 594 int ret = -ENODEV; 595 596 if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port)) 597 return ret; 598 599 if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port)) 600 return ret; 601 602 if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) { 603 ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); 604 } else { 605 ndev = NULL; 606 gid_type = IB_GID_TYPE_IB; 607 } 608 609 ret = ib_find_cached_gid_by_port(device, gid, gid_type, port, 610 ndev, NULL); 611 612 if (ndev) 613 dev_put(ndev); 614 615 return ret; 616 } 617 618 static int cma_acquire_dev(struct rdma_id_private *id_priv, 619 struct rdma_id_private *listen_id_priv) 620 { 621 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 622 struct cma_device *cma_dev; 623 union ib_gid gid, iboe_gid, *gidp; 624 int ret = -ENODEV; 625 u8 port; 626 627 if (dev_addr->dev_type != ARPHRD_INFINIBAND && 628 id_priv->id.ps == RDMA_PS_IPOIB) 629 return -EINVAL; 630 631 mutex_lock(&lock); 632 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, 633 &iboe_gid); 634 635 memcpy(&gid, dev_addr->src_dev_addr + 636 rdma_addr_gid_offset(dev_addr), sizeof gid); 637 638 if (listen_id_priv) { 639 cma_dev = listen_id_priv->cma_dev; 640 port = listen_id_priv->id.port_num; 641 642 if (rdma_is_port_valid(cma_dev->device, port)) { 643 gidp = rdma_protocol_roce(cma_dev->device, port) ? 644 &iboe_gid : &gid; 645 646 ret = cma_validate_port(cma_dev->device, port, 647 rdma_protocol_ib(cma_dev->device, port) ? 648 IB_GID_TYPE_IB : 649 listen_id_priv->gid_type, gidp, dev_addr); 650 if (!ret) { 651 id_priv->id.port_num = port; 652 goto out; 653 } 654 } 655 } 656 657 list_for_each_entry(cma_dev, &dev_list, list) { 658 for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) { 659 if (listen_id_priv && 660 listen_id_priv->cma_dev == cma_dev && 661 listen_id_priv->id.port_num == port) 662 continue; 663 664 gidp = rdma_protocol_roce(cma_dev->device, port) ? 665 &iboe_gid : &gid; 666 667 ret = cma_validate_port(cma_dev->device, port, 668 rdma_protocol_ib(cma_dev->device, port) ? 669 IB_GID_TYPE_IB : 670 cma_dev->default_gid_type[port - 1], 671 gidp, dev_addr); 672 if (!ret) { 673 id_priv->id.port_num = port; 674 goto out; 675 } 676 } 677 } 678 679 out: 680 if (!ret) 681 cma_attach_to_dev(id_priv, cma_dev); 682 683 mutex_unlock(&lock); 684 return ret; 685 } 686 687 /* 688 * Select the source IB device and address to reach the destination IB address. 689 */ 690 static int cma_resolve_ib_dev(struct rdma_id_private *id_priv) 691 { 692 struct cma_device *cma_dev, *cur_dev; 693 struct sockaddr_ib *addr; 694 union ib_gid gid, sgid, *dgid; 695 u16 pkey, index; 696 u8 p; 697 int i; 698 699 cma_dev = NULL; 700 addr = (struct sockaddr_ib *) cma_dst_addr(id_priv); 701 dgid = (union ib_gid *) &addr->sib_addr; 702 pkey = ntohs(addr->sib_pkey); 703 704 list_for_each_entry(cur_dev, &dev_list, list) { 705 for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) { 706 if (!rdma_cap_af_ib(cur_dev->device, p)) 707 continue; 708 709 if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index)) 710 continue; 711 712 for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i, 713 &gid, NULL); 714 i++) { 715 if (!memcmp(&gid, dgid, sizeof(gid))) { 716 cma_dev = cur_dev; 717 sgid = gid; 718 id_priv->id.port_num = p; 719 goto found; 720 } 721 722 if (!cma_dev && (gid.global.subnet_prefix == 723 dgid->global.subnet_prefix)) { 724 cma_dev = cur_dev; 725 sgid = gid; 726 id_priv->id.port_num = p; 727 } 728 } 729 } 730 } 731 732 if (!cma_dev) 733 return -ENODEV; 734 735 found: 736 cma_attach_to_dev(id_priv, cma_dev); 737 addr = (struct sockaddr_ib *) cma_src_addr(id_priv); 738 memcpy(&addr->sib_addr, &sgid, sizeof sgid); 739 cma_translate_ib(addr, &id_priv->id.route.addr.dev_addr); 740 return 0; 741 } 742 743 static void cma_deref_id(struct rdma_id_private *id_priv) 744 { 745 if (atomic_dec_and_test(&id_priv->refcount)) 746 complete(&id_priv->comp); 747 } 748 749 struct rdma_cm_id *rdma_create_id(struct vnet *net, 750 rdma_cm_event_handler event_handler, 751 void *context, enum rdma_port_space ps, 752 enum ib_qp_type qp_type) 753 { 754 struct rdma_id_private *id_priv; 755 756 #ifdef VIMAGE 757 if (net == NULL) 758 return ERR_PTR(-EINVAL); 759 #endif 760 id_priv = kzalloc(sizeof *id_priv, GFP_KERNEL); 761 if (!id_priv) 762 return ERR_PTR(-ENOMEM); 763 764 id_priv->owner = task_pid_nr(current); 765 id_priv->state = RDMA_CM_IDLE; 766 id_priv->id.context = context; 767 id_priv->id.event_handler = event_handler; 768 id_priv->id.ps = ps; 769 id_priv->id.qp_type = qp_type; 770 spin_lock_init(&id_priv->lock); 771 mutex_init(&id_priv->qp_mutex); 772 init_completion(&id_priv->comp); 773 atomic_set(&id_priv->refcount, 1); 774 mutex_init(&id_priv->handler_mutex); 775 INIT_LIST_HEAD(&id_priv->listen_list); 776 INIT_LIST_HEAD(&id_priv->mc_list); 777 get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num); 778 id_priv->id.route.addr.dev_addr.net = net; 779 780 return &id_priv->id; 781 } 782 EXPORT_SYMBOL(rdma_create_id); 783 784 static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) 785 { 786 struct ib_qp_attr qp_attr; 787 int qp_attr_mask, ret; 788 789 qp_attr.qp_state = IB_QPS_INIT; 790 ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); 791 if (ret) 792 return ret; 793 794 ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask); 795 if (ret) 796 return ret; 797 798 qp_attr.qp_state = IB_QPS_RTR; 799 ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE); 800 if (ret) 801 return ret; 802 803 qp_attr.qp_state = IB_QPS_RTS; 804 qp_attr.sq_psn = 0; 805 ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN); 806 807 return ret; 808 } 809 810 static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) 811 { 812 struct ib_qp_attr qp_attr; 813 int qp_attr_mask, ret; 814 815 qp_attr.qp_state = IB_QPS_INIT; 816 ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); 817 if (ret) 818 return ret; 819 820 return ib_modify_qp(qp, &qp_attr, qp_attr_mask); 821 } 822 823 int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd, 824 struct ib_qp_init_attr *qp_init_attr) 825 { 826 struct rdma_id_private *id_priv; 827 struct ib_qp *qp; 828 int ret; 829 830 id_priv = container_of(id, struct rdma_id_private, id); 831 if (id->device != pd->device) 832 return -EINVAL; 833 834 qp_init_attr->port_num = id->port_num; 835 qp = ib_create_qp(pd, qp_init_attr); 836 if (IS_ERR(qp)) 837 return PTR_ERR(qp); 838 839 if (id->qp_type == IB_QPT_UD) 840 ret = cma_init_ud_qp(id_priv, qp); 841 else 842 ret = cma_init_conn_qp(id_priv, qp); 843 if (ret) 844 goto err; 845 846 id->qp = qp; 847 id_priv->qp_num = qp->qp_num; 848 id_priv->srq = (qp->srq != NULL); 849 return 0; 850 err: 851 ib_destroy_qp(qp); 852 return ret; 853 } 854 EXPORT_SYMBOL(rdma_create_qp); 855 856 void rdma_destroy_qp(struct rdma_cm_id *id) 857 { 858 struct rdma_id_private *id_priv; 859 860 id_priv = container_of(id, struct rdma_id_private, id); 861 mutex_lock(&id_priv->qp_mutex); 862 ib_destroy_qp(id_priv->id.qp); 863 id_priv->id.qp = NULL; 864 mutex_unlock(&id_priv->qp_mutex); 865 } 866 EXPORT_SYMBOL(rdma_destroy_qp); 867 868 static int cma_modify_qp_rtr(struct rdma_id_private *id_priv, 869 struct rdma_conn_param *conn_param) 870 { 871 struct ib_qp_attr qp_attr; 872 int qp_attr_mask, ret; 873 union ib_gid sgid; 874 875 mutex_lock(&id_priv->qp_mutex); 876 if (!id_priv->id.qp) { 877 ret = 0; 878 goto out; 879 } 880 881 /* Need to update QP attributes from default values. */ 882 qp_attr.qp_state = IB_QPS_INIT; 883 ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); 884 if (ret) 885 goto out; 886 887 ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); 888 if (ret) 889 goto out; 890 891 qp_attr.qp_state = IB_QPS_RTR; 892 ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); 893 if (ret) 894 goto out; 895 896 ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num, 897 qp_attr.ah_attr.grh.sgid_index, &sgid, NULL); 898 if (ret) 899 goto out; 900 901 BUG_ON(id_priv->cma_dev->device != id_priv->id.device); 902 903 if (conn_param) 904 qp_attr.max_dest_rd_atomic = conn_param->responder_resources; 905 ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); 906 out: 907 mutex_unlock(&id_priv->qp_mutex); 908 return ret; 909 } 910 911 static int cma_modify_qp_rts(struct rdma_id_private *id_priv, 912 struct rdma_conn_param *conn_param) 913 { 914 struct ib_qp_attr qp_attr; 915 int qp_attr_mask, ret; 916 917 mutex_lock(&id_priv->qp_mutex); 918 if (!id_priv->id.qp) { 919 ret = 0; 920 goto out; 921 } 922 923 qp_attr.qp_state = IB_QPS_RTS; 924 ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); 925 if (ret) 926 goto out; 927 928 if (conn_param) 929 qp_attr.max_rd_atomic = conn_param->initiator_depth; 930 ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); 931 out: 932 mutex_unlock(&id_priv->qp_mutex); 933 return ret; 934 } 935 936 static int cma_modify_qp_err(struct rdma_id_private *id_priv) 937 { 938 struct ib_qp_attr qp_attr; 939 int ret; 940 941 mutex_lock(&id_priv->qp_mutex); 942 if (!id_priv->id.qp) { 943 ret = 0; 944 goto out; 945 } 946 947 qp_attr.qp_state = IB_QPS_ERR; 948 ret = ib_modify_qp(id_priv->id.qp, &qp_attr, IB_QP_STATE); 949 out: 950 mutex_unlock(&id_priv->qp_mutex); 951 return ret; 952 } 953 954 static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv, 955 struct ib_qp_attr *qp_attr, int *qp_attr_mask) 956 { 957 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 958 int ret; 959 u16 pkey; 960 961 if (rdma_cap_eth_ah(id_priv->id.device, id_priv->id.port_num)) 962 pkey = 0xffff; 963 else 964 pkey = ib_addr_get_pkey(dev_addr); 965 966 ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num, 967 pkey, &qp_attr->pkey_index); 968 if (ret) 969 return ret; 970 971 qp_attr->port_num = id_priv->id.port_num; 972 *qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT; 973 974 if (id_priv->id.qp_type == IB_QPT_UD) { 975 ret = cma_set_qkey(id_priv, 0); 976 if (ret) 977 return ret; 978 979 qp_attr->qkey = id_priv->qkey; 980 *qp_attr_mask |= IB_QP_QKEY; 981 } else { 982 qp_attr->qp_access_flags = 0; 983 *qp_attr_mask |= IB_QP_ACCESS_FLAGS; 984 } 985 return 0; 986 } 987 988 int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, 989 int *qp_attr_mask) 990 { 991 struct rdma_id_private *id_priv; 992 int ret = 0; 993 994 id_priv = container_of(id, struct rdma_id_private, id); 995 if (rdma_cap_ib_cm(id->device, id->port_num)) { 996 if (!id_priv->cm_id.ib || (id_priv->id.qp_type == IB_QPT_UD)) 997 ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask); 998 else 999 ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr, 1000 qp_attr_mask); 1001 1002 if (qp_attr->qp_state == IB_QPS_RTR) 1003 qp_attr->rq_psn = id_priv->seq_num; 1004 } else if (rdma_cap_iw_cm(id->device, id->port_num)) { 1005 if (!id_priv->cm_id.iw) { 1006 qp_attr->qp_access_flags = 0; 1007 *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; 1008 } else 1009 ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr, 1010 qp_attr_mask); 1011 qp_attr->port_num = id_priv->id.port_num; 1012 *qp_attr_mask |= IB_QP_PORT; 1013 } else 1014 ret = -ENOSYS; 1015 1016 return ret; 1017 } 1018 EXPORT_SYMBOL(rdma_init_qp_attr); 1019 1020 static inline int cma_zero_addr(struct sockaddr *addr) 1021 { 1022 switch (addr->sa_family) { 1023 case AF_INET: 1024 return ipv4_is_zeronet(((struct sockaddr_in *)addr)->sin_addr.s_addr); 1025 case AF_INET6: 1026 return ipv6_addr_any(&((struct sockaddr_in6 *) addr)->sin6_addr); 1027 case AF_IB: 1028 return ib_addr_any(&((struct sockaddr_ib *) addr)->sib_addr); 1029 default: 1030 return 0; 1031 } 1032 } 1033 1034 static inline int cma_loopback_addr(struct sockaddr *addr) 1035 { 1036 switch (addr->sa_family) { 1037 case AF_INET: 1038 return ipv4_is_loopback(((struct sockaddr_in *) addr)->sin_addr.s_addr); 1039 case AF_INET6: 1040 return ipv6_addr_loopback(&((struct sockaddr_in6 *) addr)->sin6_addr); 1041 case AF_IB: 1042 return ib_addr_loopback(&((struct sockaddr_ib *) addr)->sib_addr); 1043 default: 1044 return 0; 1045 } 1046 } 1047 1048 static inline int cma_any_addr(struct sockaddr *addr) 1049 { 1050 return cma_zero_addr(addr) || cma_loopback_addr(addr); 1051 } 1052 1053 static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst) 1054 { 1055 if (src->sa_family != dst->sa_family) 1056 return -1; 1057 1058 switch (src->sa_family) { 1059 case AF_INET: 1060 return ((struct sockaddr_in *) src)->sin_addr.s_addr != 1061 ((struct sockaddr_in *) dst)->sin_addr.s_addr; 1062 case AF_INET6: 1063 return ipv6_addr_cmp(&((struct sockaddr_in6 *) src)->sin6_addr, 1064 &((struct sockaddr_in6 *) dst)->sin6_addr); 1065 default: 1066 return ib_addr_cmp(&((struct sockaddr_ib *) src)->sib_addr, 1067 &((struct sockaddr_ib *) dst)->sib_addr); 1068 } 1069 } 1070 1071 static __be16 cma_port(struct sockaddr *addr) 1072 { 1073 struct sockaddr_ib *sib; 1074 1075 switch (addr->sa_family) { 1076 case AF_INET: 1077 return ((struct sockaddr_in *) addr)->sin_port; 1078 case AF_INET6: 1079 return ((struct sockaddr_in6 *) addr)->sin6_port; 1080 case AF_IB: 1081 sib = (struct sockaddr_ib *) addr; 1082 return htons((u16) (be64_to_cpu(sib->sib_sid) & 1083 be64_to_cpu(sib->sib_sid_mask))); 1084 default: 1085 return 0; 1086 } 1087 } 1088 1089 static inline int cma_any_port(struct sockaddr *addr) 1090 { 1091 return !cma_port(addr); 1092 } 1093 1094 static void cma_save_ib_info(struct sockaddr *src_addr, 1095 struct sockaddr *dst_addr, 1096 struct rdma_cm_id *listen_id, 1097 struct ib_sa_path_rec *path) 1098 { 1099 struct sockaddr_ib *listen_ib, *ib; 1100 1101 listen_ib = (struct sockaddr_ib *) &listen_id->route.addr.src_addr; 1102 if (src_addr) { 1103 ib = (struct sockaddr_ib *)src_addr; 1104 ib->sib_family = AF_IB; 1105 if (path) { 1106 ib->sib_pkey = path->pkey; 1107 ib->sib_flowinfo = path->flow_label; 1108 memcpy(&ib->sib_addr, &path->sgid, 16); 1109 ib->sib_sid = path->service_id; 1110 ib->sib_scope_id = 0; 1111 } else { 1112 ib->sib_pkey = listen_ib->sib_pkey; 1113 ib->sib_flowinfo = listen_ib->sib_flowinfo; 1114 ib->sib_addr = listen_ib->sib_addr; 1115 ib->sib_sid = listen_ib->sib_sid; 1116 ib->sib_scope_id = listen_ib->sib_scope_id; 1117 } 1118 ib->sib_sid_mask = cpu_to_be64(0xffffffffffffffffULL); 1119 } 1120 if (dst_addr) { 1121 ib = (struct sockaddr_ib *)dst_addr; 1122 ib->sib_family = AF_IB; 1123 if (path) { 1124 ib->sib_pkey = path->pkey; 1125 ib->sib_flowinfo = path->flow_label; 1126 memcpy(&ib->sib_addr, &path->dgid, 16); 1127 } 1128 } 1129 } 1130 1131 static void cma_save_ip4_info(struct sockaddr_in *src_addr, 1132 struct sockaddr_in *dst_addr, 1133 struct cma_hdr *hdr, 1134 __be16 local_port) 1135 { 1136 if (src_addr) { 1137 *src_addr = (struct sockaddr_in) { 1138 .sin_len = sizeof(struct sockaddr_in), 1139 .sin_family = AF_INET, 1140 .sin_addr.s_addr = hdr->dst_addr.ip4.addr, 1141 .sin_port = local_port, 1142 }; 1143 } 1144 1145 if (dst_addr) { 1146 *dst_addr = (struct sockaddr_in) { 1147 .sin_len = sizeof(struct sockaddr_in), 1148 .sin_family = AF_INET, 1149 .sin_addr.s_addr = hdr->src_addr.ip4.addr, 1150 .sin_port = hdr->port, 1151 }; 1152 } 1153 } 1154 1155 static void cma_ip6_clear_scope_id(struct in6_addr *addr) 1156 { 1157 /* make sure link local scope ID gets zeroed */ 1158 if (IN6_IS_SCOPE_LINKLOCAL(addr) || 1159 IN6_IS_ADDR_MC_INTFACELOCAL(addr)) { 1160 /* use byte-access to be alignment safe */ 1161 addr->s6_addr[2] = 0; 1162 addr->s6_addr[3] = 0; 1163 } 1164 } 1165 1166 static void cma_save_ip6_info(struct sockaddr_in6 *src_addr, 1167 struct sockaddr_in6 *dst_addr, 1168 struct cma_hdr *hdr, 1169 __be16 local_port) 1170 { 1171 if (src_addr) { 1172 *src_addr = (struct sockaddr_in6) { 1173 .sin6_len = sizeof(struct sockaddr_in6), 1174 .sin6_family = AF_INET6, 1175 .sin6_addr = hdr->dst_addr.ip6, 1176 .sin6_port = local_port, 1177 }; 1178 cma_ip6_clear_scope_id(&src_addr->sin6_addr); 1179 } 1180 1181 if (dst_addr) { 1182 *dst_addr = (struct sockaddr_in6) { 1183 .sin6_len = sizeof(struct sockaddr_in6), 1184 .sin6_family = AF_INET6, 1185 .sin6_addr = hdr->src_addr.ip6, 1186 .sin6_port = hdr->port, 1187 }; 1188 cma_ip6_clear_scope_id(&dst_addr->sin6_addr); 1189 } 1190 } 1191 1192 static u16 cma_port_from_service_id(__be64 service_id) 1193 { 1194 return (u16)be64_to_cpu(service_id); 1195 } 1196 1197 static int sdp_save_ip_info(struct sockaddr *src_addr, 1198 struct sockaddr *dst_addr, 1199 const struct sdp_hh *hdr, 1200 __be64 service_id) 1201 { 1202 __be16 local_port; 1203 1204 BUG_ON(src_addr == NULL || dst_addr == NULL); 1205 1206 if (sdp_get_majv(hdr->majv_minv) != SDP_MAJ_VERSION) 1207 return -EINVAL; 1208 1209 local_port = htons(cma_port_from_service_id(service_id)); 1210 1211 switch (sdp_get_ip_ver(hdr)) { 1212 case 4: { 1213 struct sockaddr_in *s4, *d4; 1214 1215 s4 = (void *)src_addr; 1216 d4 = (void *)dst_addr; 1217 1218 *s4 = (struct sockaddr_in) { 1219 .sin_len = sizeof(*s4), 1220 .sin_family = AF_INET, 1221 .sin_addr.s_addr = hdr->dst_addr.ip4.addr, 1222 .sin_port = local_port, 1223 }; 1224 *d4 = (struct sockaddr_in) { 1225 .sin_len = sizeof(*d4), 1226 .sin_family = AF_INET, 1227 .sin_addr.s_addr = hdr->src_addr.ip4.addr, 1228 .sin_port = hdr->port, 1229 }; 1230 break; 1231 } 1232 case 6: { 1233 struct sockaddr_in6 *s6, *d6; 1234 1235 s6 = (void *)src_addr; 1236 d6 = (void *)dst_addr; 1237 1238 *s6 = (struct sockaddr_in6) { 1239 .sin6_len = sizeof(*s6), 1240 .sin6_family = AF_INET6, 1241 .sin6_addr = hdr->dst_addr.ip6, 1242 .sin6_port = local_port, 1243 }; 1244 *d6 = (struct sockaddr_in6) { 1245 .sin6_len = sizeof(*d6), 1246 .sin6_family = AF_INET6, 1247 .sin6_addr = hdr->src_addr.ip6, 1248 .sin6_port = hdr->port, 1249 }; 1250 cma_ip6_clear_scope_id(&s6->sin6_addr); 1251 cma_ip6_clear_scope_id(&d6->sin6_addr); 1252 break; 1253 } 1254 default: 1255 return -EAFNOSUPPORT; 1256 } 1257 1258 return 0; 1259 } 1260 1261 static int cma_save_ip_info(struct sockaddr *src_addr, 1262 struct sockaddr *dst_addr, 1263 struct ib_cm_event *ib_event, 1264 __be64 service_id) 1265 { 1266 struct cma_hdr *hdr; 1267 __be16 port; 1268 1269 if (rdma_ps_from_service_id(service_id) == RDMA_PS_SDP) 1270 return sdp_save_ip_info(src_addr, dst_addr, 1271 ib_event->private_data, service_id); 1272 1273 hdr = ib_event->private_data; 1274 if (hdr->cma_version != CMA_VERSION) 1275 return -EINVAL; 1276 1277 port = htons(cma_port_from_service_id(service_id)); 1278 1279 switch (cma_get_ip_ver(hdr)) { 1280 case 4: 1281 cma_save_ip4_info((struct sockaddr_in *)src_addr, 1282 (struct sockaddr_in *)dst_addr, hdr, port); 1283 break; 1284 case 6: 1285 cma_save_ip6_info((struct sockaddr_in6 *)src_addr, 1286 (struct sockaddr_in6 *)dst_addr, hdr, port); 1287 break; 1288 default: 1289 return -EAFNOSUPPORT; 1290 } 1291 1292 return 0; 1293 } 1294 1295 static int cma_save_net_info(struct sockaddr *src_addr, 1296 struct sockaddr *dst_addr, 1297 struct rdma_cm_id *listen_id, 1298 struct ib_cm_event *ib_event, 1299 sa_family_t sa_family, __be64 service_id) 1300 { 1301 if (sa_family == AF_IB) { 1302 if (ib_event->event == IB_CM_REQ_RECEIVED) 1303 cma_save_ib_info(src_addr, dst_addr, listen_id, 1304 ib_event->param.req_rcvd.primary_path); 1305 else if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) 1306 cma_save_ib_info(src_addr, dst_addr, listen_id, NULL); 1307 return 0; 1308 } 1309 1310 return cma_save_ip_info(src_addr, dst_addr, ib_event, service_id); 1311 } 1312 1313 static int cma_save_req_info(const struct ib_cm_event *ib_event, 1314 struct cma_req_info *req) 1315 { 1316 const struct ib_cm_req_event_param *req_param = 1317 &ib_event->param.req_rcvd; 1318 const struct ib_cm_sidr_req_event_param *sidr_param = 1319 &ib_event->param.sidr_req_rcvd; 1320 1321 switch (ib_event->event) { 1322 case IB_CM_REQ_RECEIVED: 1323 req->device = req_param->listen_id->device; 1324 req->port = req_param->port; 1325 memcpy(&req->local_gid, &req_param->primary_path->sgid, 1326 sizeof(req->local_gid)); 1327 req->has_gid = true; 1328 req->service_id = req_param->primary_path->service_id; 1329 req->pkey = be16_to_cpu(req_param->primary_path->pkey); 1330 if (req->pkey != req_param->bth_pkey) 1331 pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and primary path P_Key (0x%x)\n" 1332 "RDMA CMA: in the future this may cause the request to be dropped\n", 1333 req_param->bth_pkey, req->pkey); 1334 break; 1335 case IB_CM_SIDR_REQ_RECEIVED: 1336 req->device = sidr_param->listen_id->device; 1337 req->port = sidr_param->port; 1338 req->has_gid = false; 1339 req->service_id = sidr_param->service_id; 1340 req->pkey = sidr_param->pkey; 1341 if (req->pkey != sidr_param->bth_pkey) 1342 pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and SIDR request payload P_Key (0x%x)\n" 1343 "RDMA CMA: in the future this may cause the request to be dropped\n", 1344 sidr_param->bth_pkey, req->pkey); 1345 break; 1346 default: 1347 return -EINVAL; 1348 } 1349 1350 return 0; 1351 } 1352 1353 static bool validate_ipv4_net_dev(struct net_device *net_dev, 1354 const struct sockaddr_in *dst_addr, 1355 const struct sockaddr_in *src_addr) 1356 { 1357 #ifdef INET 1358 __be32 daddr = dst_addr->sin_addr.s_addr, 1359 saddr = src_addr->sin_addr.s_addr; 1360 struct net_device *dst_dev; 1361 struct nhop_object *nh; 1362 bool ret; 1363 1364 if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) || 1365 ipv4_is_lbcast(daddr) || ipv4_is_zeronet(saddr) || 1366 ipv4_is_zeronet(daddr) || ipv4_is_loopback(daddr) || 1367 ipv4_is_loopback(saddr)) 1368 return false; 1369 1370 dst_dev = ip_dev_find(net_dev->if_vnet, daddr); 1371 if (dst_dev != net_dev) { 1372 if (dst_dev != NULL) 1373 dev_put(dst_dev); 1374 return false; 1375 } 1376 dev_put(dst_dev); 1377 1378 /* 1379 * Check for loopback. 1380 */ 1381 if (saddr == daddr) 1382 return true; 1383 1384 CURVNET_SET(net_dev->if_vnet); 1385 nh = fib4_lookup(RT_DEFAULT_FIB, src_addr->sin_addr, 0, NHR_NONE, 0); 1386 if (nh != NULL) 1387 ret = (nh->nh_ifp == net_dev); 1388 else 1389 ret = false; 1390 CURVNET_RESTORE(); 1391 return ret; 1392 #else 1393 return false; 1394 #endif 1395 } 1396 1397 static bool validate_ipv6_net_dev(struct net_device *net_dev, 1398 const struct sockaddr_in6 *dst_addr, 1399 const struct sockaddr_in6 *src_addr) 1400 { 1401 #ifdef INET6 1402 struct sockaddr_in6 src_tmp = *src_addr; 1403 struct sockaddr_in6 dst_tmp = *dst_addr; 1404 struct net_device *dst_dev; 1405 struct nhop_object *nh; 1406 bool ret; 1407 1408 dst_dev = ip6_dev_find(net_dev->if_vnet, dst_tmp.sin6_addr, 1409 net_dev->if_index); 1410 if (dst_dev != net_dev) { 1411 if (dst_dev != NULL) 1412 dev_put(dst_dev); 1413 return false; 1414 } 1415 dev_put(dst_dev); 1416 1417 CURVNET_SET(net_dev->if_vnet); 1418 1419 /* 1420 * Make sure the scope ID gets embedded. 1421 */ 1422 src_tmp.sin6_scope_id = net_dev->if_index; 1423 sa6_embedscope(&src_tmp, 0); 1424 1425 dst_tmp.sin6_scope_id = net_dev->if_index; 1426 sa6_embedscope(&dst_tmp, 0); 1427 1428 /* 1429 * Check for loopback after scope ID 1430 * has been embedded: 1431 */ 1432 if (memcmp(&src_tmp.sin6_addr, &dst_tmp.sin6_addr, 1433 sizeof(dst_tmp.sin6_addr)) == 0) { 1434 ret = true; 1435 } else { 1436 /* non-loopback case */ 1437 nh = fib6_lookup(RT_DEFAULT_FIB, &src_addr->sin6_addr, 1438 net_dev->if_index, NHR_NONE, 0); 1439 if (nh != NULL) 1440 ret = (nh->nh_ifp == net_dev); 1441 else 1442 ret = false; 1443 } 1444 CURVNET_RESTORE(); 1445 return ret; 1446 #else 1447 return false; 1448 #endif 1449 } 1450 1451 static bool validate_net_dev(struct net_device *net_dev, 1452 const struct sockaddr *daddr, 1453 const struct sockaddr *saddr) 1454 { 1455 const struct sockaddr_in *daddr4 = (const struct sockaddr_in *)daddr; 1456 const struct sockaddr_in *saddr4 = (const struct sockaddr_in *)saddr; 1457 const struct sockaddr_in6 *daddr6 = (const struct sockaddr_in6 *)daddr; 1458 const struct sockaddr_in6 *saddr6 = (const struct sockaddr_in6 *)saddr; 1459 1460 switch (daddr->sa_family) { 1461 case AF_INET: 1462 return saddr->sa_family == AF_INET && 1463 validate_ipv4_net_dev(net_dev, daddr4, saddr4); 1464 1465 case AF_INET6: 1466 return saddr->sa_family == AF_INET6 && 1467 validate_ipv6_net_dev(net_dev, daddr6, saddr6); 1468 1469 default: 1470 return false; 1471 } 1472 } 1473 1474 static struct net_device * 1475 roce_get_net_dev_by_cm_event(struct ib_device *device, u8 port_num, 1476 const struct ib_cm_event *ib_event) 1477 { 1478 struct ib_gid_attr sgid_attr; 1479 union ib_gid sgid; 1480 int err = -EINVAL; 1481 1482 if (ib_event->event == IB_CM_REQ_RECEIVED) { 1483 err = ib_get_cached_gid(device, port_num, 1484 ib_event->param.req_rcvd.ppath_sgid_index, &sgid, &sgid_attr); 1485 } else if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) { 1486 err = ib_get_cached_gid(device, port_num, 1487 ib_event->param.sidr_req_rcvd.sgid_index, &sgid, &sgid_attr); 1488 } 1489 if (err) 1490 return (NULL); 1491 return (sgid_attr.ndev); 1492 } 1493 1494 static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event, 1495 const struct cma_req_info *req) 1496 { 1497 struct sockaddr_storage listen_addr_storage, src_addr_storage; 1498 struct sockaddr *listen_addr = (struct sockaddr *)&listen_addr_storage, 1499 *src_addr = (struct sockaddr *)&src_addr_storage; 1500 struct net_device *net_dev; 1501 const union ib_gid *gid = req->has_gid ? &req->local_gid : NULL; 1502 struct epoch_tracker et; 1503 int err; 1504 1505 err = cma_save_ip_info(listen_addr, src_addr, ib_event, 1506 req->service_id); 1507 if (err) 1508 return ERR_PTR(err); 1509 1510 if (rdma_protocol_roce(req->device, req->port)) { 1511 net_dev = roce_get_net_dev_by_cm_event(req->device, req->port, 1512 ib_event); 1513 } else { 1514 net_dev = ib_get_net_dev_by_params(req->device, req->port, 1515 req->pkey, 1516 gid, listen_addr); 1517 } 1518 if (!net_dev) 1519 return ERR_PTR(-ENODEV); 1520 1521 NET_EPOCH_ENTER(et); 1522 if (!validate_net_dev(net_dev, listen_addr, src_addr)) { 1523 NET_EPOCH_EXIT(et); 1524 dev_put(net_dev); 1525 return ERR_PTR(-EHOSTUNREACH); 1526 } 1527 NET_EPOCH_EXIT(et); 1528 1529 return net_dev; 1530 } 1531 1532 static enum rdma_port_space rdma_ps_from_service_id(__be64 service_id) 1533 { 1534 return (be64_to_cpu(service_id) >> 16) & 0xffff; 1535 } 1536 1537 static bool sdp_match_private_data(struct rdma_id_private *id_priv, 1538 const struct sdp_hh *hdr, 1539 struct sockaddr *addr) 1540 { 1541 __be32 ip4_addr; 1542 struct in6_addr ip6_addr; 1543 1544 switch (addr->sa_family) { 1545 case AF_INET: 1546 ip4_addr = ((struct sockaddr_in *)addr)->sin_addr.s_addr; 1547 if (sdp_get_ip_ver(hdr) != 4) 1548 return false; 1549 if (!cma_any_addr(addr) && 1550 hdr->dst_addr.ip4.addr != ip4_addr) 1551 return false; 1552 break; 1553 case AF_INET6: 1554 ip6_addr = ((struct sockaddr_in6 *)addr)->sin6_addr; 1555 if (sdp_get_ip_ver(hdr) != 6) 1556 return false; 1557 cma_ip6_clear_scope_id(&ip6_addr); 1558 if (!cma_any_addr(addr) && 1559 memcmp(&hdr->dst_addr.ip6, &ip6_addr, sizeof(ip6_addr))) 1560 return false; 1561 break; 1562 case AF_IB: 1563 return true; 1564 default: 1565 return false; 1566 } 1567 1568 return true; 1569 } 1570 1571 static bool cma_match_private_data(struct rdma_id_private *id_priv, 1572 const void *vhdr) 1573 { 1574 const struct cma_hdr *hdr = vhdr; 1575 struct sockaddr *addr = cma_src_addr(id_priv); 1576 __be32 ip4_addr; 1577 struct in6_addr ip6_addr; 1578 1579 if (cma_any_addr(addr) && !id_priv->afonly) 1580 return true; 1581 1582 if (id_priv->id.ps == RDMA_PS_SDP) 1583 return sdp_match_private_data(id_priv, vhdr, addr); 1584 1585 switch (addr->sa_family) { 1586 case AF_INET: 1587 ip4_addr = ((struct sockaddr_in *)addr)->sin_addr.s_addr; 1588 if (cma_get_ip_ver(hdr) != 4) 1589 return false; 1590 if (!cma_any_addr(addr) && 1591 hdr->dst_addr.ip4.addr != ip4_addr) 1592 return false; 1593 break; 1594 case AF_INET6: 1595 ip6_addr = ((struct sockaddr_in6 *)addr)->sin6_addr; 1596 if (cma_get_ip_ver(hdr) != 6) 1597 return false; 1598 cma_ip6_clear_scope_id(&ip6_addr); 1599 if (!cma_any_addr(addr) && 1600 memcmp(&hdr->dst_addr.ip6, &ip6_addr, sizeof(ip6_addr))) 1601 return false; 1602 break; 1603 case AF_IB: 1604 return true; 1605 default: 1606 return false; 1607 } 1608 1609 return true; 1610 } 1611 1612 static bool cma_protocol_roce_dev_port(struct ib_device *device, int port_num) 1613 { 1614 enum rdma_link_layer ll = rdma_port_get_link_layer(device, port_num); 1615 enum rdma_transport_type transport = 1616 rdma_node_get_transport(device->node_type); 1617 1618 return ll == IB_LINK_LAYER_ETHERNET && transport == RDMA_TRANSPORT_IB; 1619 } 1620 1621 static bool cma_protocol_roce(const struct rdma_cm_id *id) 1622 { 1623 struct ib_device *device = id->device; 1624 const int port_num = id->port_num ?: rdma_start_port(device); 1625 1626 return cma_protocol_roce_dev_port(device, port_num); 1627 } 1628 1629 static bool cma_match_net_dev(const struct rdma_cm_id *id, 1630 const struct net_device *net_dev, 1631 u8 port_num) 1632 { 1633 const struct rdma_addr *addr = &id->route.addr; 1634 1635 if (!net_dev) { 1636 if (id->port_num && id->port_num != port_num) 1637 return false; 1638 1639 if (id->ps == RDMA_PS_SDP) { 1640 if (addr->src_addr.ss_family == AF_INET || 1641 addr->src_addr.ss_family == AF_INET6) 1642 return true; 1643 return false; 1644 } 1645 /* This request is an AF_IB request or a RoCE request */ 1646 return addr->src_addr.ss_family == AF_IB || 1647 cma_protocol_roce_dev_port(id->device, port_num); 1648 } 1649 1650 return !addr->dev_addr.bound_dev_if || 1651 (net_eq(dev_net(net_dev), addr->dev_addr.net) && 1652 addr->dev_addr.bound_dev_if == net_dev->if_index); 1653 } 1654 1655 static struct rdma_id_private *cma_find_listener( 1656 const struct rdma_bind_list *bind_list, 1657 const struct ib_cm_id *cm_id, 1658 const struct ib_cm_event *ib_event, 1659 const struct cma_req_info *req, 1660 const struct net_device *net_dev) 1661 { 1662 struct rdma_id_private *id_priv, *id_priv_dev; 1663 1664 if (!bind_list) 1665 return ERR_PTR(-EINVAL); 1666 1667 hlist_for_each_entry(id_priv, &bind_list->owners, node) { 1668 if (cma_match_private_data(id_priv, ib_event->private_data)) { 1669 if (id_priv->id.device == cm_id->device && 1670 cma_match_net_dev(&id_priv->id, net_dev, req->port)) 1671 return id_priv; 1672 list_for_each_entry(id_priv_dev, 1673 &id_priv->listen_list, 1674 listen_list) { 1675 if (id_priv_dev->id.device == cm_id->device && 1676 cma_match_net_dev(&id_priv_dev->id, net_dev, req->port)) 1677 return id_priv_dev; 1678 } 1679 } 1680 } 1681 1682 return ERR_PTR(-EINVAL); 1683 } 1684 1685 static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id, 1686 struct ib_cm_event *ib_event, 1687 struct net_device **net_dev) 1688 { 1689 struct cma_req_info req; 1690 struct rdma_bind_list *bind_list; 1691 struct rdma_id_private *id_priv; 1692 int err; 1693 1694 err = cma_save_req_info(ib_event, &req); 1695 if (err) 1696 return ERR_PTR(err); 1697 1698 if (rdma_ps_from_service_id(cm_id->service_id) == RDMA_PS_SDP) { 1699 *net_dev = NULL; 1700 goto there_is_no_net_dev; 1701 } 1702 1703 *net_dev = cma_get_net_dev(ib_event, &req); 1704 if (IS_ERR(*net_dev)) { 1705 if (PTR_ERR(*net_dev) == -EAFNOSUPPORT) { 1706 /* Assuming the protocol is AF_IB */ 1707 *net_dev = NULL; 1708 } else { 1709 return ERR_CAST(*net_dev); 1710 } 1711 } 1712 1713 there_is_no_net_dev: 1714 bind_list = cma_ps_find(*net_dev ? dev_net(*net_dev) : &init_net, 1715 rdma_ps_from_service_id(req.service_id), 1716 cma_port_from_service_id(req.service_id)); 1717 id_priv = cma_find_listener(bind_list, cm_id, ib_event, &req, *net_dev); 1718 if (IS_ERR(id_priv) && *net_dev) { 1719 dev_put(*net_dev); 1720 *net_dev = NULL; 1721 } 1722 1723 return id_priv; 1724 } 1725 1726 static inline int cma_user_data_offset(struct rdma_id_private *id_priv) 1727 { 1728 if (cma_family(id_priv) == AF_IB) 1729 return 0; 1730 if (id_priv->id.ps == RDMA_PS_SDP) 1731 return 0; 1732 return sizeof(struct cma_hdr); 1733 } 1734 1735 static void cma_cancel_route(struct rdma_id_private *id_priv) 1736 { 1737 if (rdma_cap_ib_sa(id_priv->id.device, id_priv->id.port_num)) { 1738 if (id_priv->query) 1739 ib_sa_cancel_query(id_priv->query_id, id_priv->query); 1740 } 1741 } 1742 1743 static void cma_cancel_listens(struct rdma_id_private *id_priv) 1744 { 1745 struct rdma_id_private *dev_id_priv; 1746 1747 /* 1748 * Remove from listen_any_list to prevent added devices from spawning 1749 * additional listen requests. 1750 */ 1751 mutex_lock(&lock); 1752 list_del(&id_priv->list); 1753 1754 while (!list_empty(&id_priv->listen_list)) { 1755 dev_id_priv = list_entry(id_priv->listen_list.next, 1756 struct rdma_id_private, listen_list); 1757 /* sync with device removal to avoid duplicate destruction */ 1758 list_del_init(&dev_id_priv->list); 1759 list_del(&dev_id_priv->listen_list); 1760 mutex_unlock(&lock); 1761 1762 rdma_destroy_id(&dev_id_priv->id); 1763 mutex_lock(&lock); 1764 } 1765 mutex_unlock(&lock); 1766 } 1767 1768 static void cma_cancel_operation(struct rdma_id_private *id_priv, 1769 enum rdma_cm_state state) 1770 { 1771 switch (state) { 1772 case RDMA_CM_ADDR_QUERY: 1773 rdma_addr_cancel(&id_priv->id.route.addr.dev_addr); 1774 break; 1775 case RDMA_CM_ROUTE_QUERY: 1776 cma_cancel_route(id_priv); 1777 break; 1778 case RDMA_CM_LISTEN: 1779 if (cma_any_addr(cma_src_addr(id_priv)) && !id_priv->cma_dev) 1780 cma_cancel_listens(id_priv); 1781 break; 1782 default: 1783 break; 1784 } 1785 } 1786 1787 static void cma_release_port(struct rdma_id_private *id_priv) 1788 { 1789 struct rdma_bind_list *bind_list = id_priv->bind_list; 1790 struct vnet *net = id_priv->id.route.addr.dev_addr.net; 1791 1792 if (!bind_list) 1793 return; 1794 1795 mutex_lock(&lock); 1796 hlist_del(&id_priv->node); 1797 if (hlist_empty(&bind_list->owners)) { 1798 cma_ps_remove(net, bind_list->ps, bind_list->port); 1799 kfree(bind_list); 1800 } 1801 mutex_unlock(&lock); 1802 } 1803 1804 static void cma_leave_mc_groups(struct rdma_id_private *id_priv) 1805 { 1806 struct cma_multicast *mc; 1807 1808 while (!list_empty(&id_priv->mc_list)) { 1809 mc = container_of(id_priv->mc_list.next, 1810 struct cma_multicast, list); 1811 list_del(&mc->list); 1812 if (rdma_cap_ib_mcast(id_priv->cma_dev->device, 1813 id_priv->id.port_num)) { 1814 ib_sa_free_multicast(mc->multicast.ib); 1815 kfree(mc); 1816 } else { 1817 if (mc->igmp_joined) { 1818 struct rdma_dev_addr *dev_addr = 1819 &id_priv->id.route.addr.dev_addr; 1820 struct net_device *ndev = NULL; 1821 1822 if (dev_addr->bound_dev_if) 1823 ndev = dev_get_by_index(dev_addr->net, 1824 dev_addr->bound_dev_if); 1825 if (ndev) { 1826 cma_igmp_send(ndev, 1827 &mc->multicast.ib->rec.mgid, 1828 false); 1829 dev_put(ndev); 1830 } 1831 } 1832 kref_put(&mc->mcref, release_mc); 1833 } 1834 } 1835 } 1836 1837 void rdma_destroy_id(struct rdma_cm_id *id) 1838 { 1839 struct rdma_id_private *id_priv; 1840 enum rdma_cm_state state; 1841 1842 id_priv = container_of(id, struct rdma_id_private, id); 1843 state = cma_exch(id_priv, RDMA_CM_DESTROYING); 1844 cma_cancel_operation(id_priv, state); 1845 1846 /* 1847 * Wait for any active callback to finish. New callbacks will find 1848 * the id_priv state set to destroying and abort. 1849 */ 1850 mutex_lock(&id_priv->handler_mutex); 1851 mutex_unlock(&id_priv->handler_mutex); 1852 1853 if (id_priv->cma_dev) { 1854 if (rdma_cap_ib_cm(id_priv->id.device, 1)) { 1855 if (id_priv->cm_id.ib) 1856 ib_destroy_cm_id(id_priv->cm_id.ib); 1857 } else if (rdma_cap_iw_cm(id_priv->id.device, 1)) { 1858 if (id_priv->cm_id.iw) 1859 iw_destroy_cm_id(id_priv->cm_id.iw); 1860 } 1861 cma_leave_mc_groups(id_priv); 1862 cma_release_dev(id_priv); 1863 } 1864 1865 cma_release_port(id_priv); 1866 cma_deref_id(id_priv); 1867 wait_for_completion(&id_priv->comp); 1868 1869 if (id_priv->internal_id) 1870 cma_deref_id(id_priv->id.context); 1871 1872 kfree(id_priv->id.route.path_rec); 1873 kfree(id_priv); 1874 } 1875 EXPORT_SYMBOL(rdma_destroy_id); 1876 1877 static int cma_rep_recv(struct rdma_id_private *id_priv) 1878 { 1879 int ret; 1880 1881 ret = cma_modify_qp_rtr(id_priv, NULL); 1882 if (ret) 1883 goto reject; 1884 1885 ret = cma_modify_qp_rts(id_priv, NULL); 1886 if (ret) 1887 goto reject; 1888 1889 ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0); 1890 if (ret) 1891 goto reject; 1892 1893 return 0; 1894 reject: 1895 cma_modify_qp_err(id_priv); 1896 ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED, 1897 NULL, 0, NULL, 0); 1898 return ret; 1899 } 1900 1901 static int sdp_verify_rep(const struct sdp_hah *data) 1902 { 1903 if (sdp_get_majv(data->majv_minv) != SDP_MAJ_VERSION) 1904 return -EINVAL; 1905 return 0; 1906 } 1907 1908 static void cma_set_rep_event_data(struct rdma_cm_event *event, 1909 struct ib_cm_rep_event_param *rep_data, 1910 void *private_data) 1911 { 1912 event->param.conn.private_data = private_data; 1913 event->param.conn.private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE; 1914 event->param.conn.responder_resources = rep_data->responder_resources; 1915 event->param.conn.initiator_depth = rep_data->initiator_depth; 1916 event->param.conn.flow_control = rep_data->flow_control; 1917 event->param.conn.rnr_retry_count = rep_data->rnr_retry_count; 1918 event->param.conn.srq = rep_data->srq; 1919 event->param.conn.qp_num = rep_data->remote_qpn; 1920 } 1921 1922 static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) 1923 { 1924 struct rdma_id_private *id_priv = cm_id->context; 1925 struct rdma_cm_event event; 1926 int ret = 0; 1927 1928 mutex_lock(&id_priv->handler_mutex); 1929 if ((ib_event->event != IB_CM_TIMEWAIT_EXIT && 1930 id_priv->state != RDMA_CM_CONNECT) || 1931 (ib_event->event == IB_CM_TIMEWAIT_EXIT && 1932 id_priv->state != RDMA_CM_DISCONNECT)) 1933 goto out; 1934 1935 memset(&event, 0, sizeof event); 1936 switch (ib_event->event) { 1937 case IB_CM_REQ_ERROR: 1938 case IB_CM_REP_ERROR: 1939 event.event = RDMA_CM_EVENT_UNREACHABLE; 1940 event.status = -ETIMEDOUT; 1941 break; 1942 case IB_CM_REP_RECEIVED: 1943 if (id_priv->id.ps == RDMA_PS_SDP) { 1944 event.status = sdp_verify_rep(ib_event->private_data); 1945 if (event.status) 1946 event.event = RDMA_CM_EVENT_CONNECT_ERROR; 1947 else 1948 event.event = RDMA_CM_EVENT_CONNECT_RESPONSE; 1949 } else { 1950 if (id_priv->id.qp) { 1951 event.status = cma_rep_recv(id_priv); 1952 event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR : 1953 RDMA_CM_EVENT_ESTABLISHED; 1954 } else { 1955 event.event = RDMA_CM_EVENT_CONNECT_RESPONSE; 1956 } 1957 } 1958 cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd, 1959 ib_event->private_data); 1960 break; 1961 case IB_CM_RTU_RECEIVED: 1962 case IB_CM_USER_ESTABLISHED: 1963 event.event = RDMA_CM_EVENT_ESTABLISHED; 1964 break; 1965 case IB_CM_DREQ_ERROR: 1966 event.status = -ETIMEDOUT; /* fall through */ 1967 case IB_CM_DREQ_RECEIVED: 1968 case IB_CM_DREP_RECEIVED: 1969 if (!cma_comp_exch(id_priv, RDMA_CM_CONNECT, 1970 RDMA_CM_DISCONNECT)) 1971 goto out; 1972 event.event = RDMA_CM_EVENT_DISCONNECTED; 1973 break; 1974 case IB_CM_TIMEWAIT_EXIT: 1975 event.event = RDMA_CM_EVENT_TIMEWAIT_EXIT; 1976 break; 1977 case IB_CM_MRA_RECEIVED: 1978 /* ignore event */ 1979 goto out; 1980 case IB_CM_REJ_RECEIVED: 1981 cma_modify_qp_err(id_priv); 1982 event.status = ib_event->param.rej_rcvd.reason; 1983 event.event = RDMA_CM_EVENT_REJECTED; 1984 event.param.conn.private_data = ib_event->private_data; 1985 event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE; 1986 break; 1987 default: 1988 pr_err("RDMA CMA: unexpected IB CM event: %d\n", 1989 ib_event->event); 1990 goto out; 1991 } 1992 1993 ret = id_priv->id.event_handler(&id_priv->id, &event); 1994 if (ret) { 1995 /* Destroy the CM ID by returning a non-zero value. */ 1996 id_priv->cm_id.ib = NULL; 1997 cma_exch(id_priv, RDMA_CM_DESTROYING); 1998 mutex_unlock(&id_priv->handler_mutex); 1999 rdma_destroy_id(&id_priv->id); 2000 return ret; 2001 } 2002 out: 2003 mutex_unlock(&id_priv->handler_mutex); 2004 return ret; 2005 } 2006 2007 static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, 2008 struct ib_cm_event *ib_event, 2009 struct net_device *net_dev) 2010 { 2011 struct rdma_id_private *id_priv; 2012 struct rdma_cm_id *id; 2013 struct rdma_route *rt; 2014 const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family; 2015 const __be64 service_id = 2016 ib_event->param.req_rcvd.primary_path->service_id; 2017 int ret; 2018 2019 id = rdma_create_id(listen_id->route.addr.dev_addr.net, 2020 listen_id->event_handler, listen_id->context, 2021 listen_id->ps, ib_event->param.req_rcvd.qp_type); 2022 if (IS_ERR(id)) 2023 return NULL; 2024 2025 id_priv = container_of(id, struct rdma_id_private, id); 2026 if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr, 2027 (struct sockaddr *)&id->route.addr.dst_addr, 2028 listen_id, ib_event, ss_family, service_id)) 2029 goto err; 2030 2031 rt = &id->route; 2032 rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1; 2033 rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths, 2034 GFP_KERNEL); 2035 if (!rt->path_rec) 2036 goto err; 2037 2038 rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path; 2039 if (rt->num_paths == 2) 2040 rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path; 2041 2042 if (net_dev) { 2043 ret = rdma_copy_addr(&rt->addr.dev_addr, net_dev, NULL); 2044 if (ret) 2045 goto err; 2046 } else { 2047 if (!cma_protocol_roce(listen_id) && 2048 cma_any_addr(cma_src_addr(id_priv))) { 2049 rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND; 2050 rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid); 2051 ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey)); 2052 } else if (!cma_any_addr(cma_src_addr(id_priv))) { 2053 ret = cma_translate_addr(cma_src_addr(id_priv), &rt->addr.dev_addr); 2054 if (ret) 2055 goto err; 2056 } 2057 } 2058 rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid); 2059 2060 id_priv->state = RDMA_CM_CONNECT; 2061 return id_priv; 2062 2063 err: 2064 rdma_destroy_id(id); 2065 return NULL; 2066 } 2067 2068 static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id, 2069 struct ib_cm_event *ib_event, 2070 struct net_device *net_dev) 2071 { 2072 struct rdma_id_private *id_priv; 2073 struct rdma_cm_id *id; 2074 const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family; 2075 struct vnet *net = listen_id->route.addr.dev_addr.net; 2076 int ret; 2077 2078 id = rdma_create_id(net, listen_id->event_handler, listen_id->context, 2079 listen_id->ps, IB_QPT_UD); 2080 if (IS_ERR(id)) 2081 return NULL; 2082 2083 id_priv = container_of(id, struct rdma_id_private, id); 2084 if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr, 2085 (struct sockaddr *)&id->route.addr.dst_addr, 2086 listen_id, ib_event, ss_family, 2087 ib_event->param.sidr_req_rcvd.service_id)) 2088 goto err; 2089 2090 if (net_dev) { 2091 ret = rdma_copy_addr(&id->route.addr.dev_addr, net_dev, NULL); 2092 if (ret) 2093 goto err; 2094 } else { 2095 if (!cma_any_addr(cma_src_addr(id_priv))) { 2096 ret = cma_translate_addr(cma_src_addr(id_priv), 2097 &id->route.addr.dev_addr); 2098 if (ret) 2099 goto err; 2100 } 2101 } 2102 2103 id_priv->state = RDMA_CM_CONNECT; 2104 return id_priv; 2105 err: 2106 rdma_destroy_id(id); 2107 return NULL; 2108 } 2109 2110 static void cma_set_req_event_data(struct rdma_cm_event *event, 2111 struct ib_cm_req_event_param *req_data, 2112 void *private_data, int offset) 2113 { 2114 event->param.conn.private_data = (char *)private_data + offset; 2115 event->param.conn.private_data_len = IB_CM_REQ_PRIVATE_DATA_SIZE - offset; 2116 event->param.conn.responder_resources = req_data->responder_resources; 2117 event->param.conn.initiator_depth = req_data->initiator_depth; 2118 event->param.conn.flow_control = req_data->flow_control; 2119 event->param.conn.retry_count = req_data->retry_count; 2120 event->param.conn.rnr_retry_count = req_data->rnr_retry_count; 2121 event->param.conn.srq = req_data->srq; 2122 event->param.conn.qp_num = req_data->remote_qpn; 2123 } 2124 2125 static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_event) 2126 { 2127 return (((ib_event->event == IB_CM_REQ_RECEIVED) && 2128 (ib_event->param.req_rcvd.qp_type == id->qp_type)) || 2129 ((ib_event->event == IB_CM_SIDR_REQ_RECEIVED) && 2130 (id->qp_type == IB_QPT_UD)) || 2131 (!id->qp_type)); 2132 } 2133 2134 static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) 2135 { 2136 struct rdma_id_private *listen_id, *conn_id = NULL; 2137 struct rdma_cm_event event; 2138 struct net_device *net_dev; 2139 int offset, ret; 2140 2141 listen_id = cma_id_from_event(cm_id, ib_event, &net_dev); 2142 if (IS_ERR(listen_id)) 2143 return PTR_ERR(listen_id); 2144 2145 if (!cma_check_req_qp_type(&listen_id->id, ib_event)) { 2146 ret = -EINVAL; 2147 goto net_dev_put; 2148 } 2149 2150 mutex_lock(&listen_id->handler_mutex); 2151 if (listen_id->state != RDMA_CM_LISTEN) { 2152 ret = -ECONNABORTED; 2153 goto err1; 2154 } 2155 2156 memset(&event, 0, sizeof event); 2157 offset = cma_user_data_offset(listen_id); 2158 event.event = RDMA_CM_EVENT_CONNECT_REQUEST; 2159 if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) { 2160 conn_id = cma_new_udp_id(&listen_id->id, ib_event, net_dev); 2161 event.param.ud.private_data = (char *)ib_event->private_data + offset; 2162 event.param.ud.private_data_len = 2163 IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset; 2164 } else { 2165 conn_id = cma_new_conn_id(&listen_id->id, ib_event, net_dev); 2166 cma_set_req_event_data(&event, &ib_event->param.req_rcvd, 2167 ib_event->private_data, offset); 2168 } 2169 if (!conn_id) { 2170 ret = -ENOMEM; 2171 goto err1; 2172 } 2173 2174 mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); 2175 ret = cma_acquire_dev(conn_id, listen_id); 2176 if (ret) 2177 goto err2; 2178 2179 conn_id->cm_id.ib = cm_id; 2180 cm_id->context = conn_id; 2181 cm_id->cm_handler = cma_ib_handler; 2182 2183 /* 2184 * Protect against the user destroying conn_id from another thread 2185 * until we're done accessing it. 2186 */ 2187 atomic_inc(&conn_id->refcount); 2188 ret = conn_id->id.event_handler(&conn_id->id, &event); 2189 if (ret) 2190 goto err3; 2191 /* 2192 * Acquire mutex to prevent user executing rdma_destroy_id() 2193 * while we're accessing the cm_id. 2194 */ 2195 mutex_lock(&lock); 2196 if (cma_comp(conn_id, RDMA_CM_CONNECT) && 2197 (conn_id->id.qp_type != IB_QPT_UD)) 2198 ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); 2199 mutex_unlock(&lock); 2200 mutex_unlock(&conn_id->handler_mutex); 2201 mutex_unlock(&listen_id->handler_mutex); 2202 cma_deref_id(conn_id); 2203 if (net_dev) 2204 dev_put(net_dev); 2205 return 0; 2206 2207 err3: 2208 cma_deref_id(conn_id); 2209 /* Destroy the CM ID by returning a non-zero value. */ 2210 conn_id->cm_id.ib = NULL; 2211 err2: 2212 cma_exch(conn_id, RDMA_CM_DESTROYING); 2213 mutex_unlock(&conn_id->handler_mutex); 2214 err1: 2215 mutex_unlock(&listen_id->handler_mutex); 2216 if (conn_id) 2217 rdma_destroy_id(&conn_id->id); 2218 2219 net_dev_put: 2220 if (net_dev) 2221 dev_put(net_dev); 2222 2223 return ret; 2224 } 2225 2226 __be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr) 2227 { 2228 if (addr->sa_family == AF_IB) 2229 return ((struct sockaddr_ib *) addr)->sib_sid; 2230 2231 return cpu_to_be64(((u64)id->ps << 16) + be16_to_cpu(cma_port(addr))); 2232 } 2233 EXPORT_SYMBOL(rdma_get_service_id); 2234 2235 static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) 2236 { 2237 struct rdma_id_private *id_priv = iw_id->context; 2238 struct rdma_cm_event event; 2239 int ret = 0; 2240 struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; 2241 struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; 2242 2243 mutex_lock(&id_priv->handler_mutex); 2244 if (id_priv->state != RDMA_CM_CONNECT) 2245 goto out; 2246 2247 memset(&event, 0, sizeof event); 2248 switch (iw_event->event) { 2249 case IW_CM_EVENT_CLOSE: 2250 event.event = RDMA_CM_EVENT_DISCONNECTED; 2251 break; 2252 case IW_CM_EVENT_CONNECT_REPLY: 2253 memcpy(cma_src_addr(id_priv), laddr, 2254 rdma_addr_size(laddr)); 2255 memcpy(cma_dst_addr(id_priv), raddr, 2256 rdma_addr_size(raddr)); 2257 switch (iw_event->status) { 2258 case 0: 2259 event.event = RDMA_CM_EVENT_ESTABLISHED; 2260 event.param.conn.initiator_depth = iw_event->ird; 2261 event.param.conn.responder_resources = iw_event->ord; 2262 break; 2263 case -ECONNRESET: 2264 case -ECONNREFUSED: 2265 event.event = RDMA_CM_EVENT_REJECTED; 2266 break; 2267 case -ETIMEDOUT: 2268 event.event = RDMA_CM_EVENT_UNREACHABLE; 2269 break; 2270 default: 2271 event.event = RDMA_CM_EVENT_CONNECT_ERROR; 2272 break; 2273 } 2274 break; 2275 case IW_CM_EVENT_ESTABLISHED: 2276 event.event = RDMA_CM_EVENT_ESTABLISHED; 2277 event.param.conn.initiator_depth = iw_event->ird; 2278 event.param.conn.responder_resources = iw_event->ord; 2279 break; 2280 default: 2281 BUG_ON(1); 2282 } 2283 2284 event.status = iw_event->status; 2285 event.param.conn.private_data = iw_event->private_data; 2286 event.param.conn.private_data_len = iw_event->private_data_len; 2287 ret = id_priv->id.event_handler(&id_priv->id, &event); 2288 if (ret) { 2289 /* Destroy the CM ID by returning a non-zero value. */ 2290 id_priv->cm_id.iw = NULL; 2291 cma_exch(id_priv, RDMA_CM_DESTROYING); 2292 mutex_unlock(&id_priv->handler_mutex); 2293 rdma_destroy_id(&id_priv->id); 2294 return ret; 2295 } 2296 2297 out: 2298 mutex_unlock(&id_priv->handler_mutex); 2299 return ret; 2300 } 2301 2302 static int iw_conn_req_handler(struct iw_cm_id *cm_id, 2303 struct iw_cm_event *iw_event) 2304 { 2305 struct rdma_cm_id *new_cm_id; 2306 struct rdma_id_private *listen_id, *conn_id; 2307 struct rdma_cm_event event; 2308 int ret = -ECONNABORTED; 2309 struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; 2310 struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; 2311 2312 listen_id = cm_id->context; 2313 2314 mutex_lock(&listen_id->handler_mutex); 2315 if (listen_id->state != RDMA_CM_LISTEN) 2316 goto out; 2317 2318 /* Create a new RDMA id for the new IW CM ID */ 2319 new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net, 2320 listen_id->id.event_handler, 2321 listen_id->id.context, 2322 RDMA_PS_TCP, IB_QPT_RC); 2323 if (IS_ERR(new_cm_id)) { 2324 ret = -ENOMEM; 2325 goto out; 2326 } 2327 conn_id = container_of(new_cm_id, struct rdma_id_private, id); 2328 mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); 2329 conn_id->state = RDMA_CM_CONNECT; 2330 2331 ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr); 2332 if (ret) { 2333 mutex_unlock(&conn_id->handler_mutex); 2334 rdma_destroy_id(new_cm_id); 2335 goto out; 2336 } 2337 2338 ret = cma_acquire_dev(conn_id, listen_id); 2339 if (ret) { 2340 mutex_unlock(&conn_id->handler_mutex); 2341 rdma_destroy_id(new_cm_id); 2342 goto out; 2343 } 2344 2345 conn_id->cm_id.iw = cm_id; 2346 cm_id->context = conn_id; 2347 cm_id->cm_handler = cma_iw_handler; 2348 2349 memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr)); 2350 memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr)); 2351 2352 memset(&event, 0, sizeof event); 2353 event.event = RDMA_CM_EVENT_CONNECT_REQUEST; 2354 event.param.conn.private_data = iw_event->private_data; 2355 event.param.conn.private_data_len = iw_event->private_data_len; 2356 event.param.conn.initiator_depth = iw_event->ird; 2357 event.param.conn.responder_resources = iw_event->ord; 2358 2359 /* 2360 * Protect against the user destroying conn_id from another thread 2361 * until we're done accessing it. 2362 */ 2363 atomic_inc(&conn_id->refcount); 2364 ret = conn_id->id.event_handler(&conn_id->id, &event); 2365 if (ret) { 2366 /* User wants to destroy the CM ID */ 2367 conn_id->cm_id.iw = NULL; 2368 cma_exch(conn_id, RDMA_CM_DESTROYING); 2369 mutex_unlock(&conn_id->handler_mutex); 2370 cma_deref_id(conn_id); 2371 rdma_destroy_id(&conn_id->id); 2372 goto out; 2373 } 2374 2375 mutex_unlock(&conn_id->handler_mutex); 2376 cma_deref_id(conn_id); 2377 2378 out: 2379 mutex_unlock(&listen_id->handler_mutex); 2380 return ret; 2381 } 2382 2383 static int cma_ib_listen(struct rdma_id_private *id_priv) 2384 { 2385 struct sockaddr *addr; 2386 struct ib_cm_id *id; 2387 __be64 svc_id; 2388 2389 addr = cma_src_addr(id_priv); 2390 svc_id = rdma_get_service_id(&id_priv->id, addr); 2391 id = ib_cm_insert_listen(id_priv->id.device, cma_req_handler, svc_id); 2392 if (IS_ERR(id)) 2393 return PTR_ERR(id); 2394 id_priv->cm_id.ib = id; 2395 2396 return 0; 2397 } 2398 2399 static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog) 2400 { 2401 int ret; 2402 struct iw_cm_id *id; 2403 2404 id = iw_create_cm_id(id_priv->id.device, 2405 iw_conn_req_handler, 2406 id_priv); 2407 if (IS_ERR(id)) 2408 return PTR_ERR(id); 2409 2410 id->tos = id_priv->tos; 2411 id_priv->cm_id.iw = id; 2412 2413 memcpy(&id_priv->cm_id.iw->local_addr, cma_src_addr(id_priv), 2414 rdma_addr_size(cma_src_addr(id_priv))); 2415 2416 ret = iw_cm_listen(id_priv->cm_id.iw, backlog); 2417 2418 if (ret) { 2419 iw_destroy_cm_id(id_priv->cm_id.iw); 2420 id_priv->cm_id.iw = NULL; 2421 } 2422 2423 return ret; 2424 } 2425 2426 static int cma_listen_handler(struct rdma_cm_id *id, 2427 struct rdma_cm_event *event) 2428 { 2429 struct rdma_id_private *id_priv = id->context; 2430 2431 id->context = id_priv->id.context; 2432 id->event_handler = id_priv->id.event_handler; 2433 return id_priv->id.event_handler(id, event); 2434 } 2435 2436 static void cma_listen_on_dev(struct rdma_id_private *id_priv, 2437 struct cma_device *cma_dev) 2438 { 2439 struct rdma_id_private *dev_id_priv; 2440 struct rdma_cm_id *id; 2441 struct vnet *net = id_priv->id.route.addr.dev_addr.net; 2442 int ret; 2443 2444 if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1)) 2445 return; 2446 2447 id = rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps, 2448 id_priv->id.qp_type); 2449 if (IS_ERR(id)) 2450 return; 2451 2452 dev_id_priv = container_of(id, struct rdma_id_private, id); 2453 2454 dev_id_priv->state = RDMA_CM_ADDR_BOUND; 2455 memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv), 2456 rdma_addr_size(cma_src_addr(id_priv))); 2457 2458 _cma_attach_to_dev(dev_id_priv, cma_dev); 2459 list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); 2460 atomic_inc(&id_priv->refcount); 2461 dev_id_priv->internal_id = 1; 2462 dev_id_priv->afonly = id_priv->afonly; 2463 2464 ret = rdma_listen(id, id_priv->backlog); 2465 if (ret) 2466 pr_warn("RDMA CMA: cma_listen_on_dev, error %d, listening on device %s\n", 2467 ret, cma_dev->device->name); 2468 } 2469 2470 static void cma_listen_on_all(struct rdma_id_private *id_priv) 2471 { 2472 struct cma_device *cma_dev; 2473 2474 mutex_lock(&lock); 2475 list_add_tail(&id_priv->list, &listen_any_list); 2476 list_for_each_entry(cma_dev, &dev_list, list) 2477 cma_listen_on_dev(id_priv, cma_dev); 2478 mutex_unlock(&lock); 2479 } 2480 2481 void rdma_set_service_type(struct rdma_cm_id *id, int tos) 2482 { 2483 struct rdma_id_private *id_priv; 2484 2485 id_priv = container_of(id, struct rdma_id_private, id); 2486 id_priv->tos = (u8) tos; 2487 } 2488 EXPORT_SYMBOL(rdma_set_service_type); 2489 2490 static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec, 2491 void *context) 2492 { 2493 struct cma_work *work = context; 2494 struct rdma_route *route; 2495 2496 route = &work->id->id.route; 2497 2498 if (!status) { 2499 route->num_paths = 1; 2500 *route->path_rec = *path_rec; 2501 } else { 2502 work->old_state = RDMA_CM_ROUTE_QUERY; 2503 work->new_state = RDMA_CM_ADDR_RESOLVED; 2504 work->event.event = RDMA_CM_EVENT_ROUTE_ERROR; 2505 work->event.status = status; 2506 } 2507 2508 queue_work(cma_wq, &work->work); 2509 } 2510 2511 static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms, 2512 struct cma_work *work) 2513 { 2514 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 2515 struct ib_sa_path_rec path_rec; 2516 ib_sa_comp_mask comp_mask; 2517 struct sockaddr_in6 *sin6; 2518 struct sockaddr_ib *sib; 2519 2520 memset(&path_rec, 0, sizeof path_rec); 2521 rdma_addr_get_sgid(dev_addr, &path_rec.sgid); 2522 rdma_addr_get_dgid(dev_addr, &path_rec.dgid); 2523 path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); 2524 path_rec.numb_path = 1; 2525 path_rec.reversible = 1; 2526 path_rec.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); 2527 2528 comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | 2529 IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH | 2530 IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID; 2531 2532 switch (cma_family(id_priv)) { 2533 case AF_INET: 2534 path_rec.qos_class = cpu_to_be16((u16) id_priv->tos); 2535 comp_mask |= IB_SA_PATH_REC_QOS_CLASS; 2536 break; 2537 case AF_INET6: 2538 sin6 = (struct sockaddr_in6 *) cma_src_addr(id_priv); 2539 path_rec.traffic_class = (u8) (be32_to_cpu(sin6->sin6_flowinfo) >> 20); 2540 comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS; 2541 break; 2542 case AF_IB: 2543 sib = (struct sockaddr_ib *) cma_src_addr(id_priv); 2544 path_rec.traffic_class = (u8) (be32_to_cpu(sib->sib_flowinfo) >> 20); 2545 comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS; 2546 break; 2547 } 2548 2549 id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device, 2550 id_priv->id.port_num, &path_rec, 2551 comp_mask, timeout_ms, 2552 GFP_KERNEL, cma_query_handler, 2553 work, &id_priv->query); 2554 2555 return (id_priv->query_id < 0) ? id_priv->query_id : 0; 2556 } 2557 2558 static void cma_work_handler(struct work_struct *_work) 2559 { 2560 struct cma_work *work = container_of(_work, struct cma_work, work); 2561 struct rdma_id_private *id_priv = work->id; 2562 int destroy = 0; 2563 2564 mutex_lock(&id_priv->handler_mutex); 2565 if (!cma_comp_exch(id_priv, work->old_state, work->new_state)) 2566 goto out; 2567 2568 if (id_priv->id.event_handler(&id_priv->id, &work->event)) { 2569 cma_exch(id_priv, RDMA_CM_DESTROYING); 2570 destroy = 1; 2571 } 2572 out: 2573 mutex_unlock(&id_priv->handler_mutex); 2574 cma_deref_id(id_priv); 2575 if (destroy) 2576 rdma_destroy_id(&id_priv->id); 2577 kfree(work); 2578 } 2579 2580 static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms) 2581 { 2582 struct rdma_route *route = &id_priv->id.route; 2583 struct cma_work *work; 2584 int ret; 2585 2586 work = kzalloc(sizeof *work, GFP_KERNEL); 2587 if (!work) 2588 return -ENOMEM; 2589 2590 work->id = id_priv; 2591 INIT_WORK(&work->work, cma_work_handler); 2592 work->old_state = RDMA_CM_ROUTE_QUERY; 2593 work->new_state = RDMA_CM_ROUTE_RESOLVED; 2594 work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; 2595 2596 route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL); 2597 if (!route->path_rec) { 2598 ret = -ENOMEM; 2599 goto err1; 2600 } 2601 2602 ret = cma_query_ib_route(id_priv, timeout_ms, work); 2603 if (ret) 2604 goto err2; 2605 2606 return 0; 2607 err2: 2608 kfree(route->path_rec); 2609 route->path_rec = NULL; 2610 err1: 2611 kfree(work); 2612 return ret; 2613 } 2614 2615 int rdma_set_ib_paths(struct rdma_cm_id *id, 2616 struct ib_sa_path_rec *path_rec, int num_paths) 2617 { 2618 struct rdma_id_private *id_priv; 2619 int ret; 2620 2621 id_priv = container_of(id, struct rdma_id_private, id); 2622 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, 2623 RDMA_CM_ROUTE_RESOLVED)) 2624 return -EINVAL; 2625 2626 id->route.path_rec = kmemdup(path_rec, sizeof *path_rec * num_paths, 2627 GFP_KERNEL); 2628 if (!id->route.path_rec) { 2629 ret = -ENOMEM; 2630 goto err; 2631 } 2632 2633 id->route.num_paths = num_paths; 2634 return 0; 2635 err: 2636 cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_ADDR_RESOLVED); 2637 return ret; 2638 } 2639 EXPORT_SYMBOL(rdma_set_ib_paths); 2640 2641 static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms) 2642 { 2643 struct cma_work *work; 2644 2645 work = kzalloc(sizeof *work, GFP_KERNEL); 2646 if (!work) 2647 return -ENOMEM; 2648 2649 work->id = id_priv; 2650 INIT_WORK(&work->work, cma_work_handler); 2651 work->old_state = RDMA_CM_ROUTE_QUERY; 2652 work->new_state = RDMA_CM_ROUTE_RESOLVED; 2653 work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; 2654 queue_work(cma_wq, &work->work); 2655 return 0; 2656 } 2657 2658 static int iboe_tos_to_sl(struct net_device *ndev, int tos) 2659 { 2660 /* get service level, SL, from IPv4 type of service, TOS */ 2661 int sl = (tos >> 5) & 0x7; 2662 2663 /* final mappings are done by the vendor specific drivers */ 2664 return sl; 2665 } 2666 2667 static enum ib_gid_type cma_route_gid_type(enum rdma_network_type network_type, 2668 unsigned long supported_gids, 2669 enum ib_gid_type default_gid) 2670 { 2671 if ((network_type == RDMA_NETWORK_IPV4 || 2672 network_type == RDMA_NETWORK_IPV6) && 2673 test_bit(IB_GID_TYPE_ROCE_UDP_ENCAP, &supported_gids)) 2674 return IB_GID_TYPE_ROCE_UDP_ENCAP; 2675 2676 return default_gid; 2677 } 2678 2679 static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) 2680 { 2681 struct rdma_route *route = &id_priv->id.route; 2682 struct rdma_addr *addr = &route->addr; 2683 struct cma_work *work; 2684 int ret; 2685 struct net_device *ndev = NULL; 2686 2687 2688 work = kzalloc(sizeof *work, GFP_KERNEL); 2689 if (!work) 2690 return -ENOMEM; 2691 2692 work->id = id_priv; 2693 INIT_WORK(&work->work, cma_work_handler); 2694 2695 route->path_rec = kzalloc(sizeof *route->path_rec, GFP_KERNEL); 2696 if (!route->path_rec) { 2697 ret = -ENOMEM; 2698 goto err1; 2699 } 2700 2701 route->num_paths = 1; 2702 2703 if (addr->dev_addr.bound_dev_if) { 2704 unsigned long supported_gids; 2705 2706 ndev = dev_get_by_index(addr->dev_addr.net, 2707 addr->dev_addr.bound_dev_if); 2708 if (!ndev) { 2709 ret = -ENODEV; 2710 goto err2; 2711 } 2712 2713 route->path_rec->net = ndev->if_vnet; 2714 route->path_rec->ifindex = ndev->if_index; 2715 supported_gids = roce_gid_type_mask_support(id_priv->id.device, 2716 id_priv->id.port_num); 2717 route->path_rec->gid_type = 2718 cma_route_gid_type(addr->dev_addr.network, 2719 supported_gids, 2720 id_priv->gid_type); 2721 } 2722 if (!ndev) { 2723 ret = -ENODEV; 2724 goto err2; 2725 } 2726 2727 memcpy(route->path_rec->dmac, addr->dev_addr.dst_dev_addr, ETH_ALEN); 2728 2729 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, 2730 &route->path_rec->sgid); 2731 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr, 2732 &route->path_rec->dgid); 2733 2734 /* Use the hint from IP Stack to select GID Type */ 2735 if (route->path_rec->gid_type < ib_network_to_gid_type(addr->dev_addr.network)) 2736 route->path_rec->gid_type = ib_network_to_gid_type(addr->dev_addr.network); 2737 if (((struct sockaddr *)&id_priv->id.route.addr.dst_addr)->sa_family != AF_IB) 2738 /* TODO: get the hoplimit from the inet/inet6 device */ 2739 route->path_rec->hop_limit = addr->dev_addr.hoplimit; 2740 else 2741 route->path_rec->hop_limit = 1; 2742 route->path_rec->reversible = 1; 2743 route->path_rec->pkey = cpu_to_be16(0xffff); 2744 route->path_rec->mtu_selector = IB_SA_EQ; 2745 route->path_rec->sl = iboe_tos_to_sl(ndev, id_priv->tos); 2746 route->path_rec->traffic_class = id_priv->tos; 2747 route->path_rec->mtu = iboe_get_mtu(ndev->if_mtu); 2748 route->path_rec->rate_selector = IB_SA_EQ; 2749 route->path_rec->rate = iboe_get_rate(ndev); 2750 dev_put(ndev); 2751 route->path_rec->packet_life_time_selector = IB_SA_EQ; 2752 route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME; 2753 if (!route->path_rec->mtu) { 2754 ret = -EINVAL; 2755 goto err2; 2756 } 2757 2758 work->old_state = RDMA_CM_ROUTE_QUERY; 2759 work->new_state = RDMA_CM_ROUTE_RESOLVED; 2760 work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; 2761 work->event.status = 0; 2762 2763 queue_work(cma_wq, &work->work); 2764 2765 return 0; 2766 2767 err2: 2768 kfree(route->path_rec); 2769 route->path_rec = NULL; 2770 err1: 2771 kfree(work); 2772 return ret; 2773 } 2774 2775 int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms) 2776 { 2777 struct rdma_id_private *id_priv; 2778 int ret; 2779 2780 id_priv = container_of(id, struct rdma_id_private, id); 2781 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_QUERY)) 2782 return -EINVAL; 2783 2784 atomic_inc(&id_priv->refcount); 2785 if (rdma_cap_ib_sa(id->device, id->port_num)) 2786 ret = cma_resolve_ib_route(id_priv, timeout_ms); 2787 else if (rdma_protocol_roce(id->device, id->port_num)) 2788 ret = cma_resolve_iboe_route(id_priv); 2789 else if (rdma_protocol_iwarp(id->device, id->port_num)) 2790 ret = cma_resolve_iw_route(id_priv, timeout_ms); 2791 else 2792 ret = -ENOSYS; 2793 2794 if (ret) 2795 goto err; 2796 2797 return 0; 2798 err: 2799 cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, RDMA_CM_ADDR_RESOLVED); 2800 cma_deref_id(id_priv); 2801 return ret; 2802 } 2803 EXPORT_SYMBOL(rdma_resolve_route); 2804 2805 static void cma_set_loopback(struct sockaddr *addr) 2806 { 2807 switch (addr->sa_family) { 2808 case AF_INET: 2809 ((struct sockaddr_in *) addr)->sin_addr.s_addr = htonl(INADDR_LOOPBACK); 2810 break; 2811 case AF_INET6: 2812 ipv6_addr_set(&((struct sockaddr_in6 *) addr)->sin6_addr, 2813 0, 0, 0, htonl(1)); 2814 break; 2815 default: 2816 ib_addr_set(&((struct sockaddr_ib *) addr)->sib_addr, 2817 0, 0, 0, htonl(1)); 2818 break; 2819 } 2820 } 2821 2822 static int cma_bind_loopback(struct rdma_id_private *id_priv) 2823 { 2824 struct cma_device *cma_dev, *cur_dev; 2825 struct ib_port_attr port_attr; 2826 union ib_gid gid; 2827 u16 pkey; 2828 int ret; 2829 u8 p; 2830 2831 cma_dev = NULL; 2832 mutex_lock(&lock); 2833 list_for_each_entry(cur_dev, &dev_list, list) { 2834 if (cma_family(id_priv) == AF_IB && 2835 !rdma_cap_ib_cm(cur_dev->device, 1)) 2836 continue; 2837 2838 if (!cma_dev) 2839 cma_dev = cur_dev; 2840 2841 for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) { 2842 if (!ib_query_port(cur_dev->device, p, &port_attr) && 2843 port_attr.state == IB_PORT_ACTIVE) { 2844 cma_dev = cur_dev; 2845 goto port_found; 2846 } 2847 } 2848 } 2849 2850 if (!cma_dev) { 2851 ret = -ENODEV; 2852 goto out; 2853 } 2854 2855 p = 1; 2856 2857 port_found: 2858 ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid, NULL); 2859 if (ret) 2860 goto out; 2861 2862 ret = ib_get_cached_pkey(cma_dev->device, p, 0, &pkey); 2863 if (ret) 2864 goto out; 2865 2866 id_priv->id.route.addr.dev_addr.dev_type = 2867 (rdma_protocol_ib(cma_dev->device, p)) ? 2868 ARPHRD_INFINIBAND : ARPHRD_ETHER; 2869 2870 rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid); 2871 ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey); 2872 id_priv->id.port_num = p; 2873 cma_attach_to_dev(id_priv, cma_dev); 2874 cma_set_loopback(cma_src_addr(id_priv)); 2875 out: 2876 mutex_unlock(&lock); 2877 return ret; 2878 } 2879 2880 static void addr_handler(int status, struct sockaddr *src_addr, 2881 struct rdma_dev_addr *dev_addr, void *context) 2882 { 2883 struct rdma_id_private *id_priv = context; 2884 struct rdma_cm_event event; 2885 2886 memset(&event, 0, sizeof event); 2887 mutex_lock(&id_priv->handler_mutex); 2888 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, 2889 RDMA_CM_ADDR_RESOLVED)) 2890 goto out; 2891 2892 memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr)); 2893 if (!status && !id_priv->cma_dev) 2894 status = cma_acquire_dev(id_priv, NULL); 2895 2896 if (status) { 2897 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, 2898 RDMA_CM_ADDR_BOUND)) 2899 goto out; 2900 event.event = RDMA_CM_EVENT_ADDR_ERROR; 2901 event.status = status; 2902 } else 2903 event.event = RDMA_CM_EVENT_ADDR_RESOLVED; 2904 2905 if (id_priv->id.event_handler(&id_priv->id, &event)) { 2906 cma_exch(id_priv, RDMA_CM_DESTROYING); 2907 mutex_unlock(&id_priv->handler_mutex); 2908 cma_deref_id(id_priv); 2909 rdma_destroy_id(&id_priv->id); 2910 return; 2911 } 2912 out: 2913 mutex_unlock(&id_priv->handler_mutex); 2914 cma_deref_id(id_priv); 2915 } 2916 2917 static int cma_resolve_loopback(struct rdma_id_private *id_priv) 2918 { 2919 struct cma_work *work; 2920 union ib_gid gid; 2921 int ret; 2922 2923 work = kzalloc(sizeof *work, GFP_KERNEL); 2924 if (!work) 2925 return -ENOMEM; 2926 2927 if (!id_priv->cma_dev) { 2928 ret = cma_bind_loopback(id_priv); 2929 if (ret) 2930 goto err; 2931 } 2932 2933 rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); 2934 rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid); 2935 2936 work->id = id_priv; 2937 INIT_WORK(&work->work, cma_work_handler); 2938 work->old_state = RDMA_CM_ADDR_QUERY; 2939 work->new_state = RDMA_CM_ADDR_RESOLVED; 2940 work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED; 2941 queue_work(cma_wq, &work->work); 2942 return 0; 2943 err: 2944 kfree(work); 2945 return ret; 2946 } 2947 2948 static int cma_resolve_ib_addr(struct rdma_id_private *id_priv) 2949 { 2950 struct cma_work *work; 2951 int ret; 2952 2953 work = kzalloc(sizeof *work, GFP_KERNEL); 2954 if (!work) 2955 return -ENOMEM; 2956 2957 if (!id_priv->cma_dev) { 2958 ret = cma_resolve_ib_dev(id_priv); 2959 if (ret) 2960 goto err; 2961 } 2962 2963 rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *) 2964 &(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr)); 2965 2966 work->id = id_priv; 2967 INIT_WORK(&work->work, cma_work_handler); 2968 work->old_state = RDMA_CM_ADDR_QUERY; 2969 work->new_state = RDMA_CM_ADDR_RESOLVED; 2970 work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED; 2971 queue_work(cma_wq, &work->work); 2972 return 0; 2973 err: 2974 kfree(work); 2975 return ret; 2976 } 2977 2978 static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, 2979 struct sockaddr *dst_addr) 2980 { 2981 if (!src_addr || !src_addr->sa_family) { 2982 src_addr = (struct sockaddr *) &id->route.addr.src_addr; 2983 src_addr->sa_family = dst_addr->sa_family; 2984 if (dst_addr->sa_family == AF_INET6) { 2985 struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *) src_addr; 2986 struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *) dst_addr; 2987 src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id; 2988 if (IN6_IS_SCOPE_LINKLOCAL(&dst_addr6->sin6_addr) || 2989 IN6_IS_ADDR_MC_INTFACELOCAL(&dst_addr6->sin6_addr)) 2990 id->route.addr.dev_addr.bound_dev_if = dst_addr6->sin6_scope_id; 2991 } else if (dst_addr->sa_family == AF_IB) { 2992 ((struct sockaddr_ib *) src_addr)->sib_pkey = 2993 ((struct sockaddr_ib *) dst_addr)->sib_pkey; 2994 } 2995 } 2996 return rdma_bind_addr(id, src_addr); 2997 } 2998 2999 int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, 3000 struct sockaddr *dst_addr, int timeout_ms) 3001 { 3002 struct rdma_id_private *id_priv; 3003 int ret; 3004 3005 id_priv = container_of(id, struct rdma_id_private, id); 3006 if (id_priv->state == RDMA_CM_IDLE) { 3007 ret = cma_bind_addr(id, src_addr, dst_addr); 3008 if (ret) 3009 return ret; 3010 } 3011 3012 if (cma_family(id_priv) != dst_addr->sa_family) 3013 return -EINVAL; 3014 3015 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) 3016 return -EINVAL; 3017 3018 atomic_inc(&id_priv->refcount); 3019 memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); 3020 if (cma_any_addr(dst_addr)) { 3021 ret = cma_resolve_loopback(id_priv); 3022 } else { 3023 if (dst_addr->sa_family == AF_IB) { 3024 ret = cma_resolve_ib_addr(id_priv); 3025 } else { 3026 ret = cma_check_linklocal(&id->route.addr.dev_addr, dst_addr); 3027 if (ret) 3028 goto err; 3029 3030 ret = rdma_resolve_ip(&addr_client, cma_src_addr(id_priv), 3031 dst_addr, &id->route.addr.dev_addr, 3032 timeout_ms, addr_handler, id_priv); 3033 } 3034 } 3035 if (ret) 3036 goto err; 3037 3038 return 0; 3039 err: 3040 cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); 3041 cma_deref_id(id_priv); 3042 return ret; 3043 } 3044 EXPORT_SYMBOL(rdma_resolve_addr); 3045 3046 int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse) 3047 { 3048 struct rdma_id_private *id_priv; 3049 unsigned long flags; 3050 int ret; 3051 3052 id_priv = container_of(id, struct rdma_id_private, id); 3053 spin_lock_irqsave(&id_priv->lock, flags); 3054 if (reuse || id_priv->state == RDMA_CM_IDLE) { 3055 id_priv->reuseaddr = reuse; 3056 ret = 0; 3057 } else { 3058 ret = -EINVAL; 3059 } 3060 spin_unlock_irqrestore(&id_priv->lock, flags); 3061 return ret; 3062 } 3063 EXPORT_SYMBOL(rdma_set_reuseaddr); 3064 3065 int rdma_set_afonly(struct rdma_cm_id *id, int afonly) 3066 { 3067 struct rdma_id_private *id_priv; 3068 unsigned long flags; 3069 int ret; 3070 3071 id_priv = container_of(id, struct rdma_id_private, id); 3072 spin_lock_irqsave(&id_priv->lock, flags); 3073 if (id_priv->state == RDMA_CM_IDLE || id_priv->state == RDMA_CM_ADDR_BOUND) { 3074 id_priv->options |= (1 << CMA_OPTION_AFONLY); 3075 id_priv->afonly = afonly; 3076 ret = 0; 3077 } else { 3078 ret = -EINVAL; 3079 } 3080 spin_unlock_irqrestore(&id_priv->lock, flags); 3081 return ret; 3082 } 3083 EXPORT_SYMBOL(rdma_set_afonly); 3084 3085 static void cma_bind_port(struct rdma_bind_list *bind_list, 3086 struct rdma_id_private *id_priv) 3087 { 3088 struct sockaddr *addr; 3089 struct sockaddr_ib *sib; 3090 u64 sid, mask; 3091 __be16 port; 3092 3093 addr = cma_src_addr(id_priv); 3094 port = htons(bind_list->port); 3095 3096 switch (addr->sa_family) { 3097 case AF_INET: 3098 ((struct sockaddr_in *) addr)->sin_port = port; 3099 break; 3100 case AF_INET6: 3101 ((struct sockaddr_in6 *) addr)->sin6_port = port; 3102 break; 3103 case AF_IB: 3104 sib = (struct sockaddr_ib *) addr; 3105 sid = be64_to_cpu(sib->sib_sid); 3106 mask = be64_to_cpu(sib->sib_sid_mask); 3107 sib->sib_sid = cpu_to_be64((sid & mask) | (u64) ntohs(port)); 3108 sib->sib_sid_mask = cpu_to_be64(~0ULL); 3109 break; 3110 } 3111 id_priv->bind_list = bind_list; 3112 hlist_add_head(&id_priv->node, &bind_list->owners); 3113 } 3114 3115 static int cma_alloc_port(enum rdma_port_space ps, 3116 struct rdma_id_private *id_priv, unsigned short snum) 3117 { 3118 struct rdma_bind_list *bind_list; 3119 int ret; 3120 3121 bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL); 3122 if (!bind_list) 3123 return -ENOMEM; 3124 3125 ret = cma_ps_alloc(id_priv->id.route.addr.dev_addr.net, ps, bind_list, 3126 snum); 3127 if (ret < 0) 3128 goto err; 3129 3130 bind_list->ps = ps; 3131 bind_list->port = (unsigned short)ret; 3132 cma_bind_port(bind_list, id_priv); 3133 return 0; 3134 err: 3135 kfree(bind_list); 3136 return ret == -ENOSPC ? -EADDRNOTAVAIL : ret; 3137 } 3138 3139 static int cma_alloc_any_port(enum rdma_port_space ps, 3140 struct rdma_id_private *id_priv) 3141 { 3142 static unsigned int last_used_port; 3143 int low, high, remaining; 3144 unsigned int rover; 3145 struct vnet *net = id_priv->id.route.addr.dev_addr.net; 3146 u32 rand; 3147 3148 inet_get_local_port_range(net, &low, &high); 3149 remaining = (high - low) + 1; 3150 get_random_bytes(&rand, sizeof(rand)); 3151 rover = rand % remaining + low; 3152 retry: 3153 if (last_used_port != rover && 3154 !cma_ps_find(net, ps, (unsigned short)rover)) { 3155 int ret = cma_alloc_port(ps, id_priv, rover); 3156 /* 3157 * Remember previously used port number in order to avoid 3158 * re-using same port immediately after it is closed. 3159 */ 3160 if (!ret) 3161 last_used_port = rover; 3162 if (ret != -EADDRNOTAVAIL) 3163 return ret; 3164 } 3165 if (--remaining) { 3166 rover++; 3167 if ((rover < low) || (rover > high)) 3168 rover = low; 3169 goto retry; 3170 } 3171 return -EADDRNOTAVAIL; 3172 } 3173 3174 /* 3175 * Check that the requested port is available. This is called when trying to 3176 * bind to a specific port, or when trying to listen on a bound port. In 3177 * the latter case, the provided id_priv may already be on the bind_list, but 3178 * we still need to check that it's okay to start listening. 3179 */ 3180 static int cma_check_port(struct rdma_bind_list *bind_list, 3181 struct rdma_id_private *id_priv, uint8_t reuseaddr) 3182 { 3183 struct rdma_id_private *cur_id; 3184 struct sockaddr *addr, *cur_addr; 3185 3186 addr = cma_src_addr(id_priv); 3187 hlist_for_each_entry(cur_id, &bind_list->owners, node) { 3188 if (id_priv == cur_id) 3189 continue; 3190 3191 if ((cur_id->state != RDMA_CM_LISTEN) && reuseaddr && 3192 cur_id->reuseaddr) 3193 continue; 3194 3195 cur_addr = cma_src_addr(cur_id); 3196 if (id_priv->afonly && cur_id->afonly && 3197 (addr->sa_family != cur_addr->sa_family)) 3198 continue; 3199 3200 if (cma_any_addr(addr) || cma_any_addr(cur_addr)) 3201 return -EADDRNOTAVAIL; 3202 3203 if (!cma_addr_cmp(addr, cur_addr)) 3204 return -EADDRINUSE; 3205 } 3206 return 0; 3207 } 3208 3209 static int cma_use_port(enum rdma_port_space ps, 3210 struct rdma_id_private *id_priv) 3211 { 3212 struct rdma_bind_list *bind_list; 3213 unsigned short snum; 3214 int ret; 3215 3216 snum = ntohs(cma_port(cma_src_addr(id_priv))); 3217 if (snum < IPPORT_RESERVED && 3218 priv_check(curthread, PRIV_NETINET_BINDANY) != 0) 3219 return -EACCES; 3220 3221 bind_list = cma_ps_find(id_priv->id.route.addr.dev_addr.net, ps, snum); 3222 if (!bind_list) { 3223 ret = cma_alloc_port(ps, id_priv, snum); 3224 } else { 3225 ret = cma_check_port(bind_list, id_priv, id_priv->reuseaddr); 3226 if (!ret) 3227 cma_bind_port(bind_list, id_priv); 3228 } 3229 return ret; 3230 } 3231 3232 static int cma_bind_listen(struct rdma_id_private *id_priv) 3233 { 3234 struct rdma_bind_list *bind_list = id_priv->bind_list; 3235 int ret = 0; 3236 3237 mutex_lock(&lock); 3238 if (bind_list->owners.first->next) 3239 ret = cma_check_port(bind_list, id_priv, 0); 3240 mutex_unlock(&lock); 3241 return ret; 3242 } 3243 3244 static enum rdma_port_space cma_select_inet_ps( 3245 struct rdma_id_private *id_priv) 3246 { 3247 switch (id_priv->id.ps) { 3248 case RDMA_PS_TCP: 3249 case RDMA_PS_UDP: 3250 case RDMA_PS_IPOIB: 3251 case RDMA_PS_IB: 3252 case RDMA_PS_SDP: 3253 return id_priv->id.ps; 3254 default: 3255 3256 return 0; 3257 } 3258 } 3259 3260 static enum rdma_port_space cma_select_ib_ps(struct rdma_id_private *id_priv) 3261 { 3262 enum rdma_port_space ps = 0; 3263 struct sockaddr_ib *sib; 3264 u64 sid_ps, mask, sid; 3265 3266 sib = (struct sockaddr_ib *) cma_src_addr(id_priv); 3267 mask = be64_to_cpu(sib->sib_sid_mask) & RDMA_IB_IP_PS_MASK; 3268 sid = be64_to_cpu(sib->sib_sid) & mask; 3269 3270 if ((id_priv->id.ps == RDMA_PS_IB) && (sid == (RDMA_IB_IP_PS_IB & mask))) { 3271 sid_ps = RDMA_IB_IP_PS_IB; 3272 ps = RDMA_PS_IB; 3273 } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_TCP)) && 3274 (sid == (RDMA_IB_IP_PS_TCP & mask))) { 3275 sid_ps = RDMA_IB_IP_PS_TCP; 3276 ps = RDMA_PS_TCP; 3277 } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_UDP)) && 3278 (sid == (RDMA_IB_IP_PS_UDP & mask))) { 3279 sid_ps = RDMA_IB_IP_PS_UDP; 3280 ps = RDMA_PS_UDP; 3281 } 3282 3283 if (ps) { 3284 sib->sib_sid = cpu_to_be64(sid_ps | ntohs(cma_port((struct sockaddr *) sib))); 3285 sib->sib_sid_mask = cpu_to_be64(RDMA_IB_IP_PS_MASK | 3286 be64_to_cpu(sib->sib_sid_mask)); 3287 } 3288 return ps; 3289 } 3290 3291 static int cma_get_port(struct rdma_id_private *id_priv) 3292 { 3293 enum rdma_port_space ps; 3294 int ret; 3295 3296 if (cma_family(id_priv) != AF_IB) 3297 ps = cma_select_inet_ps(id_priv); 3298 else 3299 ps = cma_select_ib_ps(id_priv); 3300 if (!ps) 3301 return -EPROTONOSUPPORT; 3302 3303 mutex_lock(&lock); 3304 if (cma_any_port(cma_src_addr(id_priv))) 3305 ret = cma_alloc_any_port(ps, id_priv); 3306 else 3307 ret = cma_use_port(ps, id_priv); 3308 mutex_unlock(&lock); 3309 3310 return ret; 3311 } 3312 3313 static int cma_check_linklocal(struct rdma_dev_addr *dev_addr, 3314 struct sockaddr *addr) 3315 { 3316 #ifdef INET6 3317 struct sockaddr_in6 sin6; 3318 3319 if (addr->sa_family != AF_INET6) 3320 return 0; 3321 3322 sin6 = *(struct sockaddr_in6 *)addr; 3323 3324 if (IN6_IS_SCOPE_LINKLOCAL(&sin6.sin6_addr) || 3325 IN6_IS_ADDR_MC_INTFACELOCAL(&sin6.sin6_addr)) { 3326 bool failure; 3327 3328 CURVNET_SET_QUIET(dev_addr->net); 3329 failure = sa6_recoverscope(&sin6) || sin6.sin6_scope_id == 0; 3330 CURVNET_RESTORE(); 3331 3332 /* check if IPv6 scope ID is not set */ 3333 if (failure) 3334 return -EINVAL; 3335 dev_addr->bound_dev_if = sin6.sin6_scope_id; 3336 } 3337 #endif 3338 return 0; 3339 } 3340 3341 int rdma_listen(struct rdma_cm_id *id, int backlog) 3342 { 3343 struct rdma_id_private *id_priv; 3344 int ret; 3345 3346 id_priv = container_of(id, struct rdma_id_private, id); 3347 if (id_priv->state == RDMA_CM_IDLE) { 3348 id->route.addr.src_addr.ss_family = AF_INET; 3349 ret = rdma_bind_addr(id, cma_src_addr(id_priv)); 3350 if (ret) 3351 return ret; 3352 } 3353 3354 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN)) 3355 return -EINVAL; 3356 3357 if (id_priv->reuseaddr) { 3358 ret = cma_bind_listen(id_priv); 3359 if (ret) 3360 goto err; 3361 } 3362 3363 id_priv->backlog = backlog; 3364 if (id->device) { 3365 if (rdma_cap_ib_cm(id->device, 1)) { 3366 ret = cma_ib_listen(id_priv); 3367 if (ret) 3368 goto err; 3369 } else if (rdma_cap_iw_cm(id->device, 1)) { 3370 ret = cma_iw_listen(id_priv, backlog); 3371 if (ret) 3372 goto err; 3373 } else { 3374 ret = -ENOSYS; 3375 goto err; 3376 } 3377 } else 3378 cma_listen_on_all(id_priv); 3379 3380 return 0; 3381 err: 3382 id_priv->backlog = 0; 3383 cma_comp_exch(id_priv, RDMA_CM_LISTEN, RDMA_CM_ADDR_BOUND); 3384 return ret; 3385 } 3386 EXPORT_SYMBOL(rdma_listen); 3387 3388 int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) 3389 { 3390 struct rdma_id_private *id_priv; 3391 int ret; 3392 3393 if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 && 3394 addr->sa_family != AF_IB) 3395 return -EAFNOSUPPORT; 3396 3397 id_priv = container_of(id, struct rdma_id_private, id); 3398 if (!cma_comp_exch(id_priv, RDMA_CM_IDLE, RDMA_CM_ADDR_BOUND)) 3399 return -EINVAL; 3400 3401 ret = cma_check_linklocal(&id->route.addr.dev_addr, addr); 3402 if (ret) 3403 goto err1; 3404 3405 memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr)); 3406 if (!cma_any_addr(addr)) { 3407 ret = cma_translate_addr(addr, &id->route.addr.dev_addr); 3408 if (ret) 3409 goto err1; 3410 3411 ret = cma_acquire_dev(id_priv, NULL); 3412 if (ret) 3413 goto err1; 3414 } 3415 3416 if (!(id_priv->options & (1 << CMA_OPTION_AFONLY))) { 3417 if (addr->sa_family == AF_INET) 3418 id_priv->afonly = 1; 3419 #ifdef INET6 3420 else if (addr->sa_family == AF_INET6) { 3421 CURVNET_SET_QUIET(id_priv->id.route.addr.dev_addr.net); 3422 id_priv->afonly = V_ip6_v6only; 3423 CURVNET_RESTORE(); 3424 } 3425 #endif 3426 } 3427 ret = cma_get_port(id_priv); 3428 if (ret) 3429 goto err2; 3430 3431 return 0; 3432 err2: 3433 if (id_priv->cma_dev) 3434 cma_release_dev(id_priv); 3435 err1: 3436 cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE); 3437 return ret; 3438 } 3439 EXPORT_SYMBOL(rdma_bind_addr); 3440 3441 static int sdp_format_hdr(struct sdp_hh *sdp_hdr, struct rdma_id_private *id_priv) 3442 { 3443 /* 3444 * XXXCEM: CMA just sets the version itself rather than relying on 3445 * passed in packet to have the major version set. Should we? 3446 */ 3447 if (sdp_get_majv(sdp_hdr->majv_minv) != SDP_MAJ_VERSION) 3448 return -EINVAL; 3449 3450 if (cma_family(id_priv) == AF_INET) { 3451 struct sockaddr_in *src4, *dst4; 3452 3453 src4 = (struct sockaddr_in *) cma_src_addr(id_priv); 3454 dst4 = (struct sockaddr_in *) cma_dst_addr(id_priv); 3455 3456 sdp_set_ip_ver(sdp_hdr, 4); 3457 sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr; 3458 sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr; 3459 sdp_hdr->port = src4->sin_port; 3460 } else if (cma_family(id_priv) == AF_INET6) { 3461 struct sockaddr_in6 *src6, *dst6; 3462 3463 src6 = (struct sockaddr_in6 *) cma_src_addr(id_priv); 3464 dst6 = (struct sockaddr_in6 *) cma_dst_addr(id_priv); 3465 3466 sdp_set_ip_ver(sdp_hdr, 6); 3467 sdp_hdr->src_addr.ip6 = src6->sin6_addr; 3468 sdp_hdr->dst_addr.ip6 = dst6->sin6_addr; 3469 sdp_hdr->port = src6->sin6_port; 3470 cma_ip6_clear_scope_id(&sdp_hdr->src_addr.ip6); 3471 cma_ip6_clear_scope_id(&sdp_hdr->dst_addr.ip6); 3472 } else 3473 return -EAFNOSUPPORT; 3474 return 0; 3475 } 3476 3477 static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv) 3478 { 3479 struct cma_hdr *cma_hdr; 3480 3481 if (id_priv->id.ps == RDMA_PS_SDP) 3482 return sdp_format_hdr(hdr, id_priv); 3483 3484 cma_hdr = hdr; 3485 cma_hdr->cma_version = CMA_VERSION; 3486 if (cma_family(id_priv) == AF_INET) { 3487 struct sockaddr_in *src4, *dst4; 3488 3489 src4 = (struct sockaddr_in *) cma_src_addr(id_priv); 3490 dst4 = (struct sockaddr_in *) cma_dst_addr(id_priv); 3491 3492 cma_set_ip_ver(cma_hdr, 4); 3493 cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr; 3494 cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr; 3495 cma_hdr->port = src4->sin_port; 3496 } else if (cma_family(id_priv) == AF_INET6) { 3497 struct sockaddr_in6 *src6, *dst6; 3498 3499 src6 = (struct sockaddr_in6 *) cma_src_addr(id_priv); 3500 dst6 = (struct sockaddr_in6 *) cma_dst_addr(id_priv); 3501 3502 cma_set_ip_ver(cma_hdr, 6); 3503 cma_hdr->src_addr.ip6 = src6->sin6_addr; 3504 cma_hdr->dst_addr.ip6 = dst6->sin6_addr; 3505 cma_hdr->port = src6->sin6_port; 3506 cma_ip6_clear_scope_id(&cma_hdr->src_addr.ip6); 3507 cma_ip6_clear_scope_id(&cma_hdr->dst_addr.ip6); 3508 } 3509 return 0; 3510 } 3511 3512 static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, 3513 struct ib_cm_event *ib_event) 3514 { 3515 struct rdma_id_private *id_priv = cm_id->context; 3516 struct rdma_cm_event event; 3517 struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd; 3518 int ret = 0; 3519 3520 mutex_lock(&id_priv->handler_mutex); 3521 if (id_priv->state != RDMA_CM_CONNECT) 3522 goto out; 3523 3524 memset(&event, 0, sizeof event); 3525 switch (ib_event->event) { 3526 case IB_CM_SIDR_REQ_ERROR: 3527 event.event = RDMA_CM_EVENT_UNREACHABLE; 3528 event.status = -ETIMEDOUT; 3529 break; 3530 case IB_CM_SIDR_REP_RECEIVED: 3531 event.param.ud.private_data = ib_event->private_data; 3532 event.param.ud.private_data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE; 3533 if (rep->status != IB_SIDR_SUCCESS) { 3534 event.event = RDMA_CM_EVENT_UNREACHABLE; 3535 event.status = ib_event->param.sidr_rep_rcvd.status; 3536 break; 3537 } 3538 ret = cma_set_qkey(id_priv, rep->qkey); 3539 if (ret) { 3540 event.event = RDMA_CM_EVENT_ADDR_ERROR; 3541 event.status = ret; 3542 break; 3543 } 3544 ret = ib_init_ah_from_path(id_priv->id.device, 3545 id_priv->id.port_num, 3546 id_priv->id.route.path_rec, 3547 &event.param.ud.ah_attr); 3548 if (ret) { 3549 event.event = RDMA_CM_EVENT_ADDR_ERROR; 3550 event.status = ret; 3551 break; 3552 } 3553 event.param.ud.qp_num = rep->qpn; 3554 event.param.ud.qkey = rep->qkey; 3555 event.event = RDMA_CM_EVENT_ESTABLISHED; 3556 event.status = 0; 3557 break; 3558 default: 3559 pr_err("RDMA CMA: unexpected IB CM event: %d\n", 3560 ib_event->event); 3561 goto out; 3562 } 3563 3564 ret = id_priv->id.event_handler(&id_priv->id, &event); 3565 if (ret) { 3566 /* Destroy the CM ID by returning a non-zero value. */ 3567 id_priv->cm_id.ib = NULL; 3568 cma_exch(id_priv, RDMA_CM_DESTROYING); 3569 mutex_unlock(&id_priv->handler_mutex); 3570 rdma_destroy_id(&id_priv->id); 3571 return ret; 3572 } 3573 out: 3574 mutex_unlock(&id_priv->handler_mutex); 3575 return ret; 3576 } 3577 3578 static int cma_resolve_ib_udp(struct rdma_id_private *id_priv, 3579 struct rdma_conn_param *conn_param) 3580 { 3581 struct ib_cm_sidr_req_param req; 3582 struct ib_cm_id *id; 3583 void *private_data; 3584 int offset, ret; 3585 3586 memset(&req, 0, sizeof req); 3587 offset = cma_user_data_offset(id_priv); 3588 req.private_data_len = offset + conn_param->private_data_len; 3589 if (req.private_data_len < conn_param->private_data_len) 3590 return -EINVAL; 3591 3592 if (req.private_data_len) { 3593 private_data = kzalloc(req.private_data_len, GFP_ATOMIC); 3594 if (!private_data) 3595 return -ENOMEM; 3596 } else { 3597 private_data = NULL; 3598 } 3599 3600 if (conn_param->private_data && conn_param->private_data_len) 3601 memcpy((char *)private_data + offset, conn_param->private_data, 3602 conn_param->private_data_len); 3603 3604 if (private_data) { 3605 ret = cma_format_hdr(private_data, id_priv); 3606 if (ret) 3607 goto out; 3608 req.private_data = private_data; 3609 } 3610 3611 id = ib_create_cm_id(id_priv->id.device, cma_sidr_rep_handler, 3612 id_priv); 3613 if (IS_ERR(id)) { 3614 ret = PTR_ERR(id); 3615 goto out; 3616 } 3617 id_priv->cm_id.ib = id; 3618 3619 req.path = id_priv->id.route.path_rec; 3620 req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); 3621 req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8); 3622 req.max_cm_retries = CMA_MAX_CM_RETRIES; 3623 3624 ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req); 3625 if (ret) { 3626 ib_destroy_cm_id(id_priv->cm_id.ib); 3627 id_priv->cm_id.ib = NULL; 3628 } 3629 out: 3630 kfree(private_data); 3631 return ret; 3632 } 3633 3634 static int cma_connect_ib(struct rdma_id_private *id_priv, 3635 struct rdma_conn_param *conn_param) 3636 { 3637 struct ib_cm_req_param req; 3638 struct rdma_route *route; 3639 void *private_data; 3640 struct ib_cm_id *id; 3641 int offset, ret; 3642 3643 memset(&req, 0, sizeof req); 3644 offset = cma_user_data_offset(id_priv); 3645 req.private_data_len = offset + conn_param->private_data_len; 3646 if (req.private_data_len < conn_param->private_data_len) 3647 return -EINVAL; 3648 3649 if (req.private_data_len) { 3650 private_data = kzalloc(req.private_data_len, GFP_ATOMIC); 3651 if (!private_data) 3652 return -ENOMEM; 3653 } else { 3654 private_data = NULL; 3655 } 3656 3657 if (conn_param->private_data && conn_param->private_data_len) 3658 memcpy((char *)private_data + offset, conn_param->private_data, 3659 conn_param->private_data_len); 3660 3661 id = ib_create_cm_id(id_priv->id.device, cma_ib_handler, id_priv); 3662 if (IS_ERR(id)) { 3663 ret = PTR_ERR(id); 3664 goto out; 3665 } 3666 id_priv->cm_id.ib = id; 3667 3668 route = &id_priv->id.route; 3669 if (private_data) { 3670 ret = cma_format_hdr(private_data, id_priv); 3671 if (ret) 3672 goto out; 3673 req.private_data = private_data; 3674 } 3675 3676 req.primary_path = &route->path_rec[0]; 3677 if (route->num_paths == 2) 3678 req.alternate_path = &route->path_rec[1]; 3679 3680 req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); 3681 req.qp_num = id_priv->qp_num; 3682 req.qp_type = id_priv->id.qp_type; 3683 req.starting_psn = id_priv->seq_num; 3684 req.responder_resources = conn_param->responder_resources; 3685 req.initiator_depth = conn_param->initiator_depth; 3686 req.flow_control = conn_param->flow_control; 3687 req.retry_count = min_t(u8, 7, conn_param->retry_count); 3688 req.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count); 3689 req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT; 3690 req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT; 3691 req.max_cm_retries = CMA_MAX_CM_RETRIES; 3692 req.srq = id_priv->srq ? 1 : 0; 3693 3694 ret = ib_send_cm_req(id_priv->cm_id.ib, &req); 3695 out: 3696 if (ret && !IS_ERR(id)) { 3697 ib_destroy_cm_id(id); 3698 id_priv->cm_id.ib = NULL; 3699 } 3700 3701 kfree(private_data); 3702 return ret; 3703 } 3704 3705 static int cma_connect_iw(struct rdma_id_private *id_priv, 3706 struct rdma_conn_param *conn_param) 3707 { 3708 struct iw_cm_id *cm_id; 3709 int ret; 3710 struct iw_cm_conn_param iw_param; 3711 3712 cm_id = iw_create_cm_id(id_priv->id.device, cma_iw_handler, id_priv); 3713 if (IS_ERR(cm_id)) 3714 return PTR_ERR(cm_id); 3715 3716 cm_id->tos = id_priv->tos; 3717 id_priv->cm_id.iw = cm_id; 3718 3719 memcpy(&cm_id->local_addr, cma_src_addr(id_priv), 3720 rdma_addr_size(cma_src_addr(id_priv))); 3721 memcpy(&cm_id->remote_addr, cma_dst_addr(id_priv), 3722 rdma_addr_size(cma_dst_addr(id_priv))); 3723 3724 ret = cma_modify_qp_rtr(id_priv, conn_param); 3725 if (ret) 3726 goto out; 3727 3728 if (conn_param) { 3729 iw_param.ord = conn_param->initiator_depth; 3730 iw_param.ird = conn_param->responder_resources; 3731 iw_param.private_data = conn_param->private_data; 3732 iw_param.private_data_len = conn_param->private_data_len; 3733 iw_param.qpn = id_priv->id.qp ? id_priv->qp_num : conn_param->qp_num; 3734 } else { 3735 memset(&iw_param, 0, sizeof iw_param); 3736 iw_param.qpn = id_priv->qp_num; 3737 } 3738 ret = iw_cm_connect(cm_id, &iw_param); 3739 out: 3740 if (ret) { 3741 iw_destroy_cm_id(cm_id); 3742 id_priv->cm_id.iw = NULL; 3743 } 3744 return ret; 3745 } 3746 3747 int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) 3748 { 3749 struct rdma_id_private *id_priv; 3750 int ret; 3751 3752 id_priv = container_of(id, struct rdma_id_private, id); 3753 if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT)) 3754 return -EINVAL; 3755 3756 if (!id->qp) { 3757 id_priv->qp_num = conn_param->qp_num; 3758 id_priv->srq = conn_param->srq; 3759 } 3760 3761 if (rdma_cap_ib_cm(id->device, id->port_num)) { 3762 if (id->qp_type == IB_QPT_UD) 3763 ret = cma_resolve_ib_udp(id_priv, conn_param); 3764 else 3765 ret = cma_connect_ib(id_priv, conn_param); 3766 } else if (rdma_cap_iw_cm(id->device, id->port_num)) 3767 ret = cma_connect_iw(id_priv, conn_param); 3768 else 3769 ret = -ENOSYS; 3770 if (ret) 3771 goto err; 3772 3773 return 0; 3774 err: 3775 cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_ROUTE_RESOLVED); 3776 return ret; 3777 } 3778 EXPORT_SYMBOL(rdma_connect); 3779 3780 static int cma_accept_ib(struct rdma_id_private *id_priv, 3781 struct rdma_conn_param *conn_param) 3782 { 3783 struct ib_cm_rep_param rep; 3784 int ret; 3785 3786 ret = cma_modify_qp_rtr(id_priv, conn_param); 3787 if (ret) 3788 goto out; 3789 3790 ret = cma_modify_qp_rts(id_priv, conn_param); 3791 if (ret) 3792 goto out; 3793 3794 memset(&rep, 0, sizeof rep); 3795 rep.qp_num = id_priv->qp_num; 3796 rep.starting_psn = id_priv->seq_num; 3797 rep.private_data = conn_param->private_data; 3798 rep.private_data_len = conn_param->private_data_len; 3799 rep.responder_resources = conn_param->responder_resources; 3800 rep.initiator_depth = conn_param->initiator_depth; 3801 rep.failover_accepted = 0; 3802 rep.flow_control = conn_param->flow_control; 3803 rep.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count); 3804 rep.srq = id_priv->srq ? 1 : 0; 3805 3806 ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep); 3807 out: 3808 return ret; 3809 } 3810 3811 static int cma_accept_iw(struct rdma_id_private *id_priv, 3812 struct rdma_conn_param *conn_param) 3813 { 3814 struct iw_cm_conn_param iw_param; 3815 int ret; 3816 3817 ret = cma_modify_qp_rtr(id_priv, conn_param); 3818 if (ret) 3819 return ret; 3820 3821 iw_param.ord = conn_param->initiator_depth; 3822 iw_param.ird = conn_param->responder_resources; 3823 iw_param.private_data = conn_param->private_data; 3824 iw_param.private_data_len = conn_param->private_data_len; 3825 if (id_priv->id.qp) { 3826 iw_param.qpn = id_priv->qp_num; 3827 } else 3828 iw_param.qpn = conn_param->qp_num; 3829 3830 return iw_cm_accept(id_priv->cm_id.iw, &iw_param); 3831 } 3832 3833 static int cma_send_sidr_rep(struct rdma_id_private *id_priv, 3834 enum ib_cm_sidr_status status, u32 qkey, 3835 const void *private_data, int private_data_len) 3836 { 3837 struct ib_cm_sidr_rep_param rep; 3838 int ret; 3839 3840 memset(&rep, 0, sizeof rep); 3841 rep.status = status; 3842 if (status == IB_SIDR_SUCCESS) { 3843 ret = cma_set_qkey(id_priv, qkey); 3844 if (ret) 3845 return ret; 3846 rep.qp_num = id_priv->qp_num; 3847 rep.qkey = id_priv->qkey; 3848 } 3849 rep.private_data = private_data; 3850 rep.private_data_len = private_data_len; 3851 3852 return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep); 3853 } 3854 3855 int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) 3856 { 3857 struct rdma_id_private *id_priv; 3858 int ret; 3859 3860 id_priv = container_of(id, struct rdma_id_private, id); 3861 3862 id_priv->owner = task_pid_nr(current); 3863 3864 if (!cma_comp(id_priv, RDMA_CM_CONNECT)) 3865 return -EINVAL; 3866 3867 if (!id->qp && conn_param) { 3868 id_priv->qp_num = conn_param->qp_num; 3869 id_priv->srq = conn_param->srq; 3870 } 3871 3872 if (rdma_cap_ib_cm(id->device, id->port_num)) { 3873 if (id->qp_type == IB_QPT_UD) { 3874 if (conn_param) 3875 ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, 3876 conn_param->qkey, 3877 conn_param->private_data, 3878 conn_param->private_data_len); 3879 else 3880 ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, 3881 0, NULL, 0); 3882 } else { 3883 if (conn_param) 3884 ret = cma_accept_ib(id_priv, conn_param); 3885 else 3886 ret = cma_rep_recv(id_priv); 3887 } 3888 } else if (rdma_cap_iw_cm(id->device, id->port_num)) 3889 ret = cma_accept_iw(id_priv, conn_param); 3890 else 3891 ret = -ENOSYS; 3892 3893 if (ret) 3894 goto reject; 3895 3896 return 0; 3897 reject: 3898 cma_modify_qp_err(id_priv); 3899 rdma_reject(id, NULL, 0); 3900 return ret; 3901 } 3902 EXPORT_SYMBOL(rdma_accept); 3903 3904 int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event) 3905 { 3906 struct rdma_id_private *id_priv; 3907 int ret; 3908 3909 id_priv = container_of(id, struct rdma_id_private, id); 3910 if (!id_priv->cm_id.ib) 3911 return -EINVAL; 3912 3913 switch (id->device->node_type) { 3914 case RDMA_NODE_IB_CA: 3915 ret = ib_cm_notify(id_priv->cm_id.ib, event); 3916 break; 3917 default: 3918 ret = 0; 3919 break; 3920 } 3921 return ret; 3922 } 3923 EXPORT_SYMBOL(rdma_notify); 3924 3925 int rdma_reject(struct rdma_cm_id *id, const void *private_data, 3926 u8 private_data_len) 3927 { 3928 struct rdma_id_private *id_priv; 3929 int ret; 3930 3931 id_priv = container_of(id, struct rdma_id_private, id); 3932 if (!id_priv->cm_id.ib) 3933 return -EINVAL; 3934 3935 if (rdma_cap_ib_cm(id->device, id->port_num)) { 3936 if (id->qp_type == IB_QPT_UD) 3937 ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, 0, 3938 private_data, private_data_len); 3939 else 3940 ret = ib_send_cm_rej(id_priv->cm_id.ib, 3941 IB_CM_REJ_CONSUMER_DEFINED, NULL, 3942 0, private_data, private_data_len); 3943 } else if (rdma_cap_iw_cm(id->device, id->port_num)) { 3944 ret = iw_cm_reject(id_priv->cm_id.iw, 3945 private_data, private_data_len); 3946 } else 3947 ret = -ENOSYS; 3948 3949 return ret; 3950 } 3951 EXPORT_SYMBOL(rdma_reject); 3952 3953 int rdma_disconnect(struct rdma_cm_id *id) 3954 { 3955 struct rdma_id_private *id_priv; 3956 int ret; 3957 3958 id_priv = container_of(id, struct rdma_id_private, id); 3959 if (!id_priv->cm_id.ib) 3960 return -EINVAL; 3961 3962 if (rdma_cap_ib_cm(id->device, id->port_num)) { 3963 ret = cma_modify_qp_err(id_priv); 3964 if (ret) 3965 goto out; 3966 /* Initiate or respond to a disconnect. */ 3967 if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0)) 3968 ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0); 3969 } else if (rdma_cap_iw_cm(id->device, id->port_num)) { 3970 ret = iw_cm_disconnect(id_priv->cm_id.iw, 0); 3971 } else 3972 ret = -EINVAL; 3973 3974 out: 3975 return ret; 3976 } 3977 EXPORT_SYMBOL(rdma_disconnect); 3978 3979 static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) 3980 { 3981 struct rdma_id_private *id_priv; 3982 struct cma_multicast *mc = multicast->context; 3983 struct rdma_cm_event event; 3984 int ret = 0; 3985 3986 id_priv = mc->id_priv; 3987 mutex_lock(&id_priv->handler_mutex); 3988 if (id_priv->state != RDMA_CM_ADDR_BOUND && 3989 id_priv->state != RDMA_CM_ADDR_RESOLVED) 3990 goto out; 3991 3992 if (!status) 3993 status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey)); 3994 mutex_lock(&id_priv->qp_mutex); 3995 if (!status && id_priv->id.qp) 3996 status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid, 3997 be16_to_cpu(multicast->rec.mlid)); 3998 mutex_unlock(&id_priv->qp_mutex); 3999 4000 memset(&event, 0, sizeof event); 4001 event.status = status; 4002 event.param.ud.private_data = mc->context; 4003 if (!status) { 4004 struct rdma_dev_addr *dev_addr = 4005 &id_priv->id.route.addr.dev_addr; 4006 struct net_device *ndev = 4007 dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); 4008 enum ib_gid_type gid_type = 4009 id_priv->cma_dev->default_gid_type[id_priv->id.port_num - 4010 rdma_start_port(id_priv->cma_dev->device)]; 4011 4012 event.event = RDMA_CM_EVENT_MULTICAST_JOIN; 4013 ret = ib_init_ah_from_mcmember(id_priv->id.device, 4014 id_priv->id.port_num, 4015 &multicast->rec, 4016 ndev, gid_type, 4017 &event.param.ud.ah_attr); 4018 if (ret) 4019 event.event = RDMA_CM_EVENT_MULTICAST_ERROR; 4020 4021 event.param.ud.qp_num = 0xFFFFFF; 4022 event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey); 4023 if (ndev) 4024 dev_put(ndev); 4025 } else 4026 event.event = RDMA_CM_EVENT_MULTICAST_ERROR; 4027 4028 ret = id_priv->id.event_handler(&id_priv->id, &event); 4029 if (ret) { 4030 cma_exch(id_priv, RDMA_CM_DESTROYING); 4031 mutex_unlock(&id_priv->handler_mutex); 4032 rdma_destroy_id(&id_priv->id); 4033 return 0; 4034 } 4035 4036 out: 4037 mutex_unlock(&id_priv->handler_mutex); 4038 return 0; 4039 } 4040 4041 static void cma_set_mgid(struct rdma_id_private *id_priv, 4042 struct sockaddr *addr, union ib_gid *mgid) 4043 { 4044 unsigned char mc_map[MAX_ADDR_LEN]; 4045 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 4046 struct sockaddr_in *sin = (struct sockaddr_in *) addr; 4047 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr; 4048 4049 if (cma_any_addr(addr)) { 4050 memset(mgid, 0, sizeof *mgid); 4051 } else if ((addr->sa_family == AF_INET6) && 4052 ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFFF0FFFF) == 4053 0xFF10A01B)) { 4054 /* IPv6 address is an SA assigned MGID. */ 4055 memcpy(mgid, &sin6->sin6_addr, sizeof *mgid); 4056 } else if (addr->sa_family == AF_IB) { 4057 memcpy(mgid, &((struct sockaddr_ib *) addr)->sib_addr, sizeof *mgid); 4058 } else if (addr->sa_family == AF_INET6) { 4059 ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map); 4060 if (id_priv->id.ps == RDMA_PS_UDP) 4061 mc_map[7] = 0x01; /* Use RDMA CM signature */ 4062 *mgid = *(union ib_gid *) (mc_map + 4); 4063 } else { 4064 ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map); 4065 if (id_priv->id.ps == RDMA_PS_UDP) 4066 mc_map[7] = 0x01; /* Use RDMA CM signature */ 4067 *mgid = *(union ib_gid *) (mc_map + 4); 4068 } 4069 } 4070 4071 static void cma_query_sa_classport_info_cb(int status, 4072 struct ib_class_port_info *rec, 4073 void *context) 4074 { 4075 struct class_port_info_context *cb_ctx = context; 4076 4077 WARN_ON(!context); 4078 4079 if (status || !rec) { 4080 pr_debug("RDMA CM: %s port %u failed query ClassPortInfo status: %d\n", 4081 cb_ctx->device->name, cb_ctx->port_num, status); 4082 goto out; 4083 } 4084 4085 memcpy(cb_ctx->class_port_info, rec, sizeof(struct ib_class_port_info)); 4086 4087 out: 4088 complete(&cb_ctx->done); 4089 } 4090 4091 static int cma_query_sa_classport_info(struct ib_device *device, u8 port_num, 4092 struct ib_class_port_info *class_port_info) 4093 { 4094 struct class_port_info_context *cb_ctx; 4095 int ret; 4096 4097 cb_ctx = kmalloc(sizeof(*cb_ctx), GFP_KERNEL); 4098 if (!cb_ctx) 4099 return -ENOMEM; 4100 4101 cb_ctx->device = device; 4102 cb_ctx->class_port_info = class_port_info; 4103 cb_ctx->port_num = port_num; 4104 init_completion(&cb_ctx->done); 4105 4106 ret = ib_sa_classport_info_rec_query(&sa_client, device, port_num, 4107 CMA_QUERY_CLASSPORT_INFO_TIMEOUT, 4108 GFP_KERNEL, cma_query_sa_classport_info_cb, 4109 cb_ctx, &cb_ctx->sa_query); 4110 if (ret < 0) { 4111 pr_err("RDMA CM: %s port %u failed to send ClassPortInfo query, ret: %d\n", 4112 device->name, port_num, ret); 4113 goto out; 4114 } 4115 4116 wait_for_completion(&cb_ctx->done); 4117 4118 out: 4119 kfree(cb_ctx); 4120 return ret; 4121 } 4122 4123 static int cma_join_ib_multicast(struct rdma_id_private *id_priv, 4124 struct cma_multicast *mc) 4125 { 4126 struct ib_sa_mcmember_rec rec; 4127 struct ib_class_port_info class_port_info; 4128 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 4129 ib_sa_comp_mask comp_mask; 4130 int ret; 4131 4132 ib_addr_get_mgid(dev_addr, &rec.mgid); 4133 ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num, 4134 &rec.mgid, &rec); 4135 if (ret) 4136 return ret; 4137 4138 ret = cma_set_qkey(id_priv, 0); 4139 if (ret) 4140 return ret; 4141 4142 cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid); 4143 rec.qkey = cpu_to_be32(id_priv->qkey); 4144 rdma_addr_get_sgid(dev_addr, &rec.port_gid); 4145 rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); 4146 rec.join_state = mc->join_state; 4147 4148 if (rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) { 4149 ret = cma_query_sa_classport_info(id_priv->id.device, 4150 id_priv->id.port_num, 4151 &class_port_info); 4152 4153 if (ret) 4154 return ret; 4155 4156 if (!(ib_get_cpi_capmask2(&class_port_info) & 4157 IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT)) { 4158 pr_warn("RDMA CM: %s port %u Unable to multicast join\n" 4159 "RDMA CM: SM doesn't support Send Only Full Member option\n", 4160 id_priv->id.device->name, id_priv->id.port_num); 4161 return -EOPNOTSUPP; 4162 } 4163 } 4164 4165 comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID | 4166 IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE | 4167 IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL | 4168 IB_SA_MCMEMBER_REC_FLOW_LABEL | 4169 IB_SA_MCMEMBER_REC_TRAFFIC_CLASS; 4170 4171 if (id_priv->id.ps == RDMA_PS_IPOIB) 4172 comp_mask |= IB_SA_MCMEMBER_REC_RATE | 4173 IB_SA_MCMEMBER_REC_RATE_SELECTOR | 4174 IB_SA_MCMEMBER_REC_MTU_SELECTOR | 4175 IB_SA_MCMEMBER_REC_MTU | 4176 IB_SA_MCMEMBER_REC_HOP_LIMIT; 4177 4178 mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device, 4179 id_priv->id.port_num, &rec, 4180 comp_mask, GFP_KERNEL, 4181 cma_ib_mc_handler, mc); 4182 return PTR_ERR_OR_ZERO(mc->multicast.ib); 4183 } 4184 4185 static void iboe_mcast_work_handler(struct work_struct *work) 4186 { 4187 struct iboe_mcast_work *mw = container_of(work, struct iboe_mcast_work, work); 4188 struct cma_multicast *mc = mw->mc; 4189 struct ib_sa_multicast *m = mc->multicast.ib; 4190 4191 mc->multicast.ib->context = mc; 4192 cma_ib_mc_handler(0, m); 4193 kref_put(&mc->mcref, release_mc); 4194 kfree(mw); 4195 } 4196 4197 static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid, 4198 enum ib_gid_type gid_type) 4199 { 4200 struct sockaddr_in *sin = (struct sockaddr_in *)addr; 4201 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr; 4202 4203 if (cma_any_addr(addr)) { 4204 memset(mgid, 0, sizeof *mgid); 4205 } else if (addr->sa_family == AF_INET6) { 4206 memcpy(mgid, &sin6->sin6_addr, sizeof *mgid); 4207 } else { 4208 mgid->raw[0] = 4209 (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ? 0 : 0xff; 4210 mgid->raw[1] = 4211 (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ? 0 : 0x0e; 4212 mgid->raw[2] = 0; 4213 mgid->raw[3] = 0; 4214 mgid->raw[4] = 0; 4215 mgid->raw[5] = 0; 4216 mgid->raw[6] = 0; 4217 mgid->raw[7] = 0; 4218 mgid->raw[8] = 0; 4219 mgid->raw[9] = 0; 4220 mgid->raw[10] = 0xff; 4221 mgid->raw[11] = 0xff; 4222 *(__be32 *)(&mgid->raw[12]) = sin->sin_addr.s_addr; 4223 } 4224 } 4225 4226 static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, 4227 struct cma_multicast *mc) 4228 { 4229 struct iboe_mcast_work *work; 4230 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 4231 int err = 0; 4232 struct sockaddr *addr = (struct sockaddr *)&mc->addr; 4233 struct net_device *ndev = NULL; 4234 enum ib_gid_type gid_type; 4235 bool send_only; 4236 4237 send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN); 4238 4239 if (cma_zero_addr((struct sockaddr *)&mc->addr)) 4240 return -EINVAL; 4241 4242 work = kzalloc(sizeof *work, GFP_KERNEL); 4243 if (!work) 4244 return -ENOMEM; 4245 4246 mc->multicast.ib = kzalloc(sizeof(struct ib_sa_multicast), GFP_KERNEL); 4247 if (!mc->multicast.ib) { 4248 err = -ENOMEM; 4249 goto out1; 4250 } 4251 4252 gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num - 4253 rdma_start_port(id_priv->cma_dev->device)]; 4254 cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid, gid_type); 4255 4256 mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff); 4257 if (id_priv->id.ps == RDMA_PS_UDP) 4258 mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY); 4259 4260 if (dev_addr->bound_dev_if) 4261 ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); 4262 if (!ndev) { 4263 err = -ENODEV; 4264 goto out2; 4265 } 4266 mc->multicast.ib->rec.rate = iboe_get_rate(ndev); 4267 mc->multicast.ib->rec.hop_limit = 1; 4268 mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->if_mtu); 4269 4270 if (addr->sa_family == AF_INET || addr->sa_family == AF_INET6) { 4271 if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) { 4272 mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT; 4273 if (!send_only) { 4274 err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, 4275 true); 4276 if (!err) 4277 mc->igmp_joined = true; 4278 } 4279 } 4280 } else { 4281 if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) 4282 err = -ENOTSUPP; 4283 } 4284 dev_put(ndev); 4285 if (err || !mc->multicast.ib->rec.mtu) { 4286 if (!err) 4287 err = -EINVAL; 4288 goto out2; 4289 } 4290 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, 4291 &mc->multicast.ib->rec.port_gid); 4292 work->id = id_priv; 4293 work->mc = mc; 4294 INIT_WORK(&work->work, iboe_mcast_work_handler); 4295 kref_get(&mc->mcref); 4296 queue_work(cma_wq, &work->work); 4297 4298 return 0; 4299 4300 out2: 4301 kfree(mc->multicast.ib); 4302 out1: 4303 kfree(work); 4304 return err; 4305 } 4306 4307 int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, 4308 u8 join_state, void *context) 4309 { 4310 struct rdma_id_private *id_priv; 4311 struct cma_multicast *mc; 4312 int ret; 4313 4314 if (!id->device) 4315 return -EINVAL; 4316 4317 id_priv = container_of(id, struct rdma_id_private, id); 4318 if (!cma_comp(id_priv, RDMA_CM_ADDR_BOUND) && 4319 !cma_comp(id_priv, RDMA_CM_ADDR_RESOLVED)) 4320 return -EINVAL; 4321 4322 mc = kmalloc(sizeof *mc, GFP_KERNEL); 4323 if (!mc) 4324 return -ENOMEM; 4325 4326 memcpy(&mc->addr, addr, rdma_addr_size(addr)); 4327 mc->context = context; 4328 mc->id_priv = id_priv; 4329 mc->igmp_joined = false; 4330 mc->join_state = join_state; 4331 spin_lock(&id_priv->lock); 4332 list_add(&mc->list, &id_priv->mc_list); 4333 spin_unlock(&id_priv->lock); 4334 4335 if (rdma_protocol_roce(id->device, id->port_num)) { 4336 kref_init(&mc->mcref); 4337 ret = cma_iboe_join_multicast(id_priv, mc); 4338 } else if (rdma_cap_ib_mcast(id->device, id->port_num)) 4339 ret = cma_join_ib_multicast(id_priv, mc); 4340 else 4341 ret = -ENOSYS; 4342 4343 if (ret) { 4344 spin_lock_irq(&id_priv->lock); 4345 list_del(&mc->list); 4346 spin_unlock_irq(&id_priv->lock); 4347 kfree(mc); 4348 } 4349 return ret; 4350 } 4351 EXPORT_SYMBOL(rdma_join_multicast); 4352 4353 void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr) 4354 { 4355 struct rdma_id_private *id_priv; 4356 struct cma_multicast *mc; 4357 4358 id_priv = container_of(id, struct rdma_id_private, id); 4359 spin_lock_irq(&id_priv->lock); 4360 list_for_each_entry(mc, &id_priv->mc_list, list) { 4361 if (!memcmp(&mc->addr, addr, rdma_addr_size(addr))) { 4362 list_del(&mc->list); 4363 spin_unlock_irq(&id_priv->lock); 4364 4365 if (id->qp) 4366 ib_detach_mcast(id->qp, 4367 &mc->multicast.ib->rec.mgid, 4368 be16_to_cpu(mc->multicast.ib->rec.mlid)); 4369 4370 BUG_ON(id_priv->cma_dev->device != id->device); 4371 4372 if (rdma_cap_ib_mcast(id->device, id->port_num)) { 4373 ib_sa_free_multicast(mc->multicast.ib); 4374 kfree(mc); 4375 } else if (rdma_protocol_roce(id->device, id->port_num)) { 4376 if (mc->igmp_joined) { 4377 struct rdma_dev_addr *dev_addr = 4378 &id->route.addr.dev_addr; 4379 struct net_device *ndev = NULL; 4380 4381 if (dev_addr->bound_dev_if) 4382 ndev = dev_get_by_index(dev_addr->net, 4383 dev_addr->bound_dev_if); 4384 if (ndev) { 4385 cma_igmp_send(ndev, 4386 &mc->multicast.ib->rec.mgid, 4387 false); 4388 dev_put(ndev); 4389 } 4390 mc->igmp_joined = false; 4391 } 4392 kref_put(&mc->mcref, release_mc); 4393 } 4394 return; 4395 } 4396 } 4397 spin_unlock_irq(&id_priv->lock); 4398 } 4399 EXPORT_SYMBOL(rdma_leave_multicast); 4400 4401 static int 4402 sysctl_cma_default_roce_mode(SYSCTL_HANDLER_ARGS) 4403 { 4404 struct cma_device *cma_dev = arg1; 4405 const int port = arg2; 4406 char buf[64]; 4407 int error; 4408 4409 strlcpy(buf, ib_cache_gid_type_str( 4410 cma_get_default_gid_type(cma_dev, port)), sizeof(buf)); 4411 4412 error = sysctl_handle_string(oidp, buf, sizeof(buf), req); 4413 if (error != 0 || req->newptr == NULL) 4414 goto done; 4415 4416 error = ib_cache_gid_parse_type_str(buf); 4417 if (error < 0) { 4418 error = EINVAL; 4419 goto done; 4420 } 4421 4422 cma_set_default_gid_type(cma_dev, port, error); 4423 error = 0; 4424 done: 4425 return (error); 4426 } 4427 4428 static void cma_add_one(struct ib_device *device) 4429 { 4430 struct cma_device *cma_dev; 4431 struct rdma_id_private *id_priv; 4432 unsigned int i; 4433 4434 cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL); 4435 if (!cma_dev) 4436 return; 4437 4438 sysctl_ctx_init(&cma_dev->sysctl_ctx); 4439 4440 cma_dev->device = device; 4441 cma_dev->default_gid_type = kcalloc(device->phys_port_cnt, 4442 sizeof(*cma_dev->default_gid_type), 4443 GFP_KERNEL); 4444 if (!cma_dev->default_gid_type) { 4445 kfree(cma_dev); 4446 return; 4447 } 4448 for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) { 4449 unsigned long supported_gids; 4450 unsigned int default_gid_type; 4451 4452 supported_gids = roce_gid_type_mask_support(device, i); 4453 4454 if (WARN_ON(!supported_gids)) { 4455 /* set something valid */ 4456 default_gid_type = 0; 4457 } else if (test_bit(IB_GID_TYPE_ROCE_UDP_ENCAP, &supported_gids)) { 4458 /* prefer RoCEv2, if supported */ 4459 default_gid_type = IB_GID_TYPE_ROCE_UDP_ENCAP; 4460 } else { 4461 default_gid_type = find_first_bit(&supported_gids, 4462 BITS_PER_LONG); 4463 } 4464 cma_dev->default_gid_type[i - rdma_start_port(device)] = 4465 default_gid_type; 4466 } 4467 4468 init_completion(&cma_dev->comp); 4469 atomic_set(&cma_dev->refcount, 1); 4470 INIT_LIST_HEAD(&cma_dev->id_list); 4471 ib_set_client_data(device, &cma_client, cma_dev); 4472 4473 mutex_lock(&lock); 4474 list_add_tail(&cma_dev->list, &dev_list); 4475 list_for_each_entry(id_priv, &listen_any_list, list) 4476 cma_listen_on_dev(id_priv, cma_dev); 4477 mutex_unlock(&lock); 4478 4479 for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) { 4480 char buf[64]; 4481 4482 snprintf(buf, sizeof(buf), "default_roce_mode_port%d", i); 4483 4484 (void) SYSCTL_ADD_PROC(&cma_dev->sysctl_ctx, 4485 SYSCTL_CHILDREN(device->ports_parent->parent->oidp), 4486 OID_AUTO, buf, CTLTYPE_STRING | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 4487 cma_dev, i, &sysctl_cma_default_roce_mode, "A", 4488 "Default RoCE mode. Valid values: IB/RoCE v1 and RoCE v2"); 4489 } 4490 } 4491 4492 static int cma_remove_id_dev(struct rdma_id_private *id_priv) 4493 { 4494 struct rdma_cm_event event; 4495 enum rdma_cm_state state; 4496 int ret = 0; 4497 4498 /* Record that we want to remove the device */ 4499 state = cma_exch(id_priv, RDMA_CM_DEVICE_REMOVAL); 4500 if (state == RDMA_CM_DESTROYING) 4501 return 0; 4502 4503 cma_cancel_operation(id_priv, state); 4504 mutex_lock(&id_priv->handler_mutex); 4505 4506 /* Check for destruction from another callback. */ 4507 if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL)) 4508 goto out; 4509 4510 memset(&event, 0, sizeof event); 4511 event.event = RDMA_CM_EVENT_DEVICE_REMOVAL; 4512 ret = id_priv->id.event_handler(&id_priv->id, &event); 4513 out: 4514 mutex_unlock(&id_priv->handler_mutex); 4515 return ret; 4516 } 4517 4518 static void cma_process_remove(struct cma_device *cma_dev) 4519 { 4520 struct rdma_id_private *id_priv; 4521 int ret; 4522 4523 mutex_lock(&lock); 4524 while (!list_empty(&cma_dev->id_list)) { 4525 id_priv = list_entry(cma_dev->id_list.next, 4526 struct rdma_id_private, list); 4527 4528 list_del(&id_priv->listen_list); 4529 list_del_init(&id_priv->list); 4530 atomic_inc(&id_priv->refcount); 4531 mutex_unlock(&lock); 4532 4533 ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv); 4534 cma_deref_id(id_priv); 4535 if (ret) 4536 rdma_destroy_id(&id_priv->id); 4537 4538 mutex_lock(&lock); 4539 } 4540 mutex_unlock(&lock); 4541 4542 cma_deref_dev(cma_dev); 4543 wait_for_completion(&cma_dev->comp); 4544 } 4545 4546 static void cma_remove_one(struct ib_device *device, void *client_data) 4547 { 4548 struct cma_device *cma_dev = client_data; 4549 4550 if (!cma_dev) 4551 return; 4552 4553 mutex_lock(&lock); 4554 list_del(&cma_dev->list); 4555 mutex_unlock(&lock); 4556 4557 cma_process_remove(cma_dev); 4558 sysctl_ctx_free(&cma_dev->sysctl_ctx); 4559 kfree(cma_dev->default_gid_type); 4560 kfree(cma_dev); 4561 } 4562 4563 static void cma_init_vnet(void *arg) 4564 { 4565 struct cma_pernet *pernet = &VNET(cma_pernet); 4566 4567 idr_init(&pernet->tcp_ps); 4568 idr_init(&pernet->udp_ps); 4569 idr_init(&pernet->ipoib_ps); 4570 idr_init(&pernet->ib_ps); 4571 idr_init(&pernet->sdp_ps); 4572 } 4573 VNET_SYSINIT(cma_init_vnet, SI_SUB_OFED_MODINIT - 1, SI_ORDER_FIRST, cma_init_vnet, NULL); 4574 4575 static void cma_destroy_vnet(void *arg) 4576 { 4577 struct cma_pernet *pernet = &VNET(cma_pernet); 4578 4579 idr_destroy(&pernet->tcp_ps); 4580 idr_destroy(&pernet->udp_ps); 4581 idr_destroy(&pernet->ipoib_ps); 4582 idr_destroy(&pernet->ib_ps); 4583 idr_destroy(&pernet->sdp_ps); 4584 } 4585 VNET_SYSUNINIT(cma_destroy_vnet, SI_SUB_OFED_MODINIT - 1, SI_ORDER_SECOND, cma_destroy_vnet, NULL); 4586 4587 static int __init cma_init(void) 4588 { 4589 int ret; 4590 4591 cma_wq = alloc_ordered_workqueue("rdma_cm", WQ_MEM_RECLAIM); 4592 if (!cma_wq) 4593 return -ENOMEM; 4594 4595 ib_sa_register_client(&sa_client); 4596 rdma_addr_register_client(&addr_client); 4597 4598 ret = ib_register_client(&cma_client); 4599 if (ret) 4600 goto err; 4601 4602 cma_configfs_init(); 4603 4604 return 0; 4605 4606 err: 4607 rdma_addr_unregister_client(&addr_client); 4608 ib_sa_unregister_client(&sa_client); 4609 destroy_workqueue(cma_wq); 4610 return ret; 4611 } 4612 4613 static void __exit cma_cleanup(void) 4614 { 4615 cma_configfs_exit(); 4616 ib_unregister_client(&cma_client); 4617 rdma_addr_unregister_client(&addr_client); 4618 ib_sa_unregister_client(&sa_client); 4619 destroy_workqueue(cma_wq); 4620 } 4621 4622 module_init(cma_init); 4623 module_exit(cma_cleanup); 4624