/*-
 * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
 *
 * Copyright (c) 2005 Voltaire Inc. All rights reserved.
 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
 * Copyright (c) 2005-2006 Intel Corporation. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#define	LINUXKPI_PARAM_PREFIX ibcore_

#include <linux/completion.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/mutex.h>
#include <linux/random.h>
#include <linux/idr.h>
#include <linux/inetdevice.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <net/route.h>

#include <net/tcp.h>
#include <net/ipv6.h>

#include <netinet6/scope6_var.h>
#include <netinet6/ip6_var.h>

#include <rdma/rdma_cm.h>
#include <rdma/rdma_cm_ib.h>
#include <rdma/rdma_sdp.h>
#include <rdma/ib.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_cm.h>
#include <rdma/ib_sa.h>
#include <rdma/iw_cm.h>

#include <sys/priv.h>

#include "core_priv.h"

MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("Generic RDMA CM Agent");
MODULE_LICENSE("Dual BSD/GPL");

#define CMA_CM_RESPONSE_TIMEOUT 20
#define CMA_QUERY_CLASSPORT_INFO_TIMEOUT 3000
#define CMA_MAX_CM_RETRIES 15
#define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)
#define CMA_IBOE_PACKET_LIFETIME 18

static const char * const cma_events[] = {
	[RDMA_CM_EVENT_ADDR_RESOLVED]	 = "address resolved",
	[RDMA_CM_EVENT_ADDR_ERROR]	 = "address error",
	[RDMA_CM_EVENT_ROUTE_RESOLVED]	 = "route resolved ",
	[RDMA_CM_EVENT_ROUTE_ERROR]	 = "route error",
	[RDMA_CM_EVENT_CONNECT_REQUEST]	 = "connect request",
	[RDMA_CM_EVENT_CONNECT_RESPONSE] = "connect response",
	[RDMA_CM_EVENT_CONNECT_ERROR]	 = "connect error",
	[RDMA_CM_EVENT_UNREACHABLE]	 = "unreachable",
	[RDMA_CM_EVENT_REJECTED]	 = "rejected",
	[RDMA_CM_EVENT_ESTABLISHED]	 = "established",
	[RDMA_CM_EVENT_DISCONNECTED]	 = "disconnected",
	[RDMA_CM_EVENT_DEVICE_REMOVAL]	 = "device removal",
	[RDMA_CM_EVENT_MULTICAST_JOIN]	 = "multicast join",
	[RDMA_CM_EVENT_MULTICAST_ERROR]	 = "multicast error",
	[RDMA_CM_EVENT_ADDR_CHANGE]	 = "address change",
	[RDMA_CM_EVENT_TIMEWAIT_EXIT]	 = "timewait exit",
};

const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event)
{
	size_t index = event;

	return (index < ARRAY_SIZE(cma_events) && cma_events[index]) ?
			cma_events[index] : "unrecognized event";
}
EXPORT_SYMBOL(rdma_event_msg);

static int cma_check_linklocal(struct rdma_dev_addr *, struct sockaddr *);
static void cma_add_one(struct ib_device *device);
static void cma_remove_one(struct ib_device *device, void *client_data);
static enum rdma_port_space rdma_ps_from_service_id(__be64 service_id);

static struct ib_client cma_client = {
	.name   = "cma",
	.add    = cma_add_one,
	.remove = cma_remove_one
};

static struct ib_sa_client sa_client;
static struct rdma_addr_client addr_client;
static LIST_HEAD(dev_list);
static LIST_HEAD(listen_any_list);
static DEFINE_MUTEX(lock);
static struct workqueue_struct *cma_wq;

struct cma_pernet {
	struct idr tcp_ps;
	struct idr udp_ps;
	struct idr ipoib_ps;
	struct idr ib_ps;
	struct idr sdp_ps;
};

VNET_DEFINE(struct cma_pernet, cma_pernet);

static struct cma_pernet *cma_pernet_ptr(struct vnet *vnet)
{
	struct cma_pernet *retval;

	CURVNET_SET_QUIET(vnet);
	retval = &VNET(cma_pernet);
	CURVNET_RESTORE();

	return (retval);
}

static struct idr *cma_pernet_idr(struct vnet *net, enum rdma_port_space ps)
{
	struct cma_pernet *pernet = cma_pernet_ptr(net);

	switch (ps) {
	case RDMA_PS_TCP:
		return &pernet->tcp_ps;
	case RDMA_PS_UDP:
		return &pernet->udp_ps;
	case RDMA_PS_IPOIB:
		return &pernet->ipoib_ps;
	case RDMA_PS_IB:
		return &pernet->ib_ps;
	case RDMA_PS_SDP:
		return &pernet->sdp_ps;
	default:
		return NULL;
	}
}

struct cma_device {
	struct list_head	list;
	struct ib_device	*device;
	struct completion	comp;
	atomic_t		refcount;
	struct list_head	id_list;
	struct sysctl_ctx_list	sysctl_ctx;
	enum ib_gid_type	*default_gid_type;
};

struct rdma_bind_list {
	enum rdma_port_space	ps;
	struct hlist_head	owners;
	unsigned short		port;
};

struct class_port_info_context {
	struct ib_class_port_info	*class_port_info;
	struct ib_device		*device;
	struct completion		done;
	struct ib_sa_query		*sa_query;
	u8				port_num;
};

static int cma_ps_alloc(struct vnet *vnet, enum rdma_port_space ps,
			struct rdma_bind_list *bind_list, int snum)
{
	struct idr *idr = cma_pernet_idr(vnet, ps);

	return idr_alloc(idr, bind_list, snum, snum + 1, GFP_KERNEL);
}

static struct rdma_bind_list *cma_ps_find(struct vnet *net,
					  enum rdma_port_space ps, int snum)
{
	struct idr *idr = cma_pernet_idr(net, ps);

	return idr_find(idr, snum);
}

static void cma_ps_remove(struct vnet *net, enum rdma_port_space ps, int snum)
{
	struct idr *idr = cma_pernet_idr(net, ps);

	idr_remove(idr, snum);
}

enum {
	CMA_OPTION_AFONLY,
};

void cma_ref_dev(struct cma_device *cma_dev)
{
	atomic_inc(&cma_dev->refcount);
}

struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
					     void *cookie)
{
	struct cma_device *cma_dev;
	struct cma_device *found_cma_dev = NULL;
	mutex_lock(&lock);

	list_for_each_entry(cma_dev, &dev_list, list)
		if (filter(cma_dev->device, cookie)) {
			found_cma_dev = cma_dev;
			break;
		}

	if (found_cma_dev)
		cma_ref_dev(found_cma_dev);
	mutex_unlock(&lock);
	return found_cma_dev;
}

int cma_get_default_gid_type(struct cma_device *cma_dev,
			     unsigned int port)
{
	if (port < rdma_start_port(cma_dev->device) ||
	    port > rdma_end_port(cma_dev->device))
		return -EINVAL;

	return cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)];
}

int cma_set_default_gid_type(struct cma_device *cma_dev,
			     unsigned int port,
			     enum ib_gid_type default_gid_type)
{
	unsigned long supported_gids;

	if (port < rdma_start_port(cma_dev->device) ||
	    port > rdma_end_port(cma_dev->device))
		return -EINVAL;

	supported_gids = roce_gid_type_mask_support(cma_dev->device, port);

	if (!(supported_gids & 1 << default_gid_type))
		return -EINVAL;

	cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)] =
		default_gid_type;

	return 0;
}

struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev)
{
	return cma_dev->device;
}

/*
 * Device removal can occur at anytime, so we need extra handling to
 * serialize notifying the user of device removal with other callbacks.
 * We do this by disabling removal notification while a callback is in process,
 * and reporting it after the callback completes.
 */
struct rdma_id_private {
	struct rdma_cm_id	id;

	struct rdma_bind_list	*bind_list;
	struct hlist_node	node;
	struct list_head	list; /* listen_any_list or cma_device.list */
	struct list_head	listen_list; /* per device listens */
	struct cma_device	*cma_dev;
	struct list_head	mc_list;

	int			internal_id;
	enum rdma_cm_state	state;
	spinlock_t		lock;
	struct mutex		qp_mutex;

	struct completion	comp;
	atomic_t		refcount;
	struct mutex		handler_mutex;

	int			backlog;
	int			timeout_ms;
	struct ib_sa_query	*query;
	int			query_id;
	union {
		struct ib_cm_id	*ib;
		struct iw_cm_id	*iw;
	} cm_id;

	u32			seq_num;
	u32			qkey;
	u32			qp_num;
	pid_t			owner;
	u32			options;
	u8			srq;
	u8			tos;
	u8			reuseaddr;
	u8			afonly;
	enum ib_gid_type	gid_type;
};

struct cma_multicast {
	struct rdma_id_private *id_priv;
	union {
		struct ib_sa_multicast *ib;
	} multicast;
	struct list_head	list;
	void			*context;
	struct sockaddr_storage	addr;
	struct kref		mcref;
	bool			igmp_joined;
	u8			join_state;
};

struct cma_work {
	struct work_struct	work;
	struct rdma_id_private	*id;
	enum rdma_cm_state	old_state;
	enum rdma_cm_state	new_state;
	struct rdma_cm_event	event;
};

struct cma_ndev_work {
	struct work_struct	work;
	struct rdma_id_private	*id;
	struct rdma_cm_event	event;
};

struct iboe_mcast_work {
	struct work_struct	 work;
	struct rdma_id_private	*id;
	struct cma_multicast	*mc;
};

struct cma_hdr {
	u8 cma_version;
	u8 ip_version;	/* IP version: 7:4 */
	__be16 port;
	union cma_ip_addr src_addr;
	union cma_ip_addr dst_addr;
};

#define CMA_VERSION 0x00
#define SDP_MAJ_VERSION 0x2

struct cma_req_info {
	struct ib_device *device;
	int port;
	union ib_gid local_gid;
	__be64 service_id;
	u16 pkey;
	bool has_gid:1;
};
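/*
 * State-transition helpers: cma_comp() tests the current id_priv state,
 * cma_comp_exch() moves from an expected state to a new one only if the
 * expected state matches, and cma_exch() unconditionally swaps in a new
 * state.  All three take the id_priv spinlock so concurrent event handlers
 * observe a consistent state.
 */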
static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&id_priv->lock, flags);
	ret = (id_priv->state == comp);
	spin_unlock_irqrestore(&id_priv->lock, flags);
	return ret;
}

static int cma_comp_exch(struct rdma_id_private *id_priv,
			 enum rdma_cm_state comp, enum rdma_cm_state exch)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&id_priv->lock, flags);
	if ((ret = (id_priv->state == comp)))
		id_priv->state = exch;
	spin_unlock_irqrestore(&id_priv->lock, flags);
	return ret;
}

static enum rdma_cm_state cma_exch(struct rdma_id_private *id_priv,
				   enum rdma_cm_state exch)
{
	unsigned long flags;
	enum rdma_cm_state old;

	spin_lock_irqsave(&id_priv->lock, flags);
	old = id_priv->state;
	id_priv->state = exch;
	spin_unlock_irqrestore(&id_priv->lock, flags);
	return old;
}

static inline u8 cma_get_ip_ver(const struct cma_hdr *hdr)
{
	return hdr->ip_version >> 4;
}

static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
{
	hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
}

static inline u8 sdp_get_majv(u8 sdp_version)
{
	return sdp_version >> 4;
}

static inline u8 sdp_get_ip_ver(const struct sdp_hh *hh)
{
	return hh->ipv_cap >> 4;
}

static inline void sdp_set_ip_ver(struct sdp_hh *hh, u8 ip_ver)
{
	hh->ipv_cap = (ip_ver << 4) | (hh->ipv_cap & 0xF);
}

static int cma_igmp_send(struct net_device *ndev, const union ib_gid *mgid, bool join)
{
	int retval;

	if (ndev) {
		union {
			struct sockaddr sock;
			struct sockaddr_storage storage;
		} addr;

		rdma_gid2ip(&addr.sock, mgid);

		CURVNET_SET_QUIET(ndev->if_vnet);
		if (join)
			retval = -if_addmulti(ndev, &addr.sock, NULL);
		else
			retval = -if_delmulti(ndev, &addr.sock);
		CURVNET_RESTORE();
	} else {
		retval = -ENODEV;
	}
	return retval;
}

static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
			       struct cma_device *cma_dev)
{
	cma_ref_dev(cma_dev);
	id_priv->cma_dev = cma_dev;
	id_priv->gid_type = 0;
	id_priv->id.device = cma_dev->device;
	id_priv->id.route.addr.dev_addr.transport =
		rdma_node_get_transport(cma_dev->device->node_type);
	list_add_tail(&id_priv->list, &cma_dev->id_list);
}

static void cma_attach_to_dev(struct rdma_id_private *id_priv,
			      struct cma_device *cma_dev)
{
	_cma_attach_to_dev(id_priv, cma_dev);
	id_priv->gid_type =
		cma_dev->default_gid_type[id_priv->id.port_num -
					  rdma_start_port(cma_dev->device)];
}

void cma_deref_dev(struct cma_device *cma_dev)
{
	if (atomic_dec_and_test(&cma_dev->refcount))
		complete(&cma_dev->comp);
}

static inline void release_mc(struct kref *kref)
{
	struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref);

	kfree(mc->multicast.ib);
	kfree(mc);
}

static void cma_release_dev(struct rdma_id_private *id_priv)
{
	mutex_lock(&lock);
	list_del(&id_priv->list);
	cma_deref_dev(id_priv->cma_dev);
	id_priv->cma_dev = NULL;
	mutex_unlock(&lock);
}

static inline struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv)
{
	return (struct sockaddr *) &id_priv->id.route.addr.src_addr;
}
static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv)
{
	return (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
}

static inline unsigned short cma_family(struct rdma_id_private *id_priv)
{
	return id_priv->id.route.addr.src_addr.ss_family;
}

static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey)
{
	struct ib_sa_mcmember_rec rec;
	int ret = 0;

	if (id_priv->qkey) {
		if (qkey && id_priv->qkey != qkey)
			return -EINVAL;
		return 0;
	}

	if (qkey) {
		id_priv->qkey = qkey;
		return 0;
	}

	switch (id_priv->id.ps) {
	case RDMA_PS_UDP:
	case RDMA_PS_IB:
		id_priv->qkey = RDMA_UDP_QKEY;
		break;
	case RDMA_PS_IPOIB:
		ib_addr_get_mgid(&id_priv->id.route.addr.dev_addr, &rec.mgid);
		ret = ib_sa_get_mcmember_rec(id_priv->id.device,
					     id_priv->id.port_num, &rec.mgid,
					     &rec);
		if (!ret)
			id_priv->qkey = be32_to_cpu(rec.qkey);
		break;
	default:
		break;
	}
	return ret;
}

static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr)
{
	dev_addr->dev_type = ARPHRD_INFINIBAND;
	rdma_addr_set_sgid(dev_addr, (union ib_gid *) &sib->sib_addr);
	ib_addr_set_pkey(dev_addr, ntohs(sib->sib_pkey));
}

static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
{
	int ret;

	if (addr->sa_family != AF_IB) {
		ret = rdma_translate_ip(addr, dev_addr);
	} else {
		cma_translate_ib((struct sockaddr_ib *) addr, dev_addr);
		ret = 0;
	}

	return ret;
}

static inline int cma_validate_port(struct ib_device *device, u8 port,
				    enum ib_gid_type gid_type,
				    union ib_gid *gid,
				    const struct rdma_dev_addr *dev_addr)
{
	const int dev_type = dev_addr->dev_type;
	struct net_device *ndev;
	int ret = -ENODEV;

	if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port))
		return ret;

	if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port))
		return ret;

	if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) {
		ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
	} else {
		ndev = NULL;
		gid_type = IB_GID_TYPE_IB;
	}

	ret = ib_find_cached_gid_by_port(device, gid, gid_type, port,
					 ndev, NULL);

	if (ndev)
		dev_put(ndev);

	return ret;
}
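/*
 * Bind the rdma_id to an RDMA device: first try the device/port the
 * listener was accepted on, then fall back to scanning every registered
 * device and port.  RoCE ports are matched against the IP-derived GID
 * (iboe_gid), native IB ports against the GID taken from the source
 * device address.
 */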
static int cma_acquire_dev(struct rdma_id_private *id_priv,
			   struct rdma_id_private *listen_id_priv)
{
	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
	struct cma_device *cma_dev;
	union ib_gid gid, iboe_gid, *gidp;
	int ret = -ENODEV;
	u8 port;

	if (dev_addr->dev_type != ARPHRD_INFINIBAND &&
	    id_priv->id.ps == RDMA_PS_IPOIB)
		return -EINVAL;

	mutex_lock(&lock);
	rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
		    &iboe_gid);

	memcpy(&gid, dev_addr->src_dev_addr +
	       rdma_addr_gid_offset(dev_addr), sizeof gid);

	if (listen_id_priv) {
		cma_dev = listen_id_priv->cma_dev;
		port = listen_id_priv->id.port_num;

		if (rdma_is_port_valid(cma_dev->device, port)) {
			gidp = rdma_protocol_roce(cma_dev->device, port) ?
			       &iboe_gid : &gid;

			ret = cma_validate_port(cma_dev->device, port,
				rdma_protocol_ib(cma_dev->device, port) ?
				IB_GID_TYPE_IB :
				listen_id_priv->gid_type, gidp, dev_addr);
			if (!ret) {
				id_priv->id.port_num = port;
				goto out;
			}
		}
	}

	list_for_each_entry(cma_dev, &dev_list, list) {
		for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) {
			if (listen_id_priv &&
			    listen_id_priv->cma_dev == cma_dev &&
			    listen_id_priv->id.port_num == port)
				continue;

			gidp = rdma_protocol_roce(cma_dev->device, port) ?
			       &iboe_gid : &gid;

			ret = cma_validate_port(cma_dev->device, port,
						rdma_protocol_ib(cma_dev->device, port) ?
						IB_GID_TYPE_IB :
						cma_dev->default_gid_type[port - 1],
						gidp, dev_addr);
			if (!ret) {
				id_priv->id.port_num = port;
				goto out;
			}
		}
	}

out:
	if (!ret)
		cma_attach_to_dev(id_priv, cma_dev);

	mutex_unlock(&lock);
	return ret;
}

/*
 * Select the source IB device and address to reach the destination IB address.
 */
static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
{
	struct cma_device *cma_dev, *cur_dev;
	struct sockaddr_ib *addr;
	union ib_gid gid, sgid, *dgid;
	u16 pkey, index;
	u8 p;
	int i;

	cma_dev = NULL;
	addr = (struct sockaddr_ib *) cma_dst_addr(id_priv);
	dgid = (union ib_gid *) &addr->sib_addr;
	pkey = ntohs(addr->sib_pkey);

	list_for_each_entry(cur_dev, &dev_list, list) {
		for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
			if (!rdma_cap_af_ib(cur_dev->device, p))
				continue;

			if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index))
				continue;

			for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i,
						       &gid, NULL);
			     i++) {
				if (!memcmp(&gid, dgid, sizeof(gid))) {
					cma_dev = cur_dev;
					sgid = gid;
					id_priv->id.port_num = p;
					goto found;
				}

				if (!cma_dev && (gid.global.subnet_prefix ==
						 dgid->global.subnet_prefix)) {
					cma_dev = cur_dev;
					sgid = gid;
					id_priv->id.port_num = p;
				}
			}
		}
	}

	if (!cma_dev)
		return -ENODEV;

found:
	cma_attach_to_dev(id_priv, cma_dev);
	addr = (struct sockaddr_ib *) cma_src_addr(id_priv);
	memcpy(&addr->sib_addr, &sgid, sizeof sgid);
	cma_translate_ib(addr, &id_priv->id.route.addr.dev_addr);
	return 0;
}

static void cma_deref_id(struct rdma_id_private *id_priv)
{
	if (atomic_dec_and_test(&id_priv->refcount))
		complete(&id_priv->comp);
}

struct rdma_cm_id *rdma_create_id(struct vnet *net,
				  rdma_cm_event_handler event_handler,
				  void *context, enum rdma_port_space ps,
				  enum ib_qp_type qp_type)
{
	struct rdma_id_private *id_priv;

#ifdef VIMAGE
	if (net == NULL)
		return ERR_PTR(-EINVAL);
#endif
	id_priv = kzalloc(sizeof *id_priv, GFP_KERNEL);
	if (!id_priv)
		return ERR_PTR(-ENOMEM);

	id_priv->owner = task_pid_nr(current);
	id_priv->state = RDMA_CM_IDLE;
	id_priv->id.context = context;
	id_priv->id.event_handler = event_handler;
	id_priv->id.ps = ps;
	id_priv->id.qp_type = qp_type;
	spin_lock_init(&id_priv->lock);
	mutex_init(&id_priv->qp_mutex);
	init_completion(&id_priv->comp);
	atomic_set(&id_priv->refcount, 1);
	mutex_init(&id_priv->handler_mutex);
	INIT_LIST_HEAD(&id_priv->listen_list);
	INIT_LIST_HEAD(&id_priv->mc_list);
	get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
	id_priv->id.route.addr.dev_addr.net = net;

	return &id_priv->id;
}
EXPORT_SYMBOL(rdma_create_id);
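/*
 * UD QPs are not driven through the CM state machine, so bring them all the
 * way to RTS here: INIT (pkey/qkey/port set up via rdma_init_qp_attr),
 * then RTR, then RTS with a zero starting send PSN.
 */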
static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
{
	struct ib_qp_attr qp_attr;
	int qp_attr_mask, ret;

	qp_attr.qp_state = IB_QPS_INIT;
	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
	if (ret)
		return ret;

	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
	if (ret)
		return ret;

	qp_attr.qp_state = IB_QPS_RTR;
	ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
	if (ret)
		return ret;

	qp_attr.qp_state = IB_QPS_RTS;
	qp_attr.sq_psn = 0;
	ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN);

	return ret;
}

static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
{
	struct ib_qp_attr qp_attr;
	int qp_attr_mask, ret;

	qp_attr.qp_state = IB_QPS_INIT;
	ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
	if (ret)
		return ret;

	return ib_modify_qp(qp, &qp_attr, qp_attr_mask);
}

int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
		   struct ib_qp_init_attr *qp_init_attr)
{
	struct rdma_id_private *id_priv;
	struct ib_qp *qp;
	int ret;

	id_priv = container_of(id, struct rdma_id_private, id);
	if (id->device != pd->device)
		return -EINVAL;

	qp_init_attr->port_num = id->port_num;
	qp = ib_create_qp(pd, qp_init_attr);
	if (IS_ERR(qp))
		return PTR_ERR(qp);

	if (id->qp_type == IB_QPT_UD)
		ret = cma_init_ud_qp(id_priv, qp);
	else
		ret = cma_init_conn_qp(id_priv, qp);
	if (ret)
		goto err;

	id->qp = qp;
	id_priv->qp_num = qp->qp_num;
	id_priv->srq = (qp->srq != NULL);
	return 0;
err:
	ib_destroy_qp(qp);
	return ret;
}
EXPORT_SYMBOL(rdma_create_qp);

void rdma_destroy_qp(struct rdma_cm_id *id)
{
	struct rdma_id_private *id_priv;

	id_priv = container_of(id, struct rdma_id_private, id);
	mutex_lock(&id_priv->qp_mutex);
	ib_destroy_qp(id_priv->id.qp);
	id_priv->id.qp = NULL;
	mutex_unlock(&id_priv->qp_mutex);
}
EXPORT_SYMBOL(rdma_destroy_qp);

static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
			     struct rdma_conn_param *conn_param)
{
	struct ib_qp_attr qp_attr;
	int qp_attr_mask, ret;
	union ib_gid sgid;

	mutex_lock(&id_priv->qp_mutex);
	if (!id_priv->id.qp) {
		ret = 0;
		goto out;
	}

	/* Need to update QP attributes from default values.
*/ 878 qp_attr.qp_state = IB_QPS_INIT; 879 ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); 880 if (ret) 881 goto out; 882 883 ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); 884 if (ret) 885 goto out; 886 887 qp_attr.qp_state = IB_QPS_RTR; 888 ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); 889 if (ret) 890 goto out; 891 892 ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num, 893 qp_attr.ah_attr.grh.sgid_index, &sgid, NULL); 894 if (ret) 895 goto out; 896 897 BUG_ON(id_priv->cma_dev->device != id_priv->id.device); 898 899 if (conn_param) 900 qp_attr.max_dest_rd_atomic = conn_param->responder_resources; 901 ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); 902 out: 903 mutex_unlock(&id_priv->qp_mutex); 904 return ret; 905 } 906 907 static int cma_modify_qp_rts(struct rdma_id_private *id_priv, 908 struct rdma_conn_param *conn_param) 909 { 910 struct ib_qp_attr qp_attr; 911 int qp_attr_mask, ret; 912 913 mutex_lock(&id_priv->qp_mutex); 914 if (!id_priv->id.qp) { 915 ret = 0; 916 goto out; 917 } 918 919 qp_attr.qp_state = IB_QPS_RTS; 920 ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); 921 if (ret) 922 goto out; 923 924 if (conn_param) 925 qp_attr.max_rd_atomic = conn_param->initiator_depth; 926 ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); 927 out: 928 mutex_unlock(&id_priv->qp_mutex); 929 return ret; 930 } 931 932 static int cma_modify_qp_err(struct rdma_id_private *id_priv) 933 { 934 struct ib_qp_attr qp_attr; 935 int ret; 936 937 mutex_lock(&id_priv->qp_mutex); 938 if (!id_priv->id.qp) { 939 ret = 0; 940 goto out; 941 } 942 943 qp_attr.qp_state = IB_QPS_ERR; 944 ret = ib_modify_qp(id_priv->id.qp, &qp_attr, IB_QP_STATE); 945 out: 946 mutex_unlock(&id_priv->qp_mutex); 947 return ret; 948 } 949 950 static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv, 951 struct ib_qp_attr *qp_attr, int *qp_attr_mask) 952 { 953 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 954 int ret; 955 u16 pkey; 956 957 if (rdma_cap_eth_ah(id_priv->id.device, id_priv->id.port_num)) 958 pkey = 0xffff; 959 else 960 pkey = ib_addr_get_pkey(dev_addr); 961 962 ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num, 963 pkey, &qp_attr->pkey_index); 964 if (ret) 965 return ret; 966 967 qp_attr->port_num = id_priv->id.port_num; 968 *qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT; 969 970 if (id_priv->id.qp_type == IB_QPT_UD) { 971 ret = cma_set_qkey(id_priv, 0); 972 if (ret) 973 return ret; 974 975 qp_attr->qkey = id_priv->qkey; 976 *qp_attr_mask |= IB_QP_QKEY; 977 } else { 978 qp_attr->qp_access_flags = 0; 979 *qp_attr_mask |= IB_QP_ACCESS_FLAGS; 980 } 981 return 0; 982 } 983 984 int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, 985 int *qp_attr_mask) 986 { 987 struct rdma_id_private *id_priv; 988 int ret = 0; 989 990 id_priv = container_of(id, struct rdma_id_private, id); 991 if (rdma_cap_ib_cm(id->device, id->port_num)) { 992 if (!id_priv->cm_id.ib || (id_priv->id.qp_type == IB_QPT_UD)) 993 ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask); 994 else 995 ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr, 996 qp_attr_mask); 997 998 if (qp_attr->qp_state == IB_QPS_RTR) 999 qp_attr->rq_psn = id_priv->seq_num; 1000 } else if (rdma_cap_iw_cm(id->device, id->port_num)) { 1001 if (!id_priv->cm_id.iw) { 1002 qp_attr->qp_access_flags = 0; 1003 *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; 1004 } else 1005 ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, 
qp_attr, 1006 qp_attr_mask); 1007 qp_attr->port_num = id_priv->id.port_num; 1008 *qp_attr_mask |= IB_QP_PORT; 1009 } else 1010 ret = -ENOSYS; 1011 1012 return ret; 1013 } 1014 EXPORT_SYMBOL(rdma_init_qp_attr); 1015 1016 static inline int cma_zero_addr(struct sockaddr *addr) 1017 { 1018 switch (addr->sa_family) { 1019 case AF_INET: 1020 return ipv4_is_zeronet(((struct sockaddr_in *)addr)->sin_addr.s_addr); 1021 case AF_INET6: 1022 return ipv6_addr_any(&((struct sockaddr_in6 *) addr)->sin6_addr); 1023 case AF_IB: 1024 return ib_addr_any(&((struct sockaddr_ib *) addr)->sib_addr); 1025 default: 1026 return 0; 1027 } 1028 } 1029 1030 static inline int cma_loopback_addr(struct sockaddr *addr) 1031 { 1032 switch (addr->sa_family) { 1033 case AF_INET: 1034 return ipv4_is_loopback(((struct sockaddr_in *) addr)->sin_addr.s_addr); 1035 case AF_INET6: 1036 return ipv6_addr_loopback(&((struct sockaddr_in6 *) addr)->sin6_addr); 1037 case AF_IB: 1038 return ib_addr_loopback(&((struct sockaddr_ib *) addr)->sib_addr); 1039 default: 1040 return 0; 1041 } 1042 } 1043 1044 static inline int cma_any_addr(struct sockaddr *addr) 1045 { 1046 return cma_zero_addr(addr) || cma_loopback_addr(addr); 1047 } 1048 1049 static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst) 1050 { 1051 if (src->sa_family != dst->sa_family) 1052 return -1; 1053 1054 switch (src->sa_family) { 1055 case AF_INET: 1056 return ((struct sockaddr_in *) src)->sin_addr.s_addr != 1057 ((struct sockaddr_in *) dst)->sin_addr.s_addr; 1058 case AF_INET6: 1059 return ipv6_addr_cmp(&((struct sockaddr_in6 *) src)->sin6_addr, 1060 &((struct sockaddr_in6 *) dst)->sin6_addr); 1061 default: 1062 return ib_addr_cmp(&((struct sockaddr_ib *) src)->sib_addr, 1063 &((struct sockaddr_ib *) dst)->sib_addr); 1064 } 1065 } 1066 1067 static __be16 cma_port(struct sockaddr *addr) 1068 { 1069 struct sockaddr_ib *sib; 1070 1071 switch (addr->sa_family) { 1072 case AF_INET: 1073 return ((struct sockaddr_in *) addr)->sin_port; 1074 case AF_INET6: 1075 return ((struct sockaddr_in6 *) addr)->sin6_port; 1076 case AF_IB: 1077 sib = (struct sockaddr_ib *) addr; 1078 return htons((u16) (be64_to_cpu(sib->sib_sid) & 1079 be64_to_cpu(sib->sib_sid_mask))); 1080 default: 1081 return 0; 1082 } 1083 } 1084 1085 static inline int cma_any_port(struct sockaddr *addr) 1086 { 1087 return !cma_port(addr); 1088 } 1089 1090 static void cma_save_ib_info(struct sockaddr *src_addr, 1091 struct sockaddr *dst_addr, 1092 struct rdma_cm_id *listen_id, 1093 struct ib_sa_path_rec *path) 1094 { 1095 struct sockaddr_ib *listen_ib, *ib; 1096 1097 listen_ib = (struct sockaddr_ib *) &listen_id->route.addr.src_addr; 1098 if (src_addr) { 1099 ib = (struct sockaddr_ib *)src_addr; 1100 ib->sib_family = AF_IB; 1101 if (path) { 1102 ib->sib_pkey = path->pkey; 1103 ib->sib_flowinfo = path->flow_label; 1104 memcpy(&ib->sib_addr, &path->sgid, 16); 1105 ib->sib_sid = path->service_id; 1106 ib->sib_scope_id = 0; 1107 } else { 1108 ib->sib_pkey = listen_ib->sib_pkey; 1109 ib->sib_flowinfo = listen_ib->sib_flowinfo; 1110 ib->sib_addr = listen_ib->sib_addr; 1111 ib->sib_sid = listen_ib->sib_sid; 1112 ib->sib_scope_id = listen_ib->sib_scope_id; 1113 } 1114 ib->sib_sid_mask = cpu_to_be64(0xffffffffffffffffULL); 1115 } 1116 if (dst_addr) { 1117 ib = (struct sockaddr_ib *)dst_addr; 1118 ib->sib_family = AF_IB; 1119 if (path) { 1120 ib->sib_pkey = path->pkey; 1121 ib->sib_flowinfo = path->flow_label; 1122 memcpy(&ib->sib_addr, &path->dgid, 16); 1123 } 1124 } 1125 } 1126 1127 static void cma_save_ip4_info(struct 
sockaddr_in *src_addr, 1128 struct sockaddr_in *dst_addr, 1129 struct cma_hdr *hdr, 1130 __be16 local_port) 1131 { 1132 if (src_addr) { 1133 *src_addr = (struct sockaddr_in) { 1134 .sin_len = sizeof(struct sockaddr_in), 1135 .sin_family = AF_INET, 1136 .sin_addr.s_addr = hdr->dst_addr.ip4.addr, 1137 .sin_port = local_port, 1138 }; 1139 } 1140 1141 if (dst_addr) { 1142 *dst_addr = (struct sockaddr_in) { 1143 .sin_len = sizeof(struct sockaddr_in), 1144 .sin_family = AF_INET, 1145 .sin_addr.s_addr = hdr->src_addr.ip4.addr, 1146 .sin_port = hdr->port, 1147 }; 1148 } 1149 } 1150 1151 static void cma_ip6_clear_scope_id(struct in6_addr *addr) 1152 { 1153 /* make sure link local scope ID gets zeroed */ 1154 if (IN6_IS_SCOPE_LINKLOCAL(addr) || 1155 IN6_IS_ADDR_MC_INTFACELOCAL(addr)) { 1156 /* use byte-access to be alignment safe */ 1157 addr->s6_addr[2] = 0; 1158 addr->s6_addr[3] = 0; 1159 } 1160 } 1161 1162 static void cma_save_ip6_info(struct sockaddr_in6 *src_addr, 1163 struct sockaddr_in6 *dst_addr, 1164 struct cma_hdr *hdr, 1165 __be16 local_port) 1166 { 1167 if (src_addr) { 1168 *src_addr = (struct sockaddr_in6) { 1169 .sin6_len = sizeof(struct sockaddr_in6), 1170 .sin6_family = AF_INET6, 1171 .sin6_addr = hdr->dst_addr.ip6, 1172 .sin6_port = local_port, 1173 }; 1174 cma_ip6_clear_scope_id(&src_addr->sin6_addr); 1175 } 1176 1177 if (dst_addr) { 1178 *dst_addr = (struct sockaddr_in6) { 1179 .sin6_len = sizeof(struct sockaddr_in6), 1180 .sin6_family = AF_INET6, 1181 .sin6_addr = hdr->src_addr.ip6, 1182 .sin6_port = hdr->port, 1183 }; 1184 cma_ip6_clear_scope_id(&dst_addr->sin6_addr); 1185 } 1186 } 1187 1188 static u16 cma_port_from_service_id(__be64 service_id) 1189 { 1190 return (u16)be64_to_cpu(service_id); 1191 } 1192 1193 static int sdp_save_ip_info(struct sockaddr *src_addr, 1194 struct sockaddr *dst_addr, 1195 const struct sdp_hh *hdr, 1196 __be64 service_id) 1197 { 1198 __be16 local_port; 1199 1200 BUG_ON(src_addr == NULL || dst_addr == NULL); 1201 1202 if (sdp_get_majv(hdr->majv_minv) != SDP_MAJ_VERSION) 1203 return -EINVAL; 1204 1205 local_port = htons(cma_port_from_service_id(service_id)); 1206 1207 switch (sdp_get_ip_ver(hdr)) { 1208 case 4: { 1209 struct sockaddr_in *s4, *d4; 1210 1211 s4 = (void *)src_addr; 1212 d4 = (void *)dst_addr; 1213 1214 *s4 = (struct sockaddr_in) { 1215 .sin_len = sizeof(*s4), 1216 .sin_family = AF_INET, 1217 .sin_addr.s_addr = hdr->dst_addr.ip4.addr, 1218 .sin_port = local_port, 1219 }; 1220 *d4 = (struct sockaddr_in) { 1221 .sin_len = sizeof(*d4), 1222 .sin_family = AF_INET, 1223 .sin_addr.s_addr = hdr->src_addr.ip4.addr, 1224 .sin_port = hdr->port, 1225 }; 1226 break; 1227 } 1228 case 6: { 1229 struct sockaddr_in6 *s6, *d6; 1230 1231 s6 = (void *)src_addr; 1232 d6 = (void *)dst_addr; 1233 1234 *s6 = (struct sockaddr_in6) { 1235 .sin6_len = sizeof(*s6), 1236 .sin6_family = AF_INET6, 1237 .sin6_addr = hdr->dst_addr.ip6, 1238 .sin6_port = local_port, 1239 }; 1240 *d6 = (struct sockaddr_in6) { 1241 .sin6_len = sizeof(*d6), 1242 .sin6_family = AF_INET6, 1243 .sin6_addr = hdr->src_addr.ip6, 1244 .sin6_port = hdr->port, 1245 }; 1246 cma_ip6_clear_scope_id(&s6->sin6_addr); 1247 cma_ip6_clear_scope_id(&d6->sin6_addr); 1248 break; 1249 } 1250 default: 1251 return -EAFNOSUPPORT; 1252 } 1253 1254 return 0; 1255 } 1256 1257 static int cma_save_ip_info(struct sockaddr *src_addr, 1258 struct sockaddr *dst_addr, 1259 struct ib_cm_event *ib_event, 1260 __be64 service_id) 1261 { 1262 struct cma_hdr *hdr; 1263 __be16 port; 1264 1265 if (rdma_ps_from_service_id(service_id) 
== RDMA_PS_SDP) 1266 return sdp_save_ip_info(src_addr, dst_addr, 1267 ib_event->private_data, service_id); 1268 1269 hdr = ib_event->private_data; 1270 if (hdr->cma_version != CMA_VERSION) 1271 return -EINVAL; 1272 1273 port = htons(cma_port_from_service_id(service_id)); 1274 1275 switch (cma_get_ip_ver(hdr)) { 1276 case 4: 1277 cma_save_ip4_info((struct sockaddr_in *)src_addr, 1278 (struct sockaddr_in *)dst_addr, hdr, port); 1279 break; 1280 case 6: 1281 cma_save_ip6_info((struct sockaddr_in6 *)src_addr, 1282 (struct sockaddr_in6 *)dst_addr, hdr, port); 1283 break; 1284 default: 1285 return -EAFNOSUPPORT; 1286 } 1287 1288 return 0; 1289 } 1290 1291 static int cma_save_net_info(struct sockaddr *src_addr, 1292 struct sockaddr *dst_addr, 1293 struct rdma_cm_id *listen_id, 1294 struct ib_cm_event *ib_event, 1295 sa_family_t sa_family, __be64 service_id) 1296 { 1297 if (sa_family == AF_IB) { 1298 if (ib_event->event == IB_CM_REQ_RECEIVED) 1299 cma_save_ib_info(src_addr, dst_addr, listen_id, 1300 ib_event->param.req_rcvd.primary_path); 1301 else if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) 1302 cma_save_ib_info(src_addr, dst_addr, listen_id, NULL); 1303 return 0; 1304 } 1305 1306 return cma_save_ip_info(src_addr, dst_addr, ib_event, service_id); 1307 } 1308 1309 static int cma_save_req_info(const struct ib_cm_event *ib_event, 1310 struct cma_req_info *req) 1311 { 1312 const struct ib_cm_req_event_param *req_param = 1313 &ib_event->param.req_rcvd; 1314 const struct ib_cm_sidr_req_event_param *sidr_param = 1315 &ib_event->param.sidr_req_rcvd; 1316 1317 switch (ib_event->event) { 1318 case IB_CM_REQ_RECEIVED: 1319 req->device = req_param->listen_id->device; 1320 req->port = req_param->port; 1321 memcpy(&req->local_gid, &req_param->primary_path->sgid, 1322 sizeof(req->local_gid)); 1323 req->has_gid = true; 1324 req->service_id = req_param->primary_path->service_id; 1325 req->pkey = be16_to_cpu(req_param->primary_path->pkey); 1326 if (req->pkey != req_param->bth_pkey) 1327 pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and primary path P_Key (0x%x)\n" 1328 "RDMA CMA: in the future this may cause the request to be dropped\n", 1329 req_param->bth_pkey, req->pkey); 1330 break; 1331 case IB_CM_SIDR_REQ_RECEIVED: 1332 req->device = sidr_param->listen_id->device; 1333 req->port = sidr_param->port; 1334 req->has_gid = false; 1335 req->service_id = sidr_param->service_id; 1336 req->pkey = sidr_param->pkey; 1337 if (req->pkey != sidr_param->bth_pkey) 1338 pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and SIDR request payload P_Key (0x%x)\n" 1339 "RDMA CMA: in the future this may cause the request to be dropped\n", 1340 sidr_param->bth_pkey, req->pkey); 1341 break; 1342 default: 1343 return -EINVAL; 1344 } 1345 1346 return 0; 1347 } 1348 1349 static bool validate_ipv4_net_dev(struct net_device *net_dev, 1350 const struct sockaddr_in *dst_addr, 1351 const struct sockaddr_in *src_addr) 1352 { 1353 #ifdef INET 1354 struct sockaddr_in src_tmp = *src_addr; 1355 __be32 daddr = dst_addr->sin_addr.s_addr, 1356 saddr = src_addr->sin_addr.s_addr; 1357 struct net_device *dst_dev; 1358 struct rtentry *rte; 1359 bool ret; 1360 1361 if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) || 1362 ipv4_is_lbcast(daddr) || ipv4_is_zeronet(saddr) || 1363 ipv4_is_zeronet(daddr) || ipv4_is_loopback(daddr) || 1364 ipv4_is_loopback(saddr)) 1365 return false; 1366 1367 dst_dev = ip_dev_find(net_dev->if_vnet, daddr); 1368 if (dst_dev != net_dev) { 1369 if (dst_dev != NULL) 1370 dev_put(dst_dev); 1371 
return false; 1372 } 1373 dev_put(dst_dev); 1374 1375 /* 1376 * Check for loopback. 1377 */ 1378 if (saddr == daddr) 1379 return true; 1380 1381 /* 1382 * Make sure the socket address length field 1383 * is set, else rtalloc1() will fail. 1384 */ 1385 src_tmp.sin_len = sizeof(src_tmp); 1386 1387 CURVNET_SET(net_dev->if_vnet); 1388 rte = rtalloc1((struct sockaddr *)&src_tmp, 1, 0); 1389 if (rte != NULL) { 1390 ret = (rte->rt_ifp == net_dev); 1391 RTFREE_LOCKED(rte); 1392 } else { 1393 ret = false; 1394 } 1395 CURVNET_RESTORE(); 1396 return ret; 1397 #else 1398 return false; 1399 #endif 1400 } 1401 1402 static bool validate_ipv6_net_dev(struct net_device *net_dev, 1403 const struct sockaddr_in6 *dst_addr, 1404 const struct sockaddr_in6 *src_addr) 1405 { 1406 #ifdef INET6 1407 struct sockaddr_in6 src_tmp = *src_addr; 1408 struct sockaddr_in6 dst_tmp = *dst_addr; 1409 struct net_device *dst_dev; 1410 struct rtentry *rte; 1411 bool ret; 1412 1413 dst_dev = ip6_dev_find(net_dev->if_vnet, dst_tmp.sin6_addr, 1414 net_dev->if_index); 1415 if (dst_dev != net_dev) { 1416 if (dst_dev != NULL) 1417 dev_put(dst_dev); 1418 return false; 1419 } 1420 dev_put(dst_dev); 1421 1422 CURVNET_SET(net_dev->if_vnet); 1423 1424 /* 1425 * Make sure the socket address length field 1426 * is set, else rtalloc1() will fail. 1427 */ 1428 src_tmp.sin6_len = sizeof(src_tmp); 1429 1430 /* 1431 * Make sure the scope ID gets embedded, else rtalloc1() will 1432 * resolve to the loopback interface. 1433 */ 1434 src_tmp.sin6_scope_id = net_dev->if_index; 1435 sa6_embedscope(&src_tmp, 0); 1436 1437 dst_tmp.sin6_scope_id = net_dev->if_index; 1438 sa6_embedscope(&dst_tmp, 0); 1439 1440 /* 1441 * Check for loopback after scope ID 1442 * has been embedded: 1443 */ 1444 if (memcmp(&src_tmp.sin6_addr, &dst_tmp.sin6_addr, 1445 sizeof(dst_tmp.sin6_addr)) == 0) { 1446 ret = true; 1447 } else { 1448 /* non-loopback case */ 1449 rte = rtalloc1((struct sockaddr *)&src_tmp, 1, 0); 1450 if (rte != NULL) { 1451 ret = (rte->rt_ifp == net_dev); 1452 RTFREE_LOCKED(rte); 1453 } else { 1454 ret = false; 1455 } 1456 } 1457 CURVNET_RESTORE(); 1458 return ret; 1459 #else 1460 return false; 1461 #endif 1462 } 1463 1464 static bool validate_net_dev(struct net_device *net_dev, 1465 const struct sockaddr *daddr, 1466 const struct sockaddr *saddr) 1467 { 1468 const struct sockaddr_in *daddr4 = (const struct sockaddr_in *)daddr; 1469 const struct sockaddr_in *saddr4 = (const struct sockaddr_in *)saddr; 1470 const struct sockaddr_in6 *daddr6 = (const struct sockaddr_in6 *)daddr; 1471 const struct sockaddr_in6 *saddr6 = (const struct sockaddr_in6 *)saddr; 1472 1473 switch (daddr->sa_family) { 1474 case AF_INET: 1475 return saddr->sa_family == AF_INET && 1476 validate_ipv4_net_dev(net_dev, daddr4, saddr4); 1477 1478 case AF_INET6: 1479 return saddr->sa_family == AF_INET6 && 1480 validate_ipv6_net_dev(net_dev, daddr6, saddr6); 1481 1482 default: 1483 return false; 1484 } 1485 } 1486 1487 static struct net_device * 1488 roce_get_net_dev_by_cm_event(struct ib_device *device, u8 port_num, 1489 const struct ib_cm_event *ib_event) 1490 { 1491 struct ib_gid_attr sgid_attr; 1492 union ib_gid sgid; 1493 int err = -EINVAL; 1494 1495 if (ib_event->event == IB_CM_REQ_RECEIVED) { 1496 err = ib_get_cached_gid(device, port_num, 1497 ib_event->param.req_rcvd.ppath_sgid_index, &sgid, &sgid_attr); 1498 } else if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) { 1499 err = ib_get_cached_gid(device, port_num, 1500 ib_event->param.sidr_req_rcvd.sgid_index, &sgid, &sgid_attr); 1501 } 
1502 if (err) 1503 return (NULL); 1504 return (sgid_attr.ndev); 1505 } 1506 1507 static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event, 1508 const struct cma_req_info *req) 1509 { 1510 struct sockaddr_storage listen_addr_storage, src_addr_storage; 1511 struct sockaddr *listen_addr = (struct sockaddr *)&listen_addr_storage, 1512 *src_addr = (struct sockaddr *)&src_addr_storage; 1513 struct net_device *net_dev; 1514 const union ib_gid *gid = req->has_gid ? &req->local_gid : NULL; 1515 int err; 1516 1517 err = cma_save_ip_info(listen_addr, src_addr, ib_event, 1518 req->service_id); 1519 if (err) 1520 return ERR_PTR(err); 1521 1522 if (rdma_protocol_roce(req->device, req->port)) { 1523 net_dev = roce_get_net_dev_by_cm_event(req->device, req->port, 1524 ib_event); 1525 } else { 1526 net_dev = ib_get_net_dev_by_params(req->device, req->port, 1527 req->pkey, 1528 gid, listen_addr); 1529 } 1530 if (!net_dev) 1531 return ERR_PTR(-ENODEV); 1532 1533 if (!validate_net_dev(net_dev, listen_addr, src_addr)) { 1534 dev_put(net_dev); 1535 return ERR_PTR(-EHOSTUNREACH); 1536 } 1537 1538 return net_dev; 1539 } 1540 1541 static enum rdma_port_space rdma_ps_from_service_id(__be64 service_id) 1542 { 1543 return (be64_to_cpu(service_id) >> 16) & 0xffff; 1544 } 1545 1546 static bool sdp_match_private_data(struct rdma_id_private *id_priv, 1547 const struct sdp_hh *hdr, 1548 struct sockaddr *addr) 1549 { 1550 __be32 ip4_addr; 1551 struct in6_addr ip6_addr; 1552 1553 switch (addr->sa_family) { 1554 case AF_INET: 1555 ip4_addr = ((struct sockaddr_in *)addr)->sin_addr.s_addr; 1556 if (sdp_get_ip_ver(hdr) != 4) 1557 return false; 1558 if (!cma_any_addr(addr) && 1559 hdr->dst_addr.ip4.addr != ip4_addr) 1560 return false; 1561 break; 1562 case AF_INET6: 1563 ip6_addr = ((struct sockaddr_in6 *)addr)->sin6_addr; 1564 if (sdp_get_ip_ver(hdr) != 6) 1565 return false; 1566 cma_ip6_clear_scope_id(&ip6_addr); 1567 if (!cma_any_addr(addr) && 1568 memcmp(&hdr->dst_addr.ip6, &ip6_addr, sizeof(ip6_addr))) 1569 return false; 1570 break; 1571 case AF_IB: 1572 return true; 1573 default: 1574 return false; 1575 } 1576 1577 return true; 1578 } 1579 1580 static bool cma_match_private_data(struct rdma_id_private *id_priv, 1581 const void *vhdr) 1582 { 1583 const struct cma_hdr *hdr = vhdr; 1584 struct sockaddr *addr = cma_src_addr(id_priv); 1585 __be32 ip4_addr; 1586 struct in6_addr ip6_addr; 1587 1588 if (cma_any_addr(addr) && !id_priv->afonly) 1589 return true; 1590 1591 if (id_priv->id.ps == RDMA_PS_SDP) 1592 return sdp_match_private_data(id_priv, vhdr, addr); 1593 1594 switch (addr->sa_family) { 1595 case AF_INET: 1596 ip4_addr = ((struct sockaddr_in *)addr)->sin_addr.s_addr; 1597 if (cma_get_ip_ver(hdr) != 4) 1598 return false; 1599 if (!cma_any_addr(addr) && 1600 hdr->dst_addr.ip4.addr != ip4_addr) 1601 return false; 1602 break; 1603 case AF_INET6: 1604 ip6_addr = ((struct sockaddr_in6 *)addr)->sin6_addr; 1605 if (cma_get_ip_ver(hdr) != 6) 1606 return false; 1607 cma_ip6_clear_scope_id(&ip6_addr); 1608 if (!cma_any_addr(addr) && 1609 memcmp(&hdr->dst_addr.ip6, &ip6_addr, sizeof(ip6_addr))) 1610 return false; 1611 break; 1612 case AF_IB: 1613 return true; 1614 default: 1615 return false; 1616 } 1617 1618 return true; 1619 } 1620 1621 static bool cma_protocol_roce_dev_port(struct ib_device *device, int port_num) 1622 { 1623 enum rdma_link_layer ll = rdma_port_get_link_layer(device, port_num); 1624 enum rdma_transport_type transport = 1625 rdma_node_get_transport(device->node_type); 1626 1627 return ll == 
IB_LINK_LAYER_ETHERNET && transport == RDMA_TRANSPORT_IB; 1628 } 1629 1630 static bool cma_protocol_roce(const struct rdma_cm_id *id) 1631 { 1632 struct ib_device *device = id->device; 1633 const int port_num = id->port_num ?: rdma_start_port(device); 1634 1635 return cma_protocol_roce_dev_port(device, port_num); 1636 } 1637 1638 static bool cma_match_net_dev(const struct rdma_cm_id *id, 1639 const struct net_device *net_dev, 1640 u8 port_num) 1641 { 1642 const struct rdma_addr *addr = &id->route.addr; 1643 1644 if (!net_dev) { 1645 if (id->port_num && id->port_num != port_num) 1646 return false; 1647 1648 if (id->ps == RDMA_PS_SDP) { 1649 if (addr->src_addr.ss_family == AF_INET || 1650 addr->src_addr.ss_family == AF_INET6) 1651 return true; 1652 return false; 1653 } 1654 /* This request is an AF_IB request or a RoCE request */ 1655 return addr->src_addr.ss_family == AF_IB || 1656 cma_protocol_roce_dev_port(id->device, port_num); 1657 } 1658 1659 return !addr->dev_addr.bound_dev_if || 1660 (net_eq(dev_net(net_dev), addr->dev_addr.net) && 1661 addr->dev_addr.bound_dev_if == net_dev->if_index); 1662 } 1663 1664 static struct rdma_id_private *cma_find_listener( 1665 const struct rdma_bind_list *bind_list, 1666 const struct ib_cm_id *cm_id, 1667 const struct ib_cm_event *ib_event, 1668 const struct cma_req_info *req, 1669 const struct net_device *net_dev) 1670 { 1671 struct rdma_id_private *id_priv, *id_priv_dev; 1672 1673 if (!bind_list) 1674 return ERR_PTR(-EINVAL); 1675 1676 hlist_for_each_entry(id_priv, &bind_list->owners, node) { 1677 if (cma_match_private_data(id_priv, ib_event->private_data)) { 1678 if (id_priv->id.device == cm_id->device && 1679 cma_match_net_dev(&id_priv->id, net_dev, req->port)) 1680 return id_priv; 1681 list_for_each_entry(id_priv_dev, 1682 &id_priv->listen_list, 1683 listen_list) { 1684 if (id_priv_dev->id.device == cm_id->device && 1685 cma_match_net_dev(&id_priv_dev->id, net_dev, req->port)) 1686 return id_priv_dev; 1687 } 1688 } 1689 } 1690 1691 return ERR_PTR(-EINVAL); 1692 } 1693 1694 static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id, 1695 struct ib_cm_event *ib_event, 1696 struct net_device **net_dev) 1697 { 1698 struct cma_req_info req; 1699 struct rdma_bind_list *bind_list; 1700 struct rdma_id_private *id_priv; 1701 int err; 1702 1703 err = cma_save_req_info(ib_event, &req); 1704 if (err) 1705 return ERR_PTR(err); 1706 1707 if (rdma_ps_from_service_id(cm_id->service_id) == RDMA_PS_SDP) { 1708 *net_dev = NULL; 1709 goto there_is_no_net_dev; 1710 } 1711 1712 *net_dev = cma_get_net_dev(ib_event, &req); 1713 if (IS_ERR(*net_dev)) { 1714 if (PTR_ERR(*net_dev) == -EAFNOSUPPORT) { 1715 /* Assuming the protocol is AF_IB */ 1716 *net_dev = NULL; 1717 } else { 1718 return ERR_CAST(*net_dev); 1719 } 1720 } 1721 1722 there_is_no_net_dev: 1723 bind_list = cma_ps_find(*net_dev ? 
dev_net(*net_dev) : &init_net, 1724 rdma_ps_from_service_id(req.service_id), 1725 cma_port_from_service_id(req.service_id)); 1726 id_priv = cma_find_listener(bind_list, cm_id, ib_event, &req, *net_dev); 1727 if (IS_ERR(id_priv) && *net_dev) { 1728 dev_put(*net_dev); 1729 *net_dev = NULL; 1730 } 1731 1732 return id_priv; 1733 } 1734 1735 static inline int cma_user_data_offset(struct rdma_id_private *id_priv) 1736 { 1737 if (cma_family(id_priv) == AF_IB) 1738 return 0; 1739 if (id_priv->id.ps == RDMA_PS_SDP) 1740 return 0; 1741 return sizeof(struct cma_hdr); 1742 } 1743 1744 static void cma_cancel_route(struct rdma_id_private *id_priv) 1745 { 1746 if (rdma_cap_ib_sa(id_priv->id.device, id_priv->id.port_num)) { 1747 if (id_priv->query) 1748 ib_sa_cancel_query(id_priv->query_id, id_priv->query); 1749 } 1750 } 1751 1752 static void cma_cancel_listens(struct rdma_id_private *id_priv) 1753 { 1754 struct rdma_id_private *dev_id_priv; 1755 1756 /* 1757 * Remove from listen_any_list to prevent added devices from spawning 1758 * additional listen requests. 1759 */ 1760 mutex_lock(&lock); 1761 list_del(&id_priv->list); 1762 1763 while (!list_empty(&id_priv->listen_list)) { 1764 dev_id_priv = list_entry(id_priv->listen_list.next, 1765 struct rdma_id_private, listen_list); 1766 /* sync with device removal to avoid duplicate destruction */ 1767 list_del_init(&dev_id_priv->list); 1768 list_del(&dev_id_priv->listen_list); 1769 mutex_unlock(&lock); 1770 1771 rdma_destroy_id(&dev_id_priv->id); 1772 mutex_lock(&lock); 1773 } 1774 mutex_unlock(&lock); 1775 } 1776 1777 static void cma_cancel_operation(struct rdma_id_private *id_priv, 1778 enum rdma_cm_state state) 1779 { 1780 switch (state) { 1781 case RDMA_CM_ADDR_QUERY: 1782 rdma_addr_cancel(&id_priv->id.route.addr.dev_addr); 1783 break; 1784 case RDMA_CM_ROUTE_QUERY: 1785 cma_cancel_route(id_priv); 1786 break; 1787 case RDMA_CM_LISTEN: 1788 if (cma_any_addr(cma_src_addr(id_priv)) && !id_priv->cma_dev) 1789 cma_cancel_listens(id_priv); 1790 break; 1791 default: 1792 break; 1793 } 1794 } 1795 1796 static void cma_release_port(struct rdma_id_private *id_priv) 1797 { 1798 struct rdma_bind_list *bind_list = id_priv->bind_list; 1799 struct vnet *net = id_priv->id.route.addr.dev_addr.net; 1800 1801 if (!bind_list) 1802 return; 1803 1804 mutex_lock(&lock); 1805 hlist_del(&id_priv->node); 1806 if (hlist_empty(&bind_list->owners)) { 1807 cma_ps_remove(net, bind_list->ps, bind_list->port); 1808 kfree(bind_list); 1809 } 1810 mutex_unlock(&lock); 1811 } 1812 1813 static void cma_leave_mc_groups(struct rdma_id_private *id_priv) 1814 { 1815 struct cma_multicast *mc; 1816 1817 while (!list_empty(&id_priv->mc_list)) { 1818 mc = container_of(id_priv->mc_list.next, 1819 struct cma_multicast, list); 1820 list_del(&mc->list); 1821 if (rdma_cap_ib_mcast(id_priv->cma_dev->device, 1822 id_priv->id.port_num)) { 1823 ib_sa_free_multicast(mc->multicast.ib); 1824 kfree(mc); 1825 } else { 1826 if (mc->igmp_joined) { 1827 struct rdma_dev_addr *dev_addr = 1828 &id_priv->id.route.addr.dev_addr; 1829 struct net_device *ndev = NULL; 1830 1831 if (dev_addr->bound_dev_if) 1832 ndev = dev_get_by_index(dev_addr->net, 1833 dev_addr->bound_dev_if); 1834 if (ndev) { 1835 cma_igmp_send(ndev, 1836 &mc->multicast.ib->rec.mgid, 1837 false); 1838 dev_put(ndev); 1839 } 1840 } 1841 kref_put(&mc->mcref, release_mc); 1842 } 1843 } 1844 } 1845 1846 void rdma_destroy_id(struct rdma_cm_id *id) 1847 { 1848 struct rdma_id_private *id_priv; 1849 enum rdma_cm_state state; 1850 1851 id_priv = container_of(id, 
struct rdma_id_private, id); 1852 state = cma_exch(id_priv, RDMA_CM_DESTROYING); 1853 cma_cancel_operation(id_priv, state); 1854 1855 /* 1856 * Wait for any active callback to finish. New callbacks will find 1857 * the id_priv state set to destroying and abort. 1858 */ 1859 mutex_lock(&id_priv->handler_mutex); 1860 mutex_unlock(&id_priv->handler_mutex); 1861 1862 if (id_priv->cma_dev) { 1863 if (rdma_cap_ib_cm(id_priv->id.device, 1)) { 1864 if (id_priv->cm_id.ib) 1865 ib_destroy_cm_id(id_priv->cm_id.ib); 1866 } else if (rdma_cap_iw_cm(id_priv->id.device, 1)) { 1867 if (id_priv->cm_id.iw) 1868 iw_destroy_cm_id(id_priv->cm_id.iw); 1869 } 1870 cma_leave_mc_groups(id_priv); 1871 cma_release_dev(id_priv); 1872 } 1873 1874 cma_release_port(id_priv); 1875 cma_deref_id(id_priv); 1876 wait_for_completion(&id_priv->comp); 1877 1878 if (id_priv->internal_id) 1879 cma_deref_id(id_priv->id.context); 1880 1881 kfree(id_priv->id.route.path_rec); 1882 kfree(id_priv); 1883 } 1884 EXPORT_SYMBOL(rdma_destroy_id); 1885 1886 static int cma_rep_recv(struct rdma_id_private *id_priv) 1887 { 1888 int ret; 1889 1890 ret = cma_modify_qp_rtr(id_priv, NULL); 1891 if (ret) 1892 goto reject; 1893 1894 ret = cma_modify_qp_rts(id_priv, NULL); 1895 if (ret) 1896 goto reject; 1897 1898 ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0); 1899 if (ret) 1900 goto reject; 1901 1902 return 0; 1903 reject: 1904 cma_modify_qp_err(id_priv); 1905 ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED, 1906 NULL, 0, NULL, 0); 1907 return ret; 1908 } 1909 1910 static int sdp_verify_rep(const struct sdp_hah *data) 1911 { 1912 if (sdp_get_majv(data->majv_minv) != SDP_MAJ_VERSION) 1913 return -EINVAL; 1914 return 0; 1915 } 1916 1917 static void cma_set_rep_event_data(struct rdma_cm_event *event, 1918 struct ib_cm_rep_event_param *rep_data, 1919 void *private_data) 1920 { 1921 event->param.conn.private_data = private_data; 1922 event->param.conn.private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE; 1923 event->param.conn.responder_resources = rep_data->responder_resources; 1924 event->param.conn.initiator_depth = rep_data->initiator_depth; 1925 event->param.conn.flow_control = rep_data->flow_control; 1926 event->param.conn.rnr_retry_count = rep_data->rnr_retry_count; 1927 event->param.conn.srq = rep_data->srq; 1928 event->param.conn.qp_num = rep_data->remote_qpn; 1929 } 1930 1931 static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) 1932 { 1933 struct rdma_id_private *id_priv = cm_id->context; 1934 struct rdma_cm_event event; 1935 int ret = 0; 1936 1937 mutex_lock(&id_priv->handler_mutex); 1938 if ((ib_event->event != IB_CM_TIMEWAIT_EXIT && 1939 id_priv->state != RDMA_CM_CONNECT) || 1940 (ib_event->event == IB_CM_TIMEWAIT_EXIT && 1941 id_priv->state != RDMA_CM_DISCONNECT)) 1942 goto out; 1943 1944 memset(&event, 0, sizeof event); 1945 switch (ib_event->event) { 1946 case IB_CM_REQ_ERROR: 1947 case IB_CM_REP_ERROR: 1948 event.event = RDMA_CM_EVENT_UNREACHABLE; 1949 event.status = -ETIMEDOUT; 1950 break; 1951 case IB_CM_REP_RECEIVED: 1952 if (id_priv->id.ps == RDMA_PS_SDP) { 1953 event.status = sdp_verify_rep(ib_event->private_data); 1954 if (event.status) 1955 event.event = RDMA_CM_EVENT_CONNECT_ERROR; 1956 else 1957 event.event = RDMA_CM_EVENT_CONNECT_RESPONSE; 1958 } else { 1959 if (id_priv->id.qp) { 1960 event.status = cma_rep_recv(id_priv); 1961 event.event = event.status ? 
RDMA_CM_EVENT_CONNECT_ERROR : 1962 RDMA_CM_EVENT_ESTABLISHED; 1963 } else { 1964 event.event = RDMA_CM_EVENT_CONNECT_RESPONSE; 1965 } 1966 } 1967 cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd, 1968 ib_event->private_data); 1969 break; 1970 case IB_CM_RTU_RECEIVED: 1971 case IB_CM_USER_ESTABLISHED: 1972 event.event = RDMA_CM_EVENT_ESTABLISHED; 1973 break; 1974 case IB_CM_DREQ_ERROR: 1975 event.status = -ETIMEDOUT; /* fall through */ 1976 case IB_CM_DREQ_RECEIVED: 1977 case IB_CM_DREP_RECEIVED: 1978 if (!cma_comp_exch(id_priv, RDMA_CM_CONNECT, 1979 RDMA_CM_DISCONNECT)) 1980 goto out; 1981 event.event = RDMA_CM_EVENT_DISCONNECTED; 1982 break; 1983 case IB_CM_TIMEWAIT_EXIT: 1984 event.event = RDMA_CM_EVENT_TIMEWAIT_EXIT; 1985 break; 1986 case IB_CM_MRA_RECEIVED: 1987 /* ignore event */ 1988 goto out; 1989 case IB_CM_REJ_RECEIVED: 1990 cma_modify_qp_err(id_priv); 1991 event.status = ib_event->param.rej_rcvd.reason; 1992 event.event = RDMA_CM_EVENT_REJECTED; 1993 event.param.conn.private_data = ib_event->private_data; 1994 event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE; 1995 break; 1996 default: 1997 pr_err("RDMA CMA: unexpected IB CM event: %d\n", 1998 ib_event->event); 1999 goto out; 2000 } 2001 2002 ret = id_priv->id.event_handler(&id_priv->id, &event); 2003 if (ret) { 2004 /* Destroy the CM ID by returning a non-zero value. */ 2005 id_priv->cm_id.ib = NULL; 2006 cma_exch(id_priv, RDMA_CM_DESTROYING); 2007 mutex_unlock(&id_priv->handler_mutex); 2008 rdma_destroy_id(&id_priv->id); 2009 return ret; 2010 } 2011 out: 2012 mutex_unlock(&id_priv->handler_mutex); 2013 return ret; 2014 } 2015 2016 static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, 2017 struct ib_cm_event *ib_event, 2018 struct net_device *net_dev) 2019 { 2020 struct rdma_id_private *id_priv; 2021 struct rdma_cm_id *id; 2022 struct rdma_route *rt; 2023 const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family; 2024 const __be64 service_id = 2025 ib_event->param.req_rcvd.primary_path->service_id; 2026 int ret; 2027 2028 id = rdma_create_id(listen_id->route.addr.dev_addr.net, 2029 listen_id->event_handler, listen_id->context, 2030 listen_id->ps, ib_event->param.req_rcvd.qp_type); 2031 if (IS_ERR(id)) 2032 return NULL; 2033 2034 id_priv = container_of(id, struct rdma_id_private, id); 2035 if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr, 2036 (struct sockaddr *)&id->route.addr.dst_addr, 2037 listen_id, ib_event, ss_family, service_id)) 2038 goto err; 2039 2040 rt = &id->route; 2041 rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 
2 : 1; 2042 rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths, 2043 GFP_KERNEL); 2044 if (!rt->path_rec) 2045 goto err; 2046 2047 rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path; 2048 if (rt->num_paths == 2) 2049 rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path; 2050 2051 if (net_dev) { 2052 ret = rdma_copy_addr(&rt->addr.dev_addr, net_dev, NULL); 2053 if (ret) 2054 goto err; 2055 } else { 2056 if (!cma_protocol_roce(listen_id) && 2057 cma_any_addr(cma_src_addr(id_priv))) { 2058 rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND; 2059 rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid); 2060 ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey)); 2061 } else if (!cma_any_addr(cma_src_addr(id_priv))) { 2062 ret = cma_translate_addr(cma_src_addr(id_priv), &rt->addr.dev_addr); 2063 if (ret) 2064 goto err; 2065 } 2066 } 2067 rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid); 2068 2069 id_priv->state = RDMA_CM_CONNECT; 2070 return id_priv; 2071 2072 err: 2073 rdma_destroy_id(id); 2074 return NULL; 2075 } 2076 2077 static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id, 2078 struct ib_cm_event *ib_event, 2079 struct net_device *net_dev) 2080 { 2081 struct rdma_id_private *id_priv; 2082 struct rdma_cm_id *id; 2083 const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family; 2084 struct vnet *net = listen_id->route.addr.dev_addr.net; 2085 int ret; 2086 2087 id = rdma_create_id(net, listen_id->event_handler, listen_id->context, 2088 listen_id->ps, IB_QPT_UD); 2089 if (IS_ERR(id)) 2090 return NULL; 2091 2092 id_priv = container_of(id, struct rdma_id_private, id); 2093 if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr, 2094 (struct sockaddr *)&id->route.addr.dst_addr, 2095 listen_id, ib_event, ss_family, 2096 ib_event->param.sidr_req_rcvd.service_id)) 2097 goto err; 2098 2099 if (net_dev) { 2100 ret = rdma_copy_addr(&id->route.addr.dev_addr, net_dev, NULL); 2101 if (ret) 2102 goto err; 2103 } else { 2104 if (!cma_any_addr(cma_src_addr(id_priv))) { 2105 ret = cma_translate_addr(cma_src_addr(id_priv), 2106 &id->route.addr.dev_addr); 2107 if (ret) 2108 goto err; 2109 } 2110 } 2111 2112 id_priv->state = RDMA_CM_CONNECT; 2113 return id_priv; 2114 err: 2115 rdma_destroy_id(id); 2116 return NULL; 2117 } 2118 2119 static void cma_set_req_event_data(struct rdma_cm_event *event, 2120 struct ib_cm_req_event_param *req_data, 2121 void *private_data, int offset) 2122 { 2123 event->param.conn.private_data = (char *)private_data + offset; 2124 event->param.conn.private_data_len = IB_CM_REQ_PRIVATE_DATA_SIZE - offset; 2125 event->param.conn.responder_resources = req_data->responder_resources; 2126 event->param.conn.initiator_depth = req_data->initiator_depth; 2127 event->param.conn.flow_control = req_data->flow_control; 2128 event->param.conn.retry_count = req_data->retry_count; 2129 event->param.conn.rnr_retry_count = req_data->rnr_retry_count; 2130 event->param.conn.srq = req_data->srq; 2131 event->param.conn.qp_num = req_data->remote_qpn; 2132 } 2133 2134 static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_event) 2135 { 2136 return (((ib_event->event == IB_CM_REQ_RECEIVED) && 2137 (ib_event->param.req_rcvd.qp_type == id->qp_type)) || 2138 ((ib_event->event == IB_CM_SIDR_REQ_RECEIVED) && 2139 (id->qp_type == IB_QPT_UD)) || 2140 (!id->qp_type)); 2141 } 2142 2143 static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) 2144 { 2145 struct rdma_id_private 
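	/*
	 * Connect-request dispatch: cma_id_from_event() maps the incoming
	 * REQ or SIDR_REQ back to the listening rdma_cm_id (optionally
	 * pinning the receiving net_device), cma_check_req_qp_type()
	 * verifies that the requested QP type matches the listener, and a
	 * child id is created with cma_new_conn_id() (connected QPs) or
	 * cma_new_udp_id() (SIDR/UD).  The child is reported to the
	 * listener's handler as RDMA_CM_EVENT_CONNECT_REQUEST; a non-zero
	 * return from that handler destroys the child id again.
	 */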
*listen_id, *conn_id = NULL; 2146 struct rdma_cm_event event; 2147 struct net_device *net_dev; 2148 int offset, ret; 2149 2150 listen_id = cma_id_from_event(cm_id, ib_event, &net_dev); 2151 if (IS_ERR(listen_id)) 2152 return PTR_ERR(listen_id); 2153 2154 if (!cma_check_req_qp_type(&listen_id->id, ib_event)) { 2155 ret = -EINVAL; 2156 goto net_dev_put; 2157 } 2158 2159 mutex_lock(&listen_id->handler_mutex); 2160 if (listen_id->state != RDMA_CM_LISTEN) { 2161 ret = -ECONNABORTED; 2162 goto err1; 2163 } 2164 2165 memset(&event, 0, sizeof event); 2166 offset = cma_user_data_offset(listen_id); 2167 event.event = RDMA_CM_EVENT_CONNECT_REQUEST; 2168 if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) { 2169 conn_id = cma_new_udp_id(&listen_id->id, ib_event, net_dev); 2170 event.param.ud.private_data = (char *)ib_event->private_data + offset; 2171 event.param.ud.private_data_len = 2172 IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset; 2173 } else { 2174 conn_id = cma_new_conn_id(&listen_id->id, ib_event, net_dev); 2175 cma_set_req_event_data(&event, &ib_event->param.req_rcvd, 2176 ib_event->private_data, offset); 2177 } 2178 if (!conn_id) { 2179 ret = -ENOMEM; 2180 goto err1; 2181 } 2182 2183 mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); 2184 ret = cma_acquire_dev(conn_id, listen_id); 2185 if (ret) 2186 goto err2; 2187 2188 conn_id->cm_id.ib = cm_id; 2189 cm_id->context = conn_id; 2190 cm_id->cm_handler = cma_ib_handler; 2191 2192 /* 2193 * Protect against the user destroying conn_id from another thread 2194 * until we're done accessing it. 2195 */ 2196 atomic_inc(&conn_id->refcount); 2197 ret = conn_id->id.event_handler(&conn_id->id, &event); 2198 if (ret) 2199 goto err3; 2200 /* 2201 * Acquire mutex to prevent user executing rdma_destroy_id() 2202 * while we're accessing the cm_id. 2203 */ 2204 mutex_lock(&lock); 2205 if (cma_comp(conn_id, RDMA_CM_CONNECT) && 2206 (conn_id->id.qp_type != IB_QPT_UD)) 2207 ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); 2208 mutex_unlock(&lock); 2209 mutex_unlock(&conn_id->handler_mutex); 2210 mutex_unlock(&listen_id->handler_mutex); 2211 cma_deref_id(conn_id); 2212 if (net_dev) 2213 dev_put(net_dev); 2214 return 0; 2215 2216 err3: 2217 cma_deref_id(conn_id); 2218 /* Destroy the CM ID by returning a non-zero value. 
*/ 2219 conn_id->cm_id.ib = NULL; 2220 err2: 2221 cma_exch(conn_id, RDMA_CM_DESTROYING); 2222 mutex_unlock(&conn_id->handler_mutex); 2223 err1: 2224 mutex_unlock(&listen_id->handler_mutex); 2225 if (conn_id) 2226 rdma_destroy_id(&conn_id->id); 2227 2228 net_dev_put: 2229 if (net_dev) 2230 dev_put(net_dev); 2231 2232 return ret; 2233 } 2234 2235 __be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr) 2236 { 2237 if (addr->sa_family == AF_IB) 2238 return ((struct sockaddr_ib *) addr)->sib_sid; 2239 2240 return cpu_to_be64(((u64)id->ps << 16) + be16_to_cpu(cma_port(addr))); 2241 } 2242 EXPORT_SYMBOL(rdma_get_service_id); 2243 2244 static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) 2245 { 2246 struct rdma_id_private *id_priv = iw_id->context; 2247 struct rdma_cm_event event; 2248 int ret = 0; 2249 struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; 2250 struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; 2251 2252 mutex_lock(&id_priv->handler_mutex); 2253 if (id_priv->state != RDMA_CM_CONNECT) 2254 goto out; 2255 2256 memset(&event, 0, sizeof event); 2257 switch (iw_event->event) { 2258 case IW_CM_EVENT_CLOSE: 2259 event.event = RDMA_CM_EVENT_DISCONNECTED; 2260 break; 2261 case IW_CM_EVENT_CONNECT_REPLY: 2262 memcpy(cma_src_addr(id_priv), laddr, 2263 rdma_addr_size(laddr)); 2264 memcpy(cma_dst_addr(id_priv), raddr, 2265 rdma_addr_size(raddr)); 2266 switch (iw_event->status) { 2267 case 0: 2268 event.event = RDMA_CM_EVENT_ESTABLISHED; 2269 event.param.conn.initiator_depth = iw_event->ird; 2270 event.param.conn.responder_resources = iw_event->ord; 2271 break; 2272 case -ECONNRESET: 2273 case -ECONNREFUSED: 2274 event.event = RDMA_CM_EVENT_REJECTED; 2275 break; 2276 case -ETIMEDOUT: 2277 event.event = RDMA_CM_EVENT_UNREACHABLE; 2278 break; 2279 default: 2280 event.event = RDMA_CM_EVENT_CONNECT_ERROR; 2281 break; 2282 } 2283 break; 2284 case IW_CM_EVENT_ESTABLISHED: 2285 event.event = RDMA_CM_EVENT_ESTABLISHED; 2286 event.param.conn.initiator_depth = iw_event->ird; 2287 event.param.conn.responder_resources = iw_event->ord; 2288 break; 2289 default: 2290 BUG_ON(1); 2291 } 2292 2293 event.status = iw_event->status; 2294 event.param.conn.private_data = iw_event->private_data; 2295 event.param.conn.private_data_len = iw_event->private_data_len; 2296 ret = id_priv->id.event_handler(&id_priv->id, &event); 2297 if (ret) { 2298 /* Destroy the CM ID by returning a non-zero value. 
*/ 2299 id_priv->cm_id.iw = NULL; 2300 cma_exch(id_priv, RDMA_CM_DESTROYING); 2301 mutex_unlock(&id_priv->handler_mutex); 2302 rdma_destroy_id(&id_priv->id); 2303 return ret; 2304 } 2305 2306 out: 2307 mutex_unlock(&id_priv->handler_mutex); 2308 return ret; 2309 } 2310 2311 static int iw_conn_req_handler(struct iw_cm_id *cm_id, 2312 struct iw_cm_event *iw_event) 2313 { 2314 struct rdma_cm_id *new_cm_id; 2315 struct rdma_id_private *listen_id, *conn_id; 2316 struct rdma_cm_event event; 2317 int ret = -ECONNABORTED; 2318 struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; 2319 struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; 2320 2321 listen_id = cm_id->context; 2322 2323 mutex_lock(&listen_id->handler_mutex); 2324 if (listen_id->state != RDMA_CM_LISTEN) 2325 goto out; 2326 2327 /* Create a new RDMA id for the new IW CM ID */ 2328 new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net, 2329 listen_id->id.event_handler, 2330 listen_id->id.context, 2331 RDMA_PS_TCP, IB_QPT_RC); 2332 if (IS_ERR(new_cm_id)) { 2333 ret = -ENOMEM; 2334 goto out; 2335 } 2336 conn_id = container_of(new_cm_id, struct rdma_id_private, id); 2337 mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); 2338 conn_id->state = RDMA_CM_CONNECT; 2339 2340 ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr); 2341 if (ret) { 2342 mutex_unlock(&conn_id->handler_mutex); 2343 rdma_destroy_id(new_cm_id); 2344 goto out; 2345 } 2346 2347 ret = cma_acquire_dev(conn_id, listen_id); 2348 if (ret) { 2349 mutex_unlock(&conn_id->handler_mutex); 2350 rdma_destroy_id(new_cm_id); 2351 goto out; 2352 } 2353 2354 conn_id->cm_id.iw = cm_id; 2355 cm_id->context = conn_id; 2356 cm_id->cm_handler = cma_iw_handler; 2357 2358 memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr)); 2359 memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr)); 2360 2361 memset(&event, 0, sizeof event); 2362 event.event = RDMA_CM_EVENT_CONNECT_REQUEST; 2363 event.param.conn.private_data = iw_event->private_data; 2364 event.param.conn.private_data_len = iw_event->private_data_len; 2365 event.param.conn.initiator_depth = iw_event->ird; 2366 event.param.conn.responder_resources = iw_event->ord; 2367 2368 /* 2369 * Protect against the user destroying conn_id from another thread 2370 * until we're done accessing it. 
2371 */ 2372 atomic_inc(&conn_id->refcount); 2373 ret = conn_id->id.event_handler(&conn_id->id, &event); 2374 if (ret) { 2375 /* User wants to destroy the CM ID */ 2376 conn_id->cm_id.iw = NULL; 2377 cma_exch(conn_id, RDMA_CM_DESTROYING); 2378 mutex_unlock(&conn_id->handler_mutex); 2379 cma_deref_id(conn_id); 2380 rdma_destroy_id(&conn_id->id); 2381 goto out; 2382 } 2383 2384 mutex_unlock(&conn_id->handler_mutex); 2385 cma_deref_id(conn_id); 2386 2387 out: 2388 mutex_unlock(&listen_id->handler_mutex); 2389 return ret; 2390 } 2391 2392 static int cma_ib_listen(struct rdma_id_private *id_priv) 2393 { 2394 struct sockaddr *addr; 2395 struct ib_cm_id *id; 2396 __be64 svc_id; 2397 2398 addr = cma_src_addr(id_priv); 2399 svc_id = rdma_get_service_id(&id_priv->id, addr); 2400 id = ib_cm_insert_listen(id_priv->id.device, cma_req_handler, svc_id); 2401 if (IS_ERR(id)) 2402 return PTR_ERR(id); 2403 id_priv->cm_id.ib = id; 2404 2405 return 0; 2406 } 2407 2408 static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog) 2409 { 2410 int ret; 2411 struct iw_cm_id *id; 2412 2413 id = iw_create_cm_id(id_priv->id.device, 2414 iw_conn_req_handler, 2415 id_priv); 2416 if (IS_ERR(id)) 2417 return PTR_ERR(id); 2418 2419 id->tos = id_priv->tos; 2420 id_priv->cm_id.iw = id; 2421 2422 memcpy(&id_priv->cm_id.iw->local_addr, cma_src_addr(id_priv), 2423 rdma_addr_size(cma_src_addr(id_priv))); 2424 2425 ret = iw_cm_listen(id_priv->cm_id.iw, backlog); 2426 2427 if (ret) { 2428 iw_destroy_cm_id(id_priv->cm_id.iw); 2429 id_priv->cm_id.iw = NULL; 2430 } 2431 2432 return ret; 2433 } 2434 2435 static int cma_listen_handler(struct rdma_cm_id *id, 2436 struct rdma_cm_event *event) 2437 { 2438 struct rdma_id_private *id_priv = id->context; 2439 2440 id->context = id_priv->id.context; 2441 id->event_handler = id_priv->id.event_handler; 2442 return id_priv->id.event_handler(id, event); 2443 } 2444 2445 static void cma_listen_on_dev(struct rdma_id_private *id_priv, 2446 struct cma_device *cma_dev) 2447 { 2448 struct rdma_id_private *dev_id_priv; 2449 struct rdma_cm_id *id; 2450 struct vnet *net = id_priv->id.route.addr.dev_addr.net; 2451 int ret; 2452 2453 if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1)) 2454 return; 2455 2456 id = rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps, 2457 id_priv->id.qp_type); 2458 if (IS_ERR(id)) 2459 return; 2460 2461 dev_id_priv = container_of(id, struct rdma_id_private, id); 2462 2463 dev_id_priv->state = RDMA_CM_ADDR_BOUND; 2464 memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv), 2465 rdma_addr_size(cma_src_addr(id_priv))); 2466 2467 _cma_attach_to_dev(dev_id_priv, cma_dev); 2468 list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); 2469 atomic_inc(&id_priv->refcount); 2470 dev_id_priv->internal_id = 1; 2471 dev_id_priv->afonly = id_priv->afonly; 2472 2473 ret = rdma_listen(id, id_priv->backlog); 2474 if (ret) 2475 pr_warn("RDMA CMA: cma_listen_on_dev, error %d, listening on device %s\n", 2476 ret, cma_dev->device->name); 2477 } 2478 2479 static void cma_listen_on_all(struct rdma_id_private *id_priv) 2480 { 2481 struct cma_device *cma_dev; 2482 2483 mutex_lock(&lock); 2484 list_add_tail(&id_priv->list, &listen_any_list); 2485 list_for_each_entry(cma_dev, &dev_list, list) 2486 cma_listen_on_dev(id_priv, cma_dev); 2487 mutex_unlock(&lock); 2488 } 2489 2490 void rdma_set_service_type(struct rdma_cm_id *id, int tos) 2491 { 2492 struct rdma_id_private *id_priv; 2493 2494 id_priv = container_of(id, struct rdma_id_private, id); 2495 
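	/*
	 * The type-of-service value is only recorded here; it takes effect
	 * when the route is resolved.  On InfiniBand an AF_INET id
	 * advertises it as the path-record QoS class, AF_INET6/AF_IB ids
	 * take the traffic class from the upper bits of the flow-info
	 * field instead, and on RoCE it is mapped to a service level by
	 * iboe_tos_to_sl() and copied into the synthesized path record's
	 * traffic_class.  As an illustrative example, a TOS of 0x10
	 * (IPTOS_LOWDELAY) yields SL (0x10 >> 5) & 0x7 == 0.
	 */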
id_priv->tos = (u8) tos;
}
EXPORT_SYMBOL(rdma_set_service_type);

static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
			      void *context)
{
	struct cma_work *work = context;
	struct rdma_route *route;

	route = &work->id->id.route;

	if (!status) {
		route->num_paths = 1;
		*route->path_rec = *path_rec;
	} else {
		work->old_state = RDMA_CM_ROUTE_QUERY;
		work->new_state = RDMA_CM_ADDR_RESOLVED;
		work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
		work->event.status = status;
	}

	queue_work(cma_wq, &work->work);
}

static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
			      struct cma_work *work)
{
	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
	struct ib_sa_path_rec path_rec;
	ib_sa_comp_mask comp_mask;
	struct sockaddr_in6 *sin6;
	struct sockaddr_ib *sib;

	memset(&path_rec, 0, sizeof path_rec);
	rdma_addr_get_sgid(dev_addr, &path_rec.sgid);
	rdma_addr_get_dgid(dev_addr, &path_rec.dgid);
	path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
	path_rec.numb_path = 1;
	path_rec.reversible = 1;
	path_rec.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));

	comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
		    IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH |
		    IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID;

	switch (cma_family(id_priv)) {
	case AF_INET:
		path_rec.qos_class = cpu_to_be16((u16) id_priv->tos);
		comp_mask |= IB_SA_PATH_REC_QOS_CLASS;
		break;
	case AF_INET6:
		sin6 = (struct sockaddr_in6 *) cma_src_addr(id_priv);
		path_rec.traffic_class = (u8) (be32_to_cpu(sin6->sin6_flowinfo) >> 20);
		comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS;
		break;
	case AF_IB:
		sib = (struct sockaddr_ib *) cma_src_addr(id_priv);
		path_rec.traffic_class = (u8) (be32_to_cpu(sib->sib_flowinfo) >> 20);
		comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS;
		break;
	}

	id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device,
					       id_priv->id.port_num, &path_rec,
					       comp_mask, timeout_ms,
					       GFP_KERNEL, cma_query_handler,
					       work, &id_priv->query);

	return (id_priv->query_id < 0) ?
id_priv->query_id : 0; 2565 } 2566 2567 static void cma_work_handler(struct work_struct *_work) 2568 { 2569 struct cma_work *work = container_of(_work, struct cma_work, work); 2570 struct rdma_id_private *id_priv = work->id; 2571 int destroy = 0; 2572 2573 mutex_lock(&id_priv->handler_mutex); 2574 if (!cma_comp_exch(id_priv, work->old_state, work->new_state)) 2575 goto out; 2576 2577 if (id_priv->id.event_handler(&id_priv->id, &work->event)) { 2578 cma_exch(id_priv, RDMA_CM_DESTROYING); 2579 destroy = 1; 2580 } 2581 out: 2582 mutex_unlock(&id_priv->handler_mutex); 2583 cma_deref_id(id_priv); 2584 if (destroy) 2585 rdma_destroy_id(&id_priv->id); 2586 kfree(work); 2587 } 2588 2589 static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms) 2590 { 2591 struct rdma_route *route = &id_priv->id.route; 2592 struct cma_work *work; 2593 int ret; 2594 2595 work = kzalloc(sizeof *work, GFP_KERNEL); 2596 if (!work) 2597 return -ENOMEM; 2598 2599 work->id = id_priv; 2600 INIT_WORK(&work->work, cma_work_handler); 2601 work->old_state = RDMA_CM_ROUTE_QUERY; 2602 work->new_state = RDMA_CM_ROUTE_RESOLVED; 2603 work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; 2604 2605 route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL); 2606 if (!route->path_rec) { 2607 ret = -ENOMEM; 2608 goto err1; 2609 } 2610 2611 ret = cma_query_ib_route(id_priv, timeout_ms, work); 2612 if (ret) 2613 goto err2; 2614 2615 return 0; 2616 err2: 2617 kfree(route->path_rec); 2618 route->path_rec = NULL; 2619 err1: 2620 kfree(work); 2621 return ret; 2622 } 2623 2624 int rdma_set_ib_paths(struct rdma_cm_id *id, 2625 struct ib_sa_path_rec *path_rec, int num_paths) 2626 { 2627 struct rdma_id_private *id_priv; 2628 int ret; 2629 2630 id_priv = container_of(id, struct rdma_id_private, id); 2631 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, 2632 RDMA_CM_ROUTE_RESOLVED)) 2633 return -EINVAL; 2634 2635 id->route.path_rec = kmemdup(path_rec, sizeof *path_rec * num_paths, 2636 GFP_KERNEL); 2637 if (!id->route.path_rec) { 2638 ret = -ENOMEM; 2639 goto err; 2640 } 2641 2642 id->route.num_paths = num_paths; 2643 return 0; 2644 err: 2645 cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_ADDR_RESOLVED); 2646 return ret; 2647 } 2648 EXPORT_SYMBOL(rdma_set_ib_paths); 2649 2650 static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms) 2651 { 2652 struct cma_work *work; 2653 2654 work = kzalloc(sizeof *work, GFP_KERNEL); 2655 if (!work) 2656 return -ENOMEM; 2657 2658 work->id = id_priv; 2659 INIT_WORK(&work->work, cma_work_handler); 2660 work->old_state = RDMA_CM_ROUTE_QUERY; 2661 work->new_state = RDMA_CM_ROUTE_RESOLVED; 2662 work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; 2663 queue_work(cma_wq, &work->work); 2664 return 0; 2665 } 2666 2667 static int iboe_tos_to_sl(struct net_device *ndev, int tos) 2668 { 2669 /* get service level, SL, from IPv4 type of service, TOS */ 2670 int sl = (tos >> 5) & 0x7; 2671 2672 /* final mappings are done by the vendor specific drivers */ 2673 return sl; 2674 } 2675 2676 static enum ib_gid_type cma_route_gid_type(enum rdma_network_type network_type, 2677 unsigned long supported_gids, 2678 enum ib_gid_type default_gid) 2679 { 2680 if ((network_type == RDMA_NETWORK_IPV4 || 2681 network_type == RDMA_NETWORK_IPV6) && 2682 test_bit(IB_GID_TYPE_ROCE_UDP_ENCAP, &supported_gids)) 2683 return IB_GID_TYPE_ROCE_UDP_ENCAP; 2684 2685 return default_gid; 2686 } 2687 2688 static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) 2689 { 2690 struct rdma_route *route 
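	/*
	 * RoCE route resolution never queries an SA.  The single path
	 * record is synthesized locally: VNET/ifindex, MTU and rate come
	 * from the bound net_device, source and destination GIDs are
	 * produced from the IP addresses with rdma_ip2gid(), the GID type
	 * is upgraded to RoCEv2 (UDP encap) when the port supports it for
	 * an IPv4/IPv6 destination, the DMAC is the already-resolved
	 * next-hop MAC, and SL/traffic class follow the id's TOS.  The
	 * ROUTE_RESOLVED event is then delivered from cma_wq.
	 */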
= &id_priv->id.route; 2691 struct rdma_addr *addr = &route->addr; 2692 struct cma_work *work; 2693 int ret; 2694 struct net_device *ndev = NULL; 2695 2696 2697 work = kzalloc(sizeof *work, GFP_KERNEL); 2698 if (!work) 2699 return -ENOMEM; 2700 2701 work->id = id_priv; 2702 INIT_WORK(&work->work, cma_work_handler); 2703 2704 route->path_rec = kzalloc(sizeof *route->path_rec, GFP_KERNEL); 2705 if (!route->path_rec) { 2706 ret = -ENOMEM; 2707 goto err1; 2708 } 2709 2710 route->num_paths = 1; 2711 2712 if (addr->dev_addr.bound_dev_if) { 2713 unsigned long supported_gids; 2714 2715 ndev = dev_get_by_index(addr->dev_addr.net, 2716 addr->dev_addr.bound_dev_if); 2717 if (!ndev) { 2718 ret = -ENODEV; 2719 goto err2; 2720 } 2721 2722 route->path_rec->net = ndev->if_vnet; 2723 route->path_rec->ifindex = ndev->if_index; 2724 supported_gids = roce_gid_type_mask_support(id_priv->id.device, 2725 id_priv->id.port_num); 2726 route->path_rec->gid_type = 2727 cma_route_gid_type(addr->dev_addr.network, 2728 supported_gids, 2729 id_priv->gid_type); 2730 } 2731 if (!ndev) { 2732 ret = -ENODEV; 2733 goto err2; 2734 } 2735 2736 memcpy(route->path_rec->dmac, addr->dev_addr.dst_dev_addr, ETH_ALEN); 2737 2738 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, 2739 &route->path_rec->sgid); 2740 rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr, 2741 &route->path_rec->dgid); 2742 2743 /* Use the hint from IP Stack to select GID Type */ 2744 if (route->path_rec->gid_type < ib_network_to_gid_type(addr->dev_addr.network)) 2745 route->path_rec->gid_type = ib_network_to_gid_type(addr->dev_addr.network); 2746 if (((struct sockaddr *)&id_priv->id.route.addr.dst_addr)->sa_family != AF_IB) 2747 /* TODO: get the hoplimit from the inet/inet6 device */ 2748 route->path_rec->hop_limit = addr->dev_addr.hoplimit; 2749 else 2750 route->path_rec->hop_limit = 1; 2751 route->path_rec->reversible = 1; 2752 route->path_rec->pkey = cpu_to_be16(0xffff); 2753 route->path_rec->mtu_selector = IB_SA_EQ; 2754 route->path_rec->sl = iboe_tos_to_sl(ndev, id_priv->tos); 2755 route->path_rec->traffic_class = id_priv->tos; 2756 route->path_rec->mtu = iboe_get_mtu(ndev->if_mtu); 2757 route->path_rec->rate_selector = IB_SA_EQ; 2758 route->path_rec->rate = iboe_get_rate(ndev); 2759 dev_put(ndev); 2760 route->path_rec->packet_life_time_selector = IB_SA_EQ; 2761 route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME; 2762 if (!route->path_rec->mtu) { 2763 ret = -EINVAL; 2764 goto err2; 2765 } 2766 2767 work->old_state = RDMA_CM_ROUTE_QUERY; 2768 work->new_state = RDMA_CM_ROUTE_RESOLVED; 2769 work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; 2770 work->event.status = 0; 2771 2772 queue_work(cma_wq, &work->work); 2773 2774 return 0; 2775 2776 err2: 2777 kfree(route->path_rec); 2778 route->path_rec = NULL; 2779 err1: 2780 kfree(work); 2781 return ret; 2782 } 2783 2784 int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms) 2785 { 2786 struct rdma_id_private *id_priv; 2787 int ret; 2788 2789 id_priv = container_of(id, struct rdma_id_private, id); 2790 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_QUERY)) 2791 return -EINVAL; 2792 2793 atomic_inc(&id_priv->refcount); 2794 if (rdma_cap_ib_sa(id->device, id->port_num)) 2795 ret = cma_resolve_ib_route(id_priv, timeout_ms); 2796 else if (rdma_protocol_roce(id->device, id->port_num)) 2797 ret = cma_resolve_iboe_route(id_priv); 2798 else if (rdma_protocol_iwarp(id->device, id->port_num)) 2799 ret = cma_resolve_iw_route(id_priv, timeout_ms); 2800 else 2801 ret 
= -ENOSYS; 2802 2803 if (ret) 2804 goto err; 2805 2806 return 0; 2807 err: 2808 cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, RDMA_CM_ADDR_RESOLVED); 2809 cma_deref_id(id_priv); 2810 return ret; 2811 } 2812 EXPORT_SYMBOL(rdma_resolve_route); 2813 2814 static void cma_set_loopback(struct sockaddr *addr) 2815 { 2816 switch (addr->sa_family) { 2817 case AF_INET: 2818 ((struct sockaddr_in *) addr)->sin_addr.s_addr = htonl(INADDR_LOOPBACK); 2819 break; 2820 case AF_INET6: 2821 ipv6_addr_set(&((struct sockaddr_in6 *) addr)->sin6_addr, 2822 0, 0, 0, htonl(1)); 2823 break; 2824 default: 2825 ib_addr_set(&((struct sockaddr_ib *) addr)->sib_addr, 2826 0, 0, 0, htonl(1)); 2827 break; 2828 } 2829 } 2830 2831 static int cma_bind_loopback(struct rdma_id_private *id_priv) 2832 { 2833 struct cma_device *cma_dev, *cur_dev; 2834 struct ib_port_attr port_attr; 2835 union ib_gid gid; 2836 u16 pkey; 2837 int ret; 2838 u8 p; 2839 2840 cma_dev = NULL; 2841 mutex_lock(&lock); 2842 list_for_each_entry(cur_dev, &dev_list, list) { 2843 if (cma_family(id_priv) == AF_IB && 2844 !rdma_cap_ib_cm(cur_dev->device, 1)) 2845 continue; 2846 2847 if (!cma_dev) 2848 cma_dev = cur_dev; 2849 2850 for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) { 2851 if (!ib_query_port(cur_dev->device, p, &port_attr) && 2852 port_attr.state == IB_PORT_ACTIVE) { 2853 cma_dev = cur_dev; 2854 goto port_found; 2855 } 2856 } 2857 } 2858 2859 if (!cma_dev) { 2860 ret = -ENODEV; 2861 goto out; 2862 } 2863 2864 p = 1; 2865 2866 port_found: 2867 ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid, NULL); 2868 if (ret) 2869 goto out; 2870 2871 ret = ib_get_cached_pkey(cma_dev->device, p, 0, &pkey); 2872 if (ret) 2873 goto out; 2874 2875 id_priv->id.route.addr.dev_addr.dev_type = 2876 (rdma_protocol_ib(cma_dev->device, p)) ? 
2877 ARPHRD_INFINIBAND : ARPHRD_ETHER; 2878 2879 rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid); 2880 ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey); 2881 id_priv->id.port_num = p; 2882 cma_attach_to_dev(id_priv, cma_dev); 2883 cma_set_loopback(cma_src_addr(id_priv)); 2884 out: 2885 mutex_unlock(&lock); 2886 return ret; 2887 } 2888 2889 static void addr_handler(int status, struct sockaddr *src_addr, 2890 struct rdma_dev_addr *dev_addr, void *context) 2891 { 2892 struct rdma_id_private *id_priv = context; 2893 struct rdma_cm_event event; 2894 2895 memset(&event, 0, sizeof event); 2896 mutex_lock(&id_priv->handler_mutex); 2897 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, 2898 RDMA_CM_ADDR_RESOLVED)) 2899 goto out; 2900 2901 memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr)); 2902 if (!status && !id_priv->cma_dev) 2903 status = cma_acquire_dev(id_priv, NULL); 2904 2905 if (status) { 2906 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, 2907 RDMA_CM_ADDR_BOUND)) 2908 goto out; 2909 event.event = RDMA_CM_EVENT_ADDR_ERROR; 2910 event.status = status; 2911 } else 2912 event.event = RDMA_CM_EVENT_ADDR_RESOLVED; 2913 2914 if (id_priv->id.event_handler(&id_priv->id, &event)) { 2915 cma_exch(id_priv, RDMA_CM_DESTROYING); 2916 mutex_unlock(&id_priv->handler_mutex); 2917 cma_deref_id(id_priv); 2918 rdma_destroy_id(&id_priv->id); 2919 return; 2920 } 2921 out: 2922 mutex_unlock(&id_priv->handler_mutex); 2923 cma_deref_id(id_priv); 2924 } 2925 2926 static int cma_resolve_loopback(struct rdma_id_private *id_priv) 2927 { 2928 struct cma_work *work; 2929 union ib_gid gid; 2930 int ret; 2931 2932 work = kzalloc(sizeof *work, GFP_KERNEL); 2933 if (!work) 2934 return -ENOMEM; 2935 2936 if (!id_priv->cma_dev) { 2937 ret = cma_bind_loopback(id_priv); 2938 if (ret) 2939 goto err; 2940 } 2941 2942 rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); 2943 rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid); 2944 2945 work->id = id_priv; 2946 INIT_WORK(&work->work, cma_work_handler); 2947 work->old_state = RDMA_CM_ADDR_QUERY; 2948 work->new_state = RDMA_CM_ADDR_RESOLVED; 2949 work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED; 2950 queue_work(cma_wq, &work->work); 2951 return 0; 2952 err: 2953 kfree(work); 2954 return ret; 2955 } 2956 2957 static int cma_resolve_ib_addr(struct rdma_id_private *id_priv) 2958 { 2959 struct cma_work *work; 2960 int ret; 2961 2962 work = kzalloc(sizeof *work, GFP_KERNEL); 2963 if (!work) 2964 return -ENOMEM; 2965 2966 if (!id_priv->cma_dev) { 2967 ret = cma_resolve_ib_dev(id_priv); 2968 if (ret) 2969 goto err; 2970 } 2971 2972 rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *) 2973 &(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr)); 2974 2975 work->id = id_priv; 2976 INIT_WORK(&work->work, cma_work_handler); 2977 work->old_state = RDMA_CM_ADDR_QUERY; 2978 work->new_state = RDMA_CM_ADDR_RESOLVED; 2979 work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED; 2980 queue_work(cma_wq, &work->work); 2981 return 0; 2982 err: 2983 kfree(work); 2984 return ret; 2985 } 2986 2987 static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, 2988 struct sockaddr *dst_addr) 2989 { 2990 if (!src_addr || !src_addr->sa_family) { 2991 src_addr = (struct sockaddr *) &id->route.addr.src_addr; 2992 src_addr->sa_family = dst_addr->sa_family; 2993 if (dst_addr->sa_family == AF_INET6) { 2994 struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *) src_addr; 2995 struct sockaddr_in6 *dst_addr6 = (struct 
sockaddr_in6 *) dst_addr; 2996 src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id; 2997 if (IN6_IS_SCOPE_LINKLOCAL(&dst_addr6->sin6_addr) || 2998 IN6_IS_ADDR_MC_INTFACELOCAL(&dst_addr6->sin6_addr)) 2999 id->route.addr.dev_addr.bound_dev_if = dst_addr6->sin6_scope_id; 3000 } else if (dst_addr->sa_family == AF_IB) { 3001 ((struct sockaddr_ib *) src_addr)->sib_pkey = 3002 ((struct sockaddr_ib *) dst_addr)->sib_pkey; 3003 } 3004 } 3005 return rdma_bind_addr(id, src_addr); 3006 } 3007 3008 int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, 3009 struct sockaddr *dst_addr, int timeout_ms) 3010 { 3011 struct rdma_id_private *id_priv; 3012 int ret; 3013 3014 id_priv = container_of(id, struct rdma_id_private, id); 3015 if (id_priv->state == RDMA_CM_IDLE) { 3016 ret = cma_bind_addr(id, src_addr, dst_addr); 3017 if (ret) 3018 return ret; 3019 } 3020 3021 if (cma_family(id_priv) != dst_addr->sa_family) 3022 return -EINVAL; 3023 3024 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) 3025 return -EINVAL; 3026 3027 atomic_inc(&id_priv->refcount); 3028 memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); 3029 if (cma_any_addr(dst_addr)) { 3030 ret = cma_resolve_loopback(id_priv); 3031 } else { 3032 if (dst_addr->sa_family == AF_IB) { 3033 ret = cma_resolve_ib_addr(id_priv); 3034 } else { 3035 ret = cma_check_linklocal(&id->route.addr.dev_addr, dst_addr); 3036 if (ret) 3037 goto err; 3038 3039 ret = rdma_resolve_ip(&addr_client, cma_src_addr(id_priv), 3040 dst_addr, &id->route.addr.dev_addr, 3041 timeout_ms, addr_handler, id_priv); 3042 } 3043 } 3044 if (ret) 3045 goto err; 3046 3047 return 0; 3048 err: 3049 cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); 3050 cma_deref_id(id_priv); 3051 return ret; 3052 } 3053 EXPORT_SYMBOL(rdma_resolve_addr); 3054 3055 int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse) 3056 { 3057 struct rdma_id_private *id_priv; 3058 unsigned long flags; 3059 int ret; 3060 3061 id_priv = container_of(id, struct rdma_id_private, id); 3062 spin_lock_irqsave(&id_priv->lock, flags); 3063 if (reuse || id_priv->state == RDMA_CM_IDLE) { 3064 id_priv->reuseaddr = reuse; 3065 ret = 0; 3066 } else { 3067 ret = -EINVAL; 3068 } 3069 spin_unlock_irqrestore(&id_priv->lock, flags); 3070 return ret; 3071 } 3072 EXPORT_SYMBOL(rdma_set_reuseaddr); 3073 3074 int rdma_set_afonly(struct rdma_cm_id *id, int afonly) 3075 { 3076 struct rdma_id_private *id_priv; 3077 unsigned long flags; 3078 int ret; 3079 3080 id_priv = container_of(id, struct rdma_id_private, id); 3081 spin_lock_irqsave(&id_priv->lock, flags); 3082 if (id_priv->state == RDMA_CM_IDLE || id_priv->state == RDMA_CM_ADDR_BOUND) { 3083 id_priv->options |= (1 << CMA_OPTION_AFONLY); 3084 id_priv->afonly = afonly; 3085 ret = 0; 3086 } else { 3087 ret = -EINVAL; 3088 } 3089 spin_unlock_irqrestore(&id_priv->lock, flags); 3090 return ret; 3091 } 3092 EXPORT_SYMBOL(rdma_set_afonly); 3093 3094 static void cma_bind_port(struct rdma_bind_list *bind_list, 3095 struct rdma_id_private *id_priv) 3096 { 3097 struct sockaddr *addr; 3098 struct sockaddr_ib *sib; 3099 u64 sid, mask; 3100 __be16 port; 3101 3102 addr = cma_src_addr(id_priv); 3103 port = htons(bind_list->port); 3104 3105 switch (addr->sa_family) { 3106 case AF_INET: 3107 ((struct sockaddr_in *) addr)->sin_port = port; 3108 break; 3109 case AF_INET6: 3110 ((struct sockaddr_in6 *) addr)->sin6_port = port; 3111 break; 3112 case AF_IB: 3113 sib = (struct sockaddr_ib *) addr; 3114 sid = be64_to_cpu(sib->sib_sid); 3115 mask = 
be64_to_cpu(sib->sib_sid_mask); 3116 sib->sib_sid = cpu_to_be64((sid & mask) | (u64) ntohs(port)); 3117 sib->sib_sid_mask = cpu_to_be64(~0ULL); 3118 break; 3119 } 3120 id_priv->bind_list = bind_list; 3121 hlist_add_head(&id_priv->node, &bind_list->owners); 3122 } 3123 3124 static int cma_alloc_port(enum rdma_port_space ps, 3125 struct rdma_id_private *id_priv, unsigned short snum) 3126 { 3127 struct rdma_bind_list *bind_list; 3128 int ret; 3129 3130 bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL); 3131 if (!bind_list) 3132 return -ENOMEM; 3133 3134 ret = cma_ps_alloc(id_priv->id.route.addr.dev_addr.net, ps, bind_list, 3135 snum); 3136 if (ret < 0) 3137 goto err; 3138 3139 bind_list->ps = ps; 3140 bind_list->port = (unsigned short)ret; 3141 cma_bind_port(bind_list, id_priv); 3142 return 0; 3143 err: 3144 kfree(bind_list); 3145 return ret == -ENOSPC ? -EADDRNOTAVAIL : ret; 3146 } 3147 3148 static int cma_alloc_any_port(enum rdma_port_space ps, 3149 struct rdma_id_private *id_priv) 3150 { 3151 static unsigned int last_used_port; 3152 int low, high, remaining; 3153 unsigned int rover; 3154 struct vnet *net = id_priv->id.route.addr.dev_addr.net; 3155 u32 rand; 3156 3157 inet_get_local_port_range(net, &low, &high); 3158 remaining = (high - low) + 1; 3159 get_random_bytes(&rand, sizeof(rand)); 3160 rover = rand % remaining + low; 3161 retry: 3162 if (last_used_port != rover && 3163 !cma_ps_find(net, ps, (unsigned short)rover)) { 3164 int ret = cma_alloc_port(ps, id_priv, rover); 3165 /* 3166 * Remember previously used port number in order to avoid 3167 * re-using same port immediately after it is closed. 3168 */ 3169 if (!ret) 3170 last_used_port = rover; 3171 if (ret != -EADDRNOTAVAIL) 3172 return ret; 3173 } 3174 if (--remaining) { 3175 rover++; 3176 if ((rover < low) || (rover > high)) 3177 rover = low; 3178 goto retry; 3179 } 3180 return -EADDRNOTAVAIL; 3181 } 3182 3183 /* 3184 * Check that the requested port is available. This is called when trying to 3185 * bind to a specific port, or when trying to listen on a bound port. In 3186 * the latter case, the provided id_priv may already be on the bind_list, but 3187 * we still need to check that it's okay to start listening. 
3188 */ 3189 static int cma_check_port(struct rdma_bind_list *bind_list, 3190 struct rdma_id_private *id_priv, uint8_t reuseaddr) 3191 { 3192 struct rdma_id_private *cur_id; 3193 struct sockaddr *addr, *cur_addr; 3194 3195 addr = cma_src_addr(id_priv); 3196 hlist_for_each_entry(cur_id, &bind_list->owners, node) { 3197 if (id_priv == cur_id) 3198 continue; 3199 3200 if ((cur_id->state != RDMA_CM_LISTEN) && reuseaddr && 3201 cur_id->reuseaddr) 3202 continue; 3203 3204 cur_addr = cma_src_addr(cur_id); 3205 if (id_priv->afonly && cur_id->afonly && 3206 (addr->sa_family != cur_addr->sa_family)) 3207 continue; 3208 3209 if (cma_any_addr(addr) || cma_any_addr(cur_addr)) 3210 return -EADDRNOTAVAIL; 3211 3212 if (!cma_addr_cmp(addr, cur_addr)) 3213 return -EADDRINUSE; 3214 } 3215 return 0; 3216 } 3217 3218 static int cma_use_port(enum rdma_port_space ps, 3219 struct rdma_id_private *id_priv) 3220 { 3221 struct rdma_bind_list *bind_list; 3222 unsigned short snum; 3223 int ret; 3224 3225 snum = ntohs(cma_port(cma_src_addr(id_priv))); 3226 if (snum < IPPORT_RESERVED && 3227 priv_check(curthread, PRIV_NETINET_BINDANY) != 0) 3228 return -EACCES; 3229 3230 bind_list = cma_ps_find(id_priv->id.route.addr.dev_addr.net, ps, snum); 3231 if (!bind_list) { 3232 ret = cma_alloc_port(ps, id_priv, snum); 3233 } else { 3234 ret = cma_check_port(bind_list, id_priv, id_priv->reuseaddr); 3235 if (!ret) 3236 cma_bind_port(bind_list, id_priv); 3237 } 3238 return ret; 3239 } 3240 3241 static int cma_bind_listen(struct rdma_id_private *id_priv) 3242 { 3243 struct rdma_bind_list *bind_list = id_priv->bind_list; 3244 int ret = 0; 3245 3246 mutex_lock(&lock); 3247 if (bind_list->owners.first->next) 3248 ret = cma_check_port(bind_list, id_priv, 0); 3249 mutex_unlock(&lock); 3250 return ret; 3251 } 3252 3253 static enum rdma_port_space cma_select_inet_ps( 3254 struct rdma_id_private *id_priv) 3255 { 3256 switch (id_priv->id.ps) { 3257 case RDMA_PS_TCP: 3258 case RDMA_PS_UDP: 3259 case RDMA_PS_IPOIB: 3260 case RDMA_PS_IB: 3261 case RDMA_PS_SDP: 3262 return id_priv->id.ps; 3263 default: 3264 3265 return 0; 3266 } 3267 } 3268 3269 static enum rdma_port_space cma_select_ib_ps(struct rdma_id_private *id_priv) 3270 { 3271 enum rdma_port_space ps = 0; 3272 struct sockaddr_ib *sib; 3273 u64 sid_ps, mask, sid; 3274 3275 sib = (struct sockaddr_ib *) cma_src_addr(id_priv); 3276 mask = be64_to_cpu(sib->sib_sid_mask) & RDMA_IB_IP_PS_MASK; 3277 sid = be64_to_cpu(sib->sib_sid) & mask; 3278 3279 if ((id_priv->id.ps == RDMA_PS_IB) && (sid == (RDMA_IB_IP_PS_IB & mask))) { 3280 sid_ps = RDMA_IB_IP_PS_IB; 3281 ps = RDMA_PS_IB; 3282 } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_TCP)) && 3283 (sid == (RDMA_IB_IP_PS_TCP & mask))) { 3284 sid_ps = RDMA_IB_IP_PS_TCP; 3285 ps = RDMA_PS_TCP; 3286 } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_UDP)) && 3287 (sid == (RDMA_IB_IP_PS_UDP & mask))) { 3288 sid_ps = RDMA_IB_IP_PS_UDP; 3289 ps = RDMA_PS_UDP; 3290 } 3291 3292 if (ps) { 3293 sib->sib_sid = cpu_to_be64(sid_ps | ntohs(cma_port((struct sockaddr *) sib))); 3294 sib->sib_sid_mask = cpu_to_be64(RDMA_IB_IP_PS_MASK | 3295 be64_to_cpu(sib->sib_sid_mask)); 3296 } 3297 return ps; 3298 } 3299 3300 static int cma_get_port(struct rdma_id_private *id_priv) 3301 { 3302 enum rdma_port_space ps; 3303 int ret; 3304 3305 if (cma_family(id_priv) != AF_IB) 3306 ps = cma_select_inet_ps(id_priv); 3307 else 3308 ps = cma_select_ib_ps(id_priv); 3309 if (!ps) 3310 return -EPROTONOSUPPORT; 3311 3312 mutex_lock(&lock); 3313 
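	/*
	 * Each port space (TCP, UDP, IPOIB, IB, SDP) has its own per-VNET
	 * idr keyed by port number.  Binding to port 0 picks a random
	 * ephemeral port from the local port range and walks forward until
	 * a free slot is found; an explicit port below IPPORT_RESERVED
	 * additionally requires PRIV_NETINET_BINDANY, and sharing a bound
	 * port is subject to the reuseaddr/afonly checks in
	 * cma_check_port().  For the IP port spaces the port later becomes
	 * the low 16 bits of the IB service id, e.g. (assuming the
	 * conventional RDMA_PS_TCP value 0x0106) port 445 yields
	 * rdma_get_service_id() == cpu_to_be64(0x010601bd).
	 */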
if (cma_any_port(cma_src_addr(id_priv))) 3314 ret = cma_alloc_any_port(ps, id_priv); 3315 else 3316 ret = cma_use_port(ps, id_priv); 3317 mutex_unlock(&lock); 3318 3319 return ret; 3320 } 3321 3322 static int cma_check_linklocal(struct rdma_dev_addr *dev_addr, 3323 struct sockaddr *addr) 3324 { 3325 #ifdef INET6 3326 struct sockaddr_in6 sin6; 3327 3328 if (addr->sa_family != AF_INET6) 3329 return 0; 3330 3331 sin6 = *(struct sockaddr_in6 *)addr; 3332 3333 if (IN6_IS_SCOPE_LINKLOCAL(&sin6.sin6_addr) || 3334 IN6_IS_ADDR_MC_INTFACELOCAL(&sin6.sin6_addr)) { 3335 bool failure; 3336 3337 CURVNET_SET_QUIET(dev_addr->net); 3338 failure = sa6_recoverscope(&sin6) || sin6.sin6_scope_id == 0; 3339 CURVNET_RESTORE(); 3340 3341 /* check if IPv6 scope ID is not set */ 3342 if (failure) 3343 return -EINVAL; 3344 dev_addr->bound_dev_if = sin6.sin6_scope_id; 3345 } 3346 #endif 3347 return 0; 3348 } 3349 3350 int rdma_listen(struct rdma_cm_id *id, int backlog) 3351 { 3352 struct rdma_id_private *id_priv; 3353 int ret; 3354 3355 id_priv = container_of(id, struct rdma_id_private, id); 3356 if (id_priv->state == RDMA_CM_IDLE) { 3357 id->route.addr.src_addr.ss_family = AF_INET; 3358 ret = rdma_bind_addr(id, cma_src_addr(id_priv)); 3359 if (ret) 3360 return ret; 3361 } 3362 3363 if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN)) 3364 return -EINVAL; 3365 3366 if (id_priv->reuseaddr) { 3367 ret = cma_bind_listen(id_priv); 3368 if (ret) 3369 goto err; 3370 } 3371 3372 id_priv->backlog = backlog; 3373 if (id->device) { 3374 if (rdma_cap_ib_cm(id->device, 1)) { 3375 ret = cma_ib_listen(id_priv); 3376 if (ret) 3377 goto err; 3378 } else if (rdma_cap_iw_cm(id->device, 1)) { 3379 ret = cma_iw_listen(id_priv, backlog); 3380 if (ret) 3381 goto err; 3382 } else { 3383 ret = -ENOSYS; 3384 goto err; 3385 } 3386 } else 3387 cma_listen_on_all(id_priv); 3388 3389 return 0; 3390 err: 3391 id_priv->backlog = 0; 3392 cma_comp_exch(id_priv, RDMA_CM_LISTEN, RDMA_CM_ADDR_BOUND); 3393 return ret; 3394 } 3395 EXPORT_SYMBOL(rdma_listen); 3396 3397 int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) 3398 { 3399 struct rdma_id_private *id_priv; 3400 int ret; 3401 3402 if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 && 3403 addr->sa_family != AF_IB) 3404 return -EAFNOSUPPORT; 3405 3406 id_priv = container_of(id, struct rdma_id_private, id); 3407 if (!cma_comp_exch(id_priv, RDMA_CM_IDLE, RDMA_CM_ADDR_BOUND)) 3408 return -EINVAL; 3409 3410 ret = cma_check_linklocal(&id->route.addr.dev_addr, addr); 3411 if (ret) 3412 goto err1; 3413 3414 memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr)); 3415 if (!cma_any_addr(addr)) { 3416 ret = cma_translate_addr(addr, &id->route.addr.dev_addr); 3417 if (ret) 3418 goto err1; 3419 3420 ret = cma_acquire_dev(id_priv, NULL); 3421 if (ret) 3422 goto err1; 3423 } 3424 3425 if (!(id_priv->options & (1 << CMA_OPTION_AFONLY))) { 3426 if (addr->sa_family == AF_INET) 3427 id_priv->afonly = 1; 3428 #ifdef INET6 3429 else if (addr->sa_family == AF_INET6) { 3430 CURVNET_SET_QUIET(id_priv->id.route.addr.dev_addr.net); 3431 id_priv->afonly = V_ip6_v6only; 3432 CURVNET_RESTORE(); 3433 } 3434 #endif 3435 } 3436 ret = cma_get_port(id_priv); 3437 if (ret) 3438 goto err2; 3439 3440 return 0; 3441 err2: 3442 if (id_priv->cma_dev) 3443 cma_release_dev(id_priv); 3444 err1: 3445 cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE); 3446 return ret; 3447 } 3448 EXPORT_SYMBOL(rdma_bind_addr); 3449 3450 static int sdp_format_hdr(struct sdp_hh *sdp_hdr, struct rdma_id_private 
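/*
 * Every IP-addressed connection request carries a small header in front of
 * the user's private data so that the passive side can reconstruct the
 * source/destination addresses and port from the REQ alone.  For the SDP
 * port space this is the SDP hello header (struct sdp_hh), whose major
 * version must equal SDP_MAJ_VERSION; for the other port spaces it is
 * struct cma_hdr, holding cma_version, an IP-version nibble, both
 * addresses and the port.  cma_user_data_offset() reserves room for the
 * header, which is why user data is always copied in at "offset" bytes.
 */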
*id_priv) 3451 { 3452 /* 3453 * XXXCEM: CMA just sets the version itself rather than relying on 3454 * passed in packet to have the major version set. Should we? 3455 */ 3456 if (sdp_get_majv(sdp_hdr->majv_minv) != SDP_MAJ_VERSION) 3457 return -EINVAL; 3458 3459 if (cma_family(id_priv) == AF_INET) { 3460 struct sockaddr_in *src4, *dst4; 3461 3462 src4 = (struct sockaddr_in *) cma_src_addr(id_priv); 3463 dst4 = (struct sockaddr_in *) cma_dst_addr(id_priv); 3464 3465 sdp_set_ip_ver(sdp_hdr, 4); 3466 sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr; 3467 sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr; 3468 sdp_hdr->port = src4->sin_port; 3469 } else if (cma_family(id_priv) == AF_INET6) { 3470 struct sockaddr_in6 *src6, *dst6; 3471 3472 src6 = (struct sockaddr_in6 *) cma_src_addr(id_priv); 3473 dst6 = (struct sockaddr_in6 *) cma_dst_addr(id_priv); 3474 3475 sdp_set_ip_ver(sdp_hdr, 6); 3476 sdp_hdr->src_addr.ip6 = src6->sin6_addr; 3477 sdp_hdr->dst_addr.ip6 = dst6->sin6_addr; 3478 sdp_hdr->port = src6->sin6_port; 3479 cma_ip6_clear_scope_id(&sdp_hdr->src_addr.ip6); 3480 cma_ip6_clear_scope_id(&sdp_hdr->dst_addr.ip6); 3481 } else 3482 return -EAFNOSUPPORT; 3483 return 0; 3484 } 3485 3486 static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv) 3487 { 3488 struct cma_hdr *cma_hdr; 3489 3490 if (id_priv->id.ps == RDMA_PS_SDP) 3491 return sdp_format_hdr(hdr, id_priv); 3492 3493 cma_hdr = hdr; 3494 cma_hdr->cma_version = CMA_VERSION; 3495 if (cma_family(id_priv) == AF_INET) { 3496 struct sockaddr_in *src4, *dst4; 3497 3498 src4 = (struct sockaddr_in *) cma_src_addr(id_priv); 3499 dst4 = (struct sockaddr_in *) cma_dst_addr(id_priv); 3500 3501 cma_set_ip_ver(cma_hdr, 4); 3502 cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr; 3503 cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr; 3504 cma_hdr->port = src4->sin_port; 3505 } else if (cma_family(id_priv) == AF_INET6) { 3506 struct sockaddr_in6 *src6, *dst6; 3507 3508 src6 = (struct sockaddr_in6 *) cma_src_addr(id_priv); 3509 dst6 = (struct sockaddr_in6 *) cma_dst_addr(id_priv); 3510 3511 cma_set_ip_ver(cma_hdr, 6); 3512 cma_hdr->src_addr.ip6 = src6->sin6_addr; 3513 cma_hdr->dst_addr.ip6 = dst6->sin6_addr; 3514 cma_hdr->port = src6->sin6_port; 3515 cma_ip6_clear_scope_id(&cma_hdr->src_addr.ip6); 3516 cma_ip6_clear_scope_id(&cma_hdr->dst_addr.ip6); 3517 } 3518 return 0; 3519 } 3520 3521 static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, 3522 struct ib_cm_event *ib_event) 3523 { 3524 struct rdma_id_private *id_priv = cm_id->context; 3525 struct rdma_cm_event event; 3526 struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd; 3527 int ret = 0; 3528 3529 mutex_lock(&id_priv->handler_mutex); 3530 if (id_priv->state != RDMA_CM_CONNECT) 3531 goto out; 3532 3533 memset(&event, 0, sizeof event); 3534 switch (ib_event->event) { 3535 case IB_CM_SIDR_REQ_ERROR: 3536 event.event = RDMA_CM_EVENT_UNREACHABLE; 3537 event.status = -ETIMEDOUT; 3538 break; 3539 case IB_CM_SIDR_REP_RECEIVED: 3540 event.param.ud.private_data = ib_event->private_data; 3541 event.param.ud.private_data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE; 3542 if (rep->status != IB_SIDR_SUCCESS) { 3543 event.event = RDMA_CM_EVENT_UNREACHABLE; 3544 event.status = ib_event->param.sidr_rep_rcvd.status; 3545 break; 3546 } 3547 ret = cma_set_qkey(id_priv, rep->qkey); 3548 if (ret) { 3549 event.event = RDMA_CM_EVENT_ADDR_ERROR; 3550 event.status = ret; 3551 break; 3552 } 3553 ret = ib_init_ah_from_path(id_priv->id.device, 3554 id_priv->id.port_num, 3555 
id_priv->id.route.path_rec, 3556 &event.param.ud.ah_attr); 3557 if (ret) { 3558 event.event = RDMA_CM_EVENT_ADDR_ERROR; 3559 event.status = ret; 3560 break; 3561 } 3562 event.param.ud.qp_num = rep->qpn; 3563 event.param.ud.qkey = rep->qkey; 3564 event.event = RDMA_CM_EVENT_ESTABLISHED; 3565 event.status = 0; 3566 break; 3567 default: 3568 pr_err("RDMA CMA: unexpected IB CM event: %d\n", 3569 ib_event->event); 3570 goto out; 3571 } 3572 3573 ret = id_priv->id.event_handler(&id_priv->id, &event); 3574 if (ret) { 3575 /* Destroy the CM ID by returning a non-zero value. */ 3576 id_priv->cm_id.ib = NULL; 3577 cma_exch(id_priv, RDMA_CM_DESTROYING); 3578 mutex_unlock(&id_priv->handler_mutex); 3579 rdma_destroy_id(&id_priv->id); 3580 return ret; 3581 } 3582 out: 3583 mutex_unlock(&id_priv->handler_mutex); 3584 return ret; 3585 } 3586 3587 static int cma_resolve_ib_udp(struct rdma_id_private *id_priv, 3588 struct rdma_conn_param *conn_param) 3589 { 3590 struct ib_cm_sidr_req_param req; 3591 struct ib_cm_id *id; 3592 void *private_data; 3593 int offset, ret; 3594 3595 memset(&req, 0, sizeof req); 3596 offset = cma_user_data_offset(id_priv); 3597 req.private_data_len = offset + conn_param->private_data_len; 3598 if (req.private_data_len < conn_param->private_data_len) 3599 return -EINVAL; 3600 3601 if (req.private_data_len) { 3602 private_data = kzalloc(req.private_data_len, GFP_ATOMIC); 3603 if (!private_data) 3604 return -ENOMEM; 3605 } else { 3606 private_data = NULL; 3607 } 3608 3609 if (conn_param->private_data && conn_param->private_data_len) 3610 memcpy((char *)private_data + offset, conn_param->private_data, 3611 conn_param->private_data_len); 3612 3613 if (private_data) { 3614 ret = cma_format_hdr(private_data, id_priv); 3615 if (ret) 3616 goto out; 3617 req.private_data = private_data; 3618 } 3619 3620 id = ib_create_cm_id(id_priv->id.device, cma_sidr_rep_handler, 3621 id_priv); 3622 if (IS_ERR(id)) { 3623 ret = PTR_ERR(id); 3624 goto out; 3625 } 3626 id_priv->cm_id.ib = id; 3627 3628 req.path = id_priv->id.route.path_rec; 3629 req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); 3630 req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8); 3631 req.max_cm_retries = CMA_MAX_CM_RETRIES; 3632 3633 ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req); 3634 if (ret) { 3635 ib_destroy_cm_id(id_priv->cm_id.ib); 3636 id_priv->cm_id.ib = NULL; 3637 } 3638 out: 3639 kfree(private_data); 3640 return ret; 3641 } 3642 3643 static int cma_connect_ib(struct rdma_id_private *id_priv, 3644 struct rdma_conn_param *conn_param) 3645 { 3646 struct ib_cm_req_param req; 3647 struct rdma_route *route; 3648 void *private_data; 3649 struct ib_cm_id *id; 3650 int offset, ret; 3651 3652 memset(&req, 0, sizeof req); 3653 offset = cma_user_data_offset(id_priv); 3654 req.private_data_len = offset + conn_param->private_data_len; 3655 if (req.private_data_len < conn_param->private_data_len) 3656 return -EINVAL; 3657 3658 if (req.private_data_len) { 3659 private_data = kzalloc(req.private_data_len, GFP_ATOMIC); 3660 if (!private_data) 3661 return -ENOMEM; 3662 } else { 3663 private_data = NULL; 3664 } 3665 3666 if (conn_param->private_data && conn_param->private_data_len) 3667 memcpy((char *)private_data + offset, conn_param->private_data, 3668 conn_param->private_data_len); 3669 3670 id = ib_create_cm_id(id_priv->id.device, cma_ib_handler, id_priv); 3671 if (IS_ERR(id)) { 3672 ret = PTR_ERR(id); 3673 goto out; 3674 } 3675 id_priv->cm_id.ib = id; 3676 3677 route = &id_priv->id.route; 3678 if (private_data) 
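	/*
	 * When present, the private data buffer gets the cma/SDP header
	 * (formatted just below) followed by the caller's bytes at
	 * "offset".  Note the clamping further down: retry_count and
	 * rnr_retry_count are 3-bit fields in the CM REQ, so anything
	 * above 7 is capped.  The CM response timeouts use
	 * CMA_CM_RESPONSE_TIMEOUT (20), which in the CM's
	 * 4.096us * 2^n encoding is roughly 4.3 seconds, the same order
	 * as the 1 << (20 - 8) == 4096 ms used for SIDR requests above.
	 */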
{
		ret = cma_format_hdr(private_data, id_priv);
		if (ret)
			goto out;
		req.private_data = private_data;
	}

	req.primary_path = &route->path_rec[0];
	if (route->num_paths == 2)
		req.alternate_path = &route->path_rec[1];

	req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
	req.qp_num = id_priv->qp_num;
	req.qp_type = id_priv->id.qp_type;
	req.starting_psn = id_priv->seq_num;
	req.responder_resources = conn_param->responder_resources;
	req.initiator_depth = conn_param->initiator_depth;
	req.flow_control = conn_param->flow_control;
	req.retry_count = min_t(u8, 7, conn_param->retry_count);
	req.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count);
	req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
	req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
	req.max_cm_retries = CMA_MAX_CM_RETRIES;
	req.srq = id_priv->srq ? 1 : 0;

	ret = ib_send_cm_req(id_priv->cm_id.ib, &req);
out:
	if (ret && !IS_ERR(id)) {
		ib_destroy_cm_id(id);
		id_priv->cm_id.ib = NULL;
	}

	kfree(private_data);
	return ret;
}

static int cma_connect_iw(struct rdma_id_private *id_priv,
			  struct rdma_conn_param *conn_param)
{
	struct iw_cm_id *cm_id;
	int ret;
	struct iw_cm_conn_param iw_param;

	cm_id = iw_create_cm_id(id_priv->id.device, cma_iw_handler, id_priv);
	if (IS_ERR(cm_id))
		return PTR_ERR(cm_id);

	cm_id->tos = id_priv->tos;
	id_priv->cm_id.iw = cm_id;

	memcpy(&cm_id->local_addr, cma_src_addr(id_priv),
	       rdma_addr_size(cma_src_addr(id_priv)));
	memcpy(&cm_id->remote_addr, cma_dst_addr(id_priv),
	       rdma_addr_size(cma_dst_addr(id_priv)));

	ret = cma_modify_qp_rtr(id_priv, conn_param);
	if (ret)
		goto out;

	if (conn_param) {
		iw_param.ord = conn_param->initiator_depth;
		iw_param.ird = conn_param->responder_resources;
		iw_param.private_data = conn_param->private_data;
		iw_param.private_data_len = conn_param->private_data_len;
		iw_param.qpn = id_priv->id.qp ?
id_priv->qp_num : conn_param->qp_num; 3743 } else { 3744 memset(&iw_param, 0, sizeof iw_param); 3745 iw_param.qpn = id_priv->qp_num; 3746 } 3747 ret = iw_cm_connect(cm_id, &iw_param); 3748 out: 3749 if (ret) { 3750 iw_destroy_cm_id(cm_id); 3751 id_priv->cm_id.iw = NULL; 3752 } 3753 return ret; 3754 } 3755 3756 int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) 3757 { 3758 struct rdma_id_private *id_priv; 3759 int ret; 3760 3761 id_priv = container_of(id, struct rdma_id_private, id); 3762 if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT)) 3763 return -EINVAL; 3764 3765 if (!id->qp) { 3766 id_priv->qp_num = conn_param->qp_num; 3767 id_priv->srq = conn_param->srq; 3768 } 3769 3770 if (rdma_cap_ib_cm(id->device, id->port_num)) { 3771 if (id->qp_type == IB_QPT_UD) 3772 ret = cma_resolve_ib_udp(id_priv, conn_param); 3773 else 3774 ret = cma_connect_ib(id_priv, conn_param); 3775 } else if (rdma_cap_iw_cm(id->device, id->port_num)) 3776 ret = cma_connect_iw(id_priv, conn_param); 3777 else 3778 ret = -ENOSYS; 3779 if (ret) 3780 goto err; 3781 3782 return 0; 3783 err: 3784 cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_ROUTE_RESOLVED); 3785 return ret; 3786 } 3787 EXPORT_SYMBOL(rdma_connect); 3788 3789 static int cma_accept_ib(struct rdma_id_private *id_priv, 3790 struct rdma_conn_param *conn_param) 3791 { 3792 struct ib_cm_rep_param rep; 3793 int ret; 3794 3795 ret = cma_modify_qp_rtr(id_priv, conn_param); 3796 if (ret) 3797 goto out; 3798 3799 ret = cma_modify_qp_rts(id_priv, conn_param); 3800 if (ret) 3801 goto out; 3802 3803 memset(&rep, 0, sizeof rep); 3804 rep.qp_num = id_priv->qp_num; 3805 rep.starting_psn = id_priv->seq_num; 3806 rep.private_data = conn_param->private_data; 3807 rep.private_data_len = conn_param->private_data_len; 3808 rep.responder_resources = conn_param->responder_resources; 3809 rep.initiator_depth = conn_param->initiator_depth; 3810 rep.failover_accepted = 0; 3811 rep.flow_control = conn_param->flow_control; 3812 rep.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count); 3813 rep.srq = id_priv->srq ? 
1 : 0; 3814 3815 ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep); 3816 out: 3817 return ret; 3818 } 3819 3820 static int cma_accept_iw(struct rdma_id_private *id_priv, 3821 struct rdma_conn_param *conn_param) 3822 { 3823 struct iw_cm_conn_param iw_param; 3824 int ret; 3825 3826 ret = cma_modify_qp_rtr(id_priv, conn_param); 3827 if (ret) 3828 return ret; 3829 3830 iw_param.ord = conn_param->initiator_depth; 3831 iw_param.ird = conn_param->responder_resources; 3832 iw_param.private_data = conn_param->private_data; 3833 iw_param.private_data_len = conn_param->private_data_len; 3834 if (id_priv->id.qp) { 3835 iw_param.qpn = id_priv->qp_num; 3836 } else 3837 iw_param.qpn = conn_param->qp_num; 3838 3839 return iw_cm_accept(id_priv->cm_id.iw, &iw_param); 3840 } 3841 3842 static int cma_send_sidr_rep(struct rdma_id_private *id_priv, 3843 enum ib_cm_sidr_status status, u32 qkey, 3844 const void *private_data, int private_data_len) 3845 { 3846 struct ib_cm_sidr_rep_param rep; 3847 int ret; 3848 3849 memset(&rep, 0, sizeof rep); 3850 rep.status = status; 3851 if (status == IB_SIDR_SUCCESS) { 3852 ret = cma_set_qkey(id_priv, qkey); 3853 if (ret) 3854 return ret; 3855 rep.qp_num = id_priv->qp_num; 3856 rep.qkey = id_priv->qkey; 3857 } 3858 rep.private_data = private_data; 3859 rep.private_data_len = private_data_len; 3860 3861 return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep); 3862 } 3863 3864 int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) 3865 { 3866 struct rdma_id_private *id_priv; 3867 int ret; 3868 3869 id_priv = container_of(id, struct rdma_id_private, id); 3870 3871 id_priv->owner = task_pid_nr(current); 3872 3873 if (!cma_comp(id_priv, RDMA_CM_CONNECT)) 3874 return -EINVAL; 3875 3876 if (!id->qp && conn_param) { 3877 id_priv->qp_num = conn_param->qp_num; 3878 id_priv->srq = conn_param->srq; 3879 } 3880 3881 if (rdma_cap_ib_cm(id->device, id->port_num)) { 3882 if (id->qp_type == IB_QPT_UD) { 3883 if (conn_param) 3884 ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, 3885 conn_param->qkey, 3886 conn_param->private_data, 3887 conn_param->private_data_len); 3888 else 3889 ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, 3890 0, NULL, 0); 3891 } else { 3892 if (conn_param) 3893 ret = cma_accept_ib(id_priv, conn_param); 3894 else 3895 ret = cma_rep_recv(id_priv); 3896 } 3897 } else if (rdma_cap_iw_cm(id->device, id->port_num)) 3898 ret = cma_accept_iw(id_priv, conn_param); 3899 else 3900 ret = -ENOSYS; 3901 3902 if (ret) 3903 goto reject; 3904 3905 return 0; 3906 reject: 3907 cma_modify_qp_err(id_priv); 3908 rdma_reject(id, NULL, 0); 3909 return ret; 3910 } 3911 EXPORT_SYMBOL(rdma_accept); 3912 3913 int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event) 3914 { 3915 struct rdma_id_private *id_priv; 3916 int ret; 3917 3918 id_priv = container_of(id, struct rdma_id_private, id); 3919 if (!id_priv->cm_id.ib) 3920 return -EINVAL; 3921 3922 switch (id->device->node_type) { 3923 case RDMA_NODE_IB_CA: 3924 ret = ib_cm_notify(id_priv->cm_id.ib, event); 3925 break; 3926 default: 3927 ret = 0; 3928 break; 3929 } 3930 return ret; 3931 } 3932 EXPORT_SYMBOL(rdma_notify); 3933 3934 int rdma_reject(struct rdma_cm_id *id, const void *private_data, 3935 u8 private_data_len) 3936 { 3937 struct rdma_id_private *id_priv; 3938 int ret; 3939 3940 id_priv = container_of(id, struct rdma_id_private, id); 3941 if (!id_priv->cm_id.ib) 3942 return -EINVAL; 3943 3944 if (rdma_cap_ib_cm(id->device, id->port_num)) { 3945 if (id->qp_type == IB_QPT_UD) 3946 ret = cma_send_sidr_rep(id_priv, 
IB_SIDR_REJECT, 0, 3947 private_data, private_data_len); 3948 else 3949 ret = ib_send_cm_rej(id_priv->cm_id.ib, 3950 IB_CM_REJ_CONSUMER_DEFINED, NULL, 3951 0, private_data, private_data_len); 3952 } else if (rdma_cap_iw_cm(id->device, id->port_num)) { 3953 ret = iw_cm_reject(id_priv->cm_id.iw, 3954 private_data, private_data_len); 3955 } else 3956 ret = -ENOSYS; 3957 3958 return ret; 3959 } 3960 EXPORT_SYMBOL(rdma_reject); 3961 3962 int rdma_disconnect(struct rdma_cm_id *id) 3963 { 3964 struct rdma_id_private *id_priv; 3965 int ret; 3966 3967 id_priv = container_of(id, struct rdma_id_private, id); 3968 if (!id_priv->cm_id.ib) 3969 return -EINVAL; 3970 3971 if (rdma_cap_ib_cm(id->device, id->port_num)) { 3972 ret = cma_modify_qp_err(id_priv); 3973 if (ret) 3974 goto out; 3975 /* Initiate or respond to a disconnect. */ 3976 if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0)) 3977 ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0); 3978 } else if (rdma_cap_iw_cm(id->device, id->port_num)) { 3979 ret = iw_cm_disconnect(id_priv->cm_id.iw, 0); 3980 } else 3981 ret = -EINVAL; 3982 3983 out: 3984 return ret; 3985 } 3986 EXPORT_SYMBOL(rdma_disconnect); 3987 3988 static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) 3989 { 3990 struct rdma_id_private *id_priv; 3991 struct cma_multicast *mc = multicast->context; 3992 struct rdma_cm_event event; 3993 int ret = 0; 3994 3995 id_priv = mc->id_priv; 3996 mutex_lock(&id_priv->handler_mutex); 3997 if (id_priv->state != RDMA_CM_ADDR_BOUND && 3998 id_priv->state != RDMA_CM_ADDR_RESOLVED) 3999 goto out; 4000 4001 if (!status) 4002 status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey)); 4003 mutex_lock(&id_priv->qp_mutex); 4004 if (!status && id_priv->id.qp) 4005 status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid, 4006 be16_to_cpu(multicast->rec.mlid)); 4007 mutex_unlock(&id_priv->qp_mutex); 4008 4009 memset(&event, 0, sizeof event); 4010 event.status = status; 4011 event.param.ud.private_data = mc->context; 4012 if (!status) { 4013 struct rdma_dev_addr *dev_addr = 4014 &id_priv->id.route.addr.dev_addr; 4015 struct net_device *ndev = 4016 dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); 4017 enum ib_gid_type gid_type = 4018 id_priv->cma_dev->default_gid_type[id_priv->id.port_num - 4019 rdma_start_port(id_priv->cma_dev->device)]; 4020 4021 event.event = RDMA_CM_EVENT_MULTICAST_JOIN; 4022 ret = ib_init_ah_from_mcmember(id_priv->id.device, 4023 id_priv->id.port_num, 4024 &multicast->rec, 4025 ndev, gid_type, 4026 &event.param.ud.ah_attr); 4027 if (ret) 4028 event.event = RDMA_CM_EVENT_MULTICAST_ERROR; 4029 4030 event.param.ud.qp_num = 0xFFFFFF; 4031 event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey); 4032 if (ndev) 4033 dev_put(ndev); 4034 } else 4035 event.event = RDMA_CM_EVENT_MULTICAST_ERROR; 4036 4037 ret = id_priv->id.event_handler(&id_priv->id, &event); 4038 if (ret) { 4039 cma_exch(id_priv, RDMA_CM_DESTROYING); 4040 mutex_unlock(&id_priv->handler_mutex); 4041 rdma_destroy_id(&id_priv->id); 4042 return 0; 4043 } 4044 4045 out: 4046 mutex_unlock(&id_priv->handler_mutex); 4047 return 0; 4048 } 4049 4050 static void cma_set_mgid(struct rdma_id_private *id_priv, 4051 struct sockaddr *addr, union ib_gid *mgid) 4052 { 4053 unsigned char mc_map[MAX_ADDR_LEN]; 4054 struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; 4055 struct sockaddr_in *sin = (struct sockaddr_in *) addr; 4056 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr; 4057 4058 if (cma_any_addr(addr)) { 4059 memset(mgid, 0, sizeof *mgid); 
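	/*
	 * The remaining branches translate the multicast socket address
	 * into an MGID.  An IPv6 destination that already matches the
	 * ff1x:a01b::/32 pattern is treated as an SA-assigned MGID and
	 * used verbatim, AF_IB addresses supply the GID directly, and
	 * ordinary IPv4/IPv6 groups are run through ip_ib_mc_map() /
	 * ipv6_ib_mc_map(), which build the IPoIB-style MGID (roughly
	 * ff12:401b:<pkey>::<low bits of the group address>).  For
	 * RDMA_PS_UDP one prefix byte is overwritten with 0x01, the
	 * RDMA CM signature, so these groups stay distinct from plain
	 * IPoIB ones.
	 */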
	} else if ((addr->sa_family == AF_INET6) &&
		   ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFFF0FFFF) ==
		    0xFF10A01B)) {
		/* IPv6 address is an SA assigned MGID. */
		memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
	} else if (addr->sa_family == AF_IB) {
		memcpy(mgid, &((struct sockaddr_ib *) addr)->sib_addr, sizeof *mgid);
	} else if (addr->sa_family == AF_INET6) {
		ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map);
		if (id_priv->id.ps == RDMA_PS_UDP)
			mc_map[7] = 0x01;	/* Use RDMA CM signature */
		*mgid = *(union ib_gid *) (mc_map + 4);
	} else {
		ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map);
		if (id_priv->id.ps == RDMA_PS_UDP)
			mc_map[7] = 0x01;	/* Use RDMA CM signature */
		*mgid = *(union ib_gid *) (mc_map + 4);
	}
}

static void cma_query_sa_classport_info_cb(int status,
					   struct ib_class_port_info *rec,
					   void *context)
{
	struct class_port_info_context *cb_ctx = context;

	WARN_ON(!context);

	if (status || !rec) {
		pr_debug("RDMA CM: %s port %u failed query ClassPortInfo status: %d\n",
			 cb_ctx->device->name, cb_ctx->port_num, status);
		goto out;
	}

	memcpy(cb_ctx->class_port_info, rec, sizeof(struct ib_class_port_info));

out:
	complete(&cb_ctx->done);
}

/* Synchronously query the SA's ClassPortInfo record for the given device/port. */
static int cma_query_sa_classport_info(struct ib_device *device, u8 port_num,
				       struct ib_class_port_info *class_port_info)
{
	struct class_port_info_context *cb_ctx;
	int ret;

	cb_ctx = kmalloc(sizeof(*cb_ctx), GFP_KERNEL);
	if (!cb_ctx)
		return -ENOMEM;

	cb_ctx->device = device;
	cb_ctx->class_port_info = class_port_info;
	cb_ctx->port_num = port_num;
	init_completion(&cb_ctx->done);

	ret = ib_sa_classport_info_rec_query(&sa_client, device, port_num,
					     CMA_QUERY_CLASSPORT_INFO_TIMEOUT,
					     GFP_KERNEL, cma_query_sa_classport_info_cb,
					     cb_ctx, &cb_ctx->sa_query);
	if (ret < 0) {
		pr_err("RDMA CM: %s port %u failed to send ClassPortInfo query, ret: %d\n",
		       device->name, port_num, ret);
		goto out;
	}

	wait_for_completion(&cb_ctx->done);

out:
	kfree(cb_ctx);
	return ret;
}

static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
				 struct cma_multicast *mc)
{
	struct ib_sa_mcmember_rec rec;
	struct ib_class_port_info class_port_info;
	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
	ib_sa_comp_mask comp_mask;
	int ret;

	ib_addr_get_mgid(dev_addr, &rec.mgid);
	ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num,
				     &rec.mgid, &rec);
	if (ret)
		return ret;

	ret = cma_set_qkey(id_priv, 0);
	if (ret)
		return ret;

	cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid);
	rec.qkey = cpu_to_be32(id_priv->qkey);
	rdma_addr_get_sgid(dev_addr, &rec.port_gid);
	rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
	rec.join_state = mc->join_state;

	if (rec.join_state == BIT(SENDONLY_FULLMEMBER_JOIN)) {
		ret = cma_query_sa_classport_info(id_priv->id.device,
						  id_priv->id.port_num,
						  &class_port_info);

		if (ret)
			return ret;

		if (!(ib_get_cpi_capmask2(&class_port_info) &
		      IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT)) {
option\n", 4169 id_priv->id.device->name, id_priv->id.port_num); 4170 return -EOPNOTSUPP; 4171 } 4172 } 4173 4174 comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID | 4175 IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE | 4176 IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL | 4177 IB_SA_MCMEMBER_REC_FLOW_LABEL | 4178 IB_SA_MCMEMBER_REC_TRAFFIC_CLASS; 4179 4180 if (id_priv->id.ps == RDMA_PS_IPOIB) 4181 comp_mask |= IB_SA_MCMEMBER_REC_RATE | 4182 IB_SA_MCMEMBER_REC_RATE_SELECTOR | 4183 IB_SA_MCMEMBER_REC_MTU_SELECTOR | 4184 IB_SA_MCMEMBER_REC_MTU | 4185 IB_SA_MCMEMBER_REC_HOP_LIMIT; 4186 4187 mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device, 4188 id_priv->id.port_num, &rec, 4189 comp_mask, GFP_KERNEL, 4190 cma_ib_mc_handler, mc); 4191 return PTR_ERR_OR_ZERO(mc->multicast.ib); 4192 } 4193 4194 static void iboe_mcast_work_handler(struct work_struct *work) 4195 { 4196 struct iboe_mcast_work *mw = container_of(work, struct iboe_mcast_work, work); 4197 struct cma_multicast *mc = mw->mc; 4198 struct ib_sa_multicast *m = mc->multicast.ib; 4199 4200 mc->multicast.ib->context = mc; 4201 cma_ib_mc_handler(0, m); 4202 kref_put(&mc->mcref, release_mc); 4203 kfree(mw); 4204 } 4205 4206 static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid, 4207 enum ib_gid_type gid_type) 4208 { 4209 struct sockaddr_in *sin = (struct sockaddr_in *)addr; 4210 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr; 4211 4212 if (cma_any_addr(addr)) { 4213 memset(mgid, 0, sizeof *mgid); 4214 } else if (addr->sa_family == AF_INET6) { 4215 memcpy(mgid, &sin6->sin6_addr, sizeof *mgid); 4216 } else { 4217 mgid->raw[0] = 4218 (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ? 0 : 0xff; 4219 mgid->raw[1] = 4220 (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ? 
		mgid->raw[2] = 0;
		mgid->raw[3] = 0;
		mgid->raw[4] = 0;
		mgid->raw[5] = 0;
		mgid->raw[6] = 0;
		mgid->raw[7] = 0;
		mgid->raw[8] = 0;
		mgid->raw[9] = 0;
		mgid->raw[10] = 0xff;
		mgid->raw[11] = 0xff;
		*(__be32 *)(&mgid->raw[12]) = sin->sin_addr.s_addr;
	}
}

/*
 * RoCE (IBoE) multicast join: no SA query is performed.  The group record
 * is built locally and the join is completed from a workqueue handler.
 */
static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
				   struct cma_multicast *mc)
{
	struct iboe_mcast_work *work;
	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
	int err = 0;
	struct sockaddr *addr = (struct sockaddr *)&mc->addr;
	struct net_device *ndev = NULL;
	enum ib_gid_type gid_type;
	bool send_only;

	send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN);

	if (cma_zero_addr((struct sockaddr *)&mc->addr))
		return -EINVAL;

	work = kzalloc(sizeof *work, GFP_KERNEL);
	if (!work)
		return -ENOMEM;

	mc->multicast.ib = kzalloc(sizeof(struct ib_sa_multicast), GFP_KERNEL);
	if (!mc->multicast.ib) {
		err = -ENOMEM;
		goto out1;
	}

	gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
		   rdma_start_port(id_priv->cma_dev->device)];
	cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid, gid_type);

	mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff);
	if (id_priv->id.ps == RDMA_PS_UDP)
		mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);

	if (dev_addr->bound_dev_if)
		ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
	if (!ndev) {
		err = -ENODEV;
		goto out2;
	}
	mc->multicast.ib->rec.rate = iboe_get_rate(ndev);
	mc->multicast.ib->rec.hop_limit = 1;
	mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->if_mtu);

	if (addr->sa_family == AF_INET || addr->sa_family == AF_INET6) {
		if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
			mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
			if (!send_only) {
				err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid,
						    true);
				if (!err)
					mc->igmp_joined = true;
			}
		}
	} else {
		if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
			err = -ENOTSUPP;
	}
	dev_put(ndev);
	if (err || !mc->multicast.ib->rec.mtu) {
		if (!err)
			err = -EINVAL;
		goto out2;
	}
	rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
		    &mc->multicast.ib->rec.port_gid);
	work->id = id_priv;
	work->mc = mc;
	INIT_WORK(&work->work, iboe_mcast_work_handler);
	kref_get(&mc->mcref);
	queue_work(cma_wq, &work->work);

	return 0;

out2:
	kfree(mc->multicast.ib);
out1:
	kfree(work);
	return err;
}

int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
			u8 join_state, void *context)
{
	struct rdma_id_private *id_priv;
	struct cma_multicast *mc;
	int ret;

	if (!id->device)
		return -EINVAL;

	id_priv = container_of(id, struct rdma_id_private, id);
	if (!cma_comp(id_priv, RDMA_CM_ADDR_BOUND) &&
	    !cma_comp(id_priv, RDMA_CM_ADDR_RESOLVED))
		return -EINVAL;

	mc = kmalloc(sizeof *mc, GFP_KERNEL);
	if (!mc)
		return -ENOMEM;

	memcpy(&mc->addr, addr, rdma_addr_size(addr));
	mc->context = context;
	mc->id_priv = id_priv;
	mc->igmp_joined = false;
	mc->join_state = join_state;
	spin_lock(&id_priv->lock);
	list_add(&mc->list, &id_priv->mc_list);
	spin_unlock(&id_priv->lock);

	if (rdma_protocol_roce(id->device, id->port_num)) {
		kref_init(&mc->mcref);
		ret = cma_iboe_join_multicast(id_priv, mc);
	} else if (rdma_cap_ib_mcast(id->device, id->port_num))
		ret = cma_join_ib_multicast(id_priv, mc);
	else
		ret = -ENOSYS;

	if (ret) {
		spin_lock_irq(&id_priv->lock);
		list_del(&mc->list);
		spin_unlock_irq(&id_priv->lock);
		kfree(mc);
	}
	return ret;
}
EXPORT_SYMBOL(rdma_join_multicast);

void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
{
	struct rdma_id_private *id_priv;
	struct cma_multicast *mc;

	id_priv = container_of(id, struct rdma_id_private, id);
	spin_lock_irq(&id_priv->lock);
	list_for_each_entry(mc, &id_priv->mc_list, list) {
		if (!memcmp(&mc->addr, addr, rdma_addr_size(addr))) {
			list_del(&mc->list);
			spin_unlock_irq(&id_priv->lock);

			if (id->qp)
				ib_detach_mcast(id->qp,
						&mc->multicast.ib->rec.mgid,
						be16_to_cpu(mc->multicast.ib->rec.mlid));

			BUG_ON(id_priv->cma_dev->device != id->device);

			if (rdma_cap_ib_mcast(id->device, id->port_num)) {
				ib_sa_free_multicast(mc->multicast.ib);
				kfree(mc);
			} else if (rdma_protocol_roce(id->device, id->port_num)) {
				if (mc->igmp_joined) {
					struct rdma_dev_addr *dev_addr =
						&id->route.addr.dev_addr;
					struct net_device *ndev = NULL;

					if (dev_addr->bound_dev_if)
						ndev = dev_get_by_index(dev_addr->net,
									dev_addr->bound_dev_if);
					if (ndev) {
						cma_igmp_send(ndev,
							      &mc->multicast.ib->rec.mgid,
							      false);
						dev_put(ndev);
					}
					mc->igmp_joined = false;
				}
				kref_put(&mc->mcref, release_mc);
			}
			return;
		}
	}
	spin_unlock_irq(&id_priv->lock);
}
EXPORT_SYMBOL(rdma_leave_multicast);

/* sysctl(9) handler: report or update the default RoCE GID type of a port. */
static int
sysctl_cma_default_roce_mode(SYSCTL_HANDLER_ARGS)
{
	struct cma_device *cma_dev = arg1;
	const int port = arg2;
	char buf[64];
	int error;

	strlcpy(buf, ib_cache_gid_type_str(
	    cma_get_default_gid_type(cma_dev, port)), sizeof(buf));

	error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
	if (error != 0 || req->newptr == NULL)
		goto done;

	error = ib_cache_gid_parse_type_str(buf);
	if (error < 0) {
		error = EINVAL;
		goto done;
	}

	cma_set_default_gid_type(cma_dev, port, error);
	error = 0;
done:
	return (error);
}

static void cma_add_one(struct ib_device *device)
{
	struct cma_device *cma_dev;
	struct rdma_id_private *id_priv;
	unsigned int i;

	cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
	if (!cma_dev)
		return;

	sysctl_ctx_init(&cma_dev->sysctl_ctx);

	cma_dev->device = device;
	cma_dev->default_gid_type = kcalloc(device->phys_port_cnt,
					    sizeof(*cma_dev->default_gid_type),
					    GFP_KERNEL);
	if (!cma_dev->default_gid_type) {
		kfree(cma_dev);
		return;
	}
	for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) {
		unsigned long supported_gids;
		unsigned int default_gid_type;

		supported_gids = roce_gid_type_mask_support(device, i);

		if (WARN_ON(!supported_gids)) {
			/* set something valid */
			default_gid_type = 0;
		} else if (test_bit(IB_GID_TYPE_ROCE_UDP_ENCAP, &supported_gids)) {
			/* prefer RoCEv2, if supported */
			default_gid_type = IB_GID_TYPE_ROCE_UDP_ENCAP;
		} else {
			default_gid_type = find_first_bit(&supported_gids,
							  BITS_PER_LONG);
		}
		cma_dev->default_gid_type[i - rdma_start_port(device)] =
			default_gid_type;
	}

	init_completion(&cma_dev->comp);
	atomic_set(&cma_dev->refcount, 1);
	INIT_LIST_HEAD(&cma_dev->id_list);
	ib_set_client_data(device, &cma_client, cma_dev);

	mutex_lock(&lock);
	list_add_tail(&cma_dev->list, &dev_list);
	list_for_each_entry(id_priv, &listen_any_list, list)
		cma_listen_on_dev(id_priv, cma_dev);
	mutex_unlock(&lock);

	for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) {
		char buf[64];

		snprintf(buf, sizeof(buf), "default_roce_mode_port%d", i);

		(void) SYSCTL_ADD_PROC(&cma_dev->sysctl_ctx,
		    SYSCTL_CHILDREN(device->ports_parent->parent->oidp),
		    OID_AUTO, buf, CTLTYPE_STRING | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
		    cma_dev, i, &sysctl_cma_default_roce_mode, "A",
		    "Default RoCE mode. Valid values: IB/RoCE v1 and RoCE v2");
	}
}

static int cma_remove_id_dev(struct rdma_id_private *id_priv)
{
	struct rdma_cm_event event;
	enum rdma_cm_state state;
	int ret = 0;

	/* Record that we want to remove the device */
	state = cma_exch(id_priv, RDMA_CM_DEVICE_REMOVAL);
	if (state == RDMA_CM_DESTROYING)
		return 0;

	cma_cancel_operation(id_priv, state);
	mutex_lock(&id_priv->handler_mutex);

	/* Check for destruction from another callback. */
	if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL))
		goto out;

	memset(&event, 0, sizeof event);
	event.event = RDMA_CM_EVENT_DEVICE_REMOVAL;
	ret = id_priv->id.event_handler(&id_priv->id, &event);
out:
	mutex_unlock(&id_priv->handler_mutex);
	return ret;
}

static void cma_process_remove(struct cma_device *cma_dev)
{
	struct rdma_id_private *id_priv;
	int ret;

	mutex_lock(&lock);
	while (!list_empty(&cma_dev->id_list)) {
		id_priv = list_entry(cma_dev->id_list.next,
				     struct rdma_id_private, list);

		list_del(&id_priv->listen_list);
		list_del_init(&id_priv->list);
		atomic_inc(&id_priv->refcount);
		mutex_unlock(&lock);

		ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv);
		cma_deref_id(id_priv);
		if (ret)
			rdma_destroy_id(&id_priv->id);

		mutex_lock(&lock);
	}
	mutex_unlock(&lock);

	cma_deref_dev(cma_dev);
	wait_for_completion(&cma_dev->comp);
}

static void cma_remove_one(struct ib_device *device, void *client_data)
{
	struct cma_device *cma_dev = client_data;

	if (!cma_dev)
		return;

	mutex_lock(&lock);
	list_del(&cma_dev->list);
	mutex_unlock(&lock);

	cma_process_remove(cma_dev);
	sysctl_ctx_free(&cma_dev->sysctl_ctx);
	kfree(cma_dev->default_gid_type);
	kfree(cma_dev);
}

static void cma_init_vnet(void *arg)
{
	struct cma_pernet *pernet = &VNET(cma_pernet);

	idr_init(&pernet->tcp_ps);
	idr_init(&pernet->udp_ps);
	idr_init(&pernet->ipoib_ps);
	idr_init(&pernet->ib_ps);
	idr_init(&pernet->sdp_ps);
}
VNET_SYSINIT(cma_init_vnet, SI_SUB_OFED_MODINIT - 1, SI_ORDER_FIRST, cma_init_vnet, NULL);

static void cma_destroy_vnet(void *arg)
{
	struct cma_pernet *pernet = &VNET(cma_pernet);

	idr_destroy(&pernet->tcp_ps);
	idr_destroy(&pernet->udp_ps);
	idr_destroy(&pernet->ipoib_ps);
	idr_destroy(&pernet->ib_ps);
	idr_destroy(&pernet->sdp_ps);
}
VNET_SYSUNINIT(cma_destroy_vnet, SI_SUB_OFED_MODINIT - 1, SI_ORDER_SECOND, cma_destroy_vnet, NULL);

static int __init cma_init(void)
{
	int ret;

	cma_wq = alloc_ordered_workqueue("rdma_cm", WQ_MEM_RECLAIM);
	if (!cma_wq)
		return -ENOMEM;

	ib_sa_register_client(&sa_client);
	rdma_addr_register_client(&addr_client);

	ret = ib_register_client(&cma_client);
	if (ret)
		goto err;

	cma_configfs_init();

	return 0;

err:
	rdma_addr_unregister_client(&addr_client);
	ib_sa_unregister_client(&sa_client);
	destroy_workqueue(cma_wq);
	return ret;
}

static void __exit cma_cleanup(void)
{
	cma_configfs_exit();
	ib_unregister_client(&cma_client);
	rdma_addr_unregister_client(&addr_client);
	ib_sa_unregister_client(&sa_client);
	destroy_workqueue(cma_wq);
}

module_init(cma_init);
module_exit(cma_cleanup);