/*-
 * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
 *
 * Copyright (c) 2015-2017, Mellanox Technologies inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * $FreeBSD$
 */

#include "core_priv.h"

#include <linux/in.h>
#include <linux/in6.h>
#include <linux/rcupdate.h>

#include <rdma/ib_cache.h>
#include <rdma/ib_addr.h>

#include <netinet6/scope6_var.h>

static struct workqueue_struct *roce_gid_mgmt_wq;

enum gid_op_type {
	GID_DEL = 0,
	GID_ADD
};

struct roce_netdev_event_work {
	struct work_struct work;
	struct net_device *ndev;
};

struct roce_rescan_work {
	struct work_struct work;
	struct ib_device *ib_dev;
};

static const struct {
	bool (*is_supported)(const struct ib_device *device, u8 port_num);
	enum ib_gid_type gid_type;
} PORT_CAP_TO_GID_TYPE[] = {
	{rdma_protocol_roce_eth_encap, IB_GID_TYPE_ROCE},
	{rdma_protocol_roce_udp_encap, IB_GID_TYPE_ROCE_UDP_ENCAP},
};

#define	CAP_TO_GID_TABLE_SIZE	ARRAY_SIZE(PORT_CAP_TO_GID_TYPE)

/* Return a bitmask of the GID types supported by the given port. */
unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port)
{
	int i;
	unsigned int ret_flags = 0;

	if (!rdma_protocol_roce(ib_dev, port))
		return 1UL << IB_GID_TYPE_IB;

	for (i = 0; i < CAP_TO_GID_TABLE_SIZE; i++)
		if (PORT_CAP_TO_GID_TYPE[i].is_supported(ib_dev, port))
			ret_flags |= 1UL << PORT_CAP_TO_GID_TYPE[i].gid_type;

	return ret_flags;
}
EXPORT_SYMBOL(roce_gid_type_mask_support);

/* Add or delete the given GID once for every GID type the port supports. */
static void update_gid(enum gid_op_type gid_op, struct ib_device *ib_dev,
    u8 port, union ib_gid *gid, struct net_device *ndev)
{
	int i;
	unsigned long gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
	struct ib_gid_attr gid_attr;

	memset(&gid_attr, 0, sizeof(gid_attr));
	gid_attr.ndev = ndev;

	for (i = 0; i != IB_GID_TYPE_SIZE; i++) {
		if ((1UL << i) & gid_type_mask) {
			gid_attr.gid_type = i;
			switch (gid_op) {
			case GID_ADD:
				ib_cache_gid_add(ib_dev, port,
				    gid, &gid_attr);
				break;
			case GID_DEL:
				ib_cache_gid_del(ib_dev, port,
				    gid, &gid_attr);
				break;
			}
		}
	}
}
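/*
 * Usage sketch (illustrative only, not part of this file): a consumer
 * that needs to know whether a port speaks RoCEv2 could test the mask
 * returned above directly:
 *
 *	if (roce_gid_type_mask_support(ib_dev, port) &
 *	    (1UL << IB_GID_TYPE_ROCE_UDP_ENCAP))
 *		... port supports RoCEv2 (UDP encapsulation) ...
 */

/*
 * Matching callbacks for the ib_enum_*_roce_netdev*() helpers used
 * below: the cookie either selects one specific net_device, or every
 * interface backed by a RoCE port is accepted.
 */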
static int
roce_gid_match_netdev(struct ib_device *ib_dev, u8 port,
    struct net_device *idev, void *cookie)
{
	struct net_device *ndev = (struct net_device *)cookie;

	if (idev == NULL)
		return (0);
	return (ndev == idev);
}

static int
roce_gid_match_all(struct ib_device *ib_dev, u8 port,
    struct net_device *idev, void *cookie)
{
	if (idev == NULL)
		return (0);
	return (1);
}

/*
 * Install the default GIDs for the port; returns how many default GID
 * table entries the port uses (one per supported GID type).
 */
static int
roce_gid_enum_netdev_default(struct ib_device *ib_dev,
    u8 port, struct net_device *idev)
{
	unsigned long gid_type_mask;

	gid_type_mask = roce_gid_type_mask_support(ib_dev, port);

	ib_cache_gid_set_default_gid(ib_dev, port, idev, gid_type_mask,
	    IB_CACHE_GID_DEFAULT_MODE_SET);

	return (hweight_long(gid_type_mask));
}

#define	ETH_IPOIB_DRV_NAME	"ib"

/* Returns non-zero when the interface is driven by the IPoIB driver. */
static inline int
is_eth_ipoib_intf(struct net_device *dev)
{
	if (strcmp(dev->if_dname, ETH_IPOIB_DRV_NAME))
		return 0;
	return 1;
}

/*
 * Snapshot the IPv4/IPv6 addresses of "ndev" and of any VLAN interface
 * stacked on top of it, then synchronize the GID table of the given
 * port with that snapshot.
 */
static void
roce_gid_update_addr_callback(struct ib_device *device, u8 port,
    struct net_device *ndev, void *cookie)
{
	struct ipx_entry {
		STAILQ_ENTRY(ipx_entry) entry;
		union ipx_addr {
			struct sockaddr sa[0];
			struct sockaddr_in v4;
			struct sockaddr_in6 v6;
		} ipx_addr;
		struct net_device *ndev;
	};
	struct ipx_entry *entry;
	struct net_device *idev;
#if defined(INET) || defined(INET6)
	struct ifaddr *ifa;
#endif
	struct ib_gid_attr gid_attr;
	union ib_gid gid;
	int default_gids;
	u16 index_num;
	int i;

	STAILQ_HEAD(, ipx_entry) ipx_head;

	STAILQ_INIT(&ipx_head);

	/* make sure default GIDs are in */
	default_gids = roce_gid_enum_netdev_default(device, port, ndev);

	CURVNET_SET(ndev->if_vnet);
	IFNET_RLOCK();
	TAILQ_FOREACH(idev, &V_ifnet, if_link) {
		if (idev != ndev) {
			if (idev->if_type != IFT_L2VLAN)
				continue;
			if (ndev != rdma_vlan_dev_real_dev(idev))
				continue;
		}

		/* clone address information for IPv4 and IPv6 */
		IF_ADDR_RLOCK(idev);
#if defined(INET)
		TAILQ_FOREACH(ifa, &idev->if_addrhead, ifa_link) {
			if (ifa->ifa_addr == NULL ||
			    ifa->ifa_addr->sa_family != AF_INET)
				continue;
			entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
			if (entry == NULL) {
				pr_warn("roce_gid_update_addr_callback: "
				    "couldn't allocate entry for IPv4 update\n");
				continue;
			}
			entry->ipx_addr.v4 = *((struct sockaddr_in *)ifa->ifa_addr);
			entry->ndev = idev;
			STAILQ_INSERT_TAIL(&ipx_head, entry, entry);
		}
#endif
#if defined(INET6)
		TAILQ_FOREACH(ifa, &idev->if_addrhead, ifa_link) {
			if (ifa->ifa_addr == NULL ||
			    ifa->ifa_addr->sa_family != AF_INET6)
				continue;
			entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
			if (entry == NULL) {
				pr_warn("roce_gid_update_addr_callback: "
				    "couldn't allocate entry for IPv6 update\n");
				continue;
			}
			entry->ipx_addr.v6 = *((struct sockaddr_in6 *)ifa->ifa_addr);
			entry->ndev = idev;

			/* trash IPv6 scope ID */
			sa6_recoverscope(&entry->ipx_addr.v6);
			entry->ipx_addr.v6.sin6_scope_id = 0;

			STAILQ_INSERT_TAIL(&ipx_head, entry, entry);
		}
#endif
		IF_ADDR_RUNLOCK(idev);
	}
	IFNET_RUNLOCK();
	CURVNET_RESTORE();
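	/*
	 * Reconcile the port's GID table with the address snapshot
	 * collected above: first add GIDs for addresses that are not
	 * yet cached, then delete cached GIDs whose address or
	 * net_device has disappeared.
	 */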
	/* add missing GIDs, if any */
	STAILQ_FOREACH(entry, &ipx_head, entry) {
		unsigned long gid_type_mask = roce_gid_type_mask_support(device, port);

		if (rdma_ip2gid(&entry->ipx_addr.sa[0], &gid) != 0)
			continue;

		for (i = 0; i != IB_GID_TYPE_SIZE; i++) {
			if (!((1UL << i) & gid_type_mask))
				continue;
			/* check if entry found */
			if (ib_find_cached_gid_by_port(device, &gid, i,
			    port, entry->ndev, &index_num) == 0)
				break;
		}
		if (i != IB_GID_TYPE_SIZE)
			continue;
		/* add new GID */
		update_gid(GID_ADD, device, port, &gid, entry->ndev);
	}

	/* remove stale GIDs, if any */
	for (i = default_gids; ib_get_cached_gid(device, port, i, &gid, &gid_attr) == 0; i++) {
		union ipx_addr ipx;

		/* check for valid network device pointer */
		ndev = gid_attr.ndev;
		if (ndev == NULL)
			continue;
		dev_put(ndev);

		/* don't delete empty entries */
		if (memcmp(&gid, &zgid, sizeof(zgid)) == 0)
			continue;

		/* zero default */
		memset(&ipx, 0, sizeof(ipx));

		rdma_gid2ip(&ipx.sa[0], &gid);

		STAILQ_FOREACH(entry, &ipx_head, entry) {
			if (entry->ndev == ndev &&
			    memcmp(&entry->ipx_addr, &ipx, sizeof(ipx)) == 0)
				break;
		}
		/* check if entry found */
		if (entry != NULL)
			continue;

		/* remove GID */
		update_gid(GID_DEL, device, port, &gid, ndev);
	}

	while ((entry = STAILQ_FIRST(&ipx_head))) {
		STAILQ_REMOVE_HEAD(&ipx_head, entry);
		kfree(entry);
	}
}

static void
roce_gid_queue_scan_event_handler(struct work_struct *_work)
{
	struct roce_netdev_event_work *work =
	    container_of(_work, struct roce_netdev_event_work, work);

	ib_enum_all_roce_netdevs(roce_gid_match_netdev, work->ndev,
	    roce_gid_update_addr_callback, NULL);

	dev_put(work->ndev);
	kfree(work);
}

static void
roce_gid_queue_scan_event(struct net_device *ndev)
{
	struct roce_netdev_event_work *work;

retry:
	if (is_eth_ipoib_intf(ndev))
		return;

	if (ndev->if_type != IFT_ETHER) {
		if (ndev->if_type == IFT_L2VLAN) {
			ndev = rdma_vlan_dev_real_dev(ndev);
			if (ndev != NULL)
				goto retry;
		}
		return;
	}

	work = kmalloc(sizeof(*work), GFP_ATOMIC);
	if (!work) {
		pr_warn("roce_gid_mgmt: Couldn't allocate work for addr_event\n");
		return;
	}

	INIT_WORK(&work->work, roce_gid_queue_scan_event_handler);
	dev_hold(ndev);

	work->ndev = ndev;

	queue_work(roce_gid_mgmt_wq, &work->work);
}

static void
roce_gid_delete_all_event_handler(struct work_struct *_work)
{
	struct roce_netdev_event_work *work =
	    container_of(_work, struct roce_netdev_event_work, work);

	ib_cache_gid_del_all_by_netdev(work->ndev);
	dev_put(work->ndev);
	kfree(work);
}

static void
roce_gid_delete_all_event(struct net_device *ndev)
{
	struct roce_netdev_event_work *work;

	work = kmalloc(sizeof(*work), GFP_ATOMIC);
	if (!work) {
		pr_warn("roce_gid_mgmt: Couldn't allocate work for addr_event\n");
		return;
	}

	INIT_WORK(&work->work, roce_gid_delete_all_event_handler);
	dev_hold(ndev);
	work->ndev = ndev;
	queue_work(roce_gid_mgmt_wq, &work->work);

	/* make sure job is complete before returning */
	flush_workqueue(roce_gid_mgmt_wq);
}
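/*
 * This callback is registered on both the inetaddr and the netdevice
 * notifier chains (see roce_gid_mgmt_init() below), so "ptr" is the
 * net_device that triggered the event in either case.
 */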
break; 397 } 398 return NOTIFY_DONE; 399 } 400 401 static struct notifier_block nb_inetaddr = { 402 .notifier_call = inetaddr_event 403 }; 404 405 static void 406 roce_rescan_device_handler(struct work_struct *_work) 407 { 408 struct roce_rescan_work *work = 409 container_of(_work, struct roce_rescan_work, work); 410 411 ib_enum_roce_netdev(work->ib_dev, roce_gid_match_all, NULL, 412 roce_gid_update_addr_callback, NULL); 413 kfree(work); 414 } 415 416 /* Caller must flush system workqueue before removing the ib_device */ 417 int roce_rescan_device(struct ib_device *ib_dev) 418 { 419 struct roce_rescan_work *work = kmalloc(sizeof(*work), GFP_KERNEL); 420 421 if (!work) 422 return -ENOMEM; 423 424 work->ib_dev = ib_dev; 425 INIT_WORK(&work->work, roce_rescan_device_handler); 426 queue_work(roce_gid_mgmt_wq, &work->work); 427 428 return 0; 429 } 430 431 int __init roce_gid_mgmt_init(void) 432 { 433 roce_gid_mgmt_wq = alloc_ordered_workqueue("roce_gid_mgmt_wq", 0); 434 if (!roce_gid_mgmt_wq) { 435 pr_warn("roce_gid_mgmt: can't allocate work queue\n"); 436 return -ENOMEM; 437 } 438 439 register_inetaddr_notifier(&nb_inetaddr); 440 441 /* 442 * We rely on the netdevice notifier to enumerate all existing 443 * devices in the system. Register to this notifier last to 444 * make sure we will not miss any IP add/del callbacks. 445 */ 446 register_netdevice_notifier(&nb_inetaddr); 447 448 return 0; 449 } 450 451 void __exit roce_gid_mgmt_cleanup(void) 452 { 453 unregister_inetaddr_notifier(&nb_inetaddr); 454 unregister_netdevice_notifier(&nb_inetaddr); 455 456 /* 457 * Ensure all gid deletion tasks complete before we go down, 458 * to avoid any reference to free'd memory. By the time 459 * ib-core is removed, all physical devices have been removed, 460 * so no issue with remaining hardware contexts. 461 */ 462 synchronize_rcu(); 463 drain_workqueue(roce_gid_mgmt_wq); 464 destroy_workqueue(roce_gid_mgmt_wq); 465 } 466