/*-
 * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
 *
 * Copyright (c) 2015-2017, Mellanox Technologies inc.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "core_priv.h"

#include <linux/in.h>
#include <linux/in6.h>
#include <linux/rcupdate.h>

#include <rdma/ib_cache.h>
#include <rdma/ib_addr.h>

#include <netinet6/scope6_var.h>

/*
 * Ordered workqueue on which all GID scan/update/delete jobs are
 * serialized; created in roce_gid_mgmt_init().
 */
static struct workqueue_struct *roce_gid_mgmt_wq;

/* Operation selector for update_gid(). */
enum gid_op_type {
	GID_DEL = 0,
	GID_ADD
};

/*
 * Work item carrying the net device whose address change triggered a
 * GID table scan or wholesale deletion.  Holds a reference on "ndev"
 * that the work handler releases.
 */
struct roce_netdev_event_work {
	struct work_struct work;
	struct net_device *ndev;
};

/* Work item requesting a full GID rescan of a single ib_device. */
struct roce_rescan_work {
	struct work_struct work;
	struct ib_device *ib_dev;
};

/*
 * Map each RoCE transport capability test to the GID type that the
 * capability implies.  Consulted by roce_gid_type_mask_support().
 */
static const struct {
	bool (*is_supported)(const struct ib_device *device, u8 port_num);
	enum ib_gid_type gid_type;
} PORT_CAP_TO_GID_TYPE[] = {
	{rdma_protocol_roce_eth_encap, IB_GID_TYPE_ROCE},
	{rdma_protocol_roce_udp_encap, IB_GID_TYPE_ROCE_UDP_ENCAP},
};

#define	CAP_TO_GID_TABLE_SIZE	ARRAY_SIZE(PORT_CAP_TO_GID_TYPE)

/*
 * Return a bitmask of the ib_gid_type values supported by the given
 * device port.  Ports that do not speak RoCE always report exactly the
 * plain IB GID type.
 */
unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port)
{
	int i;
	unsigned int ret_flags = 0;

	if (!rdma_protocol_roce(ib_dev, port))
		return 1UL << IB_GID_TYPE_IB;

	for (i = 0; i < CAP_TO_GID_TABLE_SIZE; i++)
		if (PORT_CAP_TO_GID_TYPE[i].is_supported(ib_dev, port))
			ret_flags |= 1UL << PORT_CAP_TO_GID_TYPE[i].gid_type;

	return ret_flags;
}
EXPORT_SYMBOL(roce_gid_type_mask_support);

/*
 * Add or delete one GID in the cache of "ib_dev"/"port", once for every
 * GID type the port supports, binding the entry to "ndev".
 */
static void update_gid(enum gid_op_type gid_op, struct ib_device *ib_dev,
    u8 port, union ib_gid *gid, struct net_device *ndev)
{
	int i;
	unsigned long gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
	struct ib_gid_attr gid_attr;

	memset(&gid_attr, 0, sizeof(gid_attr));
	gid_attr.ndev = ndev;

	for (i = 0; i != IB_GID_TYPE_SIZE; i++) {
		if ((1UL << i) & gid_type_mask) {
			gid_attr.gid_type = i;
			switch (gid_op) {
			case GID_ADD:
				ib_cache_gid_add(ib_dev, port,
				    gid, &gid_attr);
				break;
			case GID_DEL:
				ib_cache_gid_del(ib_dev, port,
				    gid, &gid_attr);
				break;
			}
		}
	}
}

/*
 * Netdev filter: match only the net device passed via "cookie".
 * Used with ib_enum_all_roce_netdevs() to limit a scan to one ifnet.
 */
static int
roce_gid_match_netdev(struct ib_device *ib_dev, u8 port,
    struct net_device *idev, void *cookie)
{
	struct net_device *ndev = (struct net_device *)cookie;
	if (idev == NULL)
		return (0);
	return (ndev == idev);
}

/* Netdev filter: match every port that has an attached net device. */
static int
roce_gid_match_all(struct ib_device *ib_dev, u8 port,
    struct net_device *idev, void *cookie)
{
	if (idev == NULL)
		return (0);
	return (1);
}

/*
 * Install the default GIDs for "idev" on the given port and return the
 * number of default GID slots consumed (one per supported GID type).
 * The return value tells roce_gid_update_addr_callback() where the
 * non-default entries begin in the cached GID table.
 */
static int
roce_gid_enum_netdev_default(struct ib_device *ib_dev,
    u8 port, struct net_device *idev)
{
	unsigned long gid_type_mask;

	gid_type_mask = roce_gid_type_mask_support(ib_dev, port);

	ib_cache_gid_set_default_gid(ib_dev, port, idev, gid_type_mask,
	    IB_CACHE_GID_DEFAULT_MODE_SET);

	return (hweight_long(gid_type_mask));
}

/*
 * Reconcile the cached GID table of "device"/"port" with the IPv4/IPv6
 * addresses currently configured on "ndev" (and any VLAN devices
 * stacked on it).  Three phases:
 *
 *   1) Snapshot all matching interface addresses across every VNET
 *      into a local STAILQ (under the VNET/IFNET locks and net epoch).
 *   2) Add a GID for every snapshotted address not already cached.
 *   3) Walk the cached table past the default GIDs and delete entries
 *      whose address no longer appears in the snapshot.
 */
static void
roce_gid_update_addr_callback(struct ib_device *device, u8 port,
    struct net_device *ndev, void *cookie)
{
	struct ipx_entry {
		STAILQ_ENTRY(ipx_entry) entry;
		/* overlay so one buffer holds either address family */
		union ipx_addr {
			struct sockaddr sa[0];
			struct sockaddr_in v4;
			struct sockaddr_in6 v6;
		} ipx_addr;
		struct net_device *ndev;
	};
	struct ipx_entry *entry;
	struct net_device *idev;
#if defined(INET) || defined(INET6)
	struct ifaddr *ifa;
#endif
	VNET_ITERATOR_DECL(vnet_iter);
	struct ib_gid_attr gid_attr;
	union ib_gid gid;
	int default_gids;
	u16 index_num;
	int i;

	STAILQ_HEAD(, ipx_entry) ipx_head;

	STAILQ_INIT(&ipx_head);

	/* make sure default GIDs are in */
	default_gids = roce_gid_enum_netdev_default(device, port, ndev);

	/* Phase 1: snapshot addresses from all VNETs. */
	VNET_LIST_RLOCK();
	VNET_FOREACH(vnet_iter) {
		CURVNET_SET(vnet_iter);
		IFNET_RLOCK();
		CK_STAILQ_FOREACH(idev, &V_ifnet, if_link) {
			struct epoch_tracker et;

			/* accept "ndev" itself and VLANs stacked on it */
			if (idev != ndev) {
				if (idev->if_type != IFT_L2VLAN)
					continue;
				if (ndev != rdma_vlan_dev_real_dev(idev))
					continue;
			}

			/* clone address information for IPv4 and IPv6 */
			NET_EPOCH_ENTER(et);
#if defined(INET)
			CK_STAILQ_FOREACH(ifa, &idev->if_addrhead, ifa_link) {
				if (ifa->ifa_addr == NULL ||
				    ifa->ifa_addr->sa_family != AF_INET)
					continue;
				/* GFP_ATOMIC: we are inside the net epoch */
				entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
				if (entry == NULL) {
					pr_warn("roce_gid_update_addr_callback: "
					    "couldn't allocate entry for IPv4 update\n");
					continue;
				}
				entry->ipx_addr.v4 = *((struct sockaddr_in *)ifa->ifa_addr);
				entry->ndev = idev;
				STAILQ_INSERT_TAIL(&ipx_head, entry, entry);
			}
#endif
#if defined(INET6)
			CK_STAILQ_FOREACH(ifa, &idev->if_addrhead, ifa_link) {
				if (ifa->ifa_addr == NULL ||
				    ifa->ifa_addr->sa_family != AF_INET6)
					continue;
				/* GFP_ATOMIC: we are inside the net epoch */
				entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
				if (entry == NULL) {
					pr_warn("roce_gid_update_addr_callback: "
					    "couldn't allocate entry for IPv6 update\n");
					continue;
				}
				entry->ipx_addr.v6 = *((struct sockaddr_in6 *)ifa->ifa_addr);
				entry->ndev = idev;

				/*
				 * Trash the IPv6 scope ID: recover the
				 * kernel-embedded scope and then zero it so
				 * addresses compare equal regardless of the
				 * interface they were learned on.
				 */
				sa6_recoverscope(&entry->ipx_addr.v6);
				entry->ipx_addr.v6.sin6_scope_id = 0;

				STAILQ_INSERT_TAIL(&ipx_head, entry, entry);
			}
#endif
			NET_EPOCH_EXIT(et);
		}
		IFNET_RUNLOCK();
		CURVNET_RESTORE();
	}
	VNET_LIST_RUNLOCK();

	/* Phase 2: add missing GIDs, if any */
	STAILQ_FOREACH(entry, &ipx_head, entry) {
		unsigned long gid_type_mask = roce_gid_type_mask_support(device, port);

		if (rdma_ip2gid(&entry->ipx_addr.sa[0], &gid) != 0)
			continue;

		for (i = 0; i != IB_GID_TYPE_SIZE; i++) {
			if (!((1UL << i) & gid_type_mask))
				continue;
			/* check if entry found */
			if (ib_find_cached_gid_by_port(device, &gid, i,
			    port, entry->ndev, &index_num) == 0)
				break;
		}
		/* loop ran to completion => GID absent for all types */
		if (i != IB_GID_TYPE_SIZE)
			continue;
		/* add new GID */
		update_gid(GID_ADD, device, port, &gid, entry->ndev);
	}

	/*
	 * Phase 3: remove stale GIDs, if any.  Scanning starts after the
	 * default GID slots accounted for by "default_gids".
	 */
	for (i = default_gids; ib_get_cached_gid(device, port, i, &gid, &gid_attr) == 0; i++) {
		union ipx_addr ipx;

		/* check for valid network device pointer */
		ndev = gid_attr.ndev;
		if (ndev == NULL)
			continue;
		/* drop the reference taken by ib_get_cached_gid() on ndev */
		dev_put(ndev);

		/* don't delete empty entries */
		if (memcmp(&gid, &zgid, sizeof(zgid)) == 0)
			continue;

		/* zero default */
		memset(&ipx, 0, sizeof(ipx));

		rdma_gid2ip(&ipx.sa[0], &gid);

		STAILQ_FOREACH(entry, &ipx_head, entry) {
			if (entry->ndev == ndev &&
			    memcmp(&entry->ipx_addr, &ipx, sizeof(ipx)) == 0)
				break;
		}
		/* check if entry found */
		if (entry != NULL)
			continue;

		/* remove GID */
		update_gid(GID_DEL, device, port, &gid, ndev);
	}

	/* free the address snapshot */
	while ((entry = STAILQ_FIRST(&ipx_head))) {
		STAILQ_REMOVE_HEAD(&ipx_head, entry);
		kfree(entry);
	}
}

/*
 * Deferred handler: rescan the GID tables of every RoCE port attached
 * to work->ndev, then drop the reference taken when the work was
 * queued.
 */
static void
roce_gid_queue_scan_event_handler(struct work_struct *_work)
{
	struct roce_netdev_event_work *work =
		container_of(_work, struct roce_netdev_event_work, work);

	ib_enum_all_roce_netdevs(roce_gid_match_netdev, work->ndev,
	    roce_gid_update_addr_callback, NULL);

	dev_put(work->ndev);
	kfree(work);
}

/*
 * Queue a GID rescan for "ndev".  VLAN devices are resolved to their
 * underlying Ethernet device first; anything that is not (ultimately)
 * an Ethernet interface is ignored.  Takes a reference on "ndev" that
 * the work handler releases.
 */
static void
roce_gid_queue_scan_event(struct net_device *ndev)
{
	struct roce_netdev_event_work *work;

retry:
	switch (ndev->if_type) {
	case IFT_ETHER:
		break;
	case IFT_L2VLAN:
		ndev = rdma_vlan_dev_real_dev(ndev);
		if (ndev != NULL)
			goto retry;
		/* FALLTHROUGH */
	default:
		return;
	}

	/* GFP_ATOMIC: may be called from a non-sleepable notifier context */
	work = kmalloc(sizeof(*work), GFP_ATOMIC);
	if (!work) {
		pr_warn("roce_gid_mgmt: Couldn't allocate work for addr_event\n");
		return;
	}

	INIT_WORK(&work->work, roce_gid_queue_scan_event_handler);
	dev_hold(ndev);

	work->ndev = ndev;

	queue_work(roce_gid_mgmt_wq, &work->work);
}

/*
 * Deferred handler: purge every cached GID bound to work->ndev, then
 * drop the reference taken when the work was queued.
 */
static void
roce_gid_delete_all_event_handler(struct work_struct *_work)
{
	struct roce_netdev_event_work *work =
		container_of(_work, struct roce_netdev_event_work, work);

	ib_cache_gid_del_all_by_netdev(work->ndev);
	dev_put(work->ndev);
	kfree(work);
}

/*
 * Queue deletion of all GIDs bound to "ndev" and wait for the job to
 * finish before returning (the caller may be about to destroy the
 * interface).
 */
static void
roce_gid_delete_all_event(struct net_device *ndev)
{
	struct roce_netdev_event_work *work;

	work = kmalloc(sizeof(*work), GFP_ATOMIC);
	if (!work) {
		pr_warn("roce_gid_mgmt: Couldn't allocate work for addr_event\n");
		return;
	}

	INIT_WORK(&work->work, roce_gid_delete_all_event_handler);
	dev_hold(ndev);
	work->ndev = ndev;
	queue_work(roce_gid_mgmt_wq, &work->work);

	/* make sure job is complete before returning */
	flush_workqueue(roce_gid_mgmt_wq);
}

/*
 * Notifier callback for both the inetaddr and netdevice chains (the
 * same notifier_block is registered on each in roce_gid_mgmt_init()).
 * Translates netdev lifecycle/address events into GID table work.
 */
static int
inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *ndev = ptr;

	switch (event) {
	case NETDEV_UNREGISTER:
		roce_gid_delete_all_event(ndev);
		break;
	case NETDEV_REGISTER:
	case NETDEV_CHANGEADDR:
	case NETDEV_CHANGEIFADDR:
		roce_gid_queue_scan_event(ndev);
		break;
	default:
		break;
	}
	return NOTIFY_DONE;
}

static struct notifier_block nb_inetaddr = {
	.notifier_call = inetaddr_event
};

/* Handle for the ifnet_event EVENTHANDLER registration (see init). */
static eventhandler_tag eh_ifnet_event;

/*
 * ifnet_event handler: on a PCP change of a non-VLAN interface, rebuild
 * that interface's GID table from scratch (delete all, then rescan).
 */
static void
roce_ifnet_event(void *arg, struct ifnet *ifp, int event)
{
	if (event != IFNET_EVENT_PCP || is_vlan_dev(ifp))
		return;

	/* make sure GID table is reloaded */
	roce_gid_delete_all_event(ifp);
	roce_gid_queue_scan_event(ifp);
}

/*
 * Deferred handler: rescan every RoCE port of one ib_device against all
 * net devices (roce_gid_match_all).
 */
static void
roce_rescan_device_handler(struct work_struct *_work)
{
	struct roce_rescan_work *work =
		container_of(_work, struct roce_rescan_work, work);

	ib_enum_roce_netdev(work->ib_dev, roce_gid_match_all, NULL,
	    roce_gid_update_addr_callback, NULL);
	kfree(work);
}

/*
 * Queue a full GID rescan of "ib_dev".  Returns 0 on success or
 * -ENOMEM if the work item could not be allocated.
 *
 * Caller must flush system workqueue before removing the ib_device.
 */
int roce_rescan_device(struct ib_device *ib_dev)
{
	struct roce_rescan_work *work = kmalloc(sizeof(*work), GFP_KERNEL);

	if (!work)
		return -ENOMEM;

	work->ib_dev = ib_dev;
	INIT_WORK(&work->work, roce_rescan_device_handler);
	queue_work(roce_gid_mgmt_wq, &work->work);

	return 0;
}

/*
 * Module init: create the ordered workqueue and hook up the address,
 * netdevice and ifnet event sources.  Returns 0 or -ENOMEM.
 */
int __init roce_gid_mgmt_init(void)
{
	roce_gid_mgmt_wq = alloc_ordered_workqueue("roce_gid_mgmt_wq", 0);
	if (!roce_gid_mgmt_wq) {
		pr_warn("roce_gid_mgmt: can't allocate work queue\n");
		return -ENOMEM;
	}

	register_inetaddr_notifier(&nb_inetaddr);

	/*
	 * We rely on the netdevice notifier to enumerate all existing
	 * devices in the system. Register to this notifier last to
	 * make sure we will not miss any IP add/del callbacks.
	 */
	register_netdevice_notifier(&nb_inetaddr);

	eh_ifnet_event = EVENTHANDLER_REGISTER(ifnet_event,
	    roce_ifnet_event, NULL, EVENTHANDLER_PRI_ANY);

	return 0;
}

/*
 * Module teardown: unhook all event sources, then drain and destroy
 * the workqueue so no GID work can touch freed state.
 */
void __exit roce_gid_mgmt_cleanup(void)
{

	if (eh_ifnet_event != NULL)
		EVENTHANDLER_DEREGISTER(ifnet_event, eh_ifnet_event);

	unregister_inetaddr_notifier(&nb_inetaddr);
	unregister_netdevice_notifier(&nb_inetaddr);

	/*
	 * Ensure all gid deletion tasks complete before we go down,
	 * to avoid any reference to free'd memory. By the time
	 * ib-core is removed, all physical devices have been removed,
	 * so no issue with remaining hardware contexts.
	 */
	synchronize_rcu();
	drain_workqueue(roce_gid_mgmt_wq);
	destroy_workqueue(roce_gid_mgmt_wq);
}