/*-
 * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
 *
 * Copyright (c) 2015-2017, Mellanox Technologies inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "core_priv.h"
#include <sys/eventhandler.h>

#include <linux/in.h>
#include <linux/in6.h>
#include <linux/rcupdate.h>

#include <rdma/ib_cache.h>
#include <rdma/ib_addr.h>

#include <netinet6/scope6_var.h>

static struct workqueue_struct *roce_gid_mgmt_wq;

enum gid_op_type {
	GID_DEL = 0,
	GID_ADD
};

struct roce_netdev_event_work {
	struct work_struct work;
	struct ifnet *ndev;
};

struct roce_rescan_work {
	struct work_struct work;
	struct ib_device *ib_dev;
};

static const struct {
	bool (*is_supported)(const struct ib_device *device, u8 port_num);
	enum ib_gid_type gid_type;
} PORT_CAP_TO_GID_TYPE[] = {
	{rdma_protocol_roce_eth_encap, IB_GID_TYPE_ROCE},
	{rdma_protocol_roce_udp_encap, IB_GID_TYPE_ROCE_UDP_ENCAP},
};

#define	CAP_TO_GID_TABLE_SIZE	ARRAY_SIZE(PORT_CAP_TO_GID_TYPE)

unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port)
{
	int i;
	unsigned int ret_flags = 0;

	if (!rdma_protocol_roce(ib_dev, port))
		return 1UL << IB_GID_TYPE_IB;

	for (i = 0; i < CAP_TO_GID_TABLE_SIZE; i++)
		if (PORT_CAP_TO_GID_TYPE[i].is_supported(ib_dev, port))
			ret_flags |= 1UL << PORT_CAP_TO_GID_TYPE[i].gid_type;

	return ret_flags;
}
EXPORT_SYMBOL(roce_gid_type_mask_support);

static void update_gid(enum gid_op_type gid_op, struct ib_device *ib_dev,
    u8 port, union ib_gid *gid, struct ifnet *ndev)
{
	int i;
	unsigned long gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
	struct ib_gid_attr gid_attr;

	memset(&gid_attr, 0, sizeof(gid_attr));
	gid_attr.ndev = ndev;

	for (i = 0; i != IB_GID_TYPE_SIZE; i++) {
		if ((1UL << i) & gid_type_mask) {
			gid_attr.gid_type = i;
			switch (gid_op) {
			case GID_ADD:
				ib_cache_gid_add(ib_dev, port, gid, &gid_attr);
				break;
			case GID_DEL:
				ib_cache_gid_del(ib_dev, port, gid, &gid_attr);
				break;
			}
		}
	}
}
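
/*
 * Illustrative sketch (comment only, not compiled into this file): how a
 * caller might interpret the mask returned by roce_gid_type_mask_support()
 * before update_gid() applies an operation per supported GID type. The
 * "ib_dev" pointer and port number below are hypothetical:
 *
 *	unsigned long mask = roce_gid_type_mask_support(ib_dev, 1);
 *
 *	if (mask & (1UL << IB_GID_TYPE_ROCE))
 *		pr_info("port 1 supports RoCE v1 GIDs\n");
 *	if (mask & (1UL << IB_GID_TYPE_ROCE_UDP_ENCAP))
 *		pr_info("port 1 supports RoCE v2 (UDP encap) GIDs\n");
 *
 * A non-RoCE port always reports exactly 1UL << IB_GID_TYPE_IB.
 */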

static int
roce_gid_match_netdev(struct ib_device *ib_dev, u8 port,
    struct ifnet *idev, void *cookie)
{
	struct ifnet *ndev = (struct ifnet *)cookie;

	if (idev == NULL)
		return (0);
	return (ndev == idev);
}

static int
roce_gid_match_all(struct ib_device *ib_dev, u8 port,
    struct ifnet *idev, void *cookie)
{
	if (idev == NULL)
		return (0);
	return (1);
}

static int
roce_gid_enum_netdev_default(struct ib_device *ib_dev,
    u8 port, struct ifnet *idev)
{
	unsigned long gid_type_mask;

	gid_type_mask = roce_gid_type_mask_support(ib_dev, port);

	ib_cache_gid_set_default_gid(ib_dev, port, idev, gid_type_mask,
	    IB_CACHE_GID_DEFAULT_MODE_SET);

	return (hweight_long(gid_type_mask));
}
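
/*
 * Descriptive note (added commentary): roce_gid_update_addr_callback()
 * below resynchronizes one port's GID table with the IP addresses
 * currently configured on the matching ifnet and any VLANs stacked on
 * it. It proceeds in three phases:
 *
 * 1. Under the network epoch, snapshot all IPv4/IPv6 addresses of the
 *    interface and its VLAN children into a local STAILQ.
 * 2. For every snapshotted address whose GID is not already cached for
 *    any supported GID type, add it via update_gid(GID_ADD, ...).
 * 3. Walk the cached GIDs past the default entries and delete any whose
 *    address/ifnet pair no longer appears in the snapshot.
 */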

static void
roce_gid_update_addr_callback(struct ib_device *device, u8 port,
    struct ifnet *ndev, void *cookie)
{
	struct ipx_entry {
		STAILQ_ENTRY(ipx_entry) entry;
		union ipx_addr {
			struct sockaddr sa[0];
			struct sockaddr_in v4;
			struct sockaddr_in6 v6;
		} ipx_addr;
		struct ifnet *ndev;
	};
	struct ipx_entry *entry;
	struct ifnet *idev;
#if defined(INET) || defined(INET6)
	struct ifaddr *ifa;
#endif
	VNET_ITERATOR_DECL(vnet_iter);
	struct ib_gid_attr gid_attr;
	union ib_gid gid;
	int default_gids;
	u16 index_num;
	int i;

	STAILQ_HEAD(, ipx_entry) ipx_head;

	STAILQ_INIT(&ipx_head);

	/* make sure default GIDs are in */
	default_gids = roce_gid_enum_netdev_default(device, port, ndev);

	VNET_LIST_RLOCK();
	VNET_FOREACH(vnet_iter) {
		CURVNET_SET(vnet_iter);
		IFNET_RLOCK();
		CK_STAILQ_FOREACH(idev, &V_ifnet, if_link) {
			struct epoch_tracker et;

			if (idev != ndev) {
				if (idev->if_type != IFT_L2VLAN)
					continue;
				if (ndev != rdma_vlan_dev_real_dev(idev))
					continue;
			}

			/* clone address information for IPv4 and IPv6 */
			NET_EPOCH_ENTER(et);
#if defined(INET)
			CK_STAILQ_FOREACH(ifa, &idev->if_addrhead, ifa_link) {
				if (ifa->ifa_addr == NULL ||
				    ifa->ifa_addr->sa_family != AF_INET)
					continue;
				entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
				if (entry == NULL) {
					pr_warn("roce_gid_update_addr_callback: "
					    "couldn't allocate entry for IPv4 update\n");
					continue;
				}
				entry->ipx_addr.v4 = *((struct sockaddr_in *)ifa->ifa_addr);
				entry->ndev = idev;
				STAILQ_INSERT_TAIL(&ipx_head, entry, entry);
			}
#endif
#if defined(INET6)
			CK_STAILQ_FOREACH(ifa, &idev->if_addrhead, ifa_link) {
				if (ifa->ifa_addr == NULL ||
				    ifa->ifa_addr->sa_family != AF_INET6)
					continue;
				entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
				if (entry == NULL) {
					pr_warn("roce_gid_update_addr_callback: "
					    "couldn't allocate entry for IPv6 update\n");
					continue;
				}
				entry->ipx_addr.v6 = *((struct sockaddr_in6 *)ifa->ifa_addr);
				entry->ndev = idev;

				/* trash IPv6 scope ID */
				sa6_recoverscope(&entry->ipx_addr.v6);
				entry->ipx_addr.v6.sin6_scope_id = 0;

				STAILQ_INSERT_TAIL(&ipx_head, entry, entry);
			}
#endif
			NET_EPOCH_EXIT(et);
		}
		IFNET_RUNLOCK();
		CURVNET_RESTORE();
	}
	VNET_LIST_RUNLOCK();

	/* add missing GIDs, if any */
	STAILQ_FOREACH(entry, &ipx_head, entry) {
		unsigned long gid_type_mask = roce_gid_type_mask_support(device, port);

		if (rdma_ip2gid(&entry->ipx_addr.sa[0], &gid) != 0)
			continue;

		for (i = 0; i != IB_GID_TYPE_SIZE; i++) {
			if (!((1UL << i) & gid_type_mask))
				continue;
			/* check if entry found */
			if (ib_find_cached_gid_by_port(device, &gid, i,
			    port, entry->ndev, &index_num) == 0)
				break;
		}
		if (i != IB_GID_TYPE_SIZE)
			continue;
		/* add new GID */
		update_gid(GID_ADD, device, port, &gid, entry->ndev);
	}

	/* remove stale GIDs, if any */
	for (i = default_gids; ib_get_cached_gid(device, port, i, &gid, &gid_attr) == 0; i++) {
		union ipx_addr ipx;

		/* check for valid network device pointer */
		ndev = gid_attr.ndev;
		if (ndev == NULL)
			continue;
		dev_put(ndev);

		/* don't delete empty entries */
		if (memcmp(&gid, &zgid, sizeof(zgid)) == 0)
			continue;

		/* zero default */
		memset(&ipx, 0, sizeof(ipx));

		rdma_gid2ip(&ipx.sa[0], &gid);

		STAILQ_FOREACH(entry, &ipx_head, entry) {
			if (entry->ndev == ndev &&
			    memcmp(&entry->ipx_addr, &ipx, sizeof(ipx)) == 0)
				break;
		}
		/* check if entry found */
		if (entry != NULL)
			continue;

		/* remove GID */
		update_gid(GID_DEL, device, port, &gid, ndev);
	}

	while ((entry = STAILQ_FIRST(&ipx_head))) {
		STAILQ_REMOVE_HEAD(&ipx_head, entry);
		kfree(entry);
	}
}

static void
roce_gid_queue_scan_event_handler(struct work_struct *_work)
{
	struct roce_netdev_event_work *work =
	    container_of(_work, struct roce_netdev_event_work, work);

	ib_enum_all_roce_netdevs(roce_gid_match_netdev, work->ndev,
	    roce_gid_update_addr_callback, NULL);

	dev_put(work->ndev);
	kfree(work);
}

static void
roce_gid_queue_scan_event(struct ifnet *ndev)
{
	struct roce_netdev_event_work *work;

retry:
	switch (ndev->if_type) {
	case IFT_ETHER:
		break;
	case IFT_L2VLAN:
		ndev = rdma_vlan_dev_real_dev(ndev);
		if (ndev != NULL)
			goto retry;
		/* FALLTHROUGH */
	default:
		return;
	}

	work = kmalloc(sizeof(*work), GFP_ATOMIC);
	if (!work) {
		pr_warn("roce_gid_mgmt: Couldn't allocate work for addr_event\n");
		return;
	}

	INIT_WORK(&work->work, roce_gid_queue_scan_event_handler);
	dev_hold(ndev);

	work->ndev = ndev;

	queue_work(roce_gid_mgmt_wq, &work->work);
}

static void
roce_gid_delete_all_event_handler(struct work_struct *_work)
{
	struct roce_netdev_event_work *work =
	    container_of(_work, struct roce_netdev_event_work, work);

	ib_cache_gid_del_all_by_netdev(work->ndev);
	dev_put(work->ndev);
	kfree(work);
}

static void
roce_gid_delete_all_event(struct ifnet *ndev)
{
	struct roce_netdev_event_work *work;

	work = kmalloc(sizeof(*work), GFP_ATOMIC);
	if (!work) {
		pr_warn("roce_gid_mgmt: Couldn't allocate work for addr_event\n");
		return;
	}

	INIT_WORK(&work->work, roce_gid_delete_all_event_handler);
	dev_hold(ndev);
	work->ndev = ndev;
	queue_work(roce_gid_mgmt_wq, &work->work);

	/* make sure job is complete before returning */
	flush_workqueue(roce_gid_mgmt_wq);
}

static int
inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct ifnet *ndev = netdev_notifier_info_to_ifp(ptr);

	switch (event) {
	case NETDEV_UNREGISTER:
		roce_gid_delete_all_event(ndev);
		break;
	case NETDEV_REGISTER:
	case NETDEV_CHANGEADDR:
	case NETDEV_CHANGEIFADDR:
		roce_gid_queue_scan_event(ndev);
		break;
	default:
		break;
	}
	return NOTIFY_DONE;
}
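
/*
 * Descriptive note (added commentary): the same notifier block is
 * registered on both the inetaddr and netdevice notifier chains in
 * roce_gid_mgmt_init() below, so inetaddr_event() sees interface
 * register/unregister events as well as address changes.
 */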
static struct notifier_block nb_inetaddr = {
	.notifier_call = inetaddr_event
};

static eventhandler_tag eh_ifnet_event;

static void
roce_ifnet_event(void *arg, struct ifnet *ifp, int event)
{
	if (event != IFNET_EVENT_PCP || is_vlan_dev(ifp))
		return;

	/* make sure GID table is reloaded */
	roce_gid_delete_all_event(ifp);
	roce_gid_queue_scan_event(ifp);
}

static void
roce_rescan_device_handler(struct work_struct *_work)
{
	struct roce_rescan_work *work =
	    container_of(_work, struct roce_rescan_work, work);

	ib_enum_roce_netdev(work->ib_dev, roce_gid_match_all, NULL,
	    roce_gid_update_addr_callback, NULL);
	kfree(work);
}

/* Caller must flush system workqueue before removing the ib_device */
int roce_rescan_device(struct ib_device *ib_dev)
{
	struct roce_rescan_work *work = kmalloc(sizeof(*work), GFP_KERNEL);

	if (!work)
		return -ENOMEM;

	work->ib_dev = ib_dev;
	INIT_WORK(&work->work, roce_rescan_device_handler);
	queue_work(roce_gid_mgmt_wq, &work->work);

	return 0;
}

int __init roce_gid_mgmt_init(void)
{
	roce_gid_mgmt_wq = alloc_ordered_workqueue("roce_gid_mgmt_wq", 0);
	if (!roce_gid_mgmt_wq) {
		pr_warn("roce_gid_mgmt: can't allocate work queue\n");
		return -ENOMEM;
	}

	register_inetaddr_notifier(&nb_inetaddr);

	/*
	 * We rely on the netdevice notifier to enumerate all existing
	 * devices in the system. Register to this notifier last to
	 * make sure we will not miss any IP add/del callbacks.
	 */
	register_netdevice_notifier(&nb_inetaddr);

	eh_ifnet_event = EVENTHANDLER_REGISTER(ifnet_event,
	    roce_ifnet_event, NULL, EVENTHANDLER_PRI_ANY);

	return 0;
}

void __exit roce_gid_mgmt_cleanup(void)
{

	if (eh_ifnet_event != NULL)
		EVENTHANDLER_DEREGISTER(ifnet_event, eh_ifnet_event);

	unregister_inetaddr_notifier(&nb_inetaddr);
	unregister_netdevice_notifier(&nb_inetaddr);

	/*
	 * Ensure all gid deletion tasks complete before we go down,
	 * to avoid any reference to free'd memory. By the time
	 * ib-core is removed, all physical devices have been removed,
	 * so no issue with remaining hardware contexts.
	 */
	synchronize_rcu();
	drain_workqueue(roce_gid_mgmt_wq);
	destroy_workqueue(roce_gid_mgmt_wq);
}
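
/*
 * Illustrative usage sketch (comment only, not compiled): a RoCE driver
 * might trigger a full GID-table rebuild for a device like this. The
 * "my_dev" pointer is hypothetical; per the comment above
 * roce_rescan_device(), the caller must flush the system workqueue
 * before removing the ib_device.
 *
 *	if (roce_rescan_device(my_dev) != 0)
 *		pr_warn("roce: could not queue GID rescan\n");
 */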