1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 3 * 4 * Copyright (c) 2015-2017, Mellanox Technologies inc. All rights reserved. 5 * 6 * This software is available to you under a choice of one of two 7 * licenses. You may choose to be licensed under the terms of the GNU 8 * General Public License (GPL) Version 2, available from the file 9 * COPYING in the main directory of this source tree, or the 10 * OpenIB.org BSD license below: 11 * 12 * Redistribution and use in source and binary forms, with or 13 * without modification, are permitted provided that the following 14 * conditions are met: 15 * 16 * - Redistributions of source code must retain the above 17 * copyright notice, this list of conditions and the following 18 * disclaimer. 19 * 20 * - Redistributions in binary form must reproduce the above 21 * copyright notice, this list of conditions and the following 22 * disclaimer in the documentation and/or other materials 23 * provided with the distribution. 24 * 25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 32 * SOFTWARE. 33 */ 34 35 #include <sys/cdefs.h> 36 __FBSDID("$FreeBSD$"); 37 38 #include "core_priv.h" 39 #include <sys/eventhandler.h> 40 41 #include <linux/in.h> 42 #include <linux/in6.h> 43 #include <linux/rcupdate.h> 44 45 #include <rdma/ib_cache.h> 46 #include <rdma/ib_addr.h> 47 48 #include <netinet6/scope6_var.h> 49 50 static struct workqueue_struct *roce_gid_mgmt_wq; 51 52 enum gid_op_type { 53 GID_DEL = 0, 54 GID_ADD 55 }; 56 57 struct roce_netdev_event_work { 58 struct work_struct work; 59 if_t ndev; 60 }; 61 62 struct roce_rescan_work { 63 struct work_struct work; 64 struct ib_device *ib_dev; 65 }; 66 67 static const struct { 68 bool (*is_supported)(const struct ib_device *device, u8 port_num); 69 enum ib_gid_type gid_type; 70 } PORT_CAP_TO_GID_TYPE[] = { 71 {rdma_protocol_roce_eth_encap, IB_GID_TYPE_ROCE}, 72 {rdma_protocol_roce_udp_encap, IB_GID_TYPE_ROCE_UDP_ENCAP}, 73 }; 74 75 #define CAP_TO_GID_TABLE_SIZE ARRAY_SIZE(PORT_CAP_TO_GID_TYPE) 76 77 unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port) 78 { 79 int i; 80 unsigned int ret_flags = 0; 81 82 if (!rdma_protocol_roce(ib_dev, port)) 83 return 1UL << IB_GID_TYPE_IB; 84 85 for (i = 0; i < CAP_TO_GID_TABLE_SIZE; i++) 86 if (PORT_CAP_TO_GID_TYPE[i].is_supported(ib_dev, port)) 87 ret_flags |= 1UL << PORT_CAP_TO_GID_TYPE[i].gid_type; 88 89 return ret_flags; 90 } 91 EXPORT_SYMBOL(roce_gid_type_mask_support); 92 93 static void update_gid(enum gid_op_type gid_op, struct ib_device *ib_dev, 94 u8 port, union ib_gid *gid, if_t ndev) 95 { 96 int i; 97 unsigned long gid_type_mask = roce_gid_type_mask_support(ib_dev, port); 98 struct ib_gid_attr gid_attr; 99 100 memset(&gid_attr, 0, sizeof(gid_attr)); 101 gid_attr.ndev = ndev; 102 103 for (i = 0; i != IB_GID_TYPE_SIZE; i++) { 104 if ((1UL << i) & gid_type_mask) { 105 gid_attr.gid_type = i; 106 switch (gid_op) { 107 case GID_ADD: 108 ib_cache_gid_add(ib_dev, port, 109 gid, &gid_attr); 110 break; 111 case GID_DEL: 112 ib_cache_gid_del(ib_dev, port, 113 gid, &gid_attr); 114 break; 115 } 116 } 117 } 118 } 119 120 static int 121 roce_gid_match_netdev(struct ib_device *ib_dev, u8 port, 122 if_t idev, void *cookie) 123 { 124 if_t ndev = (if_t )cookie; 125 if (idev == NULL) 126 return (0); 127 return (ndev == idev); 128 } 129 130 static int 131 roce_gid_match_all(struct ib_device *ib_dev, u8 port, 132 if_t idev, void *cookie) 133 { 134 if (idev == NULL) 135 return (0); 136 return (1); 137 } 138 139 static int 140 roce_gid_enum_netdev_default(struct ib_device *ib_dev, 141 u8 port, if_t idev) 142 { 143 unsigned long gid_type_mask; 144 145 gid_type_mask = roce_gid_type_mask_support(ib_dev, port); 146 147 ib_cache_gid_set_default_gid(ib_dev, port, idev, gid_type_mask, 148 IB_CACHE_GID_DEFAULT_MODE_SET); 149 150 return (hweight_long(gid_type_mask)); 151 } 152 153 struct ipx_entry { 154 STAILQ_ENTRY(ipx_entry) entry; 155 union ipx_addr { 156 struct sockaddr sa[0]; 157 struct sockaddr_in v4; 158 struct sockaddr_in6 v6; 159 } ipx_addr; 160 if_t ndev; 161 }; 162 163 STAILQ_HEAD(ipx_queue, ipx_entry); 164 165 #ifdef INET 166 static u_int 167 roce_gid_update_addr_ifa4_cb(void *arg, struct ifaddr *ifa, u_int count) 168 { 169 struct ipx_queue *ipx_head = arg; 170 struct ipx_entry *entry; 171 172 entry = kzalloc(sizeof(*entry), GFP_ATOMIC); 173 if (entry == NULL) { 174 pr_warn("roce_gid_update_addr_callback: " 175 "couldn't allocate entry for IPv4 update\n"); 176 return (0); 177 } 178 entry->ipx_addr.v4 = *((struct sockaddr_in *)ifa->ifa_addr); 179 entry->ndev = ifa->ifa_ifp; 180 STAILQ_INSERT_TAIL(ipx_head, entry, entry); 181 182 return (1); 183 } 184 #endif 185 186 #ifdef INET6 187 static u_int 188 roce_gid_update_addr_ifa6_cb(void *arg, struct ifaddr *ifa, u_int count) 189 { 190 struct ipx_queue *ipx_head = arg; 191 struct ipx_entry *entry; 192 193 entry = kzalloc(sizeof(*entry), GFP_ATOMIC); 194 if (entry == NULL) { 195 pr_warn("roce_gid_update_addr_callback: " 196 "couldn't allocate entry for IPv6 update\n"); 197 return (0); 198 } 199 entry->ipx_addr.v6 = *((struct sockaddr_in6 *)ifa->ifa_addr); 200 entry->ndev = ifa->ifa_ifp; 201 202 /* trash IPv6 scope ID */ 203 sa6_recoverscope(&entry->ipx_addr.v6); 204 entry->ipx_addr.v6.sin6_scope_id = 0; 205 206 STAILQ_INSERT_TAIL(ipx_head, entry, entry); 207 208 return (1); 209 } 210 #endif 211 212 static void 213 roce_gid_update_addr_callback(struct ib_device *device, u8 port, 214 if_t ndev, void *cookie) 215 { 216 struct epoch_tracker et; 217 struct if_iter iter; 218 struct ipx_entry *entry; 219 VNET_ITERATOR_DECL(vnet_iter); 220 struct ib_gid_attr gid_attr; 221 union ib_gid gid; 222 if_t ifp; 223 int default_gids; 224 u16 index_num; 225 int i; 226 227 struct ipx_queue ipx_head; 228 229 STAILQ_INIT(&ipx_head); 230 231 /* make sure default GIDs are in */ 232 default_gids = roce_gid_enum_netdev_default(device, port, ndev); 233 234 VNET_LIST_RLOCK(); 235 VNET_FOREACH(vnet_iter) { 236 CURVNET_SET(vnet_iter); 237 NET_EPOCH_ENTER(et); 238 for (ifp = if_iter_start(&iter); ifp != NULL; ifp = if_iter_next(&iter)) { 239 if (ifp != ndev) { 240 if (if_gettype(ifp) != IFT_L2VLAN) 241 continue; 242 if (ifp != rdma_vlan_dev_real_dev(ifp)) 243 continue; 244 } 245 246 /* clone address information for IPv4 and IPv6 */ 247 #if defined(INET) 248 if_foreach_addr_type(ifp, AF_INET, roce_gid_update_addr_ifa4_cb, &ipx_head); 249 #endif 250 #if defined(INET6) 251 if_foreach_addr_type(ifp, AF_INET6, roce_gid_update_addr_ifa6_cb, &ipx_head); 252 #endif 253 } 254 NET_EPOCH_EXIT(et); 255 CURVNET_RESTORE(); 256 } 257 VNET_LIST_RUNLOCK(); 258 259 /* add missing GIDs, if any */ 260 STAILQ_FOREACH(entry, &ipx_head, entry) { 261 unsigned long gid_type_mask = roce_gid_type_mask_support(device, port); 262 263 if (rdma_ip2gid(&entry->ipx_addr.sa[0], &gid) != 0) 264 continue; 265 266 for (i = 0; i != IB_GID_TYPE_SIZE; i++) { 267 if (!((1UL << i) & gid_type_mask)) 268 continue; 269 /* check if entry found */ 270 if (ib_find_cached_gid_by_port(device, &gid, i, 271 port, entry->ndev, &index_num) == 0) 272 break; 273 } 274 if (i != IB_GID_TYPE_SIZE) 275 continue; 276 /* add new GID */ 277 update_gid(GID_ADD, device, port, &gid, entry->ndev); 278 } 279 280 /* remove stale GIDs, if any */ 281 for (i = default_gids; ib_get_cached_gid(device, port, i, &gid, &gid_attr) == 0; i++) { 282 union ipx_addr ipx; 283 284 /* check for valid network device pointer */ 285 ndev = gid_attr.ndev; 286 if (ndev == NULL) 287 continue; 288 dev_put(ndev); 289 290 /* don't delete empty entries */ 291 if (memcmp(&gid, &zgid, sizeof(zgid)) == 0) 292 continue; 293 294 /* zero default */ 295 memset(&ipx, 0, sizeof(ipx)); 296 297 rdma_gid2ip(&ipx.sa[0], &gid); 298 299 STAILQ_FOREACH(entry, &ipx_head, entry) { 300 if (entry->ndev == ndev && 301 memcmp(&entry->ipx_addr, &ipx, sizeof(ipx)) == 0) 302 break; 303 } 304 /* check if entry found */ 305 if (entry != NULL) 306 continue; 307 308 /* remove GID */ 309 update_gid(GID_DEL, device, port, &gid, ndev); 310 } 311 312 while ((entry = STAILQ_FIRST(&ipx_head))) { 313 STAILQ_REMOVE_HEAD(&ipx_head, entry); 314 kfree(entry); 315 } 316 } 317 318 static void 319 roce_gid_queue_scan_event_handler(struct work_struct *_work) 320 { 321 struct roce_netdev_event_work *work = 322 container_of(_work, struct roce_netdev_event_work, work); 323 324 ib_enum_all_roce_netdevs(roce_gid_match_netdev, work->ndev, 325 roce_gid_update_addr_callback, NULL); 326 327 dev_put(work->ndev); 328 kfree(work); 329 } 330 331 static void 332 roce_gid_queue_scan_event(if_t ndev) 333 { 334 struct roce_netdev_event_work *work; 335 336 retry: 337 switch (if_gettype(ndev)) { 338 case IFT_ETHER: 339 break; 340 case IFT_L2VLAN: 341 ndev = rdma_vlan_dev_real_dev(ndev); 342 if (ndev != NULL) 343 goto retry; 344 /* FALLTHROUGH */ 345 default: 346 return; 347 } 348 349 work = kmalloc(sizeof(*work), GFP_ATOMIC); 350 if (!work) { 351 pr_warn("roce_gid_mgmt: Couldn't allocate work for addr_event\n"); 352 return; 353 } 354 355 INIT_WORK(&work->work, roce_gid_queue_scan_event_handler); 356 dev_hold(ndev); 357 358 work->ndev = ndev; 359 360 queue_work(roce_gid_mgmt_wq, &work->work); 361 } 362 363 static void 364 roce_gid_delete_all_event_handler(struct work_struct *_work) 365 { 366 struct roce_netdev_event_work *work = 367 container_of(_work, struct roce_netdev_event_work, work); 368 369 ib_cache_gid_del_all_by_netdev(work->ndev); 370 dev_put(work->ndev); 371 kfree(work); 372 } 373 374 static void 375 roce_gid_delete_all_event(if_t ndev) 376 { 377 struct roce_netdev_event_work *work; 378 379 work = kmalloc(sizeof(*work), GFP_ATOMIC); 380 if (!work) { 381 pr_warn("roce_gid_mgmt: Couldn't allocate work for addr_event\n"); 382 return; 383 } 384 385 INIT_WORK(&work->work, roce_gid_delete_all_event_handler); 386 dev_hold(ndev); 387 work->ndev = ndev; 388 queue_work(roce_gid_mgmt_wq, &work->work); 389 390 /* make sure job is complete before returning */ 391 flush_workqueue(roce_gid_mgmt_wq); 392 } 393 394 static int 395 inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) 396 { 397 if_t ndev = netdev_notifier_info_to_ifp(ptr); 398 399 switch (event) { 400 case NETDEV_UNREGISTER: 401 roce_gid_delete_all_event(ndev); 402 break; 403 case NETDEV_REGISTER: 404 case NETDEV_CHANGEADDR: 405 case NETDEV_CHANGEIFADDR: 406 roce_gid_queue_scan_event(ndev); 407 break; 408 default: 409 break; 410 } 411 return NOTIFY_DONE; 412 } 413 414 static struct notifier_block nb_inetaddr = { 415 .notifier_call = inetaddr_event 416 }; 417 418 static eventhandler_tag eh_ifnet_event; 419 420 static void 421 roce_ifnet_event(void *arg, if_t ifp, int event) 422 { 423 if (event != IFNET_EVENT_PCP || is_vlan_dev(ifp)) 424 return; 425 426 /* make sure GID table is reloaded */ 427 roce_gid_delete_all_event(ifp); 428 roce_gid_queue_scan_event(ifp); 429 } 430 431 static void 432 roce_rescan_device_handler(struct work_struct *_work) 433 { 434 struct roce_rescan_work *work = 435 container_of(_work, struct roce_rescan_work, work); 436 437 ib_enum_roce_netdev(work->ib_dev, roce_gid_match_all, NULL, 438 roce_gid_update_addr_callback, NULL); 439 kfree(work); 440 } 441 442 /* Caller must flush system workqueue before removing the ib_device */ 443 int roce_rescan_device(struct ib_device *ib_dev) 444 { 445 struct roce_rescan_work *work = kmalloc(sizeof(*work), GFP_KERNEL); 446 447 if (!work) 448 return -ENOMEM; 449 450 work->ib_dev = ib_dev; 451 INIT_WORK(&work->work, roce_rescan_device_handler); 452 queue_work(roce_gid_mgmt_wq, &work->work); 453 454 return 0; 455 } 456 457 int __init roce_gid_mgmt_init(void) 458 { 459 roce_gid_mgmt_wq = alloc_ordered_workqueue("roce_gid_mgmt_wq", 0); 460 if (!roce_gid_mgmt_wq) { 461 pr_warn("roce_gid_mgmt: can't allocate work queue\n"); 462 return -ENOMEM; 463 } 464 465 register_inetaddr_notifier(&nb_inetaddr); 466 467 /* 468 * We rely on the netdevice notifier to enumerate all existing 469 * devices in the system. Register to this notifier last to 470 * make sure we will not miss any IP add/del callbacks. 471 */ 472 register_netdevice_notifier(&nb_inetaddr); 473 474 eh_ifnet_event = EVENTHANDLER_REGISTER(ifnet_event, 475 roce_ifnet_event, NULL, EVENTHANDLER_PRI_ANY); 476 477 return 0; 478 } 479 480 void __exit roce_gid_mgmt_cleanup(void) 481 { 482 483 if (eh_ifnet_event != NULL) 484 EVENTHANDLER_DEREGISTER(ifnet_event, eh_ifnet_event); 485 486 unregister_inetaddr_notifier(&nb_inetaddr); 487 unregister_netdevice_notifier(&nb_inetaddr); 488 489 /* 490 * Ensure all gid deletion tasks complete before we go down, 491 * to avoid any reference to free'd memory. By the time 492 * ib-core is removed, all physical devices have been removed, 493 * so no issue with remaining hardware contexts. 494 */ 495 synchronize_rcu(); 496 drain_workqueue(roce_gid_mgmt_wq); 497 destroy_workqueue(roce_gid_mgmt_wq); 498 } 499