1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 3 * 4 * Copyright (c) 2004 Topspin Communications. All rights reserved. 5 * Copyright (c) 2005 Intel Corporation. All rights reserved. 6 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 7 * Copyright (c) 2005 Voltaire, Inc. All rights reserved. 8 * 9 * This software is available to you under a choice of one of two 10 * licenses. You may choose to be licensed under the terms of the GNU 11 * General Public License (GPL) Version 2, available from the file 12 * COPYING in the main directory of this source tree, or the 13 * OpenIB.org BSD license below: 14 * 15 * Redistribution and use in source and binary forms, with or 16 * without modification, are permitted provided that the following 17 * conditions are met: 18 * 19 * - Redistributions of source code must retain the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer. 22 * 23 * - Redistributions in binary form must reproduce the above 24 * copyright notice, this list of conditions and the following 25 * disclaimer in the documentation and/or other materials 26 * provided with the distribution. 27 * 28 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 29 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 30 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 31 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 32 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 33 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 34 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 35 * SOFTWARE. 36 */ 37 38 #include <sys/cdefs.h> 39 __FBSDID("$FreeBSD$"); 40 41 #include <linux/module.h> 42 #include <linux/errno.h> 43 #include <linux/slab.h> 44 #include <linux/workqueue.h> 45 #include <linux/netdevice.h> 46 #include <linux/in6.h> 47 48 #include <rdma/ib_addr.h> 49 #include <rdma/ib_cache.h> 50 51 #include "core_priv.h" 52 53 struct ib_pkey_cache { 54 int table_len; 55 u16 table[0]; 56 }; 57 58 struct ib_update_work { 59 struct work_struct work; 60 struct ib_device *device; 61 u8 port_num; 62 }; 63 64 union ib_gid zgid; 65 EXPORT_SYMBOL(zgid); 66 67 static const struct ib_gid_attr zattr; 68 69 enum gid_attr_find_mask { 70 GID_ATTR_FIND_MASK_GID = 1UL << 0, 71 GID_ATTR_FIND_MASK_NETDEV = 1UL << 1, 72 GID_ATTR_FIND_MASK_DEFAULT = 1UL << 2, 73 GID_ATTR_FIND_MASK_GID_TYPE = 1UL << 3, 74 }; 75 76 enum gid_table_entry_props { 77 GID_TABLE_ENTRY_INVALID = 1UL << 0, 78 GID_TABLE_ENTRY_DEFAULT = 1UL << 1, 79 }; 80 81 enum gid_table_write_action { 82 GID_TABLE_WRITE_ACTION_ADD, 83 GID_TABLE_WRITE_ACTION_DEL, 84 /* MODIFY only updates the GID table. Currently only used by 85 * ib_cache_update. 86 */ 87 GID_TABLE_WRITE_ACTION_MODIFY 88 }; 89 90 struct ib_gid_table_entry { 91 unsigned long props; 92 union ib_gid gid; 93 struct ib_gid_attr attr; 94 void *context; 95 }; 96 97 struct ib_gid_table { 98 int sz; 99 /* In RoCE, adding a GID to the table requires: 100 * (a) Find if this GID is already exists. 101 * (b) Find a free space. 102 * (c) Write the new GID 103 * 104 * Delete requires different set of operations: 105 * (a) Find the GID 106 * (b) Delete it. 107 * 108 * Add/delete should be carried out atomically. 109 * This is done by locking this mutex from multiple 110 * writers. We don't need this lock for IB, as the MAD 111 * layer replaces all entries. All data_vec entries 112 * are locked by this lock. 113 **/ 114 struct mutex lock; 115 /* This lock protects the table entries from being 116 * read and written simultaneously. 117 */ 118 rwlock_t rwlock; 119 struct ib_gid_table_entry *data_vec; 120 }; 121 122 static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port) 123 { 124 if (rdma_cap_roce_gid_table(ib_dev, port)) { 125 struct ib_event event; 126 127 event.device = ib_dev; 128 event.element.port_num = port; 129 event.event = IB_EVENT_GID_CHANGE; 130 131 ib_dispatch_event(&event); 132 } 133 } 134 135 static const char * const gid_type_str[] = { 136 [IB_GID_TYPE_IB] = "IB/RoCE v1", 137 [IB_GID_TYPE_ROCE_UDP_ENCAP] = "RoCE v2", 138 }; 139 140 const char *ib_cache_gid_type_str(enum ib_gid_type gid_type) 141 { 142 if (gid_type < ARRAY_SIZE(gid_type_str) && gid_type_str[gid_type]) 143 return gid_type_str[gid_type]; 144 145 return "Invalid GID type"; 146 } 147 EXPORT_SYMBOL(ib_cache_gid_type_str); 148 149 int ib_cache_gid_parse_type_str(const char *buf) 150 { 151 unsigned int i; 152 size_t len; 153 int err = -EINVAL; 154 155 len = strlen(buf); 156 if (len == 0) 157 return -EINVAL; 158 159 if (buf[len - 1] == '\n') 160 len--; 161 162 for (i = 0; i < ARRAY_SIZE(gid_type_str); ++i) 163 if (gid_type_str[i] && !strncmp(buf, gid_type_str[i], len) && 164 len == strlen(gid_type_str[i])) { 165 err = i; 166 break; 167 } 168 169 return err; 170 } 171 EXPORT_SYMBOL(ib_cache_gid_parse_type_str); 172 173 /* This function expects that rwlock will be write locked in all 174 * scenarios and that lock will be locked in sleep-able (RoCE) 175 * scenarios. 176 */ 177 static int write_gid(struct ib_device *ib_dev, u8 port, 178 struct ib_gid_table *table, int ix, 179 const union ib_gid *gid, 180 const struct ib_gid_attr *attr, 181 enum gid_table_write_action action, 182 bool default_gid) 183 __releases(&table->rwlock) __acquires(&table->rwlock) 184 { 185 int ret = 0; 186 struct ifnet *old_net_dev; 187 enum ib_gid_type old_gid_type; 188 189 /* in rdma_cap_roce_gid_table, this funciton should be protected by a 190 * sleep-able lock. 191 */ 192 193 if (rdma_cap_roce_gid_table(ib_dev, port)) { 194 table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID; 195 write_unlock_irq(&table->rwlock); 196 /* GID_TABLE_WRITE_ACTION_MODIFY currently isn't supported by 197 * RoCE providers and thus only updates the cache. 198 */ 199 if (action == GID_TABLE_WRITE_ACTION_ADD) 200 ret = ib_dev->add_gid(ib_dev, port, ix, gid, attr, 201 &table->data_vec[ix].context); 202 else if (action == GID_TABLE_WRITE_ACTION_DEL) 203 ret = ib_dev->del_gid(ib_dev, port, ix, 204 &table->data_vec[ix].context); 205 write_lock_irq(&table->rwlock); 206 } 207 208 old_net_dev = table->data_vec[ix].attr.ndev; 209 old_gid_type = table->data_vec[ix].attr.gid_type; 210 if (old_net_dev && old_net_dev != attr->ndev) 211 dev_put(old_net_dev); 212 /* if modify_gid failed, just delete the old gid */ 213 if (ret || action == GID_TABLE_WRITE_ACTION_DEL) { 214 gid = &zgid; 215 attr = &zattr; 216 table->data_vec[ix].context = NULL; 217 } 218 219 memcpy(&table->data_vec[ix].gid, gid, sizeof(*gid)); 220 memcpy(&table->data_vec[ix].attr, attr, sizeof(*attr)); 221 if (default_gid) { 222 table->data_vec[ix].props |= GID_TABLE_ENTRY_DEFAULT; 223 if (action == GID_TABLE_WRITE_ACTION_DEL) 224 table->data_vec[ix].attr.gid_type = old_gid_type; 225 } 226 if (table->data_vec[ix].attr.ndev && 227 table->data_vec[ix].attr.ndev != old_net_dev) 228 dev_hold(table->data_vec[ix].attr.ndev); 229 230 table->data_vec[ix].props &= ~GID_TABLE_ENTRY_INVALID; 231 232 return ret; 233 } 234 235 static int add_gid(struct ib_device *ib_dev, u8 port, 236 struct ib_gid_table *table, int ix, 237 const union ib_gid *gid, 238 const struct ib_gid_attr *attr, 239 bool default_gid) { 240 return write_gid(ib_dev, port, table, ix, gid, attr, 241 GID_TABLE_WRITE_ACTION_ADD, default_gid); 242 } 243 244 static int modify_gid(struct ib_device *ib_dev, u8 port, 245 struct ib_gid_table *table, int ix, 246 const union ib_gid *gid, 247 const struct ib_gid_attr *attr, 248 bool default_gid) { 249 return write_gid(ib_dev, port, table, ix, gid, attr, 250 GID_TABLE_WRITE_ACTION_MODIFY, default_gid); 251 } 252 253 static int del_gid(struct ib_device *ib_dev, u8 port, 254 struct ib_gid_table *table, int ix, 255 bool default_gid) { 256 return write_gid(ib_dev, port, table, ix, &zgid, &zattr, 257 GID_TABLE_WRITE_ACTION_DEL, default_gid); 258 } 259 260 /* rwlock should be read locked */ 261 static int find_gid(struct ib_gid_table *table, const union ib_gid *gid, 262 const struct ib_gid_attr *val, bool default_gid, 263 unsigned long mask, int *pempty) 264 { 265 int i = 0; 266 int found = -1; 267 int empty = pempty ? -1 : 0; 268 269 while (i < table->sz && (found < 0 || empty < 0)) { 270 struct ib_gid_table_entry *data = &table->data_vec[i]; 271 struct ib_gid_attr *attr = &data->attr; 272 int curr_index = i; 273 274 i++; 275 276 if (data->props & GID_TABLE_ENTRY_INVALID) 277 continue; 278 279 if (empty < 0) 280 if (!memcmp(&data->gid, &zgid, sizeof(*gid)) && 281 !memcmp(attr, &zattr, sizeof(*attr)) && 282 !data->props) 283 empty = curr_index; 284 285 if (found >= 0) 286 continue; 287 288 if (mask & GID_ATTR_FIND_MASK_GID_TYPE && 289 attr->gid_type != val->gid_type) 290 continue; 291 292 if (mask & GID_ATTR_FIND_MASK_GID && 293 memcmp(gid, &data->gid, sizeof(*gid))) 294 continue; 295 296 if (mask & GID_ATTR_FIND_MASK_NETDEV && 297 attr->ndev != val->ndev) 298 continue; 299 300 if (mask & GID_ATTR_FIND_MASK_DEFAULT && 301 !!(data->props & GID_TABLE_ENTRY_DEFAULT) != 302 default_gid) 303 continue; 304 305 found = curr_index; 306 } 307 308 if (pempty) 309 *pempty = empty; 310 311 return found; 312 } 313 314 static void addrconf_ifid_eui48(u8 *eui, struct ifnet *dev) 315 { 316 if (dev->if_addrlen != ETH_ALEN) 317 return; 318 memcpy(eui, IF_LLADDR(dev), 3); 319 memcpy(eui + 5, IF_LLADDR(dev) + 3, 3); 320 321 /* NOTE: The scope ID is added by the GID to IP conversion */ 322 323 eui[3] = 0xFF; 324 eui[4] = 0xFE; 325 eui[0] ^= 2; 326 } 327 328 static void make_default_gid(struct ifnet *dev, union ib_gid *gid) 329 { 330 gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL); 331 addrconf_ifid_eui48(&gid->raw[8], dev); 332 } 333 334 int ib_cache_gid_add(struct ib_device *ib_dev, u8 port, 335 union ib_gid *gid, struct ib_gid_attr *attr) 336 { 337 struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; 338 struct ib_gid_table *table; 339 int ix; 340 int ret = 0; 341 int empty; 342 343 table = ports_table[port - rdma_start_port(ib_dev)]; 344 345 if (!memcmp(gid, &zgid, sizeof(*gid))) 346 return -EINVAL; 347 348 mutex_lock(&table->lock); 349 write_lock_irq(&table->rwlock); 350 351 ix = find_gid(table, gid, attr, false, GID_ATTR_FIND_MASK_GID | 352 GID_ATTR_FIND_MASK_GID_TYPE | 353 GID_ATTR_FIND_MASK_NETDEV, &empty); 354 if (ix >= 0) 355 goto out_unlock; 356 357 if (empty < 0) { 358 ret = -ENOSPC; 359 goto out_unlock; 360 } 361 362 ret = add_gid(ib_dev, port, table, empty, gid, attr, false); 363 if (!ret) 364 dispatch_gid_change_event(ib_dev, port); 365 366 out_unlock: 367 write_unlock_irq(&table->rwlock); 368 mutex_unlock(&table->lock); 369 return ret; 370 } 371 372 int ib_cache_gid_del(struct ib_device *ib_dev, u8 port, 373 union ib_gid *gid, struct ib_gid_attr *attr) 374 { 375 struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; 376 struct ib_gid_table *table; 377 int ix; 378 379 table = ports_table[port - rdma_start_port(ib_dev)]; 380 381 mutex_lock(&table->lock); 382 write_lock_irq(&table->rwlock); 383 384 ix = find_gid(table, gid, attr, false, 385 GID_ATTR_FIND_MASK_GID | 386 GID_ATTR_FIND_MASK_GID_TYPE | 387 GID_ATTR_FIND_MASK_NETDEV | 388 GID_ATTR_FIND_MASK_DEFAULT, 389 NULL); 390 if (ix < 0) 391 goto out_unlock; 392 393 if (!del_gid(ib_dev, port, table, ix, false)) 394 dispatch_gid_change_event(ib_dev, port); 395 396 out_unlock: 397 write_unlock_irq(&table->rwlock); 398 mutex_unlock(&table->lock); 399 return 0; 400 } 401 402 int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port, 403 struct ifnet *ndev) 404 { 405 struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; 406 struct ib_gid_table *table; 407 int ix; 408 bool deleted = false; 409 410 table = ports_table[port - rdma_start_port(ib_dev)]; 411 412 mutex_lock(&table->lock); 413 write_lock_irq(&table->rwlock); 414 415 for (ix = 0; ix < table->sz; ix++) 416 if (table->data_vec[ix].attr.ndev == ndev) 417 if (!del_gid(ib_dev, port, table, ix, 418 !!(table->data_vec[ix].props & 419 GID_TABLE_ENTRY_DEFAULT))) 420 deleted = true; 421 422 write_unlock_irq(&table->rwlock); 423 mutex_unlock(&table->lock); 424 425 if (deleted) 426 dispatch_gid_change_event(ib_dev, port); 427 428 return 0; 429 } 430 431 static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index, 432 union ib_gid *gid, struct ib_gid_attr *attr) 433 { 434 struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; 435 struct ib_gid_table *table; 436 437 table = ports_table[port - rdma_start_port(ib_dev)]; 438 439 if (index < 0 || index >= table->sz) 440 return -EINVAL; 441 442 if (table->data_vec[index].props & GID_TABLE_ENTRY_INVALID) 443 return -EAGAIN; 444 445 memcpy(gid, &table->data_vec[index].gid, sizeof(*gid)); 446 if (attr) { 447 memcpy(attr, &table->data_vec[index].attr, sizeof(*attr)); 448 /* make sure network device is valid and attached */ 449 if (attr->ndev != NULL && 450 (attr->ndev->if_flags & IFF_DYING) == 0 && 451 attr->ndev->if_addr != NULL) 452 dev_hold(attr->ndev); 453 else 454 attr->ndev = NULL; 455 } 456 457 return 0; 458 } 459 460 static int _ib_cache_gid_table_find(struct ib_device *ib_dev, 461 const union ib_gid *gid, 462 const struct ib_gid_attr *val, 463 unsigned long mask, 464 u8 *port, u16 *index) 465 { 466 struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; 467 struct ib_gid_table *table; 468 u8 p; 469 int local_index; 470 unsigned long flags; 471 472 for (p = 0; p < ib_dev->phys_port_cnt; p++) { 473 table = ports_table[p]; 474 read_lock_irqsave(&table->rwlock, flags); 475 local_index = find_gid(table, gid, val, false, mask, NULL); 476 if (local_index >= 0) { 477 if (index) 478 *index = local_index; 479 if (port) 480 *port = p + rdma_start_port(ib_dev); 481 read_unlock_irqrestore(&table->rwlock, flags); 482 return 0; 483 } 484 read_unlock_irqrestore(&table->rwlock, flags); 485 } 486 487 return -ENOENT; 488 } 489 490 static int ib_cache_gid_find(struct ib_device *ib_dev, 491 const union ib_gid *gid, 492 enum ib_gid_type gid_type, 493 struct ifnet *ndev, u8 *port, 494 u16 *index) 495 { 496 unsigned long mask = GID_ATTR_FIND_MASK_GID | 497 GID_ATTR_FIND_MASK_GID_TYPE; 498 struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type}; 499 500 if (ndev) 501 mask |= GID_ATTR_FIND_MASK_NETDEV; 502 503 return _ib_cache_gid_table_find(ib_dev, gid, &gid_attr_val, 504 mask, port, index); 505 } 506 507 int ib_find_cached_gid_by_port(struct ib_device *ib_dev, 508 const union ib_gid *gid, 509 enum ib_gid_type gid_type, 510 u8 port, struct ifnet *ndev, 511 u16 *index) 512 { 513 int local_index; 514 struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; 515 struct ib_gid_table *table; 516 unsigned long mask = GID_ATTR_FIND_MASK_GID | 517 GID_ATTR_FIND_MASK_GID_TYPE; 518 struct ib_gid_attr val = {.ndev = ndev, .gid_type = gid_type}; 519 unsigned long flags; 520 521 if (!rdma_is_port_valid(ib_dev, port)) 522 return -ENOENT; 523 524 table = ports_table[port - rdma_start_port(ib_dev)]; 525 526 if (ndev) 527 mask |= GID_ATTR_FIND_MASK_NETDEV; 528 529 read_lock_irqsave(&table->rwlock, flags); 530 local_index = find_gid(table, gid, &val, false, mask, NULL); 531 if (local_index >= 0) { 532 if (index) 533 *index = local_index; 534 read_unlock_irqrestore(&table->rwlock, flags); 535 return 0; 536 } 537 538 read_unlock_irqrestore(&table->rwlock, flags); 539 return -ENOENT; 540 } 541 EXPORT_SYMBOL(ib_find_cached_gid_by_port); 542 543 /** 544 * ib_find_gid_by_filter - Returns the GID table index where a specified 545 * GID value occurs 546 * @device: The device to query. 547 * @gid: The GID value to search for. 548 * @port_num: The port number of the device where the GID value could be 549 * searched. 550 * @filter: The filter function is executed on any matching GID in the table. 551 * If the filter function returns true, the corresponding index is returned, 552 * otherwise, we continue searching the GID table. It's guaranteed that 553 * while filter is executed, ndev field is valid and the structure won't 554 * change. filter is executed in an atomic context. filter must not be NULL. 555 * @index: The index into the cached GID table where the GID was found. This 556 * parameter may be NULL. 557 * 558 * ib_cache_gid_find_by_filter() searches for the specified GID value 559 * of which the filter function returns true in the port's GID table. 560 * This function is only supported on RoCE ports. 561 * 562 */ 563 static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev, 564 const union ib_gid *gid, 565 u8 port, 566 bool (*filter)(const union ib_gid *, 567 const struct ib_gid_attr *, 568 void *), 569 void *context, 570 u16 *index) 571 { 572 struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; 573 struct ib_gid_table *table; 574 unsigned int i; 575 unsigned long flags; 576 bool found = false; 577 578 if (!ports_table) 579 return -EOPNOTSUPP; 580 581 if (!rdma_is_port_valid(ib_dev, port)) 582 return -EINVAL; 583 584 if (!rdma_protocol_roce(ib_dev, port)) 585 return -EPROTONOSUPPORT; 586 587 table = ports_table[port - rdma_start_port(ib_dev)]; 588 589 read_lock_irqsave(&table->rwlock, flags); 590 for (i = 0; i < table->sz; i++) { 591 struct ib_gid_attr attr; 592 593 if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID) 594 goto next; 595 596 if (memcmp(gid, &table->data_vec[i].gid, sizeof(*gid))) 597 goto next; 598 599 memcpy(&attr, &table->data_vec[i].attr, sizeof(attr)); 600 601 if (filter(gid, &attr, context)) 602 found = true; 603 604 next: 605 if (found) 606 break; 607 } 608 read_unlock_irqrestore(&table->rwlock, flags); 609 610 if (!found) 611 return -ENOENT; 612 613 if (index) 614 *index = i; 615 return 0; 616 } 617 618 static struct ib_gid_table *alloc_gid_table(int sz) 619 { 620 struct ib_gid_table *table = 621 kzalloc(sizeof(struct ib_gid_table), GFP_KERNEL); 622 623 if (!table) 624 return NULL; 625 626 table->data_vec = kcalloc(sz, sizeof(*table->data_vec), GFP_KERNEL); 627 if (!table->data_vec) 628 goto err_free_table; 629 630 mutex_init(&table->lock); 631 632 table->sz = sz; 633 rwlock_init(&table->rwlock); 634 635 return table; 636 637 err_free_table: 638 kfree(table); 639 return NULL; 640 } 641 642 static void release_gid_table(struct ib_gid_table *table) 643 { 644 if (table) { 645 kfree(table->data_vec); 646 kfree(table); 647 } 648 } 649 650 static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port, 651 struct ib_gid_table *table) 652 { 653 int i; 654 bool deleted = false; 655 656 if (!table) 657 return; 658 659 write_lock_irq(&table->rwlock); 660 for (i = 0; i < table->sz; ++i) { 661 if (memcmp(&table->data_vec[i].gid, &zgid, 662 sizeof(table->data_vec[i].gid))) 663 if (!del_gid(ib_dev, port, table, i, 664 table->data_vec[i].props & 665 GID_ATTR_FIND_MASK_DEFAULT)) 666 deleted = true; 667 } 668 write_unlock_irq(&table->rwlock); 669 670 if (deleted) 671 dispatch_gid_change_event(ib_dev, port); 672 } 673 674 void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port, 675 struct ifnet *ndev, 676 unsigned long gid_type_mask, 677 enum ib_cache_gid_default_mode mode) 678 { 679 struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; 680 union ib_gid gid; 681 struct ib_gid_attr gid_attr; 682 struct ib_gid_attr zattr_type = zattr; 683 struct ib_gid_table *table; 684 unsigned int gid_type; 685 686 table = ports_table[port - rdma_start_port(ib_dev)]; 687 688 make_default_gid(ndev, &gid); 689 memset(&gid_attr, 0, sizeof(gid_attr)); 690 gid_attr.ndev = ndev; 691 692 /* Default GID is created using unique GUID and local subnet prefix, 693 * as described in section 4.1.1 and 3.5.10 in IB spec 1.3. 694 * Therefore don't create RoCEv2 default GID based on it that 695 * resembles as IPv6 GID based on link local address when IPv6 is 696 * disabled in kernel. 697 */ 698 #ifndef INET6 699 gid_type_mask &= ~BIT(IB_GID_TYPE_ROCE_UDP_ENCAP); 700 #endif 701 702 for (gid_type = 0; gid_type < IB_GID_TYPE_SIZE; ++gid_type) { 703 int ix; 704 union ib_gid current_gid; 705 struct ib_gid_attr current_gid_attr = {}; 706 707 if (1UL << gid_type & ~gid_type_mask) 708 continue; 709 710 gid_attr.gid_type = gid_type; 711 712 mutex_lock(&table->lock); 713 write_lock_irq(&table->rwlock); 714 ix = find_gid(table, NULL, &gid_attr, true, 715 GID_ATTR_FIND_MASK_GID_TYPE | 716 GID_ATTR_FIND_MASK_DEFAULT, 717 NULL); 718 719 /* Coudn't find default GID location */ 720 if (WARN_ON(ix < 0)) 721 goto release; 722 723 zattr_type.gid_type = gid_type; 724 725 if (!__ib_cache_gid_get(ib_dev, port, ix, 726 ¤t_gid, ¤t_gid_attr) && 727 mode == IB_CACHE_GID_DEFAULT_MODE_SET && 728 !memcmp(&gid, ¤t_gid, sizeof(gid)) && 729 !memcmp(&gid_attr, ¤t_gid_attr, sizeof(gid_attr))) 730 goto release; 731 732 if (memcmp(¤t_gid, &zgid, sizeof(current_gid)) || 733 memcmp(¤t_gid_attr, &zattr_type, 734 sizeof(current_gid_attr))) { 735 if (del_gid(ib_dev, port, table, ix, true)) { 736 pr_warn("ib_cache_gid: can't delete index %d for default gid %pI6\n", 737 ix, gid.raw); 738 goto release; 739 } else { 740 dispatch_gid_change_event(ib_dev, port); 741 } 742 } 743 744 if (mode == IB_CACHE_GID_DEFAULT_MODE_SET) { 745 if (add_gid(ib_dev, port, table, ix, &gid, &gid_attr, true)) 746 pr_warn("ib_cache_gid: unable to add default gid %pI6\n", 747 gid.raw); 748 else 749 dispatch_gid_change_event(ib_dev, port); 750 } 751 752 release: 753 if (current_gid_attr.ndev) 754 dev_put(current_gid_attr.ndev); 755 write_unlock_irq(&table->rwlock); 756 mutex_unlock(&table->lock); 757 } 758 } 759 760 static int gid_table_reserve_default(struct ib_device *ib_dev, u8 port, 761 struct ib_gid_table *table) 762 { 763 unsigned int i; 764 unsigned long roce_gid_type_mask; 765 unsigned int num_default_gids; 766 unsigned int current_gid = 0; 767 768 roce_gid_type_mask = roce_gid_type_mask_support(ib_dev, port); 769 num_default_gids = hweight_long(roce_gid_type_mask); 770 for (i = 0; i < num_default_gids && i < table->sz; i++) { 771 struct ib_gid_table_entry *entry = 772 &table->data_vec[i]; 773 774 entry->props |= GID_TABLE_ENTRY_DEFAULT; 775 current_gid = find_next_bit(&roce_gid_type_mask, 776 BITS_PER_LONG, 777 current_gid); 778 entry->attr.gid_type = current_gid++; 779 } 780 781 return 0; 782 } 783 784 static int _gid_table_setup_one(struct ib_device *ib_dev) 785 { 786 u8 port; 787 struct ib_gid_table **table; 788 int err = 0; 789 790 table = kcalloc(ib_dev->phys_port_cnt, sizeof(*table), GFP_KERNEL); 791 792 if (!table) { 793 pr_warn("failed to allocate ib gid cache for %s\n", 794 ib_dev->name); 795 return -ENOMEM; 796 } 797 798 for (port = 0; port < ib_dev->phys_port_cnt; port++) { 799 u8 rdma_port = port + rdma_start_port(ib_dev); 800 801 table[port] = 802 alloc_gid_table( 803 ib_dev->port_immutable[rdma_port].gid_tbl_len); 804 if (!table[port]) { 805 err = -ENOMEM; 806 goto rollback_table_setup; 807 } 808 809 err = gid_table_reserve_default(ib_dev, 810 port + rdma_start_port(ib_dev), 811 table[port]); 812 if (err) 813 goto rollback_table_setup; 814 } 815 816 ib_dev->cache.gid_cache = table; 817 return 0; 818 819 rollback_table_setup: 820 for (port = 0; port < ib_dev->phys_port_cnt; port++) { 821 cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev), 822 table[port]); 823 release_gid_table(table[port]); 824 } 825 826 kfree(table); 827 return err; 828 } 829 830 static void gid_table_release_one(struct ib_device *ib_dev) 831 { 832 struct ib_gid_table **table = ib_dev->cache.gid_cache; 833 u8 port; 834 835 if (!table) 836 return; 837 838 for (port = 0; port < ib_dev->phys_port_cnt; port++) 839 release_gid_table(table[port]); 840 841 kfree(table); 842 ib_dev->cache.gid_cache = NULL; 843 } 844 845 static void gid_table_cleanup_one(struct ib_device *ib_dev) 846 { 847 struct ib_gid_table **table = ib_dev->cache.gid_cache; 848 u8 port; 849 850 if (!table) 851 return; 852 853 for (port = 0; port < ib_dev->phys_port_cnt; port++) 854 cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev), 855 table[port]); 856 } 857 858 static int gid_table_setup_one(struct ib_device *ib_dev) 859 { 860 int err; 861 862 err = _gid_table_setup_one(ib_dev); 863 864 if (err) 865 return err; 866 867 err = roce_rescan_device(ib_dev); 868 869 if (err) { 870 gid_table_cleanup_one(ib_dev); 871 gid_table_release_one(ib_dev); 872 } 873 874 return err; 875 } 876 877 int ib_get_cached_gid(struct ib_device *device, 878 u8 port_num, 879 int index, 880 union ib_gid *gid, 881 struct ib_gid_attr *gid_attr) 882 { 883 int res; 884 unsigned long flags; 885 struct ib_gid_table **ports_table = device->cache.gid_cache; 886 struct ib_gid_table *table = ports_table[port_num - rdma_start_port(device)]; 887 888 if (!rdma_is_port_valid(device, port_num)) 889 return -EINVAL; 890 891 read_lock_irqsave(&table->rwlock, flags); 892 res = __ib_cache_gid_get(device, port_num, index, gid, gid_attr); 893 read_unlock_irqrestore(&table->rwlock, flags); 894 895 return res; 896 } 897 EXPORT_SYMBOL(ib_get_cached_gid); 898 899 int ib_find_cached_gid(struct ib_device *device, 900 const union ib_gid *gid, 901 enum ib_gid_type gid_type, 902 struct ifnet *ndev, 903 u8 *port_num, 904 u16 *index) 905 { 906 return ib_cache_gid_find(device, gid, gid_type, ndev, port_num, index); 907 } 908 EXPORT_SYMBOL(ib_find_cached_gid); 909 910 int ib_find_gid_by_filter(struct ib_device *device, 911 const union ib_gid *gid, 912 u8 port_num, 913 bool (*filter)(const union ib_gid *gid, 914 const struct ib_gid_attr *, 915 void *), 916 void *context, u16 *index) 917 { 918 /* Only RoCE GID table supports filter function */ 919 if (!rdma_cap_roce_gid_table(device, port_num) && filter) 920 return -EPROTONOSUPPORT; 921 922 return ib_cache_gid_find_by_filter(device, gid, 923 port_num, filter, 924 context, index); 925 } 926 EXPORT_SYMBOL(ib_find_gid_by_filter); 927 928 int ib_get_cached_pkey(struct ib_device *device, 929 u8 port_num, 930 int index, 931 u16 *pkey) 932 { 933 struct ib_pkey_cache *cache; 934 unsigned long flags; 935 int ret = 0; 936 937 if (!rdma_is_port_valid(device, port_num)) 938 return -EINVAL; 939 940 read_lock_irqsave(&device->cache.lock, flags); 941 942 cache = device->cache.pkey_cache[port_num - rdma_start_port(device)]; 943 944 if (index < 0 || index >= cache->table_len) 945 ret = -EINVAL; 946 else 947 *pkey = cache->table[index]; 948 949 read_unlock_irqrestore(&device->cache.lock, flags); 950 951 return ret; 952 } 953 EXPORT_SYMBOL(ib_get_cached_pkey); 954 955 int ib_find_cached_pkey(struct ib_device *device, 956 u8 port_num, 957 u16 pkey, 958 u16 *index) 959 { 960 struct ib_pkey_cache *cache; 961 unsigned long flags; 962 int i; 963 int ret = -ENOENT; 964 int partial_ix = -1; 965 966 if (!rdma_is_port_valid(device, port_num)) 967 return -EINVAL; 968 969 read_lock_irqsave(&device->cache.lock, flags); 970 971 cache = device->cache.pkey_cache[port_num - rdma_start_port(device)]; 972 973 *index = -1; 974 975 for (i = 0; i < cache->table_len; ++i) 976 if ((cache->table[i] & 0x7fff) == (pkey & 0x7fff)) { 977 if (cache->table[i] & 0x8000) { 978 *index = i; 979 ret = 0; 980 break; 981 } else 982 partial_ix = i; 983 } 984 985 if (ret && partial_ix >= 0) { 986 *index = partial_ix; 987 ret = 0; 988 } 989 990 read_unlock_irqrestore(&device->cache.lock, flags); 991 992 return ret; 993 } 994 EXPORT_SYMBOL(ib_find_cached_pkey); 995 996 int ib_find_exact_cached_pkey(struct ib_device *device, 997 u8 port_num, 998 u16 pkey, 999 u16 *index) 1000 { 1001 struct ib_pkey_cache *cache; 1002 unsigned long flags; 1003 int i; 1004 int ret = -ENOENT; 1005 1006 if (!rdma_is_port_valid(device, port_num)) 1007 return -EINVAL; 1008 1009 read_lock_irqsave(&device->cache.lock, flags); 1010 1011 cache = device->cache.pkey_cache[port_num - rdma_start_port(device)]; 1012 1013 *index = -1; 1014 1015 for (i = 0; i < cache->table_len; ++i) 1016 if (cache->table[i] == pkey) { 1017 *index = i; 1018 ret = 0; 1019 break; 1020 } 1021 1022 read_unlock_irqrestore(&device->cache.lock, flags); 1023 1024 return ret; 1025 } 1026 EXPORT_SYMBOL(ib_find_exact_cached_pkey); 1027 1028 int ib_get_cached_lmc(struct ib_device *device, 1029 u8 port_num, 1030 u8 *lmc) 1031 { 1032 unsigned long flags; 1033 int ret = 0; 1034 1035 if (!rdma_is_port_valid(device, port_num)) 1036 return -EINVAL; 1037 1038 read_lock_irqsave(&device->cache.lock, flags); 1039 *lmc = device->cache.lmc_cache[port_num - rdma_start_port(device)]; 1040 read_unlock_irqrestore(&device->cache.lock, flags); 1041 1042 return ret; 1043 } 1044 EXPORT_SYMBOL(ib_get_cached_lmc); 1045 1046 static void ib_cache_update(struct ib_device *device, 1047 u8 port) 1048 { 1049 struct ib_port_attr *tprops = NULL; 1050 struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache; 1051 struct ib_gid_cache { 1052 int table_len; 1053 union ib_gid table[0]; 1054 } *gid_cache = NULL; 1055 int i; 1056 int ret; 1057 struct ib_gid_table *table; 1058 struct ib_gid_table **ports_table = device->cache.gid_cache; 1059 bool use_roce_gid_table = 1060 rdma_cap_roce_gid_table(device, port); 1061 1062 if (!rdma_is_port_valid(device, port)) 1063 return; 1064 1065 table = ports_table[port - rdma_start_port(device)]; 1066 1067 tprops = kmalloc(sizeof *tprops, GFP_KERNEL); 1068 if (!tprops) 1069 return; 1070 1071 ret = ib_query_port(device, port, tprops); 1072 if (ret) { 1073 pr_warn("ib_query_port failed (%d) for %s\n", 1074 ret, device->name); 1075 goto err; 1076 } 1077 1078 pkey_cache = kmalloc(sizeof *pkey_cache + tprops->pkey_tbl_len * 1079 sizeof *pkey_cache->table, GFP_KERNEL); 1080 if (!pkey_cache) 1081 goto err; 1082 1083 pkey_cache->table_len = tprops->pkey_tbl_len; 1084 1085 if (!use_roce_gid_table) { 1086 gid_cache = kmalloc(sizeof(*gid_cache) + tprops->gid_tbl_len * 1087 sizeof(*gid_cache->table), GFP_KERNEL); 1088 if (!gid_cache) 1089 goto err; 1090 1091 gid_cache->table_len = tprops->gid_tbl_len; 1092 } 1093 1094 for (i = 0; i < pkey_cache->table_len; ++i) { 1095 ret = ib_query_pkey(device, port, i, pkey_cache->table + i); 1096 if (ret) { 1097 pr_warn("ib_query_pkey failed (%d) for %s (index %d)\n", 1098 ret, device->name, i); 1099 goto err; 1100 } 1101 } 1102 1103 if (!use_roce_gid_table) { 1104 for (i = 0; i < gid_cache->table_len; ++i) { 1105 ret = ib_query_gid(device, port, i, 1106 gid_cache->table + i, NULL); 1107 if (ret) { 1108 pr_warn("ib_query_gid failed (%d) for %s (index %d)\n", 1109 ret, device->name, i); 1110 goto err; 1111 } 1112 } 1113 } 1114 1115 write_lock_irq(&device->cache.lock); 1116 1117 old_pkey_cache = device->cache.pkey_cache[port - rdma_start_port(device)]; 1118 1119 device->cache.pkey_cache[port - rdma_start_port(device)] = pkey_cache; 1120 if (!use_roce_gid_table) { 1121 write_lock(&table->rwlock); 1122 for (i = 0; i < gid_cache->table_len; i++) { 1123 modify_gid(device, port, table, i, gid_cache->table + i, 1124 &zattr, false); 1125 } 1126 write_unlock(&table->rwlock); 1127 } 1128 1129 device->cache.lmc_cache[port - rdma_start_port(device)] = tprops->lmc; 1130 1131 write_unlock_irq(&device->cache.lock); 1132 1133 kfree(gid_cache); 1134 kfree(old_pkey_cache); 1135 kfree(tprops); 1136 return; 1137 1138 err: 1139 kfree(pkey_cache); 1140 kfree(gid_cache); 1141 kfree(tprops); 1142 } 1143 1144 static void ib_cache_task(struct work_struct *_work) 1145 { 1146 struct ib_update_work *work = 1147 container_of(_work, struct ib_update_work, work); 1148 1149 ib_cache_update(work->device, work->port_num); 1150 kfree(work); 1151 } 1152 1153 static void ib_cache_event(struct ib_event_handler *handler, 1154 struct ib_event *event) 1155 { 1156 struct ib_update_work *work; 1157 1158 if (event->event == IB_EVENT_PORT_ERR || 1159 event->event == IB_EVENT_PORT_ACTIVE || 1160 event->event == IB_EVENT_LID_CHANGE || 1161 event->event == IB_EVENT_PKEY_CHANGE || 1162 event->event == IB_EVENT_SM_CHANGE || 1163 event->event == IB_EVENT_CLIENT_REREGISTER || 1164 event->event == IB_EVENT_GID_CHANGE) { 1165 work = kmalloc(sizeof *work, GFP_ATOMIC); 1166 if (work) { 1167 INIT_WORK(&work->work, ib_cache_task); 1168 work->device = event->device; 1169 work->port_num = event->element.port_num; 1170 queue_work(ib_wq, &work->work); 1171 } 1172 } 1173 } 1174 1175 int ib_cache_setup_one(struct ib_device *device) 1176 { 1177 int p; 1178 int err; 1179 1180 rwlock_init(&device->cache.lock); 1181 1182 device->cache.pkey_cache = 1183 kzalloc(sizeof *device->cache.pkey_cache * 1184 (rdma_end_port(device) - rdma_start_port(device) + 1), GFP_KERNEL); 1185 device->cache.lmc_cache = kmalloc(sizeof *device->cache.lmc_cache * 1186 (rdma_end_port(device) - 1187 rdma_start_port(device) + 1), 1188 GFP_KERNEL); 1189 if (!device->cache.pkey_cache || 1190 !device->cache.lmc_cache) { 1191 pr_warn("Couldn't allocate cache for %s\n", device->name); 1192 return -ENOMEM; 1193 } 1194 1195 err = gid_table_setup_one(device); 1196 if (err) 1197 /* Allocated memory will be cleaned in the release function */ 1198 return err; 1199 1200 for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p) 1201 ib_cache_update(device, p + rdma_start_port(device)); 1202 1203 INIT_IB_EVENT_HANDLER(&device->cache.event_handler, 1204 device, ib_cache_event); 1205 err = ib_register_event_handler(&device->cache.event_handler); 1206 if (err) 1207 goto err; 1208 1209 return 0; 1210 1211 err: 1212 gid_table_cleanup_one(device); 1213 return err; 1214 } 1215 1216 void ib_cache_release_one(struct ib_device *device) 1217 { 1218 int p; 1219 1220 /* 1221 * The release function frees all the cache elements. 1222 * This function should be called as part of freeing 1223 * all the device's resources when the cache could no 1224 * longer be accessed. 1225 */ 1226 if (device->cache.pkey_cache) 1227 for (p = 0; 1228 p <= rdma_end_port(device) - rdma_start_port(device); ++p) 1229 kfree(device->cache.pkey_cache[p]); 1230 1231 gid_table_release_one(device); 1232 kfree(device->cache.pkey_cache); 1233 kfree(device->cache.lmc_cache); 1234 } 1235 1236 void ib_cache_cleanup_one(struct ib_device *device) 1237 { 1238 /* The cleanup function unregisters the event handler, 1239 * waits for all in-progress workqueue elements and cleans 1240 * up the GID cache. This function should be called after 1241 * the device was removed from the devices list and all 1242 * clients were removed, so the cache exists but is 1243 * non-functional and shouldn't be updated anymore. 1244 */ 1245 ib_unregister_event_handler(&device->cache.event_handler); 1246 flush_workqueue(ib_wq); 1247 gid_table_cleanup_one(device); 1248 } 1249 1250 void __init ib_cache_setup(void) 1251 { 1252 roce_gid_mgmt_init(); 1253 } 1254 1255 void __exit ib_cache_cleanup(void) 1256 { 1257 roce_gid_mgmt_cleanup(); 1258 } 1259