1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 3 * 4 * Copyright (c) 2004 Topspin Communications. All rights reserved. 5 * Copyright (c) 2005 Intel Corporation. All rights reserved. 6 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 7 * Copyright (c) 2005 Voltaire, Inc. All rights reserved. 8 * 9 * This software is available to you under a choice of one of two 10 * licenses. You may choose to be licensed under the terms of the GNU 11 * General Public License (GPL) Version 2, available from the file 12 * COPYING in the main directory of this source tree, or the 13 * OpenIB.org BSD license below: 14 * 15 * Redistribution and use in source and binary forms, with or 16 * without modification, are permitted provided that the following 17 * conditions are met: 18 * 19 * - Redistributions of source code must retain the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer. 22 * 23 * - Redistributions in binary form must reproduce the above 24 * copyright notice, this list of conditions and the following 25 * disclaimer in the documentation and/or other materials 26 * provided with the distribution. 27 * 28 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 29 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 30 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 31 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 32 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 33 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 34 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 35 * SOFTWARE. 36 */ 37 38 #include <sys/cdefs.h> 39 #include <linux/module.h> 40 #include <linux/errno.h> 41 #include <linux/slab.h> 42 #include <linux/workqueue.h> 43 #include <linux/netdevice.h> 44 #include <linux/in6.h> 45 46 #include <rdma/ib_addr.h> 47 #include <rdma/ib_cache.h> 48 49 #include "core_priv.h" 50 51 struct ib_pkey_cache { 52 int table_len; 53 u16 table[0]; 54 }; 55 56 struct ib_update_work { 57 struct work_struct work; 58 struct ib_device *device; 59 u8 port_num; 60 }; 61 62 union ib_gid zgid; 63 EXPORT_SYMBOL(zgid); 64 65 static const struct ib_gid_attr zattr; 66 67 enum gid_attr_find_mask { 68 GID_ATTR_FIND_MASK_GID = 1UL << 0, 69 GID_ATTR_FIND_MASK_NETDEV = 1UL << 1, 70 GID_ATTR_FIND_MASK_DEFAULT = 1UL << 2, 71 GID_ATTR_FIND_MASK_GID_TYPE = 1UL << 3, 72 }; 73 74 enum gid_table_entry_props { 75 GID_TABLE_ENTRY_INVALID = 1UL << 0, 76 GID_TABLE_ENTRY_DEFAULT = 1UL << 1, 77 }; 78 79 enum gid_table_write_action { 80 GID_TABLE_WRITE_ACTION_ADD, 81 GID_TABLE_WRITE_ACTION_DEL, 82 /* MODIFY only updates the GID table. Currently only used by 83 * ib_cache_update. 84 */ 85 GID_TABLE_WRITE_ACTION_MODIFY 86 }; 87 88 struct ib_gid_table_entry { 89 unsigned long props; 90 union ib_gid gid; 91 struct ib_gid_attr attr; 92 void *context; 93 }; 94 95 struct ib_gid_table { 96 int sz; 97 /* In RoCE, adding a GID to the table requires: 98 * (a) Find if this GID is already exists. 99 * (b) Find a free space. 100 * (c) Write the new GID 101 * 102 * Delete requires different set of operations: 103 * (a) Find the GID 104 * (b) Delete it. 105 * 106 * Add/delete should be carried out atomically. 107 * This is done by locking this mutex from multiple 108 * writers. We don't need this lock for IB, as the MAD 109 * layer replaces all entries. All data_vec entries 110 * are locked by this lock. 111 **/ 112 struct mutex lock; 113 /* This lock protects the table entries from being 114 * read and written simultaneously. 115 */ 116 rwlock_t rwlock; 117 struct ib_gid_table_entry *data_vec; 118 }; 119 120 static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port) 121 { 122 if (rdma_cap_roce_gid_table(ib_dev, port)) { 123 struct ib_event event; 124 125 event.device = ib_dev; 126 event.element.port_num = port; 127 event.event = IB_EVENT_GID_CHANGE; 128 129 ib_dispatch_event(&event); 130 } 131 } 132 133 static const char * const gid_type_str[] = { 134 [IB_GID_TYPE_IB] = "IB/RoCE v1", 135 [IB_GID_TYPE_ROCE_UDP_ENCAP] = "RoCE v2", 136 }; 137 138 const char *ib_cache_gid_type_str(enum ib_gid_type gid_type) 139 { 140 if (gid_type < ARRAY_SIZE(gid_type_str) && gid_type_str[gid_type]) 141 return gid_type_str[gid_type]; 142 143 return "Invalid GID type"; 144 } 145 EXPORT_SYMBOL(ib_cache_gid_type_str); 146 147 int ib_cache_gid_parse_type_str(const char *buf) 148 { 149 unsigned int i; 150 size_t len; 151 int err = -EINVAL; 152 153 len = strlen(buf); 154 if (len == 0) 155 return -EINVAL; 156 157 if (buf[len - 1] == '\n') 158 len--; 159 160 for (i = 0; i < ARRAY_SIZE(gid_type_str); ++i) 161 if (gid_type_str[i] && !strncmp(buf, gid_type_str[i], len) && 162 len == strlen(gid_type_str[i])) { 163 err = i; 164 break; 165 } 166 167 return err; 168 } 169 EXPORT_SYMBOL(ib_cache_gid_parse_type_str); 170 171 /* This function expects that rwlock will be write locked in all 172 * scenarios and that lock will be locked in sleep-able (RoCE) 173 * scenarios. 174 */ 175 static int write_gid(struct ib_device *ib_dev, u8 port, 176 struct ib_gid_table *table, int ix, 177 const union ib_gid *gid, 178 const struct ib_gid_attr *attr, 179 enum gid_table_write_action action, 180 bool default_gid) 181 __releases(&table->rwlock) __acquires(&table->rwlock) 182 { 183 int ret = 0; 184 if_t old_net_dev; 185 enum ib_gid_type old_gid_type; 186 187 /* in rdma_cap_roce_gid_table, this funciton should be protected by a 188 * sleep-able lock. 189 */ 190 191 if (rdma_cap_roce_gid_table(ib_dev, port)) { 192 table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID; 193 write_unlock_irq(&table->rwlock); 194 /* GID_TABLE_WRITE_ACTION_MODIFY currently isn't supported by 195 * RoCE providers and thus only updates the cache. 196 */ 197 if (action == GID_TABLE_WRITE_ACTION_ADD) 198 ret = ib_dev->add_gid(ib_dev, port, ix, gid, attr, 199 &table->data_vec[ix].context); 200 else if (action == GID_TABLE_WRITE_ACTION_DEL) 201 ret = ib_dev->del_gid(ib_dev, port, ix, 202 &table->data_vec[ix].context); 203 write_lock_irq(&table->rwlock); 204 } 205 206 old_net_dev = table->data_vec[ix].attr.ndev; 207 old_gid_type = table->data_vec[ix].attr.gid_type; 208 if (old_net_dev && old_net_dev != attr->ndev) 209 dev_put(old_net_dev); 210 /* if modify_gid failed, just delete the old gid */ 211 if (ret || action == GID_TABLE_WRITE_ACTION_DEL) { 212 gid = &zgid; 213 attr = &zattr; 214 table->data_vec[ix].context = NULL; 215 } 216 217 memcpy(&table->data_vec[ix].gid, gid, sizeof(*gid)); 218 memcpy(&table->data_vec[ix].attr, attr, sizeof(*attr)); 219 if (default_gid) { 220 table->data_vec[ix].props |= GID_TABLE_ENTRY_DEFAULT; 221 if (action == GID_TABLE_WRITE_ACTION_DEL) 222 table->data_vec[ix].attr.gid_type = old_gid_type; 223 } 224 if (table->data_vec[ix].attr.ndev && 225 table->data_vec[ix].attr.ndev != old_net_dev) 226 dev_hold(table->data_vec[ix].attr.ndev); 227 228 table->data_vec[ix].props &= ~GID_TABLE_ENTRY_INVALID; 229 230 return ret; 231 } 232 233 static int add_gid(struct ib_device *ib_dev, u8 port, 234 struct ib_gid_table *table, int ix, 235 const union ib_gid *gid, 236 const struct ib_gid_attr *attr, 237 bool default_gid) { 238 return write_gid(ib_dev, port, table, ix, gid, attr, 239 GID_TABLE_WRITE_ACTION_ADD, default_gid); 240 } 241 242 static int modify_gid(struct ib_device *ib_dev, u8 port, 243 struct ib_gid_table *table, int ix, 244 const union ib_gid *gid, 245 const struct ib_gid_attr *attr, 246 bool default_gid) { 247 return write_gid(ib_dev, port, table, ix, gid, attr, 248 GID_TABLE_WRITE_ACTION_MODIFY, default_gid); 249 } 250 251 static int del_gid(struct ib_device *ib_dev, u8 port, 252 struct ib_gid_table *table, int ix, 253 bool default_gid) { 254 return write_gid(ib_dev, port, table, ix, &zgid, &zattr, 255 GID_TABLE_WRITE_ACTION_DEL, default_gid); 256 } 257 258 /* rwlock should be read locked */ 259 static int find_gid(struct ib_gid_table *table, const union ib_gid *gid, 260 const struct ib_gid_attr *val, bool default_gid, 261 unsigned long mask, int *pempty) 262 { 263 int i = 0; 264 int found = -1; 265 int empty = pempty ? -1 : 0; 266 267 while (i < table->sz && (found < 0 || empty < 0)) { 268 struct ib_gid_table_entry *data = &table->data_vec[i]; 269 struct ib_gid_attr *attr = &data->attr; 270 int curr_index = i; 271 272 i++; 273 274 if (data->props & GID_TABLE_ENTRY_INVALID) 275 continue; 276 277 if (empty < 0) 278 if (!memcmp(&data->gid, &zgid, sizeof(*gid)) && 279 !memcmp(attr, &zattr, sizeof(*attr)) && 280 !data->props) 281 empty = curr_index; 282 283 if (found >= 0) 284 continue; 285 286 if (mask & GID_ATTR_FIND_MASK_GID_TYPE && 287 attr->gid_type != val->gid_type) 288 continue; 289 290 if (mask & GID_ATTR_FIND_MASK_GID && 291 memcmp(gid, &data->gid, sizeof(*gid))) 292 continue; 293 294 if (mask & GID_ATTR_FIND_MASK_NETDEV && 295 attr->ndev != val->ndev) 296 continue; 297 298 if (mask & GID_ATTR_FIND_MASK_DEFAULT && 299 !!(data->props & GID_TABLE_ENTRY_DEFAULT) != 300 default_gid) 301 continue; 302 303 found = curr_index; 304 } 305 306 if (pempty) 307 *pempty = empty; 308 309 return found; 310 } 311 312 static void addrconf_ifid_eui48(u8 *eui, if_t dev) 313 { 314 if (if_getaddrlen(dev) != ETH_ALEN) 315 return; 316 memcpy(eui, if_getlladdr(dev), 3); 317 memcpy(eui + 5, if_getlladdr(dev) + 3, 3); 318 319 /* NOTE: The scope ID is added by the GID to IP conversion */ 320 321 eui[3] = 0xFF; 322 eui[4] = 0xFE; 323 eui[0] ^= 2; 324 } 325 326 static void make_default_gid(if_t dev, union ib_gid *gid) 327 { 328 gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL); 329 addrconf_ifid_eui48(&gid->raw[8], dev); 330 } 331 332 int ib_cache_gid_add(struct ib_device *ib_dev, u8 port, 333 union ib_gid *gid, struct ib_gid_attr *attr) 334 { 335 struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; 336 struct ib_gid_table *table; 337 int ix; 338 int ret = 0; 339 int empty; 340 341 table = ports_table[port - rdma_start_port(ib_dev)]; 342 343 if (!memcmp(gid, &zgid, sizeof(*gid))) 344 return -EINVAL; 345 346 mutex_lock(&table->lock); 347 write_lock_irq(&table->rwlock); 348 349 ix = find_gid(table, gid, attr, false, GID_ATTR_FIND_MASK_GID | 350 GID_ATTR_FIND_MASK_GID_TYPE | 351 GID_ATTR_FIND_MASK_NETDEV, &empty); 352 if (ix >= 0) 353 goto out_unlock; 354 355 if (empty < 0) { 356 ret = -ENOSPC; 357 goto out_unlock; 358 } 359 360 ret = add_gid(ib_dev, port, table, empty, gid, attr, false); 361 if (!ret) 362 dispatch_gid_change_event(ib_dev, port); 363 364 out_unlock: 365 write_unlock_irq(&table->rwlock); 366 mutex_unlock(&table->lock); 367 return ret; 368 } 369 370 int ib_cache_gid_del(struct ib_device *ib_dev, u8 port, 371 union ib_gid *gid, struct ib_gid_attr *attr) 372 { 373 struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; 374 struct ib_gid_table *table; 375 int ix; 376 377 table = ports_table[port - rdma_start_port(ib_dev)]; 378 379 mutex_lock(&table->lock); 380 write_lock_irq(&table->rwlock); 381 382 ix = find_gid(table, gid, attr, false, 383 GID_ATTR_FIND_MASK_GID | 384 GID_ATTR_FIND_MASK_GID_TYPE | 385 GID_ATTR_FIND_MASK_NETDEV | 386 GID_ATTR_FIND_MASK_DEFAULT, 387 NULL); 388 if (ix < 0) 389 goto out_unlock; 390 391 if (!del_gid(ib_dev, port, table, ix, false)) 392 dispatch_gid_change_event(ib_dev, port); 393 394 out_unlock: 395 write_unlock_irq(&table->rwlock); 396 mutex_unlock(&table->lock); 397 return 0; 398 } 399 400 int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port, 401 if_t ndev) 402 { 403 struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; 404 struct ib_gid_table *table; 405 int ix; 406 bool deleted = false; 407 408 table = ports_table[port - rdma_start_port(ib_dev)]; 409 410 mutex_lock(&table->lock); 411 write_lock_irq(&table->rwlock); 412 413 for (ix = 0; ix < table->sz; ix++) 414 if (table->data_vec[ix].attr.ndev == ndev) 415 if (!del_gid(ib_dev, port, table, ix, 416 !!(table->data_vec[ix].props & 417 GID_TABLE_ENTRY_DEFAULT))) 418 deleted = true; 419 420 write_unlock_irq(&table->rwlock); 421 mutex_unlock(&table->lock); 422 423 if (deleted) 424 dispatch_gid_change_event(ib_dev, port); 425 426 return 0; 427 } 428 429 static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index, 430 union ib_gid *gid, struct ib_gid_attr *attr) 431 { 432 struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; 433 struct ib_gid_table *table; 434 435 table = ports_table[port - rdma_start_port(ib_dev)]; 436 437 if (index < 0 || index >= table->sz) 438 return -EINVAL; 439 440 if (table->data_vec[index].props & GID_TABLE_ENTRY_INVALID) 441 return -EAGAIN; 442 443 memcpy(gid, &table->data_vec[index].gid, sizeof(*gid)); 444 if (attr) { 445 memcpy(attr, &table->data_vec[index].attr, sizeof(*attr)); 446 /* make sure network device is valid and attached */ 447 if (attr->ndev != NULL && 448 (if_getflags(attr->ndev) & IFF_DYING) == 0 && 449 if_getifaddr(attr->ndev) != NULL) 450 dev_hold(attr->ndev); 451 else 452 attr->ndev = NULL; 453 } 454 455 return 0; 456 } 457 458 static int _ib_cache_gid_table_find(struct ib_device *ib_dev, 459 const union ib_gid *gid, 460 const struct ib_gid_attr *val, 461 unsigned long mask, 462 u8 *port, u16 *index) 463 { 464 struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; 465 struct ib_gid_table *table; 466 u8 p; 467 int local_index; 468 unsigned long flags; 469 470 for (p = 0; p < ib_dev->phys_port_cnt; p++) { 471 table = ports_table[p]; 472 read_lock_irqsave(&table->rwlock, flags); 473 local_index = find_gid(table, gid, val, false, mask, NULL); 474 if (local_index >= 0) { 475 if (index) 476 *index = local_index; 477 if (port) 478 *port = p + rdma_start_port(ib_dev); 479 read_unlock_irqrestore(&table->rwlock, flags); 480 return 0; 481 } 482 read_unlock_irqrestore(&table->rwlock, flags); 483 } 484 485 return -ENOENT; 486 } 487 488 static int ib_cache_gid_find(struct ib_device *ib_dev, 489 const union ib_gid *gid, 490 enum ib_gid_type gid_type, 491 if_t ndev, u8 *port, 492 u16 *index) 493 { 494 unsigned long mask = GID_ATTR_FIND_MASK_GID | 495 GID_ATTR_FIND_MASK_GID_TYPE; 496 struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type}; 497 498 if (ndev) 499 mask |= GID_ATTR_FIND_MASK_NETDEV; 500 501 return _ib_cache_gid_table_find(ib_dev, gid, &gid_attr_val, 502 mask, port, index); 503 } 504 505 int ib_find_cached_gid_by_port(struct ib_device *ib_dev, 506 const union ib_gid *gid, 507 enum ib_gid_type gid_type, 508 u8 port, if_t ndev, 509 u16 *index) 510 { 511 int local_index; 512 struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; 513 struct ib_gid_table *table; 514 unsigned long mask = GID_ATTR_FIND_MASK_GID | 515 GID_ATTR_FIND_MASK_GID_TYPE; 516 struct ib_gid_attr val = {.ndev = ndev, .gid_type = gid_type}; 517 unsigned long flags; 518 519 if (!rdma_is_port_valid(ib_dev, port)) 520 return -ENOENT; 521 522 table = ports_table[port - rdma_start_port(ib_dev)]; 523 524 if (ndev) 525 mask |= GID_ATTR_FIND_MASK_NETDEV; 526 527 read_lock_irqsave(&table->rwlock, flags); 528 local_index = find_gid(table, gid, &val, false, mask, NULL); 529 if (local_index >= 0) { 530 if (index) 531 *index = local_index; 532 read_unlock_irqrestore(&table->rwlock, flags); 533 return 0; 534 } 535 536 read_unlock_irqrestore(&table->rwlock, flags); 537 return -ENOENT; 538 } 539 EXPORT_SYMBOL(ib_find_cached_gid_by_port); 540 541 /** 542 * ib_find_gid_by_filter - Returns the GID table index where a specified 543 * GID value occurs 544 * @device: The device to query. 545 * @gid: The GID value to search for. 546 * @port_num: The port number of the device where the GID value could be 547 * searched. 548 * @filter: The filter function is executed on any matching GID in the table. 549 * If the filter function returns true, the corresponding index is returned, 550 * otherwise, we continue searching the GID table. It's guaranteed that 551 * while filter is executed, ndev field is valid and the structure won't 552 * change. filter is executed in an atomic context. filter must not be NULL. 553 * @index: The index into the cached GID table where the GID was found. This 554 * parameter may be NULL. 555 * 556 * ib_cache_gid_find_by_filter() searches for the specified GID value 557 * of which the filter function returns true in the port's GID table. 558 * This function is only supported on RoCE ports. 559 * 560 */ 561 static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev, 562 const union ib_gid *gid, 563 u8 port, 564 bool (*filter)(const union ib_gid *, 565 const struct ib_gid_attr *, 566 void *), 567 void *context, 568 u16 *index) 569 { 570 struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; 571 struct ib_gid_table *table; 572 unsigned int i; 573 unsigned long flags; 574 bool found = false; 575 576 if (!ports_table) 577 return -EOPNOTSUPP; 578 579 if (!rdma_is_port_valid(ib_dev, port)) 580 return -EINVAL; 581 582 if (!rdma_protocol_roce(ib_dev, port)) 583 return -EPROTONOSUPPORT; 584 585 table = ports_table[port - rdma_start_port(ib_dev)]; 586 587 read_lock_irqsave(&table->rwlock, flags); 588 for (i = 0; i < table->sz; i++) { 589 struct ib_gid_attr attr; 590 591 if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID) 592 goto next; 593 594 if (memcmp(gid, &table->data_vec[i].gid, sizeof(*gid))) 595 goto next; 596 597 memcpy(&attr, &table->data_vec[i].attr, sizeof(attr)); 598 599 if (filter(gid, &attr, context)) 600 found = true; 601 602 next: 603 if (found) 604 break; 605 } 606 read_unlock_irqrestore(&table->rwlock, flags); 607 608 if (!found) 609 return -ENOENT; 610 611 if (index) 612 *index = i; 613 return 0; 614 } 615 616 static struct ib_gid_table *alloc_gid_table(int sz) 617 { 618 struct ib_gid_table *table = 619 kzalloc(sizeof(struct ib_gid_table), GFP_KERNEL); 620 621 if (!table) 622 return NULL; 623 624 table->data_vec = kcalloc(sz, sizeof(*table->data_vec), GFP_KERNEL); 625 if (!table->data_vec) 626 goto err_free_table; 627 628 mutex_init(&table->lock); 629 630 table->sz = sz; 631 rwlock_init(&table->rwlock); 632 633 return table; 634 635 err_free_table: 636 kfree(table); 637 return NULL; 638 } 639 640 static void release_gid_table(struct ib_gid_table *table) 641 { 642 if (table) { 643 kfree(table->data_vec); 644 kfree(table); 645 } 646 } 647 648 static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port, 649 struct ib_gid_table *table) 650 { 651 int i; 652 bool deleted = false; 653 654 if (!table) 655 return; 656 657 write_lock_irq(&table->rwlock); 658 for (i = 0; i < table->sz; ++i) { 659 if (memcmp(&table->data_vec[i].gid, &zgid, 660 sizeof(table->data_vec[i].gid))) 661 if (!del_gid(ib_dev, port, table, i, 662 table->data_vec[i].props & 663 GID_ATTR_FIND_MASK_DEFAULT)) 664 deleted = true; 665 } 666 write_unlock_irq(&table->rwlock); 667 668 if (deleted) 669 dispatch_gid_change_event(ib_dev, port); 670 } 671 672 void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port, 673 if_t ndev, 674 unsigned long gid_type_mask, 675 enum ib_cache_gid_default_mode mode) 676 { 677 struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; 678 union ib_gid gid; 679 struct ib_gid_attr gid_attr; 680 struct ib_gid_attr zattr_type = zattr; 681 struct ib_gid_table *table; 682 unsigned int gid_type; 683 684 table = ports_table[port - rdma_start_port(ib_dev)]; 685 686 make_default_gid(ndev, &gid); 687 memset(&gid_attr, 0, sizeof(gid_attr)); 688 gid_attr.ndev = ndev; 689 690 /* Default GID is created using unique GUID and local subnet prefix, 691 * as described in section 4.1.1 and 3.5.10 in IB spec 1.3. 692 * Therefore don't create RoCEv2 default GID based on it that 693 * resembles as IPv6 GID based on link local address when IPv6 is 694 * disabled in kernel. 695 */ 696 #ifndef INET6 697 gid_type_mask &= ~BIT(IB_GID_TYPE_ROCE_UDP_ENCAP); 698 #endif 699 700 for (gid_type = 0; gid_type < IB_GID_TYPE_SIZE; ++gid_type) { 701 int ix; 702 union ib_gid current_gid; 703 struct ib_gid_attr current_gid_attr = {}; 704 705 if (1UL << gid_type & ~gid_type_mask) 706 continue; 707 708 gid_attr.gid_type = gid_type; 709 710 mutex_lock(&table->lock); 711 write_lock_irq(&table->rwlock); 712 ix = find_gid(table, NULL, &gid_attr, true, 713 GID_ATTR_FIND_MASK_GID_TYPE | 714 GID_ATTR_FIND_MASK_DEFAULT, 715 NULL); 716 717 /* Coudn't find default GID location */ 718 if (WARN_ON(ix < 0)) 719 goto release; 720 721 zattr_type.gid_type = gid_type; 722 723 if (!__ib_cache_gid_get(ib_dev, port, ix, 724 ¤t_gid, ¤t_gid_attr) && 725 mode == IB_CACHE_GID_DEFAULT_MODE_SET && 726 !memcmp(&gid, ¤t_gid, sizeof(gid)) && 727 !memcmp(&gid_attr, ¤t_gid_attr, sizeof(gid_attr))) 728 goto release; 729 730 if (memcmp(¤t_gid, &zgid, sizeof(current_gid)) || 731 memcmp(¤t_gid_attr, &zattr_type, 732 sizeof(current_gid_attr))) { 733 if (del_gid(ib_dev, port, table, ix, true)) { 734 pr_warn("ib_cache_gid: can't delete index %d for default gid %pI6\n", 735 ix, gid.raw); 736 goto release; 737 } else { 738 dispatch_gid_change_event(ib_dev, port); 739 } 740 } 741 742 if (mode == IB_CACHE_GID_DEFAULT_MODE_SET) { 743 if (add_gid(ib_dev, port, table, ix, &gid, &gid_attr, true)) 744 pr_warn("ib_cache_gid: unable to add default gid %pI6\n", 745 gid.raw); 746 else 747 dispatch_gid_change_event(ib_dev, port); 748 } 749 750 release: 751 if (current_gid_attr.ndev) 752 dev_put(current_gid_attr.ndev); 753 write_unlock_irq(&table->rwlock); 754 mutex_unlock(&table->lock); 755 } 756 } 757 758 static int gid_table_reserve_default(struct ib_device *ib_dev, u8 port, 759 struct ib_gid_table *table) 760 { 761 unsigned int i; 762 unsigned long roce_gid_type_mask; 763 unsigned int num_default_gids; 764 unsigned int current_gid = 0; 765 766 roce_gid_type_mask = roce_gid_type_mask_support(ib_dev, port); 767 num_default_gids = hweight_long(roce_gid_type_mask); 768 for (i = 0; i < num_default_gids && i < table->sz; i++) { 769 struct ib_gid_table_entry *entry = 770 &table->data_vec[i]; 771 772 entry->props |= GID_TABLE_ENTRY_DEFAULT; 773 current_gid = find_next_bit(&roce_gid_type_mask, 774 BITS_PER_LONG, 775 current_gid); 776 entry->attr.gid_type = current_gid++; 777 } 778 779 return 0; 780 } 781 782 static int _gid_table_setup_one(struct ib_device *ib_dev) 783 { 784 u8 port; 785 struct ib_gid_table **table; 786 int err = 0; 787 788 table = kcalloc(ib_dev->phys_port_cnt, sizeof(*table), GFP_KERNEL); 789 790 if (!table) { 791 pr_warn("failed to allocate ib gid cache for %s\n", 792 ib_dev->name); 793 return -ENOMEM; 794 } 795 796 for (port = 0; port < ib_dev->phys_port_cnt; port++) { 797 u8 rdma_port = port + rdma_start_port(ib_dev); 798 799 table[port] = 800 alloc_gid_table( 801 ib_dev->port_immutable[rdma_port].gid_tbl_len); 802 if (!table[port]) { 803 err = -ENOMEM; 804 goto rollback_table_setup; 805 } 806 807 err = gid_table_reserve_default(ib_dev, 808 port + rdma_start_port(ib_dev), 809 table[port]); 810 if (err) 811 goto rollback_table_setup; 812 } 813 814 ib_dev->cache.gid_cache = table; 815 return 0; 816 817 rollback_table_setup: 818 for (port = 0; port < ib_dev->phys_port_cnt; port++) { 819 cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev), 820 table[port]); 821 release_gid_table(table[port]); 822 } 823 824 kfree(table); 825 return err; 826 } 827 828 static void gid_table_release_one(struct ib_device *ib_dev) 829 { 830 struct ib_gid_table **table = ib_dev->cache.gid_cache; 831 u8 port; 832 833 if (!table) 834 return; 835 836 for (port = 0; port < ib_dev->phys_port_cnt; port++) 837 release_gid_table(table[port]); 838 839 kfree(table); 840 ib_dev->cache.gid_cache = NULL; 841 } 842 843 static void gid_table_cleanup_one(struct ib_device *ib_dev) 844 { 845 struct ib_gid_table **table = ib_dev->cache.gid_cache; 846 u8 port; 847 848 if (!table) 849 return; 850 851 for (port = 0; port < ib_dev->phys_port_cnt; port++) 852 cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev), 853 table[port]); 854 } 855 856 static int gid_table_setup_one(struct ib_device *ib_dev) 857 { 858 int err; 859 860 err = _gid_table_setup_one(ib_dev); 861 862 if (err) 863 return err; 864 865 err = roce_rescan_device(ib_dev); 866 867 if (err) { 868 gid_table_cleanup_one(ib_dev); 869 gid_table_release_one(ib_dev); 870 } 871 872 return err; 873 } 874 875 int ib_get_cached_gid(struct ib_device *device, 876 u8 port_num, 877 int index, 878 union ib_gid *gid, 879 struct ib_gid_attr *gid_attr) 880 { 881 int res; 882 unsigned long flags; 883 struct ib_gid_table **ports_table = device->cache.gid_cache; 884 struct ib_gid_table *table = ports_table[port_num - rdma_start_port(device)]; 885 886 if (!rdma_is_port_valid(device, port_num)) 887 return -EINVAL; 888 889 read_lock_irqsave(&table->rwlock, flags); 890 res = __ib_cache_gid_get(device, port_num, index, gid, gid_attr); 891 read_unlock_irqrestore(&table->rwlock, flags); 892 893 return res; 894 } 895 EXPORT_SYMBOL(ib_get_cached_gid); 896 897 int ib_find_cached_gid(struct ib_device *device, 898 const union ib_gid *gid, 899 enum ib_gid_type gid_type, 900 if_t ndev, 901 u8 *port_num, 902 u16 *index) 903 { 904 return ib_cache_gid_find(device, gid, gid_type, ndev, port_num, index); 905 } 906 EXPORT_SYMBOL(ib_find_cached_gid); 907 908 int ib_find_gid_by_filter(struct ib_device *device, 909 const union ib_gid *gid, 910 u8 port_num, 911 bool (*filter)(const union ib_gid *gid, 912 const struct ib_gid_attr *, 913 void *), 914 void *context, u16 *index) 915 { 916 /* Only RoCE GID table supports filter function */ 917 if (!rdma_cap_roce_gid_table(device, port_num) && filter) 918 return -EPROTONOSUPPORT; 919 920 return ib_cache_gid_find_by_filter(device, gid, 921 port_num, filter, 922 context, index); 923 } 924 EXPORT_SYMBOL(ib_find_gid_by_filter); 925 926 int ib_get_cached_pkey(struct ib_device *device, 927 u8 port_num, 928 int index, 929 u16 *pkey) 930 { 931 struct ib_pkey_cache *cache; 932 unsigned long flags; 933 int ret = 0; 934 935 if (!rdma_is_port_valid(device, port_num)) 936 return -EINVAL; 937 938 read_lock_irqsave(&device->cache.lock, flags); 939 940 cache = device->cache.pkey_cache[port_num - rdma_start_port(device)]; 941 942 if (index < 0 || index >= cache->table_len) 943 ret = -EINVAL; 944 else 945 *pkey = cache->table[index]; 946 947 read_unlock_irqrestore(&device->cache.lock, flags); 948 949 return ret; 950 } 951 EXPORT_SYMBOL(ib_get_cached_pkey); 952 953 int ib_find_cached_pkey(struct ib_device *device, 954 u8 port_num, 955 u16 pkey, 956 u16 *index) 957 { 958 struct ib_pkey_cache *cache; 959 unsigned long flags; 960 int i; 961 int ret = -ENOENT; 962 int partial_ix = -1; 963 964 if (!rdma_is_port_valid(device, port_num)) 965 return -EINVAL; 966 967 read_lock_irqsave(&device->cache.lock, flags); 968 969 cache = device->cache.pkey_cache[port_num - rdma_start_port(device)]; 970 971 *index = -1; 972 973 for (i = 0; i < cache->table_len; ++i) 974 if ((cache->table[i] & 0x7fff) == (pkey & 0x7fff)) { 975 if (cache->table[i] & 0x8000) { 976 *index = i; 977 ret = 0; 978 break; 979 } else 980 partial_ix = i; 981 } 982 983 if (ret && partial_ix >= 0) { 984 *index = partial_ix; 985 ret = 0; 986 } 987 988 read_unlock_irqrestore(&device->cache.lock, flags); 989 990 return ret; 991 } 992 EXPORT_SYMBOL(ib_find_cached_pkey); 993 994 int ib_find_exact_cached_pkey(struct ib_device *device, 995 u8 port_num, 996 u16 pkey, 997 u16 *index) 998 { 999 struct ib_pkey_cache *cache; 1000 unsigned long flags; 1001 int i; 1002 int ret = -ENOENT; 1003 1004 if (!rdma_is_port_valid(device, port_num)) 1005 return -EINVAL; 1006 1007 read_lock_irqsave(&device->cache.lock, flags); 1008 1009 cache = device->cache.pkey_cache[port_num - rdma_start_port(device)]; 1010 1011 *index = -1; 1012 1013 for (i = 0; i < cache->table_len; ++i) 1014 if (cache->table[i] == pkey) { 1015 *index = i; 1016 ret = 0; 1017 break; 1018 } 1019 1020 read_unlock_irqrestore(&device->cache.lock, flags); 1021 1022 return ret; 1023 } 1024 EXPORT_SYMBOL(ib_find_exact_cached_pkey); 1025 1026 int ib_get_cached_lmc(struct ib_device *device, 1027 u8 port_num, 1028 u8 *lmc) 1029 { 1030 unsigned long flags; 1031 int ret = 0; 1032 1033 if (!rdma_is_port_valid(device, port_num)) 1034 return -EINVAL; 1035 1036 read_lock_irqsave(&device->cache.lock, flags); 1037 *lmc = device->cache.lmc_cache[port_num - rdma_start_port(device)]; 1038 read_unlock_irqrestore(&device->cache.lock, flags); 1039 1040 return ret; 1041 } 1042 EXPORT_SYMBOL(ib_get_cached_lmc); 1043 1044 static void ib_cache_update(struct ib_device *device, 1045 u8 port) 1046 { 1047 struct ib_port_attr *tprops = NULL; 1048 struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache; 1049 struct ib_gid_cache { 1050 int table_len; 1051 union ib_gid table[0]; 1052 } *gid_cache = NULL; 1053 int i; 1054 int ret; 1055 struct ib_gid_table *table; 1056 struct ib_gid_table **ports_table = device->cache.gid_cache; 1057 bool use_roce_gid_table = 1058 rdma_cap_roce_gid_table(device, port); 1059 1060 if (!rdma_is_port_valid(device, port)) 1061 return; 1062 1063 table = ports_table[port - rdma_start_port(device)]; 1064 1065 tprops = kmalloc(sizeof *tprops, GFP_KERNEL); 1066 if (!tprops) 1067 return; 1068 1069 ret = ib_query_port(device, port, tprops); 1070 if (ret) { 1071 pr_warn("ib_query_port failed (%d) for %s\n", 1072 ret, device->name); 1073 goto err; 1074 } 1075 1076 pkey_cache = kmalloc(sizeof *pkey_cache + tprops->pkey_tbl_len * 1077 sizeof *pkey_cache->table, GFP_KERNEL); 1078 if (!pkey_cache) 1079 goto err; 1080 1081 pkey_cache->table_len = tprops->pkey_tbl_len; 1082 1083 if (!use_roce_gid_table) { 1084 gid_cache = kmalloc(sizeof(*gid_cache) + tprops->gid_tbl_len * 1085 sizeof(*gid_cache->table), GFP_KERNEL); 1086 if (!gid_cache) 1087 goto err; 1088 1089 gid_cache->table_len = tprops->gid_tbl_len; 1090 } 1091 1092 for (i = 0; i < pkey_cache->table_len; ++i) { 1093 ret = ib_query_pkey(device, port, i, pkey_cache->table + i); 1094 if (ret) { 1095 pr_warn("ib_query_pkey failed (%d) for %s (index %d)\n", 1096 ret, device->name, i); 1097 goto err; 1098 } 1099 } 1100 1101 if (!use_roce_gid_table) { 1102 for (i = 0; i < gid_cache->table_len; ++i) { 1103 ret = ib_query_gid(device, port, i, 1104 gid_cache->table + i, NULL); 1105 if (ret) { 1106 pr_warn("ib_query_gid failed (%d) for %s (index %d)\n", 1107 ret, device->name, i); 1108 goto err; 1109 } 1110 } 1111 } 1112 1113 write_lock_irq(&device->cache.lock); 1114 1115 old_pkey_cache = device->cache.pkey_cache[port - rdma_start_port(device)]; 1116 1117 device->cache.pkey_cache[port - rdma_start_port(device)] = pkey_cache; 1118 if (!use_roce_gid_table) { 1119 write_lock(&table->rwlock); 1120 for (i = 0; i < gid_cache->table_len; i++) { 1121 modify_gid(device, port, table, i, gid_cache->table + i, 1122 &zattr, false); 1123 } 1124 write_unlock(&table->rwlock); 1125 } 1126 1127 device->cache.lmc_cache[port - rdma_start_port(device)] = tprops->lmc; 1128 1129 write_unlock_irq(&device->cache.lock); 1130 1131 kfree(gid_cache); 1132 kfree(old_pkey_cache); 1133 kfree(tprops); 1134 return; 1135 1136 err: 1137 kfree(pkey_cache); 1138 kfree(gid_cache); 1139 kfree(tprops); 1140 } 1141 1142 static void ib_cache_task(struct work_struct *_work) 1143 { 1144 struct ib_update_work *work = 1145 container_of(_work, struct ib_update_work, work); 1146 1147 ib_cache_update(work->device, work->port_num); 1148 kfree(work); 1149 } 1150 1151 static void ib_cache_event(struct ib_event_handler *handler, 1152 struct ib_event *event) 1153 { 1154 struct ib_update_work *work; 1155 1156 if (event->event == IB_EVENT_PORT_ERR || 1157 event->event == IB_EVENT_PORT_ACTIVE || 1158 event->event == IB_EVENT_LID_CHANGE || 1159 event->event == IB_EVENT_PKEY_CHANGE || 1160 event->event == IB_EVENT_SM_CHANGE || 1161 event->event == IB_EVENT_CLIENT_REREGISTER || 1162 event->event == IB_EVENT_GID_CHANGE) { 1163 work = kmalloc(sizeof *work, GFP_ATOMIC); 1164 if (work) { 1165 INIT_WORK(&work->work, ib_cache_task); 1166 work->device = event->device; 1167 work->port_num = event->element.port_num; 1168 queue_work(ib_wq, &work->work); 1169 } 1170 } 1171 } 1172 1173 int ib_cache_setup_one(struct ib_device *device) 1174 { 1175 int p; 1176 int err; 1177 1178 rwlock_init(&device->cache.lock); 1179 1180 device->cache.pkey_cache = 1181 kzalloc(sizeof *device->cache.pkey_cache * 1182 (rdma_end_port(device) - rdma_start_port(device) + 1), GFP_KERNEL); 1183 device->cache.lmc_cache = kmalloc(sizeof *device->cache.lmc_cache * 1184 (rdma_end_port(device) - 1185 rdma_start_port(device) + 1), 1186 GFP_KERNEL); 1187 if (!device->cache.pkey_cache || 1188 !device->cache.lmc_cache) { 1189 pr_warn("Couldn't allocate cache for %s\n", device->name); 1190 return -ENOMEM; 1191 } 1192 1193 err = gid_table_setup_one(device); 1194 if (err) 1195 /* Allocated memory will be cleaned in the release function */ 1196 return err; 1197 1198 for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p) 1199 ib_cache_update(device, p + rdma_start_port(device)); 1200 1201 INIT_IB_EVENT_HANDLER(&device->cache.event_handler, 1202 device, ib_cache_event); 1203 err = ib_register_event_handler(&device->cache.event_handler); 1204 if (err) 1205 goto err; 1206 1207 return 0; 1208 1209 err: 1210 gid_table_cleanup_one(device); 1211 return err; 1212 } 1213 1214 void ib_cache_release_one(struct ib_device *device) 1215 { 1216 int p; 1217 1218 /* 1219 * The release function frees all the cache elements. 1220 * This function should be called as part of freeing 1221 * all the device's resources when the cache could no 1222 * longer be accessed. 1223 */ 1224 if (device->cache.pkey_cache) 1225 for (p = 0; 1226 p <= rdma_end_port(device) - rdma_start_port(device); ++p) 1227 kfree(device->cache.pkey_cache[p]); 1228 1229 gid_table_release_one(device); 1230 kfree(device->cache.pkey_cache); 1231 kfree(device->cache.lmc_cache); 1232 } 1233 1234 void ib_cache_cleanup_one(struct ib_device *device) 1235 { 1236 /* The cleanup function unregisters the event handler, 1237 * waits for all in-progress workqueue elements and cleans 1238 * up the GID cache. This function should be called after 1239 * the device was removed from the devices list and all 1240 * clients were removed, so the cache exists but is 1241 * non-functional and shouldn't be updated anymore. 1242 */ 1243 ib_unregister_event_handler(&device->cache.event_handler); 1244 flush_workqueue(ib_wq); 1245 gid_table_cleanup_one(device); 1246 } 1247 1248 void __init ib_cache_setup(void) 1249 { 1250 roce_gid_mgmt_init(); 1251 } 1252 1253 void __exit ib_cache_cleanup(void) 1254 { 1255 roce_gid_mgmt_cleanup(); 1256 } 1257