1 /* 2 * Copyright (c) 2008-2009 Voltaire, Inc. All rights reserved. 3 * Copyright (c) 2008-2009 Mellanox Technologies LTD. All rights reserved. 4 * 5 * This software is available to you under a choice of one of two 6 * licenses. You may choose to be licensed under the terms of the GNU 7 * General Public License (GPL) Version 2, available from the file 8 * COPYING in the main directory of this source tree, or the 9 * OpenIB.org BSD license below: 10 * 11 * Redistribution and use in source and binary forms, with or 12 * without modification, are permitted provided that the following 13 * conditions are met: 14 * 15 * - Redistributions of source code must retain the above 16 * copyright notice, this list of conditions and the following 17 * disclaimer. 18 * 19 * - Redistributions in binary form must reproduce the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer in the documentation and/or other materials 22 * provided with the distribution. 23 * 24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * SOFTWARE. 32 * 33 */ 34 35 /* 36 * Abstract: 37 * Implementation of OpenSM Cached Unicast Routing 38 * 39 * Environment: 40 * Linux User Mode 41 * 42 */ 43 44 #if HAVE_CONFIG_H 45 # include <config.h> 46 #endif 47 48 #include <stdlib.h> 49 #include <string.h> 50 #include <ctype.h> 51 #include <errno.h> 52 #include <iba/ib_types.h> 53 #include <complib/cl_qmap.h> 54 #include <complib/cl_pool.h> 55 #include <complib/cl_debug.h> 56 #include <opensm/osm_file_ids.h> 57 #define FILE_ID OSM_FILE_UCAST_CACHE_C 58 #include <opensm/osm_opensm.h> 59 #include <opensm/osm_ucast_mgr.h> 60 #include <opensm/osm_ucast_cache.h> 61 #include <opensm/osm_switch.h> 62 #include <opensm/osm_node.h> 63 #include <opensm/osm_port.h> 64 65 typedef struct cache_port { 66 boolean_t is_leaf; 67 uint16_t remote_lid_ho; 68 } cache_port_t; 69 70 typedef struct cache_switch { 71 cl_map_item_t map_item; 72 boolean_t dropped; 73 uint16_t max_lid_ho; 74 uint16_t num_hops; 75 uint8_t **hops; 76 uint8_t *lft; 77 uint8_t num_ports; 78 cache_port_t ports[0]; 79 } cache_switch_t; 80 81 static uint16_t cache_sw_get_base_lid_ho(cache_switch_t * p_sw) 82 { 83 return p_sw->ports[0].remote_lid_ho; 84 } 85 86 static boolean_t cache_sw_is_leaf(cache_switch_t * p_sw) 87 { 88 return p_sw->ports[0].is_leaf; 89 } 90 91 static void cache_sw_set_leaf(cache_switch_t * p_sw) 92 { 93 p_sw->ports[0].is_leaf = TRUE; 94 } 95 96 static cache_switch_t *cache_sw_new(uint16_t lid_ho, unsigned num_ports) 97 { 98 cache_switch_t *p_cache_sw = malloc(sizeof(cache_switch_t) + 99 num_ports * sizeof(cache_port_t)); 100 if (!p_cache_sw) 101 return NULL; 102 103 memset(p_cache_sw, 0, 104 sizeof(*p_cache_sw) + num_ports * sizeof(cache_port_t)); 105 106 p_cache_sw->num_ports = num_ports; 107 108 /* port[0] fields represent this switch details - lid and type */ 109 p_cache_sw->ports[0].remote_lid_ho = lid_ho; 110 p_cache_sw->ports[0].is_leaf = FALSE; 111 112 return p_cache_sw; 113 } 114 115 static void cache_sw_destroy(cache_switch_t * p_sw) 116 { 117 unsigned i; 118 119 if (!p_sw) 120 return; 121 122 if (p_sw->lft) 123 free(p_sw->lft); 124 if (p_sw->hops) { 125 for (i = 0; i < p_sw->num_hops; i++) 126 if (p_sw->hops[i]) 127 free(p_sw->hops[i]); 128 free(p_sw->hops); 129 } 130 free(p_sw); 131 } 132 133 static cache_switch_t *cache_get_sw(osm_ucast_mgr_t * p_mgr, uint16_t lid_ho) 134 { 135 cache_switch_t *p_cache_sw = (cache_switch_t *) 136 cl_qmap_get(&p_mgr->cache_sw_tbl, lid_ho); 137 if (p_cache_sw == (cache_switch_t *) 138 cl_qmap_end(&p_mgr->cache_sw_tbl)) 139 p_cache_sw = NULL; 140 141 return p_cache_sw; 142 } 143 144 static void cache_add_sw_link(osm_ucast_mgr_t * p_mgr, osm_physp_t *p, 145 uint16_t remote_lid_ho, boolean_t is_ca) 146 { 147 cache_switch_t *p_cache_sw; 148 uint16_t lid_ho = cl_ntoh16(osm_node_get_base_lid(p->p_node, 0)); 149 150 OSM_LOG_ENTER(p_mgr->p_log); 151 152 if (!lid_ho || !remote_lid_ho || !p->port_num) 153 goto Exit; 154 155 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, 156 "Caching switch port: lid %u [port %u] -> lid %u (%s)\n", 157 lid_ho, p->port_num, remote_lid_ho, (is_ca) ? "CA/RTR" : "SW"); 158 159 p_cache_sw = cache_get_sw(p_mgr, lid_ho); 160 if (!p_cache_sw) { 161 p_cache_sw = cache_sw_new(lid_ho, p->p_node->sw->num_ports); 162 if (!p_cache_sw) { 163 OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, 164 "ERR AD01: Out of memory - cache is invalid\n"); 165 osm_ucast_cache_invalidate(p_mgr); 166 goto Exit; 167 } 168 cl_qmap_insert(&p_mgr->cache_sw_tbl, lid_ho, 169 &p_cache_sw->map_item); 170 } 171 172 if (p->port_num >= p_cache_sw->num_ports) { 173 OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, 174 "ERR AD02: Wrong switch? - cache is invalid\n"); 175 osm_ucast_cache_invalidate(p_mgr); 176 goto Exit; 177 } 178 179 if (is_ca) 180 cache_sw_set_leaf(p_cache_sw); 181 182 if (p_cache_sw->ports[p->port_num].remote_lid_ho == 0) { 183 /* cache this link only if it hasn't been already cached */ 184 p_cache_sw->ports[p->port_num].remote_lid_ho = remote_lid_ho; 185 p_cache_sw->ports[p->port_num].is_leaf = is_ca; 186 } 187 Exit: 188 OSM_LOG_EXIT(p_mgr->p_log); 189 } 190 191 static void cache_cleanup_switches(osm_ucast_mgr_t * p_mgr) 192 { 193 cache_switch_t *p_sw; 194 cache_switch_t *p_next_sw; 195 unsigned port_num; 196 boolean_t found_port; 197 198 if (!p_mgr->cache_valid) 199 return; 200 201 p_next_sw = (cache_switch_t *) cl_qmap_head(&p_mgr->cache_sw_tbl); 202 while (p_next_sw != 203 (cache_switch_t *) cl_qmap_end(&p_mgr->cache_sw_tbl)) { 204 p_sw = p_next_sw; 205 p_next_sw = (cache_switch_t *) cl_qmap_next(&p_sw->map_item); 206 207 found_port = FALSE; 208 for (port_num = 1; port_num < p_sw->num_ports; port_num++) 209 if (p_sw->ports[port_num].remote_lid_ho) 210 found_port = TRUE; 211 212 if (!found_port) { 213 cl_qmap_remove_item(&p_mgr->cache_sw_tbl, 214 &p_sw->map_item); 215 cache_sw_destroy(p_sw); 216 } 217 } 218 } 219 220 static void 221 cache_check_link_change(osm_ucast_mgr_t * p_mgr, 222 osm_physp_t * p_physp_1, osm_physp_t * p_physp_2) 223 { 224 OSM_LOG_ENTER(p_mgr->p_log); 225 CL_ASSERT(p_physp_1 && p_physp_2); 226 227 if (!p_mgr->cache_valid) 228 goto Exit; 229 230 if (!p_physp_1->p_remote_physp && !p_physp_2->p_remote_physp) 231 /* both ports were down - new link */ 232 goto Exit; 233 234 /* unicast cache cannot tolerate any link location change */ 235 236 if ((p_physp_1->p_remote_physp && 237 p_physp_1->p_remote_physp->p_remote_physp) || 238 (p_physp_2->p_remote_physp && 239 p_physp_2->p_remote_physp->p_remote_physp)) { 240 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, 241 "Link location change discovered\n"); 242 osm_ucast_cache_invalidate(p_mgr); 243 goto Exit; 244 } 245 Exit: 246 OSM_LOG_EXIT(p_mgr->p_log); 247 } 248 249 static void cache_remove_port(osm_ucast_mgr_t * p_mgr, uint16_t lid_ho, 250 uint8_t port_num, uint16_t remote_lid_ho, 251 boolean_t is_ca) 252 { 253 cache_switch_t *p_cache_sw; 254 255 OSM_LOG_ENTER(p_mgr->p_log); 256 257 if (!p_mgr->cache_valid) 258 goto Exit; 259 260 p_cache_sw = cache_get_sw(p_mgr, lid_ho); 261 if (!p_cache_sw) { 262 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, 263 "Found uncached switch/link (lid %u, port %u)\n", 264 lid_ho, port_num); 265 osm_ucast_cache_invalidate(p_mgr); 266 goto Exit; 267 } 268 269 if (port_num >= p_cache_sw->num_ports || 270 !p_cache_sw->ports[port_num].remote_lid_ho) { 271 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, 272 "Found uncached switch link (lid %u, port %u)\n", 273 lid_ho, port_num); 274 osm_ucast_cache_invalidate(p_mgr); 275 goto Exit; 276 } 277 278 if (p_cache_sw->ports[port_num].remote_lid_ho != remote_lid_ho) { 279 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, 280 "Remote lid change on switch lid %u, port %u " 281 "(was %u, now %u)\n", lid_ho, port_num, 282 p_cache_sw->ports[port_num].remote_lid_ho, 283 remote_lid_ho); 284 osm_ucast_cache_invalidate(p_mgr); 285 goto Exit; 286 } 287 288 if ((p_cache_sw->ports[port_num].is_leaf && !is_ca) || 289 (!p_cache_sw->ports[port_num].is_leaf && is_ca)) { 290 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, 291 "Remote node type change on switch lid %u, port %u\n", 292 lid_ho, port_num); 293 osm_ucast_cache_invalidate(p_mgr); 294 goto Exit; 295 } 296 297 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, 298 "New link from lid %u, port %u to lid %u - " 299 "found in cache\n", lid_ho, port_num, remote_lid_ho); 300 301 /* the new link was cached - clean it from the cache */ 302 303 p_cache_sw->ports[port_num].remote_lid_ho = 0; 304 p_cache_sw->ports[port_num].is_leaf = FALSE; 305 Exit: 306 OSM_LOG_EXIT(p_mgr->p_log); 307 } /* cache_remove_port() */ 308 309 static void 310 cache_restore_ucast_info(osm_ucast_mgr_t * p_mgr, 311 cache_switch_t * p_cache_sw, osm_switch_t * p_sw) 312 { 313 if (!p_mgr->cache_valid) 314 return; 315 316 /* when seting unicast info, the cached port 317 should have all the required info */ 318 CL_ASSERT(p_cache_sw->max_lid_ho && p_cache_sw->lft && 319 p_cache_sw->num_hops && p_cache_sw->hops); 320 321 p_sw->max_lid_ho = p_cache_sw->max_lid_ho; 322 323 if (p_sw->new_lft) 324 free(p_sw->new_lft); 325 p_sw->new_lft = p_cache_sw->lft; 326 p_cache_sw->lft = NULL; 327 328 p_sw->num_hops = p_cache_sw->num_hops; 329 p_cache_sw->num_hops = 0; 330 if (p_sw->hops) 331 free(p_sw->hops); 332 p_sw->hops = p_cache_sw->hops; 333 p_cache_sw->hops = NULL; 334 335 p_sw->need_update = 2; 336 } 337 338 static void ucast_cache_dump(osm_ucast_mgr_t * p_mgr) 339 { 340 cache_switch_t *p_sw; 341 unsigned i; 342 343 OSM_LOG_ENTER(p_mgr->p_log); 344 345 if (!OSM_LOG_IS_ACTIVE_V2(p_mgr->p_log, OSM_LOG_DEBUG)) 346 goto Exit; 347 348 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, 349 "Dumping missing nodes/links as logged by unicast cache:\n"); 350 for (p_sw = (cache_switch_t *) cl_qmap_head(&p_mgr->cache_sw_tbl); 351 p_sw != (cache_switch_t *) cl_qmap_end(&p_mgr->cache_sw_tbl); 352 p_sw = (cache_switch_t *) cl_qmap_next(&p_sw->map_item)) { 353 354 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, 355 "\t Switch lid %u %s%s\n", 356 cache_sw_get_base_lid_ho(p_sw), 357 (cache_sw_is_leaf(p_sw)) ? "[leaf switch] " : "", 358 (p_sw->dropped) ? "[whole switch missing]" : ""); 359 360 for (i = 1; i < p_sw->num_ports; i++) 361 if (p_sw->ports[i].remote_lid_ho > 0) 362 OSM_LOG(p_mgr->p_log, 363 OSM_LOG_DEBUG, 364 "\t - port %u -> lid %u %s\n", 365 i, p_sw->ports[i].remote_lid_ho, 366 (p_sw->ports[i].is_leaf) ? 367 "[remote node is leaf]" : ""); 368 } 369 Exit: 370 OSM_LOG_EXIT(p_mgr->p_log); 371 } 372 373 void osm_ucast_cache_invalidate(osm_ucast_mgr_t * p_mgr) 374 { 375 cache_switch_t *p_sw; 376 cache_switch_t *p_next_sw; 377 378 OSM_LOG_ENTER(p_mgr->p_log); 379 380 if (!p_mgr->cache_valid) 381 goto Exit; 382 383 p_mgr->cache_valid = FALSE; 384 385 p_next_sw = (cache_switch_t *) cl_qmap_head(&p_mgr->cache_sw_tbl); 386 while (p_next_sw != 387 (cache_switch_t *) cl_qmap_end(&p_mgr->cache_sw_tbl)) { 388 p_sw = p_next_sw; 389 p_next_sw = (cache_switch_t *) cl_qmap_next(&p_sw->map_item); 390 cache_sw_destroy(p_sw); 391 } 392 cl_qmap_remove_all(&p_mgr->cache_sw_tbl); 393 394 OSM_LOG(p_mgr->p_log, OSM_LOG_VERBOSE, "Unicast Cache invalidated\n"); 395 Exit: 396 OSM_LOG_EXIT(p_mgr->p_log); 397 } 398 399 static void ucast_cache_validate(osm_ucast_mgr_t * p_mgr) 400 { 401 cache_switch_t *p_cache_sw; 402 cache_switch_t *p_remote_cache_sw; 403 unsigned port_num; 404 unsigned max_ports; 405 uint8_t remote_node_type; 406 uint16_t lid_ho; 407 uint16_t remote_lid_ho; 408 osm_switch_t *p_sw; 409 osm_switch_t *p_remote_sw; 410 osm_node_t *p_node; 411 osm_physp_t *p_physp; 412 osm_physp_t *p_remote_physp; 413 osm_port_t *p_remote_port; 414 cl_qmap_t *p_sw_tbl; 415 416 OSM_LOG_ENTER(p_mgr->p_log); 417 if (!p_mgr->cache_valid) 418 goto Exit; 419 420 /* If there are no switches in the subnet, we are done */ 421 p_sw_tbl = &p_mgr->p_subn->sw_guid_tbl; 422 if (cl_qmap_count(p_sw_tbl) == 0) { 423 osm_ucast_cache_invalidate(p_mgr); 424 goto Exit; 425 } 426 427 /* 428 * Scan all the physical switch ports in the subnet. 429 * If the port need_update flag is on, check whether 430 * it's just some node/port reset or a cached topology 431 * change. Otherwise the cache is invalid. 432 */ 433 for (p_sw = (osm_switch_t *) cl_qmap_head(p_sw_tbl); 434 p_sw != (osm_switch_t *) cl_qmap_end(p_sw_tbl); 435 p_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item)) { 436 437 p_node = p_sw->p_node; 438 439 lid_ho = cl_ntoh16(osm_node_get_base_lid(p_node, 0)); 440 p_cache_sw = cache_get_sw(p_mgr, lid_ho); 441 442 max_ports = osm_node_get_num_physp(p_node); 443 444 /* skip port 0 */ 445 for (port_num = 1; port_num < max_ports; port_num++) { 446 447 p_physp = osm_node_get_physp_ptr(p_node, port_num); 448 449 if (!p_physp || !p_physp->p_remote_physp || 450 !osm_physp_link_exists(p_physp, 451 p_physp->p_remote_physp)) 452 /* no valid link */ 453 continue; 454 455 /* 456 * While scanning all the physical ports in the subnet, 457 * mark corresponding leaf switches in the cache. 458 */ 459 if (p_cache_sw && 460 !p_cache_sw->dropped && 461 !cache_sw_is_leaf(p_cache_sw) && 462 p_physp->p_remote_physp->p_node && 463 osm_node_get_type(p_physp->p_remote_physp-> 464 p_node) != IB_NODE_TYPE_SWITCH) 465 cache_sw_set_leaf(p_cache_sw); 466 467 if (!p_physp->need_update) 468 continue; 469 470 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, 471 "Checking switch lid %u, port %u\n", 472 lid_ho, port_num); 473 474 p_remote_physp = osm_physp_get_remote(p_physp); 475 remote_node_type = 476 osm_node_get_type(p_remote_physp->p_node); 477 478 if (remote_node_type == IB_NODE_TYPE_SWITCH) 479 remote_lid_ho = 480 cl_ntoh16(osm_node_get_base_lid 481 (p_remote_physp->p_node, 0)); 482 else 483 remote_lid_ho = 484 cl_ntoh16(osm_node_get_base_lid 485 (p_remote_physp->p_node, 486 osm_physp_get_port_num 487 (p_remote_physp))); 488 489 if (!p_cache_sw || 490 port_num >= p_cache_sw->num_ports || 491 !p_cache_sw->ports[port_num].remote_lid_ho) { 492 /* 493 * There is some uncached change on the port. 494 * In general, the reasons might be as follows: 495 * - switch reset 496 * - port reset (or port down/up) 497 * - quick connection location change 498 * - new link (or new switch) 499 * 500 * First two reasons allow cache usage, while 501 * the last two reasons should invalidate cache. 502 * 503 * In case of quick connection location change, 504 * cache would have been invalidated by 505 * osm_ucast_cache_check_new_link() function. 506 * 507 * In case of new link between two known nodes, 508 * cache also would have been invalidated by 509 * osm_ucast_cache_check_new_link() function. 510 * 511 * Another reason is cached link between two 512 * known switches went back. In this case the 513 * osm_ucast_cache_check_new_link() function would 514 * clear both sides of the link from the cache 515 * during the discovery process, so effectively 516 * this would be equivalent to port reset. 517 * 518 * So three possible reasons remain: 519 * - switch reset 520 * - port reset (or port down/up) 521 * - link of a new switch 522 * 523 * To validate cache, we need to check only the 524 * third reason - link of a new node/switch: 525 * - If this is the local switch that is new, 526 * then it should have (p_sw->need_update == 2). 527 * - If the remote node is switch and it's new, 528 * then it also should have 529 * (p_sw->need_update == 2). 530 * - If the remote node is CA/RTR and it's new, 531 * then its port should have is_new flag on. 532 */ 533 if (p_sw->need_update == 2) { 534 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, 535 "New switch found (lid %u)\n", 536 lid_ho); 537 osm_ucast_cache_invalidate(p_mgr); 538 goto Exit; 539 } 540 541 if (remote_node_type == IB_NODE_TYPE_SWITCH) { 542 543 p_remote_sw = 544 p_remote_physp->p_node->sw; 545 if (p_remote_sw->need_update == 2) { 546 /* this could also be case of 547 switch coming back with an 548 additional link that it 549 didn't have before */ 550 OSM_LOG(p_mgr->p_log, 551 OSM_LOG_DEBUG, 552 "New switch/link found (lid %u)\n", 553 remote_lid_ho); 554 osm_ucast_cache_invalidate 555 (p_mgr); 556 goto Exit; 557 } 558 } else { 559 /* 560 * Remote node is CA/RTR. 561 * Get p_port of the remote node and 562 * check its p_port->is_new flag. 563 */ 564 p_remote_port = 565 osm_get_port_by_guid(p_mgr->p_subn, 566 osm_physp_get_port_guid 567 (p_remote_physp)); 568 if (!p_remote_port) { 569 OSM_LOG(p_mgr->p_log, 570 OSM_LOG_ERROR, 571 "ERR AD04: No port was found for " 572 "port GUID 0x%" PRIx64 "\n", 573 cl_ntoh64(osm_physp_get_port_guid 574 (p_remote_physp))); 575 osm_ucast_cache_invalidate 576 (p_mgr); 577 goto Exit; 578 } 579 if (p_remote_port->is_new) { 580 OSM_LOG(p_mgr->p_log, 581 OSM_LOG_DEBUG, 582 "New CA/RTR found (lid %u)\n", 583 remote_lid_ho); 584 osm_ucast_cache_invalidate 585 (p_mgr); 586 goto Exit; 587 } 588 } 589 } else { 590 /* 591 * The change on the port is cached. 592 * In general, the reasons might be as follows: 593 * - link between two known nodes went back 594 * - one or more nodes went back, causing all 595 * the links to reappear 596 * 597 * If it was link that went back, then this case 598 * would have been taken care of during the 599 * discovery by osm_ucast_cache_check_new_link(), 600 * so it's some node that went back. 601 */ 602 if ((p_cache_sw->ports[port_num].is_leaf && 603 remote_node_type == IB_NODE_TYPE_SWITCH) || 604 (!p_cache_sw->ports[port_num].is_leaf && 605 remote_node_type != IB_NODE_TYPE_SWITCH)) { 606 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, 607 "Remote node type change on switch lid %u, port %u\n", 608 lid_ho, port_num); 609 osm_ucast_cache_invalidate(p_mgr); 610 goto Exit; 611 } 612 613 if (p_cache_sw->ports[port_num].remote_lid_ho != 614 remote_lid_ho) { 615 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, 616 "Remote lid change on switch lid %u, port %u" 617 "(was %u, now %u)\n", 618 lid_ho, port_num, 619 p_cache_sw->ports[port_num]. 620 remote_lid_ho, remote_lid_ho); 621 osm_ucast_cache_invalidate(p_mgr); 622 goto Exit; 623 } 624 625 /* 626 * We don't care who is the node that has 627 * reappeared in the subnet (local or remote). 628 * What's important that the cached link matches 629 * the real fabrics link. 630 * Just clean it from cache. 631 */ 632 633 p_cache_sw->ports[port_num].remote_lid_ho = 0; 634 p_cache_sw->ports[port_num].is_leaf = FALSE; 635 if (p_cache_sw->dropped) { 636 cache_restore_ucast_info(p_mgr, 637 p_cache_sw, 638 p_sw); 639 p_cache_sw->dropped = FALSE; 640 } 641 642 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, 643 "Restored link from cache: lid %u, port %u to lid %u\n", 644 lid_ho, port_num, remote_lid_ho); 645 } 646 } 647 } 648 649 /* Remove all the cached switches that 650 have all their ports restored */ 651 cache_cleanup_switches(p_mgr); 652 653 /* 654 * Done scanning all the physical switch ports in the subnet. 655 * Now we need to check the other side: 656 * Scan all the cached switches and their ports: 657 * - If the cached switch is missing in the subnet 658 * (dropped flag is on), check that it's a leaf switch. 659 * If it's not a leaf, the cache is invalid, because 660 * cache can tolerate only leaf switch removal. 661 * - If the cached switch exists in fabric, check all 662 * its cached ports. These cached ports represent 663 * missing link in the fabric. 664 * The missing links that can be tolerated are: 665 * + link to missing CA/RTR 666 * + link to missing leaf switch 667 */ 668 for (p_cache_sw = (cache_switch_t *) cl_qmap_head(&p_mgr->cache_sw_tbl); 669 p_cache_sw != (cache_switch_t *) cl_qmap_end(&p_mgr->cache_sw_tbl); 670 p_cache_sw = 671 (cache_switch_t *) cl_qmap_next(&p_cache_sw->map_item)) { 672 673 if (p_cache_sw->dropped) { 674 if (!cache_sw_is_leaf(p_cache_sw)) { 675 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, 676 "Missing non-leaf switch (lid %u)\n", 677 cache_sw_get_base_lid_ho(p_cache_sw)); 678 osm_ucast_cache_invalidate(p_mgr); 679 goto Exit; 680 } 681 682 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, 683 "Missing leaf switch (lid %u) - " 684 "continuing validation\n", 685 cache_sw_get_base_lid_ho(p_cache_sw)); 686 continue; 687 } 688 689 for (port_num = 1; port_num < p_cache_sw->num_ports; port_num++) { 690 if (!p_cache_sw->ports[port_num].remote_lid_ho) 691 continue; 692 693 if (p_cache_sw->ports[port_num].is_leaf) { 694 CL_ASSERT(cache_sw_is_leaf(p_cache_sw)); 695 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, 696 "Switch lid %u, port %u: missing link to CA/RTR - " 697 "continuing validation\n", 698 cache_sw_get_base_lid_ho(p_cache_sw), 699 port_num); 700 continue; 701 } 702 703 p_remote_cache_sw = cache_get_sw(p_mgr, 704 p_cache_sw-> 705 ports[port_num]. 706 remote_lid_ho); 707 708 if (!p_remote_cache_sw || !p_remote_cache_sw->dropped) { 709 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, 710 "Switch lid %u, port %u: missing link to existing switch\n", 711 cache_sw_get_base_lid_ho(p_cache_sw), 712 port_num); 713 osm_ucast_cache_invalidate(p_mgr); 714 goto Exit; 715 } 716 717 if (!cache_sw_is_leaf(p_remote_cache_sw)) { 718 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, 719 "Switch lid %u, port %u: missing link to non-leaf switch\n", 720 cache_sw_get_base_lid_ho(p_cache_sw), 721 port_num); 722 osm_ucast_cache_invalidate(p_mgr); 723 goto Exit; 724 } 725 726 /* 727 * At this point we know that the missing link is to 728 * a leaf switch. However, one case deserves a special 729 * treatment. If there was a link between two leaf 730 * switches, then missing leaf switch might break 731 * routing. It is possible that there are routes 732 * that use leaf switches to get from switch to switch 733 * and not just to get to the CAs behind the leaf switch. 734 */ 735 if (cache_sw_is_leaf(p_cache_sw) && 736 cache_sw_is_leaf(p_remote_cache_sw)) { 737 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, 738 "Switch lid %u, port %u: missing leaf-2-leaf link\n", 739 cache_sw_get_base_lid_ho(p_cache_sw), 740 port_num); 741 osm_ucast_cache_invalidate(p_mgr); 742 goto Exit; 743 } 744 745 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, 746 "Switch lid %u, port %u: missing remote leaf switch - " 747 "continuing validation\n", 748 cache_sw_get_base_lid_ho(p_cache_sw), 749 port_num); 750 } 751 } 752 753 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "Unicast cache is valid\n"); 754 ucast_cache_dump(p_mgr); 755 Exit: 756 OSM_LOG_EXIT(p_mgr->p_log); 757 } /* osm_ucast_cache_validate() */ 758 759 void osm_ucast_cache_check_new_link(osm_ucast_mgr_t * p_mgr, 760 osm_node_t * p_node_1, uint8_t port_num_1, 761 osm_node_t * p_node_2, uint8_t port_num_2) 762 { 763 uint16_t lid_ho_1; 764 uint16_t lid_ho_2; 765 766 OSM_LOG_ENTER(p_mgr->p_log); 767 768 if (!p_mgr->cache_valid) 769 goto Exit; 770 771 cache_check_link_change(p_mgr, 772 osm_node_get_physp_ptr(p_node_1, port_num_1), 773 osm_node_get_physp_ptr(p_node_2, port_num_2)); 774 775 if (!p_mgr->cache_valid) 776 goto Exit; 777 778 if (osm_node_get_type(p_node_1) != IB_NODE_TYPE_SWITCH && 779 osm_node_get_type(p_node_2) != IB_NODE_TYPE_SWITCH) { 780 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "Found CA-2-CA link\n"); 781 osm_ucast_cache_invalidate(p_mgr); 782 goto Exit; 783 } 784 785 /* for code simplicity, we want the first node to be switch */ 786 if (osm_node_get_type(p_node_1) != IB_NODE_TYPE_SWITCH) { 787 osm_node_t *tmp_node = p_node_1; 788 uint8_t tmp_port_num = port_num_1; 789 p_node_1 = p_node_2; 790 port_num_1 = port_num_2; 791 p_node_2 = tmp_node; 792 port_num_2 = tmp_port_num; 793 } 794 795 lid_ho_1 = cl_ntoh16(osm_node_get_base_lid(p_node_1, 0)); 796 797 if (osm_node_get_type(p_node_2) == IB_NODE_TYPE_SWITCH) 798 lid_ho_2 = cl_ntoh16(osm_node_get_base_lid(p_node_2, 0)); 799 else 800 lid_ho_2 = 801 cl_ntoh16(osm_node_get_base_lid(p_node_2, port_num_2)); 802 803 if (!lid_ho_1 || !lid_ho_2) { 804 /* 805 * No lid assigned, which means that one of the nodes is new. 806 * Need to wait for lid manager to process this node. 807 * The switches and their links will be checked later when 808 * the whole cache validity will be verified. 809 */ 810 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, 811 "Link port %u <-> %u reveals new node - cache will " 812 "be validated later\n", port_num_1, port_num_2); 813 goto Exit; 814 } 815 816 cache_remove_port(p_mgr, lid_ho_1, port_num_1, lid_ho_2, 817 (osm_node_get_type(p_node_2) != 818 IB_NODE_TYPE_SWITCH)); 819 820 /* if node_2 is a switch, the link should be cleaned from its cache */ 821 822 if (osm_node_get_type(p_node_2) == IB_NODE_TYPE_SWITCH) 823 cache_remove_port(p_mgr, lid_ho_2, 824 port_num_2, lid_ho_1, FALSE); 825 826 Exit: 827 OSM_LOG_EXIT(p_mgr->p_log); 828 } /* osm_ucast_cache_check_new_link() */ 829 830 void osm_ucast_cache_add_link(osm_ucast_mgr_t * p_mgr, 831 osm_physp_t * p_physp1, osm_physp_t * p_physp2) 832 { 833 osm_node_t *p_node_1 = p_physp1->p_node, *p_node_2 = p_physp2->p_node; 834 uint16_t lid_ho_1, lid_ho_2; 835 836 OSM_LOG_ENTER(p_mgr->p_log); 837 838 if (!p_mgr->cache_valid) 839 goto Exit; 840 841 if (osm_node_get_type(p_node_1) != IB_NODE_TYPE_SWITCH && 842 osm_node_get_type(p_node_2) != IB_NODE_TYPE_SWITCH) { 843 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, "Dropping CA-2-CA link\n"); 844 osm_ucast_cache_invalidate(p_mgr); 845 goto Exit; 846 } 847 848 if ((osm_node_get_type(p_node_1) == IB_NODE_TYPE_SWITCH && 849 !osm_node_get_physp_ptr(p_node_1, 0)) || 850 (osm_node_get_type(p_node_2) == IB_NODE_TYPE_SWITCH && 851 !osm_node_get_physp_ptr(p_node_2, 0))) { 852 /* we're caching a link when one of the nodes 853 has already been dropped and cached */ 854 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, 855 "Port %u <-> port %u: port0 on one of the nodes " 856 "has already been dropped and cached\n", 857 p_physp1->port_num, p_physp2->port_num); 858 goto Exit; 859 } 860 861 /* One of the nodes is switch. Just for code 862 simplicity, make sure that it's the first node. */ 863 864 if (osm_node_get_type(p_node_1) != IB_NODE_TYPE_SWITCH) { 865 osm_physp_t *tmp = p_physp1; 866 p_physp1 = p_physp2; 867 p_physp2 = tmp; 868 p_node_1 = p_physp1->p_node; 869 p_node_2 = p_physp2->p_node; 870 } 871 872 if (!p_node_1->sw) { 873 /* something is wrong - we'd better not use cache */ 874 osm_ucast_cache_invalidate(p_mgr); 875 goto Exit; 876 } 877 878 lid_ho_1 = cl_ntoh16(osm_node_get_base_lid(p_node_1, 0)); 879 880 if (osm_node_get_type(p_node_2) == IB_NODE_TYPE_SWITCH) { 881 882 if (!p_node_2->sw) { 883 /* something is wrong - we'd better not use cache */ 884 osm_ucast_cache_invalidate(p_mgr); 885 goto Exit; 886 } 887 888 lid_ho_2 = cl_ntoh16(osm_node_get_base_lid(p_node_2, 0)); 889 890 /* lost switch-2-switch link - cache both sides */ 891 cache_add_sw_link(p_mgr, p_physp1, lid_ho_2, FALSE); 892 cache_add_sw_link(p_mgr, p_physp2, lid_ho_1, FALSE); 893 } else { 894 lid_ho_2 = cl_ntoh16(osm_physp_get_base_lid(p_physp2)); 895 896 /* lost link to CA/RTR - cache only switch side */ 897 cache_add_sw_link(p_mgr, p_physp1, lid_ho_2, TRUE); 898 } 899 900 Exit: 901 OSM_LOG_EXIT(p_mgr->p_log); 902 } /* osm_ucast_cache_add_link() */ 903 904 void osm_ucast_cache_add_node(osm_ucast_mgr_t * p_mgr, osm_node_t * p_node) 905 { 906 uint16_t lid_ho; 907 uint8_t max_ports; 908 uint8_t port_num; 909 osm_physp_t *p_physp; 910 cache_switch_t *p_cache_sw; 911 912 OSM_LOG_ENTER(p_mgr->p_log); 913 914 if (!p_mgr->cache_valid) 915 goto Exit; 916 917 if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH) { 918 919 lid_ho = cl_ntoh16(osm_node_get_base_lid(p_node, 0)); 920 921 if (!lid_ho) { 922 OSM_LOG(p_mgr->p_log, OSM_LOG_VERBOSE, 923 "Skip caching. Switch dropped before " 924 "it gets a valid lid.\n"); 925 osm_ucast_cache_invalidate(p_mgr); 926 goto Exit; 927 } 928 929 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, 930 "Caching dropped switch lid %u\n", lid_ho); 931 932 if (!p_node->sw) { 933 /* something is wrong - forget about cache */ 934 OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, 935 "ERR AD03: no switch info for node lid %u - " 936 "clearing cache\n", lid_ho); 937 osm_ucast_cache_invalidate(p_mgr); 938 goto Exit; 939 } 940 941 /* unlink (add to cache) all the ports of this switch */ 942 max_ports = osm_node_get_num_physp(p_node); 943 for (port_num = 1; port_num < max_ports; port_num++) { 944 945 p_physp = osm_node_get_physp_ptr(p_node, port_num); 946 if (!p_physp || !p_physp->p_remote_physp) 947 continue; 948 949 osm_ucast_cache_add_link(p_mgr, p_physp, 950 p_physp->p_remote_physp); 951 } 952 953 /* 954 * All the ports have been dropped (cached). 955 * If one of the ports was connected to CA/RTR, 956 * then the cached switch would be marked as leaf. 957 * If it isn't, then the dropped switch isn't a leaf, 958 * and cache can't handle it. 959 */ 960 961 p_cache_sw = cache_get_sw(p_mgr, lid_ho); 962 963 /* p_cache_sw could be NULL if it has no remote phys ports */ 964 if (!p_cache_sw || !cache_sw_is_leaf(p_cache_sw)) { 965 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, 966 "Dropped non-leaf switch (lid %u)\n", lid_ho); 967 osm_ucast_cache_invalidate(p_mgr); 968 goto Exit; 969 } 970 971 p_cache_sw->dropped = TRUE; 972 973 if (!p_node->sw->num_hops || !p_node->sw->hops) { 974 OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG, 975 "No LID matrices for switch lid %u\n", lid_ho); 976 osm_ucast_cache_invalidate(p_mgr); 977 goto Exit; 978 } 979 980 /* lid matrices */ 981 982 p_cache_sw->num_hops = p_node->sw->num_hops; 983 p_node->sw->num_hops = 0; 984 p_cache_sw->hops = p_node->sw->hops; 985 p_node->sw->hops = NULL; 986 987 /* linear forwarding table */ 988 989 if (p_node->sw->new_lft) { 990 /* LFT buffer exists - we use it, because 991 it is more updated than the switch's LFT */ 992 p_cache_sw->lft = p_node->sw->new_lft; 993 p_node->sw->new_lft = NULL; 994 } else { 995 /* no LFT buffer, so we use the switch's LFT */ 996 p_cache_sw->lft = p_node->sw->lft; 997 p_node->sw->lft = NULL; 998 p_node->sw->lft_size = 0; 999 } 1000 p_cache_sw->max_lid_ho = p_node->sw->max_lid_ho; 1001 } else { 1002 /* dropping CA/RTR: add to cache all the ports of this node */ 1003 max_ports = osm_node_get_num_physp(p_node); 1004 for (port_num = 1; port_num < max_ports; port_num++) { 1005 1006 p_physp = osm_node_get_physp_ptr(p_node, port_num); 1007 if (!p_physp || !p_physp->p_remote_physp) 1008 continue; 1009 1010 CL_ASSERT(osm_node_get_type 1011 (p_physp->p_remote_physp->p_node) == 1012 IB_NODE_TYPE_SWITCH); 1013 1014 osm_ucast_cache_add_link(p_mgr, 1015 p_physp->p_remote_physp, 1016 p_physp); 1017 } 1018 } 1019 Exit: 1020 OSM_LOG_EXIT(p_mgr->p_log); 1021 } /* osm_ucast_cache_add_node() */ 1022 1023 int osm_ucast_cache_process(osm_ucast_mgr_t * p_mgr) 1024 { 1025 cl_qmap_t *tbl = &p_mgr->p_subn->sw_guid_tbl; 1026 cl_map_item_t *item; 1027 osm_switch_t *p_sw; 1028 uint16_t lft_size; 1029 1030 if (!p_mgr->p_subn->opt.use_ucast_cache) 1031 return 1; 1032 1033 ucast_cache_validate(p_mgr); 1034 if (!p_mgr->cache_valid) 1035 return 1; 1036 1037 OSM_LOG(p_mgr->p_log, OSM_LOG_INFO, 1038 "Configuring switch tables using cached routing\n"); 1039 1040 for (item = cl_qmap_head(tbl); item != cl_qmap_end(tbl); 1041 item = cl_qmap_next(item)) { 1042 p_sw = (osm_switch_t *) item; 1043 CL_ASSERT(p_sw->new_lft); 1044 if (!p_sw->lft) { 1045 lft_size = (p_sw->max_lid_ho / IB_SMP_DATA_SIZE + 1) 1046 * IB_SMP_DATA_SIZE; 1047 p_sw->lft = malloc(lft_size); 1048 if (!p_sw->lft) 1049 return IB_INSUFFICIENT_MEMORY; 1050 p_sw->lft_size = lft_size; 1051 memset(p_sw->lft, OSM_NO_PATH, p_sw->lft_size); 1052 } 1053 1054 } 1055 1056 osm_ucast_mgr_set_fwd_tables(p_mgr); 1057 1058 return 0; 1059 } 1060