1 /* 2 * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. 3 * Copyright (c) 2002-2015 Mellanox Technologies LTD. All rights reserved. 4 * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. 5 * Copyright (c) 2009 HNR Consulting. All rights reserved. 6 * 7 * This software is available to you under a choice of one of two 8 * licenses. You may choose to be licensed under the terms of the GNU 9 * General Public License (GPL) Version 2, available from the file 10 * COPYING in the main directory of this source tree, or the 11 * OpenIB.org BSD license below: 12 * 13 * Redistribution and use in source and binary forms, with or 14 * without modification, are permitted provided that the following 15 * conditions are met: 16 * 17 * - Redistributions of source code must retain the above 18 * copyright notice, this list of conditions and the following 19 * disclaimer. 20 * 21 * - Redistributions in binary form must reproduce the above 22 * copyright notice, this list of conditions and the following 23 * disclaimer in the documentation and/or other materials 24 * provided with the distribution. 25 * 26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 33 * SOFTWARE. 34 * 35 */ 36 37 /* 38 * Abstract: 39 * Implementation of osm_switch_t. 40 * This object represents an Infiniband switch. 41 * This object is part of the opensm family of objects. 42 */ 43 44 #if HAVE_CONFIG_H 45 # include <config.h> 46 #endif /* HAVE_CONFIG_H */ 47 48 #include <stdlib.h> 49 #include <string.h> 50 #include <complib/cl_math.h> 51 #include <iba/ib_types.h> 52 #include <opensm/osm_file_ids.h> 53 #define FILE_ID OSM_FILE_SWITCH_C 54 #include <opensm/osm_switch.h> 55 56 struct switch_port_path { 57 uint8_t port_num; 58 uint32_t path_count; 59 int found_sys_guid; 60 int found_node_guid; 61 uint32_t forwarded_to; 62 }; 63 64 cl_status_t osm_switch_set_hops(IN osm_switch_t * p_sw, IN uint16_t lid_ho, 65 IN uint8_t port_num, IN uint8_t num_hops) 66 { 67 if (!lid_ho || lid_ho > p_sw->max_lid_ho) 68 return -1; 69 if (port_num >= p_sw->num_ports) 70 return -1; 71 if (!p_sw->hops[lid_ho]) { 72 p_sw->hops[lid_ho] = malloc(p_sw->num_ports); 73 if (!p_sw->hops[lid_ho]) 74 return -1; 75 memset(p_sw->hops[lid_ho], OSM_NO_PATH, p_sw->num_ports); 76 } 77 78 p_sw->hops[lid_ho][port_num] = num_hops; 79 if (p_sw->hops[lid_ho][0] > num_hops) 80 p_sw->hops[lid_ho][0] = num_hops; 81 82 return 0; 83 } 84 85 void osm_switch_delete(IN OUT osm_switch_t ** pp_sw) 86 { 87 osm_switch_t *p_sw = *pp_sw; 88 unsigned i; 89 90 osm_mcast_tbl_destroy(&p_sw->mcast_tbl); 91 if (p_sw->p_prof) 92 free(p_sw->p_prof); 93 if (p_sw->search_ordering_ports) 94 free(p_sw->search_ordering_ports); 95 if (p_sw->lft) 96 free(p_sw->lft); 97 if (p_sw->new_lft) 98 free(p_sw->new_lft); 99 if (p_sw->hops) { 100 for (i = 0; i < p_sw->num_hops; i++) 101 if (p_sw->hops[i]) 102 free(p_sw->hops[i]); 103 free(p_sw->hops); 104 } 105 free(*pp_sw); 106 *pp_sw = NULL; 107 } 108 109 osm_switch_t *osm_switch_new(IN osm_node_t * p_node, 110 IN const osm_madw_t * p_madw) 111 { 112 osm_switch_t *p_sw; 113 ib_switch_info_t *p_si; 114 ib_smp_t *p_smp; 115 uint8_t num_ports; 116 uint32_t port_num; 117 118 CL_ASSERT(p_madw); 119 CL_ASSERT(p_node); 120 121 p_smp = osm_madw_get_smp_ptr(p_madw); 122 p_si = ib_smp_get_payload_ptr(p_smp); 123 num_ports = osm_node_get_num_physp(p_node); 124 125 CL_ASSERT(p_smp->attr_id == IB_MAD_ATTR_SWITCH_INFO); 126 127 if (!p_si->lin_cap) /* The switch doesn't support LFT */ 128 return NULL; 129 130 p_sw = malloc(sizeof(*p_sw)); 131 if (!p_sw) 132 return NULL; 133 134 memset(p_sw, 0, sizeof(*p_sw)); 135 136 p_sw->p_node = p_node; 137 p_sw->switch_info = *p_si; 138 p_sw->num_ports = num_ports; 139 p_sw->need_update = 2; 140 141 p_sw->p_prof = malloc(sizeof(*p_sw->p_prof) * num_ports); 142 if (!p_sw->p_prof) 143 goto err; 144 145 memset(p_sw->p_prof, 0, sizeof(*p_sw->p_prof) * num_ports); 146 147 osm_mcast_tbl_init(&p_sw->mcast_tbl, osm_node_get_num_physp(p_node), 148 cl_ntoh16(p_si->mcast_cap)); 149 150 for (port_num = 0; port_num < num_ports; port_num++) 151 osm_port_prof_construct(&p_sw->p_prof[port_num]); 152 153 return p_sw; 154 155 err: 156 osm_switch_delete(&p_sw); 157 return NULL; 158 } 159 160 boolean_t osm_switch_get_lft_block(IN const osm_switch_t * p_sw, 161 IN uint16_t block_id, OUT uint8_t * p_block) 162 { 163 uint16_t base_lid_ho = block_id * IB_SMP_DATA_SIZE; 164 165 CL_ASSERT(p_sw); 166 CL_ASSERT(p_block); 167 168 if (base_lid_ho > p_sw->max_lid_ho) 169 return FALSE; 170 171 CL_ASSERT(base_lid_ho + IB_SMP_DATA_SIZE - 1 <= IB_LID_UCAST_END_HO); 172 memcpy(p_block, &(p_sw->new_lft[base_lid_ho]), IB_SMP_DATA_SIZE); 173 return TRUE; 174 } 175 176 static struct osm_remote_node * 177 switch_find_guid_common(IN const osm_switch_t * p_sw, 178 IN struct osm_remote_guids_count *r, 179 IN uint8_t port_num, IN int find_sys_guid, 180 IN int find_node_guid) 181 { 182 struct osm_remote_node *p_remote_guid = NULL; 183 osm_physp_t *p_physp; 184 osm_physp_t *p_rem_physp; 185 osm_node_t *p_rem_node; 186 uint64_t sys_guid; 187 uint64_t node_guid; 188 unsigned int i; 189 190 CL_ASSERT(p_sw); 191 192 if (!r) 193 goto out; 194 195 p_physp = osm_node_get_physp_ptr(p_sw->p_node, port_num); 196 if (!p_physp) 197 goto out; 198 199 p_rem_physp = osm_physp_get_remote(p_physp); 200 p_rem_node = osm_physp_get_node_ptr(p_rem_physp); 201 sys_guid = p_rem_node->node_info.sys_guid; 202 node_guid = p_rem_node->node_info.node_guid; 203 204 for (i = 0; i < r->count; i++) { 205 if ((!find_sys_guid 206 || r->guids[i].node->node_info.sys_guid == sys_guid) 207 && (!find_node_guid 208 || r->guids[i].node->node_info.node_guid == node_guid)) { 209 p_remote_guid = &r->guids[i]; 210 break; 211 } 212 } 213 214 out: 215 return p_remote_guid; 216 } 217 218 static struct osm_remote_node * 219 switch_find_sys_guid_count(IN const osm_switch_t * p_sw, 220 IN struct osm_remote_guids_count *r, 221 IN uint8_t port_num) 222 { 223 return switch_find_guid_common(p_sw, r, port_num, 1, 0); 224 } 225 226 static struct osm_remote_node * 227 switch_find_node_guid_count(IN const osm_switch_t * p_sw, 228 IN struct osm_remote_guids_count *r, 229 IN uint8_t port_num) 230 { 231 return switch_find_guid_common(p_sw, r, port_num, 0, 1); 232 } 233 234 uint8_t osm_switch_recommend_path(IN const osm_switch_t * p_sw, 235 IN osm_port_t * p_port, IN uint16_t lid_ho, 236 IN unsigned start_from, 237 IN boolean_t ignore_existing, 238 IN boolean_t routing_for_lmc, 239 IN boolean_t dor, 240 IN boolean_t port_shifting, 241 IN uint32_t scatter_ports, 242 IN osm_lft_type_enum lft_enum) 243 { 244 /* 245 We support an enhanced LMC aware routing mode: 246 In the case of LMC > 0, we can track the remote side 247 system and node for all of the lids of the target 248 and try and avoid routing again through the same 249 system / node. 250 251 Assume if routing_for_lmc is true that this procedure was 252 provided the tracking array and counter via p_port->priv, 253 and we can conduct this algorithm. 254 */ 255 uint16_t base_lid; 256 uint8_t hops; 257 uint8_t least_hops; 258 uint8_t port_num; 259 uint8_t num_ports; 260 uint32_t least_paths = 0xFFFFFFFF; 261 unsigned i; 262 /* 263 The following will track the least paths if the 264 route should go through a new system/node 265 */ 266 uint32_t least_paths_other_sys = 0xFFFFFFFF; 267 uint32_t least_paths_other_nodes = 0xFFFFFFFF; 268 uint32_t least_forwarded_to = 0xFFFFFFFF; 269 uint32_t check_count; 270 uint8_t best_port = 0; 271 /* 272 These vars track the best port if it connects to 273 not used system/node. 274 */ 275 uint8_t best_port_other_sys = 0; 276 uint8_t best_port_other_node = 0; 277 boolean_t port_found = FALSE; 278 osm_physp_t *p_physp; 279 osm_physp_t *p_rem_physp; 280 osm_node_t *p_rem_node; 281 osm_node_t *p_rem_node_first = NULL; 282 struct osm_remote_node *p_remote_guid = NULL; 283 struct osm_remote_node null_remote_node = {NULL, 0, 0}; 284 struct switch_port_path port_paths[IB_NODE_NUM_PORTS_MAX]; 285 unsigned int port_paths_total_paths = 0; 286 unsigned int port_paths_count = 0; 287 uint8_t scatter_possible_ports[IB_NODE_NUM_PORTS_MAX]; 288 unsigned int scatter_possible_ports_count = 0; 289 int found_sys_guid = 0; 290 int found_node_guid = 0; 291 292 CL_ASSERT(lid_ho > 0); 293 294 if (p_port->p_node->sw) { 295 if (p_port->p_node->sw == p_sw) 296 return 0; 297 base_lid = osm_port_get_base_lid(p_port); 298 } else { 299 p_physp = p_port->p_physp; 300 if (!p_physp || !p_physp->p_remote_physp || 301 !p_physp->p_remote_physp->p_node->sw) 302 return OSM_NO_PATH; 303 304 if (p_physp->p_remote_physp->p_node->sw == p_sw) 305 return p_physp->p_remote_physp->port_num; 306 base_lid = 307 osm_node_get_base_lid(p_physp->p_remote_physp->p_node, 0); 308 } 309 base_lid = cl_ntoh16(base_lid); 310 311 num_ports = p_sw->num_ports; 312 313 least_hops = osm_switch_get_least_hops(p_sw, base_lid); 314 if (least_hops == OSM_NO_PATH) 315 return OSM_NO_PATH; 316 317 /* 318 First, inquire with the forwarding table for an existing 319 route. If one is found, honor it unless: 320 1. the ignore existing flag is set. 321 2. the physical port is not a valid one or not healthy 322 3. the physical port has a remote port (the link is up) 323 4. the port has min-hops to the target (avoid loops) 324 */ 325 if (!ignore_existing) { 326 port_num = osm_switch_get_port_by_lid(p_sw, lid_ho, lft_enum); 327 328 if (port_num != OSM_NO_PATH) { 329 CL_ASSERT(port_num < num_ports); 330 331 p_physp = 332 osm_node_get_physp_ptr(p_sw->p_node, port_num); 333 /* 334 Don't be too trusting of the current forwarding table! 335 Verify that the port number is legal and that the 336 LID is reachable through this port. 337 */ 338 if (p_physp && osm_physp_is_healthy(p_physp) && 339 osm_physp_get_remote(p_physp)) { 340 hops = 341 osm_switch_get_hop_count(p_sw, base_lid, 342 port_num); 343 /* 344 If we aren't using pre-defined user routes 345 function, then we need to make sure that the 346 current path is the minimum one. In case of 347 having such a user function - this check will 348 not be done, and the old routing will be used. 349 Note: This means that it is the user's job to 350 clean all data in the forwarding tables that 351 he wants to be overridden by the minimum 352 hop function. 353 */ 354 if (hops == least_hops) 355 return port_num; 356 } 357 } 358 } 359 360 /* 361 This algorithm selects a port based on a static load balanced 362 selection across equal hop-count ports. 363 There is lots of room for improved sophistication here, 364 possibly guided by user configuration info. 365 */ 366 367 /* 368 OpenSM routing is "local" - not considering a full lid to lid 369 path. As such we can not guarantee a path will not loop if we 370 do not always follow least hops. 371 So we must abort if not least hops. 372 */ 373 374 /* port number starts with one and num_ports is 1 + num phys ports */ 375 for (i = start_from; i < start_from + num_ports; i++) { 376 port_num = osm_switch_get_dimn_port(p_sw, i % num_ports); 377 if (!port_num || 378 osm_switch_get_hop_count(p_sw, base_lid, port_num) != 379 least_hops) 380 continue; 381 382 /* let us make sure it is not down or unhealthy */ 383 p_physp = osm_node_get_physp_ptr(p_sw->p_node, port_num); 384 if (!p_physp || !osm_physp_is_healthy(p_physp) || 385 /* 386 we require all - non sma ports to be linked 387 to be routed through 388 */ 389 !osm_physp_get_remote(p_physp)) 390 continue; 391 392 /* 393 We located a least-hop port, possibly one of many. 394 For this port, check the running total count of 395 the number of paths through this port. Select 396 the port routing the least number of paths. 397 */ 398 check_count = 399 osm_port_prof_path_count_get(&p_sw->p_prof[port_num]); 400 401 402 if (dor) { 403 /* Get the Remote Node */ 404 p_rem_physp = osm_physp_get_remote(p_physp); 405 p_rem_node = osm_physp_get_node_ptr(p_rem_physp); 406 /* use the first dimension, but spread traffic 407 * out among the group of ports representing 408 * that dimension */ 409 if (!p_rem_node_first) 410 p_rem_node_first = p_rem_node; 411 else if (p_rem_node != p_rem_node_first) 412 continue; 413 if (routing_for_lmc) { 414 struct osm_remote_guids_count *r = p_port->priv; 415 uint8_t rem_port = osm_physp_get_port_num(p_rem_physp); 416 unsigned int j; 417 418 for (j = 0; j < r->count; j++) { 419 p_remote_guid = &r->guids[j]; 420 if ((p_remote_guid->node == p_rem_node) 421 && (p_remote_guid->port == rem_port)) 422 break; 423 } 424 if (j == r->count) 425 p_remote_guid = &null_remote_node; 426 } 427 /* 428 Advanced LMC routing requires tracking of the 429 best port by the node connected to the other side of 430 it. 431 */ 432 } else if (routing_for_lmc) { 433 /* Is the sys guid already used ? */ 434 p_remote_guid = switch_find_sys_guid_count(p_sw, 435 p_port->priv, 436 port_num); 437 438 /* If not update the least hops for this case */ 439 if (!p_remote_guid) { 440 if (check_count < least_paths_other_sys) { 441 least_paths_other_sys = check_count; 442 best_port_other_sys = port_num; 443 least_forwarded_to = 0; 444 } 445 found_sys_guid = 0; 446 } else { /* same sys found - try node */ 447 448 449 /* Else is the node guid already used ? */ 450 p_remote_guid = switch_find_node_guid_count(p_sw, 451 p_port->priv, 452 port_num); 453 454 /* If not update the least hops for this case */ 455 if (!p_remote_guid 456 && check_count < least_paths_other_nodes) { 457 least_paths_other_nodes = check_count; 458 best_port_other_node = port_num; 459 least_forwarded_to = 0; 460 } 461 /* else prior sys and node guid already used */ 462 463 if (!p_remote_guid) 464 found_node_guid = 0; 465 else 466 found_node_guid = 1; 467 found_sys_guid = 1; 468 } /* same sys found */ 469 } 470 471 port_paths[port_paths_count].port_num = port_num; 472 port_paths[port_paths_count].path_count = check_count; 473 if (routing_for_lmc) { 474 port_paths[port_paths_count].found_sys_guid = found_sys_guid; 475 port_paths[port_paths_count].found_node_guid = found_node_guid; 476 } 477 if (routing_for_lmc && p_remote_guid) 478 port_paths[port_paths_count].forwarded_to = p_remote_guid->forwarded_to; 479 else 480 port_paths[port_paths_count].forwarded_to = 0; 481 port_paths_total_paths += check_count; 482 port_paths_count++; 483 484 /* routing for LMC mode */ 485 /* 486 the count is min but also lower then the max subscribed 487 */ 488 if (check_count < least_paths) { 489 port_found = TRUE; 490 best_port = port_num; 491 least_paths = check_count; 492 scatter_possible_ports_count = 0; 493 scatter_possible_ports[scatter_possible_ports_count++] = port_num; 494 if (routing_for_lmc 495 && p_remote_guid 496 && p_remote_guid->forwarded_to < least_forwarded_to) 497 least_forwarded_to = p_remote_guid->forwarded_to; 498 } else if (scatter_ports 499 && check_count == least_paths) { 500 scatter_possible_ports[scatter_possible_ports_count++] = port_num; 501 } else if (routing_for_lmc 502 && p_remote_guid 503 && check_count == least_paths 504 && p_remote_guid->forwarded_to < least_forwarded_to) { 505 least_forwarded_to = p_remote_guid->forwarded_to; 506 best_port = port_num; 507 } 508 } 509 510 if (port_found == FALSE) 511 return OSM_NO_PATH; 512 513 if (port_shifting && port_paths_count) { 514 /* In the port_paths[] array, we now have all the ports that we 515 * can route out of. Using some shifting math below, possibly 516 * select a different one so that lids won't align in LFTs 517 * 518 * If lmc > 0, we need to loop through these ports to find the 519 * least_forwarded_to port, best_port_other_sys, and 520 * best_port_other_node just like before but through the different 521 * ordering. 522 */ 523 524 least_paths = 0xFFFFFFFF; 525 least_paths_other_sys = 0xFFFFFFFF; 526 least_paths_other_nodes = 0xFFFFFFFF; 527 least_forwarded_to = 0xFFFFFFFF; 528 best_port = 0; 529 best_port_other_sys = 0; 530 best_port_other_node = 0; 531 532 for (i = 0; i < port_paths_count; i++) { 533 unsigned int idx; 534 535 idx = (port_paths_total_paths/port_paths_count + i) % port_paths_count; 536 537 if (routing_for_lmc) { 538 if (!port_paths[idx].found_sys_guid 539 && port_paths[idx].path_count < least_paths_other_sys) { 540 least_paths_other_sys = port_paths[idx].path_count; 541 best_port_other_sys = port_paths[idx].port_num; 542 least_forwarded_to = 0; 543 } 544 else if (!port_paths[idx].found_node_guid 545 && port_paths[idx].path_count < least_paths_other_nodes) { 546 least_paths_other_nodes = port_paths[idx].path_count; 547 best_port_other_node = port_paths[idx].port_num; 548 least_forwarded_to = 0; 549 } 550 } 551 552 if (port_paths[idx].path_count < least_paths) { 553 best_port = port_paths[idx].port_num; 554 least_paths = port_paths[idx].path_count; 555 if (routing_for_lmc 556 && (port_paths[idx].found_sys_guid 557 || port_paths[idx].found_node_guid) 558 && port_paths[idx].forwarded_to < least_forwarded_to) 559 least_forwarded_to = port_paths[idx].forwarded_to; 560 } 561 else if (routing_for_lmc 562 && (port_paths[idx].found_sys_guid 563 || port_paths[idx].found_node_guid) 564 && port_paths[idx].path_count == least_paths 565 && port_paths[idx].forwarded_to < least_forwarded_to) { 566 least_forwarded_to = port_paths[idx].forwarded_to; 567 best_port = port_paths[idx].port_num; 568 } 569 570 } 571 } 572 573 /* 574 if we are in enhanced routing mode and the best port is not 575 the local port 0 576 */ 577 if (routing_for_lmc && best_port && !scatter_ports) { 578 /* Select the least hop port of the non used sys first */ 579 if (best_port_other_sys) 580 best_port = best_port_other_sys; 581 else if (best_port_other_node) 582 best_port = best_port_other_node; 583 } else if (scatter_ports) { 584 /* 585 * There is some danger that this random could "rebalance" the routes 586 * every time, to combat this there is a global srandom that 587 * occurs at the start of every sweep. 588 */ 589 unsigned int idx = random() % scatter_possible_ports_count; 590 best_port = scatter_possible_ports[idx]; 591 } 592 return best_port; 593 } 594 595 void osm_switch_clear_hops(IN osm_switch_t * p_sw) 596 { 597 unsigned i; 598 599 for (i = 0; i < p_sw->num_hops; i++) 600 if (p_sw->hops[i]) 601 memset(p_sw->hops[i], OSM_NO_PATH, p_sw->num_ports); 602 } 603 604 static int alloc_lft(IN osm_switch_t * p_sw, uint16_t lids) 605 { 606 uint16_t lft_size; 607 608 /* Ensure LFT is in units of LFT block size */ 609 lft_size = (lids / IB_SMP_DATA_SIZE + 1) * IB_SMP_DATA_SIZE; 610 if (lft_size > p_sw->lft_size) { 611 uint8_t *new_lft = realloc(p_sw->lft, lft_size); 612 if (!new_lft) 613 return -1; 614 memset(new_lft + p_sw->lft_size, OSM_NO_PATH, 615 lft_size - p_sw->lft_size); 616 p_sw->lft = new_lft; 617 p_sw->lft_size = lft_size; 618 } 619 620 return 0; 621 } 622 623 int osm_switch_prepare_path_rebuild(IN osm_switch_t * p_sw, IN uint16_t max_lids) 624 { 625 uint8_t **hops; 626 uint8_t *new_lft; 627 unsigned i; 628 629 if (alloc_lft(p_sw, max_lids)) 630 return -1; 631 632 for (i = 0; i < p_sw->num_ports; i++) 633 osm_port_prof_construct(&p_sw->p_prof[i]); 634 635 osm_switch_clear_hops(p_sw); 636 637 if (!(new_lft = realloc(p_sw->new_lft, p_sw->lft_size))) 638 return -1; 639 640 p_sw->new_lft = new_lft; 641 642 memset(p_sw->new_lft, OSM_NO_PATH, p_sw->lft_size); 643 644 if (!p_sw->hops) { 645 hops = malloc((max_lids + 1) * sizeof(hops[0])); 646 if (!hops) 647 return -1; 648 memset(hops, 0, (max_lids + 1) * sizeof(hops[0])); 649 p_sw->hops = hops; 650 p_sw->num_hops = max_lids + 1; 651 } else if (max_lids + 1 > p_sw->num_hops) { 652 hops = realloc(p_sw->hops, (max_lids + 1) * sizeof(hops[0])); 653 if (!hops) 654 return -1; 655 memset(hops + p_sw->num_hops, 0, 656 (max_lids + 1 - p_sw->num_hops) * sizeof(hops[0])); 657 p_sw->hops = hops; 658 p_sw->num_hops = max_lids + 1; 659 } 660 p_sw->max_lid_ho = max_lids; 661 662 return 0; 663 } 664 665 uint8_t osm_switch_get_port_least_hops(IN const osm_switch_t * p_sw, 666 IN const osm_port_t * p_port) 667 { 668 uint16_t lid; 669 670 if (p_port->p_node->sw) { 671 if (p_port->p_node->sw == p_sw) 672 return 0; 673 lid = osm_node_get_base_lid(p_port->p_node, 0); 674 return osm_switch_get_least_hops(p_sw, cl_ntoh16(lid)); 675 } else { 676 osm_physp_t *p = p_port->p_physp; 677 uint8_t hops; 678 679 if (!p || !p->p_remote_physp || !p->p_remote_physp->p_node->sw) 680 return OSM_NO_PATH; 681 if (p->p_remote_physp->p_node->sw == p_sw) 682 return 1; 683 lid = osm_node_get_base_lid(p->p_remote_physp->p_node, 0); 684 hops = osm_switch_get_least_hops(p_sw, cl_ntoh16(lid)); 685 return hops != OSM_NO_PATH ? hops + 1 : OSM_NO_PATH; 686 } 687 } 688 689 uint8_t osm_switch_recommend_mcast_path(IN osm_switch_t * p_sw, 690 IN osm_port_t * p_port, 691 IN uint16_t mlid_ho, 692 IN boolean_t ignore_existing) 693 { 694 uint16_t base_lid; 695 uint8_t hops; 696 uint8_t port_num; 697 uint8_t num_ports; 698 uint8_t least_hops; 699 700 CL_ASSERT(mlid_ho >= IB_LID_MCAST_START_HO); 701 702 if (p_port->p_node->sw) { 703 if (p_port->p_node->sw == p_sw) 704 return 0; 705 base_lid = osm_port_get_base_lid(p_port); 706 } else { 707 osm_physp_t *p_physp = p_port->p_physp; 708 if (!p_physp || !p_physp->p_remote_physp || 709 !p_physp->p_remote_physp->p_node->sw) 710 return OSM_NO_PATH; 711 if (p_physp->p_remote_physp->p_node->sw == p_sw) 712 return p_physp->p_remote_physp->port_num; 713 base_lid = 714 osm_node_get_base_lid(p_physp->p_remote_physp->p_node, 0); 715 } 716 base_lid = cl_ntoh16(base_lid); 717 num_ports = p_sw->num_ports; 718 719 /* 720 If the user wants us to ignore existing multicast routes, 721 then simply return the shortest hop count path to the 722 target port. 723 724 Otherwise, return the first port that has a path to the target, 725 picking from the ports that are already in the multicast group. 726 */ 727 if (!ignore_existing) { 728 for (port_num = 1; port_num < num_ports; port_num++) { 729 if (!osm_mcast_tbl_is_port 730 (&p_sw->mcast_tbl, mlid_ho, port_num)) 731 continue; 732 /* 733 Don't be too trusting of the current forwarding table! 734 Verify that the LID is reachable through this port. 735 */ 736 hops = 737 osm_switch_get_hop_count(p_sw, base_lid, port_num); 738 if (hops != OSM_NO_PATH) 739 return port_num; 740 } 741 } 742 743 /* 744 Either no existing mcast paths reach this port or we are 745 ignoring existing paths. 746 747 Determine the best multicast path to the target. Note that this 748 algorithm is slightly different from the one used for unicast route 749 recommendation. In this case (multicast), we must NOT 750 perform any sort of load balancing. We MUST take the FIRST 751 port found that has <= the lowest hop count path. This prevents 752 more than one multicast path to the same remote switch which 753 prevents a multicast loop. Multicast loops are bad since the same 754 multicast packet will go around and around, inevitably creating 755 a black hole that will destroy the Earth in a firey conflagration. 756 */ 757 least_hops = osm_switch_get_least_hops(p_sw, base_lid); 758 if (least_hops == OSM_NO_PATH) 759 return OSM_NO_PATH; 760 for (port_num = 1; port_num < num_ports; port_num++) 761 if (osm_switch_get_hop_count(p_sw, base_lid, port_num) == 762 least_hops) 763 break; 764 765 CL_ASSERT(port_num < num_ports); 766 return port_num; 767 } 768