1 /* 2 * Copyright (c) 2006-2009 Voltaire, Inc. All rights reserved. 3 * Copyright (c) 2002-2011 Mellanox Technologies LTD. All rights reserved. 4 * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. 5 * Copyright (c) 2013 Oracle and/or its affiliates. All rights reserved. 6 * 7 * This software is available to you under a choice of one of two 8 * licenses. You may choose to be licensed under the terms of the GNU 9 * General Public License (GPL) Version 2, available from the file 10 * COPYING in the main directory of this source tree, or the 11 * OpenIB.org BSD license below: 12 * 13 * Redistribution and use in source and binary forms, with or 14 * without modification, are permitted provided that the following 15 * conditions are met: 16 * 17 * - Redistributions of source code must retain the above 18 * copyright notice, this list of conditions and the following 19 * disclaimer. 20 * 21 * - Redistributions in binary form must reproduce the above 22 * copyright notice, this list of conditions and the following 23 * disclaimer in the documentation and/or other materials 24 * provided with the distribution. 25 * 26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 33 * SOFTWARE. 34 * 35 */ 36 37 /* 38 * Abstract: 39 * Implementation of osm_mpr_rcv_t. 40 * This object represents the MultiPath Record Receiver object. 41 * This object is part of the opensm family of objects. 42 */ 43 44 #if HAVE_CONFIG_H 45 # include <config.h> 46 #endif /* HAVE_CONFIG_H */ 47 48 #if defined (VENDOR_RMPP_SUPPORT) && defined (DUAL_SIDED_RMPP) 49 50 #include <string.h> 51 #include <iba/ib_types.h> 52 #include <complib/cl_qmap.h> 53 #include <complib/cl_passivelock.h> 54 #include <complib/cl_debug.h> 55 #include <complib/cl_qlist.h> 56 #include <opensm/osm_file_ids.h> 57 #define FILE_ID OSM_FILE_SA_MULTIPATH_RECORD_C 58 #include <vendor/osm_vendor_api.h> 59 #include <opensm/osm_port.h> 60 #include <opensm/osm_node.h> 61 #include <opensm/osm_switch.h> 62 #include <opensm/osm_partition.h> 63 #include <opensm/osm_helper.h> 64 #include <opensm/osm_qos_policy.h> 65 #include <opensm/osm_sa.h> 66 67 #define OSM_SA_MPR_MAX_NUM_PATH 127 68 #define MAX_HOPS 64 69 70 #define SA_MPR_RESP_SIZE SA_ITEM_RESP_SIZE(mpr_rec) 71 72 static boolean_t sa_multipath_rec_is_tavor_port(IN const osm_port_t * p_port) 73 { 74 osm_node_t const *p_node; 75 ib_net32_t vend_id; 76 77 p_node = p_port->p_node; 78 vend_id = ib_node_info_get_vendor_id(&p_node->node_info); 79 80 return ((p_node->node_info.device_id == CL_HTON16(23108)) && 81 ((vend_id == CL_HTON32(OSM_VENDOR_ID_MELLANOX)) || 82 (vend_id == CL_HTON32(OSM_VENDOR_ID_TOPSPIN)) || 83 (vend_id == CL_HTON32(OSM_VENDOR_ID_SILVERSTORM)) || 84 (vend_id == CL_HTON32(OSM_VENDOR_ID_VOLTAIRE)))); 85 } 86 87 static boolean_t 88 sa_multipath_rec_apply_tavor_mtu_limit(IN const ib_multipath_rec_t * p_mpr, 89 IN const osm_port_t * p_src_port, 90 IN const osm_port_t * p_dest_port, 91 IN const ib_net64_t comp_mask) 92 { 93 uint8_t required_mtu; 94 95 /* only if at least one of the ports is a Tavor device */ 96 if (!sa_multipath_rec_is_tavor_port(p_src_port) && 97 !sa_multipath_rec_is_tavor_port(p_dest_port)) 98 return FALSE; 99 100 /* 101 we can apply the patch if either: 102 1. No MTU required 103 2. Required MTU < 104 3. Required MTU = 1K or 512 or 256 105 4. Required MTU > 256 or 512 106 */ 107 required_mtu = ib_multipath_rec_mtu(p_mpr); 108 if ((comp_mask & IB_MPR_COMPMASK_MTUSELEC) && 109 (comp_mask & IB_MPR_COMPMASK_MTU)) { 110 switch (ib_multipath_rec_mtu_sel(p_mpr)) { 111 case 0: /* must be greater than */ 112 case 2: /* exact match */ 113 if (IB_MTU_LEN_1024 < required_mtu) 114 return FALSE; 115 break; 116 117 case 1: /* must be less than */ 118 /* can't be disqualified by this one */ 119 break; 120 121 case 3: /* largest available */ 122 /* the ULP intentionally requested */ 123 /* the largest MTU possible */ 124 return FALSE; 125 break; 126 127 default: 128 /* if we're here, there's a bug in ib_multipath_rec_mtu_sel() */ 129 CL_ASSERT(FALSE); 130 break; 131 } 132 } 133 134 return TRUE; 135 } 136 137 static ib_api_status_t mpr_rcv_get_path_parms(IN osm_sa_t * sa, 138 IN const ib_multipath_rec_t * 139 p_mpr, 140 IN const osm_alias_guid_t * p_src_alias_guid, 141 IN const osm_alias_guid_t * p_dest_alias_guid, 142 IN const uint16_t src_lid_ho, 143 IN const uint16_t dest_lid_ho, 144 IN const ib_net64_t comp_mask, 145 OUT osm_path_parms_t * p_parms) 146 { 147 const osm_node_t *p_node; 148 const osm_physp_t *p_physp, *p_physp0; 149 const osm_physp_t *p_src_physp; 150 const osm_physp_t *p_dest_physp; 151 const osm_prtn_t *p_prtn = NULL; 152 const ib_port_info_t *p_pi, *p_pi0; 153 ib_slvl_table_t *p_slvl_tbl; 154 ib_api_status_t status = IB_SUCCESS; 155 uint8_t mtu; 156 uint8_t rate, p0_extended_rate, dest_rate; 157 uint8_t pkt_life; 158 uint8_t required_mtu; 159 uint8_t required_rate; 160 ib_net16_t required_pkey; 161 uint8_t required_sl; 162 uint8_t required_pkt_life; 163 ib_net16_t dest_lid; 164 int hops = 0; 165 int in_port_num = 0; 166 uint8_t i; 167 osm_qos_level_t *p_qos_level = NULL; 168 uint16_t valid_sl_mask = 0xffff; 169 int extended, p0_extended; 170 171 OSM_LOG_ENTER(sa->p_log); 172 173 dest_lid = cl_hton16(dest_lid_ho); 174 175 p_dest_physp = p_dest_alias_guid->p_base_port->p_physp; 176 p_physp = p_src_alias_guid->p_base_port->p_physp; 177 p_src_physp = p_physp; 178 p_pi = &p_physp->port_info; 179 180 mtu = ib_port_info_get_mtu_cap(p_pi); 181 extended = p_pi->capability_mask & IB_PORT_CAP_HAS_EXT_SPEEDS; 182 rate = ib_port_info_compute_rate(p_pi, extended); 183 184 /* 185 Mellanox Tavor device performance is better using 1K MTU. 186 If required MTU and MTU selector are such that 1K is OK 187 and at least one end of the path is Tavor we override the 188 port MTU with 1K. 189 */ 190 if (sa->p_subn->opt.enable_quirks && 191 sa_multipath_rec_apply_tavor_mtu_limit(p_mpr, 192 p_src_alias_guid->p_base_port, 193 p_dest_alias_guid->p_base_port, 194 comp_mask)) 195 if (mtu > IB_MTU_LEN_1024) { 196 mtu = IB_MTU_LEN_1024; 197 OSM_LOG(sa->p_log, OSM_LOG_DEBUG, 198 "Optimized Path MTU to 1K for Mellanox Tavor device\n"); 199 } 200 201 /* 202 Walk the subnet object from source to destination, 203 tracking the most restrictive rate and mtu values along the way... 204 205 If source port node is a switch, then p_physp should 206 point to the port that routes the destination lid 207 */ 208 209 p_node = osm_physp_get_node_ptr(p_physp); 210 211 if (p_node->sw) { 212 /* 213 * Source node is a switch. 214 * Make sure that p_physp points to the out port of the 215 * switch that routes to the destination lid (dest_lid_ho) 216 */ 217 p_physp = osm_switch_get_route_by_lid(p_node->sw, dest_lid); 218 if (p_physp == 0) { 219 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4514: " 220 "Can't find routing from LID %u to LID %u on " 221 "switch %s (GUID 0x%016" PRIx64 ")\n", 222 src_lid_ho, dest_lid_ho, p_node->print_desc, 223 cl_ntoh64(osm_node_get_node_guid(p_node))); 224 status = IB_NOT_FOUND; 225 goto Exit; 226 } 227 } 228 229 if (sa->p_subn->opt.qos) { 230 231 /* 232 * Whether this node is switch or CA, the IN port for 233 * the sl2vl table is 0, because this is a source node. 234 */ 235 p_slvl_tbl = osm_physp_get_slvl_tbl(p_physp, 0); 236 237 /* update valid SLs that still exist on this route */ 238 for (i = 0; i < IB_MAX_NUM_VLS; i++) { 239 if (valid_sl_mask & (1 << i) && 240 ib_slvl_table_get(p_slvl_tbl, i) == IB_DROP_VL) 241 valid_sl_mask &= ~(1 << i); 242 } 243 if (!valid_sl_mask) { 244 OSM_LOG(sa->p_log, OSM_LOG_DEBUG, 245 "All the SLs lead to VL15 on this path\n"); 246 status = IB_NOT_FOUND; 247 goto Exit; 248 } 249 } 250 251 /* 252 * Same as above 253 */ 254 p_node = osm_physp_get_node_ptr(p_dest_physp); 255 256 if (p_node->sw) { 257 /* 258 * if destination is switch, we want p_dest_physp to point to port 0 259 */ 260 p_dest_physp = 261 osm_switch_get_route_by_lid(p_node->sw, dest_lid); 262 263 if (p_dest_physp == 0) { 264 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4515: " 265 "Can't find routing from LID %u to LID %u on " 266 "switch %s (GUID 0x%016" PRIx64 ")\n", 267 src_lid_ho, dest_lid_ho, p_node->print_desc, 268 cl_ntoh64(osm_node_get_node_guid(p_node))); 269 status = IB_NOT_FOUND; 270 goto Exit; 271 } 272 273 } 274 275 /* 276 * Now go through the path step by step 277 */ 278 279 while (p_physp != p_dest_physp) { 280 281 int tmp_pnum = p_physp->port_num; 282 p_node = osm_physp_get_node_ptr(p_physp); 283 p_physp = osm_physp_get_remote(p_physp); 284 285 if (p_physp == 0) { 286 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4505: " 287 "Can't find remote phys port of %s (GUID " 288 "0x%016" PRIx64 ") port %d " 289 "while routing from LID %u to LID %u", 290 p_node->print_desc, 291 cl_ntoh64(osm_node_get_node_guid(p_node)), 292 tmp_pnum, src_lid_ho, dest_lid_ho); 293 status = IB_ERROR; 294 goto Exit; 295 } 296 297 /* update number of hops traversed */ 298 hops++; 299 if (hops > MAX_HOPS) { 300 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4520: " 301 "Path from GUID 0x%016" PRIx64 " (%s) to" 302 " lid %u GUID 0x%016" PRIx64 " (%s) needs" 303 " more than %d hops, max %d hops allowed\n", 304 cl_ntoh64(osm_physp_get_port_guid(p_src_physp)), 305 p_src_physp->p_node->print_desc, dest_lid_ho, 306 cl_ntoh64(osm_physp_get_port_guid 307 (p_dest_physp)), 308 p_dest_physp->p_node->print_desc, hops, 309 MAX_HOPS); 310 status = IB_NOT_FOUND; 311 goto Exit; 312 } 313 314 in_port_num = osm_physp_get_port_num(p_physp); 315 316 /* 317 This is point to point case (no switch in between) 318 */ 319 if (p_physp == p_dest_physp) 320 break; 321 322 p_node = osm_physp_get_node_ptr(p_physp); 323 324 if (!p_node->sw) { 325 /* 326 There is some sort of problem in the subnet object! 327 If this isn't a switch, we should have reached 328 the destination by now! 329 */ 330 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4503: " 331 "Internal error, bad path while routing " 332 "from %s (GUID: 0x%016"PRIx64") port %d " 333 "to %s (GUID: 0x%016"PRIx64") port %d; " 334 "ended at %s port %d\n", 335 p_src_alias_guid->p_base_port->p_node->print_desc, 336 cl_ntoh64(p_src_alias_guid->p_base_port->p_node->node_info.node_guid), 337 p_src_alias_guid->p_base_port->p_physp->port_num, 338 p_dest_alias_guid->p_base_port->p_node->print_desc, 339 cl_ntoh64(p_dest_alias_guid->p_base_port->p_node->node_info.node_guid), 340 p_dest_alias_guid->p_base_port->p_physp->port_num, 341 p_node->print_desc, 342 p_physp->port_num); 343 status = IB_ERROR; 344 goto Exit; 345 } 346 347 /* 348 Check parameters for the ingress port in this switch. 349 */ 350 p_pi = &p_physp->port_info; 351 352 if (mtu > ib_port_info_get_mtu_cap(p_pi)) 353 mtu = ib_port_info_get_mtu_cap(p_pi); 354 355 p_physp0 = osm_node_get_physp_ptr((osm_node_t *)p_node, 0); 356 p_pi0 = &p_physp0->port_info; 357 p0_extended = p_pi0->capability_mask & IB_PORT_CAP_HAS_EXT_SPEEDS; 358 p0_extended_rate = ib_port_info_compute_rate(p_pi, p0_extended); 359 if (ib_path_compare_rates(rate, p0_extended_rate) > 0) 360 rate = p0_extended_rate; 361 362 /* 363 Continue with the egress port on this switch. 364 */ 365 p_physp = osm_switch_get_route_by_lid(p_node->sw, dest_lid); 366 if (p_physp == 0) { 367 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4516: " 368 "Dead end path on switch " 369 "%s (GUID: 0x%016"PRIx64") to LID %u\n", 370 p_node->print_desc, 371 cl_ntoh64(osm_node_get_node_guid(p_node)), 372 dest_lid_ho); 373 status = IB_ERROR; 374 goto Exit; 375 } 376 377 p_pi = &p_physp->port_info; 378 379 if (mtu > ib_port_info_get_mtu_cap(p_pi)) 380 mtu = ib_port_info_get_mtu_cap(p_pi); 381 382 p_physp0 = osm_node_get_physp_ptr((osm_node_t *)p_node, 0); 383 p_pi0 = &p_physp0->port_info; 384 p0_extended = p_pi0->capability_mask & IB_PORT_CAP_HAS_EXT_SPEEDS; 385 p0_extended_rate = ib_port_info_compute_rate(p_pi, p0_extended); 386 if (ib_path_compare_rates(rate, p0_extended_rate) > 0) 387 rate = p0_extended_rate; 388 389 if (sa->p_subn->opt.qos) { 390 /* 391 * Check SL2VL table of the switch and update valid SLs 392 */ 393 p_slvl_tbl = 394 osm_physp_get_slvl_tbl(p_physp, in_port_num); 395 for (i = 0; i < IB_MAX_NUM_VLS; i++) { 396 if (valid_sl_mask & (1 << i) && 397 ib_slvl_table_get(p_slvl_tbl, 398 i) == IB_DROP_VL) 399 valid_sl_mask &= ~(1 << i); 400 } 401 if (!valid_sl_mask) { 402 OSM_LOG(sa->p_log, OSM_LOG_DEBUG, 403 "All the SLs lead to VL15 " 404 "on this path\n"); 405 status = IB_NOT_FOUND; 406 goto Exit; 407 } 408 } 409 } 410 411 /* 412 p_physp now points to the destination 413 */ 414 p_pi = &p_physp->port_info; 415 416 if (mtu > ib_port_info_get_mtu_cap(p_pi)) 417 mtu = ib_port_info_get_mtu_cap(p_pi); 418 419 extended = p_pi->capability_mask & IB_PORT_CAP_HAS_EXT_SPEEDS; 420 dest_rate = ib_port_info_compute_rate(p_pi, extended); 421 if (ib_path_compare_rates(rate, dest_rate) > 0) 422 rate = dest_rate; 423 424 OSM_LOG(sa->p_log, OSM_LOG_DEBUG, 425 "Path min MTU = %u, min rate = %u\n", mtu, rate); 426 427 /* 428 * Get QoS Level object according to the MultiPath request 429 * and adjust MultiPath parameters according to QoS settings 430 */ 431 if (sa->p_subn->opt.qos && sa->p_subn->p_qos_policy && 432 (p_qos_level = 433 osm_qos_policy_get_qos_level_by_mpr(sa->p_subn->p_qos_policy, 434 p_mpr, p_src_physp, 435 p_dest_physp, comp_mask))) { 436 437 OSM_LOG(sa->p_log, OSM_LOG_DEBUG, 438 "MultiPathRecord request matches QoS Level '%s' (%s)\n", 439 p_qos_level->name, 440 p_qos_level->use ? p_qos_level->use : "no description"); 441 442 if (p_qos_level->mtu_limit_set 443 && (mtu > p_qos_level->mtu_limit)) 444 mtu = p_qos_level->mtu_limit; 445 446 if (p_qos_level->rate_limit_set 447 && (ib_path_compare_rates(rate, p_qos_level->rate_limit) > 0)) 448 rate = p_qos_level->rate_limit; 449 450 if (p_qos_level->sl_set) { 451 required_sl = p_qos_level->sl; 452 if (!(valid_sl_mask & (1 << required_sl))) { 453 status = IB_NOT_FOUND; 454 goto Exit; 455 } 456 } 457 } 458 459 /* 460 Determine if these values meet the user criteria 461 */ 462 463 /* we silently ignore cases where only the MTU selector is defined */ 464 if ((comp_mask & IB_MPR_COMPMASK_MTUSELEC) && 465 (comp_mask & IB_MPR_COMPMASK_MTU)) { 466 required_mtu = ib_multipath_rec_mtu(p_mpr); 467 switch (ib_multipath_rec_mtu_sel(p_mpr)) { 468 case 0: /* must be greater than */ 469 if (mtu <= required_mtu) 470 status = IB_NOT_FOUND; 471 break; 472 473 case 1: /* must be less than */ 474 if (mtu >= required_mtu) { 475 /* adjust to use the highest mtu 476 lower then the required one */ 477 if (required_mtu > 1) 478 mtu = required_mtu - 1; 479 else 480 status = IB_NOT_FOUND; 481 } 482 break; 483 484 case 2: /* exact match */ 485 if (mtu < required_mtu) 486 status = IB_NOT_FOUND; 487 else 488 mtu = required_mtu; 489 break; 490 491 case 3: /* largest available */ 492 /* can't be disqualified by this one */ 493 break; 494 495 default: 496 /* if we're here, there's a bug in ib_multipath_rec_mtu_sel() */ 497 CL_ASSERT(FALSE); 498 status = IB_ERROR; 499 break; 500 } 501 } 502 if (status != IB_SUCCESS) 503 goto Exit; 504 505 /* we silently ignore cases where only the Rate selector is defined */ 506 if ((comp_mask & IB_MPR_COMPMASK_RATESELEC) && 507 (comp_mask & IB_MPR_COMPMASK_RATE)) { 508 required_rate = ib_multipath_rec_rate(p_mpr); 509 switch (ib_multipath_rec_rate_sel(p_mpr)) { 510 case 0: /* must be greater than */ 511 if (ib_path_compare_rates(rate, required_rate) <= 0) 512 status = IB_NOT_FOUND; 513 break; 514 515 case 1: /* must be less than */ 516 if (ib_path_compare_rates(rate, required_rate) >= 0) { 517 /* adjust the rate to use the highest rate 518 lower then the required one */ 519 rate = ib_path_rate_get_prev(required_rate); 520 if (!rate) 521 status = IB_NOT_FOUND; 522 } 523 break; 524 525 case 2: /* exact match */ 526 if (ib_path_compare_rates(rate, required_rate)) 527 status = IB_NOT_FOUND; 528 else 529 rate = required_rate; 530 break; 531 532 case 3: /* largest available */ 533 /* can't be disqualified by this one */ 534 break; 535 536 default: 537 /* if we're here, there's a bug in ib_multipath_rec_mtu_sel() */ 538 CL_ASSERT(FALSE); 539 status = IB_ERROR; 540 break; 541 } 542 } 543 if (status != IB_SUCCESS) 544 goto Exit; 545 546 /* Verify the pkt_life_time */ 547 /* According to spec definition IBA 1.2 Table 205 PacketLifeTime description, 548 for loopback paths, packetLifeTime shall be zero. */ 549 if (p_src_alias_guid->p_base_port == p_dest_alias_guid->p_base_port) 550 pkt_life = 0; /* loopback */ 551 else if (p_qos_level && p_qos_level->pkt_life_set) 552 pkt_life = p_qos_level->pkt_life; 553 else 554 pkt_life = sa->p_subn->opt.subnet_timeout; 555 556 /* we silently ignore cases where only the PktLife selector is defined */ 557 if ((comp_mask & IB_MPR_COMPMASK_PKTLIFETIMESELEC) && 558 (comp_mask & IB_MPR_COMPMASK_PKTLIFETIME)) { 559 required_pkt_life = ib_multipath_rec_pkt_life(p_mpr); 560 switch (ib_multipath_rec_pkt_life_sel(p_mpr)) { 561 case 0: /* must be greater than */ 562 if (pkt_life <= required_pkt_life) 563 status = IB_NOT_FOUND; 564 break; 565 566 case 1: /* must be less than */ 567 if (pkt_life >= required_pkt_life) { 568 /* adjust the lifetime to use the highest possible 569 lower then the required one */ 570 if (required_pkt_life > 1) 571 pkt_life = required_pkt_life - 1; 572 else 573 status = IB_NOT_FOUND; 574 } 575 break; 576 577 case 2: /* exact match */ 578 if (pkt_life < required_pkt_life) 579 status = IB_NOT_FOUND; 580 else 581 pkt_life = required_pkt_life; 582 break; 583 584 case 3: /* smallest available */ 585 /* can't be disqualified by this one */ 586 break; 587 588 default: 589 /* if we're here, there's a bug in ib_path_rec_pkt_life_sel() */ 590 CL_ASSERT(FALSE); 591 status = IB_ERROR; 592 break; 593 } 594 } 595 596 if (status != IB_SUCCESS) 597 goto Exit; 598 599 /* 600 * set Pkey for this MultiPath record request 601 */ 602 603 if (comp_mask & IB_MPR_COMPMASK_RAWTRAFFIC && 604 cl_ntoh32(p_mpr->hop_flow_raw) & (1 << 31)) 605 required_pkey = 606 osm_physp_find_common_pkey(p_src_physp, p_dest_physp, 607 sa->p_subn->opt.allow_both_pkeys); 608 609 else if (comp_mask & IB_MPR_COMPMASK_PKEY) { 610 /* 611 * MPR request has a specific pkey: 612 * Check that source and destination share this pkey. 613 * If QoS level has pkeys, check that this pkey exists 614 * in the QoS level pkeys. 615 * MPR returned pkey is the requested pkey. 616 */ 617 required_pkey = p_mpr->pkey; 618 if (!osm_physp_share_this_pkey 619 (p_src_physp, p_dest_physp, required_pkey, 620 sa->p_subn->opt.allow_both_pkeys)) { 621 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4518: " 622 "Ports src 0x%016"PRIx64" (%s port %d) " 623 "and dst 0x%016"PRIx64" (%s port %d) " 624 "do not share the specified PKey 0x%04x\n", 625 cl_ntoh64(osm_physp_get_port_guid(p_src_physp)), 626 p_src_physp->p_node->print_desc, 627 p_src_physp->port_num, 628 cl_ntoh64(osm_physp_get_port_guid 629 (p_dest_physp)), 630 p_dest_physp->p_node->print_desc, 631 p_dest_physp->port_num, 632 cl_ntoh16(required_pkey)); 633 status = IB_NOT_FOUND; 634 goto Exit; 635 } 636 if (p_qos_level && p_qos_level->pkey_range_len && 637 !osm_qos_level_has_pkey(p_qos_level, required_pkey)) { 638 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 451C: " 639 "Ports src 0x%016"PRIx64" (%s port %d) " 640 "and dst 0x%016"PRIx64" (%s port %d) " 641 "do not share specified PKey (0x%04x) as " 642 "defined by QoS level \"%s\"\n", 643 cl_ntoh64(osm_physp_get_port_guid(p_src_physp)), 644 p_src_physp->p_node->print_desc, 645 p_src_physp->port_num, 646 cl_ntoh64(osm_physp_get_port_guid 647 (p_dest_physp)), 648 p_dest_physp->p_node->print_desc, 649 p_dest_physp->port_num, 650 cl_ntoh16(required_pkey), 651 p_qos_level->name); 652 status = IB_NOT_FOUND; 653 goto Exit; 654 } 655 656 } else if (p_qos_level && p_qos_level->pkey_range_len) { 657 /* 658 * MPR request doesn't have a specific pkey, but QoS level 659 * has pkeys - get shared pkey from QoS level pkeys 660 */ 661 required_pkey = osm_qos_level_get_shared_pkey(p_qos_level, 662 p_src_physp, 663 p_dest_physp, 664 sa->p_subn->opt.allow_both_pkeys); 665 if (!required_pkey) { 666 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 451D: " 667 "Ports src 0x%016"PRIx64" (%s port %d) " 668 "and dst 0x%016"PRIx64" (%s port %d) " 669 "do not share a PKey as defined by QoS " 670 "level \"%s\"\n", 671 cl_ntoh64(osm_physp_get_port_guid(p_src_physp)), 672 p_src_physp->p_node->print_desc, 673 p_src_physp->port_num, 674 cl_ntoh64(osm_physp_get_port_guid 675 (p_dest_physp)), 676 p_dest_physp->p_node->print_desc, 677 p_dest_physp->port_num, 678 p_qos_level->name); 679 status = IB_NOT_FOUND; 680 goto Exit; 681 } 682 683 } else { 684 /* 685 * Neither MPR request nor QoS level have pkey. 686 * Just get any shared pkey. 687 */ 688 required_pkey = 689 osm_physp_find_common_pkey(p_src_physp, p_dest_physp, 690 sa->p_subn->opt.allow_both_pkeys); 691 if (!required_pkey) { 692 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4519: " 693 "Ports src 0x%016"PRIx64" (%s port %d) " 694 "and dst 0x%016"PRIx64" (%s port %d) " 695 "do not have any shared PKeys\n", 696 cl_ntoh64(osm_physp_get_port_guid(p_src_physp)), 697 p_src_physp->p_node->print_desc, 698 p_src_physp->port_num, 699 cl_ntoh64(osm_physp_get_port_guid 700 (p_dest_physp)), 701 p_dest_physp->p_node->print_desc, 702 p_dest_physp->port_num); 703 status = IB_NOT_FOUND; 704 goto Exit; 705 } 706 } 707 708 if (required_pkey) { 709 p_prtn = 710 (osm_prtn_t *) cl_qmap_get(&sa->p_subn->prtn_pkey_tbl, 711 required_pkey & 712 cl_ntoh16((uint16_t) ~ 0x8000)); 713 if (p_prtn == 714 (osm_prtn_t *) cl_qmap_end(&sa->p_subn->prtn_pkey_tbl)) 715 p_prtn = NULL; 716 } 717 718 /* 719 * Set MultiPathRecord SL. 720 */ 721 722 if (comp_mask & IB_MPR_COMPMASK_SL) { 723 /* 724 * Specific SL was requested 725 */ 726 required_sl = ib_multipath_rec_sl(p_mpr); 727 728 if (p_qos_level && p_qos_level->sl_set && 729 p_qos_level->sl != required_sl) { 730 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 451E: " 731 "QoS constraints: required MultiPathRecord SL " 732 "(%u) doesn't match QoS policy \"%s\" SL (%u) " 733 "[%s port %d <-> %s port %d]\n", required_sl, 734 p_qos_level->name, 735 p_qos_level->sl, 736 p_src_alias_guid->p_base_port->p_node->print_desc, 737 p_src_alias_guid->p_base_port->p_physp->port_num, 738 p_dest_alias_guid->p_base_port->p_node->print_desc, 739 p_dest_alias_guid->p_base_port->p_physp->port_num); 740 status = IB_NOT_FOUND; 741 goto Exit; 742 } 743 744 } else if (p_qos_level && p_qos_level->sl_set) { 745 /* 746 * No specific SL was requested, 747 * but there is an SL in QoS level. 748 */ 749 required_sl = p_qos_level->sl; 750 751 if (required_pkey && p_prtn && p_prtn->sl != p_qos_level->sl) 752 OSM_LOG(sa->p_log, OSM_LOG_DEBUG, 753 "QoS level SL (%u) overrides partition SL (%u)\n", 754 p_qos_level->sl, p_prtn->sl); 755 756 } else if (required_pkey) { 757 /* 758 * No specific SL in request or in QoS level - use partition SL 759 */ 760 p_prtn = 761 (osm_prtn_t *) cl_qmap_get(&sa->p_subn->prtn_pkey_tbl, 762 required_pkey & 763 cl_ntoh16((uint16_t) ~ 0x8000)); 764 if (!p_prtn) { 765 required_sl = OSM_DEFAULT_SL; 766 /* this may be possible when pkey tables are created somehow in 767 previous runs or things are going wrong here */ 768 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 451A: " 769 "No partition found for PKey 0x%04x - " 770 "using default SL %d " 771 "[%s port %d <-> %s port %d]\n", 772 cl_ntoh16(required_pkey), required_sl, 773 p_src_alias_guid->p_base_port->p_node->print_desc, 774 p_src_alias_guid->p_base_port->p_physp->port_num, 775 p_dest_alias_guid->p_base_port->p_node->print_desc, 776 p_dest_alias_guid->p_base_port->p_physp->port_num); 777 } else 778 required_sl = p_prtn->sl; 779 780 } else if (sa->p_subn->opt.qos) { 781 if (valid_sl_mask & (1 << OSM_DEFAULT_SL)) 782 required_sl = OSM_DEFAULT_SL; 783 else { 784 for (i = 0; i < IB_MAX_NUM_VLS; i++) 785 if (valid_sl_mask & (1 << i)) 786 break; 787 required_sl = i; 788 } 789 } else 790 required_sl = OSM_DEFAULT_SL; 791 792 if (sa->p_subn->opt.qos && !(valid_sl_mask & (1 << required_sl))) { 793 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 451F: " 794 "Selected SL (%u) leads to VL15 " 795 "[%s port %d <-> %s port %d]\n", 796 required_sl, 797 p_src_alias_guid->p_base_port->p_node->print_desc, 798 p_src_alias_guid->p_base_port->p_physp->port_num, 799 p_dest_alias_guid->p_base_port->p_node->print_desc, 800 p_dest_alias_guid->p_base_port->p_physp->port_num); 801 status = IB_NOT_FOUND; 802 goto Exit; 803 } 804 805 /* reset pkey when raw traffic */ 806 if (comp_mask & IB_MPR_COMPMASK_RAWTRAFFIC && 807 cl_ntoh32(p_mpr->hop_flow_raw) & (1 << 31)) 808 required_pkey = 0; 809 810 p_parms->mtu = mtu; 811 p_parms->rate = rate; 812 p_parms->pkey = required_pkey; 813 p_parms->pkt_life = pkt_life; 814 p_parms->sl = required_sl; 815 p_parms->hops = hops; 816 817 OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "MultiPath params:" 818 " mtu = %u, rate = %u, packet lifetime = %u," 819 " pkey = 0x%04X, sl = %u, hops = %u\n", mtu, rate, 820 pkt_life, cl_ntoh16(required_pkey), required_sl, hops); 821 822 Exit: 823 OSM_LOG_EXIT(sa->p_log); 824 return status; 825 } 826 827 static void mpr_rcv_build_pr(IN osm_sa_t * sa, 828 IN const osm_alias_guid_t * p_src_alias_guid, 829 IN const osm_alias_guid_t * p_dest_alias_guid, 830 IN uint16_t src_lid_ho, IN uint16_t dest_lid_ho, 831 IN uint8_t preference, 832 IN const osm_path_parms_t * p_parms, 833 OUT ib_path_rec_t * p_pr) 834 { 835 const osm_physp_t *p_src_physp, *p_dest_physp; 836 837 OSM_LOG_ENTER(sa->p_log); 838 839 p_src_physp = p_src_alias_guid->p_base_port->p_physp; 840 p_dest_physp = p_dest_alias_guid->p_base_port->p_physp; 841 842 p_pr->dgid.unicast.prefix = osm_physp_get_subnet_prefix(p_dest_physp); 843 p_pr->dgid.unicast.interface_id = p_dest_alias_guid->alias_guid; 844 845 p_pr->sgid.unicast.prefix = osm_physp_get_subnet_prefix(p_src_physp); 846 p_pr->sgid.unicast.interface_id = p_src_alias_guid->alias_guid; 847 848 p_pr->dlid = cl_hton16(dest_lid_ho); 849 p_pr->slid = cl_hton16(src_lid_ho); 850 851 p_pr->hop_flow_raw &= cl_hton32(1 << 31); 852 853 p_pr->pkey = p_parms->pkey; 854 ib_path_rec_set_qos_class(p_pr, 0); 855 ib_path_rec_set_sl(p_pr, p_parms->sl); 856 p_pr->mtu = (uint8_t) (p_parms->mtu | 0x80); 857 p_pr->rate = (uint8_t) (p_parms->rate | 0x80); 858 859 /* According to 1.2 spec definition Table 205 PacketLifeTime description, 860 for loopback paths, packetLifeTime shall be zero. */ 861 if (p_src_alias_guid->p_base_port == p_dest_alias_guid->p_base_port) 862 p_pr->pkt_life = 0x80; /* loopback */ 863 else 864 p_pr->pkt_life = (uint8_t) (p_parms->pkt_life | 0x80); 865 866 p_pr->preference = preference; 867 868 /* always return num_path = 0 so this is only the reversible component */ 869 if (p_parms->reversible) 870 p_pr->num_path = 0x80; 871 872 OSM_LOG_EXIT(sa->p_log); 873 } 874 875 static osm_sa_item_t *mpr_rcv_get_lid_pair_path(IN osm_sa_t * sa, 876 IN const ib_multipath_rec_t * 877 p_mpr, 878 IN const osm_alias_guid_t * 879 p_src_alias_guid, 880 IN const osm_alias_guid_t * 881 p_dest_alias_guid, 882 IN const uint16_t src_lid_ho, 883 IN const uint16_t dest_lid_ho, 884 IN const ib_net64_t comp_mask, 885 IN const uint8_t preference) 886 { 887 osm_path_parms_t path_parms; 888 osm_path_parms_t rev_path_parms; 889 osm_sa_item_t *p_pr_item; 890 ib_api_status_t status, rev_path_status; 891 892 OSM_LOG_ENTER(sa->p_log); 893 894 OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Src LID %u, Dest LID %u\n", 895 src_lid_ho, dest_lid_ho); 896 897 p_pr_item = malloc(SA_MPR_RESP_SIZE); 898 if (p_pr_item == NULL) { 899 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4501: " 900 "Unable to allocate path record\n"); 901 goto Exit; 902 } 903 memset(p_pr_item, 0, SA_MPR_RESP_SIZE); 904 905 status = mpr_rcv_get_path_parms(sa, p_mpr, p_src_alias_guid, 906 p_dest_alias_guid, 907 src_lid_ho, dest_lid_ho, 908 comp_mask, &path_parms); 909 910 if (status != IB_SUCCESS) { 911 free(p_pr_item); 912 p_pr_item = NULL; 913 goto Exit; 914 } 915 916 /* now try the reversible path */ 917 rev_path_status = mpr_rcv_get_path_parms(sa, p_mpr, p_dest_alias_guid, 918 p_src_alias_guid, 919 dest_lid_ho, src_lid_ho, 920 comp_mask, &rev_path_parms); 921 path_parms.reversible = (rev_path_status == IB_SUCCESS); 922 923 /* did we get a Reversible Path compmask ? */ 924 /* 925 NOTE that if the reversible component = 0, it is a don't care 926 rather then requiring non-reversible paths ... 927 see Vol1 Ver1.2 p900 l16 928 */ 929 if (comp_mask & IB_MPR_COMPMASK_REVERSIBLE) { 930 if ((!path_parms.reversible && (p_mpr->num_path & 0x80))) { 931 OSM_LOG(sa->p_log, OSM_LOG_DEBUG, 932 "Requested reversible path but failed to get one\n"); 933 934 free(p_pr_item); 935 p_pr_item = NULL; 936 goto Exit; 937 } 938 } 939 940 p_pr_item->resp.mpr_rec.p_src_port = p_src_alias_guid->p_base_port; 941 p_pr_item->resp.mpr_rec.p_dest_port = p_dest_alias_guid->p_base_port; 942 p_pr_item->resp.mpr_rec.hops = path_parms.hops; 943 944 mpr_rcv_build_pr(sa, p_src_alias_guid, p_dest_alias_guid, src_lid_ho, 945 dest_lid_ho, preference, &path_parms, 946 &p_pr_item->resp.mpr_rec.path_rec); 947 948 Exit: 949 OSM_LOG_EXIT(sa->p_log); 950 return p_pr_item; 951 } 952 953 static uint32_t mpr_rcv_get_port_pair_paths(IN osm_sa_t * sa, 954 IN const ib_multipath_rec_t * p_mpr, 955 IN const osm_port_t * p_req_port, 956 IN const osm_alias_guid_t * p_src_alias_guid, 957 IN const osm_alias_guid_t * p_dest_alias_guid, 958 IN const uint32_t rem_paths, 959 IN const ib_net64_t comp_mask, 960 IN cl_qlist_t * p_list) 961 { 962 osm_sa_item_t *p_pr_item; 963 uint16_t src_lid_min_ho; 964 uint16_t src_lid_max_ho; 965 uint16_t dest_lid_min_ho; 966 uint16_t dest_lid_max_ho; 967 uint16_t src_lid_ho; 968 uint16_t dest_lid_ho; 969 uint32_t path_num = 0; 970 uint8_t preference; 971 unsigned src_offset, dest_offset; 972 973 OSM_LOG_ENTER(sa->p_log); 974 975 OSM_LOG(sa->p_log, OSM_LOG_DEBUG, 976 "Src port 0x%016" PRIx64 ", Dst port 0x%016" PRIx64 "\n", 977 cl_ntoh64(p_src_alias_guid->alias_guid), 978 cl_ntoh64(p_dest_alias_guid->alias_guid)); 979 980 /* Check that the req_port, src_port and dest_port all share a 981 pkey. The check is done on the default physical port of the ports. */ 982 if (osm_port_share_pkey(sa->p_log, p_req_port, 983 p_src_alias_guid->p_base_port, 984 sa->p_subn->opt.allow_both_pkeys) == FALSE 985 || osm_port_share_pkey(sa->p_log, p_req_port, 986 p_dest_alias_guid->p_base_port, 987 sa->p_subn->opt.allow_both_pkeys) == FALSE 988 || osm_port_share_pkey(sa->p_log, p_src_alias_guid->p_base_port, 989 p_dest_alias_guid->p_base_port, 990 sa->p_subn->opt.allow_both_pkeys) == FALSE) 991 /* One of the pairs doesn't share a pkey so the path is disqualified. */ 992 goto Exit; 993 994 /* 995 We shouldn't be here if the paths are disqualified in some way... 996 Thus, we assume every possible connection is valid. 997 998 We desire to return high-quality paths first. 999 In OpenSM, higher quality mean least overlap with other paths. 1000 This is acheived in practice by returning paths with 1001 different LID value on each end, which means these 1002 paths are more redundant that paths with the same LID repeated 1003 on one side. For example, in OpenSM the paths between two 1004 endpoints with LMC = 1 might be as follows: 1005 1006 Port A, LID 1 <-> Port B, LID 3 1007 Port A, LID 1 <-> Port B, LID 4 1008 Port A, LID 2 <-> Port B, LID 3 1009 Port A, LID 2 <-> Port B, LID 4 1010 1011 The OpenSM unicast routing algorithms attempt to disperse each path 1012 to as varied a physical path as is reasonable. 1<->3 and 1<->4 have 1013 more physical overlap (hence less redundancy) than 1<->3 and 2<->4. 1014 1015 OpenSM ranks paths in three preference groups: 1016 1017 Preference Value Description 1018 ---------------- ------------------------------------------- 1019 0 Redundant in both directions with other 1020 pref value = 0 paths 1021 1022 1 Redundant in one direction with other 1023 pref value = 0 and pref value = 1 paths 1024 1025 2 Not redundant in either direction with 1026 other paths 1027 1028 3-FF Unused 1029 1030 SA clients don't need to know these details, only that the lower 1031 preference paths are preferred, as stated in the spec. The paths 1032 may not actually be physically redundant depending on the topology 1033 of the subnet, but the point of LMC > 0 is to offer redundancy, 1034 so I assume the subnet is physically appropriate for the specified 1035 LMC value. A more advanced implementation could inspect for physical 1036 redundancy, but I'm not going to bother with that now. 1037 */ 1038 1039 osm_port_get_lid_range_ho(p_src_alias_guid->p_base_port, 1040 &src_lid_min_ho, &src_lid_max_ho); 1041 osm_port_get_lid_range_ho(p_dest_alias_guid->p_base_port, 1042 &dest_lid_min_ho, &dest_lid_max_ho); 1043 1044 OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Src LID [%u-%u], Dest LID [%u-%u]\n", 1045 src_lid_min_ho, src_lid_max_ho, 1046 dest_lid_min_ho, dest_lid_max_ho); 1047 1048 src_lid_ho = src_lid_min_ho; 1049 dest_lid_ho = dest_lid_min_ho; 1050 1051 /* 1052 Preferred paths come first in OpenSM 1053 */ 1054 preference = 0; 1055 1056 while (path_num < rem_paths) { 1057 /* 1058 These paths are "fully redundant" 1059 */ 1060 p_pr_item = mpr_rcv_get_lid_pair_path(sa, p_mpr, 1061 p_src_alias_guid, 1062 p_dest_alias_guid, 1063 src_lid_ho, dest_lid_ho, 1064 comp_mask, preference); 1065 1066 if (p_pr_item) { 1067 cl_qlist_insert_tail(p_list, &p_pr_item->list_item); 1068 ++path_num; 1069 } 1070 1071 if (++src_lid_ho > src_lid_max_ho) 1072 break; 1073 1074 if (++dest_lid_ho > dest_lid_max_ho) 1075 break; 1076 } 1077 1078 /* 1079 Check if we've accumulated all the paths that the user cares to see 1080 */ 1081 if (path_num == rem_paths) 1082 goto Exit; 1083 1084 /* 1085 Don't bother reporting preference 1 paths for now. 1086 It's more trouble than it's worth and can only occur 1087 if ports have different LMC values, which isn't supported 1088 by OpenSM right now anyway. 1089 */ 1090 preference = 2; 1091 src_lid_ho = src_lid_min_ho; 1092 dest_lid_ho = dest_lid_min_ho; 1093 src_offset = 0; 1094 dest_offset = 0; 1095 1096 /* 1097 Iterate over the remaining paths 1098 */ 1099 while (path_num < rem_paths) { 1100 dest_offset++; 1101 dest_lid_ho++; 1102 1103 if (dest_lid_ho > dest_lid_max_ho) { 1104 src_offset++; 1105 src_lid_ho++; 1106 1107 if (src_lid_ho > src_lid_max_ho) 1108 break; /* done */ 1109 1110 dest_offset = 0; 1111 dest_lid_ho = dest_lid_min_ho; 1112 } 1113 1114 /* 1115 These paths are "fully non-redundant" with paths already 1116 identified above and consequently not of much value. 1117 1118 Don't return paths we already identified above, as indicated 1119 by the offset values being equal. 1120 */ 1121 if (src_offset == dest_offset) 1122 continue; /* already reported */ 1123 1124 p_pr_item = mpr_rcv_get_lid_pair_path(sa, p_mpr, 1125 p_src_alias_guid, 1126 p_dest_alias_guid, 1127 src_lid_ho, dest_lid_ho, 1128 comp_mask, preference); 1129 1130 if (p_pr_item) { 1131 cl_qlist_insert_tail(p_list, &p_pr_item->list_item); 1132 ++path_num; 1133 } 1134 } 1135 1136 Exit: 1137 OSM_LOG_EXIT(sa->p_log); 1138 return path_num; 1139 } 1140 1141 #undef min 1142 #define min(x,y) (((x) < (y)) ? (x) : (y)) 1143 1144 static osm_sa_item_t *mpr_rcv_get_apm_port_pair_paths(IN osm_sa_t * sa, 1145 IN const 1146 ib_multipath_rec_t * 1147 p_mpr, 1148 IN const osm_alias_guid_t * 1149 p_src_alias_guid, 1150 IN const osm_alias_guid_t * 1151 p_dest_alias_guid, 1152 IN int base_offs, 1153 IN const ib_net64_t 1154 comp_mask, 1155 IN cl_qlist_t * p_list) 1156 { 1157 osm_sa_item_t *p_pr_item = 0; 1158 uint16_t src_lid_min_ho; 1159 uint16_t src_lid_max_ho; 1160 uint16_t dest_lid_min_ho; 1161 uint16_t dest_lid_max_ho; 1162 uint16_t src_lid_ho; 1163 uint16_t dest_lid_ho; 1164 unsigned iterations; 1165 int src_lids, dest_lids; 1166 1167 OSM_LOG_ENTER(sa->p_log); 1168 1169 OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "Src port 0x%016" PRIx64 ", " 1170 "Dst port 0x%016" PRIx64 ", base offs %d\n", 1171 cl_ntoh64(p_src_alias_guid->alias_guid), 1172 cl_ntoh64(p_dest_alias_guid->alias_guid), 1173 base_offs); 1174 1175 osm_port_get_lid_range_ho(p_src_alias_guid->p_base_port, 1176 &src_lid_min_ho, &src_lid_max_ho); 1177 osm_port_get_lid_range_ho(p_dest_alias_guid->p_base_port, 1178 &dest_lid_min_ho, &dest_lid_max_ho); 1179 1180 src_lid_ho = src_lid_min_ho; 1181 dest_lid_ho = dest_lid_min_ho; 1182 1183 src_lids = src_lid_max_ho - src_lid_min_ho + 1; 1184 dest_lids = dest_lid_max_ho - dest_lid_min_ho + 1; 1185 1186 src_lid_ho += base_offs % src_lids; 1187 dest_lid_ho += base_offs % dest_lids; 1188 1189 OSM_LOG(sa->p_log, OSM_LOG_DEBUG, 1190 "Src LIDs [%u-%u] hashed %u, " 1191 "Dest LIDs [%u-%u] hashed %u\n", 1192 src_lid_min_ho, src_lid_max_ho, src_lid_ho, 1193 dest_lid_min_ho, dest_lid_max_ho, dest_lid_ho); 1194 1195 iterations = min(src_lids, dest_lids); 1196 1197 while (iterations--) { 1198 /* 1199 These paths are "fully redundant" 1200 */ 1201 p_pr_item = mpr_rcv_get_lid_pair_path(sa, p_mpr, 1202 p_src_alias_guid, 1203 p_dest_alias_guid, 1204 src_lid_ho, dest_lid_ho, 1205 comp_mask, 0); 1206 1207 if (p_pr_item) { 1208 OSM_LOG(sa->p_log, OSM_LOG_DEBUG, 1209 "Found matching path from Src LID %u to Dest LID %u with %d hops\n", 1210 src_lid_ho, dest_lid_ho, p_pr_item->resp.mpr_rec.hops); 1211 break; 1212 } 1213 1214 if (++src_lid_ho > src_lid_max_ho) 1215 src_lid_ho = src_lid_min_ho; 1216 1217 if (++dest_lid_ho > dest_lid_max_ho) 1218 dest_lid_ho = dest_lid_min_ho; 1219 } 1220 1221 OSM_LOG_EXIT(sa->p_log); 1222 return p_pr_item; 1223 } 1224 1225 static ib_net16_t mpr_rcv_get_gids(IN osm_sa_t * sa, IN const ib_gid_t * gids, 1226 IN int ngids, IN int is_sgid, 1227 OUT osm_alias_guid_t ** pp_alias_guid) 1228 { 1229 osm_alias_guid_t *p_alias_guid; 1230 ib_net16_t ib_status = IB_SUCCESS; 1231 int i; 1232 1233 OSM_LOG_ENTER(sa->p_log); 1234 1235 for (i = 0; i < ngids; i++, gids++) { 1236 if (!ib_gid_is_link_local(gids)) { 1237 if ((is_sgid && ib_gid_is_multicast(gids)) || 1238 (ib_gid_get_subnet_prefix(gids) != 1239 sa->p_subn->opt.subnet_prefix)) { 1240 /* 1241 This 'error' is the client's fault (bad gid) 1242 so don't enter it as an error in our own log. 1243 Return an error response to the client. 1244 */ 1245 OSM_LOG(sa->p_log, OSM_LOG_VERBOSE, "ERR 451B: " 1246 "%sGID 0x%016" PRIx64 1247 " is multicast or non local subnet prefix\n", 1248 is_sgid ? "S" : "D", 1249 cl_ntoh64(gids->unicast.prefix)); 1250 1251 ib_status = IB_SA_MAD_STATUS_INVALID_GID; 1252 goto Exit; 1253 } 1254 } 1255 1256 p_alias_guid = 1257 osm_get_alias_guid_by_guid(sa->p_subn, 1258 gids->unicast.interface_id); 1259 if (!p_alias_guid) { 1260 /* 1261 This 'error' is the client's fault (bad gid) so 1262 don't enter it as an error in our own log. 1263 Return an error response to the client. 1264 */ 1265 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4506: " 1266 "No port with GUID 0x%016" PRIx64 "\n", 1267 cl_ntoh64(gids->unicast.interface_id)); 1268 1269 ib_status = IB_SA_MAD_STATUS_INVALID_GID; 1270 goto Exit; 1271 } 1272 1273 pp_alias_guid[i] = p_alias_guid; 1274 } 1275 1276 Exit: 1277 OSM_LOG_EXIT(sa->p_log); 1278 1279 return ib_status; 1280 } 1281 1282 static ib_net16_t mpr_rcv_get_end_points(IN osm_sa_t * sa, 1283 IN const osm_madw_t * p_madw, 1284 OUT osm_alias_guid_t ** pp_alias_guids, 1285 OUT int *nsrc, OUT int *ndest) 1286 { 1287 const ib_multipath_rec_t *p_mpr; 1288 const ib_sa_mad_t *p_sa_mad; 1289 ib_net64_t comp_mask; 1290 ib_net16_t sa_status = IB_SA_MAD_STATUS_SUCCESS; 1291 ib_gid_t *gids; 1292 1293 OSM_LOG_ENTER(sa->p_log); 1294 1295 /* 1296 Determine what fields are valid and then get a pointer 1297 to the source and destination port objects, if possible. 1298 */ 1299 p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw); 1300 p_mpr = (ib_multipath_rec_t *) ib_sa_mad_get_payload_ptr(p_sa_mad); 1301 gids = (ib_gid_t *) p_mpr->gids; 1302 1303 comp_mask = p_sa_mad->comp_mask; 1304 1305 /* 1306 Check a few easy disqualifying cases up front before getting 1307 into the endpoints. 1308 */ 1309 *nsrc = *ndest = 0; 1310 1311 if (comp_mask & IB_MPR_COMPMASK_SGIDCOUNT) { 1312 *nsrc = p_mpr->sgid_count; 1313 if (*nsrc > IB_MULTIPATH_MAX_GIDS) 1314 *nsrc = IB_MULTIPATH_MAX_GIDS; 1315 sa_status = mpr_rcv_get_gids(sa, gids, *nsrc, 1, pp_alias_guids); 1316 if (sa_status != IB_SUCCESS) 1317 goto Exit; 1318 } 1319 1320 if (comp_mask & IB_MPR_COMPMASK_DGIDCOUNT) { 1321 *ndest = p_mpr->dgid_count; 1322 if (*ndest + *nsrc > IB_MULTIPATH_MAX_GIDS) 1323 *ndest = IB_MULTIPATH_MAX_GIDS - *nsrc; 1324 sa_status = 1325 mpr_rcv_get_gids(sa, gids + *nsrc, *ndest, 0, 1326 pp_alias_guids + *nsrc); 1327 } 1328 1329 Exit: 1330 OSM_LOG_EXIT(sa->p_log); 1331 return sa_status; 1332 } 1333 1334 #define hash_lids(a, b, lmc) \ 1335 (((((a) >> (lmc)) << 4) | ((b) >> (lmc))) % 103) 1336 1337 static void mpr_rcv_get_apm_paths(IN osm_sa_t * sa, 1338 IN const ib_multipath_rec_t * p_mpr, 1339 IN const osm_port_t * p_req_port, 1340 IN osm_alias_guid_t ** _pp_alias_guids, 1341 IN const ib_net64_t comp_mask, 1342 IN cl_qlist_t * p_list) 1343 { 1344 osm_alias_guid_t *pp_alias_guids[4]; 1345 osm_sa_item_t *matrix[2][2]; 1346 int base_offs, src_lid_ho, dest_lid_ho; 1347 int sumA, sumB, minA, minB; 1348 1349 OSM_LOG_ENTER(sa->p_log); 1350 1351 /* 1352 * We want to: 1353 * 1. use different lid offsets (from base) for the resultant paths 1354 * to increase the probability of redundant paths or in case 1355 * of Clos - to ensure it (different offset => different spine!) 1356 * 2. keep consistent paths no matter of direction and order of ports 1357 * 3. distibute the lid offsets to balance the load 1358 * So, we sort the ports (within the srcs, and within the dests), 1359 * hash the lids of S0, D0 (after the sort), and call mpr_rcv_get_apm_port_pair_paths 1360 * with base_lid for S0, D0 and base_lid + 1 for S1, D1. This way we will get 1361 * always the same offsets - order independent, and make sure different spines are used. 1362 * Note that the diagonals on a Clos have the same number of hops, so it doesn't 1363 * really matter which diagonal we use. 1364 */ 1365 if (_pp_alias_guids[0]->p_base_port->guid < 1366 _pp_alias_guids[1]->p_base_port->guid) { 1367 pp_alias_guids[0] = _pp_alias_guids[0]; 1368 pp_alias_guids[1] = _pp_alias_guids[1]; 1369 } else { 1370 pp_alias_guids[0] = _pp_alias_guids[1]; 1371 pp_alias_guids[1] = _pp_alias_guids[0]; 1372 } 1373 if (_pp_alias_guids[2]->p_base_port->guid < 1374 _pp_alias_guids[3]->p_base_port->guid) { 1375 pp_alias_guids[2] = _pp_alias_guids[2]; 1376 pp_alias_guids[3] = _pp_alias_guids[3]; 1377 } else { 1378 pp_alias_guids[2] = _pp_alias_guids[3]; 1379 pp_alias_guids[3] = _pp_alias_guids[2]; 1380 } 1381 1382 src_lid_ho = osm_port_get_base_lid(pp_alias_guids[0]->p_base_port); 1383 dest_lid_ho = osm_port_get_base_lid(pp_alias_guids[2]->p_base_port); 1384 1385 base_offs = src_lid_ho < dest_lid_ho ? 1386 hash_lids(src_lid_ho, dest_lid_ho, sa->p_subn->opt.lmc) : 1387 hash_lids(dest_lid_ho, src_lid_ho, sa->p_subn->opt.lmc); 1388 1389 matrix[0][0] = 1390 mpr_rcv_get_apm_port_pair_paths(sa, p_mpr, pp_alias_guids[0], 1391 pp_alias_guids[2], base_offs, 1392 comp_mask, p_list); 1393 matrix[0][1] = 1394 mpr_rcv_get_apm_port_pair_paths(sa, p_mpr, pp_alias_guids[0], 1395 pp_alias_guids[3], base_offs, 1396 comp_mask, p_list); 1397 matrix[1][0] = 1398 mpr_rcv_get_apm_port_pair_paths(sa, p_mpr, pp_alias_guids[1], 1399 pp_alias_guids[2], base_offs + 1, 1400 comp_mask, p_list); 1401 matrix[1][1] = 1402 mpr_rcv_get_apm_port_pair_paths(sa, p_mpr, pp_alias_guids[1], 1403 pp_alias_guids[3], base_offs + 1, 1404 comp_mask, p_list); 1405 1406 OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "APM matrix:\n" 1407 "\t{0,0} 0x%X->0x%X (%d)\t| {0,1} 0x%X->0x%X (%d)\n" 1408 "\t{1,0} 0x%X->0x%X (%d)\t| {1,1} 0x%X->0x%X (%d)\n", 1409 matrix[0][0] ? matrix[0][0]->resp.mpr_rec.path_rec.slid : 0, 1410 matrix[0][0] ? matrix[0][0]->resp.mpr_rec.path_rec.dlid : 0, 1411 matrix[0][0] ? matrix[0][0]->resp.mpr_rec.hops : 0, 1412 matrix[0][1] ? matrix[0][1]->resp.mpr_rec.path_rec.slid : 0, 1413 matrix[0][1] ? matrix[0][1]->resp.mpr_rec.path_rec.dlid : 0, 1414 matrix[0][1] ? matrix[0][1]->resp.mpr_rec.hops : 0, 1415 matrix[1][0] ? matrix[1][0]->resp.mpr_rec.path_rec.slid : 0, 1416 matrix[1][0] ? matrix[1][0]->resp.mpr_rec.path_rec.dlid : 0, 1417 matrix[1][0] ? matrix[1][0]->resp.mpr_rec.hops : 0, 1418 matrix[1][1] ? matrix[1][1]->resp.mpr_rec.path_rec.slid : 0, 1419 matrix[1][1] ? matrix[1][1]->resp.mpr_rec.path_rec.dlid : 0, 1420 matrix[1][1] ? matrix[1][1]->resp.mpr_rec.hops : 0); 1421 1422 sumA = minA = sumB = minB = 0; 1423 1424 /* check diagonal A {(0,0), (1,1)} */ 1425 if (matrix[0][0]) { 1426 sumA += matrix[0][0]->resp.mpr_rec.hops; 1427 minA = matrix[0][0]->resp.mpr_rec.hops; 1428 } 1429 if (matrix[1][1]) { 1430 sumA += matrix[1][1]->resp.mpr_rec.hops; 1431 if (minA) 1432 minA = min(minA, matrix[1][1]->resp.mpr_rec.hops); 1433 else 1434 minA = matrix[1][1]->resp.mpr_rec.hops; 1435 } 1436 1437 /* check diagonal B {(0,1), (1,0)} */ 1438 if (matrix[0][1]) { 1439 sumB += matrix[0][1]->resp.mpr_rec.hops; 1440 minB = matrix[0][1]->resp.mpr_rec.hops; 1441 } 1442 if (matrix[1][0]) { 1443 sumB += matrix[1][0]->resp.mpr_rec.hops; 1444 if (minB) 1445 minB = min(minB, matrix[1][0]->resp.mpr_rec.hops); 1446 else 1447 minB = matrix[1][0]->resp.mpr_rec.hops; 1448 } 1449 1450 /* and the winner is... */ 1451 if (minA <= minB || (minA == minB && sumA < sumB)) { 1452 /* Diag A */ 1453 OSM_LOG(sa->p_log, OSM_LOG_DEBUG, 1454 "Diag {0,0} & {1,1} is the best:\n" 1455 "\t{0,0} 0x%X->0x%X (%d)\t & {1,1} 0x%X->0x%X (%d)\n", 1456 matrix[0][0] ? matrix[0][0]->resp.mpr_rec.path_rec.slid : 0, 1457 matrix[0][0] ? matrix[0][0]->resp.mpr_rec.path_rec.dlid : 0, 1458 matrix[0][0] ? matrix[0][0]->resp.mpr_rec.hops : 0, 1459 matrix[1][1] ? matrix[1][1]->resp.mpr_rec.path_rec.slid : 0, 1460 matrix[1][1] ? matrix[1][1]->resp.mpr_rec.path_rec.dlid : 0, 1461 matrix[1][1] ? matrix[1][1]->resp.mpr_rec.hops : 0); 1462 if (matrix[0][0]) 1463 cl_qlist_insert_tail(p_list, &matrix[0][0]->list_item); 1464 if (matrix[1][1]) 1465 cl_qlist_insert_tail(p_list, &matrix[1][1]->list_item); 1466 free(matrix[0][1]); 1467 free(matrix[1][0]); 1468 } else { 1469 /* Diag B */ 1470 OSM_LOG(sa->p_log, OSM_LOG_DEBUG, 1471 "Diag {0,1} & {1,0} is the best:\n" 1472 "\t{0,1} 0x%X->0x%X (%d)\t & {1,0} 0x%X->0x%X (%d)\n", 1473 matrix[0][1] ? matrix[0][1]->resp.mpr_rec.path_rec.slid : 0, 1474 matrix[0][1] ? matrix[0][1]->resp.mpr_rec.path_rec.dlid : 0, 1475 matrix[0][1] ? matrix[0][1]->resp.mpr_rec.hops : 0, 1476 matrix[1][0] ? matrix[1][0]->resp.mpr_rec.path_rec.slid : 0, 1477 matrix[1][0] ? matrix[1][0]->resp.mpr_rec.path_rec.dlid: 0, 1478 matrix[1][0] ? matrix[1][0]->resp.mpr_rec.hops : 0); 1479 if (matrix[0][1]) 1480 cl_qlist_insert_tail(p_list, &matrix[0][1]->list_item); 1481 if (matrix[1][0]) 1482 cl_qlist_insert_tail(p_list, &matrix[1][0]->list_item); 1483 free(matrix[0][0]); 1484 free(matrix[1][1]); 1485 } 1486 1487 OSM_LOG_EXIT(sa->p_log); 1488 } 1489 1490 static void mpr_rcv_process_pairs(IN osm_sa_t * sa, 1491 IN const ib_multipath_rec_t * p_mpr, 1492 IN osm_port_t * p_req_port, 1493 IN osm_alias_guid_t ** pp_alias_guids, 1494 IN const int nsrc, IN int ndest, 1495 IN ib_net64_t comp_mask, 1496 IN cl_qlist_t * p_list) 1497 { 1498 osm_alias_guid_t **pp_src_alias_guid, **pp_es; 1499 osm_alias_guid_t **pp_dest_alias_guid, **pp_ed; 1500 uint32_t max_paths, num_paths, total_paths = 0; 1501 1502 OSM_LOG_ENTER(sa->p_log); 1503 1504 if (comp_mask & IB_MPR_COMPMASK_NUMBPATH) 1505 max_paths = p_mpr->num_path & 0x7F; 1506 else 1507 max_paths = OSM_SA_MPR_MAX_NUM_PATH; 1508 1509 for (pp_src_alias_guid = pp_alias_guids, pp_es = pp_alias_guids + nsrc; 1510 pp_src_alias_guid < pp_es; pp_src_alias_guid++) { 1511 for (pp_dest_alias_guid = pp_es, pp_ed = pp_es + ndest; 1512 pp_dest_alias_guid < pp_ed; pp_dest_alias_guid++) { 1513 num_paths = 1514 mpr_rcv_get_port_pair_paths(sa, p_mpr, p_req_port, 1515 *pp_src_alias_guid, 1516 *pp_dest_alias_guid, 1517 max_paths - total_paths, 1518 comp_mask, p_list); 1519 total_paths += num_paths; 1520 OSM_LOG(sa->p_log, OSM_LOG_DEBUG, 1521 "%d paths %d total paths %d max paths\n", 1522 num_paths, total_paths, max_paths); 1523 /* Just take first NumbPaths found */ 1524 if (total_paths >= max_paths) 1525 goto Exit; 1526 } 1527 } 1528 1529 Exit: 1530 OSM_LOG_EXIT(sa->p_log); 1531 } 1532 1533 void osm_mpr_rcv_process(IN void *context, IN void *data) 1534 { 1535 osm_sa_t *sa = context; 1536 osm_madw_t *p_madw = data; 1537 const ib_multipath_rec_t *p_mpr; 1538 ib_sa_mad_t *p_sa_mad; 1539 osm_port_t *requester_port; 1540 osm_alias_guid_t *pp_alias_guids[IB_MULTIPATH_MAX_GIDS]; 1541 cl_qlist_t pr_list; 1542 ib_net16_t sa_status; 1543 int nsrc, ndest; 1544 uint8_t rate, mtu; 1545 1546 OSM_LOG_ENTER(sa->p_log); 1547 1548 CL_ASSERT(p_madw); 1549 1550 p_sa_mad = osm_madw_get_sa_mad_ptr(p_madw); 1551 p_mpr = (ib_multipath_rec_t *) ib_sa_mad_get_payload_ptr(p_sa_mad); 1552 1553 CL_ASSERT(p_sa_mad->attr_id == IB_MAD_ATTR_MULTIPATH_RECORD); 1554 1555 if ((p_sa_mad->rmpp_flags & IB_RMPP_FLAG_ACTIVE) != IB_RMPP_FLAG_ACTIVE) { 1556 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4510: " 1557 "Invalid request since RMPP_FLAG_ACTIVE is not set\n"); 1558 osm_sa_send_error(sa, p_madw, IB_SA_MAD_STATUS_REQ_INVALID); 1559 goto Exit; 1560 } 1561 1562 /* we only support SubnAdmGetMulti method */ 1563 if (p_sa_mad->method != IB_MAD_METHOD_GETMULTI) { 1564 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4513: " 1565 "Unsupported Method (%s) for MultiPathRecord request\n", 1566 ib_get_sa_method_str(p_sa_mad->method)); 1567 osm_sa_send_error(sa, p_madw, IB_MAD_STATUS_UNSUP_METHOD_ATTR); 1568 goto Exit; 1569 } 1570 1571 if (OSM_LOG_IS_ACTIVE_V2(sa->p_log, OSM_LOG_DEBUG)) 1572 osm_dump_multipath_record_v2(sa->p_log, p_mpr, FILE_ID, OSM_LOG_DEBUG); 1573 1574 /* Make sure required components (S/DGIDCount) are supplied */ 1575 if (!(p_sa_mad->comp_mask & IB_MPR_COMPMASK_SGIDCOUNT) || 1576 !(p_sa_mad->comp_mask & IB_MPR_COMPMASK_DGIDCOUNT)) { 1577 osm_sa_send_error(sa, p_madw, IB_SA_MAD_STATUS_INSUF_COMPS); 1578 goto Exit; 1579 } 1580 1581 /* Validate rate if supplied */ 1582 if ((p_sa_mad->comp_mask & IB_MPR_COMPMASK_RATESELEC) && 1583 (p_sa_mad->comp_mask & IB_MPR_COMPMASK_RATE)) { 1584 rate = ib_multipath_rec_rate(p_mpr); 1585 if (!ib_rate_is_valid(rate)) { 1586 osm_sa_send_error(sa, p_madw, 1587 IB_SA_MAD_STATUS_REQ_INVALID); 1588 goto Exit; 1589 } 1590 } 1591 /* Validate MTU if supplied */ 1592 if ((p_sa_mad->comp_mask & IB_MPR_COMPMASK_MTUSELEC) && 1593 (p_sa_mad->comp_mask & IB_MPR_COMPMASK_MTU)) { 1594 mtu = ib_multipath_rec_mtu(p_mpr); 1595 if (!ib_mtu_is_valid(mtu)) { 1596 osm_sa_send_error(sa, p_madw, 1597 IB_SA_MAD_STATUS_REQ_INVALID); 1598 goto Exit; 1599 } 1600 } 1601 1602 /* Make sure either none or both ServiceID parameters are supplied */ 1603 if ((p_sa_mad->comp_mask & IB_MPR_COMPMASK_SERVICEID) != 0 && 1604 (p_sa_mad->comp_mask & IB_MPR_COMPMASK_SERVICEID) != 1605 IB_MPR_COMPMASK_SERVICEID) { 1606 osm_sa_send_error(sa, p_madw, IB_SA_MAD_STATUS_INSUF_COMPS); 1607 goto Exit; 1608 } 1609 1610 cl_qlist_init(&pr_list); 1611 1612 /* 1613 Most SA functions (including this one) are read-only on the 1614 subnet object, so we grab the lock non-exclusively. 1615 */ 1616 cl_plock_acquire(sa->p_lock); 1617 1618 /* update the requester physical port */ 1619 requester_port = osm_get_port_by_mad_addr(sa->p_log, sa->p_subn, 1620 osm_madw_get_mad_addr_ptr 1621 (p_madw)); 1622 if (requester_port == NULL) { 1623 cl_plock_release(sa->p_lock); 1624 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4517: " 1625 "Cannot find requester physical port\n"); 1626 goto Exit; 1627 } 1628 1629 OSM_LOG(sa->p_log, OSM_LOG_DEBUG, 1630 "Requester port GUID 0x%" PRIx64 "\n", 1631 cl_ntoh64(osm_port_get_guid(requester_port))); 1632 1633 sa_status = mpr_rcv_get_end_points(sa, p_madw, pp_alias_guids, 1634 &nsrc, &ndest); 1635 1636 if (sa_status != IB_SA_MAD_STATUS_SUCCESS || !nsrc || !ndest) { 1637 cl_plock_release(sa->p_lock); 1638 if (sa_status == IB_SA_MAD_STATUS_SUCCESS && (!nsrc || !ndest)) 1639 OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 4512: " 1640 "mpr_rcv_get_end_points failed, # GIDs found; " 1641 "src %d; dest %d)\n", nsrc, ndest); 1642 if (sa_status == IB_SA_MAD_STATUS_SUCCESS) 1643 osm_sa_send_error(sa, p_madw, 1644 IB_SA_MAD_STATUS_REQ_INVALID); 1645 else 1646 osm_sa_send_error(sa, p_madw, sa_status); 1647 goto Exit; 1648 } 1649 1650 /* APM request */ 1651 if (nsrc == 2 && ndest == 2 && (p_mpr->num_path & 0x7F) == 2) 1652 mpr_rcv_get_apm_paths(sa, p_mpr, requester_port, pp_alias_guids, 1653 p_sa_mad->comp_mask, &pr_list); 1654 else 1655 mpr_rcv_process_pairs(sa, p_mpr, requester_port, pp_alias_guids, 1656 nsrc, ndest, p_sa_mad->comp_mask, 1657 &pr_list); 1658 1659 cl_plock_release(sa->p_lock); 1660 1661 /* o15-0.2.7: If MultiPath is supported, then SA shall respond to a 1662 SubnAdmGetMulti() containing a valid MultiPathRecord attribute with 1663 a set of zero or more PathRecords satisfying the constraints 1664 indicated in the MultiPathRecord received. The PathRecord Attribute 1665 ID shall be used in the response. 1666 */ 1667 p_sa_mad->attr_id = IB_MAD_ATTR_PATH_RECORD; 1668 osm_sa_respond(sa, p_madw, sizeof(ib_path_rec_t), &pr_list); 1669 1670 Exit: 1671 OSM_LOG_EXIT(sa->p_log); 1672 } 1673 #endif 1674