1 /* 2 * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. 3 * Copyright (c) 2002-2011 Mellanox Technologies LTD. All rights reserved. 4 * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. 5 * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. 6 * Copyright (c) 2009-2011 ZIH, TU Dresden, Federal Republic of Germany. All rights reserved. 7 * 8 * This software is available to you under a choice of one of two 9 * licenses. You may choose to be licensed under the terms of the GNU 10 * General Public License (GPL) Version 2, available from the file 11 * COPYING in the main directory of this source tree, or the 12 * OpenIB.org BSD license below: 13 * 14 * Redistribution and use in source and binary forms, with or 15 * without modification, are permitted provided that the following 16 * conditions are met: 17 * 18 * - Redistributions of source code must retain the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer. 21 * 22 * - Redistributions in binary form must reproduce the above 23 * copyright notice, this list of conditions and the following 24 * disclaimer in the documentation and/or other materials 25 * provided with the distribution. 26 * 27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 28 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 29 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 30 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 31 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 34 * SOFTWARE. 35 * 36 */ 37 38 /* 39 * Abstract: 40 * Implementation of osm_link_mgr_t. 41 * This file implements the Link Manager object. 42 */ 43 44 #if HAVE_CONFIG_H 45 # include <config.h> 46 #endif /* HAVE_CONFIG_H */ 47 48 #include <string.h> 49 #include <iba/ib_types.h> 50 #include <complib/cl_debug.h> 51 #include <opensm/osm_file_ids.h> 52 #define FILE_ID OSM_FILE_LINK_MGR_C 53 #include <opensm/osm_sm.h> 54 #include <opensm/osm_node.h> 55 #include <opensm/osm_switch.h> 56 #include <opensm/osm_helper.h> 57 #include <opensm/osm_msgdef.h> 58 #include <opensm/osm_opensm.h> 59 #include <opensm/osm_db_pack.h> 60 61 static uint8_t link_mgr_get_smsl(IN osm_sm_t * sm, IN osm_physp_t * p_physp) 62 { 63 osm_opensm_t *p_osm = sm->p_subn->p_osm; 64 struct osm_routing_engine *re = p_osm->routing_engine_used; 65 ib_net16_t slid; 66 ib_net16_t smlid; 67 uint8_t sl; 68 69 OSM_LOG_ENTER(sm->p_log); 70 71 if (!(re && re->path_sl && 72 (slid = osm_physp_get_base_lid(p_physp)))) { 73 /* 74 * Use default SL if routing engine does not provide a 75 * path SL lookup callback. 76 */ 77 OSM_LOG_EXIT(sm->p_log); 78 return sm->p_subn->opt.sm_sl; 79 } 80 81 smlid = sm->p_subn->sm_base_lid; 82 83 /* Call into routing engine to find proper SL */ 84 sl = re->path_sl(re->context, sm->p_subn->opt.sm_sl, 85 slid, smlid); 86 87 OSM_LOG_EXIT(sm->p_log); 88 return sl; 89 } 90 91 static int link_mgr_set_physp_pi(osm_sm_t * sm, IN osm_physp_t * p_physp, 92 IN uint8_t port_state) 93 { 94 uint8_t payload[IB_SMP_DATA_SIZE], payload2[IB_SMP_DATA_SIZE]; 95 ib_port_info_t *p_pi = (ib_port_info_t *) payload; 96 ib_mlnx_ext_port_info_t *p_epi = (ib_mlnx_ext_port_info_t *) payload2; 97 const ib_port_info_t *p_old_pi; 98 const ib_mlnx_ext_port_info_t *p_old_epi; 99 osm_madw_context_t context; 100 osm_node_t *p_node; 101 ib_api_status_t status; 102 uint8_t port_num, mtu, op_vls, smsl = OSM_DEFAULT_SL; 103 boolean_t esp0 = FALSE, send_set = FALSE, send_set2 = FALSE; 104 osm_physp_t *p_remote_physp, *physp0 = NULL; 105 int issue_ext = 0, fdr10_change = 0; 106 int ret = 0; 107 ib_net32_t attr_mod, cap_mask; 108 boolean_t update_mkey = FALSE; 109 ib_net64_t m_key = 0; 110 osm_port_t *p_port; 111 112 OSM_LOG_ENTER(sm->p_log); 113 114 p_node = osm_physp_get_node_ptr(p_physp); 115 116 p_old_pi = &p_physp->port_info; 117 118 port_num = osm_physp_get_port_num(p_physp); 119 120 memcpy(payload, p_old_pi, sizeof(ib_port_info_t)); 121 122 if (osm_node_get_type(p_node) != IB_NODE_TYPE_SWITCH || 123 port_num == 0) { 124 /* Need to make sure LID and SMLID fields in PortInfo are not 0 */ 125 if (!p_pi->base_lid) { 126 p_port = osm_get_port_by_guid(sm->p_subn, 127 osm_physp_get_port_guid(p_physp)); 128 p_pi->base_lid = p_port->lid; 129 sm->lid_mgr.dirty = TRUE; 130 send_set = TRUE; 131 } 132 133 /* we are initializing the ports with our local sm_base_lid */ 134 p_pi->master_sm_base_lid = sm->p_subn->sm_base_lid; 135 if (p_pi->master_sm_base_lid != p_old_pi->master_sm_base_lid) 136 send_set = TRUE; 137 } 138 139 if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH) 140 physp0 = osm_node_get_physp_ptr(p_node, 0); 141 142 if (port_num == 0) { 143 /* 144 CAs don't have a port 0, and for switch port 0, 145 we need to check if this is enhanced or base port 0. 146 For base port 0 the following parameters are not valid 147 (IBA 1.2.1 p.830 table 146). 148 */ 149 if (!p_node->sw) { 150 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 4201: " 151 "Cannot find switch by guid: 0x%" PRIx64 "\n", 152 cl_ntoh64(p_node->node_info.node_guid)); 153 goto Exit; 154 } 155 156 if (ib_switch_info_is_enhanced_port0(&p_node->sw->switch_info) 157 == FALSE) { 158 159 /* Even for base port 0 we might have to set smsl 160 (if we are using lash routing) */ 161 smsl = link_mgr_get_smsl(sm, p_physp); 162 if (smsl != ib_port_info_get_master_smsl(p_old_pi)) { 163 send_set = TRUE; 164 OSM_LOG(sm->p_log, OSM_LOG_DEBUG, 165 "Setting SMSL to %d on port 0 GUID 0x%016" 166 PRIx64 "\n", smsl, 167 cl_ntoh64(osm_physp_get_port_guid 168 (p_physp))); 169 /* Enter if base lid and master_sm_lid didn't change */ 170 } else if (send_set == FALSE) { 171 /* This means the switch doesn't support 172 enhanced port 0 and we don't need to 173 change SMSL. Can skip it. */ 174 OSM_LOG(sm->p_log, OSM_LOG_DEBUG, 175 "Skipping port 0, GUID 0x%016" PRIx64 176 "\n", 177 cl_ntoh64(osm_physp_get_port_guid 178 (p_physp))); 179 goto Exit; 180 } 181 } else 182 esp0 = TRUE; 183 } 184 185 /* 186 Should never write back a value that is bigger then 3 in 187 the PortPhysicalState field - so can not simply copy! 188 189 Actually we want to write there: 190 port physical state - no change, 191 link down default state = polling 192 port state - as requested. 193 */ 194 p_pi->state_info2 = 0x02; 195 ib_port_info_set_port_state(p_pi, port_state); 196 197 /* Determine ports' M_Key */ 198 if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH && 199 osm_physp_get_port_num(p_physp) != 0) 200 m_key = ib_port_info_get_m_key(&physp0->port_info); 201 else 202 m_key = ib_port_info_get_m_key(p_pi); 203 204 /* Check whether this is base port0 smsl handling only */ 205 if (port_num == 0 && esp0 == FALSE) { 206 ib_port_info_set_master_smsl(p_pi, smsl); 207 goto Send; 208 } 209 210 /* 211 PAST THIS POINT WE ARE HANDLING EITHER A NON PORT 0 OR ENHANCED PORT 0 212 */ 213 214 if (ib_port_info_get_link_down_def_state(p_pi) != 215 ib_port_info_get_link_down_def_state(p_old_pi)) 216 send_set = TRUE; 217 218 /* didn't get PortInfo before */ 219 if (!ib_port_info_get_port_state(p_old_pi)) 220 send_set = TRUE; 221 222 /* we only change port fields if we do not change state */ 223 if (port_state == IB_LINK_NO_CHANGE) { 224 /* The following fields are relevant only for CA port, router, or Enh. SP0 */ 225 if (osm_node_get_type(p_node) != IB_NODE_TYPE_SWITCH || 226 port_num == 0) { 227 p_pi->m_key = sm->p_subn->opt.m_key; 228 if (memcmp(&p_pi->m_key, &p_old_pi->m_key, 229 sizeof(p_pi->m_key))) { 230 update_mkey = TRUE; 231 send_set = TRUE; 232 } 233 234 p_pi->subnet_prefix = sm->p_subn->opt.subnet_prefix; 235 if (memcmp(&p_pi->subnet_prefix, 236 &p_old_pi->subnet_prefix, 237 sizeof(p_pi->subnet_prefix))) 238 send_set = TRUE; 239 240 smsl = link_mgr_get_smsl(sm, p_physp); 241 if (smsl != ib_port_info_get_master_smsl(p_old_pi)) { 242 243 ib_port_info_set_master_smsl(p_pi, smsl); 244 245 OSM_LOG(sm->p_log, OSM_LOG_DEBUG, 246 "Setting SMSL to %d on GUID 0x%016" 247 PRIx64 ", port %d\n", smsl, 248 cl_ntoh64(osm_physp_get_port_guid 249 (p_physp)), port_num); 250 251 send_set = TRUE; 252 } 253 254 p_pi->m_key_lease_period = 255 sm->p_subn->opt.m_key_lease_period; 256 if (memcmp(&p_pi->m_key_lease_period, 257 &p_old_pi->m_key_lease_period, 258 sizeof(p_pi->m_key_lease_period))) 259 send_set = TRUE; 260 261 p_pi->mkey_lmc = 0; 262 ib_port_info_set_mpb(p_pi, sm->p_subn->opt.m_key_protect_bits); 263 if (esp0 == FALSE || sm->p_subn->opt.lmc_esp0) 264 ib_port_info_set_lmc(p_pi, sm->p_subn->opt.lmc); 265 if (ib_port_info_get_lmc(p_old_pi) != 266 ib_port_info_get_lmc(p_pi) || 267 ib_port_info_get_mpb(p_old_pi) != 268 ib_port_info_get_mpb(p_pi)) 269 send_set = TRUE; 270 271 ib_port_info_set_timeout(p_pi, 272 sm->p_subn->opt. 273 subnet_timeout); 274 if (ib_port_info_get_timeout(p_pi) != 275 ib_port_info_get_timeout(p_old_pi)) 276 send_set = TRUE; 277 } 278 279 /* 280 Several timeout mechanisms: 281 */ 282 p_remote_physp = osm_physp_get_remote(p_physp); 283 if (port_num != 0 && p_remote_physp) { 284 if (osm_node_get_type(osm_physp_get_node_ptr(p_physp)) 285 == IB_NODE_TYPE_ROUTER) { 286 ib_port_info_set_hoq_lifetime(p_pi, 287 sm->p_subn-> 288 opt. 289 leaf_head_of_queue_lifetime); 290 } else 291 if (osm_node_get_type 292 (osm_physp_get_node_ptr(p_physp)) == 293 IB_NODE_TYPE_SWITCH) { 294 /* Is remote end CA or router (a leaf port) ? */ 295 if (osm_node_get_type 296 (osm_physp_get_node_ptr(p_remote_physp)) != 297 IB_NODE_TYPE_SWITCH) { 298 ib_port_info_set_hoq_lifetime(p_pi, 299 sm-> 300 p_subn-> 301 opt. 302 leaf_head_of_queue_lifetime); 303 ib_port_info_set_vl_stall_count(p_pi, 304 sm-> 305 p_subn-> 306 opt. 307 leaf_vl_stall_count); 308 } else { 309 ib_port_info_set_hoq_lifetime(p_pi, 310 sm-> 311 p_subn-> 312 opt. 313 head_of_queue_lifetime); 314 ib_port_info_set_vl_stall_count(p_pi, 315 sm-> 316 p_subn-> 317 opt. 318 vl_stall_count); 319 } 320 } 321 if (ib_port_info_get_hoq_lifetime(p_pi) != 322 ib_port_info_get_hoq_lifetime(p_old_pi) || 323 ib_port_info_get_vl_stall_count(p_pi) != 324 ib_port_info_get_vl_stall_count(p_old_pi)) 325 send_set = TRUE; 326 } 327 328 ib_port_info_set_phy_and_overrun_err_thd(p_pi, 329 sm->p_subn->opt. 330 local_phy_errors_threshold, 331 sm->p_subn->opt. 332 overrun_errors_threshold); 333 if (p_pi->error_threshold != p_old_pi->error_threshold) 334 send_set = TRUE; 335 336 /* 337 Set the easy common parameters for all port types, 338 then determine the neighbor MTU. 339 */ 340 p_pi->link_width_enabled = p_old_pi->link_width_supported; 341 if (p_pi->link_width_enabled != p_old_pi->link_width_enabled) 342 send_set = TRUE; 343 344 if (sm->p_subn->opt.force_link_speed && 345 (sm->p_subn->opt.force_link_speed != 15 || 346 ib_port_info_get_link_speed_enabled(p_pi) != 347 ib_port_info_get_link_speed_sup(p_pi))) { 348 ib_port_info_set_link_speed_enabled(p_pi, 349 sm->p_subn->opt. 350 force_link_speed); 351 if (p_pi->link_speed != p_old_pi->link_speed) 352 send_set = TRUE; 353 } 354 355 if (sm->p_subn->opt.fdr10 && 356 p_physp->ext_port_info.link_speed_supported & FDR10) { 357 if (sm->p_subn->opt.fdr10 == 1) { /* enable */ 358 if (!(p_physp->ext_port_info.link_speed_enabled & FDR10)) 359 fdr10_change = 1; 360 } else { /* disable */ 361 if (p_physp->ext_port_info.link_speed_enabled & FDR10) 362 fdr10_change = 1; 363 } 364 if (fdr10_change) { 365 p_old_epi = &p_physp->ext_port_info; 366 memcpy(payload2, p_old_epi, 367 sizeof(ib_mlnx_ext_port_info_t)); 368 p_epi->state_change_enable = 0x01; 369 if (sm->p_subn->opt.fdr10 == 1) 370 p_epi->link_speed_enabled = FDR10; 371 else 372 p_epi->link_speed_enabled = 0; 373 send_set2 = TRUE; 374 } 375 } 376 377 if (osm_node_get_type(p_physp->p_node) == IB_NODE_TYPE_SWITCH && 378 osm_physp_get_port_num(p_physp) != 0) { 379 cap_mask = physp0->port_info.capability_mask; 380 } else 381 cap_mask = p_pi->capability_mask; 382 383 if (cap_mask & IB_PORT_CAP_HAS_EXT_SPEEDS) 384 issue_ext = 1; 385 386 /* Do peer ports support extended link speeds ? */ 387 if (port_num != 0 && p_remote_physp) { 388 osm_physp_t *rphysp0; 389 ib_net32_t rem_cap_mask; 390 391 if (osm_node_get_type(p_remote_physp->p_node) == 392 IB_NODE_TYPE_SWITCH) { 393 rphysp0 = osm_node_get_physp_ptr(p_remote_physp->p_node, 0); 394 rem_cap_mask = rphysp0->port_info.capability_mask; 395 } else 396 rem_cap_mask = p_remote_physp->port_info.capability_mask; 397 398 if (cap_mask & IB_PORT_CAP_HAS_EXT_SPEEDS && 399 rem_cap_mask & IB_PORT_CAP_HAS_EXT_SPEEDS) { 400 if (sm->p_subn->opt.force_link_speed_ext && 401 (sm->p_subn->opt.force_link_speed_ext != IB_LINK_SPEED_EXT_SET_LSES || 402 p_pi->link_speed_ext_enabled != 403 ib_port_info_get_link_speed_ext_sup(p_pi))) { 404 p_pi->link_speed_ext_enabled = sm->p_subn->opt.force_link_speed_ext; 405 if (p_pi->link_speed_ext_enabled != 406 p_old_pi->link_speed_ext_enabled) 407 send_set = TRUE; 408 } 409 } 410 } 411 412 /* calc new op_vls and mtu */ 413 op_vls = 414 osm_physp_calc_link_op_vls(sm->p_log, sm->p_subn, p_physp, 415 ib_port_info_get_op_vls(p_old_pi)); 416 mtu = osm_physp_calc_link_mtu(sm->p_log, p_physp, 417 ib_port_info_get_neighbor_mtu(p_old_pi)); 418 419 ib_port_info_set_neighbor_mtu(p_pi, mtu); 420 if (ib_port_info_get_neighbor_mtu(p_pi) != 421 ib_port_info_get_neighbor_mtu(p_old_pi)) 422 send_set = TRUE; 423 424 ib_port_info_set_op_vls(p_pi, op_vls); 425 if (ib_port_info_get_op_vls(p_pi) != 426 ib_port_info_get_op_vls(p_old_pi)) 427 send_set = TRUE; 428 429 /* provide the vl_high_limit from the qos mgr */ 430 if (sm->p_subn->opt.qos && 431 p_physp->vl_high_limit != p_old_pi->vl_high_limit) { 432 send_set = TRUE; 433 p_pi->vl_high_limit = p_physp->vl_high_limit; 434 } 435 } 436 437 Send: 438 context.pi_context.active_transition = FALSE; 439 if (port_state != IB_LINK_NO_CHANGE && 440 port_state != ib_port_info_get_port_state(p_old_pi)) { 441 send_set = TRUE; 442 if (port_state == IB_LINK_ACTIVE) 443 context.pi_context.active_transition = TRUE; 444 } 445 446 context.pi_context.node_guid = osm_node_get_node_guid(p_node); 447 context.pi_context.port_guid = osm_physp_get_port_guid(p_physp); 448 context.pi_context.set_method = TRUE; 449 context.pi_context.light_sweep = FALSE; 450 context.pi_context.client_rereg = FALSE; 451 452 /* We need to send the PortInfoSet request with the new sm_lid 453 in the following cases: 454 1. There is a change in the values (send_set == TRUE) 455 2. This is a switch external port (so it wasn't handled yet by 456 osm_lid_mgr) and first_time_master_sweep flag on the subnet is TRUE, 457 which means the SM just became master, and it then needs to send at 458 PortInfoSet to every port. 459 */ 460 if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH && port_num 461 && sm->p_subn->first_time_master_sweep == TRUE) 462 send_set = TRUE; 463 464 if (!send_set) 465 goto SEND_EPI; 466 467 attr_mod = cl_hton32(port_num); 468 if (issue_ext) 469 attr_mod |= cl_hton32(1 << 31); /* AM SMSupportExtendedSpeeds */ 470 status = osm_req_set(sm, osm_physp_get_dr_path_ptr(p_physp), 471 payload, sizeof(payload), IB_MAD_ATTR_PORT_INFO, 472 attr_mod, FALSE, m_key, 473 CL_DISP_MSGID_NONE, &context); 474 if (status) 475 ret = -1; 476 477 /* If we sent a new mkey above, update our guid2mkey map 478 now, on the assumption that the SubnSet succeeds 479 */ 480 if (update_mkey) 481 osm_db_guid2mkey_set(sm->p_subn->p_g2m, 482 cl_ntoh64(p_physp->port_guid), 483 cl_ntoh64(p_pi->m_key)); 484 485 SEND_EPI: 486 if (send_set2) { 487 status = osm_req_set(sm, osm_physp_get_dr_path_ptr(p_physp), 488 payload2, sizeof(payload2), 489 IB_MAD_ATTR_MLNX_EXTENDED_PORT_INFO, 490 cl_hton32(port_num), FALSE, m_key, 491 CL_DISP_MSGID_NONE, &context); 492 if (status) 493 ret = -1; 494 } 495 496 Exit: 497 OSM_LOG_EXIT(sm->p_log); 498 return ret; 499 } 500 501 static int link_mgr_process_node(osm_sm_t * sm, IN osm_node_t * p_node, 502 IN const uint8_t link_state) 503 { 504 osm_physp_t *p_physp, *p_physp_remote; 505 uint32_t i, num_physp; 506 int ret = 0; 507 uint8_t current_state; 508 509 OSM_LOG_ENTER(sm->p_log); 510 511 OSM_LOG(sm->p_log, OSM_LOG_DEBUG, 512 "Node 0x%" PRIx64 " going to %s\n", 513 cl_ntoh64(osm_node_get_node_guid(p_node)), 514 ib_get_port_state_str(link_state)); 515 516 /* 517 Set the PortInfo for every Physical Port associated 518 with this Port. Start iterating with port 1, since the linkstate 519 is not applicable to the management port on switches. 520 */ 521 num_physp = osm_node_get_num_physp(p_node); 522 for (i = 0; i < num_physp; i++) { 523 /* 524 Don't bother doing anything if this Physical Port is not valid. 525 or if the state of the port is already better then the 526 specified state. 527 */ 528 p_physp = osm_node_get_physp_ptr(p_node, (uint8_t) i); 529 if (!p_physp) 530 continue; 531 532 current_state = osm_physp_get_port_state(p_physp); 533 if (current_state == IB_LINK_DOWN) 534 continue; 535 536 /* 537 Set PortState to DOWN in case Remote Physical Port is 538 unreachable. We have to check this for all ports, except 539 port zero. 540 */ 541 p_physp_remote = osm_physp_get_remote(p_physp); 542 if ((i != 0) && (!p_physp_remote || 543 !osm_physp_is_valid(p_physp_remote))) { 544 if (current_state != IB_LINK_INIT) 545 link_mgr_set_physp_pi(sm, p_physp, IB_LINK_DOWN); 546 continue; 547 } 548 549 /* 550 Normally we only send state update if state is lower 551 then required state. However, we need to send update if 552 no state change required. 553 */ 554 if (link_state != IB_LINK_NO_CHANGE && 555 link_state <= current_state) 556 OSM_LOG(sm->p_log, OSM_LOG_DEBUG, 557 "Physical port %u already %s. Skipping\n", 558 p_physp->port_num, 559 ib_get_port_state_str(current_state)); 560 else if (link_mgr_set_physp_pi(sm, p_physp, link_state)) 561 ret = -1; 562 } 563 564 OSM_LOG_EXIT(sm->p_log); 565 return ret; 566 } 567 568 int osm_link_mgr_process(osm_sm_t * sm, IN const uint8_t link_state) 569 { 570 cl_qmap_t *p_node_guid_tbl; 571 osm_node_t *p_node; 572 int ret = 0; 573 574 OSM_LOG_ENTER(sm->p_log); 575 576 p_node_guid_tbl = &sm->p_subn->node_guid_tbl; 577 578 CL_PLOCK_EXCL_ACQUIRE(sm->p_lock); 579 580 for (p_node = (osm_node_t *) cl_qmap_head(p_node_guid_tbl); 581 p_node != (osm_node_t *) cl_qmap_end(p_node_guid_tbl); 582 p_node = (osm_node_t *) cl_qmap_next(&p_node->map_item)) 583 if (link_mgr_process_node(sm, p_node, link_state)) 584 ret = -1; 585 586 CL_PLOCK_RELEASE(sm->p_lock); 587 588 OSM_LOG_EXIT(sm->p_log); 589 return ret; 590 } 591