1 /* 2 * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. 3 * Copyright (c) 2002-2007 Mellanox Technologies LTD. All rights reserved. 4 * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. 5 * Copyright (c) 2009,2010 HNR Consulting. All rights reserved. 6 * 7 * This software is available to you under a choice of one of two 8 * licenses. You may choose to be licensed under the terms of the GNU 9 * General Public License (GPL) Version 2, available from the file 10 * COPYING in the main directory of this source tree, or the 11 * OpenIB.org BSD license below: 12 * 13 * Redistribution and use in source and binary forms, with or 14 * without modification, are permitted provided that the following 15 * conditions are met: 16 * 17 * - Redistributions of source code must retain the above 18 * copyright notice, this list of conditions and the following 19 * disclaimer. 20 * 21 * - Redistributions in binary form must reproduce the above 22 * copyright notice, this list of conditions and the following 23 * disclaimer in the documentation and/or other materials 24 * provided with the distribution. 25 * 26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 33 * SOFTWARE. 34 * 35 */ 36 37 /* 38 * Abstract: 39 * Implementation of osm_trap_rcv_t. 40 * This object represents the Trap Receiver object. 41 * This object is part of the opensm family of objects. 42 */ 43 44 #if HAVE_CONFIG_H 45 # include <config.h> 46 #endif /* HAVE_CONFIG_H */ 47 48 #include <string.h> 49 #include <iba/ib_types.h> 50 #include <complib/cl_qmap.h> 51 #include <complib/cl_debug.h> 52 #include <opensm/osm_file_ids.h> 53 #define FILE_ID OSM_FILE_TRAP_RCV_C 54 #include <opensm/osm_madw.h> 55 #include <opensm/osm_log.h> 56 #include <opensm/osm_node.h> 57 #include <opensm/osm_helper.h> 58 #include <opensm/osm_subnet.h> 59 #include <opensm/osm_inform.h> 60 #include <opensm/osm_opensm.h> 61 62 extern void osm_req_get_node_desc(IN osm_sm_t * sm, osm_physp_t *p_physp); 63 64 /********************************************************************** 65 * 66 * TRAP HANDLING: 67 * 68 * Assuming traps can be caused by bad hardware we should provide 69 * a mechanism for filtering their propagation into the actual logic 70 * of OpenSM such that it is not overloaded by them. 71 * 72 * We will provide a trap filtering mechanism with "Aging" capability. 73 * This mechanism will track incoming traps, clasify them by their 74 * source and content and provide back their age. 75 * 76 * A timer running in the background will toggle a timer counter 77 * that should be referenced by the aging algorithm. 78 * To provide an efficient handling of aging, we also track all traps 79 * in a sorted list by their aging. 80 * 81 * The generic Aging Tracker mechanism is implemented in the 82 * cl_aging_tracker object. 83 * 84 **********************************************************************/ 85 86 static osm_physp_t *get_physp_by_lid_and_num(IN osm_sm_t * sm, 87 IN ib_net16_t lid, IN uint8_t num) 88 { 89 osm_port_t *p_port = osm_get_port_by_lid(sm->p_subn, lid); 90 if (!p_port) 91 return NULL; 92 93 if (osm_node_get_num_physp(p_port->p_node) <= num) 94 return NULL; 95 96 return osm_node_get_physp_ptr(p_port->p_node, num); 97 } 98 99 static uint64_t aging_tracker_callback(IN uint64_t key, IN uint32_t num_regs, 100 IN void *context) 101 { 102 osm_sm_t *sm = context; 103 ib_net16_t lid; 104 uint8_t port_num; 105 osm_physp_t *p_physp; 106 107 OSM_LOG_ENTER(sm->p_log); 108 109 if (osm_exit_flag) 110 /* We got an exit flag - do nothing */ 111 return 0; 112 113 lid = (ib_net16_t) ((key & 0x0000FFFF00000000ULL) >> 32); 114 port_num = (uint8_t) ((key & 0x00FF000000000000ULL) >> 48); 115 116 CL_PLOCK_ACQUIRE(sm->p_lock); 117 118 p_physp = get_physp_by_lid_and_num(sm, lid, port_num); 119 if (!p_physp) 120 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, 121 "Cannot find port num:%u with lid:%u\n", 122 port_num, cl_ntoh16(lid)); 123 /* make sure the physp is still valid */ 124 /* If the health port was false - set it to true */ 125 else if (!osm_physp_is_healthy(p_physp)) { 126 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, 127 "Clearing health bit of port num:%u with lid:%u\n", 128 port_num, cl_ntoh16(lid)); 129 130 /* Clear its health bit */ 131 osm_physp_set_health(p_physp, TRUE); 132 } 133 134 CL_PLOCK_RELEASE(sm->p_lock); 135 OSM_LOG_EXIT(sm->p_log); 136 137 /* We want to remove the event from the tracker - so 138 need to return zero. */ 139 return 0; 140 } 141 142 /********************************************************************** 143 * CRC calculation for notice identification 144 **********************************************************************/ 145 146 #define CRC32_POLYNOMIAL 0xEDB88320L 147 148 /* calculate the crc for a given buffer */ 149 static uint32_t trap_calc_crc32(void *buffer, uint32_t count) 150 { 151 uint32_t temp1, temp2; 152 uint32_t crc = -1L; 153 unsigned char *p = (unsigned char *)buffer; 154 /* precalculated table for faster crc calculation */ 155 static uint32_t crc_table[256]; 156 static boolean_t first = TRUE; 157 int i, j; 158 159 /* if we need to initialize the lookup table */ 160 if (first) { 161 /* calc the CRC table */ 162 for (i = 0; i <= 255; i++) { 163 crc = i; 164 for (j = 8; j > 0; j--) 165 if (crc & 1) 166 crc = (crc >> 1) ^ CRC32_POLYNOMIAL; 167 else 168 crc >>= 1; 169 crc_table[i] = crc; 170 } 171 first = FALSE; 172 } 173 174 crc = -1L; 175 /* do the calculation */ 176 while (count-- != 0) { 177 temp1 = (crc >> 8) & 0x00FFFFFFL; 178 temp2 = crc_table[((int)crc ^ *p++) & 0xFF]; 179 crc = temp1 ^ temp2; 180 } 181 return crc; 182 } 183 184 /* The key is created in the following manner: 185 port_num lid crc 186 \______/ \___/ \___/ 187 16b 16b 32b 188 */ 189 static uint64_t trap_get_key(IN uint16_t lid, IN uint8_t port_num, 190 IN ib_mad_notice_attr_t * p_ntci) 191 { 192 uint32_t crc = trap_calc_crc32(p_ntci, sizeof(ib_mad_notice_attr_t)); 193 return ((uint64_t) port_num << 48) | ((uint64_t) lid << 32) | crc; 194 } 195 196 static int print_num_received(IN uint32_t num_received) 197 { 198 uint32_t i; 199 200 /* Series is 10, 20, 50, 100, 200, 500, ... */ 201 i = num_received; 202 while (i >= 10) { 203 if (i % 10) 204 break; 205 i = i / 10; 206 } 207 208 if (i == 1 || i == 2 || i == 5) 209 return 1; 210 else 211 return 0; 212 } 213 214 static int disable_port(osm_sm_t *sm, osm_physp_t *p) 215 { 216 uint8_t payload[IB_SMP_DATA_SIZE]; 217 osm_madw_context_t context; 218 ib_port_info_t *pi = (ib_port_info_t *)payload; 219 osm_physp_t *physp0; 220 osm_port_t *p_port; 221 ib_net64_t m_key; 222 ib_api_status_t status; 223 224 /* select the nearest port to master opensm */ 225 if (p->p_remote_physp && 226 p->dr_path.hop_count > p->p_remote_physp->dr_path.hop_count) 227 p = p->p_remote_physp; 228 229 /* If trap 131, might want to disable peer port if available */ 230 /* but peer port has been observed not to respond to SM requests */ 231 232 memcpy(payload, &p->port_info, sizeof(ib_port_info_t)); 233 234 /* Set port to disabled/down */ 235 ib_port_info_set_port_state(pi, IB_LINK_DOWN); 236 ib_port_info_set_port_phys_state(IB_PORT_PHYS_STATE_DISABLED, pi); 237 238 /* Issue set of PortInfo */ 239 context.pi_context.node_guid = osm_node_get_node_guid(p->p_node); 240 context.pi_context.port_guid = osm_physp_get_port_guid(p); 241 context.pi_context.set_method = TRUE; 242 context.pi_context.light_sweep = FALSE; 243 context.pi_context.active_transition = FALSE; 244 context.pi_context.client_rereg = FALSE; 245 if (osm_node_get_type(p->p_node) == IB_NODE_TYPE_SWITCH && 246 osm_physp_get_port_num(p) != 0) { 247 physp0 = osm_node_get_physp_ptr(p->p_node, 0); 248 m_key = ib_port_info_get_m_key(&physp0->port_info); 249 } else 250 m_key = ib_port_info_get_m_key(&p->port_info); 251 252 if (osm_node_get_type(p->p_node) != IB_NODE_TYPE_SWITCH) { 253 if (!pi->base_lid) { 254 p_port = osm_get_port_by_guid(sm->p_subn, 255 osm_physp_get_port_guid(p)); 256 pi->base_lid = p_port->lid; 257 } 258 pi->master_sm_base_lid = sm->p_subn->sm_base_lid; 259 } 260 261 status = osm_req_set(sm, osm_physp_get_dr_path_ptr(p), 262 payload, sizeof(payload), IB_MAD_ATTR_PORT_INFO, 263 cl_hton32(osm_physp_get_port_num(p)), 264 FALSE, m_key, 265 CL_DISP_MSGID_NONE, &context); 266 return status; 267 } 268 269 static void log_trap_info(osm_log_t *p_log, ib_mad_notice_attr_t *p_ntci, 270 ib_net16_t source_lid, ib_net64_t trans_id) 271 { 272 if (!OSM_LOG_IS_ACTIVE_V2(p_log, OSM_LOG_ERROR)) 273 return; 274 275 if (ib_notice_is_generic(p_ntci)) { 276 char str[32]; 277 278 if ((p_ntci->g_or_v.generic.trap_num == CL_HTON16(SM_LINK_INTEGRITY_THRESHOLD_TRAP)) || 279 (p_ntci->g_or_v.generic.trap_num == CL_HTON16(SM_BUFFER_OVERRUN_THRESHOLD_TRAP)) || 280 (p_ntci->g_or_v.generic.trap_num == CL_HTON16(SM_WATCHDOG_TIMER_EXPIRED_TRAP))) 281 snprintf(str, sizeof(str), " Port %u", 282 p_ntci->data_details.ntc_129_131.port_num); 283 else 284 str[0] = '\0'; 285 286 OSM_LOG(p_log, OSM_LOG_ERROR, 287 "Received Generic Notice type:%u " 288 "num:%u (%s) Producer:%u (%s) " 289 "from LID:%u%s TID:0x%016" PRIx64 "\n", 290 ib_notice_get_type(p_ntci), 291 cl_ntoh16(p_ntci->g_or_v.generic.trap_num), 292 ib_get_trap_str(p_ntci->g_or_v.generic.trap_num), 293 cl_ntoh32(ib_notice_get_prod_type(p_ntci)), 294 ib_get_producer_type_str(ib_notice_get_prod_type(p_ntci)), 295 cl_hton16(source_lid), str, cl_ntoh64(trans_id)); 296 if ((p_ntci->g_or_v.generic.trap_num == CL_HTON16(SM_BAD_PKEY_TRAP)) || 297 (p_ntci->g_or_v.generic.trap_num == CL_HTON16(SM_BAD_QKEY_TRAP))) { 298 OSM_LOG(p_log, OSM_LOG_ERROR, 299 "Bad %s_Key:0x%x on SL:%d from " 300 "LID1:%u QP1:0x%x to " 301 "LID2:%u QP2:0x%x\n", 302 (p_ntci->g_or_v.generic.trap_num == CL_HTON16(257)) ? "P" : "Q", 303 cl_ntoh32(p_ntci->data_details.ntc_257_258.key), 304 cl_ntoh32(p_ntci->data_details.ntc_257_258.qp1) >> 28, 305 cl_ntoh16(p_ntci->data_details.ntc_257_258.lid1), 306 cl_ntoh32(p_ntci->data_details.ntc_257_258.qp1) & 0xfff, 307 cl_ntoh16(p_ntci->data_details.ntc_257_258.lid2), 308 cl_ntoh32(p_ntci->data_details.ntc_257_258.qp2)); 309 } 310 } else 311 OSM_LOG(p_log, OSM_LOG_ERROR, 312 "Received Vendor Notice type:%u vend:0x%06X " 313 "dev:%u from LID:%u TID:0x%016" PRIx64 "\n", 314 ib_notice_get_type(p_ntci), 315 cl_ntoh32(ib_notice_get_vend_id(p_ntci)), 316 cl_ntoh16(p_ntci->g_or_v.vend.dev_id), 317 cl_ntoh16(source_lid), cl_ntoh64(trans_id)); 318 } 319 320 static int shutup_noisy_port(osm_sm_t *sm, ib_net16_t lid, uint8_t port, 321 unsigned num) 322 { 323 osm_physp_t *p = get_physp_by_lid_and_num(sm, lid, port); 324 if (!p) { 325 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3805: " 326 "Failed to find physical port by lid:%u num:%u\n", 327 cl_ntoh16(lid), port); 328 return -1; 329 } 330 331 /* When babbling port policy option is enabled and 332 Threshold for disabling a "babbling" port is exceeded */ 333 if (sm->p_subn->opt.babbling_port_policy && num >= 250) { 334 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, 335 "Disabling noisy physical port 0x%016" PRIx64 336 ": lid %u, num %u\n", 337 cl_ntoh64(osm_physp_get_port_guid(p)), 338 cl_ntoh16(lid), port); 339 if (disable_port(sm, p)) 340 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3811: " 341 "Failed to disable noisy physical port 0x%016" 342 PRIx64 ": lid %u, num %u\n", 343 cl_ntoh64(osm_physp_get_port_guid(p)), 344 cl_ntoh16(lid), port); 345 else 346 return 1; 347 } 348 349 /* check if the current state of the p_physp is healthy. If 350 it is - then this is a first change of state. Run a heavy sweep. */ 351 if (osm_physp_is_healthy(p)) { 352 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, 353 "Marking unhealthy physical port by lid:%u num:%u\n", 354 cl_ntoh16(lid), port); 355 osm_physp_set_health(p, FALSE); 356 return 2; 357 } 358 return 0; 359 } 360 361 static void trap_rcv_process_request(IN osm_sm_t * sm, 362 IN const osm_madw_t * p_madw) 363 { 364 uint8_t payload[sizeof(ib_mad_notice_attr_t)]; 365 ib_smp_t *p_smp; 366 ib_mad_notice_attr_t *p_ntci = (ib_mad_notice_attr_t *) payload; 367 ib_api_status_t status; 368 osm_madw_t tmp_madw; /* we need a copy to last after repress */ 369 uint64_t trap_key; 370 uint32_t num_received; 371 osm_physp_t *p_physp; 372 osm_port_t *p_port; 373 ib_net16_t source_lid = 0; 374 boolean_t is_gsi = TRUE; 375 uint8_t port_num = 0; 376 boolean_t physp_change_trap = FALSE; 377 uint64_t event_wheel_timeout = OSM_DEFAULT_TRAP_SUPRESSION_TIMEOUT; 378 boolean_t run_heavy_sweep = FALSE; 379 char buf[1024]; 380 osm_dr_path_t *p_path; 381 unsigned n; 382 383 OSM_LOG_ENTER(sm->p_log); 384 385 CL_ASSERT(p_madw); 386 387 if (osm_exit_flag) 388 /* 389 We got an exit flag - do nothing 390 Otherwise we start a sweep on the trap 144 caused by 391 cleaning up SM Cap bit... 392 */ 393 goto Exit2; 394 395 /* update the is_gsi flag according to the mgmt_class field */ 396 if (p_madw->p_mad->mgmt_class == IB_MCLASS_SUBN_LID || 397 p_madw->p_mad->mgmt_class == IB_MCLASS_SUBN_DIR) 398 is_gsi = FALSE; 399 400 /* No real need to grab the lock for this function. */ 401 memset(payload, 0, sizeof(payload)); 402 memset(&tmp_madw, 0, sizeof(tmp_madw)); 403 404 p_smp = osm_madw_get_smp_ptr(p_madw); 405 406 if (p_smp->method != IB_MAD_METHOD_TRAP) { 407 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3801: " 408 "Unsupported method 0x%X\n", p_smp->method); 409 goto Exit2; 410 } 411 412 /* 413 * The NOTICE Attribute is part of the SMP CLASS attributes 414 * As such the actual attribute data resides inside the SMP 415 * payload. 416 */ 417 418 memcpy(payload, &p_smp->data, IB_SMP_DATA_SIZE); 419 memcpy(&tmp_madw, p_madw, sizeof(tmp_madw)); 420 421 if (is_gsi == FALSE) { 422 /* We are in smi flow */ 423 /* 424 * When we receive a TRAP with dlid = 0 - it means it 425 * came from our own node. So we need to fix it. 426 */ 427 428 if (p_madw->mad_addr.addr_type.smi.source_lid == 0) { 429 /* Check if the sm_base_lid is 0. If yes - this means 430 that the local lid wasn't configured yet. Don't send 431 a response to the trap. */ 432 if (sm->p_subn->sm_base_lid == 0) { 433 OSM_LOG(sm->p_log, OSM_LOG_DEBUG, 434 "Received SLID=0 Trap with local LID=0. Ignoring MAD\n"); 435 goto Exit2; 436 } 437 OSM_LOG(sm->p_log, OSM_LOG_DEBUG, 438 "Received SLID=0 Trap. Using local LID:%u instead\n", 439 cl_ntoh16(sm->p_subn->sm_base_lid)); 440 tmp_madw.mad_addr.addr_type.smi.source_lid = 441 sm->p_subn->sm_base_lid; 442 } 443 444 source_lid = tmp_madw.mad_addr.addr_type.smi.source_lid; 445 446 /* Print some info about the incoming Trap */ 447 log_trap_info(sm->p_log, p_ntci, source_lid, p_smp->trans_id); 448 } 449 450 osm_dump_notice_v2(sm->p_log, p_ntci, FILE_ID, OSM_LOG_VERBOSE); 451 CL_PLOCK_ACQUIRE(sm->p_lock); 452 p_physp = osm_get_physp_by_mad_addr(sm->p_log, sm->p_subn, 453 &tmp_madw.mad_addr); 454 if (p_physp) 455 p_smp->m_key = ib_port_info_get_m_key(&p_physp->port_info); 456 else 457 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3809: " 458 "Failed to find source physical port for trap\n"); 459 460 status = osm_resp_send(sm, &tmp_madw, 0, payload); 461 if (status != IB_SUCCESS) { 462 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3802: " 463 "Error sending response (%s)\n", 464 ib_get_err_str(status)); 465 goto Exit; 466 } 467 468 /* 469 * We would like to filter out recurring Traps so we track them by 470 * their source lid and content. If the same trap was already 471 * received within the aging time window more than 10 times, 472 * we simply ignore it. This is done only if we are in smi mode 473 */ 474 475 if (is_gsi == FALSE) { 476 if (ib_notice_is_generic(p_ntci) && 477 (p_ntci->g_or_v.generic.trap_num == CL_HTON16(SM_LINK_INTEGRITY_THRESHOLD_TRAP) || 478 p_ntci->g_or_v.generic.trap_num == CL_HTON16(SM_BUFFER_OVERRUN_THRESHOLD_TRAP) || 479 p_ntci->g_or_v.generic.trap_num == CL_HTON16(SM_WATCHDOG_TIMER_EXPIRED_TRAP))) { 480 /* If this is a trap 129, 130, or 131 - then this is a 481 * trap signaling a change on a physical port. 482 * Mark the physp_change_trap flag as TRUE. 483 */ 484 physp_change_trap = TRUE; 485 /* The source_lid should be based on the source_lid from the trap */ 486 source_lid = p_ntci->data_details.ntc_129_131.lid; 487 port_num = p_ntci->data_details.ntc_129_131.port_num; 488 } 489 490 /* try to find it in the aging tracker */ 491 trap_key = trap_get_key(source_lid, port_num, p_ntci); 492 num_received = cl_event_wheel_num_regs(&sm->trap_aging_tracker, 493 trap_key); 494 495 /* Now we know how many times it provided this trap */ 496 if (num_received > 10) { 497 if (print_num_received(num_received)) 498 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, 499 "Received trap %u times consecutively\n", 500 num_received); 501 /* 502 * If the trap provides info about a bad port 503 * we mark it as unhealthy. 504 */ 505 if (physp_change_trap == TRUE) { 506 int ret = shutup_noisy_port(sm, source_lid, 507 port_num, 508 num_received); 509 if (ret == 1) /* port disabled */ 510 goto Exit; 511 else if (ret == 2) /* unhealthy - run sweep */ 512 run_heavy_sweep = TRUE; 513 /* in any case increase timeout interval */ 514 event_wheel_timeout = 515 OSM_DEFAULT_UNHEALTHY_TIMEOUT; 516 } 517 } 518 519 /* restart the aging anyway */ 520 /* If physp_change_trap is TRUE - then use a callback to unset 521 the healthy bit. If not - no need to use a callback. */ 522 if (physp_change_trap == TRUE) 523 cl_event_wheel_reg(&sm->trap_aging_tracker, trap_key, 524 cl_get_time_stamp() + event_wheel_timeout, 525 aging_tracker_callback, sm); 526 else 527 cl_event_wheel_reg(&sm->trap_aging_tracker, trap_key, 528 cl_get_time_stamp() + event_wheel_timeout, 529 NULL, NULL); 530 531 /* If was already registered do nothing more */ 532 if (num_received > 10 && run_heavy_sweep == FALSE) { 533 if (print_num_received(num_received)) 534 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, 535 "Ignoring noisy traps.\n"); 536 goto Exit; 537 } 538 } 539 540 /* Check for node description update. IB Spec v1.2.1 pg 823 */ 541 if (!ib_notice_is_generic(p_ntci)) 542 goto check_sweep; 543 if (cl_ntoh16(p_ntci->g_or_v.generic.trap_num) == SM_LOCAL_CHANGES_TRAP && 544 p_ntci->data_details.ntc_144.local_changes & TRAP_144_MASK_OTHER_LOCAL_CHANGES && 545 p_ntci->data_details.ntc_144.change_flgs & TRAP_144_MASK_NODE_DESCRIPTION_CHANGE) { 546 OSM_LOG(sm->p_log, OSM_LOG_INFO, "Trap 144 Node description update\n"); 547 548 if (p_physp) { 549 osm_req_get_node_desc(sm, p_physp); 550 if (!(p_ntci->data_details.ntc_144.change_flgs & ~TRAP_144_MASK_NODE_DESCRIPTION_CHANGE) && 551 p_ntci->data_details.ntc_144.new_cap_mask == p_physp->port_info.capability_mask) 552 goto check_report; 553 } else 554 OSM_LOG(sm->p_log, OSM_LOG_ERROR, 555 "ERR 3812: No physical port found for " 556 "trap 144: \"node description update\"\n"); 557 goto check_sweep; 558 } else if (cl_ntoh16(p_ntci->g_or_v.generic.trap_num) == SM_SYS_IMG_GUID_CHANGED_TRAP) { 559 if (p_physp) { 560 CL_PLOCK_RELEASE(sm->p_lock); 561 CL_PLOCK_EXCL_ACQUIRE(sm->p_lock); 562 p_physp = osm_get_physp_by_mad_addr(sm->p_log, 563 sm->p_subn, 564 &tmp_madw.mad_addr); 565 if (p_physp) { 566 /* this assumes that trap 145 content is not broken? */ 567 p_physp->p_node->node_info.sys_guid = 568 p_ntci->data_details.ntc_145.new_sys_guid; 569 } 570 CL_PLOCK_RELEASE(sm->p_lock); 571 CL_PLOCK_ACQUIRE(sm->p_lock); 572 } else 573 OSM_LOG(sm->p_log, OSM_LOG_ERROR, 574 "ERR 3813: No physical port found for " 575 "trap 145: \"SystemImageGUID update\"\n"); 576 goto check_report; 577 } 578 579 check_sweep: 580 if (osm_log_is_active_v2(sm->p_log, OSM_LOG_INFO, FILE_ID)) { 581 if (ib_notice_is_generic(p_ntci) && 582 cl_ntoh16(p_ntci->g_or_v.generic.trap_num) == SM_LINK_STATE_CHANGED_TRAP) { 583 p_path = (p_physp) ? 584 osm_physp_get_dr_path_ptr(p_physp) : NULL; 585 if (p_path) { 586 n = sprintf(buf, "SM class trap %u: ", 587 cl_ntoh16(p_ntci->g_or_v.generic.trap_num)); 588 n += snprintf(buf + n, sizeof(buf) - n, 589 "Directed Path Dump of %u hop path: " 590 "Path = ", p_path->hop_count); 591 592 osm_dump_dr_path_as_buf(sizeof(buf) - n, p_path, 593 buf + n); 594 595 osm_log_v2(sm->p_log, OSM_LOG_INFO, FILE_ID, 596 "%s\n", buf); 597 } 598 } 599 } 600 601 /* do a sweep if we received a trap */ 602 if (sm->p_subn->opt.sweep_on_trap) { 603 /* if this is trap number 128 or run_heavy_sweep is TRUE - 604 update the force_heavy_sweep flag of the subnet. 605 Sweep also on traps 144 - these traps signal a change of 606 certain port capabilities. 607 TODO: In the future this can be changed to just getting 608 PortInfo on this port instead of sweeping the entire subnet. */ 609 if (ib_notice_is_generic(p_ntci) && 610 (cl_ntoh16(p_ntci->g_or_v.generic.trap_num) == SM_LINK_STATE_CHANGED_TRAP || 611 cl_ntoh16(p_ntci->g_or_v.generic.trap_num) == SM_LOCAL_CHANGES_TRAP || 612 run_heavy_sweep)) { 613 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, 614 "Forcing heavy sweep. Received trap:%u\n", 615 cl_ntoh16(p_ntci->g_or_v.generic.trap_num)); 616 617 sm->p_subn->force_heavy_sweep = TRUE; 618 } 619 osm_sm_signal(sm, OSM_SIGNAL_SWEEP); 620 } 621 622 /* If we reached here due to trap 129/130/131 - do not need to do 623 the notice report. Just goto exit. We know this is the case 624 if physp_change_trap is TRUE. */ 625 if (physp_change_trap == TRUE) 626 goto Exit; 627 628 check_report: 629 /* We are going to report the notice - so need to fix the IssuerGID 630 accordingly. See IBA 1.2 p.739 or IBA 1.1 p.653 for details. */ 631 if (is_gsi) { 632 if (!tmp_madw.mad_addr.addr_type.gsi.global_route) { 633 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3806: " 634 "Received gsi trap with global_route FALSE. " 635 "Cannot update issuer_gid!\n"); 636 goto Exit; 637 } 638 memcpy(&p_ntci->issuer_gid, 639 &tmp_madw.mad_addr.addr_type.gsi.grh_info.src_gid, 640 sizeof(ib_gid_t)); 641 } else { 642 /* Need to use the IssuerLID */ 643 p_port = osm_get_port_by_lid(sm->p_subn, source_lid); 644 if (!p_port) { 645 OSM_LOG(sm->p_log, OSM_LOG_VERBOSE, 646 "Cannot find port corresponding to lid:%u\n", 647 cl_ntoh16(source_lid)); 648 649 goto Exit; 650 } 651 652 p_ntci->issuer_gid.unicast.prefix = 653 sm->p_subn->opt.subnet_prefix; 654 p_ntci->issuer_gid.unicast.interface_id = p_port->guid; 655 } 656 657 /* we need a lock here as the InformInfo DB must be stable */ 658 status = osm_report_notice(sm->p_log, sm->p_subn, p_ntci); 659 if (status != IB_SUCCESS) { 660 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 3803: " 661 "Error sending trap reports (%s)\n", 662 ib_get_err_str(status)); 663 goto Exit; 664 } 665 666 Exit: 667 CL_PLOCK_RELEASE(sm->p_lock); 668 Exit2: 669 OSM_LOG_EXIT(sm->p_log); 670 } 671 672 void osm_trap_rcv_process(IN void *context, IN void *data) 673 { 674 osm_sm_t *sm = context; 675 osm_madw_t *p_madw = data; 676 ib_smp_t __attribute__((unused)) *p_smp; 677 678 OSM_LOG_ENTER(sm->p_log); 679 680 CL_ASSERT(p_madw); 681 682 p_smp = osm_madw_get_smp_ptr(p_madw); 683 684 /* Only Trap requests get here */ 685 CL_ASSERT(!ib_smp_is_response(p_smp)); 686 trap_rcv_process_request(sm, p_madw); 687 688 OSM_LOG_EXIT(sm->p_log); 689 } 690