1 /* 2 * Copyright (c) 2007 The Regents of the University of California. 3 * Copyright (c) 2007-2009 Voltaire, Inc. All rights reserved. 4 * Copyright (c) 2009,2010 HNR Consulting. All rights reserved. 5 * Copyright (c) 2013 Lawrence Livermore National Security. All rights reserved. 6 * Copyright (c) 2011-2014 Mellanox Technologies LTD. All rights reserved. 7 * 8 * This software is available to you under a choice of one of two 9 * licenses. You may choose to be licensed under the terms of the GNU 10 * General Public License (GPL) Version 2, available from the file 11 * COPYING in the main directory of this source tree, or the 12 * OpenIB.org BSD license below: 13 * 14 * Redistribution and use in source and binary forms, with or 15 * without modification, are permitted provided that the following 16 * conditions are met: 17 * 18 * - Redistributions of source code must retain the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer. 21 * 22 * - Redistributions in binary form must reproduce the above 23 * copyright notice, this list of conditions and the following 24 * disclaimer in the documentation and/or other materials 25 * provided with the distribution. 26 * 27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 28 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 29 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 30 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 31 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 34 * SOFTWARE. 35 * 36 */ 37 38 /* 39 * Abstract: 40 * Implementation of osm_perfmgr_t. 41 * This object implements an IBA performance manager. 42 * 43 * Author: 44 * Ira Weiny, LLNL 45 */ 46 47 #if HAVE_CONFIG_H 48 # include <config.h> 49 #endif /* HAVE_CONFIG_H */ 50 51 #ifdef ENABLE_OSM_PERF_MGR 52 #include <stdlib.h> 53 #include <stdint.h> 54 #include <string.h> 55 #include <poll.h> 56 #include <errno.h> 57 #include <sys/time.h> 58 #include <netinet/in.h> 59 #include <float.h> 60 #include <arpa/inet.h> 61 #include <sys/socket.h> 62 #include <iba/ib_types.h> 63 #include <complib/cl_debug.h> 64 #include <complib/cl_thread.h> 65 #include <opensm/osm_file_ids.h> 66 #define FILE_ID OSM_FILE_PERFMGR_C 67 #include <vendor/osm_vendor_api.h> 68 #include <opensm/osm_perfmgr.h> 69 #include <opensm/osm_log.h> 70 #include <opensm/osm_node.h> 71 #include <opensm/osm_opensm.h> 72 #include <opensm/osm_helper.h> 73 74 #define PERFMGR_INITIAL_TID_VALUE 0xcafe 75 76 #ifdef ENABLE_OSM_PERF_MGR_PROFILE 77 struct { 78 double fastest_us; 79 double slowest_us; 80 double avg_us; 81 uint64_t num; 82 } perfmgr_mad_stats = { 83 fastest_us: DBL_MAX, slowest_us: DBL_MIN, avg_us: 0, num:0}; 84 85 /* diff must be something which can fit in a susecond_t */ 86 static inline void update_mad_stats(struct timeval *diff) 87 { 88 double new = (diff->tv_sec * 1000000) + diff->tv_usec; 89 if (new < perfmgr_mad_stats.fastest_us) 90 perfmgr_mad_stats.fastest_us = new; 91 if (new > perfmgr_mad_stats.slowest_us) 92 perfmgr_mad_stats.slowest_us = new; 93 94 perfmgr_mad_stats.avg_us = 95 ((perfmgr_mad_stats.avg_us * perfmgr_mad_stats.num) + new) 96 / (perfmgr_mad_stats.num + 1); 97 perfmgr_mad_stats.num++; 98 } 99 100 static inline void clear_mad_stats(void) 101 { 102 perfmgr_mad_stats.fastest_us = DBL_MAX; 103 perfmgr_mad_stats.slowest_us = DBL_MIN; 104 perfmgr_mad_stats.avg_us = 0; 105 perfmgr_mad_stats.num = 0; 106 } 107 108 /* after and diff can be the same struct */ 109 static inline void diff_time(struct timeval *before, struct timeval *after, 110 struct timeval *diff) 111 { 112 struct timeval tmp = *after; 113 if (tmp.tv_usec < before->tv_usec) { 114 tmp.tv_sec--; 115 tmp.tv_usec += 1000000; 116 } 117 diff->tv_sec = tmp.tv_sec - before->tv_sec; 118 diff->tv_usec = tmp.tv_usec - before->tv_usec; 119 } 120 #endif 121 122 /********************************************************************** 123 * Internal helper functions 124 **********************************************************************/ 125 static void init_monitored_nodes(osm_perfmgr_t * pm) 126 { 127 cl_qmap_init(&pm->monitored_map); 128 pm->remove_list = NULL; 129 cl_event_construct(&pm->sig_query); 130 cl_event_init(&pm->sig_query, FALSE); 131 } 132 133 static void mark_for_removal(osm_perfmgr_t * pm, monitored_node_t * node) 134 { 135 if (pm->remove_list) { 136 node->next = pm->remove_list; 137 pm->remove_list = node; 138 } else { 139 node->next = NULL; 140 pm->remove_list = node; 141 } 142 } 143 144 static void remove_marked_nodes(osm_perfmgr_t * pm) 145 { 146 while (pm->remove_list) { 147 monitored_node_t *next = pm->remove_list->next; 148 int port; 149 150 cl_qmap_remove_item(&pm->monitored_map, 151 (cl_map_item_t *) (pm->remove_list)); 152 153 if (pm->rm_nodes) 154 perfmgr_db_delete_entry(pm->db, pm->remove_list->guid); 155 else 156 perfmgr_db_mark_active(pm->db, pm->remove_list->guid, FALSE); 157 158 if (pm->remove_list->name) 159 free(pm->remove_list->name); 160 161 for (port = pm->remove_list->esp0 ? 0 : 1; 162 port < pm->remove_list->num_ports; 163 port++) { 164 if (pm->remove_list->port[port].remote_name) 165 free(pm->remove_list->port[port].remote_name); 166 } 167 168 free(pm->remove_list); 169 pm->remove_list = next; 170 } 171 } 172 173 static inline void decrement_outstanding_queries(osm_perfmgr_t * pm) 174 { 175 cl_atomic_dec(&pm->outstanding_queries); 176 177 if (!pm->outstanding_queries) { 178 cl_spinlock_acquire(&pm->lock); 179 if (pm->sweep_state == PERFMGR_SWEEP_POST_PROCESSING) { 180 pm->sweep_state = PERFMGR_SWEEP_SLEEP; 181 OSM_LOG(pm->log, OSM_LOG_INFO, 182 "PM sweep state exiting Post Processing\n"); 183 } 184 cl_spinlock_release(&pm->lock); 185 } 186 187 cl_event_signal(&pm->sig_query); 188 } 189 190 /********************************************************************** 191 * Receive the MAD from the vendor layer and post it for processing by 192 * the dispatcher 193 **********************************************************************/ 194 static void perfmgr_mad_recv_callback(osm_madw_t * p_madw, void *bind_context, 195 osm_madw_t * p_req_madw) 196 { 197 osm_perfmgr_t *pm = (osm_perfmgr_t *) bind_context; 198 199 OSM_LOG_ENTER(pm->log); 200 201 CL_ASSERT(p_madw); 202 CL_ASSERT(p_req_madw != NULL); 203 204 osm_madw_copy_context(p_madw, p_req_madw); 205 osm_mad_pool_put(pm->mad_pool, p_req_madw); 206 207 decrement_outstanding_queries(pm); 208 209 /* post this message for later processing. */ 210 if (cl_disp_post(pm->pc_disp_h, OSM_MSG_MAD_PORT_COUNTERS, 211 p_madw, NULL, NULL) != CL_SUCCESS) { 212 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5401: " 213 "PerfMgr Dispatcher post failed\n"); 214 osm_mad_pool_put(pm->mad_pool, p_madw); 215 } 216 OSM_LOG_EXIT(pm->log); 217 } 218 219 /********************************************************************** 220 * Process MAD send errors 221 **********************************************************************/ 222 static void perfmgr_mad_send_err_callback(void *bind_context, 223 osm_madw_t * p_madw) 224 { 225 osm_perfmgr_t *pm = (osm_perfmgr_t *) bind_context; 226 osm_madw_context_t *context = &p_madw->context; 227 uint64_t node_guid = context->perfmgr_context.node_guid; 228 uint8_t port = context->perfmgr_context.port; 229 cl_map_item_t *p_node; 230 monitored_node_t *p_mon_node; 231 ib_net16_t orig_lid; 232 233 OSM_LOG_ENTER(pm->log); 234 235 /* 236 * get the monitored node struct to have the printable name 237 * for log messages 238 */ 239 if ((p_node = cl_qmap_get(&pm->monitored_map, node_guid)) == 240 cl_qmap_end(&pm->monitored_map)) { 241 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5415: GUID 0x%016" 242 PRIx64 " not found in monitored map\n", node_guid); 243 goto Exit; 244 } 245 p_mon_node = (monitored_node_t *) p_node; 246 247 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5402: %s (0x%" PRIx64 248 ") port %u LID %u TID 0x%" PRIx64 "\n", 249 p_mon_node->name, p_mon_node->guid, port, 250 cl_ntoh16(p_madw->mad_addr.dest_lid), 251 cl_ntoh64(p_madw->p_mad->trans_id)); 252 253 if (pm->subn->opt.perfmgr_redir && p_madw->status == IB_TIMEOUT) { 254 /* First, find the node in the monitored map */ 255 cl_plock_acquire(&pm->osm->lock); 256 /* Now, validate port number */ 257 if (port >= p_mon_node->num_ports) { 258 cl_plock_release(&pm->osm->lock); 259 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5416: " 260 "Invalid port num %u for %s (GUID 0x%016" 261 PRIx64 ") num ports %u\n", port, 262 p_mon_node->name, p_mon_node->guid, 263 p_mon_node->num_ports); 264 goto Exit; 265 } 266 /* Clear redirection info for this port except orig_lid */ 267 orig_lid = p_mon_node->port[port].orig_lid; 268 memset(&p_mon_node->port[port], 0, sizeof(monitored_port_t)); 269 p_mon_node->port[port].orig_lid = orig_lid; 270 p_mon_node->port[port].valid = TRUE; 271 cl_plock_release(&pm->osm->lock); 272 } 273 274 Exit: 275 osm_mad_pool_put(pm->mad_pool, p_madw); 276 277 decrement_outstanding_queries(pm); 278 279 OSM_LOG_EXIT(pm->log); 280 } 281 282 /********************************************************************** 283 * Bind the PerfMgr to the vendor layer for MAD sends/receives 284 **********************************************************************/ 285 ib_api_status_t osm_perfmgr_bind(osm_perfmgr_t * pm, ib_net64_t port_guid) 286 { 287 osm_bind_info_t bind_info; 288 ib_api_status_t status = IB_SUCCESS; 289 290 OSM_LOG_ENTER(pm->log); 291 292 if (pm->bind_handle != OSM_BIND_INVALID_HANDLE) { 293 OSM_LOG(pm->log, OSM_LOG_ERROR, 294 "ERR 5403: Multiple binds not allowed\n"); 295 status = IB_ERROR; 296 goto Exit; 297 } 298 299 bind_info.port_guid = pm->port_guid = port_guid; 300 bind_info.mad_class = IB_MCLASS_PERF; 301 bind_info.class_version = 1; 302 bind_info.is_responder = FALSE; 303 bind_info.is_report_processor = FALSE; 304 bind_info.is_trap_processor = FALSE; 305 bind_info.recv_q_size = OSM_PM_DEFAULT_QP1_RCV_SIZE; 306 bind_info.send_q_size = OSM_PM_DEFAULT_QP1_SEND_SIZE; 307 bind_info.timeout = pm->subn->opt.transaction_timeout; 308 bind_info.retries = pm->subn->opt.transaction_retries; 309 310 OSM_LOG(pm->log, OSM_LOG_VERBOSE, 311 "Binding to port GUID 0x%" PRIx64 "\n", cl_ntoh64(port_guid)); 312 313 pm->bind_handle = osm_vendor_bind(pm->vendor, &bind_info, pm->mad_pool, 314 perfmgr_mad_recv_callback, 315 perfmgr_mad_send_err_callback, pm); 316 317 if (pm->bind_handle == OSM_BIND_INVALID_HANDLE) { 318 status = IB_ERROR; 319 OSM_LOG(pm->log, OSM_LOG_ERROR, 320 "ERR 5404: Vendor specific bind failed (%s)\n", 321 ib_get_err_str(status)); 322 } 323 324 Exit: 325 OSM_LOG_EXIT(pm->log); 326 return status; 327 } 328 329 /********************************************************************** 330 * Unbind the PerfMgr from the vendor layer for MAD sends/receives 331 **********************************************************************/ 332 static void perfmgr_mad_unbind(osm_perfmgr_t * pm) 333 { 334 OSM_LOG_ENTER(pm->log); 335 if (pm->bind_handle == OSM_BIND_INVALID_HANDLE) { 336 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5405: No previous bind\n"); 337 goto Exit; 338 } 339 osm_vendor_unbind(pm->bind_handle); 340 Exit: 341 OSM_LOG_EXIT(pm->log); 342 } 343 344 /********************************************************************** 345 * Given a monitored node and a port, return the qp 346 **********************************************************************/ 347 static ib_net32_t get_qp(monitored_node_t * mon_node, uint8_t port) 348 { 349 ib_net32_t qp = IB_QP1; 350 351 if (mon_node && mon_node->num_ports && port < mon_node->num_ports && 352 mon_node->port[port].redirection && mon_node->port[port].qp) 353 qp = mon_node->port[port].qp; 354 355 return qp; 356 } 357 358 static ib_net16_t get_base_lid(osm_node_t * p_node, uint8_t port) 359 { 360 switch (p_node->node_info.node_type) { 361 case IB_NODE_TYPE_CA: 362 case IB_NODE_TYPE_ROUTER: 363 return osm_node_get_base_lid(p_node, port); 364 case IB_NODE_TYPE_SWITCH: 365 return osm_node_get_base_lid(p_node, 0); 366 default: 367 return 0; 368 } 369 } 370 371 /********************************************************************** 372 * Given a node, a port, and an optional monitored node, 373 * return the lid appropriate to query that port 374 **********************************************************************/ 375 static ib_net16_t get_lid(osm_node_t * p_node, uint8_t port, 376 monitored_node_t * mon_node) 377 { 378 if (mon_node && mon_node->num_ports && port < mon_node->num_ports && 379 mon_node->port[port].lid) 380 return mon_node->port[port].lid; 381 382 return get_base_lid(p_node, port); 383 } 384 385 /********************************************************************** 386 * Build a Performance Management class MAD 387 **********************************************************************/ 388 static osm_madw_t *perfmgr_build_mad(osm_perfmgr_t * perfmgr, 389 ib_net16_t dest_lid, 390 uint8_t sl, 391 ib_net32_t dest_qp, 392 uint16_t pkey_ix, 393 uint8_t mad_method, 394 ib_net16_t attr_id, 395 osm_madw_context_t * p_context, 396 ib_perfmgt_mad_t ** p_pm_mad) 397 { 398 ib_perfmgt_mad_t *pm_mad = NULL; 399 osm_madw_t *p_madw = NULL; 400 401 OSM_LOG_ENTER(perfmgr->log); 402 403 p_madw = osm_mad_pool_get(perfmgr->mad_pool, perfmgr->bind_handle, 404 MAD_BLOCK_SIZE, NULL); 405 if (p_madw == NULL) 406 return NULL; 407 408 pm_mad = osm_madw_get_perfmgt_mad_ptr(p_madw); 409 410 /* build the mad */ 411 pm_mad->header.base_ver = 1; 412 pm_mad->header.mgmt_class = IB_MCLASS_PERF; 413 pm_mad->header.class_ver = 1; 414 pm_mad->header.method = mad_method; 415 pm_mad->header.status = 0; 416 pm_mad->header.class_spec = 0; 417 pm_mad->header.trans_id = 418 cl_hton64((uint64_t) cl_atomic_inc(&perfmgr->trans_id) & 419 (uint64_t) (0xFFFFFFFF)); 420 if (perfmgr->trans_id == 0) 421 pm_mad->header.trans_id = 422 cl_hton64((uint64_t) cl_atomic_inc(&perfmgr->trans_id) & 423 (uint64_t) (0xFFFFFFFF)); 424 pm_mad->header.attr_id = attr_id; 425 pm_mad->header.resv = 0; 426 pm_mad->header.attr_mod = 0; 427 428 p_madw->mad_addr.dest_lid = dest_lid; 429 p_madw->mad_addr.addr_type.gsi.remote_qp = dest_qp; 430 p_madw->mad_addr.addr_type.gsi.remote_qkey = 431 cl_hton32(IB_QP1_WELL_KNOWN_Q_KEY); 432 p_madw->mad_addr.addr_type.gsi.pkey_ix = pkey_ix; 433 p_madw->mad_addr.addr_type.gsi.service_level = sl; 434 p_madw->mad_addr.addr_type.gsi.global_route = FALSE; 435 p_madw->resp_expected = TRUE; 436 437 if (p_context) 438 p_madw->context = *p_context; 439 440 if (p_pm_mad) 441 *p_pm_mad = pm_mad; 442 443 OSM_LOG_EXIT(perfmgr->log); 444 445 return (p_madw); 446 } 447 448 /********************************************************************** 449 * Send a Performance Management class MAD 450 **********************************************************************/ 451 static ib_api_status_t perfmgr_send_mad(osm_perfmgr_t *perfmgr, 452 osm_madw_t * const p_madw) 453 { 454 cl_status_t sts; 455 ib_api_status_t status = osm_vendor_send(perfmgr->bind_handle, p_madw, 456 TRUE); 457 if (status == IB_SUCCESS) { 458 /* pause thread if there are too many outstanding requests */ 459 cl_atomic_inc(&(perfmgr->outstanding_queries)); 460 while (perfmgr->outstanding_queries > 461 (int32_t)perfmgr->max_outstanding_queries) { 462 cl_spinlock_acquire(&perfmgr->lock); 463 if (perfmgr->sweep_state == PERFMGR_SWEEP_SLEEP) { 464 perfmgr->sweep_state = PERFMGR_SWEEP_POST_PROCESSING; 465 OSM_LOG(perfmgr->log, OSM_LOG_INFO, 466 "PM sweep state going into Post Processing\n"); 467 } else if (perfmgr->sweep_state == PERFMGR_SWEEP_ACTIVE) 468 perfmgr->sweep_state = PERFMGR_SWEEP_SUSPENDED; 469 cl_spinlock_release(&perfmgr->lock); 470 wait: 471 sts = cl_event_wait_on(&perfmgr->sig_query, 472 EVENT_NO_TIMEOUT, TRUE); 473 if (sts != CL_SUCCESS) 474 goto wait; 475 476 cl_spinlock_acquire(&perfmgr->lock); 477 if (perfmgr->sweep_state == PERFMGR_SWEEP_SUSPENDED) 478 perfmgr->sweep_state = PERFMGR_SWEEP_ACTIVE; 479 cl_spinlock_release(&perfmgr->lock); 480 } 481 } 482 return (status); 483 } 484 485 486 /********************************************************************** 487 * Form and send the PortCounters MAD for a single port 488 **********************************************************************/ 489 static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr, 490 ib_net16_t dest_lid, 491 ib_net32_t dest_qp, uint16_t pkey_ix, 492 uint8_t port, uint8_t mad_method, 493 uint16_t counter_select, 494 uint8_t counter_select2, 495 osm_madw_context_t * p_context, 496 uint8_t sl) 497 { 498 ib_api_status_t status = IB_SUCCESS; 499 ib_port_counters_t *port_counter = NULL; 500 ib_perfmgt_mad_t *pm_mad = NULL; 501 osm_madw_t *p_madw = NULL; 502 503 OSM_LOG_ENTER(perfmgr->log); 504 505 p_context->perfmgr_context.mad_attr_id = IB_MAD_ATTR_PORT_CNTRS; 506 p_madw = perfmgr_build_mad(perfmgr, dest_lid, sl, dest_qp, pkey_ix, 507 mad_method, IB_MAD_ATTR_PORT_CNTRS, p_context, 508 &pm_mad); 509 if (p_madw == NULL) 510 return IB_INSUFFICIENT_MEMORY; 511 512 port_counter = (ib_port_counters_t *) & pm_mad->data; 513 memset(port_counter, 0, sizeof(*port_counter)); 514 port_counter->port_select = port; 515 port_counter->counter_select = cl_hton16(counter_select); 516 port_counter->counter_select2 = counter_select2; 517 518 status = perfmgr_send_mad(perfmgr, p_madw); 519 520 OSM_LOG_EXIT(perfmgr->log); 521 return status; 522 } 523 524 /********************************************************************** 525 * sweep the node_guid_tbl and collect the node guids to be tracked 526 **********************************************************************/ 527 static void collect_guids(cl_map_item_t * p_map_item, void *context) 528 { 529 osm_node_t *node = (osm_node_t *) p_map_item; 530 uint64_t node_guid = cl_ntoh64(node->node_info.node_guid); 531 osm_perfmgr_t *pm = (osm_perfmgr_t *) context; 532 monitored_node_t *mon_node = NULL; 533 uint32_t num_ports; 534 int port; 535 536 OSM_LOG_ENTER(pm->log); 537 538 if (cl_qmap_get(&pm->monitored_map, node_guid) == 539 cl_qmap_end(&pm->monitored_map)) { 540 541 if (pm->ignore_cas && 542 (node->node_info.node_type == IB_NODE_TYPE_CA)) 543 goto Exit; 544 545 /* if not already in map add it */ 546 num_ports = osm_node_get_num_physp(node); 547 mon_node = malloc(sizeof(*mon_node) + 548 sizeof(monitored_port_t) * num_ports); 549 if (!mon_node) { 550 OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 5406: " 551 "malloc failed: not handling node %s" 552 "(GUID 0x%" PRIx64 ")\n", node->print_desc, 553 node_guid); 554 goto Exit; 555 } 556 memset(mon_node, 0, 557 sizeof(*mon_node) + sizeof(monitored_port_t) * num_ports); 558 mon_node->guid = node_guid; 559 mon_node->name = strdup(node->print_desc); 560 mon_node->num_ports = num_ports; 561 mon_node->node_type = node->node_info.node_type; 562 /* check for enhanced switch port 0 */ 563 mon_node->esp0 = (node->sw && 564 ib_switch_info_is_enhanced_port0(&node->sw-> 565 switch_info)); 566 for (port = mon_node->esp0 ? 0 : 1; port < num_ports; port++) { 567 monitored_port_t *mon_port = &mon_node->port[port]; 568 osm_physp_t *p_physp = &node->physp_table[port]; 569 osm_physp_t *p_remote_physp = p_physp->p_remote_physp; 570 571 mon_port->orig_lid = 0; 572 mon_port->valid = FALSE; 573 if (osm_physp_is_valid(p_physp)) { 574 mon_port->orig_lid = get_base_lid(node, port); 575 mon_port->valid = TRUE; 576 } 577 mon_port->remote_valid = FALSE; 578 mon_port->remote_name = NULL; 579 if (p_remote_physp && osm_physp_is_valid(p_remote_physp)) { 580 osm_node_t *p_remote_node = p_remote_physp->p_node; 581 mon_port->remote_valid = TRUE; 582 mon_port->remote_guid = p_remote_node->node_info.node_guid; 583 mon_port->remote_name = strdup(p_remote_node->print_desc); 584 mon_port->remote_port = p_remote_physp->port_num; 585 } 586 } 587 588 cl_qmap_insert(&pm->monitored_map, node_guid, 589 (cl_map_item_t *) mon_node); 590 } 591 592 Exit: 593 OSM_LOG_EXIT(pm->log); 594 } 595 596 /********************************************************************** 597 * Form and send the ClassPortInfo MAD for a single port 598 **********************************************************************/ 599 static ib_api_status_t perfmgr_send_cpi_mad(osm_perfmgr_t * pm, 600 ib_net16_t dest_lid, 601 ib_net32_t dest_qp, 602 uint16_t pkey_ix, 603 uint8_t port, 604 osm_madw_context_t * p_context, 605 uint8_t sl) 606 { 607 ib_api_status_t status = IB_SUCCESS; 608 osm_madw_t *p_madw = NULL; 609 610 OSM_LOG_ENTER(pm->log); 611 612 p_context->perfmgr_context.mad_attr_id = IB_MAD_ATTR_CLASS_PORT_INFO; 613 p_madw = perfmgr_build_mad(pm, dest_lid, sl, dest_qp, 614 pkey_ix, IB_MAD_METHOD_GET, 615 IB_MAD_ATTR_CLASS_PORT_INFO, p_context, 616 NULL); 617 if (p_madw == NULL) 618 return IB_INSUFFICIENT_MEMORY; 619 620 status = perfmgr_send_mad(pm, p_madw); 621 622 OSM_LOG_EXIT(pm->log); 623 return status; 624 } 625 626 /********************************************************************** 627 * return if some form of PortCountersExtended (PCE || PCE NoIETF) are supported 628 **********************************************************************/ 629 static inline boolean_t pce_supported(monitored_node_t *mon_node, uint8_t port) 630 { 631 monitored_port_t *mon_port = &(mon_node->port[port]); 632 return (mon_port->cpi_valid 633 && (mon_port->cap_mask & IB_PM_EXT_WIDTH_SUPPORTED 634 || mon_port->cap_mask & IB_PM_EXT_WIDTH_NOIETF_SUP)); 635 } 636 637 /********************************************************************** 638 * return if CapMask.PortCountersXmitWaitSupported is set 639 **********************************************************************/ 640 static inline boolean_t xmit_wait_supported(monitored_node_t *mon_node, uint8_t port) 641 { 642 monitored_port_t *mon_port = &(mon_node->port[port]); 643 return (mon_port->cpi_valid 644 && (mon_port->cap_mask & IB_PM_PC_XMIT_WAIT_SUP)); 645 } 646 647 /********************************************************************** 648 * return if "full" PortCountersExtended (IETF) is indicated 649 **********************************************************************/ 650 static inline boolean_t ietf_supported(monitored_node_t *mon_node, uint8_t port) 651 { 652 monitored_port_t *mon_port = &(mon_node->port[port]); 653 return (mon_port->cpi_valid 654 && (mon_port->cap_mask & IB_PM_EXT_WIDTH_SUPPORTED)); 655 } 656 657 /********************************************************************** 658 * Form and send the PortCountersExtended MAD for a single port 659 **********************************************************************/ 660 static ib_api_status_t perfmgr_send_pce_mad(osm_perfmgr_t * perfmgr, 661 ib_net16_t dest_lid, 662 ib_net32_t dest_qp, 663 uint16_t pkey_ix, 664 uint8_t port, uint8_t mad_method, 665 osm_madw_context_t * p_context, 666 uint8_t sl) 667 { 668 ib_api_status_t status = IB_SUCCESS; 669 ib_port_counters_ext_t *port_counter_ext = NULL; 670 ib_perfmgt_mad_t *pm_mad = NULL; 671 osm_madw_t *p_madw = NULL; 672 673 OSM_LOG_ENTER(perfmgr->log); 674 675 p_context->perfmgr_context.mad_attr_id = IB_MAD_ATTR_PORT_CNTRS_EXT; 676 p_madw = perfmgr_build_mad(perfmgr, dest_lid, sl, dest_qp, pkey_ix, 677 mad_method, IB_MAD_ATTR_PORT_CNTRS_EXT, p_context, 678 &pm_mad); 679 if (p_madw == NULL) 680 return IB_INSUFFICIENT_MEMORY; 681 682 port_counter_ext = (ib_port_counters_ext_t *) & pm_mad->data; 683 memset(port_counter_ext, 0, sizeof(*port_counter_ext)); 684 port_counter_ext->port_select = port; 685 port_counter_ext->counter_select = cl_hton16(0x00FF); 686 687 status = perfmgr_send_mad(perfmgr, p_madw); 688 689 OSM_LOG_EXIT(perfmgr->log); 690 return status; 691 } 692 693 /********************************************************************** 694 * query the Port Counters of all the nodes in the subnet 695 **********************************************************************/ 696 static void perfmgr_query_counters(cl_map_item_t * p_map_item, void *context) 697 { 698 ib_api_status_t status = IB_SUCCESS; 699 osm_perfmgr_t *pm = context; 700 osm_node_t *node = NULL; 701 monitored_node_t *mon_node = (monitored_node_t *) p_map_item; 702 osm_madw_context_t mad_context; 703 uint64_t node_guid = 0; 704 ib_net32_t remote_qp; 705 uint8_t port, num_ports = 0; 706 707 OSM_LOG_ENTER(pm->log); 708 709 cl_plock_acquire(&pm->osm->lock); 710 node = osm_get_node_by_guid(pm->subn, cl_hton64(mon_node->guid)); 711 if (!node) { 712 OSM_LOG(pm->log, OSM_LOG_ERROR, 713 "ERR 5407: Node \"%s\" (guid 0x%" PRIx64 714 ") no longer exists so removing from PerfMgr monitoring\n", 715 mon_node->name, mon_node->guid); 716 mark_for_removal(pm, mon_node); 717 goto Exit; 718 } 719 720 num_ports = osm_node_get_num_physp(node); 721 node_guid = cl_ntoh64(node->node_info.node_guid); 722 723 /* make sure there is a database object ready to store this info */ 724 if (perfmgr_db_create_entry(pm->db, node_guid, mon_node->esp0, 725 num_ports, node->print_desc) != 726 PERFMGR_EVENT_DB_SUCCESS) { 727 OSM_LOG(pm->log, OSM_LOG_ERROR, 728 "ERR 5408: DB create entry failed for 0x%" 729 PRIx64 " (%s) : %s\n", node_guid, node->print_desc, 730 strerror(errno)); 731 goto Exit; 732 } 733 734 perfmgr_db_mark_active(pm->db, node_guid, TRUE); 735 736 /* issue the query for each port */ 737 for (port = mon_node->esp0 ? 0 : 1; port < num_ports; port++) { 738 ib_net16_t lid; 739 740 if (!osm_node_get_physp_ptr(node, port)) 741 continue; 742 743 if (!mon_node->port[port].valid) 744 continue; 745 746 lid = get_lid(node, port, mon_node); 747 if (lid == 0) { 748 OSM_LOG(pm->log, OSM_LOG_DEBUG, "WARN: node 0x%" PRIx64 749 " port %d (%s): port out of range, skipping\n", 750 cl_ntoh64(node->node_info.node_guid), port, 751 node->print_desc); 752 continue; 753 } 754 755 remote_qp = get_qp(mon_node, port); 756 757 mad_context.perfmgr_context.node_guid = node_guid; 758 mad_context.perfmgr_context.port = port; 759 mad_context.perfmgr_context.mad_method = IB_MAD_METHOD_GET; 760 761 if (pm->query_cpi && !mon_node->port[port].cpi_valid) { 762 status = perfmgr_send_cpi_mad(pm, lid, remote_qp, 763 mon_node->port[port].pkey_ix, 764 port, &mad_context, 765 0); /* FIXME SL != 0 */ 766 if (status != IB_SUCCESS) 767 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5410: " 768 "Failed to issue ClassPortInfo query " 769 "for node 0x%" PRIx64 770 " port %d (%s)\n", 771 node->node_info.node_guid, port, 772 node->print_desc); 773 if (mon_node->node_type == IB_NODE_TYPE_SWITCH) 774 goto Exit; /* only need to issue 1 CPI query 775 for switches */ 776 } else { 777 778 #ifdef ENABLE_OSM_PERF_MGR_PROFILE 779 gettimeofday(&mad_context.perfmgr_context.query_start, NULL); 780 #endif 781 OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Getting stats for node 0x%" 782 PRIx64 " port %d (lid %u) (%s)\n", 783 node_guid, port, cl_ntoh16(lid), 784 node->print_desc); 785 status = perfmgr_send_pc_mad(pm, lid, remote_qp, 786 mon_node->port[port].pkey_ix, 787 port, IB_MAD_METHOD_GET, 788 0xffff, 789 1, 790 &mad_context, 791 0); /* FIXME SL != 0 */ 792 if (status != IB_SUCCESS) 793 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5409: " 794 "Failed to issue port counter query for node 0x%" 795 PRIx64 " port %d (%s)\n", 796 node->node_info.node_guid, port, 797 node->print_desc); 798 799 if (pce_supported(mon_node, port)) { 800 801 #if ENABLE_OSM_PERF_MGR_PROFILE 802 gettimeofday(&mad_context.perfmgr_context.query_start, NULL); 803 #endif 804 status = perfmgr_send_pce_mad(pm, lid, remote_qp, 805 mon_node->port[port].pkey_ix, 806 port, 807 IB_MAD_METHOD_GET, 808 &mad_context, 809 0); /* FIXME SL != 0 */ 810 if (status != IB_SUCCESS) 811 OSM_LOG(pm->log, OSM_LOG_ERROR, 812 "ERR 5417: Failed to issue " 813 "port counter query for " 814 "node 0x%" PRIx64 " port " 815 "%d (%s)\n", 816 node->node_info.node_guid, 817 port, 818 node->print_desc); 819 } 820 } 821 } 822 Exit: 823 cl_plock_release(&pm->osm->lock); 824 OSM_LOG_EXIT(pm->log); 825 } 826 827 /********************************************************************** 828 * Discovery stuff 829 * This code should not be here, but merged with main OpenSM 830 **********************************************************************/ 831 extern int wait_for_pending_transactions(osm_stats_t * stats); 832 extern void osm_drop_mgr_process(IN osm_sm_t * sm); 833 834 static int sweep_hop_1(osm_sm_t * sm) 835 { 836 ib_api_status_t status = IB_SUCCESS; 837 osm_madw_context_t context; 838 osm_node_t *p_node; 839 osm_port_t *p_port; 840 osm_dr_path_t hop_1_path; 841 ib_net64_t port_guid; 842 uint8_t port_num; 843 uint8_t path_array[IB_SUBNET_PATH_HOPS_MAX]; 844 uint8_t num_ports; 845 osm_physp_t *p_ext_physp; 846 847 port_guid = sm->p_subn->sm_port_guid; 848 849 p_port = osm_get_port_by_guid(sm->p_subn, port_guid); 850 if (!p_port) { 851 OSM_LOG(sm->p_log, OSM_LOG_ERROR, 852 "ERR 5481: No SM port object\n"); 853 return -1; 854 } 855 856 p_node = p_port->p_node; 857 port_num = ib_node_info_get_local_port_num(&p_node->node_info); 858 859 OSM_LOG(sm->p_log, OSM_LOG_DEBUG, 860 "Probing hop 1 on local port %u\n", port_num); 861 862 memset(path_array, 0, sizeof(path_array)); 863 /* the hop_1 operations depend on the type of our node. 864 * Currently - legal nodes that can host SM are SW and CA */ 865 switch (osm_node_get_type(p_node)) { 866 case IB_NODE_TYPE_CA: 867 case IB_NODE_TYPE_ROUTER: 868 memset(&context, 0, sizeof(context)); 869 context.ni_context.node_guid = osm_node_get_node_guid(p_node); 870 context.ni_context.port_num = port_num; 871 872 path_array[1] = port_num; 873 874 osm_dr_path_init(&hop_1_path, 1, path_array); 875 CL_PLOCK_ACQUIRE(sm->p_lock); 876 status = osm_req_get(sm, &hop_1_path, IB_MAD_ATTR_NODE_INFO, 0, 877 TRUE, 0, CL_DISP_MSGID_NONE, &context); 878 CL_PLOCK_RELEASE(sm->p_lock); 879 880 if (status != IB_SUCCESS) 881 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 5482: " 882 "Request for NodeInfo failed\n"); 883 break; 884 885 case IB_NODE_TYPE_SWITCH: 886 /* Need to go over all the ports of the switch, and send a node_info 887 * from them. This doesn't include the port 0 of the switch, which 888 * hosts the SM. 889 * Note: We'll send another switchInfo on port 0, since if no ports 890 * are connected, we still want to get some response, and have the 891 * subnet come up. 892 */ 893 num_ports = osm_node_get_num_physp(p_node); 894 for (port_num = 0; port_num < num_ports; port_num++) { 895 /* go through the port only if the port is not DOWN */ 896 p_ext_physp = osm_node_get_physp_ptr(p_node, port_num); 897 if (!p_ext_physp || ib_port_info_get_port_state 898 (&p_ext_physp->port_info) <= IB_LINK_DOWN) 899 continue; 900 901 memset(&context, 0, sizeof(context)); 902 context.ni_context.node_guid = 903 osm_node_get_node_guid(p_node); 904 context.ni_context.port_num = port_num; 905 906 path_array[1] = port_num; 907 908 osm_dr_path_init(&hop_1_path, 1, path_array); 909 CL_PLOCK_ACQUIRE(sm->p_lock); 910 status = osm_req_get(sm, &hop_1_path, 911 IB_MAD_ATTR_NODE_INFO, 0, TRUE, 0, 912 CL_DISP_MSGID_NONE, &context); 913 CL_PLOCK_RELEASE(sm->p_lock); 914 915 if (status != IB_SUCCESS) 916 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 5484: " 917 "Request for NodeInfo failed\n"); 918 } 919 break; 920 921 default: 922 OSM_LOG(sm->p_log, OSM_LOG_ERROR, 923 "ERR 5483: Unknown node type %d\n", 924 osm_node_get_type(p_node)); 925 } 926 927 return status; 928 } 929 930 static unsigned is_sm_port_down(osm_sm_t * sm) 931 { 932 ib_net64_t port_guid; 933 osm_port_t *p_port; 934 935 port_guid = sm->p_subn->sm_port_guid; 936 if (port_guid == 0) 937 return 1; 938 939 CL_PLOCK_ACQUIRE(sm->p_lock); 940 p_port = osm_get_port_by_guid(sm->p_subn, port_guid); 941 if (!p_port) { 942 CL_PLOCK_RELEASE(sm->p_lock); 943 OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 5485: " 944 "SM port with GUID:%016" PRIx64 " is unknown\n", 945 cl_ntoh64(port_guid)); 946 return 1; 947 } 948 CL_PLOCK_RELEASE(sm->p_lock); 949 950 if (p_port->p_node->sw && 951 !ib_switch_info_is_enhanced_port0(&p_port->p_node->sw->switch_info)) 952 return 0; /* base SP0 */ 953 954 return osm_physp_get_port_state(p_port->p_physp) == IB_LINK_DOWN; 955 } 956 957 static int sweep_hop_0(osm_sm_t * sm) 958 { 959 ib_api_status_t status; 960 osm_dr_path_t dr_path; 961 osm_bind_handle_t h_bind; 962 uint8_t path_array[IB_SUBNET_PATH_HOPS_MAX]; 963 964 memset(path_array, 0, sizeof(path_array)); 965 966 h_bind = osm_sm_mad_ctrl_get_bind_handle(&sm->mad_ctrl); 967 if (h_bind == OSM_BIND_INVALID_HANDLE) { 968 OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "No bound ports\n"); 969 return -1; 970 } 971 972 osm_dr_path_init(&dr_path, 0, path_array); 973 CL_PLOCK_ACQUIRE(sm->p_lock); 974 status = osm_req_get(sm, &dr_path, IB_MAD_ATTR_NODE_INFO, 0, 975 TRUE, 0, CL_DISP_MSGID_NONE, NULL); 976 CL_PLOCK_RELEASE(sm->p_lock); 977 978 if (status != IB_SUCCESS) 979 OSM_LOG(sm->p_log, OSM_LOG_ERROR, 980 "ERR 5486: Request for NodeInfo failed\n"); 981 982 return status; 983 } 984 985 static void reset_node_count(cl_map_item_t * p_map_item, void *cxt) 986 { 987 osm_node_t *p_node = (osm_node_t *) p_map_item; 988 p_node->discovery_count = 0; 989 990 memset(p_node->physp_discovered, 0, 991 sizeof(uint8_t) * p_node->physp_tbl_size); 992 } 993 994 static void reset_port_count(cl_map_item_t * p_map_item, void *cxt) 995 { 996 osm_port_t *p_port = (osm_port_t *) p_map_item; 997 p_port->discovery_count = 0; 998 } 999 1000 static void reset_switch_count(cl_map_item_t * p_map_item, void *cxt) 1001 { 1002 osm_switch_t *p_sw = (osm_switch_t *) p_map_item; 1003 p_sw->need_update = 0; 1004 } 1005 1006 static int perfmgr_discovery(osm_opensm_t * osm) 1007 { 1008 int ret; 1009 1010 CL_PLOCK_ACQUIRE(&osm->lock); 1011 cl_qmap_apply_func(&osm->subn.node_guid_tbl, reset_node_count, NULL); 1012 cl_qmap_apply_func(&osm->subn.port_guid_tbl, reset_port_count, NULL); 1013 cl_qmap_apply_func(&osm->subn.sw_guid_tbl, reset_switch_count, NULL); 1014 CL_PLOCK_RELEASE(&osm->lock); 1015 1016 osm->subn.in_sweep_hop_0 = TRUE; 1017 1018 ret = sweep_hop_0(&osm->sm); 1019 if (ret) 1020 goto _exit; 1021 1022 if (wait_for_pending_transactions(&osm->stats)) 1023 goto _exit; 1024 1025 if (is_sm_port_down(&osm->sm)) { 1026 OSM_LOG(&osm->log, OSM_LOG_VERBOSE, "SM port is down\n"); 1027 goto _drop; 1028 } 1029 1030 osm->subn.in_sweep_hop_0 = FALSE; 1031 1032 ret = sweep_hop_1(&osm->sm); 1033 if (ret) 1034 goto _exit; 1035 1036 if (wait_for_pending_transactions(&osm->stats)) 1037 goto _exit; 1038 1039 _drop: 1040 osm_drop_mgr_process(&osm->sm); 1041 1042 _exit: 1043 return ret; 1044 } 1045 1046 /********************************************************************** 1047 * Main PerfMgr processor - query the performance counters 1048 **********************************************************************/ 1049 void osm_perfmgr_process(osm_perfmgr_t * pm) 1050 { 1051 #ifdef ENABLE_OSM_PERF_MGR_PROFILE 1052 struct timeval before, after; 1053 #endif 1054 1055 if (pm->state != PERFMGR_STATE_ENABLED) 1056 return; 1057 1058 cl_spinlock_acquire(&pm->lock); 1059 if (pm->sweep_state == PERFMGR_SWEEP_ACTIVE || 1060 pm->sweep_state == PERFMGR_SWEEP_SUSPENDED || 1061 pm->sweep_state == PERFMGR_SWEEP_POST_PROCESSING) { 1062 cl_spinlock_release(&pm->lock); 1063 OSM_LOG(pm->log, OSM_LOG_INFO, 1064 "PM sweep state %d, skipping sweep\n", 1065 pm->sweep_state); 1066 return; 1067 } 1068 1069 pm->sweep_state = PERFMGR_SWEEP_ACTIVE; 1070 cl_spinlock_release(&pm->lock); 1071 1072 if (pm->subn->sm_state == IB_SMINFO_STATE_STANDBY || 1073 pm->subn->sm_state == IB_SMINFO_STATE_NOTACTIVE) 1074 perfmgr_discovery(pm->subn->p_osm); 1075 1076 /* if redirection enabled, determine local port */ 1077 if (pm->subn->opt.perfmgr_redir && pm->local_port == -1) { 1078 osm_node_t *p_node; 1079 osm_port_t *p_port; 1080 1081 CL_PLOCK_ACQUIRE(pm->sm->p_lock); 1082 p_port = osm_get_port_by_guid(pm->subn, pm->port_guid); 1083 if (p_port) { 1084 p_node = p_port->p_node; 1085 CL_ASSERT(p_node); 1086 pm->local_port = 1087 ib_node_info_get_local_port_num(&p_node->node_info); 1088 } else 1089 OSM_LOG(pm->log, OSM_LOG_ERROR, 1090 "ERR 5487: No PerfMgr port object for " 1091 "port GUID 0x%" PRIx64 "\n", 1092 cl_ntoh64(pm->port_guid)); 1093 CL_PLOCK_RELEASE(pm->sm->p_lock); 1094 } 1095 1096 #ifdef ENABLE_OSM_PERF_MGR_PROFILE 1097 gettimeofday(&before, NULL); 1098 #endif 1099 /* With the global lock held, collect the node guids */ 1100 /* FIXME we should be able to track SA notices 1101 * and not have to sweep the node_guid_tbl each pass 1102 */ 1103 OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Gathering PerfMgr stats\n"); 1104 cl_plock_acquire(&pm->osm->lock); 1105 cl_qmap_apply_func(&pm->subn->node_guid_tbl, collect_guids, pm); 1106 cl_plock_release(&pm->osm->lock); 1107 1108 /* then for each node query their counters */ 1109 cl_qmap_apply_func(&pm->monitored_map, perfmgr_query_counters, pm); 1110 1111 /* clean out any nodes found to be removed during the sweep */ 1112 remove_marked_nodes(pm); 1113 1114 #ifdef ENABLE_OSM_PERF_MGR_PROFILE 1115 gettimeofday(&after, NULL); 1116 diff_time(&before, &after, &after); 1117 osm_log_v2(pm->log, OSM_LOG_INFO, FILE_ID, 1118 "PerfMgr total sweep time : %ld.%06ld s\n" 1119 " fastest mad : %g us\n" 1120 " slowest mad : %g us\n" 1121 " average mad : %g us\n", 1122 after.tv_sec, after.tv_usec, perfmgr_mad_stats.fastest_us, 1123 perfmgr_mad_stats.slowest_us, perfmgr_mad_stats.avg_us); 1124 clear_mad_stats(); 1125 #endif 1126 1127 cl_spinlock_acquire(&pm->lock); 1128 pm->sweep_state = PERFMGR_SWEEP_SLEEP; 1129 cl_spinlock_release(&pm->lock); 1130 } 1131 1132 /********************************************************************** 1133 * PerfMgr timer - loop continuously and signal SM to run PerfMgr 1134 * processor if enabled 1135 **********************************************************************/ 1136 static void perfmgr_sweep(void *arg) 1137 { 1138 osm_perfmgr_t *pm = arg; 1139 1140 osm_sm_signal(pm->sm, OSM_SIGNAL_PERFMGR_SWEEP); 1141 cl_timer_start(&pm->sweep_timer, pm->sweep_time_s * 1000); 1142 } 1143 1144 void osm_perfmgr_shutdown(osm_perfmgr_t * pm) 1145 { 1146 OSM_LOG_ENTER(pm->log); 1147 cl_timer_stop(&pm->sweep_timer); 1148 cl_disp_unregister(pm->pc_disp_h); 1149 perfmgr_mad_unbind(pm); 1150 OSM_LOG_EXIT(pm->log); 1151 } 1152 1153 void osm_perfmgr_destroy(osm_perfmgr_t * pm) 1154 { 1155 OSM_LOG_ENTER(pm->log); 1156 perfmgr_db_destroy(pm->db); 1157 cl_timer_destroy(&pm->sweep_timer); 1158 OSM_LOG_EXIT(pm->log); 1159 } 1160 1161 /********************************************************************** 1162 * Detect if someone else on the network could have cleared the counters 1163 * without us knowing. This is easy to detect because the counters never 1164 * wrap but are "sticky". 1165 * 1166 * The one time this will not work is if the port is getting errors fast 1167 * enough to have the reading overtake the previous reading. In this case, 1168 * counters will be missed. 1169 **********************************************************************/ 1170 static void perfmgr_check_oob_clear(osm_perfmgr_t * pm, 1171 monitored_node_t * mon_node, uint8_t port, 1172 perfmgr_db_err_reading_t * cr) 1173 { 1174 perfmgr_db_err_reading_t prev_err; 1175 1176 if (perfmgr_db_get_prev_err(pm->db, mon_node->guid, port, &prev_err) 1177 != PERFMGR_EVENT_DB_SUCCESS) { 1178 OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Failed to find previous " 1179 "error reading for %s (guid 0x%" PRIx64 ") port %u\n", 1180 mon_node->name, mon_node->guid, port); 1181 return; 1182 } 1183 1184 OSM_LOG(pm->log, OSM_LOG_DEBUG, 1185 "Errors vs previous node %s (0x%" PRIx64 ") port %u\n" 1186 "SE: %"PRIu64" ?< %"PRIu64"\n" 1187 "LE: %"PRIu64" ?< %"PRIu64"\n" 1188 "LD: %"PRIu64" ?< %"PRIu64"\n" 1189 "RE: %"PRIu64" ?< %"PRIu64"\n" 1190 "RPE: %"PRIu64" ?< %"PRIu64"\n" 1191 "SRE: %"PRIu64" ?< %"PRIu64"\n" 1192 "XD: %"PRIu64" ?< %"PRIu64"\n" 1193 "XCE: %"PRIu64" ?< %"PRIu64"\n" 1194 "RCE: %"PRIu64" ?< %"PRIu64"\n" 1195 "LI: %"PRIu64" ?< %"PRIu64"\n" 1196 "BO: %"PRIu64" ?< %"PRIu64"\n" 1197 "VL15: %"PRIu64" ?< %"PRIu64"\n" 1198 "XW: %"PRIu64" ?< %"PRIu64"\n" 1199 , 1200 mon_node->name, mon_node->guid, port, 1201 cr->symbol_err_cnt, prev_err.symbol_err_cnt, 1202 cr->link_err_recover, prev_err.link_err_recover, 1203 cr->link_downed, prev_err.link_downed, 1204 cr->rcv_err, prev_err.rcv_err, 1205 cr->rcv_rem_phys_err, prev_err.rcv_rem_phys_err, 1206 cr->rcv_switch_relay_err, prev_err.rcv_switch_relay_err, 1207 cr->xmit_discards, prev_err.xmit_discards, 1208 cr->xmit_constraint_err, prev_err.xmit_constraint_err, 1209 cr->rcv_constraint_err, prev_err.rcv_constraint_err, 1210 cr->link_integrity, prev_err.link_integrity, 1211 cr->buffer_overrun, prev_err.buffer_overrun, 1212 cr->vl15_dropped, prev_err.vl15_dropped, 1213 cr->xmit_wait, prev_err.xmit_wait); 1214 1215 if (cr->symbol_err_cnt < prev_err.symbol_err_cnt || 1216 cr->link_err_recover < prev_err.link_err_recover || 1217 cr->link_downed < prev_err.link_downed || 1218 cr->rcv_err < prev_err.rcv_err || 1219 cr->rcv_rem_phys_err < prev_err.rcv_rem_phys_err || 1220 cr->rcv_switch_relay_err < prev_err.rcv_switch_relay_err || 1221 cr->xmit_discards < prev_err.xmit_discards || 1222 cr->xmit_constraint_err < prev_err.xmit_constraint_err || 1223 cr->rcv_constraint_err < prev_err.rcv_constraint_err || 1224 cr->link_integrity < prev_err.link_integrity || 1225 cr->buffer_overrun < prev_err.buffer_overrun || 1226 cr->vl15_dropped < prev_err.vl15_dropped || 1227 cr->xmit_wait < prev_err.xmit_wait) { 1228 OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 540A: " 1229 "Detected an out of band error clear " 1230 "on %s (0x%" PRIx64 ") port %u\n", 1231 mon_node->name, mon_node->guid, port); 1232 perfmgr_db_clear_prev_err(pm->db, mon_node->guid, port); 1233 } 1234 } 1235 1236 /********************************************************************** 1237 * Return 1 if the value is "close" to overflowing 1238 * "close" is defined at 25% for now 1239 **********************************************************************/ 1240 static int counter_overflow_4(uint8_t val) 1241 { 1242 return (val >= 10); 1243 } 1244 1245 static int counter_overflow_8(uint8_t val) 1246 { 1247 return (val >= (UINT8_MAX - (UINT8_MAX / 4))); 1248 } 1249 1250 static int counter_overflow_16(ib_net16_t val) 1251 { 1252 return (cl_ntoh16(val) >= (UINT16_MAX - (UINT16_MAX / 4))); 1253 } 1254 1255 static int counter_overflow_32(ib_net32_t val) 1256 { 1257 return (cl_ntoh32(val) >= (UINT32_MAX - (UINT32_MAX / 4))); 1258 } 1259 1260 static int counter_overflow_64(ib_net64_t val) 1261 { 1262 return (cl_ntoh64(val) >= (UINT64_MAX - (UINT64_MAX / 4))); 1263 } 1264 1265 /********************************************************************** 1266 * Check if the port counters have overflowed and if so issue a clear 1267 * MAD to the port 1268 **********************************************************************/ 1269 static void perfmgr_check_overflow(osm_perfmgr_t * pm, 1270 monitored_node_t * mon_node, int16_t pkey_ix, 1271 uint8_t port, ib_port_counters_t * pc, 1272 boolean_t xmit_wait_sup) 1273 { 1274 osm_madw_context_t mad_context; 1275 ib_api_status_t status; 1276 ib_net32_t remote_qp; 1277 uint16_t counter_select; 1278 uint8_t counter_select2; 1279 1280 OSM_LOG_ENTER(pm->log); 1281 1282 if (counter_overflow_16(pc->symbol_err_cnt) || 1283 counter_overflow_8(pc->link_err_recover) || 1284 counter_overflow_8(pc->link_downed) || 1285 counter_overflow_16(pc->rcv_err) || 1286 counter_overflow_16(pc->rcv_rem_phys_err) || 1287 counter_overflow_16(pc->rcv_switch_relay_err) || 1288 counter_overflow_16(pc->xmit_discards) || 1289 counter_overflow_8(pc->xmit_constraint_err) || 1290 counter_overflow_8(pc->rcv_constraint_err) || 1291 counter_overflow_4(PC_LINK_INT(pc->link_int_buffer_overrun)) || 1292 counter_overflow_4(PC_BUF_OVERRUN(pc->link_int_buffer_overrun)) || 1293 counter_overflow_16(pc->vl15_dropped) || 1294 (xmit_wait_sup && counter_overflow_32(pc->xmit_wait)) || 1295 (!pce_supported(mon_node, port) && 1296 (counter_overflow_32(pc->xmit_data) || 1297 counter_overflow_32(pc->rcv_data) || 1298 counter_overflow_32(pc->xmit_pkts) || 1299 counter_overflow_32(pc->rcv_pkts)))) { 1300 osm_node_t *p_node = NULL; 1301 ib_net16_t lid = 0; 1302 1303 if (!mon_node->port[port].valid) 1304 goto Exit; 1305 1306 osm_log_v2(pm->log, OSM_LOG_VERBOSE, FILE_ID, 1307 "PerfMgr: Counter overflow: %s (0x%" PRIx64 1308 ") port %d; clearing counters\n", 1309 mon_node->name, mon_node->guid, port); 1310 1311 cl_plock_acquire(&pm->osm->lock); 1312 p_node = 1313 osm_get_node_by_guid(pm->subn, cl_hton64(mon_node->guid)); 1314 if (!p_node) { 1315 OSM_LOG(pm->log, OSM_LOG_ERROR, 1316 "ERR 5407: Node \"%s\" (guid 0x%" PRIx64 1317 ") no longer exists so removing from PerfMgr" 1318 " monitoring\n", 1319 mon_node->name, mon_node->guid); 1320 goto Exit; 1321 } 1322 lid = get_lid(p_node, port, mon_node); 1323 cl_plock_release(&pm->osm->lock); 1324 if (lid == 0) { 1325 OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 540C: " 1326 "Failed to clear counters for %s (0x%" 1327 PRIx64 ") port %d; failed to get lid\n", 1328 mon_node->name, mon_node->guid, port); 1329 goto Exit; 1330 } 1331 1332 remote_qp = get_qp(NULL, port); 1333 1334 mad_context.perfmgr_context.node_guid = mon_node->guid; 1335 mad_context.perfmgr_context.port = port; 1336 mad_context.perfmgr_context.mad_method = IB_MAD_METHOD_SET; 1337 1338 /* apparently some HW uses the same counters for the 32 and 64 1339 * bit versions and a clear of them in the PortCounters 1340 * attribute also clears the ExtendedPortCounters equivalant 1341 * counters 1342 */ 1343 if (pce_supported(mon_node, port)) 1344 counter_select = 0x0fff; 1345 else 1346 counter_select = 0xffff; 1347 1348 if (xmit_wait_sup) 1349 counter_select2 = 1; 1350 else 1351 counter_select2 = 0; 1352 1353 status = perfmgr_send_pc_mad(pm, lid, remote_qp, pkey_ix, 1354 port, IB_MAD_METHOD_SET, 1355 counter_select, 1356 counter_select2, 1357 &mad_context, 1358 0); /* FIXME SL != 0 */ 1359 if (status != IB_SUCCESS) 1360 OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 5411: " 1361 "Failed to send clear counters MAD for %s (0x%" 1362 PRIx64 ") port %d\n", 1363 mon_node->name, mon_node->guid, port); 1364 1365 perfmgr_db_clear_prev_err(pm->db, mon_node->guid, port); 1366 if (!pce_supported(mon_node, port)) 1367 perfmgr_db_clear_prev_dc(pm->db, mon_node->guid, port); 1368 } 1369 1370 Exit: 1371 OSM_LOG_EXIT(pm->log); 1372 } 1373 1374 /********************************************************************** 1375 * Check if the port counters have overflowed and if so issue a clear 1376 * MAD to the port 1377 **********************************************************************/ 1378 static void perfmgr_check_pce_overflow(osm_perfmgr_t * pm, 1379 monitored_node_t * mon_node, 1380 int16_t pkey_ix, 1381 uint8_t port, 1382 ib_port_counters_ext_t * pc) 1383 { 1384 osm_madw_context_t mad_context; 1385 ib_api_status_t status; 1386 ib_net32_t remote_qp; 1387 1388 OSM_LOG_ENTER(pm->log); 1389 1390 if (counter_overflow_64(pc->xmit_data) || 1391 counter_overflow_64(pc->rcv_data) || 1392 counter_overflow_64(pc->xmit_pkts) || 1393 counter_overflow_64(pc->rcv_pkts) || 1394 (ietf_supported(mon_node, port) && 1395 (counter_overflow_64(pc->unicast_xmit_pkts) || 1396 counter_overflow_64(pc->unicast_rcv_pkts) || 1397 counter_overflow_64(pc->multicast_xmit_pkts) || 1398 counter_overflow_64(pc->multicast_rcv_pkts)))) { 1399 osm_node_t *p_node = NULL; 1400 ib_net16_t lid = 0; 1401 1402 if (!mon_node->port[port].valid) 1403 goto Exit; 1404 1405 osm_log(pm->log, OSM_LOG_VERBOSE, 1406 "PerfMgr: PortCountersExtended overflow: %s (0x%" 1407 PRIx64 ") port %d; clearing counters\n", 1408 mon_node->name, mon_node->guid, port); 1409 1410 cl_plock_acquire(&pm->osm->lock); 1411 p_node = 1412 osm_get_node_by_guid(pm->subn, cl_hton64(mon_node->guid)); 1413 if (!p_node) { 1414 OSM_LOG(pm->log, OSM_LOG_ERROR, 1415 "ERR 5407: Node \"%s\" (guid 0x%" PRIx64 1416 ") no longer exists so removing from PerfMgr" 1417 " monitoring\n", 1418 mon_node->name, mon_node->guid); 1419 goto Exit; 1420 } 1421 lid = get_lid(p_node, port, mon_node); 1422 cl_plock_release(&pm->osm->lock); 1423 if (lid == 0) { 1424 OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 5418: " 1425 "Failed to clear counters for %s (0x%" 1426 PRIx64 ") port %d; failed to get lid\n", 1427 mon_node->name, mon_node->guid, port); 1428 goto Exit; 1429 } 1430 1431 remote_qp = get_qp(NULL, port); 1432 1433 mad_context.perfmgr_context.node_guid = mon_node->guid; 1434 mad_context.perfmgr_context.port = port; 1435 mad_context.perfmgr_context.mad_method = IB_MAD_METHOD_SET; 1436 /* clear port counters */ 1437 status = perfmgr_send_pce_mad(pm, lid, remote_qp, pkey_ix, 1438 port, IB_MAD_METHOD_SET, 1439 &mad_context, 1440 0); /* FIXME SL != 0 */ 1441 if (status != IB_SUCCESS) 1442 OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 5419: " 1443 "Failed to send clear counters MAD for %s (0x%" 1444 PRIx64 ") port %d\n", 1445 mon_node->name, mon_node->guid, port); 1446 1447 perfmgr_db_clear_prev_dc(pm->db, mon_node->guid, port); 1448 } 1449 1450 Exit: 1451 OSM_LOG_EXIT(pm->log); 1452 } 1453 1454 /********************************************************************** 1455 * Check values for logging of errors 1456 **********************************************************************/ 1457 static void perfmgr_log_errors(osm_perfmgr_t * pm, 1458 monitored_node_t * mon_node, uint8_t port, 1459 perfmgr_db_err_reading_t * reading) 1460 { 1461 perfmgr_db_err_reading_t prev_read; 1462 perfmgr_db_err_t err = 1463 perfmgr_db_get_prev_err(pm->db, mon_node->guid, port, &prev_read); 1464 uint64_t cur, prev; 1465 1466 if (err != PERFMGR_EVENT_DB_SUCCESS) { 1467 OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Failed to find previous " 1468 "reading for %s (0x%" PRIx64 ") port %u\n", 1469 mon_node->name, mon_node->guid, port); 1470 return; 1471 } 1472 1473 #define LOG_ERR_CNT(errname, errnum, counter_name) \ 1474 if (reading->counter_name > prev_read.counter_name) { \ 1475 if (mon_node->port[port].remote_valid == TRUE) \ 1476 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR %s: " \ 1477 "%s : %" PRIu64 " : node " \ 1478 "\"%s\" (NodeGUID: 0x%" PRIx64 ") : port %u " \ 1479 "connected to \"%s\" (NodeGUID: 0x%" PRIx64 ") : port %u\n", \ 1480 errnum, errname, \ 1481 reading->counter_name - prev_read.counter_name, \ 1482 mon_node->name, mon_node->guid, port, \ 1483 mon_node->port[port].remote_name, \ 1484 mon_node->port[port].remote_guid, \ 1485 mon_node->port[port].remote_port); \ 1486 else \ 1487 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR %s: " \ 1488 "%s : %" PRIu64 " : node " \ 1489 "\"%s\" (NodeGUID: 0x%" PRIx64 ") : port %u\n", \ 1490 errnum, errname, \ 1491 reading->counter_name - prev_read.counter_name, \ 1492 mon_node->name, mon_node->guid, port); \ 1493 } 1494 1495 LOG_ERR_CNT("SymbolErrorCounter", "5431", symbol_err_cnt); 1496 LOG_ERR_CNT("LinkErrorRecoveryCounter", "5432", link_err_recover); 1497 LOG_ERR_CNT("LinkDownedCounter", "5433", link_downed); 1498 LOG_ERR_CNT("PortRcvErrors", "5434", rcv_err); 1499 LOG_ERR_CNT("PortRcvRemotePhysicalErrors", "5435", rcv_rem_phys_err); 1500 LOG_ERR_CNT("PortRcvSwitchRelayErrors", "5436", rcv_switch_relay_err); 1501 LOG_ERR_CNT("PortXmitDiscards", "5437", xmit_discards); 1502 LOG_ERR_CNT("PortXmitConstraintErrors", "5438", xmit_constraint_err); 1503 LOG_ERR_CNT("PortRcvConstraintErrors", "5439", rcv_constraint_err); 1504 LOG_ERR_CNT("LocalLinkIntegrityErrors", "543A", link_integrity); 1505 LOG_ERR_CNT("ExcessiveBufferOverrunErrors", "543B", buffer_overrun); 1506 LOG_ERR_CNT("VL15Dropped", "543C", vl15_dropped); 1507 1508 cur = reading->xmit_wait; 1509 prev = prev_read.xmit_wait; 1510 if (pm->xmit_wait_log && cur > prev && 1511 (cur - prev) >= pm->xmit_wait_threshold) { 1512 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 543D: XmitWait : %" PRIu64 1513 " : node \"%s\" (NodeGUID: 0x%" PRIx64 ") : port %u\n", 1514 cur - prev, mon_node->name, mon_node->guid, port); 1515 } 1516 } 1517 1518 static int16_t validate_redir_pkey(osm_perfmgr_t *pm, ib_net16_t pkey) 1519 { 1520 int16_t pkey_ix = -1; 1521 osm_port_t *p_port; 1522 osm_pkey_tbl_t *p_pkey_tbl; 1523 ib_net16_t *p_orig_pkey; 1524 uint16_t block; 1525 uint8_t index; 1526 1527 OSM_LOG_ENTER(pm->log); 1528 1529 CL_PLOCK_ACQUIRE(pm->sm->p_lock); 1530 p_port = osm_get_port_by_guid(pm->subn, pm->port_guid); 1531 if (!p_port) { 1532 CL_PLOCK_RELEASE(pm->sm->p_lock); 1533 OSM_LOG(pm->log, OSM_LOG_ERROR, 1534 "ERR 541E: No PerfMgr port object\n"); 1535 goto Exit; 1536 } 1537 if (p_port->p_physp && osm_physp_is_valid(p_port->p_physp)) { 1538 p_pkey_tbl = &p_port->p_physp->pkeys; 1539 if (!p_pkey_tbl) { 1540 CL_PLOCK_RELEASE(pm->sm->p_lock); 1541 OSM_LOG(pm->log, OSM_LOG_VERBOSE, 1542 "No PKey table found for PerfMgr port\n"); 1543 goto Exit; 1544 } 1545 p_orig_pkey = cl_map_get(&p_pkey_tbl->keys, 1546 ib_pkey_get_base(pkey)); 1547 if (!p_orig_pkey) { 1548 CL_PLOCK_RELEASE(pm->sm->p_lock); 1549 OSM_LOG(pm->log, OSM_LOG_VERBOSE, 1550 "PKey 0x%x not found for PerfMgr port\n", 1551 cl_ntoh16(pkey)); 1552 goto Exit; 1553 } 1554 if (osm_pkey_tbl_get_block_and_idx(p_pkey_tbl, p_orig_pkey, 1555 &block, &index) == IB_SUCCESS) { 1556 CL_PLOCK_RELEASE(pm->sm->p_lock); 1557 pkey_ix = block * IB_NUM_PKEY_ELEMENTS_IN_BLOCK + index; 1558 } else { 1559 CL_PLOCK_RELEASE(pm->sm->p_lock); 1560 OSM_LOG(pm->log, OSM_LOG_ERROR, 1561 "ERR 541F: Failed to obtain P_Key 0x%04x " 1562 "block and index for PerfMgr port\n", 1563 cl_ntoh16(pkey)); 1564 } 1565 } else { 1566 CL_PLOCK_RELEASE(pm->sm->p_lock); 1567 OSM_LOG(pm->log, OSM_LOG_ERROR, 1568 "ERR 5420: Local PerfMgt port physp invalid\n"); 1569 } 1570 1571 Exit: 1572 OSM_LOG_EXIT(pm->log); 1573 return pkey_ix; 1574 } 1575 1576 static boolean_t handle_redirect(osm_perfmgr_t *pm, 1577 ib_class_port_info_t *cpi, 1578 monitored_node_t *p_mon_node, 1579 uint8_t port, 1580 osm_madw_context_t *mad_context) 1581 { 1582 char gid_str[INET6_ADDRSTRLEN]; 1583 ib_api_status_t status; 1584 boolean_t valid = TRUE; 1585 int16_t pkey_ix = 0; 1586 uint8_t mad_method; 1587 1588 OSM_LOG(pm->log, OSM_LOG_VERBOSE, 1589 "Redirection to LID %u GID %s QP 0x%x received\n", 1590 cl_ntoh16(cpi->redir_lid), 1591 inet_ntop(AF_INET6, cpi->redir_gid.raw, gid_str, 1592 sizeof gid_str), cl_ntoh32(cpi->redir_qp)); 1593 1594 if (!pm->subn->opt.perfmgr_redir) { 1595 OSM_LOG(pm->log, OSM_LOG_VERBOSE, 1596 "Redirection requested but disabled\n"); 1597 valid = FALSE; 1598 } 1599 1600 /* valid redirection ? */ 1601 if (cpi->redir_lid == 0) { 1602 if (!ib_gid_is_notzero(&cpi->redir_gid)) { 1603 OSM_LOG(pm->log, OSM_LOG_VERBOSE, 1604 "Invalid redirection " 1605 "(both redirect LID and GID are zero)\n"); 1606 valid = FALSE; 1607 } 1608 } 1609 if (cpi->redir_qp == 0) { 1610 OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Invalid RedirectQP\n"); 1611 valid = FALSE; 1612 } 1613 if (cpi->redir_pkey == 0) { 1614 OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Invalid RedirectP_Key\n"); 1615 valid = FALSE; 1616 } 1617 if (cpi->redir_qkey != IB_QP1_WELL_KNOWN_Q_KEY) { 1618 OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Invalid RedirectQ_Key\n"); 1619 valid = FALSE; 1620 } 1621 1622 pkey_ix = validate_redir_pkey(pm, cpi->redir_pkey); 1623 if (pkey_ix == -1) { 1624 OSM_LOG(pm->log, OSM_LOG_VERBOSE, 1625 "Index for Pkey 0x%x not found\n", 1626 cl_ntoh16(cpi->redir_pkey)); 1627 valid = FALSE; 1628 } 1629 1630 if (cpi->redir_lid == 0) { 1631 /* GID redirection: get PathRecord information */ 1632 OSM_LOG(pm->log, OSM_LOG_VERBOSE, 1633 "GID redirection not currently supported\n"); 1634 goto Exit; 1635 } 1636 1637 if (!valid) 1638 goto Exit; 1639 1640 /* LID redirection support (easier than GID redirection) */ 1641 cl_plock_acquire(&pm->osm->lock); 1642 p_mon_node->port[port].redirection = TRUE; 1643 p_mon_node->port[port].valid = valid; 1644 memcpy(&p_mon_node->port[port].gid, &cpi->redir_gid, 1645 sizeof(ib_gid_t)); 1646 p_mon_node->port[port].lid = cpi->redir_lid; 1647 p_mon_node->port[port].qp = cpi->redir_qp; 1648 p_mon_node->port[port].pkey = cpi->redir_pkey; 1649 if (pkey_ix != -1) 1650 p_mon_node->port[port].pkey_ix = pkey_ix; 1651 cl_plock_release(&pm->osm->lock); 1652 1653 /* either */ 1654 if (pm->query_cpi) 1655 { 1656 /* issue a CPI query to the redirected location */ 1657 mad_method = IB_MAD_METHOD_GET; 1658 p_mon_node->port[port].cpi_valid = FALSE; 1659 status = perfmgr_send_cpi_mad(pm, cpi->redir_lid, 1660 cpi->redir_qp, pkey_ix, 1661 port, mad_context, 1662 0); /* FIXME SL != 0 */ 1663 } else { 1664 /* reissue the original query to the redirected location */ 1665 uint8_t counter_select2; 1666 1667 if (xmit_wait_supported(p_mon_node, port)) 1668 counter_select2 = 1; 1669 else 1670 counter_select2 = 0; 1671 1672 mad_method = mad_context->perfmgr_context.mad_method; 1673 if (mad_context->perfmgr_context.mad_attr_id 1674 == IB_MAD_ATTR_PORT_CNTRS) { 1675 status = perfmgr_send_pc_mad(pm, cpi->redir_lid, 1676 cpi->redir_qp, 1677 pkey_ix, port, 1678 mad_method, 1679 0xffff, 1680 counter_select2, 1681 mad_context, 1682 0); /* FIXME SL != 0 */ 1683 } else { 1684 status = perfmgr_send_pce_mad(pm, cpi->redir_lid, 1685 cpi->redir_qp, 1686 pkey_ix, port, 1687 mad_method, 1688 mad_context, 1689 0); /* FIXME SL != 0 */ 1690 } 1691 } 1692 if (status != IB_SUCCESS) 1693 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5414: " 1694 "Failed to send redirected MAD " 1695 "with method 0x%x for node %s " 1696 "(NodeGuid 0x%" PRIx64 ") port %d\n", 1697 mad_method, p_mon_node->name, p_mon_node->guid, port); 1698 Exit: 1699 return (valid); 1700 } 1701 1702 /********************************************************************** 1703 * Detect if someone else on the network could have cleared the counters 1704 * without us knowing. This is easy to detect because the counters never 1705 * wrap but are "sticky" PortCountersExtended version. 1706 * 1707 * The one time this will not work is if the port is getting errors fast 1708 * enough to have the reading overtake the previous reading. In this case, 1709 * counters will be missed. 1710 **********************************************************************/ 1711 static void perfmgr_check_data_cnt_oob_clear(osm_perfmgr_t * pm, 1712 monitored_node_t * mon_node, 1713 uint8_t port, 1714 perfmgr_db_data_cnt_reading_t * dc) 1715 { 1716 perfmgr_db_data_cnt_reading_t prev_dc; 1717 1718 if (perfmgr_db_get_prev_dc(pm->db, mon_node->guid, port, &prev_dc) 1719 != PERFMGR_EVENT_DB_SUCCESS) { 1720 OSM_LOG(pm->log, OSM_LOG_VERBOSE, 1721 "Failed to find previous data count " 1722 "reading for %s (0x%" PRIx64 ") port %u\n", 1723 mon_node->name, mon_node->guid, port); 1724 return; 1725 } 1726 1727 OSM_LOG(pm->log, OSM_LOG_DEBUG, 1728 "Data vs previous node %s (0x%" PRIx64 ") port %u\n" 1729 "TX: %"PRIu64" ?< %"PRIu64"\n" 1730 "RX: %"PRIu64" ?< %"PRIu64"\n" 1731 "TXP: %"PRIu64" ?< %"PRIu64"\n" 1732 "RXP: %"PRIu64" ?< %"PRIu64"\n" 1733 "UTXP: %"PRIu64" ?< %"PRIu64"\n" 1734 "URXP: %"PRIu64" ?< %"PRIu64"\n" 1735 "MTXP: %"PRIu64" ?< %"PRIu64"\n" 1736 "MRXP: %"PRIu64" ?< %"PRIu64"\n" 1737 , 1738 mon_node->name, mon_node->guid, port, 1739 dc->xmit_data, prev_dc.xmit_data, 1740 dc->rcv_data, prev_dc.rcv_data, 1741 dc->xmit_pkts, prev_dc.xmit_pkts, 1742 dc->rcv_pkts, prev_dc.rcv_pkts, 1743 dc->unicast_xmit_pkts, prev_dc.unicast_xmit_pkts, 1744 dc->unicast_rcv_pkts, prev_dc.unicast_rcv_pkts, 1745 dc->multicast_xmit_pkts, prev_dc.multicast_xmit_pkts, 1746 dc->multicast_rcv_pkts, prev_dc.multicast_rcv_pkts); 1747 1748 if (dc->xmit_data < prev_dc.xmit_data || 1749 dc->rcv_data < prev_dc.rcv_data || 1750 dc->xmit_pkts < prev_dc.xmit_pkts || 1751 dc->rcv_pkts < prev_dc.rcv_pkts || 1752 (ietf_supported(mon_node, port) && 1753 (dc->unicast_xmit_pkts < prev_dc.unicast_xmit_pkts || 1754 dc->unicast_rcv_pkts < prev_dc.unicast_rcv_pkts || 1755 dc->multicast_xmit_pkts < prev_dc.multicast_xmit_pkts || 1756 dc->multicast_rcv_pkts < prev_dc.multicast_rcv_pkts))) { 1757 OSM_LOG(pm->log, OSM_LOG_ERROR, 1758 "PerfMgr: ERR 540B: Detected an out of band data counter " 1759 "clear on node %s (0x%" PRIx64 ") port %u\n", 1760 mon_node->name, mon_node->guid, port); 1761 1762 perfmgr_db_clear_prev_dc(pm->db, mon_node->guid, port); 1763 } 1764 } 1765 1766 /********************************************************************** 1767 * The dispatcher uses a thread pool which will call this function when 1768 * there is a thread available to process the mad received on the wire 1769 **********************************************************************/ 1770 static void pc_recv_process(void *context, void *data) 1771 { 1772 osm_perfmgr_t *pm = context; 1773 osm_madw_t *p_madw = data; 1774 osm_madw_context_t *mad_context = &p_madw->context; 1775 ib_mad_t *p_mad = osm_madw_get_mad_ptr(p_madw); 1776 uint64_t node_guid = mad_context->perfmgr_context.node_guid; 1777 uint8_t port = mad_context->perfmgr_context.port; 1778 perfmgr_db_err_reading_t err_reading; 1779 perfmgr_db_data_cnt_reading_t data_reading; 1780 cl_map_item_t *p_node; 1781 monitored_node_t *p_mon_node; 1782 ib_class_port_info_t *cpi = NULL; 1783 1784 OSM_LOG_ENTER(pm->log); 1785 1786 /* 1787 * get the monitored node struct to have the printable name 1788 * for log messages 1789 */ 1790 if ((p_node = cl_qmap_get(&pm->monitored_map, node_guid)) == 1791 cl_qmap_end(&pm->monitored_map)) { 1792 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5412: GUID 0x%016" 1793 PRIx64 " not found in monitored map\n", node_guid); 1794 goto Exit; 1795 } 1796 p_mon_node = (monitored_node_t *) p_node; 1797 1798 OSM_LOG(pm->log, OSM_LOG_VERBOSE, 1799 "Processing received MAD status 0x%x context 0x%" 1800 PRIx64 " port %u\n", cl_ntoh16(p_mad->status), node_guid, port); 1801 1802 CL_ASSERT(p_mad->attr_id == IB_MAD_ATTR_PORT_CNTRS || 1803 p_mad->attr_id == IB_MAD_ATTR_PORT_CNTRS_EXT || 1804 p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO); 1805 1806 cl_plock_acquire(&pm->osm->lock); 1807 /* validate port number */ 1808 if (port >= p_mon_node->num_ports) { 1809 cl_plock_release(&pm->osm->lock); 1810 OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5413: " 1811 "Invalid port num %d for GUID 0x%016" 1812 PRIx64 " num ports %d\n", port, node_guid, 1813 p_mon_node->num_ports); 1814 goto Exit; 1815 } 1816 cl_plock_release(&pm->osm->lock); 1817 1818 /* capture CLASS_PORT_INFO data */ 1819 if (p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO) { 1820 boolean_t cpi_valid = TRUE; 1821 1822 cpi = (ib_class_port_info_t *) & 1823 (osm_madw_get_perfmgt_mad_ptr(p_madw)->data); 1824 1825 /* Response could be redirection (IBM eHCA PMA does this) */ 1826 if (p_mad->status & IB_MAD_STATUS_REDIRECT) 1827 cpi_valid = handle_redirect(pm, cpi, p_mon_node, port, 1828 mad_context); 1829 1830 if (pm->query_cpi && cpi_valid) { 1831 cl_plock_acquire(&pm->osm->lock); 1832 if (p_mon_node->node_type == IB_NODE_TYPE_SWITCH) { 1833 int i; 1834 for (i = p_mon_node->esp0 ? 0 : 1; 1835 i < p_mon_node->num_ports; 1836 i++) { 1837 p_mon_node->port[i].cap_mask = cpi->cap_mask; 1838 p_mon_node->port[i].cpi_valid = cpi_valid; 1839 } 1840 } else { 1841 p_mon_node->port[port].cap_mask = cpi->cap_mask; 1842 p_mon_node->port[port].cpi_valid = cpi_valid; 1843 } 1844 cl_plock_release(&pm->osm->lock); 1845 } 1846 goto Exit; 1847 } 1848 1849 if (p_mad->attr_id == IB_MAD_ATTR_PORT_CNTRS_EXT) { 1850 ib_port_counters_ext_t *ext_wire_read = 1851 (ib_port_counters_ext_t *) 1852 &osm_madw_get_perfmgt_mad_ptr(p_madw)->data; 1853 1854 /* convert wire data to perfmgr data counter reading */ 1855 perfmgr_db_fill_data_cnt_read_pce(ext_wire_read, &data_reading, 1856 ietf_supported(p_mon_node, 1857 port)); 1858 1859 /* add counter */ 1860 if (mad_context->perfmgr_context.mad_method 1861 == IB_MAD_METHOD_GET) { 1862 /* detect an out of band clear on the port */ 1863 perfmgr_check_data_cnt_oob_clear(pm, p_mon_node, port, 1864 &data_reading); 1865 1866 perfmgr_db_add_dc_reading(pm->db, node_guid, port, 1867 &data_reading, 1868 ietf_supported(p_mon_node, 1869 port)); 1870 } else { 1871 perfmgr_db_clear_prev_dc(pm->db, node_guid, port); 1872 } 1873 1874 perfmgr_check_pce_overflow(pm, p_mon_node, 1875 p_mon_node->port[port].pkey_ix, 1876 port, ext_wire_read); 1877 } else { 1878 boolean_t pce_sup = pce_supported(p_mon_node, port); 1879 boolean_t xmit_wait_sup = xmit_wait_supported(p_mon_node, port); 1880 ib_port_counters_t *wire_read = 1881 (ib_port_counters_t *) 1882 &osm_madw_get_perfmgt_mad_ptr(p_madw)->data; 1883 1884 perfmgr_db_fill_err_read(wire_read, &err_reading, xmit_wait_sup); 1885 if (!pce_sup) 1886 perfmgr_db_fill_data_cnt_read_pc(wire_read, &data_reading); 1887 1888 if (mad_context->perfmgr_context.mad_method == IB_MAD_METHOD_GET) { 1889 /* detect an out of band clear on the port */ 1890 perfmgr_check_oob_clear(pm, p_mon_node, port, &err_reading); 1891 if (!pce_sup) 1892 perfmgr_check_data_cnt_oob_clear(pm, p_mon_node, port, 1893 &data_reading); 1894 1895 /* log errors from this reading */ 1896 if (pm->subn->opt.perfmgr_log_errors) 1897 perfmgr_log_errors(pm, p_mon_node, port, &err_reading); 1898 1899 perfmgr_db_add_err_reading(pm->db, node_guid, port, 1900 &err_reading); 1901 if (!pce_sup) 1902 perfmgr_db_add_dc_reading(pm->db, node_guid, port, 1903 &data_reading, 0); 1904 } else { 1905 perfmgr_db_clear_prev_err(pm->db, node_guid, port); 1906 if (!pce_sup) 1907 perfmgr_db_clear_prev_dc(pm->db, node_guid, port); 1908 } 1909 1910 perfmgr_check_overflow(pm, p_mon_node, p_mon_node->port[port].pkey_ix, 1911 port, wire_read, xmit_wait_sup); 1912 1913 } 1914 1915 #ifdef ENABLE_OSM_PERF_MGR_PROFILE 1916 do { 1917 struct timeval proc_time; 1918 gettimeofday(&proc_time, NULL); 1919 diff_time(&p_madw->context.perfmgr_context.query_start, 1920 &proc_time, &proc_time); 1921 update_mad_stats(&proc_time); 1922 } while (0); 1923 #endif 1924 1925 Exit: 1926 osm_mad_pool_put(pm->mad_pool, p_madw); 1927 1928 OSM_LOG_EXIT(pm->log); 1929 } 1930 1931 /********************************************************************** 1932 * Initialize the PerfMgr object 1933 **********************************************************************/ 1934 ib_api_status_t osm_perfmgr_init(osm_perfmgr_t * pm, osm_opensm_t * osm, 1935 const osm_subn_opt_t * p_opt) 1936 { 1937 ib_api_status_t status; 1938 1939 OSM_LOG_ENTER(&osm->log); 1940 1941 OSM_LOG(&osm->log, OSM_LOG_VERBOSE, "Initializing PerfMgr\n"); 1942 1943 memset(pm, 0, sizeof(*pm)); 1944 1945 pm->subn = &osm->subn; 1946 pm->sm = &osm->sm; 1947 pm->log = &osm->log; 1948 pm->mad_pool = &osm->mad_pool; 1949 pm->vendor = osm->p_vendor; 1950 pm->trans_id = PERFMGR_INITIAL_TID_VALUE; 1951 pm->state = 1952 p_opt->perfmgr ? PERFMGR_STATE_ENABLED : PERFMGR_STATE_DISABLE; 1953 pm->sweep_state = PERFMGR_SWEEP_SLEEP; 1954 status = cl_spinlock_init(&pm->lock); 1955 if (status != IB_SUCCESS) 1956 goto Exit; 1957 pm->sweep_time_s = p_opt->perfmgr_sweep_time_s; 1958 pm->max_outstanding_queries = p_opt->perfmgr_max_outstanding_queries; 1959 pm->ignore_cas = p_opt->perfmgr_ignore_cas; 1960 pm->osm = osm; 1961 pm->local_port = -1; 1962 1963 status = cl_timer_init(&pm->sweep_timer, perfmgr_sweep, pm); 1964 if (status != IB_SUCCESS) 1965 goto Exit; 1966 1967 status = IB_INSUFFICIENT_RESOURCES; 1968 pm->db = perfmgr_db_construct(pm); 1969 if (!pm->db) { 1970 pm->state = PERFMGR_STATE_NO_DB; 1971 goto Exit; 1972 } 1973 1974 pm->pc_disp_h = cl_disp_register(&osm->disp, OSM_MSG_MAD_PORT_COUNTERS, 1975 pc_recv_process, pm); 1976 if (pm->pc_disp_h == CL_DISP_INVALID_HANDLE) { 1977 perfmgr_db_destroy(pm->db); 1978 goto Exit; 1979 } 1980 1981 init_monitored_nodes(pm); 1982 1983 if (pm->state == PERFMGR_STATE_ENABLED) 1984 cl_timer_start(&pm->sweep_timer, pm->sweep_time_s * 1000); 1985 1986 pm->rm_nodes = p_opt->perfmgr_rm_nodes; 1987 pm->query_cpi = p_opt->perfmgr_query_cpi; 1988 pm->xmit_wait_log = p_opt->perfmgr_xmit_wait_log; 1989 pm->xmit_wait_threshold = p_opt->perfmgr_xmit_wait_threshold; 1990 status = IB_SUCCESS; 1991 Exit: 1992 OSM_LOG_EXIT(pm->log); 1993 return status; 1994 } 1995 1996 /********************************************************************** 1997 * Clear the counters from the db 1998 **********************************************************************/ 1999 void osm_perfmgr_clear_counters(osm_perfmgr_t * pm) 2000 { 2001 /** 2002 * FIXME todo issue clear on the fabric? 2003 */ 2004 perfmgr_db_clear_counters(pm->db); 2005 osm_log_v2(pm->log, OSM_LOG_INFO, FILE_ID, "PerfMgr counters cleared\n"); 2006 } 2007 2008 /******************************************************************* 2009 * Dump the DB information to the file specified 2010 *******************************************************************/ 2011 void osm_perfmgr_dump_counters(osm_perfmgr_t * pm, perfmgr_db_dump_t dump_type) 2012 { 2013 char path[256]; 2014 char *file_name; 2015 if (pm->subn->opt.event_db_dump_file) 2016 file_name = pm->subn->opt.event_db_dump_file; 2017 else { 2018 snprintf(path, sizeof(path), "%s/%s", 2019 pm->subn->opt.dump_files_dir, 2020 OSM_PERFMGR_DEFAULT_DUMP_FILE); 2021 file_name = path; 2022 } 2023 if (perfmgr_db_dump(pm->db, file_name, dump_type) != 0) 2024 OSM_LOG(pm->log, OSM_LOG_ERROR, "Failed to dump file %s : %s", 2025 file_name, strerror(errno)); 2026 } 2027 2028 /******************************************************************* 2029 * Print the DB information to the fp specified 2030 *******************************************************************/ 2031 void osm_perfmgr_print_counters(osm_perfmgr_t * pm, char *nodename, FILE * fp, 2032 char *port, int err_only) 2033 { 2034 if (nodename) { 2035 char *end = NULL; 2036 uint64_t guid = strtoull(nodename, &end, 0); 2037 if (nodename + strlen(nodename) != end) 2038 perfmgr_db_print_by_name(pm->db, nodename, fp, port, 2039 err_only); 2040 else 2041 perfmgr_db_print_by_guid(pm->db, guid, fp, port, 2042 err_only); 2043 } else 2044 perfmgr_db_print_all(pm->db, fp, err_only); 2045 } 2046 2047 void osm_perfmgr_update_nodename(osm_perfmgr_t *pm, uint64_t node_guid, 2048 char *nodename) 2049 { 2050 if (pm->db) 2051 perfmgr_db_update_name(pm->db, node_guid, nodename); 2052 } 2053 #endif /* ENABLE_OSM_PERF_MGR */ 2054