/*
 * Copyright (c) 2008-2009 Voltaire, Inc. All rights reserved.
 * Copyright (c) 2007 The Regents of the University of California.
 * Copyright (c) 2009 HNR Consulting. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#if HAVE_CONFIG_H
#  include <config.h>
#endif				/* HAVE_CONFIG_H */

#ifdef ENABLE_OSM_PERF_MGR

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <errno.h>
#include <limits.h>
#include <dlfcn.h>
#include <sys/stat.h>

#include <opensm/osm_file_ids.h>
#define FILE_ID OSM_FILE_PERFMGR_DB_C
#include <opensm/osm_perfmgr_db.h>
#include <opensm/osm_perfmgr.h>
#include <opensm/osm_opensm.h>

/* Size of the scratch buffers handed to ctime_r() by the dump routines */
#define PERFMGR_DB_TIME_STR_LEN 128

static void free_node(db_node_t * node);

/** =========================================================================
 * Allocate a database object and initialize its node map and lock.
 *
 * Returns the new database, or NULL if the allocation or the lock
 * initialization fails.  The result is released with perfmgr_db_destroy().
 */
perfmgr_db_t *perfmgr_db_construct(osm_perfmgr_t *perfmgr)
{
	perfmgr_db_t *db = malloc(sizeof(*db));
	if (!db)
		return NULL;

	cl_qmap_init(&db->pc_data);
	cl_plock_construct(&db->lock);
	if (cl_plock_init(&db->lock) != CL_SUCCESS) {
		/* never hand out a db whose lock is unusable */
		free(db);
		return NULL;
	}
	db->perfmgr = perfmgr;
	return db;
}

/** =========================================================================
 * Free every node stored in the database, then the lock and the database
 * object itself.  A NULL db is ignored.
 */
void perfmgr_db_destroy(perfmgr_db_t * db)
{
	cl_map_item_t *item, *next_item;

	if (db) {
		item = cl_qmap_head(&db->pc_data);
		while (item != cl_qmap_end(&db->pc_data)) {
			/* fetch the successor before the node is freed */
			next_item = cl_qmap_next(item);
			free_node((db_node_t *)item);
			item = next_item;
		}
		cl_plock_destroy(&db->lock);
		free(db);
	}
}

/**********************************************************************
 * Internal call db->lock should be held when calling
 *
 * Look up the node stored under guid; returns NULL when there is none.
 **********************************************************************/
static inline db_node_t *get(perfmgr_db_t * db, uint64_t guid)
{
	cl_map_item_t *rc = cl_qmap_get(&db->pc_data, guid);
	const cl_map_item_t *end = cl_qmap_end(&db->pc_data);

	if (rc == end)
		return NULL;
	return (db_node_t *) rc;
}

/*
 * Validate a node/port pair.
 *
 * Returns PERFMGR_EVENT_DB_GUIDNOTFOUND for a NULL node,
 * PERFMGR_EVENT_DB_PORTNOTFOUND when port is outside [0, num_ports) or is
 * port 0 on a node created without esp0, PERFMGR_EVENT_DB_SUCCESS otherwise.
 */
static inline perfmgr_db_err_t bad_node_port(db_node_t * node, uint8_t port)
{
	if (!node)
		return PERFMGR_EVENT_DB_GUIDNOTFOUND;
	if (port >= node->num_ports || (!node->esp0 && port == 0))
		return PERFMGR_EVENT_DB_PORTNOTFOUND;

	return PERFMGR_EVENT_DB_SUCCESS;
}

/* Flag a port as valid; only valid ports are printed by the dump routines */
static inline void mark_port_valid(db_node_t * node, uint8_t port)
{
	node->ports[port].valid = TRUE;
}

/** =========================================================================
 * Allocate a node with num_ports port records.
 *
 * Every port starts invalid with its reset time and "previous reading"
 * times set to the current time; the node starts inactive.  name is copied
 * (truncated if needed) into the node.  Returns NULL on allocation failure.
 */
static db_node_t *malloc_node(uint64_t guid, boolean_t esp0,
			      uint8_t num_ports, char *name)
{
	int i = 0;
	time_t cur_time = 0;
	db_node_t *rc = malloc(sizeof(*rc));
	if (!rc)
		return NULL;

	rc->ports = calloc(num_ports, sizeof(db_port_t));
	if (!rc->ports)
		goto free_rc;
	rc->num_ports = num_ports;
	rc->node_guid = guid;
	rc->esp0 = esp0;

	cur_time = time(NULL);
	for (i = 0; i < num_ports; i++) {
		rc->ports[i].last_reset = cur_time;
		rc->ports[i].err_previous.time = cur_time;
		rc->ports[i].dc_previous.time = cur_time;
		rc->ports[i].valid = FALSE;
	}
	snprintf(rc->node_name, sizeof(rc->node_name), "%s", name);
	rc->active = FALSE;

	return rc;

free_rc:
	free(rc);
	return NULL;
}

/** =========================================================================
 * Release a node and its port array.  A NULL node is ignored.
 */
static void free_node(db_node_t * node)
{
	if (!node)
		return;
	free(node->ports);	/* free(NULL) is a no-op */
	free(node);
}

/*
 * Insert a node into the database map, keyed by its GUID.
 * Returns PERFMGR_EVENT_DB_FAIL when cl_qmap_insert() hands back an item
 * other than the one passed in.
 */
static perfmgr_db_err_t insert(perfmgr_db_t * db, db_node_t * node)
{
	cl_map_item_t *rc = cl_qmap_insert(&db->pc_data, node->node_guid,
					   (cl_map_item_t *) node);

	if ((void *)rc != (void *)node)
		return PERFMGR_EVENT_DB_FAIL;
	return PERFMGR_EVENT_DB_SUCCESS;
}

/*
 * Create an entry for guid if the database has none yet.
 *
 * An already existing entry is left untouched and reported as success.
 * Returns PERFMGR_EVENT_DB_NOMEM if the node cannot be allocated and
 * PERFMGR_EVENT_DB_FAIL if it cannot be inserted.
 */
perfmgr_db_err_t
perfmgr_db_create_entry(perfmgr_db_t * db, uint64_t guid, boolean_t esp0,
			uint8_t num_ports, char *name)
{
	perfmgr_db_err_t rc = PERFMGR_EVENT_DB_SUCCESS;

	cl_plock_excl_acquire(&db->lock);
	if (!get(db, guid)) {
		db_node_t *pc_node = malloc_node(guid, esp0, num_ports,
						 name);
		if (!pc_node) {
			rc = PERFMGR_EVENT_DB_NOMEM;
			goto Exit;
		}
		if (insert(db, pc_node)) {
			free_node(pc_node);
			rc = PERFMGR_EVENT_DB_FAIL;
			goto Exit;
		}
	}
Exit:
	cl_plock_release(&db->lock);
	return rc;
}

/*
 * Replace the stored name of node_guid with name (truncated if needed).
 * Always returns PERFMGR_EVENT_DB_SUCCESS, also when the GUID is unknown.
 */
perfmgr_db_err_t
perfmgr_db_update_name(perfmgr_db_t * db, uint64_t node_guid, char *name)
{
	db_node_t *node = NULL;

	cl_plock_excl_acquire(&db->lock);
	node = get(db, node_guid);
	if (node)
		snprintf(node->node_name, sizeof(node->node_name), "%s", name);
	cl_plock_release(&db->lock);
	return (PERFMGR_EVENT_DB_SUCCESS);
}

/*
 * Remove the entry for guid from the database and free it.
 * Returns PERFMGR_EVENT_DB_GUIDNOTFOUND when there is no such entry.
 *
 * NOTE(review): db->lock is not taken here; presumably callers serialize
 * access to the database -- confirm against the call sites.
 */
perfmgr_db_err_t
perfmgr_db_delete_entry(perfmgr_db_t * db, uint64_t guid)
{
	cl_map_item_t * rc = cl_qmap_remove(&db->pc_data, guid);

	if (rc == cl_qmap_end(&db->pc_data))
		return(PERFMGR_EVENT_DB_GUIDNOTFOUND);

	free_node((db_node_t *)rc);
	return(PERFMGR_EVENT_DB_SUCCESS);
}

/*
 * Delete every node whose active flag is FALSE.
 *
 * The GUIDs are collected first and deleted afterwards so the map is not
 * modified while it is being walked.  If cnt is not NULL it receives the
 * number of nodes deleted (0 on failure).  Returns PERFMGR_EVENT_DB_NOMEM
 * if the GUID list cannot be grown; nothing is deleted in that case.
 *
 * NOTE(review): db->lock is not taken here; presumably callers serialize
 * access to the database -- confirm against the call sites.
 */
perfmgr_db_err_t
perfmgr_db_delete_inactive(perfmgr_db_t * db, unsigned *cnt)
{
	perfmgr_db_err_t rc = PERFMGR_EVENT_DB_SUCCESS;
	int i = 0;
	int num = 0;
	uint64_t * guid_list = NULL;
	cl_map_item_t * p_map_item = cl_qmap_head(&db->pc_data);

	while (p_map_item != cl_qmap_end(&db->pc_data)) {
		db_node_t *n = (db_node_t *)p_map_item;
		if (n->active == FALSE) {
			/* keep the old list reachable if realloc() fails */
			uint64_t *grown = realloc(guid_list,
					sizeof(*guid_list) * (num+1));
			if (!grown) {
				free(guid_list);
				num = 0;
				rc = PERFMGR_EVENT_DB_NOMEM;
				goto Done;
			}
			guid_list = grown;
			guid_list[num] = n->node_guid;
			num++;
		}
		p_map_item = cl_qmap_next(p_map_item);
	}

	for (i = 0 ; i < num; i++)
		perfmgr_db_delete_entry(db, guid_list[i]);

	free(guid_list);

Done:
	if (cnt)
		*cnt = num;

	return(rc);
}

/*
 * Set the active flag of the node stored under guid.
 * Always returns PERFMGR_EVENT_DB_SUCCESS, also when the GUID is unknown.
 */
perfmgr_db_err_t
perfmgr_db_mark_active(perfmgr_db_t *db, uint64_t guid, boolean_t active)
{
	db_node_t *node = NULL;

	cl_plock_excl_acquire(&db->lock);
	node = get(db, guid);
	if (node)
		node->active = active;
	cl_plock_release(&db->lock);
	return (PERFMGR_EVENT_DB_SUCCESS);
}


/**********************************************************************
 * Dump a reading vs the previous reading to the perfmgr log at debug
 * level.  Each line shows "current <-- previous (running total)".
 **********************************************************************/
static inline void
debug_dump_err_reading(perfmgr_db_t * db, uint64_t guid, uint8_t port_num,
		       db_port_t * port, perfmgr_db_err_reading_t * cur)
{
	osm_log_t *log = db->perfmgr->log;

	if (!OSM_LOG_IS_ACTIVE_V2(log, OSM_LOG_DEBUG))
		return;		/* optimize this a bit */

	osm_log_v2(log, OSM_LOG_DEBUG, FILE_ID,
		   "GUID 0x%" PRIx64 " Port %u:\n", guid, port_num);
	osm_log_v2(log, OSM_LOG_DEBUG, FILE_ID,
		   "sym %" PRIu64 " <-- %" PRIu64 " (%" PRIu64 ")\n",
		   cur->symbol_err_cnt, port->err_previous.symbol_err_cnt,
		   port->err_total.symbol_err_cnt);
	osm_log_v2(log, OSM_LOG_DEBUG, FILE_ID,
		   "ler %" PRIu64 " <-- %" PRIu64 " (%" PRIu64 ")\n",
		   cur->link_err_recover, port->err_previous.link_err_recover,
		   port->err_total.link_err_recover);
	osm_log_v2(log, OSM_LOG_DEBUG, FILE_ID,
		   "ld %" PRIu64 " <-- %" PRIu64 " (%" PRIu64 ")\n",
		   cur->link_downed, port->err_previous.link_downed,
		   port->err_total.link_downed);
	osm_log_v2(log, OSM_LOG_DEBUG, FILE_ID,
		   "re %" PRIu64 " <-- %" PRIu64 " (%" PRIu64 ")\n", cur->rcv_err,
		   port->err_previous.rcv_err, port->err_total.rcv_err);
	osm_log_v2(log, OSM_LOG_DEBUG, FILE_ID,
		   "rrp %" PRIu64 " <-- %" PRIu64 " (%" PRIu64 ")\n",
		   cur->rcv_rem_phys_err, port->err_previous.rcv_rem_phys_err,
		   port->err_total.rcv_rem_phys_err);
	osm_log_v2(log, OSM_LOG_DEBUG, FILE_ID,
		   "rsr %" PRIu64 " <-- %" PRIu64 " (%" PRIu64 ")\n",
		   cur->rcv_switch_relay_err,
		   port->err_previous.rcv_switch_relay_err,
		   port->err_total.rcv_switch_relay_err);
	osm_log_v2(log, OSM_LOG_DEBUG, FILE_ID,
		   "xd %" PRIu64 " <-- %" PRIu64 " (%" PRIu64 ")\n",
		   cur->xmit_discards, port->err_previous.xmit_discards,
		   port->err_total.xmit_discards);
	osm_log_v2(log, OSM_LOG_DEBUG, FILE_ID,
		   "xce %" PRIu64 " <-- %" PRIu64 " (%" PRIu64 ")\n",
		   cur->xmit_constraint_err,
		   port->err_previous.xmit_constraint_err,
		   port->err_total.xmit_constraint_err);
	osm_log_v2(log, OSM_LOG_DEBUG, FILE_ID,
		   "rce %" PRIu64 " <-- %" PRIu64 " (%" PRIu64 ")\n",
		   cur->rcv_constraint_err, port->err_previous.rcv_constraint_err,
		   port->err_total.rcv_constraint_err);
	osm_log_v2(log, OSM_LOG_DEBUG, FILE_ID,
		   "li %" PRIu64 " <-- %" PRIu64 " (%" PRIu64 ")\n",
		   cur->link_integrity, port->err_previous.link_integrity,
		   port->err_total.link_integrity);
	osm_log_v2(log, OSM_LOG_DEBUG, FILE_ID,
		   "bo %" PRIu64 " <-- %" PRIu64 " (%" PRIu64 ")\n",
		   cur->buffer_overrun, port->err_previous.buffer_overrun,
		   port->err_total.buffer_overrun);
	osm_log_v2(log, OSM_LOG_DEBUG, FILE_ID,
		   "vld %" PRIu64 " <-- %" PRIu64 " (%" PRIu64 ")\n",
		   cur->vl15_dropped, port->err_previous.vl15_dropped,
		   port->err_total.vl15_dropped);
	osm_log_v2(log, OSM_LOG_DEBUG, FILE_ID,
		   "xw %" PRIu64 " <-- %" PRIu64 " (%" PRIu64 ")\n",
		   cur->xmit_wait, port->err_previous.xmit_wait,
		   port->err_total.xmit_wait);
}

/**********************************************************************
 * perfmgr_db_err_reading_t functions
 **********************************************************************/

/*
 * Record a new error-counter reading for guid/port.
 *
 * For every counter the difference to the previous reading is added to the
 * port's running total and placed in an OSM_EVENT_ID_PORT_ERRORS event,
 * which is reported while db->lock is still held.  The reading then becomes
 * the new "previous" reading and the port is marked valid.
 *
 * Returns the bad_node_port() status; nothing is recorded or reported
 * unless that status is PERFMGR_EVENT_DB_SUCCESS.
 */
perfmgr_db_err_t
perfmgr_db_add_err_reading(perfmgr_db_t * db, uint64_t guid, uint8_t port,
			   perfmgr_db_err_reading_t * reading)
{
	db_port_t *p_port = NULL;
	db_node_t *node = NULL;
	perfmgr_db_err_reading_t *previous = NULL;
	perfmgr_db_err_t rc = PERFMGR_EVENT_DB_SUCCESS;
	osm_epi_pe_event_t epi_pe_data;

	cl_plock_excl_acquire(&db->lock);
	node = get(db, guid);
	if ((rc = bad_node_port(node, port)) != PERFMGR_EVENT_DB_SUCCESS)
		goto Exit;

	mark_port_valid(node, port);

	p_port = &(node->ports[port]);
	previous = &(node->ports[port].err_previous);

	debug_dump_err_reading(db, guid, port, p_port, reading);

	epi_pe_data.time_diff_s = (reading->time - previous->time);
	osm_epi_create_port_id(&epi_pe_data.port_id, guid, port,
			       node->node_name);

	/* calculate changes from previous reading */
	epi_pe_data.symbol_err_cnt =
	    (reading->symbol_err_cnt - previous->symbol_err_cnt);
	p_port->err_total.symbol_err_cnt += epi_pe_data.symbol_err_cnt;
	epi_pe_data.link_err_recover =
	    (reading->link_err_recover - previous->link_err_recover);
	p_port->err_total.link_err_recover += epi_pe_data.link_err_recover;
	epi_pe_data.link_downed =
	    (reading->link_downed - previous->link_downed);
	p_port->err_total.link_downed += epi_pe_data.link_downed;
	epi_pe_data.rcv_err = (reading->rcv_err - previous->rcv_err);
	p_port->err_total.rcv_err += epi_pe_data.rcv_err;
	epi_pe_data.rcv_rem_phys_err =
	    (reading->rcv_rem_phys_err - previous->rcv_rem_phys_err);
	p_port->err_total.rcv_rem_phys_err += epi_pe_data.rcv_rem_phys_err;
	epi_pe_data.rcv_switch_relay_err =
	    (reading->rcv_switch_relay_err - previous->rcv_switch_relay_err);
	p_port->err_total.rcv_switch_relay_err +=
	    epi_pe_data.rcv_switch_relay_err;
	epi_pe_data.xmit_discards =
	    (reading->xmit_discards - previous->xmit_discards);
	p_port->err_total.xmit_discards += epi_pe_data.xmit_discards;
	epi_pe_data.xmit_constraint_err =
	    (reading->xmit_constraint_err - previous->xmit_constraint_err);
	p_port->err_total.xmit_constraint_err +=
	    epi_pe_data.xmit_constraint_err;
	epi_pe_data.rcv_constraint_err =
	    (reading->rcv_constraint_err - previous->rcv_constraint_err);
	p_port->err_total.rcv_constraint_err += epi_pe_data.rcv_constraint_err;
	epi_pe_data.link_integrity =
	    (reading->link_integrity - previous->link_integrity);
	p_port->err_total.link_integrity += epi_pe_data.link_integrity;
	epi_pe_data.buffer_overrun =
	    (reading->buffer_overrun - previous->buffer_overrun);
	p_port->err_total.buffer_overrun += epi_pe_data.buffer_overrun;
	epi_pe_data.vl15_dropped =
	    (reading->vl15_dropped - previous->vl15_dropped);
	p_port->err_total.vl15_dropped += epi_pe_data.vl15_dropped;
	epi_pe_data.xmit_wait =
	    (reading->xmit_wait - previous->xmit_wait);
	p_port->err_total.xmit_wait += epi_pe_data.xmit_wait;

	p_port->err_previous = *reading;

	/* mark the time this total was updated */
	p_port->err_total.time = reading->time;

	osm_opensm_report_event(db->perfmgr->osm, OSM_EVENT_ID_PORT_ERRORS,
				&epi_pe_data);

Exit:
	cl_plock_release(&db->lock);
	return rc;
}

/*
 * Copy the last stored error reading of guid/port into *reading.
 * Returns the bad_node_port() status; *reading is written only on success.
 */
perfmgr_db_err_t perfmgr_db_get_prev_err(perfmgr_db_t * db, uint64_t guid,
					 uint8_t port,
					 perfmgr_db_err_reading_t * reading)
{
	db_node_t *node = NULL;
	perfmgr_db_err_t rc = PERFMGR_EVENT_DB_SUCCESS;

	cl_plock_acquire(&db->lock);

	node = get(db, guid);
	if ((rc = bad_node_port(node, port)) != PERFMGR_EVENT_DB_SUCCESS)
		goto Exit;

	*reading = node->ports[port].err_previous;

Exit:
	cl_plock_release(&db->lock);
	return rc;
}

/*
 * Zero the stored "previous" error reading of guid/port and stamp it with
 * the current time.  Returns the bad_node_port() status.
 */
perfmgr_db_err_t
perfmgr_db_clear_prev_err(perfmgr_db_t * db, uint64_t guid, uint8_t port)
{
	db_node_t *node = NULL;
	perfmgr_db_err_reading_t *previous = NULL;
	perfmgr_db_err_t rc = PERFMGR_EVENT_DB_SUCCESS;

	cl_plock_excl_acquire(&db->lock);
	node = get(db, guid);
	if ((rc = bad_node_port(node, port)) != PERFMGR_EVENT_DB_SUCCESS)
		goto Exit;

	previous = &(node->ports[port].err_previous);

	memset(previous, 0, sizeof(*previous));
	node->ports[port].err_previous.time = time(NULL);

Exit:
	cl_plock_release(&db->lock);
	return rc;
}

/*
 * Dump a data-counter reading vs the previous reading to the perfmgr log
 * at debug level.  Each line shows "current <-- previous (running total)".
 */
static inline void
debug_dump_dc_reading(perfmgr_db_t * db, uint64_t guid, uint8_t port_num,
		      db_port_t * port, perfmgr_db_data_cnt_reading_t * cur)
{
	osm_log_t *log = db->perfmgr->log;
	if (!OSM_LOG_IS_ACTIVE_V2(log, OSM_LOG_DEBUG))
		return;

	osm_log_v2(log, OSM_LOG_DEBUG, FILE_ID,
		   "xd %" PRIu64 " <-- %" PRIu64 " (%" PRIu64 ")\n",
		   cur->xmit_data, port->dc_previous.xmit_data,
		   port->dc_total.xmit_data);
	osm_log_v2(log, OSM_LOG_DEBUG, FILE_ID,
		   "rd %" PRIu64 " <-- %" PRIu64 " (%" PRIu64 ")\n", cur->rcv_data,
		   port->dc_previous.rcv_data, port->dc_total.rcv_data);
	osm_log_v2(log, OSM_LOG_DEBUG, FILE_ID,
		   "xp %" PRIu64 " <-- %" PRIu64 " (%" PRIu64 ")\n",
		   cur->xmit_pkts, port->dc_previous.xmit_pkts,
		   port->dc_total.xmit_pkts);
	osm_log_v2(log, OSM_LOG_DEBUG, FILE_ID,
		   "rp %" PRIu64 " <-- %" PRIu64 " (%" PRIu64 ")\n", cur->rcv_pkts,
		   port->dc_previous.rcv_pkts, port->dc_total.rcv_pkts);
}

/**********************************************************************
 * perfmgr_db_data_cnt_reading_t functions
 **********************************************************************/

/*
 * Record a new data-counter reading for guid/port.
 *
 * For every counter the difference to the previous reading is added to the
 * port's running total and placed in an OSM_EVENT_ID_PORT_DATA_COUNTERS
 * event, which is reported while db->lock is still held.  The unicast and
 * multicast packet counters are only processed when ietf_sup is non-zero;
 * otherwise they are reported as 0.  The reading then becomes the new
 * "previous" reading and the port is marked valid.
 *
 * Returns the bad_node_port() status; nothing is recorded or reported
 * unless that status is PERFMGR_EVENT_DB_SUCCESS.
 */
perfmgr_db_err_t
perfmgr_db_add_dc_reading(perfmgr_db_t * db, uint64_t guid, uint8_t port,
			  perfmgr_db_data_cnt_reading_t * reading,
			  int ietf_sup)
{
	db_port_t *p_port = NULL;
	db_node_t *node = NULL;
	perfmgr_db_data_cnt_reading_t *previous = NULL;
	perfmgr_db_err_t rc = PERFMGR_EVENT_DB_SUCCESS;
	osm_epi_dc_event_t epi_dc_data;

	/*
	 * The unicast/multicast fields are only assigned when ietf_sup is
	 * set; start from zero so the event never carries indeterminate
	 * values to its consumers.
	 */
	memset(&epi_dc_data, 0, sizeof(epi_dc_data));

	cl_plock_excl_acquire(&db->lock);
	node = get(db, guid);
	if ((rc = bad_node_port(node, port)) != PERFMGR_EVENT_DB_SUCCESS)
		goto Exit;

	mark_port_valid(node, port);

	p_port = &node->ports[port];
	previous = &node->ports[port].dc_previous;

	debug_dump_dc_reading(db, guid, port, p_port, reading);

	epi_dc_data.time_diff_s = reading->time - previous->time;
	osm_epi_create_port_id(&epi_dc_data.port_id, guid, port,
			       node->node_name);

	/* calculate changes from previous reading */
	epi_dc_data.xmit_data = reading->xmit_data - previous->xmit_data;
	p_port->dc_total.xmit_data += epi_dc_data.xmit_data;
	epi_dc_data.rcv_data = reading->rcv_data - previous->rcv_data;
	p_port->dc_total.rcv_data += epi_dc_data.rcv_data;
	epi_dc_data.xmit_pkts = reading->xmit_pkts - previous->xmit_pkts;
	p_port->dc_total.xmit_pkts += epi_dc_data.xmit_pkts;
	epi_dc_data.rcv_pkts = reading->rcv_pkts - previous->rcv_pkts;
	p_port->dc_total.rcv_pkts += epi_dc_data.rcv_pkts;

	if (ietf_sup)
	{
		epi_dc_data.unicast_xmit_pkts =
		    reading->unicast_xmit_pkts - previous->unicast_xmit_pkts;
		p_port->dc_total.unicast_xmit_pkts += epi_dc_data.unicast_xmit_pkts;
		epi_dc_data.unicast_rcv_pkts =
		    reading->unicast_rcv_pkts - previous->unicast_rcv_pkts;
		p_port->dc_total.unicast_rcv_pkts += epi_dc_data.unicast_rcv_pkts;
		epi_dc_data.multicast_xmit_pkts =
		    reading->multicast_xmit_pkts - previous->multicast_xmit_pkts;
		p_port->dc_total.multicast_xmit_pkts += epi_dc_data.multicast_xmit_pkts;
		epi_dc_data.multicast_rcv_pkts =
		    reading->multicast_rcv_pkts - previous->multicast_rcv_pkts;
		p_port->dc_total.multicast_rcv_pkts += epi_dc_data.multicast_rcv_pkts;
	}

	p_port->dc_previous = *reading;

	/* mark the time this total was updated */
	p_port->dc_total.time = reading->time;

	osm_opensm_report_event(db->perfmgr->osm,
				OSM_EVENT_ID_PORT_DATA_COUNTERS, &epi_dc_data);

Exit:
	cl_plock_release(&db->lock);
	return rc;
}

/*
 * Copy the last stored data-counter reading of guid/port into *reading.
 * Returns the bad_node_port() status; *reading is written only on success.
 */
perfmgr_db_err_t perfmgr_db_get_prev_dc(perfmgr_db_t * db, uint64_t guid,
					uint8_t port,
					perfmgr_db_data_cnt_reading_t * reading)
{
	db_node_t *node = NULL;
	perfmgr_db_err_t rc = PERFMGR_EVENT_DB_SUCCESS;

	cl_plock_acquire(&db->lock);

	node = get(db, guid);
	if ((rc = bad_node_port(node, port)) != PERFMGR_EVENT_DB_SUCCESS)
		goto Exit;

	*reading = node->ports[port].dc_previous;

Exit:
	cl_plock_release(&db->lock);
	return rc;
}

/*
 * Zero the stored "previous" data-counter reading of guid/port and stamp
 * it with the current time.  Returns the bad_node_port() status.
 */
perfmgr_db_err_t
perfmgr_db_clear_prev_dc(perfmgr_db_t * db, uint64_t guid, uint8_t port)
{
	db_node_t *node = NULL;
	perfmgr_db_data_cnt_reading_t *previous = NULL;
	perfmgr_db_err_t rc = PERFMGR_EVENT_DB_SUCCESS;

	cl_plock_excl_acquire(&db->lock);
	node = get(db, guid);
	if ((rc = bad_node_port(node, port)) != PERFMGR_EVENT_DB_SUCCESS)
		goto Exit;

	previous = &node->ports[port].dc_previous;

	memset(previous, 0, sizeof(*previous));
	node->ports[port].dc_previous.time = time(NULL);

Exit:
	cl_plock_release(&db->lock);
	return rc;
}

/*
 * cl_qmap_apply_func() callback: zero all running totals of every port of
 * one node and stamp the totals and the port's last_reset with the current
 * time.  The "previous" readings are left untouched; context is unused.
 */
static void clear_counters(cl_map_item_t * const p_map_item, void *context)
{
	db_node_t *node = (db_node_t *) p_map_item;
	int i = 0;
	time_t ts = time(NULL);

	for (i = 0; i < node->num_ports; i++) {
		node->ports[i].err_total.symbol_err_cnt = 0;
		node->ports[i].err_total.link_err_recover = 0;
		node->ports[i].err_total.link_downed = 0;
		node->ports[i].err_total.rcv_err = 0;
		node->ports[i].err_total.rcv_rem_phys_err = 0;
		node->ports[i].err_total.rcv_switch_relay_err = 0;
		node->ports[i].err_total.xmit_discards = 0;
		node->ports[i].err_total.xmit_constraint_err = 0;
		node->ports[i].err_total.rcv_constraint_err = 0;
		node->ports[i].err_total.link_integrity = 0;
		node->ports[i].err_total.buffer_overrun = 0;
		node->ports[i].err_total.vl15_dropped = 0;
		node->ports[i].err_total.xmit_wait = 0;
		node->ports[i].err_total.time = ts;

		node->ports[i].dc_total.xmit_data = 0;
		node->ports[i].dc_total.rcv_data = 0;
		node->ports[i].dc_total.xmit_pkts = 0;
		node->ports[i].dc_total.rcv_pkts = 0;
		node->ports[i].dc_total.unicast_xmit_pkts = 0;
		node->ports[i].dc_total.unicast_rcv_pkts = 0;
		node->ports[i].dc_total.multicast_xmit_pkts = 0;
		node->ports[i].dc_total.multicast_rcv_pkts = 0;
		node->ports[i].dc_total.time = ts;

		node->ports[i].last_reset = ts;
	}
}

/**********************************************************************
 * Clear all the counters from the db
 **********************************************************************/
void perfmgr_db_clear_counters(perfmgr_db_t * db)
{
	cl_plock_excl_acquire(&db->lock);
	cl_qmap_apply_func(&db->pc_data, clear_counters, (void *)db);
	cl_plock_release(&db->lock);
}

/*
 * Format *t with ctime_r() into buf (len bytes, at least 26 as required by
 * ctime_r) and strip the trailing newline.  If ctime_r() fails the buffer
 * is set to "unknown" instead of dereferencing its NULL result.
 * Returns buf.
 */
static char *format_time(const time_t * t, char *buf, size_t len)
{
	size_t n;

	if (!ctime_r(t, buf)) {
		snprintf(buf, len, "%s", "unknown");
		return buf;
	}

	n = strlen(buf);
	if (n > 0 && buf[n - 1] == '\n')
		buf[n - 1] = '\0';	/* remove \n */
	return buf;
}

/**********************************************************************
 * Output a tab delimited output of the port counters
 *
 * A header line is written first, followed by one line for every valid
 * port.  Port 0 is only considered on nodes created with esp0.
 **********************************************************************/
static void dump_node_mr(db_node_t * node, FILE * fp)
{
	int i = 0;

	fprintf(fp, "\nName\tGUID\tActive\tPort\tLast Reset\t"
		"Last Error Update\tLast Data Update\t"
		"%s\t%s\t"
		"%s\t%s\t%s\t%s\t%s\t%s\t%s\t"
		"%s\t%s\t%s\t%s\t%s\t%s\t%s\t"
		"%s\t%s\t%s\t%s\t%s\n",
		"symbol_err_cnt",
		"link_err_recover",
		"link_downed",
		"rcv_err",
		"rcv_rem_phys_err",
		"rcv_switch_relay_err",
		"xmit_discards",
		"xmit_constraint_err",
		"rcv_constraint_err",
		"link_int_err",
		"buf_overrun_err",
		"vl15_dropped",
		"xmit_wait",
		"xmit_data",
		"rcv_data",
		"xmit_pkts",
		"rcv_pkts",
		"unicast_xmit_pkts",
		"unicast_rcv_pkts",
		"multicast_xmit_pkts",
		"multicast_rcv_pkts");
	for (i = (node->esp0) ? 0 : 1; i < node->num_ports; i++) {
		char lr[PERFMGR_DB_TIME_STR_LEN];
		char leu[PERFMGR_DB_TIME_STR_LEN];
		char ldu[PERFMGR_DB_TIME_STR_LEN];
		char *last_reset;
		char *last_err_update;
		char *last_data_update;

		if (!node->ports[i].valid)
			continue;

		last_reset = format_time(&node->ports[i].last_reset,
					 lr, sizeof(lr));
		last_err_update = format_time(&node->ports[i].err_total.time,
					      leu, sizeof(leu));
		last_data_update = format_time(&node->ports[i].dc_total.time,
					       ldu, sizeof(ldu));

		fprintf(fp,
			"%s\t0x%" PRIx64 "\t%s\t%d\t%s\t%s\t%s\t%" PRIu64 "\t%" PRIu64 "\t"
			"%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t"
			"%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t" "%" PRIu64
			"\t%" PRIu64 "\t%" PRIu64 "\t" "%" PRIu64 "\t%" PRIu64
			"\t%" PRIu64 "\t%" PRIu64 "\t" "%" PRIu64 "\t%" PRIu64
			"\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\n", node->node_name,
			node->node_guid,
			node->active ? "TRUE" : "FALSE",
			i, last_reset, last_err_update, last_data_update,
			node->ports[i].err_total.symbol_err_cnt,
			node->ports[i].err_total.link_err_recover,
			node->ports[i].err_total.link_downed,
			node->ports[i].err_total.rcv_err,
			node->ports[i].err_total.rcv_rem_phys_err,
			node->ports[i].err_total.rcv_switch_relay_err,
			node->ports[i].err_total.xmit_discards,
			node->ports[i].err_total.xmit_constraint_err,
			node->ports[i].err_total.rcv_constraint_err,
			node->ports[i].err_total.link_integrity,
			node->ports[i].err_total.buffer_overrun,
			node->ports[i].err_total.vl15_dropped,
			node->ports[i].err_total.xmit_wait,
			node->ports[i].dc_total.xmit_data,
			node->ports[i].dc_total.rcv_data,
			node->ports[i].dc_total.xmit_pkts,
			node->ports[i].dc_total.rcv_pkts,
			node->ports[i].dc_total.unicast_xmit_pkts,
			node->ports[i].dc_total.unicast_rcv_pkts,
			node->ports[i].dc_total.multicast_xmit_pkts,
			node->ports[i].dc_total.multicast_rcv_pkts);
	}
}

/*
 * Print val64 scaled by powers of 1024 with a K/M/G/T/P/E suffix, wrapped
 * in " (...)" and terminated by a newline.  When data is non-zero the
 * value is multiplied by 4 before scaling and a "B" is appended.
 */
static void dump_hr_dc(FILE *fp, uint64_t val64, int data)
{
	const char *unit = "";
	uint64_t tmp = val64;
	float val = 0.0;
	int ui = 0;
	uint64_t div = 1;

	/* find the largest power of 1024 that does not exceed val64 */
	tmp /= 1024;
	while (tmp) {
		ui++;
		tmp /= 1024;
		div *= 1024;
	}

	val = (float)(val64);
	if (data) {
		val *= 4;
		/* the multiplication may have pushed us into the next unit */
		if (val/div > 1024) {
			ui++;
			div *= 1024;
		}
	}
	val /= div;

	switch (ui) {
	case 1:
		unit = "K";
		break;
	case 2:
		unit = "M";
		break;
	case 3:
		unit = "G";
		break;
	case 4:
		unit = "T";
		break;
	case 5:
		unit = "P";
		break;
	case 6:
		unit = "E";
		break;
	default:
		/* below 1024: no unit prefix */
		break;
	}

	fprintf(fp, " (%5.3f%s%s)\n", val, unit, data ? "B" : "");
}

/**********************************************************************
 * Output a human readable output of the port counters
 *
 * port, when not NULL, is a port number given as text (any base accepted
 * by strtoul() with base 0); only that port is printed.  If it is not a
 * valid port of the node a warning is printed and all ports are dumped.
 * With err_only set, ports whose error totals are all zero are skipped,
 * only non-zero error counters are printed and the data counters are
 * omitted.  Ports never marked valid are always skipped.
 **********************************************************************/
static void dump_node_hr(db_node_t * node, FILE * fp, char *port, int err_only)
{
	int i = (node->esp0) ? 0 : 1;
	int num_ports = node->num_ports;

	if (port) {
		char *end = NULL;
		unsigned long p;

		/*
		 * Keep the full unsigned long so an oversized number cannot
		 * be truncated into a valid port, and insist on at least one
		 * digit so an empty string is not taken as port 0.
		 */
		errno = 0;
		p = strtoul(port, &end, 0);
		if (end != port && *end == '\0' && errno == 0
		    && p >= (unsigned long)i && p < (unsigned long)num_ports) {
			i = (int)p;
			num_ports = (int)p + 1;
		} else {
			fprintf(fp, "Warning: \"%s\" is not a valid port\n", port);
		}
	}
	for (/* set above */; i < num_ports; i++) {
		char lr[PERFMGR_DB_TIME_STR_LEN];
		char leu[PERFMGR_DB_TIME_STR_LEN];
		char ldu[PERFMGR_DB_TIME_STR_LEN];
		char *last_reset;
		char *last_err_update;
		char *last_data_update;
		perfmgr_db_err_reading_t *err;

		if (!node->ports[i].valid)
			continue;

		err = &node->ports[i].err_total;

		if (err_only
		    && err->symbol_err_cnt == 0
		    && err->link_err_recover == 0
		    && err->link_downed == 0
		    && err->rcv_err == 0
		    && err->rcv_rem_phys_err == 0
		    && err->rcv_switch_relay_err == 0
		    && err->xmit_discards == 0
		    && err->xmit_constraint_err == 0
		    && err->rcv_constraint_err == 0
		    && err->link_integrity == 0
		    && err->buffer_overrun == 0
		    && err->vl15_dropped == 0
		    && err->xmit_wait == 0)
			continue;

		last_reset = format_time(&node->ports[i].last_reset,
					 lr, sizeof(lr));
		last_err_update = format_time(&node->ports[i].err_total.time,
					      leu, sizeof(leu));
		last_data_update = format_time(&node->ports[i].dc_total.time,
					       ldu, sizeof(ldu));

		fprintf(fp, "\"%s\" 0x%" PRIx64 " active %s port %d\n"
			" Last Reset : %s\n"
			" Last Error Update : %s\n",
			node->node_name, node->node_guid,
			node->active ? "TRUE":"FALSE", i, last_reset,
			last_err_update);

		if (!err_only || err->symbol_err_cnt != 0)
			fprintf(fp, " symbol_err_cnt : %" PRIu64 "\n",
				err->symbol_err_cnt);
		if (!err_only || err->link_err_recover != 0)
			fprintf(fp, " link_err_recover : %" PRIu64 "\n",
				err->link_err_recover);
		if (!err_only || err->link_downed != 0)
			fprintf(fp, " link_downed : %" PRIu64 "\n",
				err->link_downed);
		if (!err_only || err->rcv_err != 0)
			fprintf(fp, " rcv_err : %" PRIu64 "\n",
				err->rcv_err);
		if (!err_only || err->rcv_rem_phys_err != 0)
			fprintf(fp, " rcv_rem_phys_err : %" PRIu64 "\n",
				err->rcv_rem_phys_err);
		if (!err_only || err->rcv_switch_relay_err != 0)
			fprintf(fp, " rcv_switch_relay_err : %" PRIu64 "\n",
				err->rcv_switch_relay_err);
		if (!err_only || err->xmit_discards != 0)
			fprintf(fp, " xmit_discards : %" PRIu64 "\n",
				err->xmit_discards);
		if (!err_only || err->xmit_constraint_err != 0)
			fprintf(fp, " xmit_constraint_err : %" PRIu64 "\n",
				err->xmit_constraint_err);
		if (!err_only || err->rcv_constraint_err != 0)
			fprintf(fp, " rcv_constraint_err : %" PRIu64 "\n",
				err->rcv_constraint_err);
		if (!err_only || err->link_integrity != 0)
			fprintf(fp, " link_integrity_err : %" PRIu64 "\n",
				err->link_integrity);
		if (!err_only || err->buffer_overrun != 0)
			fprintf(fp, " buf_overrun_err : %" PRIu64 "\n",
				err->buffer_overrun);
		if (!err_only || err->vl15_dropped != 0)
			fprintf(fp, " vl15_dropped : %" PRIu64 "\n",
				err->vl15_dropped);
		if (!err_only || err->xmit_wait != 0)
			fprintf(fp, " xmit_wait : %" PRIu64 "\n",
				err->xmit_wait);

		if (err_only)
			continue;

		fprintf(fp, " Last Data Update : %s\n",
			last_data_update);
		fprintf(fp, " xmit_data : %" PRIu64,
			node->ports[i].dc_total.xmit_data);
		dump_hr_dc(fp, node->ports[i].dc_total.xmit_data, 1);
		fprintf(fp, " rcv_data : %" PRIu64,
			node->ports[i].dc_total.rcv_data);
		dump_hr_dc(fp, node->ports[i].dc_total.rcv_data, 1);
		fprintf(fp, " xmit_pkts : %" PRIu64,
			node->ports[i].dc_total.xmit_pkts);
		dump_hr_dc(fp, node->ports[i].dc_total.xmit_pkts, 0);
		fprintf(fp, " rcv_pkts : %" PRIu64,
			node->ports[i].dc_total.rcv_pkts);
		dump_hr_dc(fp, node->ports[i].dc_total.rcv_pkts, 0);
		fprintf(fp, " unicast_xmit_pkts : %" PRIu64,
			node->ports[i].dc_total.unicast_xmit_pkts);
		dump_hr_dc(fp, node->ports[i].dc_total.unicast_xmit_pkts, 0);
		fprintf(fp, " unicast_rcv_pkts : %" PRIu64,
			node->ports[i].dc_total.unicast_rcv_pkts);
		dump_hr_dc(fp, node->ports[i].dc_total.unicast_rcv_pkts, 0);
		fprintf(fp, " multicast_xmit_pkts : %" PRIu64,
			node->ports[i].dc_total.multicast_xmit_pkts);
		dump_hr_dc(fp, node->ports[i].dc_total.multicast_xmit_pkts, 0);
		fprintf(fp, " multicast_rcv_pkts : %" PRIu64,
			node->ports[i].dc_total.multicast_rcv_pkts);
		dump_hr_dc(fp, node->ports[i].dc_total.multicast_rcv_pkts, 0);

	}
}

/* Define a context for the __db_dump callback */
typedef struct {
	FILE *fp;			/* stream the dump is written to */
	perfmgr_db_dump_t dump_type;	/* selects the output format */
} dump_context_t;

/*
 * cl_qmap_apply_func() callback: dump one node in the format selected by
 * the dump_context_t passed as context.  PERFMGR_EVENT_DB_DUMP_MR selects
 * the tab delimited form; every other value the human readable form.
 */
static void db_dump(cl_map_item_t * const p_map_item, void *context)
{
	db_node_t *node = (db_node_t *) p_map_item;
	dump_context_t *c = (dump_context_t *) context;
	FILE *fp = c->fp;

	switch (c->dump_type) {
	case PERFMGR_EVENT_DB_DUMP_MR:
		dump_node_mr(node, fp);
		break;
	case PERFMGR_EVENT_DB_DUMP_HR:
	default:
		dump_node_hr(node, fp, NULL, 0);
		break;
	}
}

/**********************************************************************
 * print all node data to fp
 **********************************************************************/
void
perfmgr_db_print_all(perfmgr_db_t * db, FILE *fp, int err_only)
{
	cl_map_item_t *item;
	db_node_t *node;

	cl_plock_acquire(&db->lock);
	item = cl_qmap_head(&db->pc_data);
	while (item != cl_qmap_end(&db->pc_data)) {
		node = (db_node_t *)item;
		dump_node_hr(node, fp, NULL, err_only);
		item = cl_qmap_next(item);
	}
	cl_plock_release(&db->lock);
}

/**********************************************************************
 * print node data to fp
 *
 * Only the first node whose name equals nodename is printed; a "not
 * found" message is written when no node matches.
 **********************************************************************/
void
perfmgr_db_print_by_name(perfmgr_db_t * db, char *nodename, FILE *fp,
			 char *port, int err_only)
{
	cl_map_item_t *item;
	db_node_t *node;

	cl_plock_acquire(&db->lock);

	/* find the node */
	item = cl_qmap_head(&db->pc_data);
	while (item != cl_qmap_end(&db->pc_data)) {
		node = (db_node_t *)item;
		if (strcmp(node->node_name, nodename) == 0) {
			dump_node_hr(node, fp, port, err_only);
			goto done;
		}
		item = cl_qmap_next(item);
	}

	fprintf(fp, "Node %s not found...\n", nodename);
done:
	cl_plock_release(&db->lock);
}

/**********************************************************************
 * print node data to fp
 *
 * A "not found" message is written when nodeguid is not in the db.
 **********************************************************************/
void
perfmgr_db_print_by_guid(perfmgr_db_t * db, uint64_t nodeguid, FILE *fp,
			 char *port, int err_only)
{
	cl_map_item_t *node;

	cl_plock_acquire(&db->lock);

	node = cl_qmap_get(&db->pc_data, nodeguid);
	if (node != cl_qmap_end(&db->pc_data))
		dump_node_hr((db_node_t *)node, fp, port, err_only);
	else
		fprintf(fp, "Node 0x%" PRIx64 " not found...\n", nodeguid);

	cl_plock_release(&db->lock);
}

/**********************************************************************
 * dump the data to the file "file"
 *
 * The file is opened with mode "w+".  Returns PERFMGR_EVENT_DB_FAIL if
 * the file cannot be opened or if closing it reports an error.
 **********************************************************************/
perfmgr_db_err_t
perfmgr_db_dump(perfmgr_db_t * db, char *file, perfmgr_db_dump_t dump_type)
{
	dump_context_t context;

	context.fp = fopen(file, "w+");
	if (!context.fp)
		return PERFMGR_EVENT_DB_FAIL;
	context.dump_type = dump_type;

	cl_plock_acquire(&db->lock);
	cl_qmap_apply_func(&db->pc_data, db_dump, (void *)&context);
	cl_plock_release(&db->lock);

	/* fclose() flushes buffered output; a failure means a short dump */
	if (fclose(context.fp) != 0)
		return PERFMGR_EVENT_DB_FAIL;
	return PERFMGR_EVENT_DB_SUCCESS;
}

/**********************************************************************
 * Fill in the various DB objects from their wire counter parts
 **********************************************************************/

/*
 * Fill *reading from a PortCounters attribute.
 *
 * Fields passed through cl_ntoh16()/cl_ntoh32() are converted from wire
 * byte order; the others are copied as is.  xmit_wait is taken from the
 * wire only when xmit_wait_sup is set and is 0 otherwise.  The reading is
 * stamped with the current time.
 */
void
perfmgr_db_fill_err_read(ib_port_counters_t * wire_read,
			 perfmgr_db_err_reading_t * reading,
			 boolean_t xmit_wait_sup)
{
	reading->symbol_err_cnt = cl_ntoh16(wire_read->symbol_err_cnt);
	reading->link_err_recover = wire_read->link_err_recover;
	reading->link_downed = wire_read->link_downed;
	reading->rcv_err = cl_ntoh16(wire_read->rcv_err);
	reading->rcv_rem_phys_err = cl_ntoh16(wire_read->rcv_rem_phys_err);
	reading->rcv_switch_relay_err =
	    cl_ntoh16(wire_read->rcv_switch_relay_err);
	reading->xmit_discards = cl_ntoh16(wire_read->xmit_discards);
	reading->xmit_constraint_err = wire_read->xmit_constraint_err;
	reading->rcv_constraint_err = wire_read->rcv_constraint_err;
	/* link integrity and buffer overrun share one wire field */
	reading->link_integrity =
	    PC_LINK_INT(wire_read->link_int_buffer_overrun);
	reading->buffer_overrun =
	    PC_BUF_OVERRUN(wire_read->link_int_buffer_overrun);
	reading->vl15_dropped = cl_ntoh16(wire_read->vl15_dropped);
	if (xmit_wait_sup)
		reading->xmit_wait = cl_ntoh32(wire_read->xmit_wait);
	else
		reading->xmit_wait = 0;
	reading->time = time(NULL);
}

/*
 * Fill *reading from the 32 bit data counters of a PortCounters attribute.
 * The unicast/multicast packet counters are set to 0 and the reading is
 * stamped with the current time.
 */
void
perfmgr_db_fill_data_cnt_read_pc(ib_port_counters_t * wire_read,
				 perfmgr_db_data_cnt_reading_t * reading)
{
	reading->xmit_data = cl_ntoh32(wire_read->xmit_data);
	reading->rcv_data = cl_ntoh32(wire_read->rcv_data);
	reading->xmit_pkts = cl_ntoh32(wire_read->xmit_pkts);
	reading->rcv_pkts = cl_ntoh32(wire_read->rcv_pkts);
	reading->unicast_xmit_pkts = 0;
	reading->unicast_rcv_pkts = 0;
	reading->multicast_xmit_pkts = 0;
	reading->multicast_rcv_pkts = 0;
	reading->time = time(NULL);
}

/*
 * Fill *reading from the 64 bit counters of a PortCountersExtended
 * attribute.  The unicast/multicast packet counters are taken from the
 * wire only when ietf_sup is non-zero and are set to 0 otherwise, matching
 * perfmgr_db_fill_data_cnt_read_pc(), so that the reading is always fully
 * initialized.  The reading is stamped with the current time.
 */
void
perfmgr_db_fill_data_cnt_read_pce(ib_port_counters_ext_t * wire_read,
				  perfmgr_db_data_cnt_reading_t * reading,
				  int ietf_sup)
{
	reading->xmit_data = cl_ntoh64(wire_read->xmit_data);
	reading->rcv_data = cl_ntoh64(wire_read->rcv_data);
	reading->xmit_pkts = cl_ntoh64(wire_read->xmit_pkts);
	reading->rcv_pkts = cl_ntoh64(wire_read->rcv_pkts);
	if (ietf_sup)
	{
		reading->unicast_xmit_pkts = cl_ntoh64(wire_read->unicast_xmit_pkts);
		reading->unicast_rcv_pkts = cl_ntoh64(wire_read->unicast_rcv_pkts);
		reading->multicast_xmit_pkts =
		    cl_ntoh64(wire_read->multicast_xmit_pkts);
		reading->multicast_rcv_pkts = cl_ntoh64(wire_read->multicast_rcv_pkts);
	}
	else
	{
		reading->unicast_xmit_pkts = 0;
		reading->unicast_rcv_pkts = 0;
		reading->multicast_xmit_pkts = 0;
		reading->multicast_rcv_pkts = 0;
	}
	reading->time = time(NULL);
}
#endif				/* ENABLE_OSM_PERF_MGR */