1 /* 2 * Copyright (c) 2006-2009 Voltaire, Inc. All rights reserved. 3 * Copyright (c) 2009 HNR Consulting. All rights reserved. 4 * Copyright (c) 2012 Lawrence Livermore National Lab. All rights reserved. 5 * Copyright (c) 2014 Mellanox Technologies LTD. All rights reserved. 6 * 7 * This software is available to you under a choice of one of two 8 * licenses. You may choose to be licensed under the terms of the GNU 9 * General Public License (GPL) Version 2, available from the file 10 * COPYING in the main directory of this source tree, or the 11 * OpenIB.org BSD license below: 12 * 13 * Redistribution and use in source and binary forms, with or 14 * without modification, are permitted provided that the following 15 * conditions are met: 16 * 17 * - Redistributions of source code must retain the above 18 * copyright notice, this list of conditions and the following 19 * disclaimer. 20 * 21 * - Redistributions in binary form must reproduce the above 22 * copyright notice, this list of conditions and the following 23 * disclaimer in the documentation and/or other materials 24 * provided with the distribution. 25 * 26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 33 * SOFTWARE. 34 * 35 */ 36 37 /* 38 * Abstract: 39 * OSM Congestion Control configuration implementation 40 * 41 * Author: 42 * Albert Chu, LLNL 43 */ 44 45 #if HAVE_CONFIG_H 46 # include <config.h> 47 #endif /* HAVE_CONFIG_H */ 48 49 #include <stdlib.h> 50 #include <string.h> 51 52 #include <iba/ib_types.h> 53 #include <complib/cl_debug.h> 54 #include <opensm/osm_file_ids.h> 55 #define FILE_ID OSM_FILE_CONGESTION_CONTROL_C 56 #include <opensm/osm_subnet.h> 57 #include <opensm/osm_opensm.h> 58 #include <opensm/osm_log.h> 59 #include <opensm/osm_subnet.h> 60 #include <opensm/osm_congestion_control.h> 61 62 #define CONGESTION_CONTROL_INITIAL_TID_VALUE 0x7A93 63 64 static void cc_mad_post(osm_congestion_control_t *p_cc, 65 osm_madw_t *p_madw, 66 osm_node_t *p_node, 67 osm_physp_t *p_physp, 68 ib_net16_t attr_id, 69 ib_net32_t attr_mod) 70 { 71 osm_subn_opt_t *p_opt = &p_cc->subn->opt; 72 ib_cc_mad_t *p_cc_mad; 73 uint8_t port; 74 75 OSM_LOG_ENTER(p_cc->log); 76 77 port = osm_physp_get_port_num(p_physp); 78 79 p_cc_mad = osm_madw_get_cc_mad_ptr(p_madw); 80 81 p_cc_mad->header.base_ver = 1; 82 p_cc_mad->header.mgmt_class = IB_MCLASS_CC; 83 p_cc_mad->header.class_ver = 2; 84 p_cc_mad->header.method = IB_MAD_METHOD_SET; 85 p_cc_mad->header.status = 0; 86 p_cc_mad->header.class_spec = 0; 87 p_cc_mad->header.trans_id = 88 cl_hton64((uint64_t) cl_atomic_inc(&p_cc->trans_id) & 89 (uint64_t) (0xFFFFFFFF)); 90 if (p_cc_mad->header.trans_id == 0) 91 p_cc_mad->header.trans_id = 92 cl_hton64((uint64_t) cl_atomic_inc(&p_cc->trans_id) & 93 (uint64_t) (0xFFFFFFFF)); 94 p_cc_mad->header.attr_id = attr_id; 95 p_cc_mad->header.resv = 0; 96 p_cc_mad->header.attr_mod = attr_mod; 97 98 p_cc_mad->cc_key = p_opt->cc_key; 99 100 memset(p_cc_mad->log_data, '\0', IB_CC_LOG_DATA_SIZE); 101 102 p_madw->mad_addr.dest_lid = osm_node_get_base_lid(p_node, port); 103 p_madw->mad_addr.addr_type.gsi.remote_qp = IB_QP1; 104 p_madw->mad_addr.addr_type.gsi.remote_qkey = 105 cl_hton32(IB_QP1_WELL_KNOWN_Q_KEY); 106 p_madw->resp_expected = TRUE; 107 p_madw->fail_msg = CL_DISP_MSGID_NONE; 108 109 p_madw->context.cc_context.node_guid = osm_node_get_node_guid(p_node); 110 p_madw->context.cc_context.port_guid = osm_physp_get_port_guid(p_physp); 111 p_madw->context.cc_context.port = port; 112 p_madw->context.cc_context.mad_method = IB_MAD_METHOD_SET; 113 p_madw->context.cc_context.attr_mod = attr_mod; 114 115 cl_spinlock_acquire(&p_cc->mad_queue_lock); 116 cl_atomic_inc(&p_cc->outstanding_mads); 117 cl_qlist_insert_tail(&p_cc->mad_queue, &p_madw->list_item); 118 cl_spinlock_release(&p_cc->mad_queue_lock); 119 120 cl_event_signal(&p_cc->cc_poller_wakeup); 121 122 OSM_LOG_EXIT(p_cc->log); 123 } 124 125 static void cc_setup_mad_data(osm_sm_t * p_sm) 126 { 127 osm_congestion_control_t *p_cc = &p_sm->p_subn->p_osm->cc; 128 osm_subn_opt_t *p_opt = &p_sm->p_subn->opt; 129 uint16_t ccti_limit; 130 int i; 131 132 /* Switch Congestion Setting */ 133 p_cc->sw_cong_setting.control_map = p_opt->cc_sw_cong_setting_control_map; 134 135 memcpy(p_cc->sw_cong_setting.victim_mask, 136 p_opt->cc_sw_cong_setting_victim_mask, 137 IB_CC_PORT_MASK_DATA_SIZE); 138 139 memcpy(p_cc->sw_cong_setting.credit_mask, 140 p_opt->cc_sw_cong_setting_credit_mask, 141 IB_CC_PORT_MASK_DATA_SIZE); 142 143 /* threshold is 4 bits, takes up upper nibble of byte */ 144 p_cc->sw_cong_setting.threshold_resv = (p_opt->cc_sw_cong_setting_threshold << 4); 145 146 p_cc->sw_cong_setting.packet_size = p_opt->cc_sw_cong_setting_packet_size; 147 148 /* cs threshold is 4 bits, takes up upper nibble of short */ 149 p_cc->sw_cong_setting.cs_threshold_resv = 150 cl_hton16(p_opt->cc_sw_cong_setting_credit_starvation_threshold << 12); 151 152 p_cc->sw_cong_setting.cs_return_delay = 153 cl_hton16(p_opt->cc_sw_cong_setting_credit_starvation_return_delay.shift << 14 154 | p_opt->cc_sw_cong_setting_credit_starvation_return_delay.multiplier); 155 156 p_cc->sw_cong_setting.marking_rate = p_opt->cc_sw_cong_setting_marking_rate; 157 158 /* CA Congestion Setting */ 159 p_cc->ca_cong_setting.port_control = p_opt->cc_ca_cong_setting_port_control; 160 p_cc->ca_cong_setting.control_map = p_opt->cc_ca_cong_setting_control_map; 161 162 for (i = 0; i < IB_CA_CONG_ENTRY_DATA_SIZE; i++) { 163 ib_ca_cong_entry_t *p_entry; 164 165 p_entry = &p_cc->ca_cong_setting.entry_list[i]; 166 167 p_entry->ccti_timer = p_opt->cc_ca_cong_entries[i].ccti_timer; 168 p_entry->ccti_increase = p_opt->cc_ca_cong_entries[i].ccti_increase; 169 p_entry->trigger_threshold = p_opt->cc_ca_cong_entries[i].trigger_threshold; 170 p_entry->ccti_min = p_opt->cc_ca_cong_entries[i].ccti_min; 171 p_entry->resv0 = 0; 172 p_entry->resv1 = 0; 173 } 174 175 /* Congestion Control Table */ 176 177 /* if no entries, we will always send at least 1 mad to set ccti_limit = 0 */ 178 if (!p_opt->cc_cct.entries_len) 179 p_cc->cc_tbl_mads = 1; 180 else { 181 p_cc->cc_tbl_mads = p_opt->cc_cct.entries_len - 1; 182 p_cc->cc_tbl_mads /= IB_CC_TBL_ENTRY_LIST_MAX; 183 p_cc->cc_tbl_mads += 1; 184 } 185 186 CL_ASSERT(p_cc->cc_tbl_mads <= OSM_CCT_ENTRY_MAD_BLOCKS); 187 188 if (!p_opt->cc_cct.entries_len) 189 ccti_limit = 0; 190 else 191 ccti_limit = p_opt->cc_cct.entries_len - 1; 192 193 for (i = 0; i < p_cc->cc_tbl_mads; i++) { 194 int j; 195 196 p_cc->cc_tbl[i].ccti_limit = cl_hton16(ccti_limit); 197 p_cc->cc_tbl[i].resv = 0; 198 199 memset(p_cc->cc_tbl[i].entry_list, 200 '\0', 201 sizeof(p_cc->cc_tbl[i].entry_list)); 202 203 if (!ccti_limit) 204 break; 205 206 for (j = 0; j < IB_CC_TBL_ENTRY_LIST_MAX; j++) { 207 int k; 208 209 k = (i * IB_CC_TBL_ENTRY_LIST_MAX) + j; 210 p_cc->cc_tbl[i].entry_list[j].shift_multiplier = 211 cl_hton16(p_opt->cc_cct.entries[k].shift << 14 212 | p_opt->cc_cct.entries[k].multiplier); 213 } 214 } 215 } 216 217 static ib_api_status_t cc_send_sw_cong_setting(osm_sm_t * p_sm, 218 osm_node_t *p_node) 219 { 220 osm_congestion_control_t *p_cc = &p_sm->p_subn->p_osm->cc; 221 unsigned force_update; 222 osm_physp_t *p_physp; 223 osm_madw_t *p_madw = NULL; 224 ib_cc_mad_t *p_cc_mad = NULL; 225 ib_sw_cong_setting_t *p_sw_cong_setting = NULL; 226 227 OSM_LOG_ENTER(p_sm->p_log); 228 229 p_physp = osm_node_get_physp_ptr(p_node, 0); 230 231 force_update = p_physp->need_update || p_sm->p_subn->need_update; 232 233 if (!force_update 234 && !memcmp(&p_cc->sw_cong_setting, 235 &p_physp->cc.sw.sw_cong_setting, 236 sizeof(p_cc->sw_cong_setting))) 237 return IB_SUCCESS; 238 239 p_madw = osm_mad_pool_get(p_cc->mad_pool, p_cc->bind_handle, 240 MAD_BLOCK_SIZE, NULL); 241 if (p_madw == NULL) { 242 OSM_LOG(p_sm->p_log, OSM_LOG_ERROR, "ERR C101: " 243 "failed to allocate mad\n"); 244 return IB_INSUFFICIENT_MEMORY; 245 } 246 247 p_cc_mad = osm_madw_get_cc_mad_ptr(p_madw); 248 249 p_sw_cong_setting = ib_cc_mad_get_mgt_data_ptr(p_cc_mad); 250 251 memcpy(p_sw_cong_setting, 252 &p_cc->sw_cong_setting, 253 sizeof(p_cc->sw_cong_setting)); 254 255 cc_mad_post(p_cc, p_madw, p_node, p_physp, 256 IB_MAD_ATTR_SW_CONG_SETTING, 0); 257 258 OSM_LOG_EXIT(p_sm->p_log); 259 260 return IB_SUCCESS; 261 } 262 263 static ib_api_status_t cc_send_ca_cong_setting(osm_sm_t * p_sm, 264 osm_node_t *p_node, 265 osm_physp_t *p_physp) 266 { 267 osm_congestion_control_t *p_cc = &p_sm->p_subn->p_osm->cc; 268 unsigned force_update; 269 osm_madw_t *p_madw = NULL; 270 ib_cc_mad_t *p_cc_mad = NULL; 271 ib_ca_cong_setting_t *p_ca_cong_setting = NULL; 272 273 OSM_LOG_ENTER(p_sm->p_log); 274 275 force_update = p_physp->need_update || p_sm->p_subn->need_update; 276 277 if (!force_update 278 && !memcmp(&p_cc->ca_cong_setting, 279 &p_physp->cc.ca.ca_cong_setting, 280 sizeof(p_cc->ca_cong_setting))) 281 return IB_SUCCESS; 282 283 p_madw = osm_mad_pool_get(p_cc->mad_pool, p_cc->bind_handle, 284 MAD_BLOCK_SIZE, NULL); 285 if (p_madw == NULL) { 286 OSM_LOG(p_sm->p_log, OSM_LOG_ERROR, "ERR C102: " 287 "failed to allocate mad\n"); 288 return IB_INSUFFICIENT_MEMORY; 289 } 290 291 p_cc_mad = osm_madw_get_cc_mad_ptr(p_madw); 292 293 p_ca_cong_setting = ib_cc_mad_get_mgt_data_ptr(p_cc_mad); 294 295 memcpy(p_ca_cong_setting, 296 &p_cc->ca_cong_setting, 297 sizeof(p_cc->ca_cong_setting)); 298 299 cc_mad_post(p_cc, p_madw, p_node, p_physp, 300 IB_MAD_ATTR_CA_CONG_SETTING, 0); 301 302 OSM_LOG_EXIT(p_sm->p_log); 303 304 return IB_SUCCESS; 305 } 306 307 static ib_api_status_t cc_send_cct(osm_sm_t * p_sm, 308 osm_node_t *p_node, 309 osm_physp_t *p_physp) 310 { 311 osm_congestion_control_t *p_cc = &p_sm->p_subn->p_osm->cc; 312 unsigned force_update; 313 osm_madw_t *p_madw = NULL; 314 ib_cc_mad_t *p_cc_mad = NULL; 315 ib_cc_tbl_t *p_cc_tbl = NULL; 316 unsigned int index = 0; 317 318 OSM_LOG_ENTER(p_sm->p_log); 319 320 force_update = p_physp->need_update || p_sm->p_subn->need_update; 321 322 for (index = 0; index < p_cc->cc_tbl_mads; index++) { 323 if (!force_update 324 && !memcmp(&p_cc->cc_tbl[index], 325 &p_physp->cc.ca.cc_tbl[index], 326 sizeof(p_cc->cc_tbl[index]))) 327 continue; 328 329 p_madw = osm_mad_pool_get(p_cc->mad_pool, p_cc->bind_handle, 330 MAD_BLOCK_SIZE, NULL); 331 if (p_madw == NULL) { 332 OSM_LOG(p_sm->p_log, OSM_LOG_ERROR, "ERR C103: " 333 "failed to allocate mad\n"); 334 return IB_INSUFFICIENT_MEMORY; 335 } 336 337 p_cc_mad = osm_madw_get_cc_mad_ptr(p_madw); 338 339 p_cc_tbl = (ib_cc_tbl_t *)ib_cc_mad_get_mgt_data_ptr(p_cc_mad); 340 341 memcpy(p_cc_tbl, 342 &p_cc->cc_tbl[index], 343 sizeof(p_cc->cc_tbl[index])); 344 345 cc_mad_post(p_cc, p_madw, p_node, p_physp, 346 IB_MAD_ATTR_CC_TBL, cl_hton32(index)); 347 } 348 349 OSM_LOG_EXIT(p_sm->p_log); 350 351 return IB_SUCCESS; 352 } 353 354 int osm_congestion_control_setup(struct osm_opensm *p_osm) 355 { 356 cl_qmap_t *p_tbl; 357 cl_map_item_t *p_next; 358 int ret = 0; 359 360 if (!p_osm->subn.opt.congestion_control) 361 return 0; 362 363 OSM_LOG_ENTER(&p_osm->log); 364 365 /* 366 * Do nothing unless the most recent routing attempt was successful. 367 */ 368 if (!p_osm->routing_engine_used) 369 return 0; 370 371 cc_setup_mad_data(&p_osm->sm); 372 373 cl_plock_acquire(&p_osm->lock); 374 375 p_tbl = &p_osm->subn.port_guid_tbl; 376 p_next = cl_qmap_head(p_tbl); 377 while (p_next != cl_qmap_end(p_tbl)) { 378 osm_port_t *p_port = (osm_port_t *) p_next; 379 osm_node_t *p_node = p_port->p_node; 380 ib_api_status_t status; 381 382 p_next = cl_qmap_next(p_next); 383 384 if (p_port->cc_unavailable_flag) 385 continue; 386 387 if (osm_node_get_type(p_node) == IB_NODE_TYPE_SWITCH) { 388 status = cc_send_sw_cong_setting(&p_osm->sm, p_node); 389 if (status != IB_SUCCESS) 390 ret = -1; 391 } else if (osm_node_get_type(p_node) == IB_NODE_TYPE_CA) { 392 status = cc_send_ca_cong_setting(&p_osm->sm, 393 p_node, 394 p_port->p_physp); 395 if (status != IB_SUCCESS) 396 ret = -1; 397 398 status = cc_send_cct(&p_osm->sm, 399 p_node, 400 p_port->p_physp); 401 if (status != IB_SUCCESS) 402 ret = -1; 403 } 404 } 405 406 cl_plock_release(&p_osm->lock); 407 408 OSM_LOG_EXIT(&p_osm->log); 409 410 return ret; 411 } 412 413 int osm_congestion_control_wait_pending_transactions(struct osm_opensm *p_osm) 414 { 415 osm_congestion_control_t *cc = &p_osm->cc; 416 417 if (!p_osm->subn.opt.congestion_control) 418 return 0; 419 420 while (1) { 421 unsigned count = cc->outstanding_mads; 422 if (!count || osm_exit_flag) 423 break; 424 cl_event_wait_on(&cc->outstanding_mads_done_event, 425 EVENT_NO_TIMEOUT, 426 TRUE); 427 } 428 429 return osm_exit_flag; 430 } 431 432 static inline void decrement_outstanding_mads(osm_congestion_control_t *p_cc) 433 { 434 uint32_t outstanding; 435 436 outstanding = cl_atomic_dec(&p_cc->outstanding_mads); 437 if (!outstanding) 438 cl_event_signal(&p_cc->outstanding_mads_done_event); 439 440 cl_atomic_dec(&p_cc->outstanding_mads_on_wire); 441 cl_event_signal(&p_cc->sig_mads_on_wire_continue); 442 } 443 444 static void cc_rcv_mad(void *context, void *data) 445 { 446 osm_congestion_control_t *p_cc = context; 447 osm_opensm_t *p_osm = p_cc->osm; 448 osm_madw_t *p_madw = data; 449 ib_cc_mad_t *p_cc_mad; 450 osm_madw_context_t *p_mad_context = &p_madw->context; 451 ib_mad_t *p_mad = osm_madw_get_mad_ptr(p_madw); 452 ib_net64_t node_guid = p_mad_context->cc_context.node_guid; 453 ib_net64_t port_guid = p_mad_context->cc_context.port_guid; 454 uint8_t port = p_mad_context->cc_context.port; 455 osm_port_t *p_port; 456 457 OSM_LOG_ENTER(p_cc->log); 458 459 OSM_LOG(p_cc->log, OSM_LOG_VERBOSE, 460 "Processing received MAD status 0x%x for " 461 "attr ID %u mod 0x%x node 0x%" PRIx64 " port %u\n", 462 cl_ntoh16(p_mad->status), cl_ntoh16(p_mad->attr_id), 463 cl_ntoh32(p_mad_context->cc_context.attr_mod), 464 cl_ntoh64(node_guid), port); 465 466 p_cc_mad = osm_madw_get_cc_mad_ptr(p_madw); 467 468 cl_plock_acquire(&p_osm->lock); 469 470 p_port = osm_get_port_by_guid(p_cc->subn, port_guid); 471 if (!p_port) { 472 OSM_LOG(p_cc->log, OSM_LOG_ERROR, "ERR C109: " 473 "Port GUID 0x%" PRIx64 " not in table\n", 474 cl_ntoh64(port_guid)); 475 cl_plock_release(&p_osm->lock); 476 goto Exit; 477 } 478 479 p_port->cc_timeout_count = 0; 480 481 if (p_cc_mad->header.status) { 482 if (p_cc_mad->header.status & IB_MAD_STATUS_UNSUP_CLASS_VER 483 || p_cc_mad->header.status & IB_MAD_STATUS_UNSUP_METHOD 484 || p_cc_mad->header.status & IB_MAD_STATUS_UNSUP_METHOD_ATTR) 485 p_port->cc_unavailable_flag = TRUE; 486 cl_plock_release(&p_osm->lock); 487 goto Exit; 488 } 489 else 490 p_port->cc_unavailable_flag = FALSE; 491 492 if (p_cc_mad->header.attr_id == IB_MAD_ATTR_SW_CONG_SETTING) { 493 ib_sw_cong_setting_t *p_sw_cong_setting; 494 495 p_sw_cong_setting = ib_cc_mad_get_mgt_data_ptr(p_cc_mad); 496 p_port->p_physp->cc.sw.sw_cong_setting = *p_sw_cong_setting; 497 } 498 else if (p_cc_mad->header.attr_id == IB_MAD_ATTR_CA_CONG_SETTING) { 499 ib_ca_cong_setting_t *p_ca_cong_setting; 500 501 p_ca_cong_setting = ib_cc_mad_get_mgt_data_ptr(p_cc_mad); 502 p_port->p_physp->cc.ca.ca_cong_setting = *p_ca_cong_setting; 503 } 504 else if (p_cc_mad->header.attr_id == IB_MAD_ATTR_CC_TBL) { 505 ib_net32_t attr_mod = p_mad_context->cc_context.attr_mod; 506 uint32_t index = cl_ntoh32(attr_mod); 507 ib_cc_tbl_t *p_cc_tbl; 508 509 p_cc_tbl = ib_cc_mad_get_mgt_data_ptr(p_cc_mad); 510 p_port->p_physp->cc.ca.cc_tbl[index] = *p_cc_tbl; 511 } 512 else 513 OSM_LOG(p_cc->log, OSM_LOG_ERROR, "ERR C10A: " 514 "Unexpected MAD attribute ID %u received\n", 515 cl_ntoh16(p_cc_mad->header.attr_id)); 516 517 cl_plock_release(&p_osm->lock); 518 519 Exit: 520 decrement_outstanding_mads(p_cc); 521 osm_mad_pool_put(p_cc->mad_pool, p_madw); 522 OSM_LOG_EXIT(p_cc->log); 523 } 524 525 static void cc_poller_send(osm_congestion_control_t *p_cc, 526 osm_madw_t *p_madw) 527 { 528 osm_subn_opt_t *p_opt = &p_cc->subn->opt; 529 ib_api_status_t status; 530 cl_status_t sts; 531 osm_madw_context_t mad_context = p_madw->context; 532 533 status = osm_vendor_send(p_cc->bind_handle, p_madw, TRUE); 534 if (status == IB_SUCCESS) { 535 cl_atomic_inc(&p_cc->outstanding_mads_on_wire); 536 while (p_cc->outstanding_mads_on_wire > 537 (int32_t)p_opt->cc_max_outstanding_mads) { 538 wait: 539 sts = cl_event_wait_on(&p_cc->sig_mads_on_wire_continue, 540 EVENT_NO_TIMEOUT, TRUE); 541 if (sts != CL_SUCCESS) 542 goto wait; 543 } 544 } else 545 OSM_LOG(p_cc->log, OSM_LOG_ERROR, "ERR C104: " 546 "send failed to node 0x%" PRIx64 "port %u\n", 547 cl_ntoh64(mad_context.cc_context.node_guid), 548 mad_context.cc_context.port); 549 } 550 551 static void cc_poller(void *p_ptr) 552 { 553 osm_congestion_control_t *p_cc = p_ptr; 554 osm_madw_t *p_madw; 555 556 OSM_LOG_ENTER(p_cc->log); 557 558 if (p_cc->thread_state == OSM_THREAD_STATE_NONE) 559 p_cc->thread_state = OSM_THREAD_STATE_RUN; 560 561 while (p_cc->thread_state == OSM_THREAD_STATE_RUN) { 562 cl_spinlock_acquire(&p_cc->mad_queue_lock); 563 564 p_madw = (osm_madw_t *) cl_qlist_remove_head(&p_cc->mad_queue); 565 566 cl_spinlock_release(&p_cc->mad_queue_lock); 567 568 if (p_madw != (osm_madw_t *) cl_qlist_end(&p_cc->mad_queue)) 569 cc_poller_send(p_cc, p_madw); 570 else 571 cl_event_wait_on(&p_cc->cc_poller_wakeup, 572 EVENT_NO_TIMEOUT, TRUE); 573 } 574 575 OSM_LOG_EXIT(p_cc->log); 576 } 577 578 ib_api_status_t osm_congestion_control_init(osm_congestion_control_t * p_cc, 579 struct osm_opensm *p_osm, 580 const osm_subn_opt_t * p_opt) 581 { 582 ib_api_status_t status = IB_SUCCESS; 583 584 OSM_LOG_ENTER(&p_osm->log); 585 586 memset(p_cc, 0, sizeof(*p_cc)); 587 588 p_cc->osm = p_osm; 589 p_cc->subn = &p_osm->subn; 590 p_cc->sm = &p_osm->sm; 591 p_cc->log = &p_osm->log; 592 p_cc->mad_pool = &p_osm->mad_pool; 593 p_cc->trans_id = CONGESTION_CONTROL_INITIAL_TID_VALUE; 594 p_cc->vendor = p_osm->p_vendor; 595 596 p_cc->cc_disp_h = cl_disp_register(&p_osm->disp, OSM_MSG_MAD_CC, 597 cc_rcv_mad, p_cc); 598 if (p_cc->cc_disp_h == CL_DISP_INVALID_HANDLE) 599 goto Exit; 600 601 cl_qlist_init(&p_cc->mad_queue); 602 603 status = cl_spinlock_init(&p_cc->mad_queue_lock); 604 if (status != IB_SUCCESS) 605 goto Exit; 606 607 cl_event_construct(&p_cc->cc_poller_wakeup); 608 status = cl_event_init(&p_cc->cc_poller_wakeup, FALSE); 609 if (status != IB_SUCCESS) 610 goto Exit; 611 612 cl_event_construct(&p_cc->outstanding_mads_done_event); 613 status = cl_event_init(&p_cc->outstanding_mads_done_event, FALSE); 614 if (status != IB_SUCCESS) 615 goto Exit; 616 617 cl_event_construct(&p_cc->sig_mads_on_wire_continue); 618 status = cl_event_init(&p_cc->sig_mads_on_wire_continue, FALSE); 619 if (status != IB_SUCCESS) 620 goto Exit; 621 622 p_cc->thread_state = OSM_THREAD_STATE_NONE; 623 624 status = cl_thread_init(&p_cc->cc_poller, cc_poller, p_cc, 625 "cc poller"); 626 if (status != IB_SUCCESS) 627 goto Exit; 628 629 status = IB_SUCCESS; 630 Exit: 631 OSM_LOG_EXIT(p_cc->log); 632 return status; 633 } 634 635 static void cc_mad_recv_callback(osm_madw_t * p_madw, void *bind_context, 636 osm_madw_t * p_req_madw) 637 { 638 osm_congestion_control_t *p_cc = bind_context; 639 640 OSM_LOG_ENTER(p_cc->log); 641 642 CL_ASSERT(p_madw); 643 644 /* HACK - should be extended when supporting CC traps */ 645 CL_ASSERT(p_req_madw != NULL); 646 647 osm_madw_copy_context(p_madw, p_req_madw); 648 osm_mad_pool_put(p_cc->mad_pool, p_req_madw); 649 650 /* Do not decrement outstanding mads here, do it in the dispatcher */ 651 652 if (cl_disp_post(p_cc->cc_disp_h, OSM_MSG_MAD_CC, 653 p_madw, NULL, NULL) != CL_SUCCESS) { 654 OSM_LOG(p_cc->log, OSM_LOG_ERROR, "ERR C105: " 655 "Congestion Control Dispatcher post failed\n"); 656 osm_mad_pool_put(p_cc->mad_pool, p_madw); 657 } 658 659 OSM_LOG_EXIT(p_cc->log); 660 } 661 662 static void cc_mad_send_err_callback(void *bind_context, 663 osm_madw_t * p_madw) 664 { 665 osm_congestion_control_t *p_cc = bind_context; 666 osm_madw_context_t *p_madw_context = &p_madw->context; 667 osm_opensm_t *p_osm = p_cc->osm; 668 uint64_t node_guid = p_madw_context->cc_context.node_guid; 669 uint64_t port_guid = p_madw_context->cc_context.port_guid; 670 uint8_t port = p_madw_context->cc_context.port; 671 osm_port_t *p_port; 672 int log_flag = 1; 673 674 OSM_LOG_ENTER(p_cc->log); 675 676 cl_plock_acquire(&p_osm->lock); 677 678 p_port = osm_get_port_by_guid(p_cc->subn, port_guid); 679 if (!p_port) { 680 OSM_LOG(p_cc->log, OSM_LOG_ERROR, "ERR C10B: " 681 "Port GUID 0x%" PRIx64 " not in table\n", 682 cl_ntoh64(port_guid)); 683 cl_plock_release(&p_osm->lock); 684 goto Exit; 685 } 686 687 /* If timed out before, don't bothering logging again 688 * we assume no CC support 689 */ 690 if (p_madw->status == IB_TIMEOUT 691 && p_port->cc_timeout_count) 692 log_flag = 0; 693 694 if (log_flag) 695 OSM_LOG(p_cc->log, OSM_LOG_ERROR, "ERR C106: MAD Error (%s): " 696 "attr id = %u LID %u GUID 0x%016" PRIx64 " port %u " 697 "TID 0x%" PRIx64 "\n", 698 ib_get_err_str(p_madw->status), 699 p_madw->p_mad->attr_id, 700 cl_ntoh16(p_madw->mad_addr.dest_lid), 701 cl_ntoh64(node_guid), 702 port, 703 cl_ntoh64(p_madw->p_mad->trans_id)); 704 705 if (p_madw->status == IB_TIMEOUT) { 706 p_port->cc_timeout_count++; 707 if (p_port->cc_timeout_count > OSM_CC_TIMEOUT_COUNT_THRESHOLD 708 && !p_port->cc_unavailable_flag) { 709 p_port->cc_unavailable_flag = TRUE; 710 p_port->cc_timeout_count = 0; 711 } 712 } else 713 p_cc->subn->subnet_initialization_error = TRUE; 714 715 cl_plock_release(&p_osm->lock); 716 717 Exit: 718 osm_mad_pool_put(p_cc->mad_pool, p_madw); 719 720 decrement_outstanding_mads(p_cc); 721 722 OSM_LOG_EXIT(p_cc->log); 723 } 724 725 ib_api_status_t osm_congestion_control_bind(osm_congestion_control_t * p_cc, 726 ib_net64_t port_guid) 727 { 728 osm_bind_info_t bind_info; 729 ib_api_status_t status = IB_SUCCESS; 730 731 OSM_LOG_ENTER(p_cc->log); 732 733 bind_info.port_guid = p_cc->port_guid = port_guid; 734 bind_info.mad_class = IB_MCLASS_CC; 735 bind_info.class_version = 2; 736 bind_info.is_responder = FALSE; 737 bind_info.is_report_processor = FALSE; 738 bind_info.is_trap_processor = FALSE; 739 bind_info.recv_q_size = OSM_SM_DEFAULT_QP1_RCV_SIZE; 740 bind_info.send_q_size = OSM_SM_DEFAULT_QP1_SEND_SIZE; 741 bind_info.timeout = p_cc->subn->opt.transaction_timeout; 742 bind_info.retries = p_cc->subn->opt.transaction_retries; 743 744 OSM_LOG(p_cc->log, OSM_LOG_VERBOSE, 745 "Binding to port GUID 0x%" PRIx64 "\n", cl_ntoh64(port_guid)); 746 747 p_cc->bind_handle = osm_vendor_bind(p_cc->vendor, &bind_info, 748 p_cc->mad_pool, 749 cc_mad_recv_callback, 750 cc_mad_send_err_callback, p_cc); 751 752 if (p_cc->bind_handle == OSM_BIND_INVALID_HANDLE) { 753 status = IB_ERROR; 754 OSM_LOG(p_cc->log, OSM_LOG_ERROR, 755 "ERR C107: Vendor specific bind failed (%s)\n", 756 ib_get_err_str(status)); 757 goto Exit; 758 } 759 760 Exit: 761 OSM_LOG_EXIT(p_cc->log); 762 return status; 763 } 764 765 void osm_congestion_control_shutdown(osm_congestion_control_t * p_cc) 766 { 767 OSM_LOG_ENTER(p_cc->log); 768 if (p_cc->bind_handle == OSM_BIND_INVALID_HANDLE) { 769 OSM_LOG(p_cc->log, OSM_LOG_ERROR, 770 "ERR C108: No previous bind\n"); 771 goto Exit; 772 } 773 cl_disp_unregister(p_cc->cc_disp_h); 774 Exit: 775 OSM_LOG_EXIT(p_cc->log); 776 } 777 778 void osm_congestion_control_destroy(osm_congestion_control_t * p_cc) 779 { 780 osm_madw_t *p_madw; 781 782 OSM_LOG_ENTER(p_cc->log); 783 784 p_cc->thread_state = OSM_THREAD_STATE_EXIT; 785 786 cl_event_signal(&p_cc->sig_mads_on_wire_continue); 787 cl_event_signal(&p_cc->cc_poller_wakeup); 788 789 cl_thread_destroy(&p_cc->cc_poller); 790 791 cl_spinlock_acquire(&p_cc->mad_queue_lock); 792 793 while (!cl_is_qlist_empty(&p_cc->mad_queue)) { 794 p_madw = (osm_madw_t *) cl_qlist_remove_head(&p_cc->mad_queue); 795 osm_mad_pool_put(p_cc->mad_pool, p_madw); 796 } 797 798 cl_spinlock_release(&p_cc->mad_queue_lock); 799 800 cl_spinlock_destroy(&p_cc->mad_queue_lock); 801 802 cl_event_destroy(&p_cc->cc_poller_wakeup); 803 cl_event_destroy(&p_cc->outstanding_mads_done_event); 804 cl_event_destroy(&p_cc->sig_mads_on_wire_continue); 805 806 OSM_LOG_EXIT(p_cc->log); 807 } 808