1 /* 2 * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved. 3 * Copyright (c) 2002-2011 Mellanox Technologies LTD. All rights reserved. 4 * Copyright (c) 1996-2003 Intel Corporation. All rights reserved. 5 * Copyright (c) 2008 Xsigo Systems Inc. All rights reserved. 6 * 7 * This software is available to you under a choice of one of two 8 * licenses. You may choose to be licensed under the terms of the GNU 9 * General Public License (GPL) Version 2, available from the file 10 * COPYING in the main directory of this source tree, or the 11 * OpenIB.org BSD license below: 12 * 13 * Redistribution and use in source and binary forms, with or 14 * without modification, are permitted provided that the following 15 * conditions are met: 16 * 17 * - Redistributions of source code must retain the above 18 * copyright notice, this list of conditions and the following 19 * disclaimer. 20 * 21 * - Redistributions in binary form must reproduce the above 22 * copyright notice, this list of conditions and the following 23 * disclaimer in the documentation and/or other materials 24 * provided with the distribution. 25 * 26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 27 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 28 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 29 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 30 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 31 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 32 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 33 * SOFTWARE. 34 * 35 */ 36 37 /* 38 * Abstract: 39 * Implementation of osm_sm_t. 40 * This object represents the SM Receiver object. 41 * This object is part of the opensm family of objects. 42 */ 43 44 #if HAVE_CONFIG_H 45 # include <config.h> 46 #endif /* HAVE_CONFIG_H */ 47 48 #include <stdlib.h> 49 #include <string.h> 50 #include <iba/ib_types.h> 51 #include <complib/cl_qmap.h> 52 #include <complib/cl_passivelock.h> 53 #include <complib/cl_debug.h> 54 #include <complib/cl_thread.h> 55 #include <opensm/osm_file_ids.h> 56 #define FILE_ID OSM_FILE_SM_C 57 #include <opensm/osm_sm.h> 58 #include <opensm/osm_madw.h> 59 #include <opensm/osm_log.h> 60 #include <opensm/osm_node.h> 61 #include <opensm/osm_msgdef.h> 62 #include <opensm/osm_perfmgr.h> 63 #include <opensm/osm_opensm.h> 64 65 #define OSM_SM_INITIAL_TID_VALUE 0x1233 66 67 extern void osm_lft_rcv_process(IN void *context, IN void *data); 68 extern void osm_mft_rcv_process(IN void *context, IN void *data); 69 extern void osm_nd_rcv_process(IN void *context, IN void *data); 70 extern void osm_ni_rcv_process(IN void *context, IN void *data); 71 extern void osm_pkey_rcv_process(IN void *context, IN void *data); 72 extern void osm_pi_rcv_process(IN void *context, IN void *data); 73 extern void osm_gi_rcv_process(IN void *context, IN void *data); 74 extern void osm_slvl_rcv_process(IN void *context, IN void *p_data); 75 extern void osm_sminfo_rcv_process(IN void *context, IN void *data); 76 extern void osm_si_rcv_process(IN void *context, IN void *data); 77 extern void osm_trap_rcv_process(IN void *context, IN void *data); 78 extern void osm_vla_rcv_process(IN void *context, IN void *data); 79 extern void osm_mlnx_epi_rcv_process(IN void *context, IN void *data); 80 81 extern void osm_state_mgr_process(IN osm_sm_t * sm, IN osm_signal_t signal); 82 extern void osm_sm_state_mgr_polling_callback(IN void *context); 83 84 static void sm_process(osm_sm_t * sm, osm_signal_t signal) 85 { 86 #ifdef ENABLE_OSM_PERF_MGR 87 if (signal == OSM_SIGNAL_PERFMGR_SWEEP) 88 osm_perfmgr_process(&sm->p_subn->p_osm->perfmgr); 89 else 90 #endif 91 osm_state_mgr_process(sm, signal); 92 } 93 94 static void sm_sweeper(IN void *p_ptr) 95 { 96 ib_api_status_t status; 97 osm_sm_t * p_sm = p_ptr; 98 unsigned signals, i; 99 100 OSM_LOG_ENTER(p_sm->p_log); 101 102 while (p_sm->thread_state == OSM_THREAD_STATE_RUN) { 103 /* 104 * Wait on the event with a timeout. 105 * Sweeps may be initiated "off schedule" by simply 106 * signaling the event. 107 */ 108 status = cl_event_wait_on(&p_sm->signal_event, 109 EVENT_NO_TIMEOUT, TRUE); 110 111 if (status == CL_SUCCESS) 112 OSM_LOG(p_sm->p_log, OSM_LOG_DEBUG, 113 "Off schedule sweep signalled\n"); 114 else { 115 OSM_LOG(p_sm->p_log, OSM_LOG_ERROR, "ERR 2E01: " 116 "Event wait failed (%s)\n", 117 CL_STATUS_MSG(status)); 118 continue; 119 } 120 121 if (osm_exit_flag) 122 break; 123 124 cl_spinlock_acquire(&p_sm->signal_lock); 125 signals = p_sm->signal_mask; 126 p_sm->signal_mask = 0; 127 cl_spinlock_release(&p_sm->signal_lock); 128 129 for (i = 0; signals; signals >>= 1, i++) 130 if (signals & 1) 131 sm_process(p_sm, i); 132 } 133 134 OSM_LOG_EXIT(p_sm->p_log); 135 } 136 137 static void sm_sweep(void *arg) 138 { 139 osm_sm_t *sm = arg; 140 141 /* do the sweep only if we are in MASTER state */ 142 if (sm->p_subn->sm_state == IB_SMINFO_STATE_MASTER || 143 sm->p_subn->sm_state == IB_SMINFO_STATE_DISCOVERING) 144 osm_sm_signal(sm, OSM_SIGNAL_SWEEP); 145 cl_timer_start(&sm->sweep_timer, sm->p_subn->opt.sweep_interval * 1000); 146 } 147 148 static void sweep_fail_process(IN void *context, IN void *p_data) 149 { 150 osm_sm_t *sm = context; 151 152 OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "light sweep failed\n"); 153 sm->p_subn->force_heavy_sweep = TRUE; 154 } 155 156 void osm_sm_construct(IN osm_sm_t * p_sm) 157 { 158 memset(p_sm, 0, sizeof(*p_sm)); 159 p_sm->thread_state = OSM_THREAD_STATE_NONE; 160 p_sm->sm_trans_id = OSM_SM_INITIAL_TID_VALUE; 161 cl_spinlock_construct(&p_sm->signal_lock); 162 cl_spinlock_construct(&p_sm->state_lock); 163 cl_timer_construct(&p_sm->polling_timer); 164 cl_event_construct(&p_sm->signal_event); 165 cl_event_construct(&p_sm->subnet_up_event); 166 cl_event_wheel_construct(&p_sm->trap_aging_tracker); 167 cl_thread_construct(&p_sm->sweeper); 168 osm_sm_mad_ctrl_construct(&p_sm->mad_ctrl); 169 osm_lid_mgr_construct(&p_sm->lid_mgr); 170 osm_ucast_mgr_construct(&p_sm->ucast_mgr); 171 } 172 173 void osm_sm_shutdown(IN osm_sm_t * p_sm) 174 { 175 boolean_t signal_event = FALSE; 176 177 OSM_LOG_ENTER(p_sm->p_log); 178 179 /* 180 * Signal our threads that we're leaving. 181 */ 182 if (p_sm->thread_state != OSM_THREAD_STATE_NONE) 183 signal_event = TRUE; 184 185 p_sm->thread_state = OSM_THREAD_STATE_EXIT; 186 187 /* 188 * Don't trigger unless event has been initialized. 189 * Destroy the thread before we tear down the other objects. 190 */ 191 if (signal_event) 192 cl_event_signal(&p_sm->signal_event); 193 194 cl_timer_stop(&p_sm->polling_timer); 195 cl_timer_stop(&p_sm->sweep_timer); 196 cl_thread_destroy(&p_sm->sweeper); 197 198 /* 199 * Always destroy controllers before the corresponding 200 * receiver to guarantee that all callbacks from the 201 * dispatcher are complete. 202 */ 203 osm_sm_mad_ctrl_destroy(&p_sm->mad_ctrl); 204 cl_disp_unregister(p_sm->ni_disp_h); 205 cl_disp_unregister(p_sm->pi_disp_h); 206 cl_disp_unregister(p_sm->gi_disp_h); 207 cl_disp_unregister(p_sm->si_disp_h); 208 cl_disp_unregister(p_sm->nd_disp_h); 209 cl_disp_unregister(p_sm->lft_disp_h); 210 cl_disp_unregister(p_sm->mft_disp_h); 211 cl_disp_unregister(p_sm->sm_info_disp_h); 212 cl_disp_unregister(p_sm->trap_disp_h); 213 cl_disp_unregister(p_sm->slvl_disp_h); 214 cl_disp_unregister(p_sm->vla_disp_h); 215 cl_disp_unregister(p_sm->pkey_disp_h); 216 cl_disp_unregister(p_sm->mlnx_epi_disp_h); 217 cl_disp_unregister(p_sm->sweep_fail_disp_h); 218 219 OSM_LOG_EXIT(p_sm->p_log); 220 } 221 222 void osm_sm_destroy(IN osm_sm_t * p_sm) 223 { 224 OSM_LOG_ENTER(p_sm->p_log); 225 osm_lid_mgr_destroy(&p_sm->lid_mgr); 226 osm_ucast_mgr_destroy(&p_sm->ucast_mgr); 227 cl_event_wheel_destroy(&p_sm->trap_aging_tracker); 228 cl_timer_destroy(&p_sm->sweep_timer); 229 cl_timer_destroy(&p_sm->polling_timer); 230 cl_event_destroy(&p_sm->signal_event); 231 cl_event_destroy(&p_sm->subnet_up_event); 232 cl_spinlock_destroy(&p_sm->signal_lock); 233 cl_spinlock_destroy(&p_sm->state_lock); 234 free(p_sm->mlids_req); 235 236 osm_log_v2(p_sm->p_log, OSM_LOG_SYS, FILE_ID, "Exiting SM\n"); /* Format Waived */ 237 OSM_LOG_EXIT(p_sm->p_log); 238 } 239 240 ib_api_status_t osm_sm_init(IN osm_sm_t * p_sm, IN osm_subn_t * p_subn, 241 IN osm_db_t * p_db, IN osm_vendor_t * p_vendor, 242 IN osm_mad_pool_t * p_mad_pool, 243 IN osm_vl15_t * p_vl15, IN osm_log_t * p_log, 244 IN osm_stats_t * p_stats, 245 IN cl_dispatcher_t * p_disp, IN cl_plock_t * p_lock) 246 { 247 ib_api_status_t status; 248 249 OSM_LOG_ENTER(p_log); 250 251 p_sm->p_subn = p_subn; 252 p_sm->p_db = p_db; 253 p_sm->p_vendor = p_vendor; 254 p_sm->p_mad_pool = p_mad_pool; 255 p_sm->p_vl15 = p_vl15; 256 p_sm->p_log = p_log; 257 p_sm->p_disp = p_disp; 258 p_sm->p_lock = p_lock; 259 260 status = cl_spinlock_init(&p_sm->signal_lock); 261 if (status != CL_SUCCESS) 262 goto Exit; 263 264 status = cl_spinlock_init(&p_sm->state_lock); 265 if (status != CL_SUCCESS) 266 goto Exit; 267 268 status = cl_event_init(&p_sm->signal_event, FALSE); 269 if (status != CL_SUCCESS) 270 goto Exit; 271 272 status = cl_event_init(&p_sm->subnet_up_event, FALSE); 273 if (status != CL_SUCCESS) 274 goto Exit; 275 276 status = cl_timer_init(&p_sm->sweep_timer, sm_sweep, p_sm); 277 if (status != CL_SUCCESS) 278 goto Exit; 279 280 status = cl_timer_init(&p_sm->polling_timer, 281 osm_sm_state_mgr_polling_callback, p_sm); 282 if (status != CL_SUCCESS) 283 goto Exit; 284 285 p_sm->mlids_req_max = 0; 286 p_sm->mlids_req = malloc((IB_LID_MCAST_END_HO - IB_LID_MCAST_START_HO + 287 1) * sizeof(p_sm->mlids_req[0])); 288 if (!p_sm->mlids_req) 289 goto Exit; 290 memset(p_sm->mlids_req, 0, 291 (IB_LID_MCAST_END_HO - IB_LID_MCAST_START_HO + 292 1) * sizeof(p_sm->mlids_req[0])); 293 294 status = osm_sm_mad_ctrl_init(&p_sm->mad_ctrl, p_sm->p_subn, 295 p_sm->p_mad_pool, p_sm->p_vl15, 296 p_sm->p_vendor, 297 p_log, p_stats, p_lock, p_disp); 298 if (status != IB_SUCCESS) 299 goto Exit; 300 301 status = cl_event_wheel_init(&p_sm->trap_aging_tracker); 302 if (status != IB_SUCCESS) 303 goto Exit; 304 305 status = osm_lid_mgr_init(&p_sm->lid_mgr, p_sm); 306 if (status != IB_SUCCESS) 307 goto Exit; 308 309 status = osm_ucast_mgr_init(&p_sm->ucast_mgr, p_sm); 310 if (status != IB_SUCCESS) 311 goto Exit; 312 313 status = IB_INSUFFICIENT_RESOURCES; 314 p_sm->sweep_fail_disp_h = cl_disp_register(p_disp, 315 OSM_MSG_LIGHT_SWEEP_FAIL, 316 sweep_fail_process, p_sm); 317 if (p_sm->sweep_fail_disp_h == CL_DISP_INVALID_HANDLE) 318 goto Exit; 319 320 p_sm->ni_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_NODE_INFO, 321 osm_ni_rcv_process, p_sm); 322 if (p_sm->ni_disp_h == CL_DISP_INVALID_HANDLE) 323 goto Exit; 324 325 p_sm->pi_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_PORT_INFO, 326 osm_pi_rcv_process, p_sm); 327 if (p_sm->pi_disp_h == CL_DISP_INVALID_HANDLE) 328 goto Exit; 329 330 p_sm->gi_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_GUID_INFO, 331 osm_gi_rcv_process, p_sm); 332 if (p_sm->gi_disp_h == CL_DISP_INVALID_HANDLE) 333 goto Exit; 334 335 p_sm->si_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_SWITCH_INFO, 336 osm_si_rcv_process, p_sm); 337 if (p_sm->si_disp_h == CL_DISP_INVALID_HANDLE) 338 goto Exit; 339 340 p_sm->nd_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_NODE_DESC, 341 osm_nd_rcv_process, p_sm); 342 if (p_sm->nd_disp_h == CL_DISP_INVALID_HANDLE) 343 goto Exit; 344 345 p_sm->lft_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_LFT, 346 osm_lft_rcv_process, p_sm); 347 if (p_sm->lft_disp_h == CL_DISP_INVALID_HANDLE) 348 goto Exit; 349 350 p_sm->mft_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_MFT, 351 osm_mft_rcv_process, p_sm); 352 if (p_sm->mft_disp_h == CL_DISP_INVALID_HANDLE) 353 goto Exit; 354 355 p_sm->sm_info_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_SM_INFO, 356 osm_sminfo_rcv_process, p_sm); 357 if (p_sm->sm_info_disp_h == CL_DISP_INVALID_HANDLE) 358 goto Exit; 359 360 p_sm->trap_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_NOTICE, 361 osm_trap_rcv_process, p_sm); 362 if (p_sm->trap_disp_h == CL_DISP_INVALID_HANDLE) 363 goto Exit; 364 365 p_sm->slvl_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_SLVL, 366 osm_slvl_rcv_process, p_sm); 367 if (p_sm->slvl_disp_h == CL_DISP_INVALID_HANDLE) 368 goto Exit; 369 370 p_sm->vla_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_VL_ARB, 371 osm_vla_rcv_process, p_sm); 372 if (p_sm->vla_disp_h == CL_DISP_INVALID_HANDLE) 373 goto Exit; 374 375 p_sm->pkey_disp_h = cl_disp_register(p_disp, OSM_MSG_MAD_PKEY, 376 osm_pkey_rcv_process, p_sm); 377 if (p_sm->pkey_disp_h == CL_DISP_INVALID_HANDLE) 378 goto Exit; 379 380 p_sm->mlnx_epi_disp_h = cl_disp_register(p_disp, 381 OSM_MSG_MAD_MLNX_EXT_PORT_INFO, 382 osm_mlnx_epi_rcv_process, p_sm); 383 if (p_sm->mlnx_epi_disp_h == CL_DISP_INVALID_HANDLE) 384 goto Exit; 385 386 p_subn->sm_state = p_subn->opt.sm_inactive ? 387 IB_SMINFO_STATE_NOTACTIVE : IB_SMINFO_STATE_DISCOVERING; 388 osm_report_sm_state(p_sm); 389 390 /* 391 * Now that the component objects are initialized, start 392 * the sweeper thread if the user wants sweeping. 393 */ 394 p_sm->thread_state = OSM_THREAD_STATE_RUN; 395 status = cl_thread_init(&p_sm->sweeper, sm_sweeper, p_sm, 396 "opensm sweeper"); 397 if (status != IB_SUCCESS) 398 goto Exit; 399 400 if (p_sm->p_subn->opt.sweep_interval) 401 cl_timer_start(&p_sm->sweep_timer, 402 p_sm->p_subn->opt.sweep_interval * 1000); 403 404 Exit: 405 OSM_LOG_EXIT(p_log); 406 return status; 407 } 408 409 void osm_sm_signal(osm_sm_t * p_sm, osm_signal_t signal) 410 { 411 cl_spinlock_acquire(&p_sm->signal_lock); 412 p_sm->signal_mask |= 1 << signal; 413 cl_event_signal(&p_sm->signal_event); 414 cl_spinlock_release(&p_sm->signal_lock); 415 } 416 417 void osm_sm_sweep(IN osm_sm_t * p_sm) 418 { 419 OSM_LOG_ENTER(p_sm->p_log); 420 osm_sm_signal(p_sm, OSM_SIGNAL_SWEEP); 421 OSM_LOG_EXIT(p_sm->p_log); 422 } 423 424 ib_api_status_t osm_sm_bind(IN osm_sm_t * p_sm, IN ib_net64_t port_guid) 425 { 426 ib_api_status_t status; 427 428 OSM_LOG_ENTER(p_sm->p_log); 429 430 status = osm_sm_mad_ctrl_bind(&p_sm->mad_ctrl, port_guid); 431 432 if (status != IB_SUCCESS) { 433 OSM_LOG(p_sm->p_log, OSM_LOG_ERROR, "ERR 2E10: " 434 "SM MAD Controller bind failed (%s)\n", 435 ib_get_err_str(status)); 436 goto Exit; 437 } 438 439 Exit: 440 OSM_LOG_EXIT(p_sm->p_log); 441 return status; 442 } 443 444 void osm_sm_reroute_mlid(osm_sm_t * sm, ib_net16_t mlid) 445 { 446 mlid = cl_ntoh16(mlid) - IB_LID_MCAST_START_HO; 447 sm->mlids_req[mlid] = 1; 448 if (sm->mlids_req_max < mlid) 449 sm->mlids_req_max = mlid; 450 osm_sm_signal(sm, OSM_SIGNAL_IDLE_TIME_PROCESS_REQUEST); 451 OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "rerouting requested for MLID 0x%x\n", 452 mlid + IB_LID_MCAST_START_HO); 453 } 454 455 void osm_set_sm_priority(osm_sm_t * sm, uint8_t priority) 456 { 457 uint8_t old_pri = sm->p_subn->opt.sm_priority; 458 459 sm->p_subn->opt.sm_priority = priority; 460 461 if (old_pri < priority && 462 sm->p_subn->sm_state == IB_SMINFO_STATE_STANDBY) 463 osm_send_trap144(sm, TRAP_144_MASK_SM_PRIORITY_CHANGE); 464 } 465