1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * tavor_agents.c 29 * Tavor InfiniBand Management Agent (SMA, PMA, BMA) routines 30 * 31 * Implements all the routines necessary for initializing, handling, 32 * and (later) tearing down all the infrastructure necessary for Tavor 33 * MAD processing. 34 */ 35 36 #include <sys/types.h> 37 #include <sys/conf.h> 38 #include <sys/ddi.h> 39 #include <sys/sunddi.h> 40 #include <sys/modctl.h> 41 42 #include <sys/ib/adapters/tavor/tavor.h> 43 #include <sys/ib/mgt/ibmf/ibmf.h> 44 #include <sys/disp.h> 45 46 static void tavor_agent_request_cb(ibmf_handle_t ibmf_handle, 47 ibmf_msg_t *msgp, void *args); 48 static void tavor_agent_handle_req(void *cb_args); 49 static void tavor_agent_response_cb(ibmf_handle_t ibmf_handle, 50 ibmf_msg_t *msgp, void *args); 51 static int tavor_agent_list_init(tavor_state_t *state); 52 static void tavor_agent_list_fini(tavor_state_t *state); 53 static int tavor_agent_register_all(tavor_state_t *state); 54 static int tavor_agent_unregister_all(tavor_state_t *state, int num_reg); 55 static void tavor_agent_mad_resp_handling(tavor_state_t *state, 56 ibmf_msg_t *msgp, uint_t port); 57 58 /* 59 * tavor_agent_handlers_init() 60 * Context: Only called from attach() and/or detach() path contexts 61 */ 62 int 63 tavor_agent_handlers_init(tavor_state_t *state) 64 { 65 int status; 66 char *rsrc_name; 67 68 /* Determine if we need to register any agents with the IBMF */ 69 if ((state->ts_cfg_profile->cp_qp0_agents_in_fw) && 70 (state->ts_cfg_profile->cp_qp1_agents_in_fw)) { 71 return (DDI_SUCCESS); 72 } 73 74 /* 75 * Build a unique name for the Tavor task queue from the Tavor driver 76 * instance number and TAVOR_TASKQ_NAME 77 */ 78 rsrc_name = (char *)kmem_zalloc(TAVOR_RSRC_NAME_MAXLEN, KM_SLEEP); 79 TAVOR_RSRC_NAME(rsrc_name, TAVOR_TASKQ_NAME); 80 81 /* Initialize the Tavor IB management agent list */ 82 status = tavor_agent_list_init(state); 83 if (status != DDI_SUCCESS) { 84 goto agentsinit_fail; 85 } 86 87 /* 88 * Initialize the agent handling task queue. Note: We set the task 89 * queue priority to the minimum system priority. At this point this 90 * is considered acceptable because MADs are unreliable datagrams 91 * and could get lost (in general) anyway. 92 */ 93 state->ts_taskq_agents = ddi_taskq_create(state->ts_dip, 94 rsrc_name, TAVOR_TASKQ_NTHREADS, TASKQ_DEFAULTPRI, 0); 95 if (state->ts_taskq_agents == NULL) { 96 tavor_agent_list_fini(state); 97 goto agentsinit_fail; 98 } 99 100 /* Now attempt to register all of the agents with the IBMF */ 101 status = tavor_agent_register_all(state); 102 if (status != DDI_SUCCESS) { 103 ddi_taskq_destroy(state->ts_taskq_agents); 104 tavor_agent_list_fini(state); 105 goto agentsinit_fail; 106 } 107 108 kmem_free(rsrc_name, TAVOR_RSRC_NAME_MAXLEN); 109 return (DDI_SUCCESS); 110 111 agentsinit_fail: 112 kmem_free(rsrc_name, TAVOR_RSRC_NAME_MAXLEN); 113 return (status); 114 } 115 116 117 /* 118 * tavor_agent_handlers_fini() 119 * Context: Only called from detach() path context 120 */ 121 int 122 tavor_agent_handlers_fini(tavor_state_t *state) 123 { 124 int status; 125 126 /* Determine if we need to unregister any agents from the IBMF */ 127 if ((state->ts_cfg_profile->cp_qp0_agents_in_fw) && 128 (state->ts_cfg_profile->cp_qp1_agents_in_fw)) { 129 return (DDI_SUCCESS); 130 } 131 132 /* Now attempt to unregister all of the agents from the IBMF */ 133 status = tavor_agent_unregister_all(state, state->ts_num_agents); 134 if (status != DDI_SUCCESS) { 135 return (DDI_FAILURE); 136 } 137 138 /* 139 * Destroy the task queue. The task queue destroy is guaranteed to 140 * wait until any scheduled tasks have completed. We are able to 141 * guarantee that no _new_ tasks will be added the task queue while 142 * we are in the ddi_taskq_destroy() call because we have 143 * (at this point) successfully unregistered from IBMF (in 144 * tavor_agent_unregister_all() above). 145 */ 146 ddi_taskq_destroy(state->ts_taskq_agents); 147 148 /* Teardown the Tavor IB management agent list */ 149 tavor_agent_list_fini(state); 150 151 return (DDI_SUCCESS); 152 } 153 154 155 /* 156 * tavor_agent_request_cb() 157 * Context: Called from the IBMF context 158 */ 159 static void 160 tavor_agent_request_cb(ibmf_handle_t ibmf_handle, ibmf_msg_t *msgp, 161 void *args) 162 { 163 tavor_agent_handler_arg_t *cb_args; 164 tavor_agent_list_t *curr; 165 tavor_state_t *state; 166 int status; 167 168 curr = (tavor_agent_list_t *)args; 169 state = curr->agl_state; 170 171 /* 172 * Allocate space to hold the callback args (for passing to the 173 * task queue). Note: If we are unable to allocate space for the 174 * the callback args here, then we just return. But we must ensure 175 * that we call ibmf_free_msg() to free up the message. 176 */ 177 cb_args = (tavor_agent_handler_arg_t *)kmem_zalloc( 178 sizeof (tavor_agent_handler_arg_t), KM_NOSLEEP); 179 if (cb_args == NULL) { 180 (void) ibmf_free_msg(ibmf_handle, &msgp); 181 return; 182 } 183 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*cb_args)) 184 185 /* Fill in the callback args */ 186 cb_args->ahd_ibmfhdl = ibmf_handle; 187 cb_args->ahd_ibmfmsg = msgp; 188 cb_args->ahd_agentlist = args; 189 190 /* 191 * Dispatch the message to the task queue. Note: Just like above, 192 * if this request fails for any reason then make sure to free up 193 * the IBMF message and then return 194 */ 195 status = ddi_taskq_dispatch(state->ts_taskq_agents, 196 tavor_agent_handle_req, cb_args, DDI_NOSLEEP); 197 if (status == DDI_FAILURE) { 198 kmem_free(cb_args, sizeof (tavor_agent_handler_arg_t)); 199 (void) ibmf_free_msg(ibmf_handle, &msgp); 200 } 201 } 202 203 /* 204 * tavor_agent_handle_req() 205 * Context: Called with priority of taskQ thread 206 */ 207 static void 208 tavor_agent_handle_req(void *cb_args) 209 { 210 tavor_agent_handler_arg_t *agent_args; 211 tavor_agent_list_t *curr; 212 tavor_state_t *state; 213 ibmf_handle_t ibmf_handle; 214 ibmf_msg_t *msgp; 215 ibmf_msg_bufs_t *recv_msgbufp; 216 ibmf_msg_bufs_t *send_msgbufp; 217 ibmf_retrans_t retrans; 218 uint_t port; 219 int status; 220 221 /* Extract the necessary info from the callback args parameter */ 222 agent_args = (tavor_agent_handler_arg_t *)cb_args; 223 ibmf_handle = agent_args->ahd_ibmfhdl; 224 msgp = agent_args->ahd_ibmfmsg; 225 curr = agent_args->ahd_agentlist; 226 state = curr->agl_state; 227 port = curr->agl_port; 228 229 /* 230 * Set the message send buffer pointers to the message receive buffer 231 * pointers to reuse the IBMF provided buffers for the sender 232 * information. 233 */ 234 recv_msgbufp = &msgp->im_msgbufs_recv; 235 send_msgbufp = &msgp->im_msgbufs_send; 236 bcopy(recv_msgbufp, send_msgbufp, sizeof (ibmf_msg_bufs_t)); 237 238 /* 239 * Check if the incoming packet is a special "Tavor Trap" MAD. If it 240 * is, then do the special handling. If it isn't, then simply pass it 241 * on to the firmware and forward the response back to the IBMF. 242 * 243 * Note: Tavor has a unique method for handling internally generated 244 * Traps. All internally detected/generated Trap messages are 245 * automatically received by the IBMF (as receive completions on QP0), 246 * which (because all Tavor Trap MADs have SLID == 0) detects it as a 247 * special "Tavor Trap" and forwards it here to the driver's SMA. 248 * It is then our responsibility here to fill in the Trap MAD's DLID 249 * for forwarding to the real Master SM (as programmed in the port's 250 * PortInfo.MasterSMLID field.) 251 */ 252 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(msgp->im_local_addr)) 253 if (TAVOR_IS_SPECIAL_TRAP_MAD(msgp)) { 254 msgp->im_local_addr.ia_remote_lid = 255 TAVOR_PORT_MASTERSMLID_GET(state, port - 1); 256 } else { 257 /* 258 * Post the command to the firmware (using the MAD_IFC 259 * command). Note: We also reuse the command that was passed 260 * in. We pass the pointer to the original MAD payload as if 261 * it were both the source of the incoming MAD as well as the 262 * destination for the response. This is acceptable and saves 263 * us the step of one additional copy. Note: If this command 264 * fails for any reason other than TAVOR_CMD_BAD_PKT, it 265 * probably indicates a serious problem. 266 */ 267 status = tavor_mad_ifc_cmd_post(state, port, 268 TAVOR_CMD_SLEEP_NOSPIN, 269 (uint32_t *)recv_msgbufp->im_bufs_mad_hdr, 270 (uint32_t *)send_msgbufp->im_bufs_mad_hdr); 271 if (status != TAVOR_CMD_SUCCESS) { 272 if ((status != TAVOR_CMD_BAD_PKT) && 273 (status != TAVOR_CMD_INSUFF_RSRC)) { 274 cmn_err(CE_CONT, "Tavor: MAD_IFC (port %02d) " 275 "command failed: %08x\n", port, status); 276 } 277 278 /* finish cleanup */ 279 goto tavor_agent_handle_req_skip_response; 280 } 281 } 282 283 /* 284 * If incoming MAD was "TrapRepress", then no response is necessary. 285 * Free the IBMF message and return. 286 */ 287 if (TAVOR_IS_TRAP_REPRESS_MAD(msgp)) { 288 goto tavor_agent_handle_req_skip_response; 289 } 290 291 /* 292 * Modify the response MAD as necessary (for any special cases). 293 * Specifically, if this MAD was a directed route MAD, then some 294 * additional packet manipulation may be necessary because the Tavor 295 * firmware does not do all the required steps to respond to the 296 * MAD. 297 */ 298 tavor_agent_mad_resp_handling(state, msgp, port); 299 300 /* 301 * Send response (or forwarded "Trap" MAD) back to IBMF. We use the 302 * "response callback" to indicate when it is appropriate (later) to 303 * free the IBMF msg. 304 */ 305 status = ibmf_msg_transport(ibmf_handle, IBMF_QP_HANDLE_DEFAULT, 306 msgp, &retrans, tavor_agent_response_cb, state, 0); 307 if (status != IBMF_SUCCESS) { 308 goto tavor_agent_handle_req_skip_response; 309 } 310 311 /* Free up the callback args parameter */ 312 kmem_free(agent_args, sizeof (tavor_agent_handler_arg_t)); 313 return; 314 315 tavor_agent_handle_req_skip_response: 316 /* Free up the ibmf message */ 317 status = ibmf_free_msg(ibmf_handle, &msgp); 318 /* Free up the callback args parameter */ 319 kmem_free(agent_args, sizeof (tavor_agent_handler_arg_t)); 320 } 321 322 323 /* 324 * tavor_agent_response_cb() 325 * Context: Called from the IBMF context 326 */ 327 /* ARGSUSED */ 328 static void 329 tavor_agent_response_cb(ibmf_handle_t ibmf_handle, ibmf_msg_t *msgp, 330 void *args) 331 { 332 /* 333 * It is the responsibility of each IBMF callback recipient to free 334 * the packets that it has been given. Now that we are in the 335 * response callback, we can be assured that it is safe to do so. 336 */ 337 (void) ibmf_free_msg(ibmf_handle, &msgp); 338 } 339 340 341 /* 342 * tavor_agent_list_init() 343 * Context: Only called from attach() path context 344 */ 345 static int 346 tavor_agent_list_init(tavor_state_t *state) 347 { 348 tavor_agent_list_t *curr; 349 uint_t num_ports, num_agents, num_agents_per_port; 350 uint_t num_sma_agents = 0; 351 uint_t num_pma_agents = 0; 352 uint_t num_bma_agents = 0; 353 uint_t do_qp0, do_qp1; 354 int i, j, indx; 355 356 /* 357 * Calculate the number of registered agents for each port 358 * (SMA, PMA, and BMA) and determine whether or not to register 359 * a given agent with the IBMF (or whether to let the Tavor firmware 360 * handle it) 361 */ 362 num_ports = state->ts_cfg_profile->cp_num_ports; 363 num_agents = 0; 364 num_agents_per_port = 0; 365 do_qp0 = state->ts_cfg_profile->cp_qp0_agents_in_fw; 366 do_qp1 = state->ts_cfg_profile->cp_qp1_agents_in_fw; 367 if (do_qp0 == 0) { 368 num_agents += (num_ports * TAVOR_NUM_QP0_AGENTS_PER_PORT); 369 num_agents_per_port += TAVOR_NUM_QP0_AGENTS_PER_PORT; 370 num_sma_agents = num_ports; 371 } 372 if (do_qp1 == 0) { 373 num_agents += (num_ports * TAVOR_NUM_QP1_AGENTS_PER_PORT); 374 num_agents_per_port += TAVOR_NUM_QP1_AGENTS_PER_PORT; 375 num_pma_agents = num_ports; 376 /* 377 * The following line is commented out because the Tavor 378 * firmware does not currently support a BMA. If it did, 379 * then we would want to register the agent with the IBMF. 380 * (We would also need to have TAVOR_NUM_QP1_AGENTS_PER_PORT 381 * set to 2, instead of 1.) 382 * 383 * num_bma_agents = num_ports; 384 */ 385 } 386 387 state->ts_num_agents = num_agents; 388 389 /* 390 * Allocate the memory for all of the agent list entries 391 */ 392 state->ts_agents = (tavor_agent_list_t *)kmem_zalloc(num_agents * 393 sizeof (tavor_agent_list_t), KM_SLEEP); 394 if (state->ts_agents == NULL) { 395 return (DDI_FAILURE); 396 } 397 398 /* 399 * Fill in each of the agent list entries with the agent's 400 * MgmtClass, port number, and Tavor softstate pointer 401 */ 402 indx = 0; 403 for (i = 0; i < num_agents_per_port; i++) { 404 for (j = 0; j < num_ports; j++) { 405 curr = &state->ts_agents[indx]; 406 curr->agl_state = state; 407 curr->agl_port = j + 1; 408 409 if ((do_qp0 == 0) && num_sma_agents) { 410 curr->agl_mgmtclass = SUBN_AGENT; 411 num_sma_agents--; 412 indx++; 413 } else if ((do_qp1 == 0) && (num_pma_agents)) { 414 curr->agl_mgmtclass = PERF_AGENT; 415 num_pma_agents--; 416 indx++; 417 } else if ((do_qp1 == 0) && (num_bma_agents)) { 418 curr->agl_mgmtclass = BM_AGENT; 419 num_bma_agents--; 420 indx++; 421 } 422 } 423 } 424 425 return (DDI_SUCCESS); 426 } 427 428 429 /* 430 * tavor_agent_list_fini() 431 * Context: Only called from attach() and/or detach() path contexts 432 */ 433 static void 434 tavor_agent_list_fini(tavor_state_t *state) 435 { 436 /* Free up the memory for the agent list entries */ 437 kmem_free(state->ts_agents, 438 state->ts_num_agents * sizeof (tavor_agent_list_t)); 439 } 440 441 442 /* 443 * tavor_agent_register_all() 444 * Context: Only called from attach() path context 445 */ 446 static int 447 tavor_agent_register_all(tavor_state_t *state) 448 { 449 tavor_agent_list_t *curr; 450 ibmf_register_info_t ibmf_reg; 451 ibmf_impl_caps_t impl_caps; 452 ib_guid_t nodeguid; 453 int i, status, num_registered; 454 455 /* Get the Tavor NodeGUID from the softstate */ 456 nodeguid = state->ts_ibtfinfo.hca_attr->hca_node_guid; 457 458 /* 459 * Register each of the agents with the IBMF (and add callbacks for 460 * each to the tavor_agent_request_cb() routine). Note: If we 461 * fail somewhere along the line here, we attempt to cleanup as much 462 * of the mess as we can and then jump to tavor_agent_unregister_all() 463 * to cleanup the rest. 464 */ 465 num_registered = 0; 466 for (i = 0; i < state->ts_num_agents; i++) { 467 468 /* Register each agent with the IBMF */ 469 curr = &state->ts_agents[i]; 470 ibmf_reg.ir_ci_guid = nodeguid; 471 ibmf_reg.ir_port_num = curr->agl_port; 472 ibmf_reg.ir_client_class = curr->agl_mgmtclass; 473 status = ibmf_register(&ibmf_reg, IBMF_VERSION, 0, 474 NULL, NULL, &curr->agl_ibmfhdl, &impl_caps); 475 if (status != IBMF_SUCCESS) { 476 goto agents_reg_fail; 477 } 478 479 /* Setup callbacks with the IBMF */ 480 status = ibmf_setup_async_cb(curr->agl_ibmfhdl, 481 IBMF_QP_HANDLE_DEFAULT, tavor_agent_request_cb, curr, 0); 482 if (status != IBMF_SUCCESS) { 483 (void) ibmf_unregister(&curr->agl_ibmfhdl, 0); 484 goto agents_reg_fail; 485 } 486 num_registered++; 487 } 488 489 return (DDI_SUCCESS); 490 491 agents_reg_fail: 492 (void) tavor_agent_unregister_all(state, num_registered); 493 return (DDI_FAILURE); 494 } 495 496 497 /* 498 * tavor_agent_unregister_all() 499 * Context: Only called from detach() path context 500 */ 501 static int 502 tavor_agent_unregister_all(tavor_state_t *state, int num_reg) 503 { 504 tavor_agent_list_t *curr; 505 int i, status; 506 507 /* 508 * For each registered agent in the agent list, teardown the 509 * callbacks from the IBMF and unregister. 510 */ 511 for (i = 0; i < num_reg; i++) { 512 curr = &state->ts_agents[i]; 513 514 /* Teardown the IBMF callback */ 515 status = ibmf_tear_down_async_cb(curr->agl_ibmfhdl, 516 IBMF_QP_HANDLE_DEFAULT, 0); 517 if (status != IBMF_SUCCESS) { 518 return (DDI_FAILURE); 519 } 520 521 /* Unregister the agent from the IBMF */ 522 status = ibmf_unregister(&curr->agl_ibmfhdl, 0); 523 if (status != IBMF_SUCCESS) { 524 return (DDI_FAILURE); 525 } 526 } 527 528 return (DDI_SUCCESS); 529 } 530 531 532 /* 533 * tavor_agent_mad_resp_handling() 534 * Context: Called with priority of taskQ thread 535 */ 536 /* ARGSUSED */ 537 static void 538 tavor_agent_mad_resp_handling(tavor_state_t *state, ibmf_msg_t *msgp, 539 uint_t port) 540 { 541 ib_mad_hdr_t *rmadhdrp = msgp->im_msgbufs_recv.im_bufs_mad_hdr; 542 ib_mad_hdr_t *smadhdrp = msgp->im_msgbufs_send.im_bufs_mad_hdr; 543 uint_t hop_count, hop_point; 544 uchar_t *resp, *ret_path; 545 546 resp = (uchar_t *)msgp->im_msgbufs_send.im_bufs_cl_data; 547 548 /* 549 * Handle directed route MADs as a special case. Tavor firmware 550 * does not update the "direction" bit, "hop pointer", "Return 551 * Path" or, in fact, any of the "directed route" parameters. So 552 * the responsibility falls on Tavor driver software to inspect the 553 * MADs and update those fields as appropriate (see section 14.2.2 554 * of the IBA specification, rev 1.1) 555 */ 556 if (TAVOR_MAD_IS_DR(rmadhdrp)) { 557 558 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*((sm_dr_mad_hdr_t *)rmadhdrp))) 559 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*((sm_dr_mad_hdr_t *)smadhdrp))) 560 561 /* 562 * Set the "Direction" bit to one. This indicates that this 563 * is now directed route response 564 */ 565 TAVOR_DRMAD_SET_DIRECTION(rmadhdrp); 566 567 /* Extract the "hop pointer" and "hop count" from the MAD */ 568 hop_count = TAVOR_DRMAD_GET_HOPCOUNT(rmadhdrp); 569 hop_point = TAVOR_DRMAD_GET_HOPPOINTER(rmadhdrp); 570 571 /* Append the port we came in on to the "Return Path" */ 572 if ((hop_count != 0) && ((hop_point == hop_count) || 573 (hop_point == hop_count + 1))) { 574 ret_path = &resp[TAVOR_DRMAD_RETURN_PATH_OFFSET]; 575 ret_path[hop_point] = port; 576 } 577 578 /* Then increment the "hop pointer" in the MAD */ 579 hop_point++; 580 TAVOR_DRMAD_SET_HOPPOINTER(smadhdrp, hop_point); 581 } 582 } 583