1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 /* 27 * Copyright 2019, Joyent, Inc. 28 */ 29 30 #ifndef _SYS_IB_EOIB_ENX_IMPL_H 31 #define _SYS_IB_EOIB_ENX_IMPL_H 32 33 #ifdef __cplusplus 34 extern "C" { 35 #endif 36 37 #include <sys/ddi.h> 38 #include <sys/sunddi.h> 39 #include <sys/varargs.h> 40 #include <sys/ib/ibtl/ibti.h> 41 #include <sys/ib/ibtl/ibvti.h> 42 #include <sys/ib/ib_pkt_hdrs.h> 43 #include <sys/ib/ibtl/impl/ibtl_ibnex.h> 44 #include <sys/ib/mgt/sm_attr.h> 45 46 #include <sys/ib/clients/eoib/fip.h> 47 #include <sys/ib/clients/eoib/eib.h> 48 49 /* 50 * Driver specific constants 51 */ 52 #define ENX_E_SUCCESS 0 53 #define ENX_E_FAILURE -1 54 #define ENX_MAX_LINE 128 55 #define ENX_GRH_SZ (sizeof (ib_grh_t)) 56 57 /* 58 * Debug messages 59 */ 60 #define ENX_MSGS_CRIT 0x01 61 #define ENX_MSGS_ERR 0x02 62 #define ENX_MSGS_WARN 0x04 63 #define ENX_MSGS_DEBUG 0x08 64 #define ENX_MSGS_ARGS 0x10 65 #define ENX_MSGS_VERBOSE 0x20 66 #define ENX_MSGS_DEFAULT (ENX_MSGS_CRIT | ENX_MSGS_ERR | ENX_MSGS_WARN) 67 68 #define ENX_LOGSZ_DEFAULT 0x20000 69 70 #define ENX_DPRINTF_CRIT eibnx_dprintf_crit 71 #define ENX_DPRINTF_ERR eibnx_dprintf_err 72 #define ENX_DPRINTF_WARN eibnx_dprintf_warn 73 #ifdef ENX_DEBUG 74 #define ENX_DPRINTF_DEBUG eibnx_dprintf_debug 75 #define ENX_DPRINTF_ARGS eibnx_dprintf_args 76 #define ENX_DPRINTF_VERBOSE eibnx_dprintf_verbose 77 #else 78 #define ENX_DPRINTF_DEBUG(...) (void)(0) 79 #define ENX_DPRINTF_ARGS(...) (void)(0) 80 #define ENX_DPRINTF_VERBOSE(...) (void)(0) 81 #endif 82 83 /* 84 * EoIB Nexus service threads 85 */ 86 #define ENX_PORT_MONITOR "eibnx_port_%d_monitor" 87 #define ENX_NODE_CREATOR "eibnx_node_creator" 88 89 /* 90 * Default period (us) for unicast solicitations to discovered gateways. 91 * EoIB specification requires that hosts send solicitation atleast every 92 * 4 * GW_ADV_PERIOD. 93 */ 94 #define ENX_DFL_SOLICIT_PERIOD_USEC 32000000 95 96 /* 97 * Portinfo list per HCA 98 */ 99 typedef struct eibnx_port_s { 100 struct eibnx_port_s *po_next; 101 ibt_hca_portinfo_t *po_pi; 102 uint_t po_pi_size; 103 } eibnx_port_t; 104 105 /* 106 * HCA details 107 */ 108 typedef struct eibnx_hca_s { 109 struct eibnx_hca_s *hc_next; 110 ib_guid_t hc_guid; 111 ibt_hca_hdl_t hc_hdl; 112 ibt_pd_hdl_t hc_pd; 113 eibnx_port_t *hc_port; 114 } eibnx_hca_t; 115 116 /* 117 * The port_monitor thread in EoIB nexus driver only sends two types of 118 * packets: multicast solicitation the first time around, and periodic 119 * unicast solicitations later to gateways that have been discovered. So 120 * we need a couple of send wqes for the multicast solicitation and 121 * probably as many send wqes as the number of gateways that may be 122 * discovered from each port, for sending the unicast solicitations. 123 * For unicast solicitations though, the UD destination needs to be set 124 * up at the time we receive the advertisement from the gateway, using 125 * ibt_modify_reply_ud_dest(), so we'll assign one send wqe for each 126 * gateway that we discover. This means that we need to acquire these 127 * send wqe entries during rx processing in the completion handler, which 128 * means we must avoid sleeping in trying to acquire the swqe. Therefore, 129 * we'll pre-allocate these unicast solication send wqes to be atleast 130 * twice the number of recv wqes. 131 * 132 * The receive packets expected by the EoIB nexus driver are the multicast 133 * and unicast messages on the SOLICIT and ADVERTISE groups. These 134 * shouldn't be too many, and should be tuned as we gain experience on 135 * the traffic pattern. We'll start with 16. 136 */ 137 #define ENX_NUM_SWQE 46 138 #define ENX_NUM_RWQE 16 139 #define ENX_CQ_SIZE (ENX_NUM_SWQE + ENX_NUM_RWQE + 2) 140 141 /* 142 * qe_type values 143 */ 144 #define ENX_QETYP_RWQE 0x1 145 #define ENX_QETYP_SWQE 0x2 146 147 /* 148 * qe_flags bitmasks (protected by qe_lock). None of the 149 * flag values may be zero. 150 */ 151 #define ENX_QEFL_INUSE 0x01 152 #define ENX_QEFL_POSTED 0x02 153 #define ENX_QEFL_RELONCOMP 0x04 154 155 /* 156 * Recv and send workq entries 157 */ 158 typedef struct eibnx_wqe_s { 159 uint_t qe_type; 160 uint_t qe_bufsz; 161 ibt_wr_ds_t qe_sgl; 162 ibt_all_wr_t qe_wr; 163 kmutex_t qe_lock; 164 uint_t qe_flags; 165 } eibnx_wqe_t; 166 167 /* 168 * Tx descriptor 169 */ 170 typedef struct eibnx_tx_s { 171 ib_vaddr_t tx_vaddr; 172 ibt_mr_hdl_t tx_mr; 173 ibt_lkey_t tx_lkey; 174 eibnx_wqe_t tx_wqe[ENX_NUM_SWQE]; 175 } eibnx_tx_t; 176 177 /* 178 * Rx descriptor 179 */ 180 typedef struct eibnx_rx_s { 181 ib_vaddr_t rx_vaddr; 182 ibt_mr_hdl_t rx_mr; 183 ibt_lkey_t rx_lkey; 184 eibnx_wqe_t rx_wqe[ENX_NUM_RWQE]; 185 } eibnx_rx_t; 186 187 /* 188 * Details about the address of each gateway we discover. 189 */ 190 typedef struct eibnx_gw_addr_s { 191 ibt_adds_vect_t *ga_vect; 192 ib_gid_t ga_gid; 193 ib_qpn_t ga_qpn; 194 ib_qkey_t ga_qkey; 195 ib_pkey_t ga_pkey; 196 } eibnx_gw_addr_t; 197 198 /* 199 * States for each GW 200 */ 201 #define ENX_GW_STATE_UNAVAILABLE 1 /* GW nackd availability */ 202 #define ENX_GW_STATE_AVAILABLE 2 /* GW mcasted availability */ 203 #define ENX_GW_STATE_READY_TO_LOGIN 3 /* GW ucasted availability */ 204 205 typedef struct eibnx_gw_info_s { 206 struct eibnx_gw_info_s *gw_next; 207 eibnx_wqe_t *gw_swqe; 208 uint_t gw_state; 209 210 kmutex_t gw_adv_lock; 211 uint_t gw_adv_flag; 212 int64_t gw_adv_last_lbolt; 213 int64_t gw_adv_timeout_ticks; 214 215 eibnx_gw_addr_t gw_addr; 216 217 ib_guid_t gw_system_guid; 218 ib_guid_t gw_guid; 219 220 uint32_t gw_adv_period; 221 uint32_t gw_ka_period; 222 uint32_t gw_vnic_ka_period; 223 ib_qpn_t gw_ctrl_qpn; 224 225 ib_lid_t gw_lid; 226 uint16_t gw_portid; 227 uint16_t gw_num_net_vnics; 228 229 uint8_t gw_is_host_adm_vnics; 230 uint8_t gw_sl; 231 uint8_t gw_n_rss_qpn; 232 uint8_t gw_flag_ucast_advt; 233 uint8_t gw_flag_available; 234 235 uint8_t gw_system_name[EIB_GW_SYSNAME_LEN]; 236 uint8_t gw_port_name[EIB_GW_PORTNAME_LEN]; 237 uint8_t gw_vendor_id[EIB_GW_VENDOR_LEN]; 238 } eibnx_gw_info_t; 239 240 /* 241 * Values for gw_adv_flag (non-zero only) 242 */ 243 #define ENX_GW_DEAD 1 244 #define ENX_GW_ALIVE 2 245 #define ENX_GW_AWARE 3 246 247 /* 248 * Currently, we only expect the advertisement type of packets 249 * from the gw. But we do get login acks from the gateway also 250 * here in the nexus, so we'll need an identifier for that. 251 */ 252 typedef enum { 253 FIP_GW_ADVERTISE_MCAST = 0, 254 FIP_GW_ADVERTISE_UCAST, 255 FIP_VNIC_LOGIN_ACK 256 } eibnx_gw_pkt_type_t; 257 258 /* 259 * Currently, the only gw response handled by the eibnx driver 260 * are the ucast/mcast advertisements. Information collected from 261 * both these responses may be packed into a eibnx_gw_info_t. 262 * In the future, if we decide to handle other types of responses 263 * from the gw, we could simply add the new types to the union. 264 */ 265 typedef struct eibnx_gw_msg_s { 266 eibnx_gw_pkt_type_t gm_type; 267 union { 268 eibnx_gw_info_t gm_info; 269 } u; 270 } eibnx_gw_msg_t; 271 272 /* 273 * List to hold the devinfo nodes of eoib instances 274 */ 275 typedef struct eibnx_child_s { 276 struct eibnx_child_s *ch_next; 277 dev_info_t *ch_dip; 278 eibnx_gw_info_t *ch_gwi; 279 char *ch_node_name; 280 } eibnx_child_t; 281 282 /* 283 * Event bitmasks for the port-monitor to wait on. None of these flags 284 * may be zero. 285 */ 286 #define ENX_EVENT_LINK_UP 0x01 287 #define ENX_EVENT_MCGS_AVAILABLE 0x02 288 #define ENX_EVENT_TIMED_OUT 0x04 289 #define ENX_EVENT_DIE 0x08 290 #define ENX_EVENT_COMPLETION 0x10 291 292 /* 293 * MCG Query/Join status 294 */ 295 #define ENX_MCGS_FOUND 0x1 296 #define ENX_MCGS_JOINED 0x2 297 298 /* 299 * Information that each port-monitor thread cares about 300 */ 301 typedef struct eibnx_thr_info_s { 302 struct eibnx_thr_info_s *ti_next; 303 uint_t ti_progress; 304 305 /* 306 * Our kernel thread id 307 */ 308 kt_did_t ti_kt_did; 309 310 /* 311 * HCA, port and protection domain information 312 */ 313 ib_guid_t ti_hca_guid; 314 ibt_hca_hdl_t ti_hca; 315 ibt_pd_hdl_t ti_pd; 316 ibt_hca_portinfo_t *ti_pi; 317 char *ti_ident; 318 319 /* 320 * Well-known multicast groups for solicitations 321 * and advertisements. 322 */ 323 kmutex_t ti_mcg_lock; 324 uint_t ti_mcg_status; 325 ibt_mcg_info_t *ti_advertise_mcg; 326 ibt_mcg_info_t *ti_solicit_mcg; 327 uint_t ti_mcast_done; 328 329 /* 330 * Completion queue stuff 331 */ 332 ibt_cq_hdl_t ti_cq_hdl; 333 uint_t ti_cq_sz; 334 ibt_wc_t *ti_wc; 335 ddi_softint_handle_t ti_softint_hdl; 336 337 /* 338 * Channel related 339 */ 340 ibt_channel_hdl_t ti_chan; 341 ib_qpn_t ti_qpn; 342 343 /* 344 * Transmit/Receive stuff 345 */ 346 eibnx_tx_t ti_snd; 347 eibnx_rx_t ti_rcv; 348 349 /* 350 * GW related stuff 351 */ 352 kmutex_t ti_gw_lock; 353 eibnx_gw_info_t *ti_gw; 354 355 /* 356 * Devinfo nodes for the eoib children 357 */ 358 kmutex_t ti_child_lock; 359 eibnx_child_t *ti_child; 360 361 /* 362 * Events that we wait on and/or handle 363 */ 364 kmutex_t ti_event_lock; 365 kcondvar_t ti_event_cv; 366 uint_t ti_event; 367 } eibnx_thr_info_t; 368 369 /* 370 * Workq entry for creation of eoib nodes 371 */ 372 typedef struct eibnx_nodeq_s { 373 struct eibnx_nodeq_s *nc_next; 374 eibnx_thr_info_t *nc_info; 375 eibnx_gw_info_t *nc_gwi; 376 } eibnx_nodeq_t; 377 378 /* 379 * Bus config status flags. The in-prog is protected by 380 * nx_lock, and the rest of the flags (currently only 381 * buscfg-complete) is protected by the in-prog bit itself. 382 */ 383 #define NX_FL_BUSOP_INPROG 0x1 384 #define NX_FL_BUSCFG_COMPLETE 0x2 385 #define NX_FL_BUSOP_MASK 0x3 386 387 /* 388 * EoIB nexus per-instance state 389 */ 390 typedef struct eibnx_s { 391 dev_info_t *nx_dip; 392 ibt_clnt_hdl_t nx_ibt_hdl; 393 394 kmutex_t nx_lock; 395 eibnx_hca_t *nx_hca; 396 eibnx_thr_info_t *nx_thr_info; 397 boolean_t nx_monitors_up; 398 399 kmutex_t nx_nodeq_lock; 400 kcondvar_t nx_nodeq_cv; 401 eibnx_nodeq_t *nx_nodeq; 402 kt_did_t nx_nodeq_kt_did; 403 uint_t nx_nodeq_thr_die; 404 405 kmutex_t nx_busop_lock; 406 kcondvar_t nx_busop_cv; 407 uint_t nx_busop_flags; 408 } eibnx_t; 409 410 411 /* 412 * Event tags for EoIB Nexus events delivered to EoIB instances 413 */ 414 #define ENX_EVENT_TAG_GW_INFO_UPDATE 0 415 #define ENX_EVENT_TAG_GW_AVAILABLE 1 416 #define ENX_EVENT_TAG_LOGIN_ACK 2 417 418 /* 419 * FUNCTION PROTOTYPES FOR CROSS-FILE LINKAGE 420 */ 421 422 /* 423 * Threads and Event Handlers 424 */ 425 void eibnx_port_monitor(eibnx_thr_info_t *); 426 void eibnx_subnet_notices_handler(void *, ib_gid_t, ibt_subnet_event_code_t, 427 ibt_subnet_event_t *); 428 void eibnx_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t, 429 ibt_async_event_t *); 430 boolean_t eibnx_is_gw_dead(eibnx_gw_info_t *); 431 void eibnx_create_eoib_node(void); 432 void eibnx_comp_intr(ibt_cq_hdl_t, void *); 433 uint_t eibnx_comp_handler(caddr_t, caddr_t); 434 435 /* 436 * IBT related functions 437 */ 438 int eibnx_ibt_init(eibnx_t *); 439 int eibnx_find_mgroups(eibnx_thr_info_t *); 440 int eibnx_setup_cq(eibnx_thr_info_t *); 441 int eibnx_setup_ud_channel(eibnx_thr_info_t *); 442 int eibnx_setup_bufs(eibnx_thr_info_t *); 443 int eibnx_setup_cq_handler(eibnx_thr_info_t *); 444 int eibnx_join_mcgs(eibnx_thr_info_t *); 445 int eibnx_rejoin_mcgs(eibnx_thr_info_t *); 446 int eibnx_ibt_fini(eibnx_t *); 447 448 void eibnx_rb_find_mgroups(eibnx_thr_info_t *); 449 void eibnx_rb_setup_cq(eibnx_thr_info_t *); 450 void eibnx_rb_setup_ud_channel(eibnx_thr_info_t *); 451 void eibnx_rb_setup_bufs(eibnx_thr_info_t *); 452 void eibnx_rb_setup_cq_handler(eibnx_thr_info_t *); 453 void eibnx_rb_join_mcgs(eibnx_thr_info_t *); 454 455 eibnx_hca_t *eibnx_prepare_hca(ib_guid_t); 456 int eibnx_cleanup_hca(eibnx_hca_t *); 457 458 /* 459 * FIP packetizing related functions 460 */ 461 int eibnx_fip_solicit_mcast(eibnx_thr_info_t *); 462 int eibnx_fip_solicit_ucast(eibnx_thr_info_t *, clock_t *); 463 int eibnx_fip_parse_pkt(uint8_t *, eibnx_gw_msg_t *); 464 465 /* 466 * Queue and List related routines 467 */ 468 eibnx_wqe_t *eibnx_acquire_swqe(eibnx_thr_info_t *, int); 469 void eibnx_return_swqe(eibnx_wqe_t *); 470 void eibnx_return_rwqe(eibnx_thr_info_t *, eibnx_wqe_t *); 471 void eibnx_release_swqe(eibnx_wqe_t *); 472 473 void eibnx_enqueue_child(eibnx_thr_info_t *, eibnx_gw_info_t *, char *, 474 dev_info_t *); 475 int eibnx_update_child(eibnx_thr_info_t *, eibnx_gw_info_t *, dev_info_t *); 476 dev_info_t *eibnx_find_child_dip_by_inst(eibnx_thr_info_t *, int); 477 dev_info_t *eibnx_find_child_dip_by_gw(eibnx_thr_info_t *, uint16_t); 478 479 eibnx_gw_info_t *eibnx_find_gw_in_gwlist(eibnx_thr_info_t *, eibnx_gw_info_t *); 480 eibnx_gw_info_t *eibnx_add_gw_to_gwlist(eibnx_thr_info_t *, eibnx_gw_info_t *, 481 ibt_wc_t *, uint8_t *); 482 void eibnx_replace_gw_in_gwlist(eibnx_thr_info_t *, eibnx_gw_info_t *, 483 eibnx_gw_info_t *, ibt_wc_t *, uint8_t *, boolean_t *); 484 void eibnx_queue_for_creation(eibnx_thr_info_t *, eibnx_gw_info_t *); 485 486 /* 487 * Logging and Error reporting routines 488 */ 489 void eibnx_debug_init(void); 490 void eibnx_debug_fini(void); 491 void eibnx_dprintf_crit(const char *fmt, ...); 492 void eibnx_dprintf_err(const char *fmt, ...); 493 void eibnx_dprintf_warn(const char *fmt, ...); 494 #ifdef ENX_DEBUG 495 void eibnx_dprintf_debug(const char *fmt, ...); 496 void eibnx_dprintf_args(const char *fmt, ...); 497 void eibnx_dprintf_verbose(const char *fmt, ...); 498 #endif 499 500 /* 501 * Miscellaneous 502 */ 503 void eibnx_cleanup_port_nodes(eibnx_thr_info_t *); 504 void eibnx_create_node_props(dev_info_t *, eibnx_thr_info_t *, 505 eibnx_gw_info_t *); 506 int eibnx_name_child(dev_info_t *, char *, size_t); 507 void eibnx_busop_inprog_enter(eibnx_t *); 508 void eibnx_busop_inprog_exit(eibnx_t *); 509 eibnx_thr_info_t *eibnx_start_port_monitor(eibnx_hca_t *, eibnx_port_t *); 510 void eibnx_stop_port_monitor(eibnx_thr_info_t *); 511 void eibnx_terminate_monitors(void); 512 int eibnx_configure_node(eibnx_thr_info_t *, eibnx_gw_info_t *, dev_info_t **); 513 int eibnx_unconfigure_node(eibnx_thr_info_t *, eibnx_gw_info_t *); 514 int eibnx_locate_node_name(char *, eibnx_thr_info_t **, eibnx_gw_info_t **); 515 int eibnx_locate_unconfigured_node(eibnx_thr_info_t **, eibnx_gw_info_t **); 516 517 /* 518 * Devctl cbops (currently dummy) 519 */ 520 int eibnx_devctl_open(dev_t *, int, int, cred_t *); 521 int eibnx_devctl_close(dev_t, int, int, cred_t *); 522 int eibnx_devctl_ioctl(dev_t, int, intptr_t, int, cred_t *, int *); 523 524 /* 525 * External variable references 526 */ 527 extern pri_t minclsyspri; 528 extern eibnx_t *enx_global_ss; 529 extern ib_gid_t enx_solicit_mgid; 530 extern ib_gid_t enx_advertise_mgid; 531 532 #ifdef __cplusplus 533 } 534 #endif 535 536 #endif /* _SYS_IB_EOIB_ENX_IMPL_H */ 537