1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #ifndef _SYS_IB_CLIENTS_IBD_H 28 #define _SYS_IB_CLIENTS_IBD_H 29 30 #ifdef __cplusplus 31 extern "C" { 32 #endif 33 34 /* 35 * IETF defined IPoIB encapsulation header, with 2b of ethertype 36 * followed by 2 reserved bytes. This is at the start of the 37 * datagram sent to and received over the wire by the driver. 38 */ 39 typedef struct ipoib_header { 40 ushort_t ipoib_type; 41 ushort_t ipoib_mbz; 42 } ipoib_hdr_t; 43 44 #define IPOIB_HDRSIZE sizeof (struct ipoib_header) 45 46 /* 47 * IETF defined IPoIB link address; IBA QPN, followed by GID, 48 * which has a prefix and suffix, as reported via ARP. 49 */ 50 typedef struct ipoib_mac { 51 uint32_t ipoib_qpn; 52 uint32_t ipoib_gidpref[2]; 53 uint32_t ipoib_gidsuff[2]; 54 } ipoib_mac_t; 55 56 #define IPOIB_ADDRL sizeof (struct ipoib_mac) 57 58 /* 59 * Pseudo header prepended to datagram in DLIOCRAW transmit path 60 * and when GLD hands the datagram to the gldm_send entry point. 61 */ 62 typedef struct ipoib_ptxhdr { 63 ipoib_mac_t ipoib_dest; 64 ipoib_hdr_t ipoib_rhdr; 65 } ipoib_ptxhdr_t; 66 67 #define IPOIBDLSAP(p, offset) ((ipoib_ptxhdr_t *)((caddr_t)(p)+offset)) 68 69 /* 70 * The pseudo-GRH structure that sits before the data in the 71 * receive buffer, and is overlaid on top of the real GRH. 72 * The driver sets the ipoib_vertcflow to 0 if the pseudo-GRH 73 * does not hold valid information. If it is indicated valid, 74 * the driver must additionally provide the sender's qpn in 75 * network byte order in ipoib_sqpn, and not touch the 76 * remaining parts which were DMA'ed in by the IBA hardware. 77 */ 78 typedef struct ipoib_pgrh { 79 uint32_t ipoib_vertcflow; 80 uint32_t ipoib_sqpn; 81 uint32_t ipoib_sgid_pref[2]; 82 uint32_t ipoib_sgid_suff[2]; 83 uint32_t ipoib_dgid_pref[2]; 84 uint32_t ipoib_dgid_suff[2]; 85 } ipoib_pgrh_t; 86 87 /* 88 * The GRH is also dma'ed into recv buffers, thus space needs 89 * to be allocated for them. 90 */ 91 #define IPOIB_GRH_SIZE sizeof (ipoib_pgrh_t) 92 93 #if defined(_KERNEL) && !defined(_BOOT) 94 95 #include <sys/ib/ibtl/ibti.h> 96 #include <sys/ib/ib_pkt_hdrs.h> 97 #include <sys/list.h> 98 #include <sys/mac_provider.h> 99 #include <sys/mac_ib.h> 100 #include <sys/modhash.h> 101 102 /* 103 * Structure to encapsulate various types of async requests. 104 */ 105 typedef struct ibd_acache_rq { 106 struct list_node rq_list; /* list of pending work */ 107 int rq_op; /* what operation */ 108 ipoib_mac_t rq_mac; 109 ib_gid_t rq_gid; 110 void *rq_ptr; 111 } ibd_req_t; 112 113 typedef struct ibd_mcache { 114 struct list_node mc_list; /* full/non list */ 115 uint8_t mc_jstate; 116 boolean_t mc_fullreap; 117 ibt_mcg_info_t mc_info; 118 ibd_req_t mc_req; /* to queue LEAVE req */ 119 } ibd_mce_t; 120 121 typedef struct ibd_acache_s { 122 struct list_node ac_list; /* free/active list */ 123 ibt_ud_dest_hdl_t ac_dest; 124 ipoib_mac_t ac_mac; 125 uint32_t ac_ref; 126 ibd_mce_t *ac_mce; /* for MCG AHs */ 127 } ibd_ace_t; 128 129 #define IBD_MAX_SQSEG 59 130 #define IBD_MAX_RQSEG 1 131 132 typedef enum { 133 IBD_WQE_SEND, 134 IBD_WQE_RECV 135 } ibd_wqe_type_t; 136 137 typedef enum { 138 IBD_WQE_TXBUF = 1, 139 IBD_WQE_LSOBUF = 2, 140 IBD_WQE_MAPPED = 3 141 } ibd_wqe_buftype_t; 142 143 /* 144 * Pre-registered copybuf used for send and receive 145 */ 146 typedef struct ibd_copybuf_s { 147 ibt_wr_ds_t ic_sgl; 148 uint8_t *ic_bufaddr; 149 } ibd_copybuf_t; 150 151 typedef struct ibd_wqe_s { 152 struct ibd_wqe_s *w_next; 153 ibd_copybuf_t w_copybuf; 154 mblk_t *im_mblk; 155 } ibd_wqe_t; 156 157 /* 158 * Send WQE 159 */ 160 typedef struct ibd_swqe_s { 161 ibd_wqe_t w_ibd_swqe; 162 ibd_wqe_buftype_t w_buftype; 163 ibt_send_wr_t w_swr; 164 ibd_ace_t *w_ahandle; 165 ibt_mi_hdl_t w_mi_hdl; 166 ibt_wr_ds_t w_sgl[IBD_MAX_SQSEG]; 167 } ibd_swqe_t; 168 169 #define swqe_next w_ibd_swqe.w_next 170 #define swqe_copybuf w_ibd_swqe.w_copybuf 171 #define swqe_im_mblk w_ibd_swqe.im_mblk 172 #define SWQE_TO_WQE(swqe) (ibd_wqe_t *)&((swqe)->w_ibd_swqe) 173 #define WQE_TO_SWQE(wqe) (ibd_swqe_t *)wqe 174 175 /* 176 * Receive WQE 177 */ 178 typedef struct ibd_rwqe_s { 179 ibd_wqe_t w_ibd_rwqe; 180 struct ibd_state_s *w_state; 181 ibt_recv_wr_t w_rwr; 182 frtn_t w_freemsg_cb; 183 } ibd_rwqe_t; 184 185 #define rwqe_next w_ibd_rwqe.w_next 186 #define rwqe_copybuf w_ibd_rwqe.w_copybuf 187 #define rwqe_im_mblk w_ibd_rwqe.im_mblk 188 #define RWQE_TO_WQE(rwqe) (ibd_wqe_t *)&((rwqe)->w_ibd_rwqe) 189 #define WQE_TO_RWQE(wqe) (ibd_rwqe_t *)wqe 190 191 typedef struct ibd_list_s { 192 kmutex_t dl_mutex; 193 ibd_wqe_t *dl_head; 194 union { 195 boolean_t pending_sends; 196 uint32_t bufs_outstanding; 197 } ustat; 198 uint32_t dl_cnt; 199 } ibd_list_t; 200 201 #define dl_pending_sends ustat.pending_sends 202 #define dl_bufs_outstanding ustat.bufs_outstanding 203 204 /* 205 * LSO buffers 206 * 207 * Under normal circumstances we should never need to use any buffer 208 * that's larger than MTU. Unfortunately, IB HCA has limitations 209 * on the length of SGL that are much smaller than those for regular 210 * ethernet NICs. Since the network layer doesn't care to limit the 211 * number of mblk fragments in any send mp chain, we end up having to 212 * use these larger-than-MTU sized (larger than id_tx_buf_sz actually) 213 * buffers occasionally. 214 */ 215 typedef struct ibd_lsobuf_s { 216 struct ibd_lsobuf_s *lb_next; 217 uint8_t *lb_buf; 218 int lb_isfree; 219 } ibd_lsobuf_t; 220 221 typedef struct ibd_lsobkt_s { 222 uint8_t *bkt_mem; 223 ibd_lsobuf_t *bkt_bufl; 224 ibd_lsobuf_t *bkt_free_head; 225 ibt_mr_hdl_t bkt_mr_hdl; 226 ibt_mr_desc_t bkt_mr_desc; 227 uint_t bkt_nelem; 228 uint_t bkt_nfree; 229 } ibd_lsobkt_t; 230 231 /* 232 * Posting to a single software rx post queue is contentious, 233 * so break it out to (multiple) an array of queues. 234 * 235 * Try to ensure rx_queue structs fall in different cache lines using a filler. 236 * Note: the RX_QUEUE_CACHE_LINE needs to change if the struct changes. 237 */ 238 #define RX_QUEUE_CACHE_LINE \ 239 (64 - (sizeof (kmutex_t) + sizeof (ibd_wqe_t *) + sizeof (uint_t))) 240 typedef struct ibd_rx_queue_s { 241 kmutex_t rx_post_lock; 242 ibd_wqe_t *rx_head; 243 uint_t rx_cnt; 244 uint8_t rx_pad[RX_QUEUE_CACHE_LINE]; 245 } ibd_rx_queue_t; 246 247 /* 248 * This structure maintains information per port per HCA 249 * (per network interface). 250 */ 251 typedef struct ibd_state_s { 252 dev_info_t *id_dip; 253 ibt_clnt_hdl_t id_ibt_hdl; 254 ibt_hca_hdl_t id_hca_hdl; 255 ibt_pd_hdl_t id_pd_hdl; 256 kmem_cache_t *id_req_kmc; 257 258 ibd_list_t id_tx_rel_list; 259 260 uint32_t id_running; 261 262 uint32_t id_max_sqseg; 263 uint32_t id_max_sqseg_hiwm; 264 ibd_list_t id_tx_list; 265 ddi_softintr_t id_tx; 266 uint32_t id_tx_sends; 267 268 kmutex_t id_txpost_lock; 269 ibd_swqe_t *id_tx_head; 270 ibd_swqe_t *id_tx_tail; 271 int id_tx_busy; 272 273 uint_t id_tx_buf_sz; 274 uint8_t *id_tx_bufs; 275 ibd_swqe_t *id_tx_wqes; 276 ibt_mr_hdl_t id_tx_mr_hdl; 277 ibt_mr_desc_t id_tx_mr_desc; 278 279 kmutex_t id_lso_lock; 280 ibd_lsobkt_t *id_lso; 281 282 kmutex_t id_scq_poll_lock; 283 int id_scq_poll_busy; 284 285 ibt_cq_hdl_t id_scq_hdl; 286 ibt_wc_t *id_txwcs; 287 uint32_t id_txwcs_size; 288 289 int id_rx_nqueues; 290 ibd_rx_queue_t *id_rx_queues; 291 int id_rx_post_queue_index; 292 uint32_t id_rx_post_active; 293 294 ibd_rwqe_t *id_rx_wqes; 295 uint8_t *id_rx_bufs; 296 ibt_mr_hdl_t id_rx_mr_hdl; 297 ibt_mr_desc_t id_rx_mr_desc; 298 uint_t id_rx_buf_sz; 299 uint32_t id_num_rwqe; 300 ibd_list_t id_rx_list; 301 ddi_softintr_t id_rx; 302 uint32_t id_rx_bufs_outstanding_limit; 303 uint32_t id_rx_allocb; 304 uint32_t id_rx_allocb_failed; 305 ibd_list_t id_rx_free_list; 306 307 kmutex_t id_rcq_poll_lock; 308 int id_rcq_poll_busy; 309 uint32_t id_rxwcs_size; 310 ibt_wc_t *id_rxwcs; 311 ibt_cq_hdl_t id_rcq_hdl; 312 313 ibt_channel_hdl_t id_chnl_hdl; 314 ib_pkey_t id_pkey; 315 uint16_t id_pkix; 316 uint8_t id_port; 317 ibt_mcg_info_t *id_mcinfo; 318 319 mac_handle_t id_mh; 320 mac_resource_handle_t id_rh; 321 ib_gid_t id_sgid; 322 ib_qpn_t id_qpnum; 323 ipoib_mac_t id_macaddr; 324 ib_gid_t id_mgid; 325 ipoib_mac_t id_bcaddr; 326 327 int id_mtu; 328 uchar_t id_scope; 329 330 kmutex_t id_acache_req_lock; 331 kcondvar_t id_acache_req_cv; 332 struct list id_req_list; 333 kt_did_t id_async_thrid; 334 335 kmutex_t id_ac_mutex; 336 ibd_ace_t *id_ac_hot_ace; 337 struct list id_ah_active; 338 struct list id_ah_free; 339 ipoib_mac_t id_ah_addr; 340 ibd_req_t id_ah_req; 341 char id_ah_op; 342 uint64_t id_ah_error; 343 ibd_ace_t *id_ac_list; 344 mod_hash_t *id_ah_active_hash; 345 346 kmutex_t id_mc_mutex; 347 struct list id_mc_full; 348 struct list id_mc_non; 349 350 kmutex_t id_trap_lock; 351 kcondvar_t id_trap_cv; 352 boolean_t id_trap_stop; 353 uint32_t id_trap_inprog; 354 355 char id_prom_op; 356 357 kmutex_t id_sched_lock; 358 int id_sched_needed; 359 int id_sched_cnt; 360 int id_sched_lso_cnt; 361 362 kmutex_t id_link_mutex; 363 link_state_t id_link_state; 364 uint64_t id_link_speed; 365 366 uint64_t id_num_intrs; 367 uint64_t id_tx_short; 368 uint32_t id_num_swqe; 369 370 uint64_t id_xmt_bytes; 371 uint64_t id_rcv_bytes; 372 uint64_t id_multi_xmt; 373 uint64_t id_brd_xmt; 374 uint64_t id_multi_rcv; 375 uint64_t id_brd_rcv; 376 uint64_t id_xmt_pkt; 377 uint64_t id_rcv_pkt; 378 379 uint32_t id_hwcksum_capab; 380 boolean_t id_lso_policy; 381 boolean_t id_lso_capable; 382 uint_t id_lso_maxlen; 383 int id_hca_res_lkey_capab; 384 ibt_lkey_t id_res_lkey; 385 386 boolean_t id_bgroup_created; 387 kmutex_t id_macst_lock; 388 kcondvar_t id_macst_cv; 389 uint32_t id_mac_state; 390 } ibd_state_t; 391 392 #endif /* _KERNEL && !_BOOT */ 393 394 #ifdef __cplusplus 395 } 396 #endif 397 398 #endif /* _SYS_IB_CLIENTS_IBD_H */ 399