1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * This file implements the client interfaces of the IBMF.
31  */
32 
33 #include <sys/ib/mgt/ibmf/ibmf_impl.h>
34 #include <sys/ib/mgt/ib_mad.h>
35 
/* global IBMF soft state pointer; storage is defined elsewhere in the module */
extern ibmf_state_t *ibmf_statep;

/*
 * Tunables ("global settable" — presumably adjustable via /etc/system;
 * confirm against the driver's documentation).  The WQE counts bound how
 * many send/receive work queue entries exist per port and how many may be
 * posted per QP at once.
 */
/* global settable */
int	ibmf_send_wqes_per_port = IBMF_MAX_SQ_WRE;
int	ibmf_recv_wqes_per_port = IBMF_MAX_RQ_WRE;
int	ibmf_send_wqes_posted_per_qp = IBMF_MAX_POSTED_SQ_PER_QP;
int	ibmf_recv_wqes_posted_per_qp = IBMF_MAX_POSTED_RQ_PER_QP;

/* upper bound on queued tasks for the taskq created in ibmf_init() */
int	ibmf_taskq_max_tasks = 1024;

/* default verbosity for the IBMF trace macros */
int	ibmf_trace_level = DPRINT_L0;
47 
/*
 * MAD class header offsets and sizes, in bytes (per the names; the values
 * are consumed by registration code outside this section).
 */
#define	IBMF_MAD_CL_HDR_OFF_1	0
#define	IBMF_MAD_CL_HDR_OFF_2	12
#define	IBMF_MAD_CL_HDR_SZ_1	40
#define	IBMF_MAD_CL_HDR_SZ_2	20
#define	IBMF_MAD_CL_HDR_SZ_3	0
#define	IBMF_MAD_CL_HDR_SZ_4	4

/*
 * IBMF_VALID_CLIENT_TYPE():
 *	Evaluates non-zero when client_type is one of the management class
 *	values IBMF accepts at client registration: the standard agent and
 *	manager classes (SM, SA, PM, BM, DM, CM, SNMP), the vendor-specific
 *	class ranges (0x09-0x0F and 0x30-0x4F by name), the
 *	application-specific range (0x10-0x2F by name), and the universal
 *	class.  client_type is evaluated once per comparison; pass a
 *	side-effect-free expression.
 */
#define	IBMF_VALID_CLIENT_TYPE(client_type)		\
	((client_type) == SUBN_AGENT ||			\
	(client_type) == SUBN_MANAGER ||		\
	(client_type) == SUBN_ADM_AGENT ||		\
	(client_type) == SUBN_ADM_MANAGER ||		\
	(client_type) == PERF_AGENT ||			\
	(client_type) == PERF_MANAGER ||		\
	(client_type) == BM_AGENT ||			\
	(client_type) == BM_MANAGER ||			\
	(client_type) == DEV_MGT_AGENT ||		\
	(client_type) == DEV_MGT_MANAGER ||		\
	(client_type) == COMM_MGT_MANAGER_AGENT ||	\
	(client_type) == SNMP_MANAGER_AGENT ||		\
	(client_type) == VENDOR_09_MANAGER_AGENT ||	\
	(client_type) == VENDOR_0A_MANAGER_AGENT ||	\
	(client_type) == VENDOR_0B_MANAGER_AGENT ||	\
	(client_type) == VENDOR_0C_MANAGER_AGENT ||	\
	(client_type) == VENDOR_0D_MANAGER_AGENT ||	\
	(client_type) == VENDOR_0E_MANAGER_AGENT ||	\
	(client_type) == VENDOR_0F_MANAGER_AGENT ||	\
	(client_type) == VENDOR_30_MANAGER_AGENT ||	\
	(client_type) == VENDOR_31_MANAGER_AGENT ||	\
	(client_type) == VENDOR_32_MANAGER_AGENT ||	\
	(client_type) == VENDOR_33_MANAGER_AGENT ||	\
	(client_type) == VENDOR_34_MANAGER_AGENT ||	\
	(client_type) == VENDOR_35_MANAGER_AGENT ||	\
	(client_type) == VENDOR_36_MANAGER_AGENT ||	\
	(client_type) == VENDOR_37_MANAGER_AGENT ||	\
	(client_type) == VENDOR_38_MANAGER_AGENT ||	\
	(client_type) == VENDOR_39_MANAGER_AGENT ||	\
	(client_type) == VENDOR_3A_MANAGER_AGENT ||	\
	(client_type) == VENDOR_3B_MANAGER_AGENT ||	\
	(client_type) == VENDOR_3C_MANAGER_AGENT ||	\
	(client_type) == VENDOR_3D_MANAGER_AGENT ||	\
	(client_type) == VENDOR_3E_MANAGER_AGENT ||	\
	(client_type) == VENDOR_3F_MANAGER_AGENT ||	\
	(client_type) == VENDOR_40_MANAGER_AGENT ||	\
	(client_type) == VENDOR_41_MANAGER_AGENT ||	\
	(client_type) == VENDOR_42_MANAGER_AGENT ||	\
	(client_type) == VENDOR_43_MANAGER_AGENT ||	\
	(client_type) == VENDOR_44_MANAGER_AGENT ||	\
	(client_type) == VENDOR_45_MANAGER_AGENT ||	\
	(client_type) == VENDOR_46_MANAGER_AGENT ||	\
	(client_type) == VENDOR_47_MANAGER_AGENT ||	\
	(client_type) == VENDOR_48_MANAGER_AGENT ||	\
	(client_type) == VENDOR_49_MANAGER_AGENT ||	\
	(client_type) == VENDOR_4A_MANAGER_AGENT ||	\
	(client_type) == VENDOR_4B_MANAGER_AGENT ||	\
	(client_type) == VENDOR_4C_MANAGER_AGENT ||	\
	(client_type) == VENDOR_4D_MANAGER_AGENT ||	\
	(client_type) == VENDOR_4E_MANAGER_AGENT ||	\
	(client_type) == VENDOR_4F_MANAGER_AGENT ||	\
	(client_type) == APPLICATION_10_MANAGER_AGENT || \
	(client_type) == APPLICATION_11_MANAGER_AGENT || \
	(client_type) == APPLICATION_12_MANAGER_AGENT || \
	(client_type) == APPLICATION_13_MANAGER_AGENT || \
	(client_type) == APPLICATION_14_MANAGER_AGENT || \
	(client_type) == APPLICATION_15_MANAGER_AGENT || \
	(client_type) == APPLICATION_16_MANAGER_AGENT || \
	(client_type) == APPLICATION_17_MANAGER_AGENT || \
	(client_type) == APPLICATION_18_MANAGER_AGENT || \
	(client_type) == APPLICATION_19_MANAGER_AGENT || \
	(client_type) == APPLICATION_1A_MANAGER_AGENT || \
	(client_type) == APPLICATION_1B_MANAGER_AGENT || \
	(client_type) == APPLICATION_1C_MANAGER_AGENT || \
	(client_type) == APPLICATION_1D_MANAGER_AGENT || \
	(client_type) == APPLICATION_1E_MANAGER_AGENT || \
	(client_type) == APPLICATION_1F_MANAGER_AGENT || \
	(client_type) == APPLICATION_20_MANAGER_AGENT || \
	(client_type) == APPLICATION_21_MANAGER_AGENT || \
	(client_type) == APPLICATION_22_MANAGER_AGENT || \
	(client_type) == APPLICATION_23_MANAGER_AGENT || \
	(client_type) == APPLICATION_24_MANAGER_AGENT || \
	(client_type) == APPLICATION_25_MANAGER_AGENT || \
	(client_type) == APPLICATION_26_MANAGER_AGENT || \
	(client_type) == APPLICATION_27_MANAGER_AGENT || \
	(client_type) == APPLICATION_28_MANAGER_AGENT || \
	(client_type) == APPLICATION_29_MANAGER_AGENT || \
	(client_type) == APPLICATION_2A_MANAGER_AGENT || \
	(client_type) == APPLICATION_2B_MANAGER_AGENT || \
	(client_type) == APPLICATION_2C_MANAGER_AGENT || \
	(client_type) == APPLICATION_2D_MANAGER_AGENT || \
	(client_type) == APPLICATION_2E_MANAGER_AGENT || \
	(client_type) == APPLICATION_2F_MANAGER_AGENT || \
	(client_type) == UNIVERSAL_CLASS)
140 
141 static ibmf_ci_t *ibmf_i_lookup_ci(ib_guid_t ci_guid);
142 static int ibmf_i_init_ci(ibmf_register_info_t *client_infop,
143     ibmf_ci_t *cip);
144 static void ibmf_i_uninit_ci(ibmf_ci_t *cip);
145 static void ibmf_i_init_ci_done(ibmf_ci_t *cip);
146 static void ibmf_i_uninit_ci_done(ibmf_ci_t *cip);
147 static int ibmf_i_init_qp(ibmf_ci_t *ibmf_cip, ibmf_qp_t *qpp);
148 static void ibmf_i_uninit_qp(ibmf_ci_t *ibmf_cip, ibmf_qp_t *qpp);
149 static int ibmf_i_init_cqs(ibmf_ci_t *cip);
150 static void ibmf_i_fini_cqs(ibmf_ci_t *cip);
151 static void ibmf_i_init_qplist(ibmf_ci_t *ibmf_cip);
152 static void ibmf_i_fini_qplist(ibmf_ci_t *ibmf_cip);
153 static int ibmf_i_lookup_client_by_info(ibmf_ci_t *ibmf_cip,
154     ibmf_register_info_t *ir_client, ibmf_client_t **clientpp);
155 
156 /*
157  * ibmf_init():
158  *	Initializes module state and registers with the IBT framework.
159  * 	Returns 0 if initialization was successful, else returns non-zero.
160  */
161 int
162 ibmf_init(void)
163 {
164 	ibt_status_t 	status;
165 	ibt_clnt_hdl_t 	ibmf_ibt_handle;
166 
167 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_init_start,
168 	    IBMF_TNF_TRACE, "", "ibmf_init() enter\n");
169 
170 	/* setup the IBT module information */
171 	ibmf_statep->ibmf_ibt_modinfo.mi_ibt_version = IBTI_V2;
172 	ibmf_statep->ibmf_ibt_modinfo.mi_clnt_class = IBT_IBMA;
173 	ibmf_statep->ibmf_ibt_modinfo.mi_async_handler
174 	    = ibmf_ibt_async_handler;
175 	ibmf_statep->ibmf_ibt_modinfo.mi_reserved = NULL;
176 	ibmf_statep->ibmf_ibt_modinfo.mi_clnt_name = "ibmf";
177 
178 	/* setup a connection to IB transport layer (IBTF) */
179 	status = ibt_attach(&ibmf_statep->ibmf_ibt_modinfo, (void *)NULL,
180 	    (void *)NULL, (void *)&ibmf_ibt_handle);
181 	if (status != IBT_SUCCESS) {
182 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1, ibmf_init_err,
183 		    IBMF_TNF_ERROR, "", "%s, status = %d\n", tnf_string, msg,
184 		    "ibt attach failed", tnf_uint, status, status);
185 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_init_end,
186 		    IBMF_TNF_TRACE, "", "ibmf_init() exit\n");
187 		return (1);
188 	}
189 
190 	/* initialize the IBMF state context */
191 	ibmf_statep->ibmf_ibt_handle = ibmf_ibt_handle;
192 	ibmf_statep->ibmf_ci_list = (ibmf_ci_t *)NULL;
193 	ibmf_statep->ibmf_ci_list_tail = (ibmf_ci_t *)NULL;
194 	mutex_init(&ibmf_statep->ibmf_mutex, NULL, MUTEX_DRIVER, NULL);
195 	ibmf_statep->ibmf_cq_handler = ibmf_i_mad_completions;
196 
197 	ibmf_statep->ibmf_taskq = taskq_create("ibmf_taskq", IBMF_TASKQ_1THREAD,
198 	    MINCLSYSPRI, 1, ibmf_taskq_max_tasks, TASKQ_PREPOPULATE);
199 
200 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_init_end,
201 	    IBMF_TNF_TRACE, "", "ibmf_init() exit\n");
202 
203 	return (0);
204 }
205 
206 /*
207  * ibmf_fini():
208  *	Cleans up module state resources and unregisters from IBT framework.
209  */
int
ibmf_fini(void)
{
	ibmf_ci_t	*cip;
	ibmf_ci_t	*tcip;
	ibt_status_t	status;

	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_fini_start,
	    IBMF_TNF_TRACE, "", "ibmf_fini() enter\n");

	ASSERT(MUTEX_NOT_HELD(&ibmf_statep->ibmf_mutex));

	mutex_enter(&ibmf_statep->ibmf_mutex);

	/* free all the Channel Interface (CI) context structures */
	cip = ibmf_statep->ibmf_ci_list;
	tcip = NULL;
	while (cip != (ibmf_ci_t *)NULL) {

		mutex_enter(&cip->ci_mutex);
		/*
		 * Every CI must be quiesced by now: either PRESENT with no
		 * outstanding references or GONE, still linked on the list,
		 * and with no QPs remaining.
		 */
		ASSERT((cip->ci_state == IBMF_CI_STATE_PRESENT && cip->ci_ref ==
		    0) || (cip->ci_state == IBMF_CI_STATE_GONE));
		ASSERT(cip->ci_init_state == IBMF_CI_INIT_HCA_LINKED);
		ASSERT(cip->ci_qp_list == NULL && cip->ci_qp_list_tail == NULL);
		/* unlink this CI from the global list and fix head/tail */
		if (tcip != (ibmf_ci_t *)NULL)
			tcip->ci_next = cip->ci_next;
		if (ibmf_statep->ibmf_ci_list_tail == cip)
			ibmf_statep->ibmf_ci_list_tail = NULL;
		if (ibmf_statep->ibmf_ci_list == cip)
			ibmf_statep->ibmf_ci_list = cip->ci_next;
		/* remember the next CI to visit before this one is freed */
		tcip = cip->ci_next;
		mutex_exit(&cip->ci_mutex);
		/* free up the ci structure */
		if (cip->ci_port_kstatp != NULL) {
			kstat_delete(cip->ci_port_kstatp);
		}
		mutex_destroy(&cip->ci_mutex);
		mutex_destroy(&cip->ci_clients_mutex);
		mutex_destroy(&cip->ci_wqe_mutex);
		cv_destroy(&cip->ci_state_cv);
		cv_destroy(&cip->ci_wqes_cv);
		kmem_free((void *) cip, sizeof (ibmf_ci_t));
		cip = tcip;
	}

	ASSERT(ibmf_statep->ibmf_ci_list == NULL);
	ASSERT(ibmf_statep->ibmf_ci_list_tail == NULL);

	taskq_destroy(ibmf_statep->ibmf_taskq);

	mutex_exit(&ibmf_statep->ibmf_mutex);

	/* detach from IBTF */
	status = ibt_detach(ibmf_statep->ibmf_ibt_handle);
	if (status != IBT_SUCCESS) {
		/*
		 * NOTE(review): on detach failure the CI list and taskq are
		 * already gone but ibmf_mutex remains initialized; callers
		 * presumably treat the non-zero return as a failed unload.
		 */
		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1, ibmf_fini_err,
		    IBMF_TNF_ERROR, "", "%s, status = %d\n", tnf_string, msg,
		    "ibt detach error", tnf_uint, status, status);
		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_fini_end,
		    IBMF_TNF_TRACE, "", "ibmf_fini() exit\n");
		return (1);
	}

	mutex_destroy(&ibmf_statep->ibmf_mutex);

	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_fini_end,
	    IBMF_TNF_TRACE, "", "ibmf_fini() exit\n");

	return (0);
}
280 
281 /*
282  * ibmf_i_validate_class_mask():
283  *	Checks client type value in client information structure.
284  */
285 int
286 ibmf_i_validate_class_mask(ibmf_register_info_t	*client_infop)
287 {
288 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4,
289 	    ibmf_i_validate_class_mask_start, IBMF_TNF_TRACE, "",
290 	    "ibmf_i_validate_class_mask() enter, client_infop = %p\n",
291 	    tnf_opaque, client_infop, client_infop);
292 
293 	if (IBMF_VALID_CLIENT_TYPE(client_infop->ir_client_class) == B_FALSE) {
294 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
295 		    ibmf_i_validate_class_mask_err, IBMF_TNF_ERROR, "",
296 		    "%s, class = %x\n", tnf_string, msg,
297 		    "invalid class", tnf_uint, class,
298 		    client_infop->ir_client_class);
299 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
300 		    ibmf_i_validate_class_mask_end, IBMF_TNF_TRACE, "",
301 		    "ibmf_i_validate_class_mask() exit\n");
302 		return (IBMF_BAD_CLASS);
303 	}
304 
305 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_validate_class_mask_end,
306 	    IBMF_TNF_TRACE, "", "ibmf_i_validate_class_mask() exit\n");
307 	return (IBMF_SUCCESS);
308 }
309 
310 /*
311  * ibmf_i_validate_ci_guid_and_port():
312  *	Checks validity of port number and HCA GUID at client
313  *	registration time.
314  */
315 int
316 ibmf_i_validate_ci_guid_and_port(ib_guid_t hca_guid, uint8_t port_num)
317 {
318 	ibt_status_t	status;
319 	ibt_hca_attr_t	hca_attrs;
320 
321 	IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L4,
322 	    ibmf_i_validate_ci_guid_and_port_start, IBMF_TNF_TRACE, "",
323 	    "ibmf_i_validate_ci_guid_and_port() enter, hca_guid = %x, "
324 	    "port_num = %d\n", tnf_opaque, hca_guid, hca_guid,
325 	    tnf_uint, port_num, port_num);
326 
327 	/* check for incorrect port number specification */
328 	if (port_num == 0) {
329 		IBMF_TRACE_1(IBMF_TNF_NODEBUG, 1,
330 		    ibmf_i_validate_ci_guid_and_port_err, IBMF_TNF_ERROR, "",
331 		    "%s\n", tnf_string, msg, "port num is 0");
332 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
333 		    ibmf_i_validate_ci_guid_and_port_end, IBMF_TNF_TRACE, "",
334 		    "ibmf_i_validate_ci_guid_and_port() exit\n");
335 		return (IBMF_BAD_PORT);
336 	}
337 
338 	/* call IB transport layer for HCA attributes */
339 	status = ibt_query_hca_byguid(hca_guid, &hca_attrs);
340 	if (status != IBT_SUCCESS) {
341 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
342 		    ibmf_i_validate_ci_guid_and_port_err,
343 		    IBMF_TNF_ERROR, "", "%s, status = %d\n", tnf_string, msg,
344 		    "query_hca_guid failed", tnf_uint, status, status);
345 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
346 		    ibmf_i_validate_ci_guid_and_port_end, IBMF_TNF_TRACE, "",
347 		    "ibmf_i_validate_ci_guid_and_port() exit\n");
348 		return (IBMF_BAD_NODE);
349 	}
350 
351 	/* check if the specified port number is within the HCAs range */
352 	if (port_num > hca_attrs.hca_nports) {
353 		IBMF_TRACE_3(IBMF_TNF_NODEBUG, 1,
354 		    ibmf_i_validate_ci_guid_and_port_err, IBMF_TNF_ERROR, "",
355 		    "%s, num = %d, hca_ports = %d\n",
356 		    tnf_string, msg, "port num > valid ports",
357 		    tnf_uint, num, port_num, tnf_uint, hca_nports,
358 		    hca_attrs.hca_nports);
359 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
360 		    ibmf_i_validate_ci_guid_and_port_end, IBMF_TNF_TRACE, "",
361 		    "ibmf_i_validate_ci_guid_and_port() exit\n");
362 		return (IBMF_BAD_PORT);
363 	}
364 
365 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
366 	    ibmf_i_validate_ci_guid_and_port_end, IBMF_TNF_TRACE, "",
367 	    "ibmf_i_validate_ci_guid_and_port() exit\n");
368 	return (IBMF_SUCCESS);
369 }
370 
371 /*
372  * ibmf_i_lookup_ci():
373  * 	Lookup the ci and return if found. If the CI is not found, returns
374  * 	NULL.
375  */
376 static ibmf_ci_t *
377 ibmf_i_lookup_ci(ib_guid_t ci_guid)
378 {
379 	ibmf_ci_t	*cip = NULL;
380 
381 	ASSERT(MUTEX_NOT_HELD(&ibmf_statep->ibmf_mutex));
382 
383 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_lookup_ci_start,
384 	    IBMF_TNF_TRACE, "", "ibmf_i_lookup_ci(): enter, guid = 0x%x\n",
385 	    tnf_uint64, guid, ci_guid);
386 
387 	/* walk the CI list looking for one that matches the provided GUID */
388 	mutex_enter(&ibmf_statep->ibmf_mutex);
389 	cip = ibmf_statep->ibmf_ci_list;
390 	while (cip != (ibmf_ci_t *)NULL) {
391 		if (ci_guid == cip->ci_node_guid) {
392 			/* found it in our list */
393 			break;
394 		}
395 		cip = cip->ci_next;
396 	}
397 	mutex_exit(&ibmf_statep->ibmf_mutex);
398 
399 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_lookup_ci_end,
400 	    IBMF_TNF_TRACE, "", "ibmf_i_lookup_ci() exit\n");
401 
402 	return (cip);
403 }
404 
405 /*
406  * ibmf_i_get_ci():
407  *	Get the CI structure based on the HCA GUID from a list if it exists.
408  *	If the CI structure does not exist, and the HCA GUID is valid,
409  *	create a new CI structure and add it to the list.
410  */
411 int
412 ibmf_i_get_ci(ibmf_register_info_t *client_infop, ibmf_ci_t **cipp)
413 {
414 	ibmf_ci_t 		*cip;
415 	ibt_status_t		status;
416 	boolean_t		invalid = B_FALSE;
417 	ibt_hca_attr_t		hca_attrs;
418 	ibmf_port_kstat_t	*ksp;
419 
420 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_get_ci_start,
421 	    IBMF_TNF_TRACE, "", "ibmf_i_get_ci() enter, clinfop = %p\n",
422 	    tnf_opaque, client_infop, client_infop);
423 
424 	/* look for a CI context with a matching GUID */
425 	cip = ibmf_i_lookup_ci(client_infop->ir_ci_guid);
426 
427 	if (cip == NULL) {
428 
429 		/*
430 		 * attempt to create the ci. First, verify the ci exists.
431 		 * If it exists, allocate ci memory and insert in the ci list.
432 		 * It is possible that some other thread raced with us
433 		 * and inserted created ci while we are blocked in
434 		 * allocating memory. Check for that case and if that is indeed
435 		 * the case, free up what we allocated and try to get a
436 		 * reference count on the ci that the other thread added.
437 		 */
438 		status = ibt_query_hca_byguid(client_infop->ir_ci_guid,
439 		    &hca_attrs);
440 		if (status == IBT_SUCCESS) {
441 
442 			ibmf_ci_t *tcip;
443 			char buf[128];
444 
445 			/* allocate memory for the CI structure */
446 			cip = (ibmf_ci_t *)kmem_zalloc(sizeof (ibmf_ci_t),
447 			    KM_SLEEP);
448 
449 			_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*cip))
450 
451 			mutex_init(&cip->ci_mutex, NULL, MUTEX_DRIVER, NULL);
452 			mutex_init(&cip->ci_clients_mutex, NULL, MUTEX_DRIVER,
453 			    NULL);
454 			mutex_init(&cip->ci_wqe_mutex, NULL, MUTEX_DRIVER,
455 			    NULL);
456 			cv_init(&cip->ci_state_cv, NULL, CV_DRIVER, NULL);
457 			cv_init(&cip->ci_wqes_cv, NULL, CV_DRIVER, NULL);
458 
459 			(void) sprintf(buf, "r%08X",
460 			    (uint32_t)client_infop->ir_ci_guid);
461 			mutex_enter(&cip->ci_mutex);
462 
463 			cip->ci_state = IBMF_CI_STATE_PRESENT;
464 			cip->ci_node_guid = client_infop->ir_ci_guid;
465 
466 			/* set up per CI kstats */
467 			(void) sprintf(buf, "ibmf_%016" PRIx64 "_%d_stat",
468 			    client_infop->ir_ci_guid,
469 			    client_infop->ir_port_num);
470 			if ((cip->ci_port_kstatp = kstat_create("ibmf", 0, buf,
471 			    "misc", KSTAT_TYPE_NAMED,
472 			    sizeof (ibmf_port_kstat_t) / sizeof (kstat_named_t),
473 			    KSTAT_FLAG_WRITABLE)) == NULL) {
474 				mutex_exit(&cip->ci_mutex);
475 				mutex_destroy(&cip->ci_mutex);
476 				mutex_destroy(&cip->ci_clients_mutex);
477 				mutex_destroy(&cip->ci_wqe_mutex);
478 				cv_destroy(&cip->ci_state_cv);
479 				cv_destroy(&cip->ci_wqes_cv);
480 				kmem_free((void *)cip, sizeof (ibmf_ci_t));
481 				IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
482 				    ibmf_i_get_ci_err, IBMF_TNF_ERROR, "",
483 				    "%s\n", tnf_string, msg,
484 				    "kstat create failed");
485 				IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
486 				    ibmf_i_get_ci_end, IBMF_TNF_TRACE, "",
487 				    "ibmf_i_get_ci() exit\n");
488 				return (IBMF_NO_RESOURCES);
489 			}
490 			ksp = (ibmf_port_kstat_t *)cip->ci_port_kstatp->ks_data;
491 			kstat_named_init(&ksp->clients_registered,
492 			    "clients_registered", KSTAT_DATA_UINT32);
493 			kstat_named_init(&ksp->client_regs_failed,
494 			    "client_registrations_failed", KSTAT_DATA_UINT32);
495 			kstat_named_init(&ksp->send_wqes_alloced,
496 			    "send_wqes_allocated", KSTAT_DATA_UINT32);
497 			kstat_named_init(&ksp->recv_wqes_alloced,
498 			    "receive_wqes_allocated", KSTAT_DATA_UINT32);
499 			kstat_named_init(&ksp->swqe_allocs_failed,
500 			    "send_wqe_allocs_failed", KSTAT_DATA_UINT32);
501 			kstat_named_init(&ksp->rwqe_allocs_failed,
502 			    "recv_wqe_allocs_failed", KSTAT_DATA_UINT32);
503 			kstat_install(cip->ci_port_kstatp);
504 
505 			mutex_exit(&cip->ci_mutex);
506 
507 			mutex_enter(&ibmf_statep->ibmf_mutex);
508 
509 			tcip = ibmf_statep->ibmf_ci_list;
510 			while (tcip != (ibmf_ci_t *)NULL) {
511 				if (client_infop->ir_ci_guid ==
512 				    tcip->ci_node_guid) {
513 					/* found it in our list */
514 					break;
515 				}
516 				tcip = tcip->ci_next;
517 			}
518 
519 			/* if the ci isn't on the list, add it */
520 			if (tcip == NULL) {
521 				cip->ci_next = NULL;
522 
523 				_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*cip))
524 
525 				if (ibmf_statep->ibmf_ci_list_tail != NULL)
526 					ibmf_statep->ibmf_ci_list_tail->
527 					    ci_next = cip;
528 				if (ibmf_statep->ibmf_ci_list == NULL)
529 					ibmf_statep->ibmf_ci_list = cip;
530 				ibmf_statep->ibmf_ci_list_tail = cip;
531 
532 				mutex_enter(&cip->ci_mutex);
533 				cip->ci_init_state |= IBMF_CI_INIT_HCA_LINKED;
534 				mutex_exit(&cip->ci_mutex);
535 
536 			} else {
537 				/* free cip and set it to the one on the list */
538 				kstat_delete(cip->ci_port_kstatp);
539 				mutex_destroy(&cip->ci_mutex);
540 				mutex_destroy(&cip->ci_clients_mutex);
541 				mutex_destroy(&cip->ci_wqe_mutex);
542 				cv_destroy(&cip->ci_state_cv);
543 				cv_destroy(&cip->ci_wqes_cv);
544 				kmem_free((void *)cip, sizeof (ibmf_ci_t));
545 
546 				_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*cip))
547 
548 				cip = tcip;
549 			}
550 			mutex_exit(&ibmf_statep->ibmf_mutex);
551 		} else {
552 			/* we didn't find it and the CI doesn't exist */
553 			IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L1,
554 			    ibmf_i_get_ci_err, IBMF_TNF_ERROR, "", "%s\n",
555 			    tnf_string, msg, "GUID doesn't exist");
556 			IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
557 			    ibmf_i_get_ci_end, IBMF_TNF_TRACE, "",
558 			    "ibmf_i_get_ci() exit\n");
559 			return (IBMF_TRANSPORT_FAILURE);
560 		}
561 	}
562 
563 	ASSERT(cip != NULL);
564 
565 	/*
566 	 * We now have a CI context structure, either found it on the list,
567 	 * or created it.
568 	 * We now proceed to intialize the CI context.
569 	 */
570 	for (;;) {
571 		mutex_enter(&cip->ci_mutex);
572 
573 		/* CI is INITED & no state change in progress; we are all set */
574 		if (cip->ci_state == IBMF_CI_STATE_INITED && (cip->
575 		    ci_state_flags & (IBMF_CI_STATE_INVALIDATING |
576 		    IBMF_CI_STATE_UNINITING)) == 0) {
577 
578 			cip->ci_ref++;
579 			mutex_exit(&cip->ci_mutex);
580 
581 			break;
582 		}
583 
584 		/* CI is PRESENT; transition it to INITED */
585 		if (cip->ci_state == IBMF_CI_STATE_PRESENT && (cip->
586 		    ci_state_flags & (IBMF_CI_STATE_INVALIDATING |
587 		    IBMF_CI_STATE_INITING)) == 0) {
588 
589 			/* mark state as initing and init the ci */
590 			cip->ci_state_flags |= IBMF_CI_STATE_INITING;
591 			mutex_exit(&cip->ci_mutex);
592 
593 			_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*cip))
594 
595 			if (ibmf_i_init_ci(client_infop, cip) != IBMF_SUCCESS) {
596 				invalid = B_TRUE;
597 				break;
598 			}
599 
600 			_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*cip))
601 
602 			continue;
603 		}
604 
605 		/*
606 		 * If CI is GONE and no validation is in progress, we should
607 		 * return failure. Also, if CI is INITED but in the process of
608 		 * being made GONE (ie., a hot remove in progress), return
609 		 * failure.
610 		 */
611 		if ((cip->ci_state == IBMF_CI_STATE_GONE && (cip->
612 		    ci_state_flags & IBMF_CI_STATE_VALIDATING) == 0) ||
613 		    (cip->ci_state == IBMF_CI_STATE_INITED && (cip->
614 		    ci_state_flags & IBMF_CI_STATE_INVALIDATING) != 0)) {
615 
616 			IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
617 			    ibmf_i_get_ci_err, IBMF_TNF_ERROR, "",
618 			    "ci_state = %x, ci_state_flags = %x\n",
619 			    tnf_opaque, cip->ci_state, cip->ci_state,
620 			    tnf_opaque, cip->ci_state_flags,
621 			    cip->ci_state_flags);
622 
623 			invalid = B_TRUE;
624 			mutex_exit(&cip->ci_mutex);
625 
626 			break;
627 		}
628 
629 		/* a state change in progress; block waiting for state change */
630 		if (cip->ci_state_flags & IBMF_CI_STATE_VALIDATING)
631 			cip->ci_state_flags |= IBMF_CI_STATE_VALIDATE_WAIT;
632 		else if (cip->ci_state_flags & IBMF_CI_STATE_INITING)
633 			cip->ci_state_flags |= IBMF_CI_STATE_INIT_WAIT;
634 		else if (cip->ci_state_flags & IBMF_CI_STATE_UNINITING)
635 			cip->ci_state_flags |= IBMF_CI_STATE_UNINIT_WAIT;
636 
637 		cv_wait(&cip->ci_state_cv, &cip->ci_mutex);
638 
639 		mutex_exit(&cip->ci_mutex);
640 	}
641 
642 	if (invalid == B_TRUE) {
643 		IBMF_TRACE_0(IBMF_TNF_NODEBUG, DPRINT_L2, ibmf_i_get_ci_err,
644 		    IBMF_TNF_ERROR, "", "ibmf_i_get_ci() error\n");
645 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_get_ci_end,
646 		    IBMF_TNF_TRACE, "", "ibmf_i_get_ci() exit\n");
647 		return (IBMF_FAILURE);
648 	}
649 
650 	if (cip != NULL) {
651 		*cipp = cip;
652 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_get_ci_end,
653 		    IBMF_TNF_TRACE, "", "ibmf_i_get_ci() exit\n");
654 		return (IBMF_SUCCESS);
655 	} else {
656 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_get_ci_end,
657 		    IBMF_TNF_TRACE, "", "ibmf_i_get_ci() exit\n");
658 		return (IBMF_FAILURE);
659 	}
660 }
661 
662 /*
663  * ibmf_i_release_ci():
664  *	Drop the reference count for the CI.
665  */
void
ibmf_i_release_ci(ibmf_ci_t *cip)
{
	uint_t ref;

	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_release_ci_start,
	    IBMF_TNF_TRACE, "", "ibmf_i_release_ci() enter, cip = %p\n",
	    tnf_opaque, cip, cip);

	ASSERT(MUTEX_NOT_HELD(&cip->ci_mutex));

	mutex_enter(&cip->ci_mutex);
	/* ref holds the count *before* the decrement (post-decrement) */
	ref = cip->ci_ref--;
	if (ref == 1) {
		/* last reference dropped; CI must still be fully INITED */
		ASSERT(cip->ci_state == IBMF_CI_STATE_INITED);
		/* mark UNINITING under the lock so new users block/back off */
		cip->ci_state_flags |= IBMF_CI_STATE_UNINITING;
	}
	mutex_exit(&cip->ci_mutex);

	/* tear the CI down outside the mutex; UNINITING guards the window */
	if (ref == 1) {
		ibmf_i_uninit_ci(cip);
	}

	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_release_ci_end,
	    IBMF_TNF_TRACE, "", "ibmf_i_release_ci() exit\n");
}
692 
693 /*
694  * ibmf_i_init_ci():
695  *	Initialize the CI structure by setting up the HCA, allocating
696  *	protection domains, completion queues, a pool of WQEs.
697  */
698 /* ARGSUSED */
699 static int
700 ibmf_i_init_ci(ibmf_register_info_t *client_infop, ibmf_ci_t *cip)
701 {
702 	ibt_pd_hdl_t		pd;
703 	ibt_status_t		status;
704 	ib_guid_t		ci_guid;
705 	ibt_hca_attr_t		hca_attrs;
706 	ibt_hca_hdl_t		hca_handle;
707 	ibt_pd_flags_t		pd_flags = IBT_PD_NO_FLAGS;
708 	boolean_t		error = B_FALSE;
709 	int			ibmfstatus = IBMF_SUCCESS;
710 	char			errmsg[128];
711 
712 	_NOTE(ASSUMING_PROTECTED(*cip))
713 
714 	ASSERT(MUTEX_NOT_HELD(&ibmf_statep->ibmf_mutex));
715 	ASSERT(MUTEX_NOT_HELD(&cip->ci_mutex));
716 
717 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_ci_start,
718 	    IBMF_TNF_TRACE, "", "ibmf_i_init_ci() enter, cip = %p\n",
719 	    tnf_opaque, ibmf_ci, cip);
720 
721 	mutex_enter(&cip->ci_mutex);
722 	ci_guid = cip->ci_node_guid;
723 	ASSERT(cip->ci_state == IBMF_CI_STATE_PRESENT);
724 	ASSERT((cip->ci_state_flags & IBMF_CI_STATE_INITING) != 0);
725 	mutex_exit(&cip->ci_mutex);
726 
727 	/* set up a connection to the HCA specified by the GUID */
728 	status = ibt_open_hca(ibmf_statep->ibmf_ibt_handle, ci_guid,
729 	    &hca_handle);
730 	ASSERT(status != IBT_HCA_IN_USE);
731 	if (status != IBT_SUCCESS) {
732 		ibmf_i_init_ci_done(cip);
733 		(void) sprintf(errmsg, "ibt open hca failed, status = 0x%x",
734 		    status);
735 		error = B_TRUE;
736 		ibmfstatus = IBMF_TRANSPORT_FAILURE;
737 		goto bail;
738 	}
739 
740 	/* get the HCA attributes */
741 	status = ibt_query_hca(hca_handle, &hca_attrs);
742 	if (status != IBT_SUCCESS) {
743 		(void) ibt_close_hca(hca_handle);
744 		ibmf_i_init_ci_done(cip);
745 		(void) sprintf(errmsg, "ibt query hca failed, status = 0x%x",
746 		    status);
747 		error = B_TRUE;
748 		ibmfstatus = IBMF_TRANSPORT_FAILURE;
749 		goto bail;
750 	}
751 
752 	/* allocate a Protection Domain */
753 	status = ibt_alloc_pd(hca_handle, pd_flags, &pd);
754 	if (status != IBT_SUCCESS) {
755 		(void) ibt_close_hca(hca_handle);
756 		ibmf_i_init_ci_done(cip);
757 		(void) sprintf(errmsg, "alloc PD failed, status = 0x%x",
758 		    status);
759 		error = B_TRUE;
760 		ibmfstatus = IBMF_TRANSPORT_FAILURE;
761 		goto bail;
762 	}
763 
764 	/* init the ci */
765 	mutex_enter(&cip->ci_mutex);
766 	cip->ci_nports = hca_attrs.hca_nports;
767 	cip->ci_vendor_id = hca_attrs.hca_vendor_id;
768 	cip->ci_device_id = hca_attrs.hca_device_id;
769 	cip->ci_ci_handle = hca_handle;
770 	cip->ci_pd = pd;
771 	cip->ci_init_state |= IBMF_CI_INIT_HCA_INITED;
772 	mutex_exit(&cip->ci_mutex);
773 
774 	/* initialize cqs */
775 	if (ibmf_i_init_cqs(cip) != IBMF_SUCCESS) {
776 		(void) ibt_free_pd(cip->ci_ci_handle, cip->ci_pd);
777 		mutex_enter(&cip->ci_mutex);
778 		cip->ci_init_state &= ~IBMF_CI_INIT_HCA_INITED;
779 		mutex_exit(&cip->ci_mutex);
780 		(void) ibt_close_hca(cip->ci_ci_handle);
781 		ibmf_i_init_ci_done(cip);
782 		(void) sprintf(errmsg, "init CQs failed");
783 		error = B_TRUE;
784 		ibmfstatus = IBMF_FAILURE;
785 		goto bail;
786 	}
787 
788 	/* initialize wqes */
789 	if (ibmf_i_init_wqes(cip) != IBMF_SUCCESS) {
790 		ibmf_i_fini_cqs(cip);
791 		(void) ibt_free_pd(cip->ci_ci_handle, cip->ci_pd);
792 		mutex_enter(&cip->ci_mutex);
793 		cip->ci_init_state &= ~IBMF_CI_INIT_HCA_INITED;
794 		mutex_exit(&cip->ci_mutex);
795 		(void) ibt_close_hca(cip->ci_ci_handle);
796 		ibmf_i_init_ci_done(cip);
797 		(void) sprintf(errmsg, "init WQEs failed");
798 		error = B_TRUE;
799 		ibmfstatus = IBMF_FAILURE;
800 		goto bail;
801 	}
802 
803 	/* initialize the UD destination structure pool */
804 	ibmf_i_init_ud_dest(cip);
805 
806 	/* initialize the QP list */
807 	ibmf_i_init_qplist(cip);
808 
809 	/* initialize condition variable, state, and enable CQ notification */
810 	cip->ci_init_state |= IBMF_CI_INIT_MUTEX_CV_INITED;
811 	(void) ibt_enable_cq_notify(cip->ci_cq_handle, IBT_NEXT_COMPLETION);
812 	(void) ibt_enable_cq_notify(cip->ci_alt_cq_handle, IBT_NEXT_COMPLETION);
813 
814 	/* set state to INITED */
815 	mutex_enter(&cip->ci_mutex);
816 	cip->ci_state = IBMF_CI_STATE_INITED;
817 	mutex_exit(&cip->ci_mutex);
818 
819 	/* wake up waiters blocked on an initialization done event */
820 	ibmf_i_init_ci_done(cip);
821 
822 bail:
823 	if (error) {
824 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1, ibmf_i_init_ci_err,
825 		    IBMF_TNF_ERROR, "", "%s, status = %d\n", tnf_string, msg,
826 		    errmsg, tnf_uint, ibmfstatus, ibmfstatus);
827 	}
828 
829 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_ci_end,
830 	    IBMF_TNF_TRACE, "", "ibmf_i_init_ci() exit, cip = %p\n",
831 	    tnf_opaque, ibmf_ci, cip);
832 
833 	return (ibmfstatus);
834 }
835 
836 /*
837  * ibmf_i_uninit_ci():
838  *	Free up the resources allocated when initalizing the CI structure.
839  */
static void
ibmf_i_uninit_ci(ibmf_ci_t *cip)
{
	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_uninit_ci_start,
	    IBMF_TNF_TRACE, "", "ibmf_i_uninit_ci() enter, cip = %p\n",
	    tnf_opaque, cip, cip);

	ASSERT(MUTEX_HELD(&cip->ci_mutex) == 0);

	/*
	 * Teardown mirrors ibmf_i_init_ci() in reverse: QP list, pending
	 * completions, UD dest pool, WQEs, CQs, PD, and finally the HCA.
	 */
	/* clean up the QP list */
	ibmf_i_fini_qplist(cip);

	/* empty completions directly */
	ibmf_i_mad_completions(cip->ci_cq_handle, (void*)cip);
	ibmf_i_mad_completions(cip->ci_alt_cq_handle, (void*)cip);

	mutex_enter(&cip->ci_mutex);
	if (cip->ci_init_state & IBMF_CI_INIT_MUTEX_CV_INITED) {
		cip->ci_init_state &= ~IBMF_CI_INIT_MUTEX_CV_INITED;
	}
	mutex_exit(&cip->ci_mutex);

	/* clean up the UD destination structure pool */
	ibmf_i_fini_ud_dest(cip);

	/* clean up any WQE caches */
	ibmf_i_fini_wqes(cip);

	/* free up the completion queues */
	ibmf_i_fini_cqs(cip);

	/* free up the protection domain */
	(void) ibt_free_pd(cip->ci_ci_handle, cip->ci_pd);

	/* close the HCA connection */
	(void) ibt_close_hca(cip->ci_ci_handle);

	/* set state down to PRESENT */
	mutex_enter(&cip->ci_mutex);
	cip->ci_init_state &= ~IBMF_CI_INIT_HCA_INITED;
	cip->ci_state = IBMF_CI_STATE_PRESENT;
	mutex_exit(&cip->ci_mutex);

	/* wake up waiters blocked on an un-initialization done event */
	ibmf_i_uninit_ci_done(cip);

	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_uninit_ci_end,
	    IBMF_TNF_TRACE, "", "ibmf_i_uninit_ci() exit\n");
}
889 
890 /*
891  * ibmf_i_init_ci_done():
892  *	Mark CI initialization as "done", and wake up any waiters.
893  */
894 static void
895 ibmf_i_init_ci_done(ibmf_ci_t *cip)
896 {
897 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_ci_done_start,
898 	    IBMF_TNF_TRACE, "", "ibmf_i_init_ci_done() enter, cip = %p\n",
899 	    tnf_opaque, cip, cip);
900 
901 	mutex_enter(&cip->ci_mutex);
902 	cip->ci_state_flags &= ~IBMF_CI_STATE_INITING;
903 	if (cip->ci_state_flags & IBMF_CI_STATE_INIT_WAIT) {
904 		cip->ci_state_flags &= ~IBMF_CI_STATE_INIT_WAIT;
905 		cv_broadcast(&cip->ci_state_cv);
906 	}
907 	mutex_exit(&cip->ci_mutex);
908 
909 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_ci_done_end,
910 	    IBMF_TNF_TRACE, "", "ibmf_i_init_ci_done() exit\n");
911 }
912 
913 /*
914  * ibmf_i_uninit_ci_done():
915  *	Mark CI uninitialization as "done", and wake up any waiters.
916  */
917 static void
918 ibmf_i_uninit_ci_done(ibmf_ci_t *cip)
919 {
920 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_uninit_ci_done_start,
921 	    IBMF_TNF_TRACE, "", "ibmf_i_uninit_ci_done() enter, cip = %p\n",
922 	    tnf_opaque, cip, cip);
923 
924 	mutex_enter(&cip->ci_mutex);
925 	cip->ci_state_flags &= ~IBMF_CI_STATE_UNINITING;
926 	if (cip->ci_state_flags & IBMF_CI_STATE_UNINIT_WAIT) {
927 		cip->ci_state_flags &= ~IBMF_CI_STATE_UNINIT_WAIT;
928 		cv_broadcast(&cip->ci_state_cv);
929 	}
930 	mutex_exit(&cip->ci_mutex);
931 
932 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_uninit_ci_done_end,
933 	    IBMF_TNF_TRACE, "", "ibmf_i_uninit_ci_done() exit\n");
934 }
935 
936 /*
937  * ibmf_i_init_cqs():
938  *	Allocate a completion queue and set the CQ handler.
939  */
940 static int
941 ibmf_i_init_cqs(ibmf_ci_t *cip)
942 {
943 	ibt_status_t		status;
944 	ibt_cq_attr_t		cq_attrs;
945 	ibt_cq_hdl_t		cq_handle;
946 	uint32_t		num_entries;
947 
948 	ASSERT(MUTEX_NOT_HELD(&cip->ci_mutex));
949 
950 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_cqs_start,
951 	    IBMF_TNF_TRACE, "", "ibmf_i_init_cqs() enter, cip = %p\n",
952 	    tnf_opaque, cip, cip);
953 
954 	/*
955 	 * Allocate completion queue handle.
956 	 * The CQ size should be a 2^n - 1 value to avoid excess CQ allocation
957 	 * as done by some HCAs when the CQ size is specified as a 2^n
958 	 * quantity.
959 	 */
960 	cq_attrs.cq_size = (cip->ci_nports * (ibmf_send_wqes_posted_per_qp +
961 	    ibmf_recv_wqes_posted_per_qp)) - 1;
962 
963 	cq_attrs.cq_sched = NULL;
964 	cq_attrs.cq_flags = 0;
965 
966 	/* Get the CQ handle for the special QPs */
967 	status = ibt_alloc_cq(cip->ci_ci_handle, &cq_attrs,
968 	    &cq_handle, &num_entries);
969 	if (status != IBT_SUCCESS) {
970 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1, ibmf_i_init_cqs_err,
971 		    IBMF_TNF_ERROR, "", "%s, status = %d\n", tnf_string, msg,
972 		    "ibt_alloc_cq failed", tnf_uint, ibt_status, status);
973 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_cqs_end,
974 		    IBMF_TNF_TRACE, "", "ibmf_i_init_cqs() exit\n");
975 		return (IBMF_TRANSPORT_FAILURE);
976 	}
977 	ibt_set_cq_handler(cq_handle, ibmf_statep->ibmf_cq_handler, cip);
978 	cip->ci_cq_handle = cq_handle;
979 
980 	/* Get the CQ handle for the alternate QPs */
981 	status = ibt_alloc_cq(cip->ci_ci_handle, &cq_attrs,
982 	    &cq_handle, &num_entries);
983 	if (status != IBT_SUCCESS) {
984 		(void) ibt_free_cq(cip->ci_cq_handle);
985 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1, ibmf_i_init_cqs_err,
986 		    IBMF_TNF_ERROR, "", "%s, status = %d\n", tnf_string, msg,
987 		    "ibt_alloc_cq failed", tnf_uint, ibt_status, status);
988 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_cqs_end,
989 		    IBMF_TNF_TRACE, "", "ibmf_i_init_cqs() exit\n");
990 		return (IBMF_TRANSPORT_FAILURE);
991 	}
992 	ibt_set_cq_handler(cq_handle, ibmf_statep->ibmf_cq_handler, cip);
993 	cip->ci_alt_cq_handle = cq_handle;
994 
995 	/* set state to CQ INITED */
996 	mutex_enter(&cip->ci_mutex);
997 	cip->ci_init_state |= IBMF_CI_INIT_CQ_INITED;
998 	mutex_exit(&cip->ci_mutex);
999 
1000 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_cqs_end,
1001 	    IBMF_TNF_TRACE, "", "ibmf_i_init_cqs() exit\n");
1002 
1003 	return (IBMF_SUCCESS);
1004 }
1005 
1006 /*
1007  * ibmf_i_fini_cqs():
1008  *	Free up the completion queue
1009  */
1010 static void
1011 ibmf_i_fini_cqs(ibmf_ci_t *cip)
1012 {
1013 	ibt_status_t	status;
1014 	uint_t		ci_init_state;
1015 
1016 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_fini_cqs_start,
1017 	    IBMF_TNF_TRACE, "", "ibmf_i_fini_cqs() enter, cip = %p\n",
1018 	    tnf_opaque, cip, cip);
1019 
1020 	mutex_enter(&cip->ci_mutex);
1021 	ci_init_state = cip->ci_init_state;
1022 	cip->ci_init_state &= ~IBMF_CI_INIT_CQ_INITED;
1023 	mutex_exit(&cip->ci_mutex);
1024 
1025 	if (ci_init_state & IBMF_CI_INIT_CQ_INITED) {
1026 		status = ibt_free_cq(cip->ci_alt_cq_handle);
1027 		if (status != IBT_SUCCESS) {
1028 			IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L3,
1029 			    ibmf_i_fini_cqs_err, IBMF_TNF_ERROR, "",
1030 			    "%s, status = %d\n", tnf_string, msg,
1031 			    "ibt free cqs failed", tnf_uint, status, status);
1032 		}
1033 
1034 		status = ibt_free_cq(cip->ci_cq_handle);
1035 		if (status != IBT_SUCCESS) {
1036 			IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L3,
1037 			    ibmf_i_fini_cqs_err, IBMF_TNF_ERROR, "",
1038 			    "%s, status = %d\n", tnf_string, msg,
1039 			    "ibt free cqs failed", tnf_uint, status, status);
1040 		}
1041 	}
1042 
1043 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_fini_cqs_end,
1044 	    IBMF_TNF_TRACE, "", "ibmf_i_fini_cqs() exit");
1045 }
1046 
1047 /*
1048  * ibmf_i_init_qplist():
1049  *	Set the QP list inited state flag
1050  */
1051 static void
1052 ibmf_i_init_qplist(ibmf_ci_t *ibmf_cip)
1053 {
1054 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_qplist_start,
1055 	    IBMF_TNF_TRACE, "", "ibmf_i_init_qplist() enter, cip = %p\n",
1056 	    tnf_opaque, cip, ibmf_cip);
1057 
1058 	mutex_enter(&ibmf_cip->ci_mutex);
1059 	ASSERT((ibmf_cip->ci_init_state & IBMF_CI_INIT_QP_LIST_INITED) == 0);
1060 	ASSERT(ibmf_cip->ci_qp_list == NULL && ibmf_cip->ci_qp_list_tail ==
1061 	    NULL);
1062 	cv_init(&ibmf_cip->ci_qp_cv, NULL, CV_DRIVER, NULL);
1063 	ibmf_cip->ci_init_state |= IBMF_CI_INIT_QP_LIST_INITED;
1064 	mutex_exit(&ibmf_cip->ci_mutex);
1065 
1066 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_qplist_end,
1067 	    IBMF_TNF_TRACE, "", "ibmf_i_init_qplist() exit\n");
1068 }
1069 
1070 /*
1071  * ibmf_i_fini_qplist():
1072  *	Clean up the QP list
1073  */
1074 static void
1075 ibmf_i_fini_qplist(ibmf_ci_t *ibmf_cip)
1076 {
1077 	ibmf_qp_t *qpp;
1078 	ibmf_alt_qp_t *altqpp;
1079 	ibt_status_t status;
1080 
1081 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_fini_qplist_start,
1082 	    IBMF_TNF_TRACE, "", "ibmf_i_fini_qplist() enter, cip = %p\n",
1083 	    tnf_opaque, cip, ibmf_cip);
1084 
1085 	mutex_enter(&ibmf_cip->ci_mutex);
1086 
1087 	if ((ibmf_cip->ci_init_state & IBMF_CI_INIT_QP_LIST_INITED) != 0) {
1088 
1089 		/* walk through the qp list and free the memory */
1090 		qpp = ibmf_cip->ci_qp_list;
1091 		while (qpp != NULL) {
1092 			/* Remove qpp from the list */
1093 			ibmf_cip->ci_qp_list = qpp->iq_next;
1094 
1095 			ASSERT(qpp->iq_qp_ref == 0);
1096 			ASSERT(qpp->iq_flags == IBMF_QP_FLAGS_INVALID);
1097 			mutex_exit(&ibmf_cip->ci_mutex);
1098 			if (qpp->iq_qp_handle != NULL) {
1099 				/* Flush the special QP */
1100 				status = ibt_flush_qp(qpp->iq_qp_handle);
1101 				if (status != IBT_SUCCESS) {
1102 					IBMF_TRACE_2(IBMF_TNF_NODEBUG,
1103 					    DPRINT_L1, ibmf_i_fini_qplist_err,
1104 					    IBMF_TNF_ERROR, "",
1105 					    "%s, status = %d\n", tnf_string,
1106 					    msg, "ibt_flush_qp returned error",
1107 					    tnf_int, status, status);
1108 				}
1109 
1110 				/* Grab the ci_mutex mutex before waiting */
1111 				mutex_enter(&ibmf_cip->ci_mutex);
1112 
1113 				/* Wait if WQEs for special QPs are alloced */
1114 				while (ibmf_cip->ci_wqes_alloced != 0) {
1115 					cv_wait(&ibmf_cip->ci_wqes_cv,
1116 					    &ibmf_cip->ci_mutex);
1117 				}
1118 
1119 				mutex_exit(&ibmf_cip->ci_mutex);
1120 
1121 				/* Free the special QP */
1122 				status = ibt_free_qp(qpp->iq_qp_handle);
1123 				if (status != IBT_SUCCESS) {
1124 					IBMF_TRACE_2(IBMF_TNF_NODEBUG,
1125 					    DPRINT_L1, ibmf_i_fini_qplist_err,
1126 					    IBMF_TNF_ERROR, "",
1127 					    "%s, status = %d\n", tnf_string,
1128 					    msg, "ibt_free_qp returned error",
1129 					    tnf_int, status, status);
1130 				}
1131 			}
1132 			mutex_destroy(&qpp->iq_mutex);
1133 			kmem_free((void *)qpp, sizeof (ibmf_qp_t));
1134 
1135 			/* Grab the mutex again before accessing the QP list */
1136 			mutex_enter(&ibmf_cip->ci_mutex);
1137 			qpp = ibmf_cip->ci_qp_list;
1138 		}
1139 
1140 		cv_destroy(&ibmf_cip->ci_qp_cv);
1141 
1142 		ibmf_cip->ci_qp_list = ibmf_cip->ci_qp_list_tail = NULL;
1143 		ibmf_cip->ci_init_state &=  ~IBMF_CI_INIT_QP_LIST_INITED;
1144 
1145 		altqpp = ibmf_cip->ci_alt_qp_list;
1146 		while (altqpp != NULL) {
1147 			/* Remove altqpp from the list */
1148 			ibmf_cip->ci_alt_qp_list = altqpp->isq_next;
1149 			mutex_exit(&ibmf_cip->ci_mutex);
1150 
1151 			if (altqpp->isq_qp_handle != NULL) {
1152 				/* Flush the special QP */
1153 				status = ibt_flush_qp(altqpp->isq_qp_handle);
1154 				if (status != IBT_SUCCESS) {
1155 					IBMF_TRACE_2(IBMF_TNF_NODEBUG,
1156 					    DPRINT_L1, ibmf_i_fini_qplist_err,
1157 					    IBMF_TNF_ERROR, "",
1158 					    "%s, status = %d\n", tnf_string,
1159 					    msg, "ibt_flush_qp returned error",
1160 					    tnf_int, status, status);
1161 				}
1162 
1163 				/* Free the special QP */
1164 				status = ibt_free_qp(altqpp->isq_qp_handle);
1165 				if (status != IBT_SUCCESS) {
1166 					IBMF_TRACE_2(IBMF_TNF_NODEBUG,
1167 					    DPRINT_L1, ibmf_i_fini_qplist_err,
1168 					    IBMF_TNF_ERROR, "",
1169 					    "%s, status = %d\n", tnf_string,
1170 					    msg, "ibt_free_qp returned error",
1171 					    tnf_int, status, status);
1172 				}
1173 			}
1174 			mutex_destroy(&altqpp->isq_mutex);
1175 			kmem_free((void *)altqpp, sizeof (ibmf_alt_qp_t));
1176 
1177 			/* Grab the mutex again before accessing the QP list */
1178 			mutex_enter(&ibmf_cip->ci_mutex);
1179 			altqpp = ibmf_cip->ci_alt_qp_list;
1180 		}
1181 	}
1182 
1183 	mutex_exit(&ibmf_cip->ci_mutex);
1184 
1185 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_fini_qplist_end,
1186 	    IBMF_TNF_TRACE, "", "ibmf_i_fini_qplist() exit\n");
1187 }
1188 
1189 /*
1190  * ibmf_i_alloc_client():
1191  *	Allocate and initialize the client structure.
1192  */
1193 int
1194 ibmf_i_alloc_client(ibmf_register_info_t *client_infop, uint_t flags,
1195     ibmf_client_t **clientpp)
1196 {
1197 	ibmf_client_t		*ibmf_clientp;
1198 	char			buf[128];
1199 	ibmf_kstat_t		*ksp;
1200 
1201 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_alloc_client_start,
1202 	    IBMF_TNF_TRACE, "", "ibmf_i_alloc_client() enter, "
1203 	    "client_infop = %p\n", tnf_opaque, client_infop, client_infop);
1204 
1205 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ibmf_clientp))
1206 
1207 	/* allocate memory for ibmf_client and initialize it */
1208 	ibmf_clientp = kmem_zalloc(sizeof (ibmf_client_t), KM_SLEEP);
1209 	mutex_init(&ibmf_clientp->ic_mutex, NULL, MUTEX_DRIVER, NULL);
1210 	mutex_init(&ibmf_clientp->ic_msg_mutex, NULL, MUTEX_DRIVER, NULL);
1211 	mutex_init(&ibmf_clientp->ic_kstat_mutex, NULL, MUTEX_DRIVER, NULL);
1212 	cv_init(&ibmf_clientp->ic_recv_cb_teardown_cv, NULL, CV_DRIVER, NULL);
1213 
1214 	(void) sprintf(buf, "s%08X_0x%08X",
1215 	    (uint32_t)client_infop->ir_ci_guid, client_infop->ir_client_class);
1216 
1217 	/* create a taskq to handle send completions based on reg flags */
1218 	if ((flags & IBMF_REG_FLAG_NO_OFFLOAD) == 0) {
1219 		if (flags & IBMF_REG_FLAG_SINGLE_OFFLOAD)
1220 			ibmf_clientp->ic_send_taskq = taskq_create(buf,
1221 			    IBMF_TASKQ_1THREAD, MINCLSYSPRI, 1,
1222 			    ibmf_taskq_max_tasks, TASKQ_PREPOPULATE);
1223 		else
1224 			ibmf_clientp->ic_send_taskq = taskq_create(buf,
1225 			    IBMF_TASKQ_NTHREADS, MINCLSYSPRI, 1,
1226 			    ibmf_taskq_max_tasks,
1227 			    TASKQ_DYNAMIC | TASKQ_PREPOPULATE);
1228 		if (ibmf_clientp->ic_send_taskq == NULL) {
1229 			cv_destroy(&ibmf_clientp->ic_recv_cb_teardown_cv);
1230 			mutex_destroy(&ibmf_clientp->ic_mutex);
1231 			mutex_destroy(&ibmf_clientp->ic_msg_mutex);
1232 			mutex_destroy(&ibmf_clientp->ic_kstat_mutex);
1233 			kmem_free((void *)ibmf_clientp, sizeof (ibmf_client_t));
1234 			IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
1235 			    ibmf_i_alloc_client_err, IBMF_TNF_ERROR, "", "%s\n",
1236 			    tnf_string, msg, buf);
1237 			IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
1238 			    ibmf_i_alloc_client_end, IBMF_TNF_TRACE, "",
1239 			    "ibmf_i_alloc_client() exit\n");
1240 			return (IBMF_NO_RESOURCES);
1241 		}
1242 	}
1243 	ibmf_clientp->ic_init_state_class |= IBMF_CI_INIT_SEND_TASKQ_DONE;
1244 
1245 	(void) sprintf(buf, "r%08X_0x%08X",
1246 	    (uint32_t)client_infop->ir_ci_guid, client_infop->ir_client_class);
1247 
1248 	/* create a taskq to handle receive completions on reg flags */
1249 	if ((flags & IBMF_REG_FLAG_NO_OFFLOAD) == 0) {
1250 		if (flags & IBMF_REG_FLAG_SINGLE_OFFLOAD)
1251 			ibmf_clientp->ic_recv_taskq = taskq_create(buf,
1252 			    IBMF_TASKQ_1THREAD, MINCLSYSPRI, 1,
1253 			    ibmf_taskq_max_tasks, TASKQ_PREPOPULATE);
1254 		else
1255 			ibmf_clientp->ic_recv_taskq = taskq_create(buf,
1256 			    IBMF_TASKQ_NTHREADS, MINCLSYSPRI, 1,
1257 			    ibmf_taskq_max_tasks,
1258 			    TASKQ_DYNAMIC | TASKQ_PREPOPULATE);
1259 		if (ibmf_clientp->ic_recv_taskq == NULL) {
1260 			cv_destroy(&ibmf_clientp->ic_recv_cb_teardown_cv);
1261 			mutex_destroy(&ibmf_clientp->ic_mutex);
1262 			mutex_destroy(&ibmf_clientp->ic_msg_mutex);
1263 			mutex_destroy(&ibmf_clientp->ic_kstat_mutex);
1264 			taskq_destroy(ibmf_clientp->ic_send_taskq);
1265 			kmem_free((void *)ibmf_clientp, sizeof (ibmf_client_t));
1266 			IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
1267 			    ibmf_i_alloc_client_err, IBMF_TNF_ERROR, "", "%s\n",
1268 			    tnf_string, msg, buf);
1269 			IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
1270 			    ibmf_i_alloc_client_end, IBMF_TNF_TRACE, "",
1271 			    "ibmf_i_alloc_client() exit\n");
1272 			return (IBMF_NO_RESOURCES);
1273 		}
1274 	}
1275 	ibmf_clientp->ic_init_state_class |= IBMF_CI_INIT_RECV_TASKQ_DONE;
1276 	ibmf_clientp->ic_client_info.ci_guid = client_infop->ir_ci_guid;
1277 	ibmf_clientp->ic_client_info.port_num = client_infop->ir_port_num;
1278 
1279 	/* Get the base LID */
1280 	(void) ibt_get_port_state_byguid(ibmf_clientp->ic_client_info.ci_guid,
1281 	    ibmf_clientp->ic_client_info.port_num, NULL,
1282 	    &ibmf_clientp->ic_base_lid);
1283 
1284 	ibmf_clientp->ic_client_info.client_class =
1285 	    client_infop->ir_client_class;
1286 
1287 	/* set up the per client ibmf kstats */
1288 	(void) sprintf(buf, "ibmf_%016" PRIx64 "_%d_%X_stat",
1289 	    client_infop->ir_ci_guid, client_infop->ir_port_num,
1290 	    client_infop->ir_client_class);
1291 	if ((ibmf_clientp->ic_kstatp = kstat_create("ibmf", 0, buf, "misc",
1292 	    KSTAT_TYPE_NAMED, sizeof (ibmf_kstat_t) / sizeof (kstat_named_t),
1293 	    KSTAT_FLAG_WRITABLE)) == NULL) {
1294 		cv_destroy(&ibmf_clientp->ic_recv_cb_teardown_cv);
1295 		mutex_destroy(&ibmf_clientp->ic_mutex);
1296 		mutex_destroy(&ibmf_clientp->ic_msg_mutex);
1297 		mutex_destroy(&ibmf_clientp->ic_kstat_mutex);
1298 		if ((flags & IBMF_REG_FLAG_NO_OFFLOAD) == 0) {
1299 			taskq_destroy(ibmf_clientp->ic_send_taskq);
1300 			taskq_destroy(ibmf_clientp->ic_recv_taskq);
1301 		}
1302 		kmem_free((void *)ibmf_clientp, sizeof (ibmf_client_t));
1303 		IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
1304 		    ibmf_i_alloc_client_err, IBMF_TNF_ERROR, "", "%s\n",
1305 		    tnf_string, msg, "kstat creation failed");
1306 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
1307 		    ibmf_i_alloc_client_end, IBMF_TNF_TRACE, "",
1308 		    "ibmf_i_alloc_client() exit\n");
1309 		return (IBMF_NO_RESOURCES);
1310 	}
1311 	ksp = (ibmf_kstat_t *)ibmf_clientp->ic_kstatp->ks_data;
1312 	kstat_named_init(&ksp->msgs_alloced, "messages_allocated",
1313 	    KSTAT_DATA_UINT32);
1314 	kstat_named_init(&ksp->msgs_active, "messages_active",
1315 	    KSTAT_DATA_UINT32);
1316 	kstat_named_init(&ksp->msgs_sent, "messages_sent", KSTAT_DATA_UINT32);
1317 	kstat_named_init(&ksp->msgs_received, "messages_received",
1318 	    KSTAT_DATA_UINT32);
1319 	kstat_named_init(&ksp->sends_active, "sends_active", KSTAT_DATA_UINT32);
1320 	kstat_named_init(&ksp->recvs_active, "receives_active",
1321 	    KSTAT_DATA_UINT32);
1322 	kstat_named_init(&ksp->ud_dests_alloced, "ud_dests_allocated",
1323 	    KSTAT_DATA_UINT32);
1324 	kstat_named_init(&ksp->alt_qps_alloced, "alt_qps_allocated",
1325 	    KSTAT_DATA_UINT32);
1326 	kstat_named_init(&ksp->send_cb_active, "send_callbacks_active",
1327 	    KSTAT_DATA_UINT32);
1328 	kstat_named_init(&ksp->recv_cb_active, "receive_callbacks_active",
1329 	    KSTAT_DATA_UINT32);
1330 	kstat_named_init(&ksp->recv_bufs_alloced, "receive_bufs_allocated",
1331 	    KSTAT_DATA_UINT32);
1332 	kstat_named_init(&ksp->msg_allocs_failed, "msg_allocs_failed",
1333 	    KSTAT_DATA_UINT32);
1334 	kstat_named_init(&ksp->uddest_allocs_failed, "uddest_allocs_failed",
1335 	    KSTAT_DATA_UINT32);
1336 	kstat_named_init(&ksp->alt_qp_allocs_failed, "alt_qp_allocs_failed",
1337 	    KSTAT_DATA_UINT32);
1338 	kstat_named_init(&ksp->send_pkt_failed, "send_pkt_failed",
1339 	    KSTAT_DATA_UINT32);
1340 	kstat_named_init(&ksp->rmpp_errors, "rmpp_errors",
1341 	    KSTAT_DATA_UINT32);
1342 
1343 	kstat_install(ibmf_clientp->ic_kstatp);
1344 
1345 	*clientpp = ibmf_clientp;
1346 
1347 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*ibmf_clientp))
1348 
1349 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_alloc_client_end,
1350 	    IBMF_TNF_TRACE, "", "ibmf_i_alloc_client() exit\n");
1351 
1352 	return (IBMF_SUCCESS);
1353 }
1354 
1355 /*
1356  * ibmf_i_free_client():
1357  *	Free up the client structure and release resources
1358  */
1359 void
1360 ibmf_i_free_client(ibmf_client_t *clientp)
1361 {
1362 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_free_client_start,
1363 	    IBMF_TNF_TRACE, "", "ibmf_i_free_client() enter, clientp = %p\n",
1364 	    tnf_opaque, clientp, clientp);
1365 
1366 	/* delete the general ibmf kstats */
1367 	if (clientp->ic_kstatp != NULL) {
1368 		kstat_delete(clientp->ic_kstatp);
1369 		clientp->ic_kstatp = NULL;
1370 	}
1371 
1372 	/* release references and destroy the resources */
1373 	if (clientp->ic_init_state_class & IBMF_CI_INIT_SEND_TASKQ_DONE) {
1374 		if ((clientp->ic_reg_flags & IBMF_REG_FLAG_NO_OFFLOAD) == 0) {
1375 			taskq_destroy(clientp->ic_send_taskq);
1376 		}
1377 		clientp->ic_init_state_class &= ~IBMF_CI_INIT_SEND_TASKQ_DONE;
1378 	}
1379 
1380 	if (clientp->ic_init_state_class & IBMF_CI_INIT_RECV_TASKQ_DONE) {
1381 		if ((clientp->ic_reg_flags & IBMF_REG_FLAG_NO_OFFLOAD) == 0) {
1382 			taskq_destroy(clientp->ic_recv_taskq);
1383 		}
1384 		clientp->ic_init_state_class &= ~IBMF_CI_INIT_RECV_TASKQ_DONE;
1385 	}
1386 
1387 	mutex_destroy(&clientp->ic_mutex);
1388 	mutex_destroy(&clientp->ic_msg_mutex);
1389 	mutex_destroy(&clientp->ic_kstat_mutex);
1390 	cv_destroy(&clientp->ic_recv_cb_teardown_cv);
1391 	kmem_free((void *)clientp, sizeof (ibmf_client_t));
1392 
1393 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_free_client_end,
1394 	    IBMF_TNF_TRACE, "", "ibmf_i_free_client() exit\n");
1395 }
1396 
1397 /*
1398  * ibmf_i_validate_classes_and_port():
1399  *	Validate the class type and get the client structure
1400  */
1401 int
1402 ibmf_i_validate_classes_and_port(ibmf_ci_t *ibmf_cip,
1403     ibmf_register_info_t *client_infop)
1404 {
1405 	ibmf_client_t		*ibmf_clientp;
1406 	int			status;
1407 
1408 	IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L4,
1409 	    ibmf_i_validate_classes_and_port_start, IBMF_TNF_TRACE, "",
1410 	    "ibmf_i_validate_classes_and_port() enter, cip = %p, "
1411 	    "clientp = %p\n", tnf_opaque, cip, ibmf_cip,
1412 	    tnf_opaque, client_infop, client_infop);
1413 
1414 	/*
1415 	 * the Solaris implementation of IBMF does not support
1416 	 * the UNIVERSAL_CLASS
1417 	 */
1418 	if (client_infop->ir_client_class == UNIVERSAL_CLASS) {
1419 		IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
1420 		    ibmf_i_validate_classes_and_port_err, IBMF_TNF_ERROR, "",
1421 		    "%s\n", tnf_string, msg,
1422 		    "UNIVERSAL class is not supported");
1423 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
1424 		    ibmf_i_validate_classes_and_port_end, IBMF_TNF_TRACE, "",
1425 		    "ibmf_i_validate_classes_and_port() exit\n");
1426 		return (IBMF_NOT_SUPPORTED);
1427 	}
1428 
1429 	/*
1430 	 * Check if the client context already exists on the list
1431 	 * maintained in the CI context. If it is, then the client class
1432 	 * has already been registered for.
1433 	 */
1434 	status = ibmf_i_lookup_client_by_info(ibmf_cip, client_infop,
1435 	    &ibmf_clientp);
1436 	if (status != IBMF_SUCCESS) {
1437 		/* client class has not been previously registered for */
1438 		status = IBMF_SUCCESS;
1439 	} else {
1440 		IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
1441 		    ibmf_i_validate_classes_and_port_err, IBMF_TNF_ERROR, "",
1442 		    "client already registered, class = 0x%X\n",
1443 		    tnf_uint, class, client_infop->ir_client_class);
1444 		status = IBMF_PORT_IN_USE;
1445 	}
1446 
1447 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
1448 	    ibmf_i_validate_classes_and_port_end, IBMF_TNF_TRACE, "",
1449 	    "ibmf_i_validate_classes_and_port() exit\n");
1450 	return (status);
1451 }
1452 
1453 /*
1454  * ibmf_i_lookup_client_by_info():
1455  *	Get the client structure from the list
1456  */
1457 static int
1458 ibmf_i_lookup_client_by_info(ibmf_ci_t *ibmf_cip,
1459     ibmf_register_info_t *ir_client, ibmf_client_t **clientpp)
1460 {
1461 	ibmf_client_t *clientp;
1462 
1463 	IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L4,
1464 	    ibmf_i_lookup_client_by_info_start, IBMF_TNF_TRACE, "",
1465 	    "ibmf_i_lookup_client_by_info() enter, cip = %p, clientinfo = %p\n",
1466 	    tnf_opaque, cip, ibmf_cip, tnf_opaque, clientinfo, ir_client);
1467 
1468 	ASSERT(MUTEX_NOT_HELD(&ibmf_cip->ci_clients_mutex));
1469 
1470 	/*
1471 	 * walk the CI's client list searching for one with the specified class
1472 	 */
1473 	mutex_enter(&ibmf_cip->ci_clients_mutex);
1474 	clientp = ibmf_cip->ci_clients;
1475 	while (clientp != NULL) {
1476 		ibmf_client_info_t *tmp = &clientp->ic_client_info;
1477 		if (tmp->client_class == ir_client->ir_client_class &&
1478 		    ir_client->ir_client_class != UNIVERSAL_CLASS &&
1479 		    tmp->ci_guid == ir_client->ir_ci_guid &&
1480 		    tmp->port_num == ir_client->ir_port_num) {
1481 			/* found our match */
1482 			break;
1483 		}
1484 		clientp = clientp->ic_next;
1485 	}
1486 	mutex_exit(&ibmf_cip->ci_clients_mutex);
1487 
1488 	if (clientp != NULL) {
1489 		*clientpp = clientp;
1490 		IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4,
1491 		    ibmf_i_lookup_client_by_info_end, IBMF_TNF_TRACE, "",
1492 		    "ibmf_i_lookup_client_by_info(): clientp = %p\n",
1493 		    tnf_opaque, clientp, clientp);
1494 		return (IBMF_SUCCESS);
1495 	} else {
1496 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
1497 		    ibmf_i_lookup_client_by_info_end, IBMF_TNF_TRACE, "",
1498 		    "ibmf_i_lookup_client_by_info() exit\n");
1499 		return (IBMF_FAILURE);
1500 	}
1501 }
1502 
1503 /*
1504  * ibmf_i_add_client():
1505  *	Add a new client to the client list
1506  */
1507 void
1508 ibmf_i_add_client(ibmf_ci_t *ibmf_cip, ibmf_client_t *ibmf_clientp)
1509 {
1510 	IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_add_start,
1511 	    IBMF_TNF_TRACE, "",
1512 	    "ibmf_i_add_client() enter, cip = %p, clientp = %p\n",
1513 	    tnf_opaque, ibmf_ci, ibmf_cip, tnf_opaque, client, ibmf_clientp);
1514 
1515 	ASSERT(MUTEX_NOT_HELD(&ibmf_cip->ci_clients_mutex));
1516 
1517 	mutex_enter(&ibmf_cip->ci_clients_mutex);
1518 	ibmf_clientp->ic_next = NULL;
1519 	ibmf_clientp->ic_prev = ibmf_cip->ci_clients_last;
1520 	if (ibmf_cip->ci_clients == NULL) {
1521 		ibmf_cip->ci_clients = ibmf_clientp;
1522 	}
1523 	if (ibmf_cip->ci_clients_last) {
1524 		ibmf_cip->ci_clients_last->ic_next = ibmf_clientp;
1525 	}
1526 	ibmf_cip->ci_clients_last = ibmf_clientp;
1527 	mutex_exit(&ibmf_cip->ci_clients_mutex);
1528 
1529 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_add_end,
1530 	    IBMF_TNF_TRACE, "", "ibmf_i_add_client() exit\n");
1531 }
1532 
1533 /*
1534  * ibmf_i_delete_client():
1535  *	Delete a client from the client list
1536  */
1537 void
1538 ibmf_i_delete_client(ibmf_ci_t *ibmf_cip, ibmf_client_t *ibmf_clientp)
1539 {
1540 	IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_delete_client_start,
1541 	    IBMF_TNF_TRACE, "", "ibmf_i_delete_client() enter, "
1542 	    "ibmf_i_delete_client() enter, cip = %p, clientp = %p\n",
1543 	    tnf_opaque, ibmf_ci, ibmf_cip, tnf_opaque, client, ibmf_clientp);
1544 
1545 	ASSERT(MUTEX_NOT_HELD(&ibmf_cip->ci_clients_mutex));
1546 
1547 	mutex_enter(&ibmf_cip->ci_clients_mutex);
1548 	if (ibmf_clientp->ic_next)
1549 		ibmf_clientp->ic_next->ic_prev = ibmf_clientp->ic_prev;
1550 
1551 	if (ibmf_clientp->ic_prev)
1552 		ibmf_clientp->ic_prev->ic_next = ibmf_clientp->ic_next;
1553 
1554 	if (ibmf_cip->ci_clients == ibmf_clientp) {
1555 		ibmf_cip->ci_clients = ibmf_clientp->ic_next;
1556 	}
1557 	if (ibmf_cip->ci_clients_last == ibmf_clientp) {
1558 		ibmf_cip->ci_clients_last = ibmf_clientp->ic_prev;
1559 	}
1560 	mutex_exit(&ibmf_cip->ci_clients_mutex);
1561 
1562 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_delete_client_end,
1563 	    IBMF_TNF_TRACE, "", "ibmf_i_delete_client() exit\n");
1564 }
1565 
1566 /*
1567  * ibmf_i_get_qp():
1568  *	Get the QP structure based on the client class
1569  */
1570 int
1571 ibmf_i_get_qp(ibmf_ci_t *ibmf_cip, uint_t port_num, ibmf_client_type_t class,
1572     ibmf_qp_t **qppp)
1573 {
1574 	ibmf_qp_t		*qpp;
1575 	int			qp_num, status = IBMF_SUCCESS;
1576 
1577 	IBMF_TRACE_3(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_get_qp_start,
1578 	    IBMF_TNF_TRACE, "", "ibmf_i_get_qp() enter, cip = %p, "
1579 	    "port = %d, class = %x\n", tnf_opaque, ibmf_ci, ibmf_cip,
1580 	    tnf_int, port, port_num, tnf_opaque, class, class);
1581 
1582 	ASSERT(MUTEX_NOT_HELD(&ibmf_cip->ci_mutex));
1583 
1584 	mutex_enter(&ibmf_cip->ci_mutex);
1585 
1586 	/*
1587 	 * walk through the list of qps on this ci, looking for one that
1588 	 * corresponds to the type and class the caller is interested in.
1589 	 * If it is not there, we need allocate it from the transport. Since
1590 	 * qp0 & qp1 can only be allocated once, we maintain a reference count
1591 	 * and call the transport for allocation iff the ref count is 0.
1592 	 */
1593 	qp_num = (class == SUBN_AGENT || class == SUBN_MANAGER) ? 0 : 1;
1594 
1595 	qpp = ibmf_cip->ci_qp_list;
1596 	while (qpp != NULL) {
1597 		if (port_num == qpp->iq_port_num && qp_num == qpp->iq_qp_num)
1598 			break;
1599 		qpp = qpp->iq_next;
1600 	}
1601 
1602 	if (qpp == NULL) {
1603 		/*
1604 		 * allocate qp and add it the qp list; recheck to
1605 		 * catch races
1606 		 */
1607 		ibmf_qp_t *tqpp;
1608 
1609 		mutex_exit(&ibmf_cip->ci_mutex);
1610 
1611 		tqpp = (ibmf_qp_t *)kmem_zalloc(sizeof (ibmf_qp_t), KM_SLEEP);
1612 
1613 		/* check the list under lock */
1614 		mutex_enter(&ibmf_cip->ci_mutex);
1615 
1616 		qpp = ibmf_cip->ci_qp_list;
1617 		while (qpp != NULL) {
1618 			if (port_num == qpp->iq_port_num && qp_num ==
1619 			    qpp->iq_qp_num)
1620 				break;
1621 			qpp = qpp->iq_next;
1622 		}
1623 
1624 		if (qpp != NULL) {
1625 			/* some one raced past us and added to the list */
1626 			kmem_free((void *)tqpp, sizeof (ibmf_qp_t));
1627 		} else {
1628 			/* add this to the qp list */
1629 			qpp = tqpp;
1630 			_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qpp))
1631 			qpp->iq_next = NULL;
1632 			if (ibmf_cip->ci_qp_list == NULL)
1633 				ibmf_cip->ci_qp_list = qpp;
1634 			if (ibmf_cip->ci_qp_list_tail != NULL)
1635 				ibmf_cip->ci_qp_list_tail->iq_next = qpp;
1636 			ibmf_cip->ci_qp_list_tail = qpp;
1637 			qpp->iq_port_num = port_num;
1638 			qpp->iq_qp_num = qp_num;
1639 			qpp->iq_flags = IBMF_QP_FLAGS_INVALID;
1640 			mutex_init(&qpp->iq_mutex, NULL, MUTEX_DRIVER, NULL);
1641 		}
1642 	}
1643 
1644 	/* we now have a QP context */
1645 	for (;;) {
1646 		if (qpp->iq_flags == IBMF_QP_FLAGS_INITING) {
1647 
1648 			/* block till qp is in VALID state */
1649 			cv_wait(&ibmf_cip->ci_qp_cv, &ibmf_cip->ci_mutex);
1650 			continue;
1651 
1652 		}
1653 
1654 		if (qpp->iq_flags == IBMF_QP_FLAGS_UNINITING) {
1655 
1656 			/* block till qp is in INVALID state */
1657 			cv_wait(&ibmf_cip->ci_qp_cv, &ibmf_cip->ci_mutex);
1658 			continue;
1659 		}
1660 
1661 		if (qpp->iq_flags == IBMF_QP_FLAGS_INVALID) {
1662 			if ((status = ibmf_i_init_qp(ibmf_cip, qpp)) !=
1663 			    IBMF_SUCCESS) {
1664 				ibmf_qp_t *tqpp;
1665 
1666 				/*
1667 				 * Remove the QP context from the CI's list.
1668 				 * Only initialized QPs should be on the list.
1669 				 * We know that this QP is on the list, so
1670 				 * the list is not empty.
1671 				 */
1672 				tqpp = ibmf_cip->ci_qp_list;
1673 				if (tqpp == qpp) {
1674 					/* Only QP context on the list */
1675 					ibmf_cip->ci_qp_list = NULL;
1676 					ibmf_cip->ci_qp_list_tail = NULL;
1677 				}
1678 
1679 				/* Find the QP context before the last one */
1680 				if (tqpp != qpp) {
1681 					while (tqpp->iq_next != qpp) {
1682 						tqpp = tqpp->iq_next;
1683 					}
1684 
1685 					/*
1686 					 * We are at the second last element of
1687 					 * the list. Readjust the tail pointer.
1688 					 * Remove the last element from the
1689 					 * list.
1690 					 */
1691 					tqpp->iq_next = NULL;
1692 					ibmf_cip->ci_qp_list_tail = tqpp;
1693 				}
1694 
1695 				/* Free up the QP context */
1696 				kmem_free((void *)qpp, sizeof (ibmf_qp_t));
1697 
1698 				break;
1699 			}
1700 			continue;
1701 		}
1702 
1703 		if (qpp->iq_flags == IBMF_QP_FLAGS_INITED) {
1704 			qpp->iq_qp_ref++;
1705 			break;
1706 		}
1707 	}
1708 
1709 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*qpp))
1710 
1711 	mutex_exit(&ibmf_cip->ci_mutex);
1712 
1713 	if (status == IBMF_SUCCESS) {
1714 		*qppp = qpp;
1715 		IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_get_qp_end,
1716 		    IBMF_TNF_TRACE, "", "ibmf_i_get_qp() exit "
1717 		    "qp_handle = %p\n", tnf_opaque, qp_handle, qpp);
1718 		return (IBMF_SUCCESS);
1719 	} else {
1720 		IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1, ibmf_i_get_qp_err,
1721 		    IBMF_TNF_ERROR, "", "%s\n", tnf_string, msg,
1722 		    "ibmf_i_get_qp(): qp_not found");
1723 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_get_qp_end,
1724 		    IBMF_TNF_TRACE, "", "ibmf_i_get_qp() exit\n");
1725 		return (status);
1726 	}
1727 }
1728 
1729 /*
1730  * ibmf_i_release_qp():
1731  *	Drop the reference count on the QP structure
1732  */
1733 void
1734 ibmf_i_release_qp(ibmf_ci_t *ibmf_cip, ibmf_qp_t **qppp)
1735 {
1736 	ibmf_qp_t	*qpp;
1737 
1738 	IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_release_qp_start,
1739 	    IBMF_TNF_TRACE, "", "ibmf_i_release_qp() enter, cip = %p, "
1740 	    "qpp = %p\n", tnf_opaque, cip, ibmf_cip, tnf_opaque, qpp, *qppp);
1741 
1742 	ASSERT(MUTEX_NOT_HELD(&ibmf_cip->ci_mutex));
1743 
1744 	mutex_enter(&ibmf_cip->ci_mutex);
1745 	qpp = *qppp;
1746 	qpp->iq_qp_ref--;
1747 	if (qpp->iq_qp_ref == 0)
1748 		ibmf_i_uninit_qp(ibmf_cip, qpp);
1749 	mutex_exit(&ibmf_cip->ci_mutex);
1750 
1751 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_release_qp_end,
1752 	    IBMF_TNF_TRACE, "", "ibmf_i_release_qp() exit\n");
1753 }
1754 
1755 /*
1756  * ibmf_i_init_qp():
1757  *	Set up the QP context, request a QP from the IBT framework
1758  *	and initialize it
1759  */
1760 static int
1761 ibmf_i_init_qp(ibmf_ci_t *ibmf_cip, ibmf_qp_t *qpp)
1762 {
1763 	ibt_sqp_type_t		qp_type;
1764 	ibt_qp_alloc_attr_t	qp_attrs;
1765 	ibt_qp_hdl_t		qp_handle;
1766 	ibt_qp_info_t		qp_modify_attr;
1767 	ibt_status_t		ibt_status;
1768 	int			i, status;
1769 
1770 	IBMF_TRACE_3(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_qp_start,
1771 	    IBMF_TNF_TRACE, "", "ibmf_i_init_qp() enter, cip = %p, "
1772 	    "port = %d, qp = %d\n", tnf_opaque, ibmf_ci, ibmf_cip, tnf_int,
1773 	    port, qpp->iq_port_num, tnf_int, num, qpp->iq_qp_num);
1774 
1775 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(qpp->iq_qp_handle))
1776 
1777 	ASSERT(MUTEX_HELD(&ibmf_cip->ci_mutex));
1778 
1779 	qpp->iq_flags = IBMF_QP_FLAGS_INITING;
1780 	mutex_exit(&ibmf_cip->ci_mutex);
1781 	if (qpp->iq_qp_handle) {	/* closed but not yet freed */
1782 		ibt_status = ibt_free_qp(qpp->iq_qp_handle);
1783 		if (ibt_status != IBT_SUCCESS) {
1784 			IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
1785 			    ibmf_i_init_qp_err, IBMF_TNF_ERROR, "",
1786 			    "%s, status = %d\n", tnf_string, msg,
1787 			    "ibt_free_qp returned error",
1788 			    tnf_uint, ibt_status, ibt_status);
1789 		}
1790 		qpp->iq_qp_handle = NULL;
1791 	}
1792 	ASSERT(qpp->iq_qp_num == 0 || qpp->iq_qp_num == 1);
1793 	if (qpp->iq_qp_num == 0)
1794 		qp_type = IBT_SMI_SQP;
1795 	else
1796 		qp_type = IBT_GSI_SQP;
1797 	qp_attrs.qp_scq_hdl = ibmf_cip->ci_cq_handle;
1798 	qp_attrs.qp_rcq_hdl = ibmf_cip->ci_cq_handle;
1799 	qp_attrs.qp_pd_hdl = ibmf_cip->ci_pd;
1800 	qp_attrs.qp_sizes.cs_sq_sgl = 1;
1801 	qp_attrs.qp_sizes.cs_rq_sgl = IBMF_MAX_RQ_WR_SGL_ELEMENTS;
1802 	qp_attrs.qp_sizes.cs_sq = ibmf_send_wqes_posted_per_qp;
1803 	qp_attrs.qp_sizes.cs_rq = ibmf_recv_wqes_posted_per_qp;
1804 	qp_attrs.qp_flags = IBT_ALL_SIGNALED;
1805 	qp_attrs.qp_alloc_flags = IBT_QP_NO_FLAGS;
1806 
1807 	/* call the IB transport to allocate a special QP */
1808 	ibt_status = ibt_alloc_special_qp(ibmf_cip->ci_ci_handle,
1809 	    qpp->iq_port_num, qp_type, &qp_attrs, NULL, &qp_handle);
1810 	if (ibt_status != IBT_SUCCESS) {
1811 		mutex_enter(&ibmf_cip->ci_mutex);
1812 		qpp->iq_flags = IBMF_QP_FLAGS_INVALID;
1813 		cv_broadcast(&ibmf_cip->ci_qp_cv);
1814 		IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1, ibmf_i_init_qp_err,
1815 		    IBMF_TNF_ERROR, "", "ibmf_i_init_qp() error status = %d\n",
1816 		    tnf_uint, ibt_status, ibt_status);
1817 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_qp_end,
1818 		    IBMF_TNF_TRACE, "", "ibmf_i_init_qp() exit\n");
1819 		return (IBMF_TRANSPORT_FAILURE);
1820 	}
1821 
1822 	/* initialize qpp */
1823 	qpp->iq_qp_handle = qp_handle;
1824 	qp_modify_attr.qp_trans = IBT_UD_SRV;
1825 	qp_modify_attr.qp_flags = IBT_CEP_NO_FLAGS;
1826 
1827 	/* get the pkey index for the specified pkey */
1828 	if (ibmf_i_get_pkeyix(ibmf_cip->ci_ci_handle, IBMF_P_KEY_DEF_LIMITED,
1829 	    qpp->iq_port_num, &qp_modify_attr.qp_transport.ud.ud_pkey_ix) !=
1830 	    IBMF_SUCCESS) {
1831 		ibt_status = ibt_free_qp(qpp->iq_qp_handle);
1832 		if (ibt_status != IBT_SUCCESS) {
1833 			IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
1834 			    ibmf_i_init_qp_err, IBMF_TNF_ERROR, "",
1835 			    "%s, status = %d\n", tnf_string, msg,
1836 			    "ibt_free_qp returned error",
1837 			    tnf_uint, ibt_status, ibt_status);
1838 		}
1839 		mutex_enter(&ibmf_cip->ci_mutex);
1840 		qpp->iq_flags = IBMF_QP_FLAGS_INVALID;
1841 		cv_broadcast(&ibmf_cip->ci_qp_cv);
1842 		IBMF_TRACE_0(IBMF_TNF_NODEBUG, DPRINT_L1, ibmf_i_init_qp_err,
1843 		    IBMF_TNF_ERROR, "", "ibmf_init_qp(): failed to get "
1844 		    "pkey index\n");
1845 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_qp_end,
1846 		    IBMF_TNF_TRACE, "", "ibmf_i_init_qp() exit\n");
1847 		return (IBMF_FAILURE);
1848 	}
1849 	qp_modify_attr.qp_transport.ud.ud_sq_psn = 0;
1850 	qp_modify_attr.qp_transport.ud.ud_port = qpp->iq_port_num;
1851 	qp_modify_attr.qp_transport.ud.ud_qkey = IBMF_MGMT_Q_KEY;
1852 
1853 	/* call the IB transport to initialize the QP */
1854 	ibt_status = ibt_initialize_qp(qp_handle, &qp_modify_attr);
1855 	if (ibt_status != IBT_SUCCESS) {
1856 		ibt_status = ibt_free_qp(qpp->iq_qp_handle);
1857 		if (ibt_status != IBT_SUCCESS) {
1858 			IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
1859 			    ibmf_i_init_qp_err, IBMF_TNF_ERROR, "",
1860 			    "%s, status = %d\n", tnf_string, msg,
1861 			    "ibt_free_qp returned error",
1862 			    tnf_uint, ibt_status, ibt_status);
1863 		}
1864 		mutex_enter(&ibmf_cip->ci_mutex);
1865 		qpp->iq_flags = IBMF_QP_FLAGS_INVALID;
1866 		cv_broadcast(&ibmf_cip->ci_qp_cv);
1867 		IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1, ibmf_i_init_qp_err,
1868 		    IBMF_TNF_ERROR, "", "ibmf_init_qp(): error status = %d\n",
1869 		    tnf_uint, ibt_status, ibt_status);
1870 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_qp_end,
1871 		    IBMF_TNF_TRACE, "", "ibmf_i_init_qp() exit\n");
1872 		return (IBMF_TRANSPORT_FAILURE);
1873 	}
1874 
1875 	/* post receive wqes to the RQ to handle unsolicited inbound packets  */
1876 	for (i = 0; i < ibmf_recv_wqes_per_port; i++) {
1877 		status =  ibmf_i_post_recv_buffer(ibmf_cip, qpp,
1878 		    B_TRUE, IBMF_QP_HANDLE_DEFAULT);
1879 		if (status != IBMF_SUCCESS) {
1880 			IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L1,
1881 			    ibmf_i_init_qp, IBMF_TNF_TRACE, "",
1882 			    "%s\n", tnf_string, msg, "ibmf_i_init_qp(): "
1883 			    "ibmf_i_post_recv_buffer() failed");
1884 		}
1885 	}
1886 	mutex_enter(&ibmf_cip->ci_mutex);
1887 
1888 	/* set the state and signal blockers */
1889 	qpp->iq_flags = IBMF_QP_FLAGS_INITED;
1890 	cv_broadcast(&ibmf_cip->ci_qp_cv);
1891 
1892 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_qp_end,
1893 	    IBMF_TNF_TRACE, "", "ibmf_i_init_qp() exit\n");
1894 	return (IBMF_SUCCESS);
1895 }
1896 
1897 /*
1898  * ibmf_i_uninit_qp():
1899  *	Invalidate the QP context
1900  */
1901 static void
1902 ibmf_i_uninit_qp(ibmf_ci_t *ibmf_cip, ibmf_qp_t *qpp)
1903 {
1904 	ibt_status_t		status;
1905 
1906 	IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_uninit_qp_start,
1907 	    IBMF_TNF_TRACE, "", "ibmf_i_uninit_qp() enter, cip = %p "
1908 	    "qpp = %p\n", tnf_opaque, cip, ibmf_cip, tnf_opaque, qpp, qpp);
1909 
1910 	ASSERT(MUTEX_HELD(&ibmf_cip->ci_mutex));
1911 
1912 	/* mark the state as uniniting */
1913 	ASSERT(qpp->iq_qp_ref == 0);
1914 	qpp->iq_flags = IBMF_QP_FLAGS_UNINITING;
1915 	mutex_exit(&ibmf_cip->ci_mutex);
1916 
1917 	/* note: we ignore error values from ibt_flush_qp */
1918 	status = ibt_flush_qp(qpp->iq_qp_handle);
1919 	if (status != IBT_SUCCESS) {
1920 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L2,
1921 		    ibmf_i_uninit_qp_err, IBMF_TNF_ERROR, "",
1922 		    "ibmf_i_uninit_qp(): %s, status = %d\n", tnf_string, msg,
1923 		    "ibt_flush_qp returned error", tnf_int, status, status);
1924 	}
1925 
1926 	/* mark state as INVALID and signal any blockers */
1927 	mutex_enter(&ibmf_cip->ci_mutex);
1928 	qpp->iq_flags = IBMF_QP_FLAGS_INVALID;
1929 	cv_broadcast(&ibmf_cip->ci_qp_cv);
1930 
1931 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_uninit_qp_end,
1932 	    IBMF_TNF_TRACE, "", "ibmf_i_uninit_qp() exit\n");
1933 }
1934 
1935 /*
1936  * ibmf_i_alloc_msg():
1937  *	Allocate and set up a message context
1938  */
1939 int
1940 ibmf_i_alloc_msg(ibmf_client_t *clientp, ibmf_msg_impl_t **msgp, int km_flags)
1941 {
1942 	ibmf_msg_impl_t *msgimplp;
1943 
1944 	IBMF_TRACE_3(IBMF_TNF_DEBUG, DPRINT_L4,
1945 	    ibmf_i_alloc_msg_start, IBMF_TNF_TRACE, "",
1946 	    "ibmf_i_alloc_msg() enter, clientp = %p, msg = %p, "
1947 	    " kmflags = %d\n", tnf_opaque, clientp, clientp, tnf_opaque, msg,
1948 	    *msgp, tnf_int, km_flags, km_flags);
1949 
1950 	/* allocate the message context */
1951 	msgimplp = (ibmf_msg_impl_t *)kmem_zalloc(sizeof (ibmf_msg_impl_t),
1952 	    km_flags);
1953 	if (msgimplp != NULL) {
1954 		if (km_flags == KM_SLEEP) {
1955 			ibmf_i_pop_ud_dest_thread(clientp->ic_myci);
1956 		}
1957 	} else {
1958 		IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
1959 		    ibmf_i_alloc_msg_err, IBMF_TNF_ERROR, "",
1960 		    "ibmf_i_alloc_msg(): %s\n",
1961 		    tnf_string, msg, "kmem_xalloc failed");
1962 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_alloc_msg_end,
1963 		    IBMF_TNF_TRACE, "", "ibmf_i_alloc_msg() exit\n");
1964 		return (IBMF_NO_RESOURCES);
1965 	}
1966 
1967 	*msgp = msgimplp;
1968 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_alloc_msg_end,
1969 	    IBMF_TNF_TRACE, "", "ibmf_i_alloc_msg() exit\n");
1970 	return (IBMF_SUCCESS);
1971 }
1972 
1973 /*
1974  * ibmf_i_free_msg():
1975  *	frees up all buffers allocated by IBMF for
1976  * 	this message context, and then frees up the context
1977  */
1978 void
1979 ibmf_i_free_msg(ibmf_msg_impl_t *msgimplp)
1980 {
1981 	ibmf_msg_bufs_t *msgbufp = &msgimplp->im_msgbufs_recv;
1982 	ibmf_client_t *clientp = (ibmf_client_t *)msgimplp->im_client;
1983 	uint32_t	cl_hdr_sz, cl_hdr_off;
1984 
1985 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4,
1986 	    ibmf_i_free_msg_start, IBMF_TNF_TRACE, "",
1987 	    "ibmf_i_free_msg() enter, msg = %p\n", tnf_opaque, msg, msgimplp);
1988 
1989 	/* free up the UD destination resource */
1990 	if (msgimplp->im_ibmf_ud_dest != NULL) {
1991 		ibmf_i_free_ud_dest(clientp, msgimplp);
1992 		ibmf_i_clean_ud_dest_list(clientp->ic_myci, B_FALSE);
1993 	}
1994 
1995 	/* free up the receive buffer if allocated previously */
1996 	if (msgbufp->im_bufs_mad_hdr != NULL) {
1997 		ibmf_i_mgt_class_to_hdr_sz_off(
1998 		    msgbufp->im_bufs_mad_hdr->MgmtClass,
1999 		    &cl_hdr_sz, &cl_hdr_off);
2000 		kmem_free(msgbufp->im_bufs_mad_hdr, sizeof (ib_mad_hdr_t) +
2001 		    cl_hdr_off + msgbufp->im_bufs_cl_hdr_len +
2002 		    msgbufp->im_bufs_cl_data_len);
2003 		mutex_enter(&clientp->ic_kstat_mutex);
2004 		IBMF_SUB32_KSTATS(clientp, recv_bufs_alloced, 1);
2005 		mutex_exit(&clientp->ic_kstat_mutex);
2006 	}
2007 
2008 	/* destroy the message mutex */
2009 	mutex_destroy(&msgimplp->im_mutex);
2010 
2011 	/* free the message context */
2012 	kmem_free(msgimplp, sizeof (ibmf_msg_impl_t));
2013 
2014 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_free_msg_end,
2015 	    IBMF_TNF_TRACE, "", "ibmf_i_free_msg() exit\n");
2016 }
2017 
2018 /*
2019  * ibmf_i_msg_transport():
2020  *	Send a message posted by the IBMF client using the RMPP protocol
2021  *	if specified
2022  */
int
ibmf_i_msg_transport(ibmf_client_t *clientp, ibmf_qp_handle_t ibmf_qp_handle,
    ibmf_msg_impl_t *msgimplp, int blocking)
{
	ib_mad_hdr_t	*madhdrp;
	ibmf_msg_bufs_t *msgbufp, *smsgbufp;
	uint32_t	cl_hdr_sz, cl_hdr_off;
	boolean_t	isDS = 0; /* double sided (sequenced) transaction */
	boolean_t	error = B_FALSE;
	int		status = IBMF_SUCCESS;
	uint_t		refcnt;
	char		errmsg[128];
	timeout_id_t	msg_rp_unset_id, msg_tr_unset_id;

	IBMF_TRACE_4(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_msg_transport_start,
	    IBMF_TNF_TRACE, "", "ibmf_i_msg_transport(): clientp = 0x%p, "
	    "qphdl = 0x%p, msgp = 0x%p, block = %d\n",
	    tnf_opaque, clientp, clientp, tnf_opaque, qphdl, ibmf_qp_handle,
	    tnf_opaque, msg, msgimplp, tnf_uint, block, blocking);

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*msgimplp, *msgbufp))

	/* lock the message while its transaction state is being set up */
	mutex_enter(&msgimplp->im_mutex);

	madhdrp = msgimplp->im_msgbufs_send.im_bufs_mad_hdr;
	msgbufp = &msgimplp->im_msgbufs_recv;
	smsgbufp = &msgimplp->im_msgbufs_send;

	/*
	 * check if transp_op_flags specify that the transaction is
	 * a single packet, then the size of the message header + data
	 * does not exceed 256 bytes
	 */
	if ((msgimplp->im_transp_op_flags & IBMF_MSG_TRANS_FLAG_RMPP) == 0) {
		ibmf_i_mgt_class_to_hdr_sz_off(
		    smsgbufp->im_bufs_mad_hdr->MgmtClass,
		    &cl_hdr_sz, &cl_hdr_off);

		if ((sizeof (ib_mad_hdr_t) + cl_hdr_off +
		    smsgbufp->im_bufs_cl_hdr_len +
		    smsgbufp->im_bufs_cl_data_len) > IBMF_MAD_SIZE) {
			mutex_exit(&msgimplp->im_mutex);
			(void) sprintf(errmsg,
			    "Non-RMPP message size is too large");
			error = B_TRUE;
			status = IBMF_BAD_SIZE;
			goto bail;
		}
	}

	/* more message context initialization */
	msgimplp->im_qp_hdl 	= ibmf_qp_handle;
	msgimplp->im_tid	= b2h64(madhdrp->TransactionID);
	msgimplp->im_mgt_class 	= madhdrp->MgmtClass;
	msgimplp->im_unsolicited = B_FALSE;
	msgimplp->im_trans_state_flags = IBMF_TRANS_STATE_FLAG_UNINIT;
	bzero(&msgimplp->im_rmpp_ctx, sizeof (ibmf_rmpp_ctx_t));
	msgimplp->im_rmpp_ctx.rmpp_state = IBMF_RMPP_STATE_UNDEFINED;
	msgimplp->im_rmpp_ctx.rmpp_respt = IBMF_RMPP_DEFAULT_RRESPT;
	msgimplp->im_rmpp_ctx.rmpp_retry_cnt = 0;
	msgimplp->im_ref_count = 0;
	msgimplp->im_pending_send_compls = 0;
	/*
	 * Hold a reference on the message for the duration of the send;
	 * it is dropped further below once the transaction has been
	 * handed off (see the comment ahead of the later decrement).
	 */
	IBMF_MSG_INCR_REFCNT(msgimplp);
	/* apply the default retransmission parameters where unset */
	if (msgimplp->im_retrans.retrans_retries == 0)
		msgimplp->im_retrans.retrans_retries = IBMF_RETRANS_DEF_RETRIES;
	if (msgimplp->im_retrans.retrans_rtv == 0)
		msgimplp->im_retrans.retrans_rtv = IBMF_RETRANS_DEF_RTV;
	if (msgimplp->im_retrans.retrans_rttv == 0)
		msgimplp->im_retrans.retrans_rttv = IBMF_RETRANS_DEF_RTTV;

	IBMF_TRACE_5(IBMF_TNF_DEBUG, DPRINT_L3, ibmf_i_msg_transport,
	    IBMF_TNF_TRACE, "", "ibmf_i_msg_transport(): %s, msgp = 0x%p, "
	    "class = 0x%x, method = 0x%x, attributeID = 0x%x\n",
	    tnf_string, msg, "Added message", tnf_opaque, msgimplp,
	    msgimplp, tnf_opaque, class, msgimplp->im_mgt_class, tnf_opaque,
	    method, madhdrp->R_Method, tnf_opaque, attrib_id,
	    b2h16(madhdrp->AttributeID));

	IBMF_TRACE_3(IBMF_TNF_DEBUG, DPRINT_L3, ibmf_i_msg_transport,
	    IBMF_TNF_TRACE, "", "ibmf_i_msg_transport(): msgp = 0x%p, "
	    "TID = 0x%p, transp_op_flags = 0x%x\n",
	    tnf_opaque, msgimplp, msgimplp, tnf_opaque, tid, msgimplp->im_tid,
	    tnf_uint, transp_op_flags, msgimplp->im_transp_op_flags);

	/*
	 * Do not allow reuse of a message where the receive buffers are
	 * being used as send buffers if this is a sequenced transaction
	 */
	if ((madhdrp == msgbufp->im_bufs_mad_hdr) &&
	    (msgimplp->im_transp_op_flags & IBMF_MSG_TRANS_FLAG_SEQ)) {
		IBMF_MSG_DECR_REFCNT(msgimplp);
		mutex_exit(&msgimplp->im_mutex);
		(void) sprintf(errmsg,
		    "Send and Recv buffers are the same for sequenced"
		    " transaction");
		error = B_TRUE;
		status = IBMF_REQ_INVALID;
		goto bail;
	}

	/* set transaction flags */
	if (msgimplp->im_transp_op_flags & IBMF_MSG_TRANS_FLAG_SEQ)
		msgimplp->im_flags |= IBMF_MSG_FLAGS_SEQUENCED;

	if (msgimplp->im_transp_op_flags & IBMF_MSG_TRANS_FLAG_RMPP)
		msgimplp->im_flags |= IBMF_MSG_FLAGS_SEND_RMPP;
	else
		msgimplp->im_flags |= IBMF_MSG_FLAGS_NOT_RMPP;

	/* free recv buffers if this is a reused message */
	if ((msgbufp->im_bufs_mad_hdr != NULL) &&
	    (msgimplp->im_transp_op_flags & IBMF_MSG_TRANS_FLAG_SEQ)) {

		IBMF_TRACE_3(IBMF_TNF_DEBUG, DPRINT_L3, ibmf_i_msg_transport,
		    IBMF_TNF_TRACE, "", "ibmf_i_msg_transport(): %s, "
		    "msgp = 0x%p, mad_hdrp = 0x%p\n", tnf_string, msg,
		    "Freeing recv buffer for reused message",
		    tnf_opaque, msgimplp, msgimplp,
		    tnf_opaque, mad_hdr, msgbufp->im_bufs_mad_hdr);

		ibmf_i_mgt_class_to_hdr_sz_off(
		    msgbufp->im_bufs_mad_hdr->MgmtClass,
		    &cl_hdr_sz, &cl_hdr_off);

		kmem_free(msgbufp->im_bufs_mad_hdr, sizeof (ib_mad_hdr_t) +
		    cl_hdr_off + msgbufp->im_bufs_cl_hdr_len +
		    msgbufp->im_bufs_cl_data_len);

		/* clear all the stale receive buffer pointers and lengths */
		msgbufp->im_bufs_mad_hdr = NULL;
		msgbufp->im_bufs_cl_hdr = NULL;
		msgbufp->im_bufs_cl_hdr_len = 0;
		msgbufp->im_bufs_cl_data = NULL;
		msgbufp->im_bufs_cl_data_len = 0;
	}

	mutex_exit(&msgimplp->im_mutex);

	/* initialize (and possibly allocate) the address handle */
	status = ibmf_i_alloc_ud_dest(clientp, msgimplp,
	    &msgimplp->im_ud_dest, blocking);
	if (status != IBMF_SUCCESS) {
		(void) sprintf(errmsg, "ibmf_i_alloc_ud_dest() failed");
		error = B_TRUE;
		goto bail;
	}

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*msgimplp, *msgbufp))

	/* add the message to the client context's message list */
	ibmf_i_client_add_msg(clientp, msgimplp);

	mutex_enter(&msgimplp->im_mutex);

	/* no one should have touched our state */
	ASSERT(msgimplp->im_trans_state_flags == IBMF_TRANS_STATE_FLAG_UNINIT);

	/* transition out of uninit state */
	msgimplp->im_trans_state_flags = IBMF_TRANS_STATE_FLAG_INIT;

	IBMF_TRACE_5(IBMF_TNF_DEBUG, DPRINT_L3, ibmf_i_msg_transport,
	    IBMF_TNF_TRACE, "", "ibmf_i_msg_transport(): msgp = 0x%p, "
	    "local_lid = 0x%x, remote_lid = 0x%x, remote_qpn = 0x%x, "
	    "block = %d\n", tnf_opaque, msgp, msgimplp,
	    tnf_uint, local_lid, msgimplp->im_local_addr.ia_local_lid,
	    tnf_uint, remote_lid, msgimplp->im_local_addr.ia_remote_lid,
	    tnf_uint, remote_qpn, msgimplp->im_local_addr.ia_remote_qno,
	    tnf_uint, blocking, blocking);

	IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L3, ibmf_i_msg_transport,
	    IBMF_TNF_TRACE, "", "ibmf_i_msg_transport(): "
	    "unsetting timer %p %d\n", tnf_opaque, msgimplp, msgimplp,
	    tnf_opaque, timeout_id, msgimplp->im_rp_timeout_id);

	ASSERT(msgimplp->im_rp_timeout_id == 0);
	ASSERT(msgimplp->im_tr_timeout_id == 0);

	if ((msgimplp->im_transp_op_flags & IBMF_MSG_TRANS_FLAG_RMPP) == 0) {

		/* Non-RMPP transaction */

		status = ibmf_i_send_single_pkt(clientp, ibmf_qp_handle,
		    msgimplp, blocking);
		if (status != IBMF_SUCCESS) {
			IBMF_MSG_DECR_REFCNT(msgimplp);
			mutex_exit(&msgimplp->im_mutex);
			ibmf_i_client_rem_msg(clientp, msgimplp, &refcnt);
			(void) sprintf(errmsg, "Single packet send failed");
			error = B_TRUE;
			goto bail;
		}

	/*
	 * NOTE(review): this condition is necessarily true when reached,
	 * since the non-RMPP case was handled by the branch above.
	 */
	} else if (msgimplp->im_transp_op_flags & IBMF_MSG_TRANS_FLAG_RMPP) {

		/* RMPP transaction */

		/* check if client supports RMPP traffic */
		if ((clientp->ic_reg_flags & IBMF_REG_FLAG_RMPP) == 0) {
			IBMF_MSG_DECR_REFCNT(msgimplp);
			mutex_exit(&msgimplp->im_mutex);
			ibmf_i_client_rem_msg(clientp, msgimplp, &refcnt);
			(void) sprintf(errmsg, "Class does not support RMPP");
			error = B_TRUE;
			status = IBMF_BAD_RMPP_OPT;
			goto bail;
		}

		/* for non-special QPs, check if QP supports RMPP traffic */
		if (ibmf_qp_handle != IBMF_QP_HANDLE_DEFAULT &&
		    (((ibmf_alt_qp_t *)ibmf_qp_handle)->isq_supports_rmpp ==
		    B_FALSE)) {
			IBMF_MSG_DECR_REFCNT(msgimplp);
			mutex_exit(&msgimplp->im_mutex);
			ibmf_i_client_rem_msg(clientp, msgimplp, &refcnt);
			(void) sprintf(errmsg, "QP does not support RMPP");
			error = B_TRUE;
			status = IBMF_BAD_RMPP_OPT;
			goto bail;
		}

		/* check if transaction is "double sided" (send and receive) */
		if (msgimplp->im_transp_op_flags & IBMF_MSG_TRANS_FLAG_SEQ)
			isDS = 1;

		status = ibmf_i_send_rmpp_pkts(clientp, ibmf_qp_handle,
		    msgimplp, isDS, blocking);
		if (status != IBMF_SUCCESS) {
			IBMF_MSG_DECR_REFCNT(msgimplp);
			mutex_exit(&msgimplp->im_mutex);
			ibmf_i_client_rem_msg(clientp, msgimplp, &refcnt);
			(void) sprintf(errmsg, "RMPP packets send failed");
			error = B_TRUE;
			goto bail;
		}
	}

	/*
	 * decrement the reference count so notify_client() can remove the
	 * message when it's ready
	 */
	IBMF_MSG_DECR_REFCNT(msgimplp);

	/* check if the transaction is a blocking transaction */
	if (blocking && ((msgimplp->im_trans_state_flags &
	    IBMF_TRANS_STATE_FLAG_SIGNALED) == 0)) {

		/* indicate that the tranaction is waiting */
		msgimplp->im_trans_state_flags |= IBMF_TRANS_STATE_FLAG_WAIT;

		IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L3, ibmf_i_msg_transport,
		    IBMF_TNF_TRACE, "",
		    "ibmf_i_msg_transport(): %s, msgp = 0x%p\n",
		    tnf_string, msg, "blocking for completion",
		    tnf_opaque, msgimplp, msgimplp);

		/* wait for transaction completion */
		cv_wait(&msgimplp->im_trans_cv, &msgimplp->im_mutex);

		IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L3, ibmf_i_msg_transport,
		    IBMF_TNF_TRACE, "",
		    "ibmf_i_msg_transport(): %s, msgp = 0x%p\n",
		    tnf_string, msg, "unblocking for completion",
		    tnf_opaque, msgimplp, msgimplp);

		/* clean up flags */
		msgimplp->im_trans_state_flags &= ~IBMF_TRANS_STATE_FLAG_WAIT;
		msgimplp->im_flags &= ~IBMF_MSG_FLAGS_BUSY;

		if (msgimplp->im_msg_status != IBMF_SUCCESS) {

			IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
			    ibmf_i_msg_transport_err, IBMF_TNF_ERROR, "",
			    "ibmf_i_msg_transport(): msg_status = %d\n",
			    tnf_uint, msgstatus, msgimplp->im_msg_status);

			status = msgimplp->im_msg_status;
		}
	/* blocking transaction that was already signaled: no need to wait */
	} else if (blocking && (msgimplp->im_trans_state_flags &
	    IBMF_TRANS_STATE_FLAG_SIGNALED)) {
		msgimplp->im_flags &= ~IBMF_MSG_FLAGS_BUSY;

		if (msgimplp->im_msg_status != IBMF_SUCCESS) {
			IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
			    ibmf_i_msg_transport_err, IBMF_TNF_ERROR, "",
			    "ibmf_i_msg_transport(): msg_status = %d\n",
			    tnf_uint, msgstatus, msgimplp->im_msg_status);
			status = msgimplp->im_msg_status;
		}
	}

	/* snapshot and clear the unset-timeout ids while holding the mutex */
	msg_rp_unset_id = msg_tr_unset_id = 0;
	msg_rp_unset_id = msgimplp->im_rp_unset_timeout_id;
	msg_tr_unset_id = msgimplp->im_tr_unset_timeout_id;
	msgimplp->im_rp_unset_timeout_id = 0;
	msgimplp->im_tr_unset_timeout_id = 0;

	mutex_exit(&msgimplp->im_mutex);

	/* Unset the timers */
	if (msg_rp_unset_id != 0) {
		(void) untimeout(msg_rp_unset_id);
	}

	if (msg_tr_unset_id != 0) {
		(void) untimeout(msg_tr_unset_id);
	}

	/* increment kstats of the number of sent messages */
	mutex_enter(&clientp->ic_kstat_mutex);
	IBMF_ADD32_KSTATS(clientp, msgs_sent, 1);
	mutex_exit(&clientp->ic_kstat_mutex);

bail:
	if (error) {
		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
		    ibmf_i_msg_transport_err, IBMF_TNF_ERROR, "",
		    "ibmf_i_msg_transport(): %s, msgp = 0x%p\n",
		    tnf_string, msg, errmsg, tnf_opaque, msgimplp, msgimplp);
	}

	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4,	ibmf_i_msg_transport_end,
	    IBMF_TNF_TRACE, "", "ibmf_i_msg_transport() exit, status = %d\n",
	    tnf_uint, status, status);

	return (status);
}
2348 
2349 /*
2350  * ibmf_i_init_msg():
2351  *	Initialize the message fields
2352  */
2353 void
2354 ibmf_i_init_msg(ibmf_msg_impl_t *msgimplp, ibmf_msg_cb_t trans_cb,
2355     void *trans_cb_arg, ibmf_retrans_t *retrans, boolean_t block)
2356 {
2357 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_msg_start,
2358 	    IBMF_TNF_TRACE, "", "ibmf_i_init_msg() enter\n");
2359 
2360 	_NOTE(ASSUMING_PROTECTED(msgimplp->im_trans_cb,
2361 	    msgimplp->im_trans_cb_arg))
2362 
2363 	if (block == B_TRUE)
2364 		msgimplp->im_msg_flags |= IBMF_MSG_FLAGS_BLOCKING;
2365 	msgimplp->im_trans_cb = trans_cb;
2366 	msgimplp->im_trans_cb_arg = trans_cb_arg;
2367 
2368 	bzero(&msgimplp->im_retrans, sizeof (ibmf_retrans_t));
2369 	if (retrans != NULL) {
2370 		bcopy((void *)retrans, (void *)&msgimplp->im_retrans,
2371 		    sizeof (ibmf_retrans_t));
2372 	}
2373 
2374 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_init_msg_end,
2375 	    IBMF_TNF_TRACE, "", "ibmf_i_init_msg() exit\n");
2376 }
2377 
2378 /*
2379  * ibmf_i_alloc_qp():
2380  *	Allocate a QP context for the alternate QPs
2381  */
int
ibmf_i_alloc_qp(ibmf_client_t *clientp, ib_pkey_t p_key, ib_qkey_t q_key,
    uint_t flags, ibmf_qp_handle_t *ibmf_qp_handlep)
{
	ibmf_ci_t		*ibmf_cip = clientp->ic_myci;
	ibt_qp_alloc_attr_t	qp_attrs;
	ibt_qp_info_t		qp_modify_attr;
	ibmf_alt_qp_t		*qp_ctx;
	uint16_t		pkey_ix;
	ibt_status_t		ibt_status;
	int			i, blocking;
	boolean_t		error = B_FALSE;
	int			status = IBMF_SUCCESS;
	char			errmsg[128];


	IBMF_TRACE_3(IBMF_TNF_DEBUG, DPRINT_L4,
	    ibmf_i_alloc_qp_start, IBMF_TNF_TRACE, "",
	    "ibmf_i_alloc_qp() enter, clientp = %p, pkey = %x, qkey = %x \n",
	    tnf_opaque, clientp, clientp, tnf_uint, p_key, p_key,
	    tnf_uint, q_key, q_key);

	/*
	 * get the pkey index associated with this pkey if present in table
	 */
	if (ibmf_i_get_pkeyix(clientp->ic_ci_handle, p_key,
	    clientp->ic_client_info.port_num, &pkey_ix) != IBMF_SUCCESS) {
		(void) sprintf(errmsg, "pkey not in table, pkey = %x", p_key);
		error = B_TRUE;
		status = IBMF_FAILURE;
		goto bail;
	}

	/* allocate QP context memory */
	qp_ctx = (ibmf_alt_qp_t *)kmem_zalloc(sizeof (ibmf_alt_qp_t),
	    (flags & IBMF_ALLOC_SLEEP) ? KM_SLEEP : KM_NOSLEEP);
	if (qp_ctx == NULL) {
		(void) sprintf(errmsg, "failed to kmem_zalloc qp ctx");
		error = B_TRUE;
		status = IBMF_NO_RESOURCES;
		goto bail;
	}

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qp_ctx));

	/* setup the qp attrs for the alloc call */
	qp_attrs.qp_scq_hdl = ibmf_cip->ci_alt_cq_handle;
	qp_attrs.qp_rcq_hdl = ibmf_cip->ci_alt_cq_handle;
	qp_attrs.qp_pd_hdl = ibmf_cip->ci_pd;
	qp_attrs.qp_sizes.cs_sq_sgl = IBMF_MAX_SQ_WR_SGL_ELEMENTS;
	qp_attrs.qp_sizes.cs_rq_sgl = IBMF_MAX_RQ_WR_SGL_ELEMENTS;
	qp_attrs.qp_sizes.cs_sq = ibmf_send_wqes_posted_per_qp;
	qp_attrs.qp_sizes.cs_rq = ibmf_recv_wqes_posted_per_qp;
	qp_attrs.qp_flags = IBT_ALL_SIGNALED;
	qp_attrs.qp_alloc_flags = IBT_QP_NO_FLAGS;

	/* request IBT for a qp with the desired attributes */
	ibt_status = ibt_alloc_qp(clientp->ic_ci_handle, IBT_UD_RQP,
	    &qp_attrs, &qp_ctx->isq_qp_sizes, &qp_ctx->isq_qpn,
	    &qp_ctx->isq_qp_handle);
	if (ibt_status != IBT_SUCCESS) {
		kmem_free(qp_ctx, sizeof (ibmf_alt_qp_t));
		(void) sprintf(errmsg, "failed to alloc qp, status = %d",
		    ibt_status);
		error = B_TRUE;
		status = IBMF_NO_RESOURCES;
		goto bail;
	}

	/* set up the modify attributes to transition the new UD QP */
	qp_modify_attr.qp_trans = IBT_UD_SRV;
	qp_modify_attr.qp_flags = IBT_CEP_NO_FLAGS;
	qp_modify_attr.qp_transport.ud.ud_qkey = q_key;
	qp_modify_attr.qp_transport.ud.ud_sq_psn = 0;
	qp_modify_attr.qp_transport.ud.ud_pkey_ix = pkey_ix;
	qp_modify_attr.qp_transport.ud.ud_port =
	    clientp->ic_client_info.port_num;

	/* Set up the client handle in the QP context */
	qp_ctx->isq_client_hdl = clientp;

	/* call the IB transport to initialize the QP */
	ibt_status = ibt_initialize_qp(qp_ctx->isq_qp_handle, &qp_modify_attr);
	if (ibt_status != IBT_SUCCESS) {
		(void) ibt_free_qp(qp_ctx->isq_qp_handle);
		kmem_free(qp_ctx, sizeof (ibmf_alt_qp_t));
		(void) sprintf(errmsg, "failed to initialize qp, status = %d",
		    ibt_status);
		error = B_TRUE;
		status = IBMF_NO_RESOURCES;
		goto bail;
	}

	/* Set up the WQE caches */
	status = ibmf_i_init_altqp_wqes(qp_ctx);
	if (status != IBMF_SUCCESS) {
		(void) ibt_free_qp(qp_ctx->isq_qp_handle);
		kmem_free(qp_ctx, sizeof (ibmf_alt_qp_t));
		(void) sprintf(errmsg, "failed to init wqe caches, status = %d",
		    status);
		error = B_TRUE;
		goto bail;
	}

	/* initialize the remaining context fields and sync objects */
	qp_ctx->isq_next = NULL;
	qp_ctx->isq_pkey = p_key;
	qp_ctx->isq_qkey = q_key;
	qp_ctx->isq_port_num = clientp->ic_client_info.port_num;
	mutex_init(&qp_ctx->isq_mutex, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&qp_ctx->isq_wqe_mutex, NULL, MUTEX_DRIVER, NULL);
	cv_init(&qp_ctx->isq_recv_cb_teardown_cv, NULL, CV_DRIVER, NULL);
	cv_init(&qp_ctx->isq_sqd_cv, NULL, CV_DRIVER, NULL);
	cv_init(&qp_ctx->isq_wqes_cv, NULL, CV_DRIVER, NULL);

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*qp_ctx));

	/* add alt qp to the list in CI context */
	mutex_enter(&ibmf_cip->ci_mutex);
	if (ibmf_cip->ci_alt_qp_list == NULL) {
		ibmf_cip->ci_alt_qp_list = qp_ctx;
	} else {
		ibmf_alt_qp_t *qpp;

		/* walk to the tail of the list and append the new context */
		qpp = ibmf_cip->ci_alt_qp_list;
		while (qpp->isq_next != NULL) {
			qpp = qpp->isq_next;
		}
		qpp->isq_next = qp_ctx;
	}
	mutex_exit(&ibmf_cip->ci_mutex);

	*ibmf_qp_handlep = (ibmf_qp_handle_t)qp_ctx;

	if (flags & IBMF_ALLOC_SLEEP)
		blocking = 1;
	else
		blocking = 0;

	/* post the max number of buffers to RQ */
	for (i = 0; i < ibmf_recv_wqes_per_port; i++) {
		/*
		 * NOTE(review): buffers are posted through clientp->ic_qp
		 * with the new alt QP handle as the fourth argument;
		 * failures here are traced but not treated as fatal.
		 */
		status = ibmf_i_post_recv_buffer(ibmf_cip, clientp->ic_qp,
		    blocking, *ibmf_qp_handlep);
		if (status != IBMF_SUCCESS) {
			IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L3,
			    ibmf_i_alloc_qp, IBMF_TNF_TRACE, "",
			    "ibmf_i_alloc_qp(): %s, status = %d\n",
			    tnf_string, msg, "ibmf_i_post_recv_buffer() failed",
			    tnf_int, status, status);
		}
	}

	/* account for the new alternate QP in the client kstats */
	mutex_enter(&clientp->ic_kstat_mutex);
	IBMF_ADD32_KSTATS(clientp, alt_qps_alloced, 1);
	mutex_exit(&clientp->ic_kstat_mutex);

bail:
	if (error) {
		IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
		    ibmf_i_alloc_qp_err, IBMF_TNF_TRACE, "",
		    "ibmf_i_alloc_qp(): %s\n", tnf_string, msg, errmsg);
	}

	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_alloc_qp_end,
	    IBMF_TNF_TRACE, "", "ibmf_i_alloc_qp() exit, qp = %p\n",
	    tnf_opaque, qp_handlep, *ibmf_qp_handlep);
	return (status);
}
2548 
2549 /*
2550  * ibmf_i_free_qp():
2551  *	Free an alternate QP context
2552  */
/* ARGSUSED */
int
ibmf_i_free_qp(ibmf_qp_handle_t ibmf_qp_handle, uint_t flags)
{
	ibmf_alt_qp_t		*qp_ctx = (ibmf_alt_qp_t *)ibmf_qp_handle;
	ibmf_client_t		*clientp = qp_ctx->isq_client_hdl;
	ibmf_ci_t		*ibmf_cip = qp_ctx->isq_client_hdl->ic_myci;
	ibmf_alt_qp_t		*qpp, *pqpp;
	ibt_status_t		ibt_status;

	IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L4,
	    ibmf_i_free_qp_start, IBMF_TNF_TRACE, "",
	    "ibmf_i_free_qp() enter, qp_hdl = %p, flags = %x\n",
	    tnf_opaque, qp_hdl, ibmf_qp_handle, tnf_uint, flags, flags);

	/* remove qp from the list in CI context */

	mutex_enter(&ibmf_cip->ci_mutex);
	qpp = ibmf_cip->ci_alt_qp_list;
	ASSERT(qpp != NULL);
	if (qpp == qp_ctx) {
		/* the QP being freed is at the head of the list */
		ibmf_cip->ci_alt_qp_list = qpp->isq_next;
	} else {
		/* search the list for qp_ctx, tracking its predecessor */
		while (qpp != NULL) {
			if (qpp == qp_ctx)
				break;
			pqpp = qpp;
			qpp = qpp->isq_next;
		}
		ASSERT(qpp != NULL);
		pqpp->isq_next = qpp->isq_next;
	}

	mutex_exit(&ibmf_cip->ci_mutex);

	/* flush the WQEs in the QP queues */
	ibt_status = ibt_flush_qp(qp_ctx->isq_qp_handle);
	if (ibt_status != IBT_SUCCESS) {
		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
		    ibmf_i_free_qp_err, IBMF_TNF_TRACE, "",
		    "ibmf_i_free_qp(): %s, status = %d\n",
		    tnf_string, msg, "failed to close qp",
		    tnf_uint, ibt_status, ibt_status);
		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_free_qp_end,
		    IBMF_TNF_TRACE, "", "ibmf_i_free_qp() exit\n");
		/*
		 * NOTE(review): on this error path the QP context has
		 * already been unlinked from ci_alt_qp_list but is not
		 * freed here -- confirm callers treat this as fatal.
		 */
		return (IBMF_TRANSPORT_FAILURE);
	}

	/* Call the MAD completion handler */
	ibmf_i_mad_completions(ibmf_cip->ci_alt_cq_handle, (void*)ibmf_cip);

	/* Wait here for all WQE owned by this QP to get freed */
	/* qpp == qp_ctx here: the head matched or the search broke on it */
	mutex_enter(&qpp->isq_mutex);
	while (qpp->isq_wqes_alloced != 0) {
		cv_wait(&qpp->isq_wqes_cv, &qpp->isq_mutex);
	}
	mutex_exit(&qpp->isq_mutex);

	/* tear down the condition variables before freeing the QP */
	cv_destroy(&qp_ctx->isq_recv_cb_teardown_cv);
	cv_destroy(&qp_ctx->isq_sqd_cv);
	cv_destroy(&qp_ctx->isq_wqes_cv);

	/* call the IB transport to free the QP */
	ibt_status = ibt_free_qp(qp_ctx->isq_qp_handle);
	if (ibt_status != IBT_SUCCESS) {
		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
		    ibmf_i_free_qp_err, IBMF_TNF_TRACE, "",
		    "ibmf_i_free_qp(): %s, status = %d\n",
		    tnf_string, msg, "failed to free qp",
		    tnf_uint, ibt_status, ibt_status);
		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_free_qp_end,
		    IBMF_TNF_TRACE, "", "ibmf_i_free_qp() exit\n");
		return (IBMF_TRANSPORT_FAILURE);
	}

	/* Clean up the WQE caches */
	ibmf_i_fini_altqp_wqes(qp_ctx);
	mutex_destroy(&qp_ctx->isq_wqe_mutex);
	mutex_destroy(&qp_ctx->isq_mutex);

	/* account for the freed alternate QP in the client kstats */
	mutex_enter(&clientp->ic_kstat_mutex);
	IBMF_SUB32_KSTATS(clientp, alt_qps_alloced, 1);
	mutex_exit(&clientp->ic_kstat_mutex);

	kmem_free(qp_ctx, sizeof (ibmf_alt_qp_t));

	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_free_qp_end,
	    IBMF_TNF_TRACE, "", "ibmf_i_free_qp() exit\n");

	return (IBMF_SUCCESS);
}
2644 
2645 /*
2646  * ibmf_i_query_qp():
2647  *	Query an alternate QP context
2648  */
2649 /* ARGSUSED */
2650 int
2651 ibmf_i_query_qp(ibmf_qp_handle_t ibmf_qp_handle, uint_t flags,
2652     uint_t *qp_nump, ib_pkey_t *p_keyp, ib_qkey_t *q_keyp, uint8_t *portnump)
2653 {
2654 	ibt_qp_query_attr_t	qp_query;
2655 	ibmf_alt_qp_t		*qp_ctx = (ibmf_alt_qp_t *)ibmf_qp_handle;
2656 	uint16_t		pkey_ix;
2657 	ibt_status_t		ibt_status;
2658 
2659 	IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L4,
2660 	    ibmf_i_free_qp_start, IBMF_TNF_TRACE, "",
2661 	    "ibmf_i_free_qp() enter, qp_hdl = %p, flags = %x\n",
2662 	    tnf_opaque, qp_hdl, ibmf_qp_handle, tnf_uint, flags, flags);
2663 
2664 	ibt_status = ibt_query_qp(qp_ctx->isq_qp_handle, &qp_query);
2665 	if (ibt_status != IBT_SUCCESS) {
2666 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
2667 		    ibmf_i_query_qp_err, IBMF_TNF_TRACE, "",
2668 		    "ibmf_i_query_qp(): %s, status = %d\n",
2669 		    tnf_string, msg, "failed to query qp",
2670 		    tnf_uint, ibt_status, ibt_status);
2671 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_query_qp_end,
2672 		    IBMF_TNF_TRACE, "", "ibmf_i_query_qp() exit\n");
2673 		return (IBMF_TRANSPORT_FAILURE);
2674 	}
2675 
2676 	/* move the desired attributes into the locations provided */
2677 	*qp_nump = qp_query.qp_qpn;
2678 	*q_keyp = qp_query.qp_info.qp_transport.ud.ud_qkey;
2679 	*portnump = qp_query.qp_info.qp_transport.ud.ud_port;
2680 
2681 	pkey_ix = qp_query.qp_info.qp_transport.ud.ud_pkey_ix;
2682 
2683 	/* get the pkey based on the pkey_ix */
2684 	ibt_status = ibt_index2pkey(qp_ctx->isq_client_hdl->ic_ci_handle,
2685 	    *portnump, pkey_ix, p_keyp);
2686 	if (ibt_status != IBT_SUCCESS) {
2687 		IBMF_TRACE_3(IBMF_TNF_NODEBUG, DPRINT_L1,
2688 		    ibmf_i_query_qp_err, IBMF_TNF_TRACE, "",
2689 		    "ibmf_i_query_qp(): %s, pkey_ix = %d, status = %d\n",
2690 		    tnf_string, msg, "failed to get pkey from index",
2691 		    tnf_uint, pkey_ix, pkey_ix,
2692 		    tnf_uint, ibt_status, ibt_status);
2693 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_query_qp_end,
2694 		    IBMF_TNF_TRACE, "", "ibmf_i_query_qp() exit\n");
2695 		return (IBMF_TRANSPORT_FAILURE);
2696 	}
2697 
2698 	IBMF_TRACE_4(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_query_qp_end,
2699 	    IBMF_TNF_TRACE, "", "ibmf_i_query_qp() exit, qp_num = 0x%x, "
2700 	    "pkey = 0x%x, qkey = 0x%x, portnum = %d\n",
2701 	    tnf_uint, qp_num, *qp_nump, tnf_uint, pkey, *p_keyp,
2702 	    tnf_uint, qkey, *q_keyp, tnf_uint, portnum, *portnump);
2703 
2704 	return (IBMF_SUCCESS);
2705 }
2706 
2707 /*
2708  * ibmf_i_modify_qp():
2709  *	Modify an alternate QP context
2710  */
2711 /* ARGSUSED */
2712 int
2713 ibmf_i_modify_qp(ibmf_qp_handle_t ibmf_qp_handle, ib_pkey_t p_key,
2714     ib_qkey_t q_key, uint_t flags)
2715 {
2716 	ibmf_alt_qp_t		*qp_ctx = (ibmf_alt_qp_t *)ibmf_qp_handle;
2717 	ibmf_client_t		*clientp = qp_ctx->isq_client_hdl;
2718 	ibmf_ci_t		*ibmf_cip = clientp->ic_myci;
2719 	ibmf_alt_qp_t		*qpp;
2720 	ibt_qp_info_t		qp_mod;
2721 	ibt_cep_modify_flags_t	qp_mod_flags;
2722 	ibt_queue_sizes_t	actual_sz;
2723 	uint16_t		pkey_ix;
2724 	ibt_status_t		ibt_status;
2725 
2726 	IBMF_TRACE_4(IBMF_TNF_DEBUG, DPRINT_L4,
2727 	    ibmf_i_modify_qp_start, IBMF_TNF_TRACE, "",
2728 	    "ibmf_i_modify_qp() enter, qp_hdl = %p, flags = %x, pkey = 0x%x, "
2729 	    "qkey = 0x%x\n", tnf_opaque, qp_hdl, ibmf_qp_handle,
2730 	    tnf_uint, flags, flags, tnf_uint, p_key, p_key,
2731 	    tnf_uint, q_key, q_key);
2732 
2733 	/*
2734 	 * get the pkey index associated with this pkey if present in table
2735 	 */
2736 	if (ibmf_i_get_pkeyix(clientp->ic_ci_handle, p_key,
2737 	    clientp->ic_client_info.port_num, &pkey_ix) != IBMF_SUCCESS) {
2738 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
2739 		    ibmf_i_modify_qp_err, IBMF_TNF_TRACE, "",
2740 		    "ibmf_i_modify_qp(): %s, pkey = %x\n",
2741 		    tnf_string, msg, "pkey not in table",
2742 		    tnf_uint, pkey, p_key);
2743 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_modify_qp_end,
2744 		    IBMF_TNF_TRACE, "", "ibmf_i_modify_qp() exit\n");
2745 		return (IBMF_FAILURE);
2746 	}
2747 
2748 	/* Find the QP context in the CI QP context list */
2749 	mutex_enter(&ibmf_cip->ci_mutex);
2750 	qpp = ibmf_cip->ci_alt_qp_list;
2751 	while (qpp != NULL) {
2752 		if (qpp == qp_ctx) {
2753 			break;
2754 		}
2755 		qpp = qpp->isq_next;
2756 	}
2757 
2758 	if (qpp == NULL) {
2759 		mutex_exit(&ibmf_cip->ci_mutex);
2760 
2761 		IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
2762 		    ibmf_i_modify_qp_err, IBMF_TNF_TRACE, "",
2763 		    "ibmf_i_modify_qp(): %s\n",
2764 		    tnf_string, msg, "QP not in altqp list");
2765 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_modify_qp_end,
2766 		    IBMF_TNF_TRACE, "", "ibmf_i_modify_qp() exit\n");
2767 		return (IBMF_BAD_QP_HANDLE);
2768 
2769 	} else {
2770 
2771 		mutex_enter(&qp_ctx->isq_mutex);
2772 	}
2773 
2774 	mutex_exit(&ibmf_cip->ci_mutex);
2775 
2776 	/*
2777 	 * Transition the QP to SQD state
2778 	 */
2779 	bzero(&qp_mod, sizeof (ibt_qp_info_t));
2780 	qp_mod.qp_trans = IBT_UD_SRV;
2781 	qp_mod.qp_state = IBT_STATE_SQD;
2782 	qp_mod_flags = IBT_CEP_SET_STATE | IBT_CEP_SET_SQD_EVENT;
2783 	ibt_status = ibt_modify_qp(qp_ctx->isq_qp_handle, qp_mod_flags,
2784 	    &qp_mod, &actual_sz);
2785 	if (ibt_status != IBT_SUCCESS) {
2786 		mutex_exit(&qp_ctx->isq_mutex);
2787 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
2788 		    ibmf_i_modify_qp_err, IBMF_TNF_TRACE, "",
2789 		    "ibmf_i_modify_qp(): %s, qp_hdl = %p\n",
2790 		    tnf_string, msg, "QP transition RTS to SQD failed",
2791 		    tnf_opaque, qp_handle, qp_ctx->isq_qp_handle);
2792 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_modify_qp_end,
2793 		    IBMF_TNF_TRACE, "", "ibmf_i_modify_qp() exit\n");
2794 		return (IBMF_TRANSPORT_FAILURE);
2795 	}
2796 
2797 	/*
2798 	 * Wait for an event indicating that the QP is in SQD state
2799 	 */
2800 	cv_wait(&qp_ctx->isq_sqd_cv, &qp_ctx->isq_mutex);
2801 
2802 	/* Setup QP modification information for transition to RTS state */
2803 	bzero(&qp_mod, sizeof (ibt_qp_info_t));
2804 	qp_mod.qp_trans = IBT_UD_SRV;
2805 	qp_mod.qp_state = IBT_STATE_RTS;
2806 	qp_mod.qp_current_state = IBT_STATE_SQD;
2807 	qp_mod.qp_transport.ud.ud_pkey_ix = pkey_ix;
2808 	qp_mod.qp_transport.ud.ud_qkey = q_key;
2809 	qp_mod_flags = IBT_CEP_SET_STATE | IBT_CEP_SET_PKEY_IX |
2810 	    IBT_CEP_SET_QKEY;
2811 
2812 	/*
2813 	 * transition the QP back to RTS state to allow
2814 	 * modification of the pkey and qkey
2815 	 */
2816 
2817 	ibt_status = ibt_modify_qp(qp_ctx->isq_qp_handle, qp_mod_flags,
2818 	    &qp_mod, &actual_sz);
2819 	if (ibt_status != IBT_SUCCESS) {
2820 		mutex_exit(&qp_ctx->isq_mutex);
2821 		IBMF_TRACE_3(IBMF_TNF_NODEBUG, DPRINT_L1,
2822 		    ibmf_i_modify_qp_err, IBMF_TNF_TRACE, "",
2823 		    "ibmf_i_modify_qp(): %s, qp_hdl = %p, status = %d\n",
2824 		    tnf_string, msg, "QP transition SQD to RTS failed",
2825 		    tnf_opaque, qp_handle, qp_ctx->isq_qp_handle,
2826 		    tnf_uint, ibt_status, ibt_status);
2827 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_modify_qp_end,
2828 		    IBMF_TNF_TRACE, "", "ibmf_i_modify_qp() exit\n");
2829 		return (IBMF_TRANSPORT_FAILURE);
2830 	}
2831 
2832 	qp_ctx->isq_pkey = p_key;
2833 	qp_ctx->isq_qkey = q_key;
2834 	mutex_exit(&qp_ctx->isq_mutex);
2835 
2836 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_modify_qp_end,
2837 	    IBMF_TNF_TRACE, "", "ibmf_i_modify_qp() exit\n");
2838 	return (IBMF_SUCCESS);
2839 }
2840 
2841 /*
2842  * ibmf_i_post_recv_buffer():
2843  *	Post a WQE to the RQ of the specified QP
2844  */
2845 int
2846 ibmf_i_post_recv_buffer(ibmf_ci_t *cip, ibmf_qp_t *qpp, boolean_t block,
2847     ibmf_qp_handle_t ibmf_qp_handle)
2848 {
2849 	int			ret;
2850 	ibt_wr_ds_t		*sgl;
2851 	ibt_status_t		status;
2852 	ibmf_recv_wqe_t		*recv_wqep;
2853 	ibt_qp_hdl_t		ibt_qp_handle;
2854 	struct kmem_cache	*kmem_cachep;
2855 	ibmf_alt_qp_t		*altqp;
2856 
2857 	IBMF_TRACE_4(IBMF_TNF_DEBUG, DPRINT_L4,
2858 	    ibmf_i_post_recv_buffer_start, IBMF_TNF_TRACE, "",
2859 	    "ibmf_i_post_recv_buffer() enter, cip = %p, qpp = %p, "
2860 	    "qp_hdl = %p, block = %d\n", tnf_opaque, cip, cip,
2861 	    tnf_opaque, qpp, qpp, tnf_opaque, qp_hdl, ibmf_qp_handle,
2862 	    tnf_uint, block, block);
2863 
2864 	/*
2865 	 * if we haven't hit the max wqes per qp, attempt to allocate a recv
2866 	 * wqe and post it to the recv queue.
2867 	 * It is possible for more than one thread to get through this
2868 	 * check below and post wqes that could push us above the
2869 	 * ibmf_recv_wqes_posted_per_qp. We catch that case when the recv
2870 	 * completion is signaled.
2871 	 */
2872 	ASSERT(MUTEX_NOT_HELD(&cip->ci_mutex));
2873 
2874 	/* Get the WQE kmem cache pointer based on the QP type */
2875 	if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT)
2876 		kmem_cachep = cip->ci_recv_wqes_cache;
2877 	else {
2878 		altqp = (ibmf_alt_qp_t *)ibmf_qp_handle;
2879 		kmem_cachep = altqp->isq_recv_wqes_cache;
2880 	}
2881 
2882 	/* allocate a receive WQE from the receive WQE kmem cache */
2883 	recv_wqep = kmem_cache_alloc(kmem_cachep,
2884 	    (block == B_TRUE ? KM_SLEEP : KM_NOSLEEP));
2885 	if (recv_wqep == NULL) {
2886 		/*
2887 		 * Attempt to extend the cache and then retry the
2888 		 * kmem_cache_alloc()
2889 		 */
2890 		if (ibmf_i_extend_wqe_cache(cip, ibmf_qp_handle, block) ==
2891 		    IBMF_NO_RESOURCES) {
2892 			mutex_enter(&cip->ci_mutex);
2893 			IBMF_ADD32_PORT_KSTATS(cip, rwqe_allocs_failed, 1);
2894 			mutex_exit(&cip->ci_mutex);
2895 			IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
2896 			    ibmf_i_post_recv_buffer_err, IBMF_TNF_ERROR, "",
2897 			    "ibmf_i_post_recv_buffer(): %s, status = %d\n",
2898 			    tnf_string, msg, "alloc recv_wqe failed",
2899 			    tnf_int, ibmf_status, IBMF_NO_RESOURCES);
2900 			IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
2901 			    ibmf_i_post_recv_buffer_end, IBMF_TNF_TRACE, "",
2902 			    "ibmf_i_post_recv_buffer() exit\n");
2903 			return (IBMF_NO_RESOURCES);
2904 		} else {
2905 			recv_wqep = kmem_cache_alloc(kmem_cachep,
2906 			    (block == B_TRUE ? KM_SLEEP : KM_NOSLEEP));
2907 			if (recv_wqep == NULL) {
2908 				/* Allocation failed again. Give up here. */
2909 				mutex_enter(&cip->ci_mutex);
2910 				IBMF_ADD32_PORT_KSTATS(cip, rwqe_allocs_failed,
2911 				    1);
2912 				mutex_exit(&cip->ci_mutex);
2913 				IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
2914 				    ibmf_i_post_recv_buffer_err,
2915 				    IBMF_TNF_ERROR, "",
2916 				    "ibmf_i_post_recv_buffer(): %s, "
2917 				    "status = %d\n",
2918 				    tnf_string, msg, "alloc recv_wqe failed",
2919 				    tnf_int, ibmf_status, IBMF_NO_RESOURCES);
2920 				IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
2921 				    ibmf_i_post_recv_buffer_end,
2922 				    IBMF_TNF_TRACE, "",
2923 				    "ibmf_i_post_recv_buffer() exit\n");
2924 				return (IBMF_NO_RESOURCES);
2925 			}
2926 		}
2927 	}
2928 
2929 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*recv_wqep))
2930 
2931 	/*
2932 	 * if the qp handle provided in ibmf_send_pkt() or
2933 	 * ibmf_setup_recv_cb() is not the default qp handle
2934 	 * for this client, then the wqe must be queued on this qp,
2935 	 * else use the default qp handle set up during ibmf_register()
2936 	 */
2937 	if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
2938 		ibt_qp_handle = qpp->iq_qp_handle;
2939 	} else {
2940 		ibt_qp_handle =
2941 		    ((ibmf_alt_qp_t *)ibmf_qp_handle)->isq_qp_handle;
2942 	}
2943 
2944 	/* allocate memory for the scatter-gather list */
2945 	sgl = kmem_zalloc(IBMF_MAX_RQ_WR_SGL_ELEMENTS * sizeof (ibt_wr_ds_t),
2946 	    (block == B_TRUE) ? KM_SLEEP : KM_NOSLEEP);
2947 	if (sgl == NULL) {
2948 		kmem_cache_free(kmem_cachep, recv_wqep);
2949 		IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
2950 		    ibmf_i_post_recv_buffer_err, IBMF_TNF_ERROR, "",
2951 		    "ibmf_i_post_recv_buffer(): %s\n",
2952 		    tnf_string, msg, "failed to kmem_zalloc qp ctx");
2953 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
2954 		    ibmf_i_post_recv_buffer_end, IBMF_TNF_TRACE, "",
2955 		    "ibmf_i_post_recv_buffer() exit\n");
2956 		return (IBMF_NO_RESOURCES);
2957 	}
2958 
2959 	/* initialize it */
2960 	ibmf_i_init_recv_wqe(qpp, sgl, recv_wqep, ibt_qp_handle,
2961 	    ibmf_qp_handle);
2962 
2963 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*recv_wqep))
2964 
2965 	/* and post it */
2966 	status = ibt_post_recv(recv_wqep->recv_qp_handle, &recv_wqep->recv_wr,
2967 	    1, NULL);
2968 
2969 	ret = ibmf_i_ibt_to_ibmf_status(status);
2970 	if (ret != IBMF_SUCCESS) {
2971 		kmem_free(sgl, IBMF_MAX_RQ_WR_SGL_ELEMENTS *
2972 		    sizeof (ibt_wr_ds_t));
2973 		kmem_cache_free(kmem_cachep, recv_wqep);
2974 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
2975 		    ibmf_i_post_recv_buffer_err, IBMF_TNF_ERROR, "",
2976 		    "ibmf_i_post_recv_buffer(): %s, status = %d\n",
2977 		    tnf_string, msg, "ibt_post_recv failed",
2978 		    tnf_uint, ibt_status, status);
2979 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
2980 		    ibmf_i_post_recv_buffer_end, IBMF_TNF_TRACE, "",
2981 		    "ibmf_i_post_recv_buffer() exit\n");
2982 		return (ret);
2983 	}
2984 
2985 	mutex_enter(&cip->ci_mutex);
2986 	IBMF_ADD32_PORT_KSTATS(cip, recv_wqes_alloced, 1);
2987 	mutex_exit(&cip->ci_mutex);
2988 	if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT) {
2989 		mutex_enter(&qpp->iq_mutex);
2990 		qpp->iq_rwqes_posted++;
2991 		mutex_exit(&qpp->iq_mutex);
2992 		mutex_enter(&cip->ci_mutex);
2993 		cip->ci_wqes_alloced++;
2994 		mutex_exit(&cip->ci_mutex);
2995 	} else {
2996 		mutex_enter(&altqp->isq_mutex);
2997 		altqp->isq_wqes_alloced++;
2998 		altqp->isq_rwqes_posted++;
2999 		mutex_exit(&altqp->isq_mutex);
3000 	}
3001 
3002 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_post_recv_buffer_end,
3003 	    IBMF_TNF_TRACE, "", "ibmf_i_post_recv_buffer() exit\n");
3004 
3005 	return (ret);
3006 }
3007 
3008 /*
3009  * ibmf_i_mgt_class_to_hdr_sz_off():
 *	Determine class header offset and size for management classes
3011  */
3012 void
3013 ibmf_i_mgt_class_to_hdr_sz_off(uint32_t mgt_class, uint32_t *szp,
3014     uint32_t *offp)
3015 {
3016 	uint32_t	hdr_sz = 0, hdr_off = 0;
3017 
3018 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4,
3019 	    ibmf_i_mgt_class_to_hdr_sz_off_start, IBMF_TNF_TRACE, "",
3020 	    "ibmf_i_mgt_class_to_hdr_sz_off(): mgt_class = 0x%x\n",
3021 	    tnf_uint, mgt_class, mgt_class);
3022 
3023 	switch (mgt_class) {
3024 	case MAD_MGMT_CLASS_SUBN_LID_ROUTED :
3025 	case MAD_MGMT_CLASS_SUBN_DIRECT_ROUTE :
3026 	case MAD_MGMT_CLASS_PERF :
3027 	case MAD_MGMT_CLASS_BM :
3028 	case MAD_MGMT_CLASS_DEV_MGT :
3029 	case MAD_MGMT_CLASS_SNMP :
3030 		hdr_sz = IBMF_MAD_CL_HDR_SZ_1;
3031 		hdr_off = IBMF_MAD_CL_HDR_OFF_1;
3032 		break;
3033 	case MAD_MGMT_CLASS_SUBN_ADM :
3034 		hdr_sz = IBMF_MAD_CL_HDR_SZ_2;
3035 		hdr_off = IBMF_MAD_CL_HDR_OFF_2;
3036 		break;
3037 	}
3038 
3039 	if (((mgt_class >= MAD_MGMT_CLASS_VENDOR_START) &&
3040 	    (mgt_class <= MAD_MGMT_CLASS_VENDOR_END)) ||
3041 	    ((mgt_class >= MAD_MGMT_CLASS_APPLICATION_START) &&
3042 	    (mgt_class <= MAD_MGMT_CLASS_APPLICATION_END))) {
3043 		hdr_sz = IBMF_MAD_CL_HDR_SZ_3;
3044 		hdr_off = IBMF_MAD_CL_HDR_OFF_1;
3045 	}
3046 
3047 	if ((mgt_class >= MAD_MGMT_CLASS_VENDOR2_START) &&
3048 	    (mgt_class <= MAD_MGMT_CLASS_VENDOR2_END)) {
3049 		hdr_sz = IBMF_MAD_CL_HDR_SZ_4;
3050 		hdr_off = IBMF_MAD_CL_HDR_OFF_2;
3051 	}
3052 
3053 	*szp = hdr_sz;
3054 	*offp = hdr_off;
3055 
3056 	IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L4,
3057 	    ibmf_i_mgt_class_to_hdr_sz_off_end, IBMF_TNF_TRACE, "",
3058 	    "ibmf_i_mgt_class_to_hdr_sz_off() exit,hdr_sz = %d, hdr_off = %d\n",
3059 	    tnf_uint, hdr_sz, hdr_sz, tnf_uint, hdr_off, hdr_off);
3060 }
3061 
3062 /*
3063  * ibmf_i_lookup_client_by_mgmt_class():
3064  *	Lookup the client context based on the management class of
3065  *	the incoming packet
3066  */
3067 int
3068 ibmf_i_lookup_client_by_mgmt_class(ibmf_ci_t *ibmf_cip, int port_num,
3069     ibmf_client_type_t class, ibmf_client_t **clientpp)
3070 {
3071 	ibmf_client_t 		*clientp;
3072 	ibmf_client_info_t	*client_infop;
3073 
3074 	IBMF_TRACE_3(IBMF_TNF_DEBUG, DPRINT_L4,
3075 	    ibmf_i_lookup_client_by_mgmt_class_start, IBMF_TNF_TRACE, "",
3076 	    "ibmf_i_lookup_client_by_mgmt_class() enter, cip = %p, "
3077 	    "port_num = %d, class = 0x%x\n", tnf_opaque, cip, ibmf_cip,
3078 	    tnf_int, port, port_num, tnf_opaque, class, class);
3079 
3080 	ASSERT(MUTEX_NOT_HELD(&ibmf_cip->ci_clients_mutex));
3081 
3082 	mutex_enter(&ibmf_cip->ci_clients_mutex);
3083 
3084 	clientp = ibmf_cip->ci_clients;
3085 
3086 	/* walk client context list looking for class/portnum match */
3087 	while (clientp != NULL) {
3088 		client_infop = &clientp->ic_client_info;
3089 		if (class == client_infop->client_class &&
3090 		    port_num == client_infop->port_num) {
3091 			/* found our match */
3092 			break;
3093 		}
3094 		clientp = clientp->ic_next;
3095 	}
3096 
3097 	mutex_exit(&ibmf_cip->ci_clients_mutex);
3098 
3099 	if (clientp != NULL) {
3100 		*clientpp = clientp;
3101 		IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4,
3102 		    ibmf_i_lookup_client_by_mgmt_class_end, IBMF_TNF_TRACE, "",
3103 		    "ibmf_i_lookup_client_by_mgmt_class() exit, clp = %p\n",
3104 		    tnf_opaque, clientp, clientp);
3105 		return (IBMF_SUCCESS);
3106 	} else {
3107 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
3108 		    ibmf_i_lookup_client_by_mgmt_class_end, IBMF_TNF_TRACE, "",
3109 		    "ibmf_i_lookup_client_by_mgmt_class() failure exit\n");
3110 		return (IBMF_FAILURE);
3111 	}
3112 }
3113 
3114 /*
3115  * ibmf_i_get_pkeyix():
3116  *	Get the pkey index of the pkey in the pkey table of the specified
3117  *	port. Take into account the partition membership.
3118  */
3119 int
3120 ibmf_i_get_pkeyix(ibt_hca_hdl_t hca_handle, ib_pkey_t pkey, uint8_t port,
3121     ib_pkey_t *pkeyixp)
3122 {
3123 	ib_pkey_t		tpkey;
3124 	ibt_status_t		ibt_status;
3125 
3126 	IBMF_TRACE_3(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_get_pkeyix_start,
3127 	    IBMF_TNF_TRACE, "", "ibmf_i_get_pkeyix() enter, hcahdl = %p, "
3128 	    "pkey = 0x%x, port = %d\n", tnf_opaque, hcahdl, hca_handle,
3129 	    tnf_int, pkey, pkey, tnf_int, port, port);
3130 
3131 	/*
3132 	 * If the client specifies the FULL membership pkey and the
3133 	 * pkey is not in the table, this function should fail.
3134 	 */
3135 	if (pkey & IBMF_PKEY_MEMBERSHIP_MASK) {
3136 		ibt_status = ibt_pkey2index(hca_handle, port,
3137 		    pkey, pkeyixp);
3138 		if (ibt_status != IBT_SUCCESS) {
3139 			IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
3140 			    ibmf_i_get_pkeyix_err, IBMF_TNF_ERROR, "",
3141 			    "ibmf_i_get_pkeyix() error status = %d\n",
3142 			    tnf_uint, ibt_status, ibt_status);
3143 			IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
3144 			    ibmf_i_get_pkeyix_end, IBMF_TNF_TRACE, "",
3145 			    "ibmf_i_get_pkeyix() exit\n");
3146 			return (IBMF_TRANSPORT_FAILURE);
3147 		}
3148 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_get_pkeyix_end,
3149 		    IBMF_TNF_TRACE, "", "ibmf_i_get_pkeyix() exit\n");
3150 		return (IBMF_SUCCESS);
3151 	}
3152 
3153 	/*
3154 	 * Limited member pkey processing
3155 	 * Check if this limited member pkey is in the pkey table
3156 	 */
3157 	ibt_status = ibt_pkey2index(hca_handle, port, pkey, pkeyixp);
3158 	if (ibt_status == IBT_SUCCESS) {
3159 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
3160 		    ibmf_i_get_pkeyix_end, IBMF_TNF_TRACE, "",
3161 		    "ibmf_i_get_pkeyix() exit\n");
3162 		return (IBMF_SUCCESS);
3163 	}
3164 
3165 	/*
3166 	 * Could not find the limited member version of the pkey.
3167 	 * Now check if the full member version of the pkey is in the
3168 	 * pkey table. If not, fail the call.
3169 	 */
3170 	tpkey = pkey | IBMF_PKEY_MEMBERSHIP_MASK;
3171 	ibt_status = ibt_pkey2index(hca_handle, port, tpkey, pkeyixp);
3172 	if (ibt_status != IBT_SUCCESS) {
3173 		IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
3174 		    ibmf_i_get_pkeyix_err, IBMF_TNF_ERROR, "",
3175 		    "ibmf_i_get_pkeyix() error status = %d\n",
3176 		    tnf_uint, ibt_status, ibt_status);
3177 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
3178 		    ibmf_i_get_pkeyix_end, IBMF_TNF_TRACE, "",
3179 		    "ibmf_i_get_pkeyix() exit\n");
3180 		return (IBMF_TRANSPORT_FAILURE);
3181 	}
3182 
3183 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_get_pkeyix_end,
3184 	    IBMF_TNF_TRACE, "", "ibmf_i_get_pkeyix(): pkey_ix = %d\n",
3185 	    tnf_int, pkeyix, *pkeyixp);
3186 	return (IBMF_SUCCESS);
3187 }
3188 
3189 /*
3190  * ibmf_i_pkey_ix_to_key():
3191  *	Figure out pkey from pkey index
3192  */
3193 int
3194 ibmf_i_pkey_ix_to_key(ibmf_ci_t *cip, uint_t port_num, uint_t pkey_ix,
3195     ib_pkey_t *pkeyp)
3196 {
3197 	ibt_status_t		ibt_status;
3198 
3199 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_pkey_ix_to_key_start,
3200 	    IBMF_TNF_TRACE, "", "ibmf_i_pkey_ix_to_key() enter\n");
3201 
3202 	ibt_status = ibt_index2pkey(cip->ci_ci_handle, port_num, pkey_ix,
3203 	    pkeyp);
3204 	if (ibt_status != IBT_SUCCESS) {
3205 		IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
3206 		    ibmf_i_pkey_ix_to_key, IBMF_TNF_TRACE, "",
3207 		    "ibmf_i_pkey_ix_to_key(): ibt_index2pkey failed for "
3208 		    " pkey index %d \n", tnf_uint, pkey_ix, pkey_ix);
3209 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
3210 		    ibmf_i_pkey_ix_to_key_end,
3211 		    IBMF_TNF_TRACE, "", "ibmf_i_pkey_ix_to_key() exit\n");
3212 		return (IBMF_TRANSPORT_FAILURE);
3213 	}
3214 
3215 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_pkey_ix_to_key_end,
3216 	    IBMF_TNF_TRACE, "", "ibmf_i_pkey_ix_to_key() exit\n");
3217 
3218 	return (IBMF_SUCCESS);
3219 }
3220 
3221 /*
3222  * ibmf_i_ibt_to_ibmf_status():
3223  *	Map IBT return code to IBMF return code
3224  */
3225 int
3226 ibmf_i_ibt_to_ibmf_status(ibt_status_t ibt_status)
3227 {
3228 	int ibmf_status;
3229 
3230 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_ibt_to_ibmf_status_start,
3231 	    IBMF_TNF_TRACE, "", "ibmf_i_ibt_to_ibmf_status() enter, "
3232 	    "status = %d\n", tnf_uint, ibt_status, ibt_status);
3233 
3234 	switch (ibt_status) {
3235 
3236 	case IBT_SUCCESS:
3237 		ibmf_status = IBMF_SUCCESS;
3238 		break;
3239 
3240 	case IBT_INSUFF_KERNEL_RESOURCE:
3241 	case IBT_INSUFF_RESOURCE:
3242 	case IBT_QP_FULL:
3243 		ibmf_status = IBMF_NO_RESOURCES;
3244 		break;
3245 
3246 	case IBT_HCA_IN_USE:
3247 	case IBT_QP_IN_USE:
3248 	case IBT_CQ_BUSY:
3249 	case IBT_PD_IN_USE:
3250 	case IBT_MR_IN_USE:
3251 		ibmf_status = IBMF_BUSY;
3252 		break;
3253 
3254 	default:
3255 		ibmf_status = IBMF_FAILURE;
3256 		break;
3257 	}
3258 
3259 	IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_i_ibt_to_ibmf_status_end,
3260 	    IBMF_TNF_TRACE, "", "ibmf_i_ibt_to_ibmf_status() exit, "
3261 	    "ibt_status = %d, ibmf_status = %d\n", tnf_uint, ibt_status,
3262 	    ibt_status, tnf_int, ibmf_status, ibmf_status);
3263 
3264 	return (ibmf_status);
3265 }
3266 
3267 /*
3268  * ibmf_i_ibt_wc_to_ibmf_status():
3269  *	Map work completion code to IBMF return code
3270  */
3271 int
3272 ibmf_i_ibt_wc_to_ibmf_status(ibt_wc_status_t ibt_wc_status)
3273 {
3274 	int ibmf_status;
3275 
3276 	IBMF_TRACE_1(IBMF_TNF_DEBUG, DPRINT_L4,
3277 	    ibmf_i_ibt_wc_to_ibmf_status_start, IBMF_TNF_TRACE, "",
3278 	    "ibmf_i_ibt_to_ibmf_status() enter, status = %d\n",
3279 	    tnf_uint, ibt_wc_status, ibt_wc_status);
3280 
3281 	switch (ibt_wc_status) {
3282 
3283 	case IBT_WC_SUCCESS:
3284 		ibmf_status = IBMF_SUCCESS;
3285 		break;
3286 
3287 	default:
3288 		ibmf_status = IBMF_FAILURE;
3289 		break;
3290 	}
3291 
3292 	IBMF_TRACE_2(IBMF_TNF_DEBUG, DPRINT_L4,
3293 	    ibmf_i_ibt_wc_to_ibmf_status_end, IBMF_TNF_TRACE, "",
3294 	    "ibmf_i_ibt_to_ibmf_status() exit, wc_status = %d, "
3295 	    "ibmf_status = %d\n", tnf_uint, ibt_wc_status,
3296 	    ibt_wc_status, tnf_int, ibmf_status, ibmf_status);
3297 
3298 	return (ibmf_status);
3299 }
3300 
3301 /*
3302  * ibmf_i_is_ibmf_handle_valid():
3303  *	Validate the ibmf handle
3304  */
3305 int
3306 ibmf_i_is_ibmf_handle_valid(ibmf_handle_t ibmf_handle)
3307 {
3308 	ibmf_ci_t	*cip;
3309 	ibmf_client_t	*clp, *clientp = (ibmf_client_t *)ibmf_handle;
3310 
3311 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
3312 	    ibmf_i_is_ibmf_handle_valid_start, IBMF_TNF_TRACE, "",
3313 	    "ibmf_i_is_ibmf_handle_valid() enter\n");
3314 
3315 	mutex_enter(&ibmf_statep->ibmf_mutex);
3316 
3317 	cip = ibmf_statep->ibmf_ci_list;
3318 
3319 	/* iterate through all the channel interace contexts */
3320 	while (cip != NULL) {
3321 
3322 		mutex_enter(&cip->ci_clients_mutex);
3323 
3324 		clp = cip->ci_clients;
3325 
3326 		/* search all registration contexts for this ci */
3327 		while (clp != NULL) {
3328 			if (clp == clientp)
3329 				break;
3330 			clp = clp->ic_next;
3331 		}
3332 
3333 		mutex_exit(&cip->ci_clients_mutex);
3334 
3335 		if (clp == clientp) {
3336 			/* ci found */
3337 			break;
3338 		} else {
3339 			/* ci not found, move onto next ci */
3340 			cip = cip->ci_next;
3341 		}
3342 	}
3343 
3344 	mutex_exit(&ibmf_statep->ibmf_mutex);
3345 
3346 	if (cip != NULL) {
3347 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
3348 		    ibmf_i_is_ibmf_handle_valid_end, IBMF_TNF_TRACE, "",
3349 		    "ibmf_i_is_ibmf_handle_valid() exit\n");
3350 		return (IBMF_SUCCESS);
3351 	} else {
3352 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
3353 		    ibmf_i_is_ibmf_handle_valid_end, IBMF_TNF_TRACE, "",
3354 		    "ibmf_i_is_ibmf_handle_valid() failure exit\n");
3355 		return (IBMF_FAILURE);
3356 	}
3357 }
3358 
3359 /*
3360  * ibmf_i_is_qp_handle_valid():
3361  *	Validate the QP handle
3362  */
3363 int
3364 ibmf_i_is_qp_handle_valid(ibmf_handle_t ibmf_handle,
3365     ibmf_qp_handle_t ibmf_qp_handle)
3366 {
3367 	ibmf_client_t	*clientp = (ibmf_client_t *)ibmf_handle;
3368 	ibmf_alt_qp_t	*alt_qp, *qpp = (ibmf_alt_qp_t *)ibmf_qp_handle;
3369 	ibmf_ci_t	*cip = clientp->ic_myci;
3370 
3371 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
3372 	    ibmf_i_is_qp_handle_valid_start, IBMF_TNF_TRACE, "",
3373 	    "ibmf_i_is_qp_handle_valid() enter\n");
3374 
3375 	/* the default qp handle is always valid */
3376 	if (ibmf_qp_handle == IBMF_QP_HANDLE_DEFAULT)
3377 		return (IBMF_SUCCESS);
3378 
3379 	mutex_enter(&cip->ci_mutex);
3380 
3381 	alt_qp = cip->ci_alt_qp_list;
3382 
3383 	while (alt_qp != NULL) {
3384 		if (alt_qp == qpp) {
3385 			/* qp handle found */
3386 			break;
3387 		} else {
3388 			/* qp handle not found, get next qp on list */
3389 			alt_qp = alt_qp->isq_next;
3390 		}
3391 	}
3392 
3393 	mutex_exit(&cip->ci_mutex);
3394 
3395 	if (alt_qp != NULL) {
3396 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
3397 		    ibmf_i_is_qp_handle_valid_end, IBMF_TNF_TRACE, "",
3398 		    "ibmf_i_is_qp_handle_valid() exit\n");
3399 		return (IBMF_SUCCESS);
3400 	} else {
3401 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
3402 		    ibmf_i_is_qp_handle_valid_end, IBMF_TNF_TRACE, "",
3403 		    "ibmf_i_is_qp_handle_valid() failure exit\n");
3404 		return (IBMF_FAILURE);
3405 	}
3406 }
3407 
3408 void
3409 ibmf_dprintf(int l, const char *fmt, ...)
3410 {
3411 	va_list ap;
3412 
3413 	if ((l) > ibmf_trace_level) {
3414 
3415 		return;
3416 	}
3417 
3418 	va_start(ap, fmt);
3419 	(void) vprintf(fmt, ap);
3420 	va_end(ap);
3421 }
3422 
3423 /*
3424  * ibmf_setup_term_ctx():
3425  * Sets up a message context that is the duplicate of the one
3426  * passed in the regmsgimplp argument. The duplicate message context
3427  * is not visible to the client. It is managed internally by ibmf
3428  * to process the RMPP receiver termination flow logic for the
3429  * transaction while the client is notified of the completion of the
3430  * same transaction (i.e. all the solicited data has been received).
3431  */
3432 int
3433 ibmf_setup_term_ctx(ibmf_client_t *clientp, ibmf_msg_impl_t *regmsgimplp)
3434 {
3435 	ibmf_msg_impl_t	*msgimplp;
3436 	size_t		offset;
3437 	uint32_t	cl_hdr_sz, cl_hdr_off;
3438 	int		status;
3439 
3440 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
3441 	    ibmf_setup_term_ctx_start, IBMF_TNF_TRACE, "",
3442 	    "ibmf_setup_term_ctx() enter\n");
3443 
3444 	/*
3445 	 * Allocate the termination message context
3446 	 */
3447 	msgimplp = (ibmf_msg_impl_t *)kmem_zalloc(sizeof (ibmf_msg_impl_t),
3448 	    KM_NOSLEEP);
3449 	if (msgimplp == NULL) {
3450 		IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
3451 		    ibmf_setup_term_ctx_error, IBMF_TNF_ERROR, "",
3452 		    "ibmf_setup_term_ctx(): %s\n", tnf_string, msg,
3453 		    "message mem allocation failure");
3454 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
3455 		    ibmf_setup_term_ctx_end, IBMF_TNF_TRACE, "",
3456 		    "ibmf_setup_term_ctx() exit\n");
3457 		return (IBMF_NO_RESOURCES);
3458 	}
3459 
3460 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*msgimplp))
3461 
3462 	/* Copy the message context to the termination message structure */
3463 	*msgimplp = *regmsgimplp;
3464 
3465 	/* Initialize the message mutex */
3466 	mutex_init(&msgimplp->im_mutex, NULL, MUTEX_DRIVER, NULL);
3467 
3468 	/*
3469 	 * Allocate enough memory for the MAD header only.
3470 	 */
3471 	msgimplp->im_msgbufs_recv.im_bufs_mad_hdr =
3472 	    (ib_mad_hdr_t *)kmem_zalloc(IBMF_MAD_SIZE, KM_NOSLEEP);
3473 	if (msgimplp->im_msgbufs_recv.im_bufs_mad_hdr == NULL) {
3474 		kmem_free(msgimplp, sizeof (ibmf_msg_impl_t));
3475 		IBMF_TRACE_1(IBMF_TNF_NODEBUG, DPRINT_L1,
3476 		    ibmf_setup_term_ctx_error, IBMF_TNF_ERROR, "",
3477 		    "ibmf_setup_term_ctx(): %s\n", tnf_string, msg,
3478 		    "recv buf mem allocation failure");
3479 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
3480 		    ibmf_setup_term_ctx_end, IBMF_TNF_TRACE, "",
3481 		    "ibmf_setup_term_ctx() exit\n");
3482 		return (IBMF_NO_RESOURCES);
3483 	}
3484 
3485 	/* Copy over just the MAD header contents */
3486 	bcopy((const void *)regmsgimplp->im_msgbufs_recv.im_bufs_mad_hdr,
3487 	    (void *)msgimplp->im_msgbufs_recv.im_bufs_mad_hdr,
3488 	    sizeof (ib_mad_hdr_t));
3489 
3490 	offset = sizeof (ib_mad_hdr_t);
3491 	ibmf_i_mgt_class_to_hdr_sz_off(
3492 	    regmsgimplp->im_msgbufs_recv.im_bufs_mad_hdr->MgmtClass,
3493 	    &cl_hdr_sz, &cl_hdr_off);
3494 	offset += cl_hdr_off;
3495 
3496 	/*
3497 	 * Copy the management class header
3498 	 */
3499 	msgimplp->im_msgbufs_recv.im_bufs_cl_hdr =
3500 	    (uchar_t *)msgimplp->im_msgbufs_recv.im_bufs_mad_hdr + offset;
3501 	msgimplp->im_msgbufs_recv.im_bufs_cl_hdr_len =
3502 	    regmsgimplp->im_msgbufs_recv.im_bufs_cl_hdr_len;
3503 	bcopy((void *)regmsgimplp->im_msgbufs_recv.im_bufs_cl_hdr,
3504 	    (void *)msgimplp->im_msgbufs_recv.im_bufs_cl_hdr,
3505 	    regmsgimplp->im_msgbufs_recv.im_bufs_cl_hdr_len);
3506 
3507 	/*
3508 	 * Clear the termination message timers copied from the regular message
3509 	 * since ibmf_i_set_timer() expects them to be cleared.
3510 	 */
3511 	msgimplp->im_rp_timeout_id = 0;
3512 	msgimplp->im_tr_timeout_id = 0;
3513 
3514 	/* Mark this message as being in a receiver RMPP mode */
3515 	msgimplp->im_flags |= IBMF_MSG_FLAGS_RECV_RMPP;
3516 
3517 	/* Mark this message as being a "termination flow" message */
3518 	msgimplp->im_flags |= IBMF_MSG_FLAGS_TERMINATION;
3519 
3520 	/*
3521 	 * Clear the IBMF_MSG_FLAGS_SET_TERMINATION copied over from the regular
3522 	 * message.
3523 	 */
3524 	msgimplp->im_flags &= ~IBMF_MSG_FLAGS_SET_TERMINATION;
3525 
3526 	/*
3527 	 * Clear the trans_state RECV_DONE and DONE flags so that the
3528 	 * protocol continues with the termination message context.
3529 	 */
3530 	msgimplp->im_trans_state_flags &= ~IBMF_TRANS_STATE_FLAG_RECV_DONE;
3531 	msgimplp->im_trans_state_flags &= ~IBMF_TRANS_STATE_FLAG_DONE;
3532 
3533 	/* Clear out references to the old UD dest handles */
3534 	msgimplp->im_ibmf_ud_dest = NULL;
3535 	msgimplp->im_ud_dest = NULL;
3536 
3537 	/*
3538 	 * Request new UD dest resources for the termination phase.
3539 	 * The old UD dest resources are freed when the IBMF client
3540 	 * calls ibmf_free_msg(), so they cannot be relied on to exist
3541 	 * when the RMPP termination loop completes.
3542 	 */
3543 	status = ibmf_i_alloc_ud_dest(clientp, msgimplp, &msgimplp->im_ud_dest,
3544 	    B_FALSE);
3545 	if (status != IBMF_SUCCESS) {
3546 		kmem_free(msgimplp, sizeof (ibmf_msg_impl_t));
3547 		IBMF_TRACE_2(IBMF_TNF_NODEBUG, DPRINT_L1,
3548 		    ibmf_setup_term_ctx_err, IBMF_TNF_ERROR, "",
3549 		    "ibmf_setup_term_ctx(): %s, status = %d\n",
3550 		    tnf_string, msg, "UD destination resource allocation"
3551 		    " failed", tnf_int, ibmf_status, status);
3552 		IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4,
3553 		    ibmf_setup_term_ctx_end, IBMF_TNF_TRACE, "",
3554 		    "ibmf_setup_term_ctx() exit\n");
3555 		return (status);
3556 	}
3557 
3558 	/*
3559 	 * Add the message to the termination client list by virtue of
3560 	 * having the IBMF_MSG_FLAGS_TERMINATION "im_flags" flag set.
3561 	 */
3562 	ibmf_i_client_add_msg(clientp, msgimplp);
3563 
3564 	/*
3565 	 * Increase the "allocted messages" count so that the client
3566 	 * does not unregister before this message has been freed.
3567 	 * This is necessary because we want the client context to
3568 	 * be around when the receive timeout expires for this termination
3569 	 * loop, otherwise the code will access freed memory and crash.
3570 	 */
3571 	mutex_enter(&clientp->ic_mutex);
3572 	clientp->ic_msgs_alloced++;
3573 	mutex_exit(&clientp->ic_mutex);
3574 
3575 	mutex_enter(&msgimplp->im_mutex);
3576 	/* Set the response timer for the termination message. */
3577 	ibmf_i_set_timer(ibmf_i_recv_timeout, msgimplp, IBMF_RESP_TIMER);
3578 	mutex_exit(&msgimplp->im_mutex);
3579 
3580 	IBMF_TRACE_0(IBMF_TNF_DEBUG, DPRINT_L4, ibmf_setup_term_ctx_end,
3581 	    IBMF_TNF_TRACE, "", "ibmf_setup_term_ctx() exit\n");
3582 
3583 	return (IBMF_SUCCESS);
3584 }
3585