1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * ibcm_impl.c
28  *
29  * contains internal functions of IB CM module.
30  *
31  * TBD:
32  * 1. HCA CATASTROPHIC/RECOVERED not handled yet
33  */
34 
35 #include <sys/ib/mgt/ibcm/ibcm_impl.h>
36 #include <sys/disp.h>
37 
38 
39 /* function prototypes */
40 static ibcm_status_t	ibcm_init(void);
41 static ibcm_status_t	ibcm_fini(void);
42 
/* Routines to initialize and destroy CM global locks and CVs */
44 static void		ibcm_init_locks(void);
45 static void		ibcm_fini_locks(void);
46 
47 /* Routines that initialize/teardown CM's global hca structures */
48 static void		ibcm_init_hcas();
49 static ibcm_status_t	ibcm_fini_hcas();
50 
51 static void		ibcm_init_classportinfo();
52 static void		ibcm_stop_timeout_thread();
53 
54 /* Routines that handle HCA attach/detach asyncs */
55 static void		ibcm_hca_attach(ib_guid_t);
56 static ibcm_status_t	ibcm_hca_detach(ibcm_hca_info_t *);
57 
58 /* Routines that initialize the HCA's port related fields */
59 static ibt_status_t	ibcm_hca_init_port(ibcm_hca_info_t *hcap,
60 			    uint8_t port_index);
61 static ibcm_status_t	ibcm_hca_fini_port(ibcm_hca_info_t *hcap,
62 			    uint8_t port_index);
63 
64 static void ibcm_rc_flow_control_init(void);
65 static void ibcm_rc_flow_control_fini(void);
66 
67 /*
68  * Routines that check if hca's avl trees and sidr lists are free of any
69  * active client resources ie., RC or UD state structures in certain states
70  */
71 static ibcm_status_t	ibcm_check_avl_clean(ibcm_hca_info_t *hcap);
72 static ibcm_status_t	ibcm_check_sidr_clean(ibcm_hca_info_t *hcap);
73 
74 /* Add a new hca structure to CM's global hca list */
75 static ibcm_hca_info_t	*ibcm_add_hca_entry(ib_guid_t hcaguid, uint_t nports);
76 
77 static void		ibcm_comm_est_handler(ibt_async_event_t *);
78 void			ibcm_async_handler(void *, ibt_hca_hdl_t,
79 			    ibt_async_code_t, ibt_async_event_t *);
80 
81 /* Global variables */
82 char			cmlog[] = "ibcm";	/* for debug log messages */
83 ibt_clnt_hdl_t		ibcm_ibt_handle;	/* IBT handle */
84 kmutex_t		ibcm_svc_info_lock;	/* list lock */
85 kcondvar_t		ibcm_svc_info_cv;	/* cv for deregister */
86 kmutex_t		ibcm_recv_mutex;
87 avl_tree_t		ibcm_svc_avl_tree;
88 taskq_t			*ibcm_taskq = NULL;
89 int			taskq_dispatch_fail_cnt;
90 
91 kmutex_t		ibcm_trace_mutex;	/* Trace mutex */
92 kmutex_t		ibcm_trace_print_mutex;	/* Trace print mutex */
93 int			ibcm_conn_max_trcnt = IBCM_MAX_CONN_TRCNT;
94 
int			ibcm_enable_trace = 2;	/* Trace level 2 by default */
96 int			ibcm_dtrace = 0; /* conditionally enable more dtrace */
97 
98 _NOTE(MUTEX_PROTECTS_DATA(ibcm_svc_info_lock, ibcm_svc_info_s::{svc_bind_list
99     svc_ref_cnt svc_to_delete}))
100 
101 _NOTE(MUTEX_PROTECTS_DATA(ibcm_svc_info_lock, ibcm_svc_bind_s::{sbind_link}))
102 
103 _NOTE(MUTEX_PROTECTS_DATA(ibcm_trace_mutex, ibcm_conn_trace_s))
104 
105 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_conn_trace_s))
106 
107 _NOTE(MUTEX_PROTECTS_DATA(ibcm_trace_print_mutex, ibcm_debug_buf))
108 
109 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_debug_buf))
110 
111 /*
112  * Initial state is INIT. All hca dr's return success immediately in this
113  * state, without adding or deleting any hca's to CM.
114  */
115 ibcm_finit_state_t	ibcm_finit_state = IBCM_FINIT_INIT;
116 
117 /* mutex and cv to manage hca's reference and resource count(s) */
118 kmutex_t		ibcm_global_hca_lock;
119 kcondvar_t		ibcm_global_hca_cv;
120 
121 /* mutex and cv to sa session open */
122 kmutex_t		ibcm_sa_open_lock;
123 kcondvar_t		ibcm_sa_open_cv;
124 int			ibcm_sa_timeout_delay = 1;		/* in ticks */
125 _NOTE(MUTEX_PROTECTS_DATA(ibcm_sa_open_lock,
126     ibcm_port_info_s::{port_ibmf_saa_hdl port_saa_open_in_progress}))
127 
128 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_port_info_s::{port_ibmf_saa_hdl}))
129 
130 /* serialize sm notice callbacks */
131 kmutex_t		ibcm_sm_notice_serialize_lock;
132 
133 _NOTE(LOCK_ORDER(ibcm_sm_notice_serialize_lock ibcm_global_hca_lock))
134 
135 _NOTE(MUTEX_PROTECTS_DATA(ibcm_global_hca_lock, ibcm_hca_info_s::{hca_state
136     hca_svc_cnt hca_acc_cnt hca_res_cnt hca_next}))
137 
138 _NOTE(MUTEX_PROTECTS_DATA(ibcm_global_hca_lock,
139     ibcm_port_info_s::{port_ibmf_hdl}))
140 
141 _NOTE(MUTEX_PROTECTS_DATA(ibcm_sm_notice_serialize_lock,
142     ibcm_port_info_s::{port_event_status}))
143 
144 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_hca_info_s::{hca_state}))
145 _NOTE(DATA_READABLE_WITHOUT_LOCK(
146     ibcm_hca_info_s::{hca_port_info.port_ibmf_hdl}))
147 
148 /* mutex for CM's qp list management */
149 kmutex_t		ibcm_qp_list_lock;
150 
151 _NOTE(MUTEX_PROTECTS_DATA(ibcm_qp_list_lock, ibcm_port_info_s::{port_qplist}))
152 _NOTE(MUTEX_PROTECTS_DATA(ibcm_qp_list_lock, ibcm_qp_list_s))
153 _NOTE(MUTEX_PROTECTS_DATA(ibcm_qp_list_lock, ibcm_qp_list_s))
154 
155 kcondvar_t		ibcm_timeout_list_cv;
156 kcondvar_t		ibcm_timeout_thread_done_cv;
157 kt_did_t		ibcm_timeout_thread_did;
158 ibcm_state_data_t	*ibcm_timeout_list_hdr, *ibcm_timeout_list_tail;
159 ibcm_ud_state_data_t	*ibcm_ud_timeout_list_hdr, *ibcm_ud_timeout_list_tail;
160 kmutex_t		ibcm_timeout_list_lock;
161 uint8_t			ibcm_timeout_list_flags = 0;
162 pri_t			ibcm_timeout_thread_pri = MINCLSYSPRI;
163 
164 _NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock,
165     ibcm_state_data_s::timeout_next))
166 
167 _NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock,
168     ibcm_ud_state_data_s::ud_timeout_next))
169 
170 /*
171  * Flow control logic for open_rc_channel uses the following.
172  */
173 
struct ibcm_open_s {
	kmutex_t		mutex;		/* presumably guards all fields below — confirm vs ibcm_open_task() */
	kcondvar_t		cv;
	uint8_t			task_running;	/* NOTE(review): looks like a "task dispatched" flag; verify */
	uint_t			queued;
	uint_t			exit_deferred;
	uint_t			in_progress;
	uint_t			in_progress_max;
	uint_t			sends;
	uint_t			sends_max;
	uint_t			sends_lowat;	/* low/high water marks for send flow control (presumed) */
	uint_t			sends_hiwat;
	ibcm_state_data_t	*tail;
	ibcm_state_data_t	head;		/* embedded list head (not a pointer), tail points into list */
} ibcm_open;
189 
190 static void ibcm_open_task(void *);
191 
192 /*
193  * Flow control logic for SA access and close_rc_channel calls follows.
194  */
195 
/* Tunables: caps on concurrently outstanding operations (presumed; see flows below) */
int ibcm_close_simul_max	= 12;
int ibcm_lapr_simul_max		= 12;
int ibcm_saa_simul_max		= 8;

/* One chunk of waiters blocked on a flow; chunks are chained via link */
typedef struct ibcm_flow1_s {
	struct ibcm_flow1_s	*link;
	kcondvar_t		cv;
	uint8_t			waiters;	/* 1 to IBCM_FLOW_SIMUL_MAX */
} ibcm_flow1_t;

/* Per-operation-class flow control state (SA access, CLOSE, LAP/APR) */
typedef struct ibcm_flow_s {
	ibcm_flow1_t		*list;
	uint_t			simul;	/* #requests currently outstanding */
	uint_t			simul_max;
	uint_t			lowat;
	uint_t			lowat_default;
	/* statistics */
	uint_t			total;
} ibcm_flow_t;

ibcm_flow_t ibcm_saa_flow;
ibcm_flow_t ibcm_close_flow;
ibcm_flow_t ibcm_lapr_flow;

/* NONBLOCKING close requests are queued; head is embedded, tail points in */
struct ibcm_close_s {
	kmutex_t		mutex;
	ibcm_state_data_t	*tail;
	ibcm_state_data_t	head;
} ibcm_close;
227 
228 static ibt_clnt_modinfo_t ibcm_ibt_modinfo = {	/* Client's modinfop */
229 	IBTI_V_CURR,
230 	IBT_CM,
231 	ibcm_async_handler,
232 	NULL,
233 	"IBCM"
234 };
235 
236 /* IBCM's list of HCAs registered with it */
237 static ibcm_hca_info_t	*ibcm_hca_listp = NULL;	/* CM's HCA list */
238 
239 /* Array of CM state call table functions */
240 ibcm_state_handler_t	ibcm_sm_funcs_tbl[] = {
241 	ibcm_process_req_msg,
242 	ibcm_process_mra_msg,
243 	ibcm_process_rej_msg,
244 	ibcm_process_rep_msg,
245 	ibcm_process_rtu_msg,
246 	ibcm_process_dreq_msg,
247 	ibcm_process_drep_msg,
248 	ibcm_process_sidr_req_msg,
249 	ibcm_process_sidr_rep_msg,
250 	ibcm_process_lap_msg,
251 	ibcm_process_apr_msg
252 };
253 
254 /* the following globals are CM tunables */
255 ibt_rnr_nak_time_t	ibcm_default_rnr_nak_time = IBT_RNR_NAK_655ms;
256 
257 uint32_t	ibcm_max_retries = IBCM_MAX_RETRIES;
258 clock_t		ibcm_local_processing_time = IBCM_LOCAL_RESPONSE_TIME;
259 clock_t		ibcm_remote_response_time = IBCM_REMOTE_RESPONSE_TIME;
260 ib_time_t	ibcm_max_sidr_rep_proctime = IBCM_MAX_SIDR_PROCESS_TIME;
261 ib_time_t	ibcm_max_sidr_pktlife_time = IBCM_MAX_SIDR_PKT_LIFE_TIME;
262 
263 ib_time_t	ibcm_max_sidr_rep_store_time = 18;
264 uint32_t	ibcm_wait_for_acc_cnt_timeout = 2000000;	/* 2 sec */
265 uint32_t	ibcm_wait_for_res_cnt_timeout = 2000000;	/* 2 sec */
266 
267 ib_time_t	ibcm_max_ib_pkt_lt = IBCM_MAX_IB_PKT_LT;
268 ib_time_t	ibcm_max_ib_mad_pkt_lt = IBCM_MAX_IB_MAD_PKT_LT;
269 
270 /*
271  * This delay accounts for time involved in various activities as follows :
272  *
273  * IBMF delays for posting the MADs in non-blocking mode
274  * IBMF delays for receiving the MADs and delivering to CM
275  * CM delays in processing the MADs before invoking client handlers,
276  * Any other delays associated with HCA driver in processing the MADs and
277  * 	other subsystems that CM may invoke (ex : SA, HCA driver)
278  */
279 uint32_t	ibcm_sw_delay	= 1000;	/* 1000us / 1ms */
280 uint32_t	ibcm_max_sa_retries = IBCM_MAX_SA_RETRIES + 1;
281 
282 /*	approx boot time */
283 uint32_t	ibcm_adj_btime = 4;	/* 4 seconds */
284 
285 /*
286  * The information in ibcm_clpinfo is kept in wireformat and is setup at
287  * init time, and used read-only after that
288  */
289 ibcm_classportinfo_msg_t	ibcm_clpinfo;
290 
291 char	*event_str[] = {
292 	"NEVER SEE THIS             ",
293 	"SESSION_ID                 ",
294 	"CHAN_HDL                   ",
295 	"LOCAL_COMID/HCA/PORT       ",
296 	"LOCAL_QPN                  ",
297 	"REMOTE_COMID/HCA           ",
298 	"REMOTE_QPN                 ",
299 	"BASE_TIME                  ",
300 	"INCOMING_REQ               ",
301 	"INCOMING_REP               ",
302 	"INCOMING_RTU               ",
303 	"INCOMING_COMEST            ",
304 	"INCOMING_MRA               ",
305 	"INCOMING_REJ               ",
306 	"INCOMING_LAP               ",
307 	"INCOMING_APR               ",
308 	"INCOMING_DREQ              ",
309 	"INCOMING_DREP              ",
310 	"OUTGOING_REQ               ",
311 	"OUTGOING_REP               ",
312 	"OUTGOING_RTU               ",
313 	"OUTGOING_LAP               ",
314 	"OUTGOING_APR               ",
315 	"OUTGOING_MRA               ",
316 	"OUTGOING_REJ               ",
317 	"OUTGOING_DREQ              ",
318 	"OUTGOING_DREP              ",
319 	"REQ_POST_COMPLETE          ",
320 	"REP_POST_COMPLETE          ",
321 	"RTU_POST_COMPLETE          ",
322 	"MRA_POST_COMPLETE          ",
323 	"REJ_POST_COMPLETE          ",
324 	"LAP_POST_COMPLETE          ",
325 	"APR_POST_COMPLETE          ",
326 	"DREQ_POST_COMPLETE         ",
327 	"DREP_POST_COMPLETE         ",
328 	"TIMEOUT_REP                ",
329 	"CALLED_REQ_RCVD_EVENT      ",
330 	"RET_REQ_RCVD_EVENT         ",
331 	"CALLED_REP_RCVD_EVENT      ",
332 	"RET_REP_RCVD_EVENT         ",
333 	"CALLED_CONN_EST_EVENT      ",
334 	"RET_CONN_EST_EVENT         ",
335 	"CALLED_CONN_FAIL_EVENT     ",
336 	"RET_CONN_FAIL_EVENT        ",
337 	"CALLED_CONN_CLOSE_EVENT    ",
338 	"RET_CONN_CLOSE_EVENT       ",
339 	"INIT_INIT                  ",
340 	"INIT_INIT_FAIL             ",
341 	"INIT_RTR                   ",
342 	"INIT_RTR_FAIL              ",
343 	"RTR_RTS                    ",
344 	"RTR_RTS_FAIL               ",
345 	"RTS_RTS                    ",
346 	"RTS_RTS_FAIL               ",
347 	"TO_ERROR                   ",
348 	"ERROR_FAIL                 ",
349 	"SET_ALT                    ",
350 	"SET_ALT_FAIL               ",
351 	"STALE_DETECT               ",
352 	"OUTGOING_REQ_RETRY         ",
353 	"OUTGOING_REP_RETRY         ",
354 	"OUTGOING_LAP_RETRY         ",
355 	"OUTGOING_MRA_RETRY         ",
356 	"OUTGOING_DREQ_RETRY        ",
357 	"NEVER SEE THIS             "
358 };
359 
360 char	ibcm_debug_buf[IBCM_DEBUG_BUF_SIZE];
361 
362 _NOTE(SCHEME_PROTECTS_DATA("used in a localized function consistently",
363     ibcm_debug_buf))
364 _NOTE(READ_ONLY_DATA(ibcm_taskq))
365 
366 _NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock, ibcm_timeout_list_flags))
367 _NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock, ibcm_timeout_list_hdr))
368 _NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock, ibcm_ud_timeout_list_hdr))
369 
370 #ifdef DEBUG
371 int		ibcm_test_mode = 0;	/* set to 1, if running tests */
372 #endif
373 
374 
375 /* Module Driver Info */
376 static struct modlmisc ibcm_modlmisc = {
377 	&mod_miscops,
378 	"IB Communication Manager"
379 };
380 
381 /* Module Linkage */
382 static struct modlinkage ibcm_modlinkage = {
383 	MODREV_1,
384 	&ibcm_modlmisc,
385 	NULL
386 };
387 
388 
389 int
390 _init(void)
391 {
392 	int		rval;
393 	ibcm_status_t	status;
394 
395 	status = ibcm_init();
396 	if (status != IBCM_SUCCESS) {
397 		IBTF_DPRINTF_L2(cmlog, "_init: ibcm failed %d", status);
398 		return (EINVAL);
399 	}
400 
401 	rval = mod_install(&ibcm_modlinkage);
402 	if (rval != 0) {
403 		IBTF_DPRINTF_L2(cmlog, "_init: ibcm mod_install failed %d",
404 		    rval);
405 		(void) ibcm_fini();
406 	}
407 
408 	IBTF_DPRINTF_L5(cmlog, "_init: ibcm successful");
409 	return (rval);
410 
411 }
412 
413 
/* Loadable-module entry point: report module information via mod_info(). */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&ibcm_modlinkage, modinfop));
}
419 
420 
421 int
422 _fini(void)
423 {
424 	int status;
425 
426 	if (ibcm_fini() != IBCM_SUCCESS)
427 		return (EBUSY);
428 
429 	if ((status = mod_remove(&ibcm_modlinkage)) != 0) {
430 		IBTF_DPRINTF_L2(cmlog, "_fini: ibcm mod_remove failed %d",
431 		    status);
432 		return (status);
433 	}
434 
435 	IBTF_DPRINTF_L5(cmlog, "_fini: ibcm successful");
436 
437 	return (status);
438 }
439 
/*
 * Initializes all global mutexes and CVs in the cm module, and creates
 * the global service AVL tree. Must be paired with ibcm_fini_locks().
 */
static void
ibcm_init_locks()
{

	/* Verify CM MAD sizes */
#ifdef DEBUG

	/* In test mode, dump the compiled-in sizes of every CM MAD struct */
	if (ibcm_test_mode > 1) {

		IBTF_DPRINTF_L1(cmlog, "REQ MAD SIZE %d",
		    sizeof (ibcm_req_msg_t));
		IBTF_DPRINTF_L1(cmlog, "REP MAD SIZE %d",
		    sizeof (ibcm_rep_msg_t));
		IBTF_DPRINTF_L1(cmlog, "RTU MAD SIZE %d",
		    sizeof (ibcm_rtu_msg_t));
		IBTF_DPRINTF_L1(cmlog, "MRA MAD SIZE %d",
		    sizeof (ibcm_mra_msg_t));
		IBTF_DPRINTF_L1(cmlog, "REJ MAD SIZE %d",
		    sizeof (ibcm_rej_msg_t));
		IBTF_DPRINTF_L1(cmlog, "LAP MAD SIZE %d",
		    sizeof (ibcm_lap_msg_t));
		IBTF_DPRINTF_L1(cmlog, "APR MAD SIZE %d",
		    sizeof (ibcm_apr_msg_t));
		IBTF_DPRINTF_L1(cmlog, "DREQ MAD SIZE %d",
		    sizeof (ibcm_dreq_msg_t));
		IBTF_DPRINTF_L1(cmlog, "DREP MAD SIZE %d",
		    sizeof (ibcm_drep_msg_t));
		IBTF_DPRINTF_L1(cmlog, "SIDR REQ MAD SIZE %d",
		    sizeof (ibcm_sidr_req_msg_t));
		IBTF_DPRINTF_L1(cmlog, "SIDR REP MAD SIZE %d",
		    sizeof (ibcm_sidr_rep_msg_t));
	}

#endif

	/* Create all global locks within cm module */
	mutex_init(&ibcm_svc_info_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&ibcm_timeout_list_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&ibcm_global_hca_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&ibcm_sa_open_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&ibcm_recv_mutex, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&ibcm_sm_notice_serialize_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&ibcm_qp_list_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&ibcm_trace_mutex, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&ibcm_trace_print_mutex, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&ibcm_svc_info_cv, NULL, CV_DRIVER, NULL);
	cv_init(&ibcm_timeout_list_cv, NULL, CV_DRIVER, NULL);
	cv_init(&ibcm_timeout_thread_done_cv, NULL, CV_DRIVER, NULL);
	cv_init(&ibcm_global_hca_cv, NULL, CV_DRIVER, NULL);
	cv_init(&ibcm_sa_open_cv, NULL, CV_DRIVER, NULL);
	/* Service records, keyed/ordered by ibcm_svc_compare() */
	avl_create(&ibcm_svc_avl_tree, ibcm_svc_compare,
	    sizeof (ibcm_svc_info_t),
	    offsetof(struct ibcm_svc_info_s, svc_link));

	IBTF_DPRINTF_L5(cmlog, "ibcm_init_locks: done");
}
497 
/*
 * Destroys all global mutexes and CVs in the cm module, and the service
 * AVL tree. Exact mirror of ibcm_init_locks(); every object initialized
 * there is destroyed here.
 */
static void
ibcm_fini_locks()
{
	/* Destroy all global locks within cm module */
	mutex_destroy(&ibcm_svc_info_lock);
	mutex_destroy(&ibcm_timeout_list_lock);
	mutex_destroy(&ibcm_global_hca_lock);
	mutex_destroy(&ibcm_sa_open_lock);
	mutex_destroy(&ibcm_recv_mutex);
	mutex_destroy(&ibcm_sm_notice_serialize_lock);
	mutex_destroy(&ibcm_qp_list_lock);
	mutex_destroy(&ibcm_trace_mutex);
	mutex_destroy(&ibcm_trace_print_mutex);
	cv_destroy(&ibcm_svc_info_cv);
	cv_destroy(&ibcm_timeout_list_cv);
	cv_destroy(&ibcm_timeout_thread_done_cv);
	cv_destroy(&ibcm_global_hca_cv);
	cv_destroy(&ibcm_sa_open_cv);
	avl_destroy(&ibcm_svc_avl_tree);

	IBTF_DPRINTF_L5(cmlog, "ibcm_fini_locks: done");
}
521 
522 
/*
 * Initialize CM's classport info (ibcm_clpinfo). The structure is kept
 * in wire format (hence the h2b16/h2b32 byte swaps) and is treated as
 * read-only after this runs; fields not set here remain zero.
 */
static void
ibcm_init_classportinfo()
{
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_clpinfo));

	ibcm_clpinfo.BaseVersion = IBCM_MAD_BASE_VERSION;
	ibcm_clpinfo.ClassVersion = IBCM_MAD_CLASS_VERSION;

	/* For now, CM supports same capabilities at all ports */
	ibcm_clpinfo.CapabilityMask =
	    h2b16(IBCM_CPINFO_CAP_RC | IBCM_CPINFO_CAP_SIDR);

	/* Bits 0-7 are all 0 for Communication Mgmt Class */

	/* For now, CM has the same respvalue at all ports */
	/* only the low 5 bits carry the response time value */
	ibcm_clpinfo.RespTimeValue_plus =
	    h2b32(ibt_usec2ib(ibcm_local_processing_time) & 0x1f);

	/* For now, redirect fields are set to 0 */
	/* Trap fields are not applicable to CM, hence set to 0 */

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_clpinfo));
	IBTF_DPRINTF_L5(cmlog, "ibcm_init_classportinfo: done");
}
548 
549 /*
550  * ibcm_init():
551  * 	- call ibt_attach()
552  * 	- create AVL trees
553  *	- Attach HCA handlers that are already present before
554  *	CM got loaded.
555  *
556  * Arguments:	NONE
557  *
558  * Return values:
559  *	IBCM_SUCCESS - success
560  */
561 static ibcm_status_t
562 ibcm_init(void)
563 {
564 	ibt_status_t	status;
565 	kthread_t	*t;
566 
567 	IBTF_DPRINTF_L3(cmlog, "ibcm_init:");
568 
569 	ibcm_init_classportinfo();
570 
571 	if (ibcm_init_ids() != IBCM_SUCCESS) {
572 		IBTF_DPRINTF_L1(cmlog, "ibcm_init: "
573 		    "fatal error: vmem_create() failed");
574 		return (IBCM_FAILURE);
575 	}
576 	ibcm_init_locks();
577 
578 	if (ibcm_ar_init() != IBCM_SUCCESS) {
579 		IBTF_DPRINTF_L1(cmlog, "ibcm_init: "
580 		    "fatal error: ibcm_ar_init() failed");
581 		ibcm_fini_ids();
582 		ibcm_fini_locks();
583 		return (IBCM_FAILURE);
584 	}
585 	ibcm_rc_flow_control_init();
586 
587 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_taskq))
588 	ibcm_taskq = system_taskq;
589 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_taskq))
590 
591 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_timeout_list_flags))
592 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_timeout_thread_did))
593 
594 	/* Start the timeout list processing thread */
595 	ibcm_timeout_list_flags = 0;
596 	t = thread_create(NULL, 0, ibcm_process_tlist, 0, 0, &p0, TS_RUN,
597 	    ibcm_timeout_thread_pri);
598 	ibcm_timeout_thread_did = t->t_did;
599 
600 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_timeout_list_flags))
601 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_timeout_thread_did))
602 
603 	/*
604 	 * NOTE : if ibt_attach is done after ibcm_init_hcas, then some
605 	 * HCA DR events may be lost. CM could call re-init hca list
606 	 * again, but it is more complicated. Some HCA's DR's lost may
607 	 * be HCA detach, which makes hca list re-syncing and locking more
608 	 * complex
609 	 */
610 	status = ibt_attach(&ibcm_ibt_modinfo, NULL, NULL, &ibcm_ibt_handle);
611 	if (status != IBT_SUCCESS) {
612 		IBTF_DPRINTF_L2(cmlog, "ibcm_init(): ibt_attach failed %d",
613 		    status);
614 		(void) ibcm_ar_fini();
615 		ibcm_stop_timeout_thread();
616 		ibcm_fini_ids();
617 		ibcm_fini_locks();
618 		ibcm_rc_flow_control_fini();
619 		return (IBCM_FAILURE);
620 	}
621 
622 	/* Block all HCA attach/detach asyncs */
623 	mutex_enter(&ibcm_global_hca_lock);
624 
625 	ibcm_init_hcas();
626 	ibcm_finit_state = IBCM_FINIT_IDLE;
627 
628 	ibcm_path_cache_init();
629 
630 	/* Unblock any waiting HCA DR asyncs in CM */
631 	mutex_exit(&ibcm_global_hca_lock);
632 
633 	IBTF_DPRINTF_L4(cmlog, "ibcm_init: done");
634 	return (IBCM_SUCCESS);
635 }
636 
637 /* Allocates and initializes the "per hca" global data in CM */
638 static void
639 ibcm_init_hcas()
640 {
641 	uint_t	num_hcas = 0;
642 	ib_guid_t *guid_array;
643 	int i;
644 
645 	IBTF_DPRINTF_L5(cmlog, "ibcm_init_hcas:");
646 
647 	/* Get the number of HCAs */
648 	num_hcas = ibt_get_hca_list(&guid_array);
649 	IBTF_DPRINTF_L4(cmlog, "ibcm_init_hcas: ibt_get_hca_list() "
650 	    "returned %d hcas", num_hcas);
651 
652 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
653 
654 	for (i = 0; i < num_hcas; i++)
655 		ibcm_hca_attach(guid_array[i]);
656 
657 	if (num_hcas)
658 		ibt_free_hca_list(guid_array, num_hcas);
659 
660 	IBTF_DPRINTF_L5(cmlog, "ibcm_init_hcas: done");
661 }
662 
663 
664 /*
665  * ibcm_fini():
666  * 	- Deregister w/ ibt
667  * 	- Cleanup IBCM HCA listp
668  * 	- Destroy mutexes
669  *
670  * Arguments:	NONE
671  *
672  * Return values:
673  *	IBCM_SUCCESS - success
674  */
675 static ibcm_status_t
676 ibcm_fini(void)
677 {
678 	ibt_status_t	status;
679 
680 	IBTF_DPRINTF_L3(cmlog, "ibcm_fini:");
681 
682 	/*
683 	 * CM assumes that the all general clients got rid of all the
684 	 * established connections and service registrations, completed all
685 	 * pending SIDR operations before a call to ibcm_fini()
686 	 */
687 
688 	if (ibcm_ar_fini() != IBCM_SUCCESS) {
689 		IBTF_DPRINTF_L2(cmlog, "ibcm_fini: ibcm_ar_fini failed");
690 		return (IBCM_FAILURE);
691 	}
692 
693 	/* cleanup the svcinfo list */
694 	mutex_enter(&ibcm_svc_info_lock);
695 	if (avl_first(&ibcm_svc_avl_tree) != NULL) {
696 		IBTF_DPRINTF_L2(cmlog, "ibcm_fini: "
697 		    "ibcm_svc_avl_tree is not empty");
698 		mutex_exit(&ibcm_svc_info_lock);
699 		return (IBCM_FAILURE);
700 	}
701 	mutex_exit(&ibcm_svc_info_lock);
702 
703 	/* disables any new hca attach/detaches */
704 	mutex_enter(&ibcm_global_hca_lock);
705 
706 	ibcm_finit_state = IBCM_FINIT_BUSY;
707 
708 	if (ibcm_fini_hcas() != IBCM_SUCCESS) {
709 		IBTF_DPRINTF_L2(cmlog, "ibcm_fini: "
710 		    "some hca's still have client resources");
711 
712 		/* First, re-initialize the hcas */
713 		ibcm_init_hcas();
714 		/* and then enable the HCA asyncs */
715 		ibcm_finit_state = IBCM_FINIT_IDLE;
716 		mutex_exit(&ibcm_global_hca_lock);
717 		if (ibcm_ar_init() != IBCM_SUCCESS) {
718 			IBTF_DPRINTF_L1(cmlog, "ibcm_fini:ibcm_ar_init failed");
719 		}
720 		return (IBCM_FAILURE);
721 	}
722 
723 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_timeout_list_hdr))
724 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_ud_timeout_list_hdr))
725 
726 	ASSERT(ibcm_timeout_list_hdr == NULL);
727 	ASSERT(ibcm_ud_timeout_list_hdr == NULL);
728 
729 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_timeout_list_hdr))
730 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_ud_timeout_list_hdr))
731 
732 	/* Release any pending asyncs on ibcm_global_hca_lock */
733 	ibcm_finit_state = IBCM_FINIT_SUCCESS;
734 	mutex_exit(&ibcm_global_hca_lock);
735 
736 	ibcm_stop_timeout_thread();
737 
738 	/*
739 	 * Detach from IBTL. Waits until all pending asyncs are complete.
740 	 * Above cv_broadcast wakes up any waiting hca attach/detach asyncs
741 	 */
742 	status = ibt_detach(ibcm_ibt_handle);
743 
744 	/* if detach fails, CM didn't free up some resources, so assert */
745 	if (status != IBT_SUCCESS)
746 		IBTF_DPRINTF_L1(cmlog, "ibcm_fini: ibt_detach failed %d",
747 		    status);
748 
749 	ibcm_rc_flow_control_fini();
750 
751 	ibcm_path_cache_fini();
752 
753 	ibcm_fini_ids();
754 	ibcm_fini_locks();
755 	IBTF_DPRINTF_L3(cmlog, "ibcm_fini: done");
756 	return (IBCM_SUCCESS);
757 }
758 
759 /* This routine exit's the ibcm timeout thread  */
760 static void
761 ibcm_stop_timeout_thread()
762 {
763 	mutex_enter(&ibcm_timeout_list_lock);
764 
765 	/* Stop the timeout list processing thread */
766 	ibcm_timeout_list_flags =
767 	    ibcm_timeout_list_flags | IBCM_TIMEOUT_THREAD_EXIT;
768 
769 	/* Wake up, if the timeout thread is on a cv_wait */
770 	cv_signal(&ibcm_timeout_list_cv);
771 
772 	mutex_exit(&ibcm_timeout_list_lock);
773 	thread_join(ibcm_timeout_thread_did);
774 
775 	IBTF_DPRINTF_L5(cmlog, "ibcm_stop_timeout_thread: done");
776 }
777 
778 
779 /* Attempts to release all the hca's associated with CM */
780 static ibcm_status_t
781 ibcm_fini_hcas()
782 {
783 	ibcm_hca_info_t *hcap, *next;
784 
785 	IBTF_DPRINTF_L4(cmlog, "ibcm_fini_hcas:");
786 
787 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
788 
789 	hcap = ibcm_hca_listp;
790 	while (hcap != NULL) {
791 		next = hcap->hca_next;
792 		if (ibcm_hca_detach(hcap) != IBCM_SUCCESS) {
793 			ibcm_hca_listp = hcap;
794 			return (IBCM_FAILURE);
795 		}
796 		hcap = next;
797 	}
798 
799 	IBTF_DPRINTF_L4(cmlog, "ibcm_fini_hcas: SUCCEEDED");
800 	return (IBCM_SUCCESS);
801 }
802 
803 
804 /*
805  * ibcm_hca_attach():
806  *	Called as an asynchronous event to notify CM of an attach of HCA.
807  *	Here ibcm_hca_info_t is initialized and all fields are
808  *	filled in along with SA Access handles and IBMA handles.
809  *	Also called from ibcm_init to initialize ibcm_hca_info_t's for each
810  *	hca's
811  *
812  * Arguments: (WILL CHANGE BASED ON ASYNC EVENT CODE)
813  *	hca_guid	- HCA's guid
814  *
815  * Return values: NONE
816  */
817 static void
818 ibcm_hca_attach(ib_guid_t hcaguid)
819 {
820 	int			i;
821 	ibt_status_t		status;
822 	uint_t			nports = 0;
823 	ibcm_hca_info_t		*hcap;
824 	ibt_hca_attr_t		hca_attrs;
825 
826 	IBTF_DPRINTF_L3(cmlog, "ibcm_hca_attach: guid = 0x%llX", hcaguid);
827 
828 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
829 
830 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*hcap))
831 
832 	status = ibt_query_hca_byguid(hcaguid, &hca_attrs);
833 	if (status != IBT_SUCCESS) {
834 		IBTF_DPRINTF_L2(cmlog, "ibcm_hca_attach: "
835 		    "ibt_query_hca_byguid failed = %d", status);
836 		return;
837 	}
838 	nports = hca_attrs.hca_nports;
839 
840 	IBTF_DPRINTF_L4(cmlog, "ibcm_hca_attach: num ports = %x", nports);
841 
842 	if ((hcap = ibcm_add_hca_entry(hcaguid, nports)) == NULL)
843 		return;
844 
845 	hcap->hca_guid = hcaguid;	/* Set GUID */
846 	hcap->hca_num_ports = nports;	/* Set number of ports */
847 
848 	if (ibcm_init_hca_ids(hcap) != IBCM_SUCCESS) {
849 		ibcm_delete_hca_entry(hcap);
850 		return;
851 	}
852 
853 	/* Store the static hca attribute data */
854 	hcap->hca_caps = hca_attrs.hca_flags;
855 	hcap->hca_vendor_id = hca_attrs.hca_vendor_id;
856 	hcap->hca_device_id = hca_attrs.hca_device_id;
857 	hcap->hca_ack_delay = hca_attrs.hca_local_ack_delay;
858 	hcap->hca_max_rdma_in_qp = hca_attrs.hca_max_rdma_in_qp;
859 	hcap->hca_max_rdma_out_qp = hca_attrs.hca_max_rdma_out_qp;
860 
861 	/* loop thru nports and initialize IBMF handles */
862 	for (i = 0; i < hcap->hca_num_ports; i++) {
863 		status = ibt_get_port_state_byguid(hcaguid, i + 1, NULL, NULL);
864 		if (status != IBT_SUCCESS) {
865 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_attach: "
866 			    "port_num %d state DOWN", i + 1);
867 		}
868 
869 		hcap->hca_port_info[i].port_hcap = hcap;
870 		hcap->hca_port_info[i].port_num = i+1;
871 
872 		if (ibcm_hca_init_port(hcap, i) != IBT_SUCCESS)
873 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_attach: "
874 			    "ibcm_hca_init_port failed %d port_num %d",
875 			    status, i+1);
876 	}
877 
878 	/* create the "active" CM AVL tree */
879 	avl_create(&hcap->hca_active_tree, ibcm_active_node_compare,
880 	    sizeof (ibcm_state_data_t),
881 	    offsetof(struct ibcm_state_data_s, avl_active_link));
882 
883 	/* create the "passive" CM AVL tree */
884 	avl_create(&hcap->hca_passive_tree, ibcm_passive_node_compare,
885 	    sizeof (ibcm_state_data_t),
886 	    offsetof(struct ibcm_state_data_s, avl_passive_link));
887 
888 	/* create the "passive comid" CM AVL tree */
889 	avl_create(&hcap->hca_passive_comid_tree,
890 	    ibcm_passive_comid_node_compare,
891 	    sizeof (ibcm_state_data_t),
892 	    offsetof(struct ibcm_state_data_s, avl_passive_comid_link));
893 
894 	/*
895 	 * Mark the state of the HCA to "attach" only at the end
896 	 * Now CM starts accepting incoming MADs and client API calls
897 	 */
898 	hcap->hca_state = IBCM_HCA_ACTIVE;
899 
900 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*hcap))
901 
902 	IBTF_DPRINTF_L3(cmlog, "ibcm_hca_attach: ATTACH Done");
903 }
904 
905 /*
906  * ibcm_hca_detach():
907  *	Called as an asynchronous event to notify CM of a detach of HCA.
908  *	Here ibcm_hca_info_t is freed up and all fields that
909  *	were initialized earlier are cleaned up
910  *
911  * Arguments: (WILL CHANGE BASED ON ASYNC EVENT CODE)
912  *	hca_guid    - HCA's guid
913  *
914  * Return values:
915  *	IBCM_SUCCESS	- able to detach HCA
916  *	IBCM_FAILURE	- failed to detach HCA
917  */
918 static ibcm_status_t
919 ibcm_hca_detach(ibcm_hca_info_t *hcap)
920 {
921 	int		port_index, i;
922 	ibcm_status_t	status = IBCM_SUCCESS;
923 	clock_t		absolute_time;
924 
925 	IBTF_DPRINTF_L3(cmlog, "ibcm_hca_detach: hcap = 0x%p guid = 0x%llX",
926 	    hcap, hcap->hca_guid);
927 
928 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
929 
930 	/*
931 	 * Declare hca is going away to all CM clients. Wait until the
932 	 * access count becomes zero.
933 	 */
934 	hcap->hca_state = IBCM_HCA_NOT_ACTIVE;
935 
936 	/* wait on response CV */
937 	absolute_time = ddi_get_lbolt() +
938 	    drv_usectohz(ibcm_wait_for_acc_cnt_timeout);
939 
940 	while (hcap->hca_acc_cnt > 0)
941 		if (cv_timedwait(&ibcm_global_hca_cv, &ibcm_global_hca_lock,
942 		    absolute_time) == -1)
943 			break;
944 
945 	if (hcap->hca_acc_cnt != 0) {
946 		/* We got a timeout */
947 		IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: Aborting due"
948 		    " to timeout on hca_acc_cnt %u, \n Some CM Clients are "
949 		    "still active, looks like we need to wait some more time "
950 		    "(ibcm_wait_for_acc_cnt_timeout).", hcap->hca_acc_cnt);
951 		hcap->hca_state = IBCM_HCA_ACTIVE;
952 		return (IBCM_FAILURE);
953 	}
954 
955 	/*
956 	 * First make sure, there are no active users of ibma handles,
957 	 * and then de-register handles.
958 	 */
959 
960 	/* make sure that there are no "Service"s registered w/ this HCA. */
961 	if (hcap->hca_svc_cnt != 0) {
962 		IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: "
963 		    "Active services still there %d", hcap->hca_svc_cnt);
964 		hcap->hca_state = IBCM_HCA_ACTIVE;
965 		return (IBCM_FAILURE);
966 	}
967 
968 	if (ibcm_check_sidr_clean(hcap) != IBCM_SUCCESS) {
969 		IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach:"
970 		    "There are active SIDR operations");
971 		hcap->hca_state = IBCM_HCA_ACTIVE;
972 		return (IBCM_FAILURE);
973 	}
974 
975 	if (ibcm_check_avl_clean(hcap) != IBCM_SUCCESS) {
976 		IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: "
977 		    "There are active RC connections");
978 		hcap->hca_state = IBCM_HCA_ACTIVE;
979 		return (IBCM_FAILURE);
980 	}
981 
982 	/*
983 	 * Now, wait until all rc and sidr stateps go away
984 	 * All these stateps must be short lived ones, waiting to be cleaned
985 	 * up after some timeout value, based on the current state.
986 	 */
987 	IBTF_DPRINTF_L3(cmlog, "ibcm_hca_detach:hca_guid = 0x%llX res_cnt = %d",
988 	    hcap->hca_guid, hcap->hca_res_cnt);
989 
990 	/* wait on response CV */
991 	absolute_time = ddi_get_lbolt() +
992 	    drv_usectohz(ibcm_wait_for_res_cnt_timeout);
993 
994 	while (hcap->hca_res_cnt > 0)
995 		if (cv_timedwait(&ibcm_global_hca_cv, &ibcm_global_hca_lock,
996 		    absolute_time) == -1)
997 			break;
998 
999 	if (hcap->hca_res_cnt != 0) {
1000 		/* We got a timeout waiting for hca_res_cnt to become 0 */
1001 		IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: Aborting due"
1002 		    " to timeout on res_cnt %d, \n Some CM connections are "
1003 		    "still in transient state, looks like we need to wait "
1004 		    "some more time (ibcm_wait_for_res_cnt_timeout).",
1005 		    hcap->hca_res_cnt);
1006 		hcap->hca_state = IBCM_HCA_ACTIVE;
1007 		return (IBCM_FAILURE);
1008 	}
1009 
1010 	/* Re-assert the while loop step above */
1011 	ASSERT(hcap->hca_sidr_list == NULL);
1012 	avl_destroy(&hcap->hca_active_tree);
1013 	avl_destroy(&hcap->hca_passive_tree);
1014 	avl_destroy(&hcap->hca_passive_comid_tree);
1015 
1016 	/*
1017 	 * Unregister all ports from IBMA
1018 	 * If there is a failure, re-initialize any free'd ibma handles. This
1019 	 * is required to receive the incoming mads
1020 	 */
1021 	status = IBCM_SUCCESS;
1022 	for (port_index = 0; port_index < hcap->hca_num_ports; port_index++) {
1023 		if ((status = ibcm_hca_fini_port(hcap, port_index)) !=
1024 		    IBCM_SUCCESS) {
1025 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: "
1026 			    "Failed to free IBMA Handle for port_num %d",
1027 			    port_index + 1);
1028 			break;
1029 		}
1030 	}
1031 
1032 	/* If detach fails, re-initialize ibma handles for incoming mads */
1033 	if (status != IBCM_SUCCESS)  {
1034 		for (i = 0; i < port_index; i++) {
1035 			if (ibcm_hca_init_port(hcap, i) != IBT_SUCCESS)
1036 				IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: "
1037 				    "Failed to re-allocate IBMA Handles for"
1038 				    " port_num %d", port_index + 1);
1039 		}
1040 		hcap->hca_state = IBCM_HCA_ACTIVE;
1041 		return (IBCM_FAILURE);
1042 	}
1043 
1044 	ibcm_fini_hca_ids(hcap);
1045 	ibcm_delete_hca_entry(hcap);
1046 
1047 	IBTF_DPRINTF_L3(cmlog, "ibcm_hca_detach: DETACH succeeded");
1048 	return (IBCM_SUCCESS);
1049 }
1050 
1051 /* Checks, if there are any active sidr state entries in the specified hca */
1052 static ibcm_status_t
1053 ibcm_check_sidr_clean(ibcm_hca_info_t *hcap)
1054 {
1055 	ibcm_ud_state_data_t	*usp;
1056 	uint32_t		transient_cnt = 0;
1057 
1058 	IBTF_DPRINTF_L5(cmlog, "ibcm_check_sidr_clean:");
1059 
1060 	rw_enter(&hcap->hca_sidr_list_lock, RW_WRITER);
1061 	usp = hcap->hca_sidr_list;	/* Point to the list */
1062 	while (usp != NULL) {
1063 		mutex_enter(&usp->ud_state_mutex);
1064 		if ((usp->ud_state != IBCM_STATE_SIDR_REP_SENT) &&
1065 		    (usp->ud_state != IBCM_STATE_TIMED_OUT) &&
1066 		    (usp->ud_state != IBCM_STATE_DELETE)) {
1067 
1068 			IBTF_DPRINTF_L3(cmlog, "ibcm_check_sidr_clean:"
1069 			    "usp = %p not in transient state = %d", usp,
1070 			    usp->ud_state);
1071 
1072 			mutex_exit(&usp->ud_state_mutex);
1073 			rw_exit(&hcap->hca_sidr_list_lock);
1074 			return (IBCM_FAILURE);
1075 		} else {
1076 			mutex_exit(&usp->ud_state_mutex);
1077 			++transient_cnt;
1078 		}
1079 
1080 		usp = usp->ud_nextp;
1081 	}
1082 	rw_exit(&hcap->hca_sidr_list_lock);
1083 
1084 	IBTF_DPRINTF_L4(cmlog, "ibcm_check_sidr_clean: transient_cnt %d",
1085 	    transient_cnt);
1086 
1087 	return (IBCM_SUCCESS);
1088 }
1089 
1090 /* Checks, if there are any active rc state entries, in the specified hca */
1091 static ibcm_status_t
1092 ibcm_check_avl_clean(ibcm_hca_info_t *hcap)
1093 
1094 {
1095 	ibcm_state_data_t	*sp;
1096 	avl_tree_t		*avl_tree;
1097 	uint32_t		transient_cnt = 0;
1098 
1099 	IBTF_DPRINTF_L5(cmlog, "ibcm_check_avl_clean:");
1100 	/*
1101 	 * Both the trees ie., active and passive must reference to all
1102 	 * statep's, so let's use one
1103 	 */
1104 	avl_tree = &hcap->hca_active_tree;
1105 
1106 	rw_enter(&hcap->hca_state_rwlock, RW_WRITER);
1107 
1108 	for (sp = avl_first(avl_tree); sp != NULL;
1109 	    sp = avl_walk(avl_tree, sp, AVL_AFTER)) {
1110 		mutex_enter(&sp->state_mutex);
1111 		if ((sp->state != IBCM_STATE_TIMEWAIT) &&
1112 		    (sp->state != IBCM_STATE_REJ_SENT) &&
1113 		    (sp->state != IBCM_STATE_DELETE)) {
1114 			IBTF_DPRINTF_L3(cmlog, "ibcm_check_avl_clean: "
1115 			    "sp = %p not in transient state = %d", sp,
1116 			    sp->state);
1117 			mutex_exit(&sp->state_mutex);
1118 			rw_exit(&hcap->hca_state_rwlock);
1119 			return (IBCM_FAILURE);
1120 		} else {
1121 			mutex_exit(&sp->state_mutex);
1122 			++transient_cnt;
1123 		}
1124 	}
1125 
1126 	rw_exit(&hcap->hca_state_rwlock);
1127 
1128 	IBTF_DPRINTF_L4(cmlog, "ibcm_check_avl_clean: transient_cnt %d",
1129 	    transient_cnt);
1130 
1131 	return (IBCM_SUCCESS);
1132 }
1133 
1134 /* Adds a new entry into CM's global hca list, if hca_guid is not there yet */
1135 static ibcm_hca_info_t *
1136 ibcm_add_hca_entry(ib_guid_t hcaguid, uint_t nports)
1137 {
1138 	ibcm_hca_info_t	*hcap;
1139 
1140 	IBTF_DPRINTF_L5(cmlog, "ibcm_add_hca_entry: guid = 0x%llX",
1141 	    hcaguid);
1142 
1143 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
1144 
1145 	/*
1146 	 * Check if this hca_guid already in the list
1147 	 * If yes, then ignore this and return NULL
1148 	 */
1149 
1150 	hcap = ibcm_hca_listp;
1151 
1152 	/* search for this HCA */
1153 	while (hcap != NULL) {
1154 		if (hcap->hca_guid == hcaguid) {
1155 			/* already exists */
1156 			IBTF_DPRINTF_L2(cmlog, "ibcm_add_hca_entry: "
1157 			    "hcap %p guid 0x%llX, entry already exists !!",
1158 			    hcap, hcap->hca_guid);
1159 			return (NULL);
1160 		}
1161 		hcap = hcap->hca_next;
1162 	}
1163 
1164 	/* Allocate storage for the new HCA entry found */
1165 	hcap = kmem_zalloc(sizeof (ibcm_hca_info_t) +
1166 	    (nports - 1) * sizeof (ibcm_port_info_t), KM_SLEEP);
1167 
1168 	/* initialize RW lock */
1169 	rw_init(&hcap->hca_state_rwlock, NULL, RW_DRIVER, NULL);
1170 	/* initialize SIDR list lock */
1171 	rw_init(&hcap->hca_sidr_list_lock, NULL, RW_DRIVER, NULL);
1172 	/* Insert "hcap" into the global HCA list maintained by CM */
1173 	hcap->hca_next = ibcm_hca_listp;
1174 	ibcm_hca_listp = hcap;
1175 
1176 	IBTF_DPRINTF_L5(cmlog, "ibcm_add_hca_entry: done hcap = 0x%p", hcap);
1177 
1178 	return (hcap);
1179 
1180 }
1181 
1182 /* deletes the given ibcm_hca_info_t from CM's global hca list */
1183 void
1184 ibcm_delete_hca_entry(ibcm_hca_info_t *hcap)
1185 {
1186 	ibcm_hca_info_t	*headp, *prevp = NULL;
1187 
1188 	/* ibcm_hca_global_lock is held */
1189 	IBTF_DPRINTF_L5(cmlog, "ibcm_delete_hca_entry: guid = 0x%llX "
1190 	    "hcap = 0x%p", hcap->hca_guid, hcap);
1191 
1192 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
1193 
1194 	headp = ibcm_hca_listp;
1195 	while (headp != NULL) {
1196 		if (headp == hcap) {
1197 			IBTF_DPRINTF_L3(cmlog, "ibcm_delete_hca_entry: "
1198 			    "deleting hcap %p hcaguid %llX", hcap,
1199 			    hcap->hca_guid);
1200 			if (prevp) {
1201 				prevp->hca_next = headp->hca_next;
1202 			} else {
1203 				prevp = headp->hca_next;
1204 				ibcm_hca_listp = prevp;
1205 			}
1206 			rw_destroy(&hcap->hca_state_rwlock);
1207 			rw_destroy(&hcap->hca_sidr_list_lock);
1208 			kmem_free(hcap, sizeof (ibcm_hca_info_t) +
1209 			    (hcap->hca_num_ports - 1) *
1210 			    sizeof (ibcm_port_info_t));
1211 			return;
1212 		}
1213 
1214 		prevp = headp;
1215 		headp = headp->hca_next;
1216 	}
1217 }
1218 
1219 /*
1220  * ibcm_find_hca_entry:
1221  *	Given a HCA's GUID find out ibcm_hca_info_t entry for that HCA
1222  *	This entry can be then used to access AVL tree/SIDR list etc.
1223  *	If entry exists and in HCA ATTACH state, then hca's ref cnt is
1224  *	incremented and entry returned. Else NULL returned.
1225  *
1226  *	All functions that use ibcm_find_hca_entry and get a non-NULL
1227  *	return values must call ibcm_dec_hca_acc_cnt to decrement the
1228  *	respective hca ref cnt. There shouldn't be any usage of
1229  *	ibcm_hca_info_t * returned from ibcm_find_hca_entry,
1230  *	after decrementing the hca_acc_cnt
1231  *
1232  * INPUTS:
1233  *	hca_guid	- HCA's guid
1234  *
1235  * RETURN VALUE:
1236  *	hcap		- if a match is found, else NULL
1237  */
1238 ibcm_hca_info_t *
1239 ibcm_find_hca_entry(ib_guid_t hca_guid)
1240 {
1241 	ibcm_hca_info_t *hcap;
1242 
1243 	IBTF_DPRINTF_L5(cmlog, "ibcm_find_hca_entry: guid = 0x%llX", hca_guid);
1244 
1245 	mutex_enter(&ibcm_global_hca_lock);
1246 
1247 	hcap = ibcm_hca_listp;
1248 	/* search for this HCA */
1249 	while (hcap != NULL) {
1250 		if (hcap->hca_guid == hca_guid)
1251 			break;
1252 		hcap = hcap->hca_next;
1253 	}
1254 
1255 	/* if no hcap for the hca_guid, return NULL */
1256 	if (hcap == NULL) {
1257 		mutex_exit(&ibcm_global_hca_lock);
1258 		return (NULL);
1259 	}
1260 
1261 	/* return hcap, only if it valid to use */
1262 	if (hcap->hca_state == IBCM_HCA_ACTIVE) {
1263 		++(hcap->hca_acc_cnt);
1264 
1265 		IBTF_DPRINTF_L5(cmlog, "ibcm_find_hca_entry: "
1266 		    "found hcap = 0x%p hca_acc_cnt %u", hcap,
1267 		    hcap->hca_acc_cnt);
1268 
1269 		mutex_exit(&ibcm_global_hca_lock);
1270 		return (hcap);
1271 	} else {
1272 		mutex_exit(&ibcm_global_hca_lock);
1273 
1274 		IBTF_DPRINTF_L2(cmlog, "ibcm_find_hca_entry: "
1275 		    "found hcap = 0x%p not in active state", hcap);
1276 		return (NULL);
1277 	}
1278 }
1279 
1280 /*
1281  * Searches for ibcm_hca_info_t entry based on hca_guid, but doesn't increment
1282  * the hca's reference count. This function is used, where the calling context
1283  * is attempting to delete hcap itself and hence acc_cnt cannot be incremented
1284  * OR assumes that valid hcap must be available in ibcm's global hca list.
1285  */
1286 ibcm_hca_info_t *
1287 ibcm_find_hcap_entry(ib_guid_t hca_guid)
1288 {
1289 	ibcm_hca_info_t *hcap;
1290 
1291 	IBTF_DPRINTF_L5(cmlog, "ibcm_find_hcap_entry: guid = 0x%llX", hca_guid);
1292 
1293 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
1294 
1295 	hcap = ibcm_hca_listp;
1296 	/* search for this HCA */
1297 	while (hcap != NULL) {
1298 		if (hcap->hca_guid == hca_guid)
1299 			break;
1300 		hcap = hcap->hca_next;
1301 	}
1302 
1303 	if (hcap == NULL)
1304 		IBTF_DPRINTF_L2(cmlog, "ibcm_find_hcap_entry: No hcap found for"
1305 		    " hca_guid 0x%llX", hca_guid);
1306 	else
1307 		IBTF_DPRINTF_L5(cmlog, "ibcm_find_hcap_entry: hcap found for"
1308 		    " hca_guid 0x%llX", hca_guid);
1309 
1310 	return (hcap);
1311 }
1312 
1313 /* increment the hca's temporary reference count */
1314 ibcm_status_t
1315 ibcm_inc_hca_acc_cnt(ibcm_hca_info_t *hcap)
1316 {
1317 	mutex_enter(&ibcm_global_hca_lock);
1318 	if (hcap->hca_state == IBCM_HCA_ACTIVE) {
1319 		++(hcap->hca_acc_cnt);
1320 		IBTF_DPRINTF_L5(cmlog, "ibcm_inc_hca_acc_cnt: "
1321 		    "hcap = 0x%p  acc_cnt = %d ", hcap, hcap->hca_acc_cnt);
1322 		mutex_exit(&ibcm_global_hca_lock);
1323 		return (IBCM_SUCCESS);
1324 	} else {
1325 		IBTF_DPRINTF_L2(cmlog, "ibcm_inc_hca_acc_cnt: "
1326 		    "hcap INACTIVE 0x%p  acc_cnt = %d ", hcap,
1327 		    hcap->hca_acc_cnt);
1328 		mutex_exit(&ibcm_global_hca_lock);
1329 		return (IBCM_FAILURE);
1330 	}
1331 }
1332 
1333 /* decrement the hca's ref count, and wake up any waiting threads */
1334 void
1335 ibcm_dec_hca_acc_cnt(ibcm_hca_info_t *hcap)
1336 {
1337 	mutex_enter(&ibcm_global_hca_lock);
1338 	ASSERT(hcap->hca_acc_cnt > 0);
1339 	--(hcap->hca_acc_cnt);
1340 	IBTF_DPRINTF_L5(cmlog, "ibcm_dec_hca_acc_cnt: hcap = 0x%p "
1341 	    "acc_cnt = %d", hcap, hcap->hca_acc_cnt);
1342 	if ((hcap->hca_state == IBCM_HCA_NOT_ACTIVE) &&
1343 	    (hcap->hca_acc_cnt == 0)) {
1344 		IBTF_DPRINTF_L3(cmlog, "ibcm_dec_hca_acc_cnt: "
1345 		    "cv_broadcast for hcap = 0x%p", hcap);
1346 		cv_broadcast(&ibcm_global_hca_cv);
1347 	}
1348 	mutex_exit(&ibcm_global_hca_lock);
1349 }
1350 
1351 /* increment the hca's resource count */
1352 void
1353 ibcm_inc_hca_res_cnt(ibcm_hca_info_t *hcap)
1354 
1355 {
1356 	mutex_enter(&ibcm_global_hca_lock);
1357 	++(hcap->hca_res_cnt);
1358 	IBTF_DPRINTF_L5(cmlog, "ibcm_inc_hca_res_cnt: hcap = 0x%p "
1359 	    "ref_cnt = %d", hcap, hcap->hca_res_cnt);
1360 	mutex_exit(&ibcm_global_hca_lock);
1361 }
1362 
1363 /* decrement the hca's resource count, and wake up any waiting threads */
1364 void
1365 ibcm_dec_hca_res_cnt(ibcm_hca_info_t *hcap)
1366 {
1367 	mutex_enter(&ibcm_global_hca_lock);
1368 	ASSERT(hcap->hca_res_cnt > 0);
1369 	--(hcap->hca_res_cnt);
1370 	IBTF_DPRINTF_L5(cmlog, "ibcm_dec_hca_res_cnt: hcap = 0x%p "
1371 	    "ref_cnt = %d", hcap, hcap->hca_res_cnt);
1372 	if ((hcap->hca_state == IBCM_HCA_NOT_ACTIVE) &&
1373 	    (hcap->hca_res_cnt == 0)) {
1374 		IBTF_DPRINTF_L3(cmlog, "ibcm_dec_hca_res_cnt: "
1375 		    "cv_broadcast for hcap = 0x%p", hcap);
1376 		cv_broadcast(&ibcm_global_hca_cv);
1377 	}
1378 	mutex_exit(&ibcm_global_hca_lock);
1379 }
1380 
1381 /* increment the hca's service count */
1382 void
1383 ibcm_inc_hca_svc_cnt(ibcm_hca_info_t *hcap)
1384 
1385 {
1386 	mutex_enter(&ibcm_global_hca_lock);
1387 	++(hcap->hca_svc_cnt);
1388 	IBTF_DPRINTF_L5(cmlog, "ibcm_inc_hca_svc_cnt: hcap = 0x%p "
1389 	    "svc_cnt = %d", hcap, hcap->hca_svc_cnt);
1390 	mutex_exit(&ibcm_global_hca_lock);
1391 }
1392 
1393 /* decrement the hca's service count */
1394 void
1395 ibcm_dec_hca_svc_cnt(ibcm_hca_info_t *hcap)
1396 {
1397 	mutex_enter(&ibcm_global_hca_lock);
1398 	ASSERT(hcap->hca_svc_cnt > 0);
1399 	--(hcap->hca_svc_cnt);
1400 	IBTF_DPRINTF_L5(cmlog, "ibcm_dec_hca_svc_cnt: hcap = 0x%p "
1401 	    "svc_cnt = %d", hcap, hcap->hca_svc_cnt);
1402 	mutex_exit(&ibcm_global_hca_lock);
1403 }
1404 
1405 /*
1406  * The following code manages three classes of requests that CM makes to
1407  * the fabric.  Those three classes are SA_ACCESS, REQ/REP/RTU, and DREQ/DREP.
1408  * The main issue is that the fabric can become very busy, and the CM
1409  * protocols rely on responses being made based on a predefined timeout
1410  * value.  By managing how many simultaneous sessions are allowed, there
1411  * is observed extremely high reliability of CM protocol succeeding when
1412  * it should.
1413  *
1414  * SA_ACCESS and DREQ/DREP are managed at the thread level, whereby the
1415  * thread blocks until there are less than some number of threads doing
1416  * similar requests.
1417  *
1418  * REQ/REP/RTU requests beyond a given limit are added to a list,
1419  * allowing the thread to return immediately to its caller in the
1420  * case where the "mode" is IBT_NONBLOCKING.  This is the mode used
1421  * by uDAPL and seems to be an important feature/behavior.
1422  */
1423 
1424 static int
1425 ibcm_ok_to_start(struct ibcm_open_s *openp)
1426 {
1427 	return (openp->sends < openp->sends_hiwat &&
1428 	    openp->in_progress < openp->in_progress_max);
1429 }
1430 
/*
 * Release statep's charge against open flow control, if it holds one
 * (open_flow == 1).  Either the open was in progress (open_link ==
 * NULL) and in_progress is decremented, or it is still on the pending
 * queue and is unlinked.  Note the queue is terminated by
 * &ibcm_open.head, not NULL.
 */
void
ibcm_open_done(ibcm_state_data_t *statep)
{
	int run;
	ibcm_state_data_t **linkp, *tmp;

	ASSERT(MUTEX_HELD(&statep->state_mutex));
	if (statep->open_flow == 1) {
		statep->open_flow = 0;
		mutex_enter(&ibcm_open.mutex);
		if (statep->open_link == NULL) {
			/* was in progress, not queued */
			ibcm_open.in_progress--;
			run = ibcm_ok_to_start(&ibcm_open);
		} else {
			/* still queued; find and unlink it */
			ibcm_open.queued--;
			linkp = &ibcm_open.head.open_link;
			while (*linkp != statep)
				linkp = &((*linkp)->open_link);
			*linkp = statep->open_link;
			statep->open_link = NULL;
			/*
			 * If we remove what tail pointed to, we need
			 * to reassign tail (it is never NULL).
			 * tail points to head for the empty list.
			 */
			if (ibcm_open.tail == statep) {
				tmp = &ibcm_open.head;
				while (tmp->open_link != &ibcm_open.head)
					tmp = tmp->open_link;
				ibcm_open.tail = tmp;
			}
			run = 0;
		}
		mutex_exit(&ibcm_open.mutex);
		if (run)
			ibcm_run_tlist_thread();
	}
}
1469 
1470 /* dtrace */
/* Log REQ sends that waited more than 1ms in flow control (delta in ns). */
void
ibcm_open_wait(hrtime_t delta)
{
	if (delta > 1000000)
		IBTF_DPRINTF_L2(cmlog, "ibcm_open_wait: flow more %lld", delta);
}
1477 
/*
 * Actually post the stored REQ MAD for a connection open that flow
 * control has allowed to proceed.
 */
void
ibcm_open_start(ibcm_state_data_t *statep)
{
	ibcm_insert_trace(statep, IBCM_TRACE_OUTGOING_REQ);

	mutex_enter(&statep->state_mutex);
	/* report how long this REQ sat waiting since it was posted */
	ibcm_open_wait(gethrtime() - statep->post_time);
	mutex_exit(&statep->state_mutex);

	/* completion callback is ibcm_post_req_complete */
	ibcm_post_rc_mad(statep, statep->stored_msg, ibcm_post_req_complete,
	    statep);

	/* drop the statep reference held for this open attempt */
	mutex_enter(&statep->state_mutex);
	IBCM_REF_CNT_DECR(statep);
	mutex_exit(&statep->state_mutex);
}
1494 
/*
 * Charge a new outgoing REQ to open flow control: start it at once if
 * nothing is queued and the limits allow, otherwise append it to the
 * pending-open queue (terminated by &ibcm_open.head, not NULL).
 */
void
ibcm_open_enqueue(ibcm_state_data_t *statep)
{
	int run;

	mutex_enter(&statep->state_mutex);
	/* timestamp for ibcm_open_wait()'s queue-delay measurement */
	statep->post_time = gethrtime();
	mutex_exit(&statep->state_mutex);
	mutex_enter(&ibcm_open.mutex);
	if (ibcm_open.queued == 0 && ibcm_ok_to_start(&ibcm_open)) {
		ibcm_open.in_progress++;
		mutex_exit(&ibcm_open.mutex);
		ibcm_open_start(statep);
	} else {
		ibcm_open.queued++;
		statep->open_link = &ibcm_open.head;	/* end-of-list marker */
		ibcm_open.tail->open_link = statep;
		ibcm_open.tail = statep;
		run = ibcm_ok_to_start(&ibcm_open);
		mutex_exit(&ibcm_open.mutex);
		if (run)
			ibcm_run_tlist_thread();
	}
}
1519 
/*
 * Pop the first pending open and charge it as in-progress.  Caller
 * holds ibcm_open.mutex and must have verified queued > 0.
 */
ibcm_state_data_t *
ibcm_open_dequeue(void)
{
	ibcm_state_data_t *statep;

	ASSERT(MUTEX_HELD(&ibcm_open.mutex));
	ibcm_open.queued--;
	ibcm_open.in_progress++;
	statep = ibcm_open.head.open_link;
	ibcm_open.head.open_link = statep->open_link;
	statep->open_link = NULL;	/* marks statep as no longer queued */
	/*
	 * If we remove what tail pointed to, we need
	 * to reassign tail (it is never NULL).
	 * tail points to head for the empty list.
	 */
	if (ibcm_open.tail == statep)
		ibcm_open.tail = &ibcm_open.head;
	return (statep);
}
1540 
1541 void
1542 ibcm_check_for_opens(void)
1543 {
1544 	ibcm_state_data_t 	*statep;
1545 
1546 	mutex_enter(&ibcm_open.mutex);
1547 
1548 	while (ibcm_open.queued > 0) {
1549 		if (ibcm_ok_to_start(&ibcm_open)) {
1550 			statep = ibcm_open_dequeue();
1551 			mutex_exit(&ibcm_open.mutex);
1552 
1553 			ibcm_open_start(statep);
1554 
1555 			mutex_enter(&ibcm_open.mutex);
1556 		} else {
1557 			break;
1558 		}
1559 	}
1560 	mutex_exit(&ibcm_open.mutex);
1561 }
1562 
1563 
1564 static void
1565 ibcm_flow_init(ibcm_flow_t *flow, uint_t simul_max)
1566 {
1567 	flow->list			= NULL;
1568 	flow->simul			= 0;
1569 	flow->waiters_per_chunk		= 4;
1570 	flow->simul_max			= simul_max;
1571 	flow->lowat			= simul_max - flow->waiters_per_chunk;
1572 	flow->lowat_default		= flow->lowat;
1573 	/* stats */
1574 	flow->total			= 0;
1575 }
1576 
1577 static void
1578 ibcm_rc_flow_control_init(void)
1579 {
1580 	mutex_init(&ibcm_open.mutex, NULL, MUTEX_DEFAULT, NULL);
1581 	mutex_enter(&ibcm_open.mutex);
1582 	ibcm_flow_init(&ibcm_close_flow, ibcm_close_simul_max);
1583 	ibcm_flow_init(&ibcm_lapr_flow, ibcm_lapr_simul_max);
1584 	ibcm_flow_init(&ibcm_saa_flow, ibcm_saa_simul_max);
1585 
1586 	ibcm_open.queued 		= 0;
1587 	ibcm_open.exit_deferred 	= 0;
1588 	ibcm_open.in_progress 		= 0;
1589 	ibcm_open.in_progress_max 	= 16;
1590 	ibcm_open.sends 		= 0;
1591 	ibcm_open.sends_max 		= 0;
1592 	ibcm_open.sends_lowat 		= 8;
1593 	ibcm_open.sends_hiwat 		= 16;
1594 	ibcm_open.tail 			= &ibcm_open.head;
1595 	ibcm_open.head.open_link 	= NULL;
1596 	mutex_exit(&ibcm_open.mutex);
1597 
1598 	mutex_init(&ibcm_close.mutex, NULL, MUTEX_DEFAULT, NULL);
1599 	mutex_enter(&ibcm_close.mutex);
1600 	ibcm_close.tail			= &ibcm_close.head;
1601 	ibcm_close.head.close_link 	= NULL;
1602 	mutex_exit(&ibcm_close.mutex);
1603 }
1604 
/* Tear down the mutexes created by ibcm_rc_flow_control_init(). */
static void
ibcm_rc_flow_control_fini(void)
{
	mutex_destroy(&ibcm_open.mutex);
	mutex_destroy(&ibcm_close.mutex);
}
1611 
/*
 * Return a flow1 waiter chunk with room for one more waiter, appending
 * a freshly allocated chunk when the newest one is full.  Called (and
 * returns) with ibcm_open.mutex held; the lock is dropped around the
 * KM_SLEEP allocation, so the list must be re-examined afterwards in
 * case another thread changed it in the window.
 */
static ibcm_flow1_t *
ibcm_flow_find(ibcm_flow_t *flow)
{
	ibcm_flow1_t *flow1;
	ibcm_flow1_t *f;

	f = flow->list;
	if (f) {	/* most likely code path */
		while (f->link != NULL)
			f = f->link;
		if (f->waiters < flow->waiters_per_chunk)
			return (f);
	}

	/* There was no flow1 list element ready for another waiter */
	mutex_exit(&ibcm_open.mutex);
	flow1 = kmem_alloc(sizeof (*flow1), KM_SLEEP);
	mutex_enter(&ibcm_open.mutex);

	/* re-check: the list may have changed while the lock was dropped */
	f = flow->list;
	if (f) {
		while (f->link != NULL)
			f = f->link;
		if (f->waiters < flow->waiters_per_chunk) {
			/* someone else made room; discard our allocation */
			kmem_free(flow1, sizeof (*flow1));
			return (f);
		}
		f->link = flow1;
	} else {
		flow->list = flow1;
	}
	cv_init(&flow1->cv, NULL, CV_DRIVER, NULL);
	flow1->waiters = 0;
	flow1->link = NULL;
	return (flow1);
}
1648 
/*
 * Admission control for one request of the given class.  If the class
 * is under its simultaneous limit and nobody is already waiting, charge
 * it and return.  Otherwise sleep on a waiter chunk (flow1) until
 * ibcm_flow_exit() releases the whole chunk; exit pre-charges the woken
 * waiters into flow->simul, so no re-check is needed after the wait.
 * The last waiter to leave a chunk destroys and frees it.
 */
static void
ibcm_flow_enter(ibcm_flow_t *flow)
{
	mutex_enter(&ibcm_open.mutex);
	if (flow->list == NULL && flow->simul < flow->simul_max) {
		flow->simul++;
		flow->total++;
		mutex_exit(&ibcm_open.mutex);
	} else {
		ibcm_flow1_t *flow1;

		flow1 = ibcm_flow_find(flow);
		flow1->waiters++;
		cv_wait(&flow1->cv, &ibcm_open.mutex);
		/* chunk was unlinked by ibcm_flow_exit(); last waiter frees */
		if (--flow1->waiters == 0) {
			cv_destroy(&flow1->cv);
			mutex_exit(&ibcm_open.mutex);
			kmem_free(flow1, sizeof (*flow1));
		} else
			mutex_exit(&ibcm_open.mutex);
	}
}
1671 
/*
 * Release one request of the given class.  The low-water mark creeps
 * back up toward its default as traffic drains.  Once below the mark,
 * wake an entire waiter chunk at once: it is unlinked here and its
 * waiters are charged into simul/total before the broadcast, so the
 * woken threads in ibcm_flow_enter() need no re-check.
 */
static void
ibcm_flow_exit(ibcm_flow_t *flow)
{
	mutex_enter(&ibcm_open.mutex);
	if (--flow->simul < flow->lowat) {
		if (flow->lowat < flow->lowat_default)
			flow->lowat++;
		if (flow->list) {
			ibcm_flow1_t *flow1;

			flow1 = flow->list;
			flow->list = flow1->link;	/* unlink */
			flow1->link = NULL;		/* be clean */
			flow->total += flow1->waiters;
			flow->simul += flow1->waiters;
			cv_broadcast(&flow1->cv);
		}
	}
	mutex_exit(&ibcm_open.mutex);
}
1692 
1693 void
1694 ibcm_flow_inc(void)
1695 {
1696 	mutex_enter(&ibcm_open.mutex);
1697 	if (++ibcm_open.sends > ibcm_open.sends_max) {
1698 		ibcm_open.sends_max = ibcm_open.sends;
1699 		IBTF_DPRINTF_L2(cmlog, "ibcm_flow_inc: sends max = %d",
1700 		    ibcm_open.sends_max);
1701 	}
1702 	mutex_exit(&ibcm_open.mutex);
1703 }
1704 
/* Log MAD send completions that took longer than 4ms (delta is in ns). */
static void
ibcm_check_send_cmpltn_time(hrtime_t delta, char *event_msg)
{
	if (delta > 4000000LL) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_check_send_cmpltn_time: "
		    "%s: %lldns", event_msg, delta);
	}
}
1713 
/*
 * Account completion of one MAD send.  Dropping below the send
 * low-water mark may allow queued opens to start, and completes one
 * ibcm_close_exit() that was previously deferred (exit_deferred).
 */
void
ibcm_flow_dec(hrtime_t time, char *mad_type)
{
	int flow_exit = 0;
	int run = 0;

	if (ibcm_dtrace)
		ibcm_check_send_cmpltn_time(gethrtime() - time, mad_type);
	mutex_enter(&ibcm_open.mutex);
	ibcm_open.sends--;
	if (ibcm_open.sends < ibcm_open.sends_lowat) {
		run = ibcm_ok_to_start(&ibcm_open);
		if (ibcm_open.exit_deferred) {
			/* perform one deferred close-flow release below */
			ibcm_open.exit_deferred--;
			flow_exit = 1;
		}
	}
	mutex_exit(&ibcm_open.mutex);
	if (flow_exit)
		ibcm_flow_exit(&ibcm_close_flow);
	if (run)
		ibcm_run_tlist_thread();
}
1737 
/*
 * Append statep to the deferred-close list and kick the thread that
 * runs ibcm_check_for_async_close().  Unlike the open queue, this
 * list is NULL-terminated.
 */
void
ibcm_close_enqueue(ibcm_state_data_t *statep)
{
	mutex_enter(&ibcm_close.mutex);
	statep->close_link = NULL;
	ibcm_close.tail->close_link = statep;
	ibcm_close.tail = statep;
	mutex_exit(&ibcm_close.mutex);
	ibcm_run_tlist_thread();
}
1748 
/*
 * Drain the deferred-close list, starting each queued close with
 * ibcm_close.mutex dropped.  tail is reset to point back at head
 * whenever the element it referenced is removed.
 */
void
ibcm_check_for_async_close()
{
	ibcm_state_data_t 	*statep;

	mutex_enter(&ibcm_close.mutex);

	while (ibcm_close.head.close_link) {
		statep = ibcm_close.head.close_link;
		ibcm_close.head.close_link = statep->close_link;
		statep->close_link = NULL;
		if (ibcm_close.tail == statep)
			ibcm_close.tail = &ibcm_close.head;
		mutex_exit(&ibcm_close.mutex);
		ibcm_close_start(statep);
		mutex_enter(&ibcm_close.mutex);
	}
	mutex_exit(&ibcm_close.mutex);
}
1768 
/* Flow-control admission for the close (DREQ/DREP) class; may block. */
void
ibcm_close_enter(void)
{
	ibcm_flow_enter(&ibcm_close_flow);
}
1774 
1775 void
1776 ibcm_close_exit(void)
1777 {
1778 	int flow_exit;
1779 
1780 	mutex_enter(&ibcm_open.mutex);
1781 	if (ibcm_open.sends < ibcm_open.sends_lowat ||
1782 	    ibcm_open.exit_deferred >= 4)
1783 		flow_exit = 1;
1784 	else {
1785 		flow_exit = 0;
1786 		ibcm_open.exit_deferred++;
1787 	}
1788 	mutex_exit(&ibcm_open.mutex);
1789 	if (flow_exit)
1790 		ibcm_flow_exit(&ibcm_close_flow);
1791 }
1792 
1793 /*
1794  * This function needs to be called twice to finish our flow
1795  * control accounting when closing down a connection.  One
1796  * call has send_done set to 1, while the other has it set to 0.
1797  * Because of retries, this could get called more than once
1798  * with either 0 or 1, but additional calls have no effect.
1799  */
void
ibcm_close_done(ibcm_state_data_t *statep, int send_done)
{
	int flow_exit;

	ASSERT(MUTEX_HELD(&statep->state_mutex));
	if (statep->close_flow == 1) {
		/* first of the two calls: record which half completed */
		if (send_done)
			statep->close_flow = 3;	/* send half done */
		else
			statep->close_flow = 2;	/* non-send half done */
	} else if ((send_done && statep->close_flow == 2) ||
	    (!send_done && statep->close_flow == 3)) {
		/* complementary second call: release close flow control */
		statep->close_flow = 0;
		mutex_enter(&ibcm_open.mutex);
		/* same defer-when-busy policy as ibcm_close_exit() */
		if (ibcm_open.sends < ibcm_open.sends_lowat ||
		    ibcm_open.exit_deferred >= 4)
			flow_exit = 1;
		else {
			flow_exit = 0;
			ibcm_open.exit_deferred++;
		}
		mutex_exit(&ibcm_open.mutex);
		if (flow_exit)
			ibcm_flow_exit(&ibcm_close_flow);
	}
}
1827 
/* Flow-control admission for the LAP/APR class; may block. */
void
ibcm_lapr_enter(void)
{
	ibcm_flow_enter(&ibcm_lapr_flow);
}
1833 
/* Release one LAP/APR flow-control slot. */
void
ibcm_lapr_exit(void)
{
	ibcm_flow_exit(&ibcm_lapr_flow);
}
1839 
/* Flow-control admission for the SA-access class; may block. */
void
ibcm_sa_access_enter()
{
	ibcm_flow_enter(&ibcm_saa_flow);
}
1845 
/* Release one SA-access flow-control slot. */
void
ibcm_sa_access_exit()
{
	ibcm_flow_exit(&ibcm_saa_flow);
}
1851 
/*
 * ibmf SAA subnet event callback.  Maps the SAA event code to the
 * corresponding IBT subnet event and forwards it to IBTL via
 * ibtl_cm_sm_notice_handler() with the originating port's SGID.
 * ibcm_sm_notice_serialize_lock is held across the whole upcall, and
 * hca_acc_cnt is raised so the HCA cannot finish detaching under us.
 */
static void
ibcm_sm_notice_handler(ibmf_saa_handle_t saa_handle,
    ibmf_saa_subnet_event_t saa_event_code,
    ibmf_saa_event_details_t *saa_event_details,
    void *callback_arg)
{
	ibcm_port_info_t	*portp = (ibcm_port_info_t *)callback_arg;
	ibt_subnet_event_code_t code;
	ibt_subnet_event_t	event;
	uint8_t			event_status;

	IBTF_DPRINTF_L3(cmlog, "ibcm_sm_notice_handler: saa_hdl %p, code = %d",
	    saa_handle, saa_event_code);

	mutex_enter(&ibcm_sm_notice_serialize_lock);

	switch (saa_event_code) {
	case IBMF_SAA_EVENT_MCG_CREATED:
		code = IBT_SM_EVENT_MCG_CREATED;
		break;
	case IBMF_SAA_EVENT_MCG_DELETED:
		code = IBT_SM_EVENT_MCG_DELETED;
		break;
	case IBMF_SAA_EVENT_GID_AVAILABLE:
		code = IBT_SM_EVENT_GID_AVAIL;
		/* GID set changed: cached path records may be stale */
		ibcm_path_cache_purge();
		break;
	case IBMF_SAA_EVENT_GID_UNAVAILABLE:
		code = IBT_SM_EVENT_GID_UNAVAIL;
		ibcm_path_cache_purge();
		break;
	case IBMF_SAA_EVENT_SUBSCRIBER_STATUS_CHG:
		/* only transitions of the SM-producer bit are of interest */
		event_status =
		    saa_event_details->ie_producer_event_status_mask &
		    IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM;
		if (event_status == (portp->port_event_status &
		    IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM)) {
			mutex_exit(&ibcm_sm_notice_serialize_lock);
			return;	/* no change */
		}
		portp->port_event_status = event_status;
		if (event_status == IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM)
			code = IBT_SM_EVENT_AVAILABLE;
		else
			code = IBT_SM_EVENT_UNAVAILABLE;
		break;
	default:
		/* unrecognized event: nothing to forward */
		mutex_exit(&ibcm_sm_notice_serialize_lock);
		return;
	}

	mutex_enter(&ibcm_global_hca_lock);

	/* don't send the event if we're tearing down */
	if (!IBCM_ACCESS_HCA_OK(portp->port_hcap)) {
		mutex_exit(&ibcm_global_hca_lock);
		mutex_exit(&ibcm_sm_notice_serialize_lock);
		return;
	}

	/* hold the HCA across the IBTL upcall */
	++(portp->port_hcap->hca_acc_cnt);
	mutex_exit(&ibcm_global_hca_lock);

	event.sm_notice_gid = saa_event_details->ie_gid;
	ibtl_cm_sm_notice_handler(portp->port_sgid0, code, &event);

	mutex_exit(&ibcm_sm_notice_serialize_lock);

	ibcm_dec_hca_acc_cnt(portp->port_hcap);
}
1922 
/*
 * Register (or clear, when sm_notice_handler is NULL) a client's subnet
 * event handler with IBTL.  Every port whose SM-producer subscription
 * is not currently established (port_event_status lacks the
 * PRODUCER_SM bit) has its SGID reported back to the client through
 * ibtl_cm_sm_notice_init_failure().  Two passes over the HCA list are
 * made under ibcm_global_hca_lock: one to size the report, one to fill
 * it.
 */
void
ibt_register_subnet_notices(ibt_clnt_hdl_t ibt_hdl,
    ibt_sm_notice_handler_t sm_notice_handler, void *private)
{
	ibcm_port_info_t	*portp;
	ibcm_hca_info_t		*hcap;
	uint8_t			port;
	int			num_failed_sgids;
	ibtl_cm_sm_init_fail_t	*ifail;
	ib_gid_t		*sgidp;

	IBTF_DPRINTF_L3(cmlog, "ibt_register_subnet_notices: ibt_hdl = %p",
	    ibt_hdl);

	mutex_enter(&ibcm_sm_notice_serialize_lock);

	ibtl_cm_set_sm_notice_handler(ibt_hdl, sm_notice_handler, private);
	if (sm_notice_handler == NULL) {
		mutex_exit(&ibcm_sm_notice_serialize_lock);
		return;
	}

	/* for each port, if service is not available, make a call */
	mutex_enter(&ibcm_global_hca_lock);
	num_failed_sgids = 0;
	hcap = ibcm_hca_listp;
	while (hcap != NULL) {
		portp = hcap->hca_port_info;
		for (port = 0; port < hcap->hca_num_ports; port++) {
			if (!(portp->port_event_status &
			    IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM))
				num_failed_sgids++;
			portp++;
		}
		hcap = hcap->hca_next;
	}
	if (num_failed_sgids != 0) {
		/* one ib_gid_t is built into the struct; add the rest */
		ifail = kmem_alloc(sizeof (*ifail) +
		    (num_failed_sgids - 1) * sizeof (ib_gid_t), KM_SLEEP);
		ifail->smf_num_sgids = num_failed_sgids;
		ifail->smf_ibt_hdl = ibt_hdl;
		sgidp = &ifail->smf_sgid[0];
		/* second pass: record the SGID of each unsubscribed port */
		hcap = ibcm_hca_listp;
		while (hcap != NULL) {
			portp = hcap->hca_port_info;
			for (port = 0; port < hcap->hca_num_ports; port++) {
				if (!(portp->port_event_status &
				    IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM))
					*sgidp++ = portp->port_sgid0;
				portp++;
			}
			hcap = hcap->hca_next;
		}
	}
	mutex_exit(&ibcm_global_hca_lock);

	if (num_failed_sgids != 0) {
		ibtl_cm_sm_notice_init_failure(ifail);
		kmem_free(ifail, sizeof (*ifail) +
		    (num_failed_sgids - 1) * sizeof (ib_gid_t));
	}
	mutex_exit(&ibcm_sm_notice_serialize_lock);
}
1986 
1987 /* The following is run from a taskq because we've seen the stack overflow. */
/*
 * Taskq worker: open an ibmf SAA session for one port, registering
 * ibcm_sm_notice_handler() as the subnet event callback, then clear
 * the in-progress flag and wake any waiters.
 */
static void
ibcm_init_saa(void *arg)
{
	ibcm_port_info_t		*portp = (ibcm_port_info_t *)arg;
	int				status;
	ib_guid_t			port_guid;
	ibmf_saa_subnet_event_args_t	event_args;

	port_guid = portp->port_sgid0.gid_guid;

	IBTF_DPRINTF_L3(cmlog, "ibcm_init_saa: port guid %llX", port_guid);

	/* deliver this port's subnet events to ibcm_sm_notice_handler */
	event_args.is_event_callback_arg = portp;
	event_args.is_event_callback = ibcm_sm_notice_handler;

	if ((status = ibmf_sa_session_open(port_guid, 0, &event_args,
	    IBMF_VERSION, 0, &portp->port_ibmf_saa_hdl)) != IBMF_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_init_saa: "
		    "ibmf_sa_session_open failed for port guid %llX "
		    "status = %d", port_guid, status);
	} else {
		IBTF_DPRINTF_L2(cmlog, "ibcm_init_saa: "
		    "registered sa_hdl 0x%p for port guid %llX",
		    portp->port_ibmf_saa_hdl, port_guid);
	}

	/* wake anyone waiting for the open attempt to finish */
	mutex_enter(&ibcm_sa_open_lock);
	portp->port_saa_open_in_progress = 0;
	cv_broadcast(&ibcm_sa_open_cv);
	mutex_exit(&ibcm_sa_open_lock);
}
2019 
2020 void
2021 ibcm_init_saa_handle(ibcm_hca_info_t *hcap, uint8_t port)
2022 {
2023 	ibmf_saa_handle_t	saa_handle;
2024 	uint8_t			port_index = port - 1;
2025 	ibcm_port_info_t	*portp = &hcap->hca_port_info[port_index];
2026 	ibt_status_t		ibt_status;
2027 
2028 	if (port_index >= hcap->hca_num_ports)
2029 		return;
2030 
2031 	mutex_enter(&ibcm_sa_open_lock);
2032 	if (portp->port_saa_open_in_progress) {
2033 		mutex_exit(&ibcm_sa_open_lock);
2034 		return;
2035 	}
2036 
2037 	saa_handle = portp->port_ibmf_saa_hdl;
2038 	if (saa_handle != NULL) {
2039 		mutex_exit(&ibcm_sa_open_lock);
2040 		return;
2041 	}
2042 
2043 	portp->port_saa_open_in_progress = 1;
2044 	mutex_exit(&ibcm_sa_open_lock);
2045 
2046 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(portp->port_event_status))
2047 
2048 	/* The assumption is that we're getting event notifications */
2049 	portp->port_event_status = IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM;
2050 
2051 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(portp->port_event_status))
2052 
2053 	ibt_status = ibt_get_port_state_byguid(portp->port_hcap->hca_guid,
2054 	    portp->port_num, &portp->port_sgid0, NULL);
2055 	if (ibt_status != IBT_SUCCESS) {
2056 		IBTF_DPRINTF_L2(cmlog, "ibcm_init_saa_handle: "
2057 		    "ibt_get_port_state_byguid failed for guid %llX "
2058 		    "with status %d", portp->port_hcap->hca_guid, ibt_status);
2059 		mutex_enter(&ibcm_sa_open_lock);
2060 		portp->port_saa_open_in_progress = 0;
2061 		cv_broadcast(&ibcm_sa_open_cv);
2062 		mutex_exit(&ibcm_sa_open_lock);
2063 		return;
2064 	}
2065 	/* if the port is UP, try sa_session_open */
2066 	(void) taskq_dispatch(ibcm_taskq, ibcm_init_saa, portp, TQ_SLEEP);
2067 }
2068 
2069 
2070 ibmf_saa_handle_t
2071 ibcm_get_saa_handle(ibcm_hca_info_t *hcap, uint8_t port)
2072 {
2073 	ibmf_saa_handle_t	saa_handle;
2074 	uint8_t			port_index = port - 1;
2075 	ibcm_port_info_t	*portp = &hcap->hca_port_info[port_index];
2076 	ibt_status_t		ibt_status;
2077 
2078 	if (port_index >= hcap->hca_num_ports)
2079 		return (NULL);
2080 
2081 	mutex_enter(&ibcm_sa_open_lock);
2082 	while (portp->port_saa_open_in_progress) {
2083 		cv_wait(&ibcm_sa_open_cv, &ibcm_sa_open_lock);
2084 	}
2085 
2086 	saa_handle = portp->port_ibmf_saa_hdl;
2087 	if (saa_handle != NULL) {
2088 		mutex_exit(&ibcm_sa_open_lock);
2089 		return (saa_handle);
2090 	}
2091 
2092 	portp->port_saa_open_in_progress = 1;
2093 	mutex_exit(&ibcm_sa_open_lock);
2094 
2095 	ibt_status = ibt_get_port_state_byguid(portp->port_hcap->hca_guid,
2096 	    portp->port_num, &portp->port_sgid0, NULL);
2097 	if (ibt_status != IBT_SUCCESS) {
2098 		IBTF_DPRINTF_L2(cmlog, "ibcm_get_saa_handle: "
2099 		    "ibt_get_port_state_byguid failed for guid %llX "
2100 		    "with status %d", portp->port_hcap->hca_guid, ibt_status);
2101 		mutex_enter(&ibcm_sa_open_lock);
2102 		portp->port_saa_open_in_progress = 0;
2103 		cv_broadcast(&ibcm_sa_open_cv);
2104 		mutex_exit(&ibcm_sa_open_lock);
2105 		return (NULL);
2106 	}
2107 	/* if the port is UP, try sa_session_open */
2108 	(void) taskq_dispatch(ibcm_taskq, ibcm_init_saa, portp, TQ_SLEEP);
2109 
2110 	mutex_enter(&ibcm_sa_open_lock);
2111 	while (portp->port_saa_open_in_progress) {
2112 		cv_wait(&ibcm_sa_open_cv, &ibcm_sa_open_lock);
2113 	}
2114 	saa_handle = portp->port_ibmf_saa_hdl;
2115 	mutex_exit(&ibcm_sa_open_lock);
2116 	return (saa_handle);
2117 }
2118 
2119 
2120 /*
2121  * ibcm_hca_init_port():
2122  * 	- Register port with IBMA
2123  *
2124  * Arguments:
 *	hcap		- pointer to the HCA's CM info structure
2126  *	port_index	- port number minus 1
2127  *
2128  * Return values:
 *	IBT_SUCCESS - success
2130  */
2131 ibt_status_t
2132 ibcm_hca_init_port(ibcm_hca_info_t *hcap, uint8_t port_index)
2133 {
2134 	int			status;
2135 	ibmf_register_info_t	*ibmf_reg;
2136 
2137 	IBTF_DPRINTF_L4(cmlog, "ibcm_hca_init_port: hcap = 0x%p port_num %d",
2138 	    hcap, port_index + 1);
2139 
2140 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
2141 
2142 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(hcap->hca_port_info))
2143 
2144 	if (hcap->hca_port_info[port_index].port_ibmf_hdl == NULL) {
2145 		/* Register with IBMF */
2146 		ibmf_reg = &hcap->hca_port_info[port_index].port_ibmf_reg;
2147 		ibmf_reg->ir_ci_guid = hcap->hca_guid;
2148 		ibmf_reg->ir_port_num = port_index + 1;
2149 		ibmf_reg->ir_client_class = COMM_MGT_MANAGER_AGENT;
2150 
2151 		/*
2152 		 * register with management framework
2153 		 */
2154 		status = ibmf_register(ibmf_reg, IBMF_VERSION,
2155 		    IBMF_REG_FLAG_NO_OFFLOAD, NULL, NULL,
2156 		    &(hcap->hca_port_info[port_index].port_ibmf_hdl),
2157 		    &(hcap->hca_port_info[port_index].port_ibmf_caps));
2158 
2159 		if (status != IBMF_SUCCESS) {
2160 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_init_port: "
2161 			    "ibmf_register failed for port_num %x, "
2162 			    "status = %x", port_index + 1, status);
2163 			return (ibcm_ibmf_analyze_error(status));
2164 		}
2165 
2166 		hcap->hca_port_info[port_index].port_qp1.qp_cm =
2167 		    IBMF_QP_HANDLE_DEFAULT;
2168 		hcap->hca_port_info[port_index].port_qp1.qp_port =
2169 		    &(hcap->hca_port_info[port_index]);
2170 
2171 		/*
2172 		 * Register the read callback with IBMF.
2173 		 * Since we just did an ibmf_register, handle is
2174 		 * valid and ibcm_recv_cb() is valid so we can
2175 		 * safely assert for success of ibmf_setup_recv_cb()
2176 		 *
2177 		 * Depending on the "state" of the HCA,
2178 		 * CM may drop incoming packets
2179 		 */
2180 		status = ibmf_setup_async_cb(
2181 		    hcap->hca_port_info[port_index].port_ibmf_hdl,
2182 		    IBMF_QP_HANDLE_DEFAULT, ibcm_recv_cb,
2183 		    &(hcap->hca_port_info[port_index].port_qp1), 0);
2184 		ASSERT(status == IBMF_SUCCESS);
2185 
2186 		IBTF_DPRINTF_L5(cmlog, "ibcm_hca_init_port: "
2187 		    "IBMF hdl[%x] = 0x%p", port_index,
2188 		    hcap->hca_port_info[port_index].port_ibmf_hdl);
2189 
2190 		/* Attempt to get the saa_handle for this port */
2191 		ibcm_init_saa_handle(hcap, port_index + 1);
2192 	}
2193 
2194 	return (IBT_SUCCESS);
2195 }
2196 
2197 /*
2198  * useful, to re attempt to initialize port ibma handles from elsewhere in
2199  * cm code
2200  */
2201 ibt_status_t
2202 ibcm_hca_reinit_port(ibcm_hca_info_t *hcap, uint8_t port_index)
2203 {
2204 	ibt_status_t	status;
2205 
2206 	IBTF_DPRINTF_L5(cmlog, "ibcm_hca_reinit_port: hcap 0x%p port_num %d",
2207 	    hcap, port_index + 1);
2208 
2209 	mutex_enter(&ibcm_global_hca_lock);
2210 	status = ibcm_hca_init_port(hcap, port_index);
2211 	mutex_exit(&ibcm_global_hca_lock);
2212 	return (status);
2213 }
2214 
2215 
2216 /*
2217  * ibcm_hca_fini_port():
2218  * 	- Deregister port with IBMA
2219  *
2220  * Arguments:
 *	hcap		- pointer to the HCA's CM info structure
2222  *	port_index	- port number minus 1
2223  *
2224  * Return values:
2225  *	IBCM_SUCCESS - success
2226  */
2227 static ibcm_status_t
2228 ibcm_hca_fini_port(ibcm_hca_info_t *hcap, uint8_t port_index)
2229 {
2230 	int			ibmf_status;
2231 	ibcm_status_t		ibcm_status;
2232 
2233 	IBTF_DPRINTF_L4(cmlog, "ibcm_hca_fini_port: hcap = 0x%p port_num %d ",
2234 	    hcap, port_index + 1);
2235 
2236 	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
2237 
2238 	if (hcap->hca_port_info[port_index].port_ibmf_saa_hdl != NULL) {
2239 		IBTF_DPRINTF_L5(cmlog, "ibcm_hca_fini_port: "
2240 		    "ibmf_sa_session_close IBMF SAA hdl %p",
2241 		    hcap->hca_port_info[port_index].port_ibmf_saa_hdl);
2242 
2243 		ibmf_status = ibmf_sa_session_close(
2244 		    &hcap->hca_port_info[port_index].port_ibmf_saa_hdl, 0);
2245 		if (ibmf_status != IBMF_SUCCESS) {
2246 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_fini_port: "
2247 			    "ibmf_sa_session_close of port %d returned %x",
2248 			    port_index + 1, ibmf_status);
2249 			return (IBCM_FAILURE);
2250 		}
2251 	}
2252 
2253 	if (hcap->hca_port_info[port_index].port_ibmf_hdl != NULL) {
2254 		IBTF_DPRINTF_L5(cmlog, "ibcm_hca_fini_port: "
2255 		    "ibmf_unregister IBMF Hdl %p",
2256 		    hcap->hca_port_info[port_index].port_ibmf_hdl);
2257 
2258 		/* clean-up all the ibmf qp's allocated on this port */
2259 		ibcm_status = ibcm_free_allqps(hcap, port_index + 1);
2260 
2261 		if (ibcm_status != IBCM_SUCCESS) {
2262 
2263 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_fini_port: "
2264 			    "ibcm_free_allqps failed for port_num %d",
2265 			    port_index + 1);
2266 			return (IBCM_FAILURE);
2267 		}
2268 
2269 		/* Tear down the receive callback */
2270 		ibmf_status = ibmf_tear_down_async_cb(
2271 		    hcap->hca_port_info[port_index].port_ibmf_hdl,
2272 		    IBMF_QP_HANDLE_DEFAULT, 0);
2273 
2274 		if (ibmf_status != IBMF_SUCCESS) {
2275 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_fini_port: "
2276 			    "ibmf_tear_down_async_cb failed %d port_num %d",
2277 			    ibmf_status, port_index + 1);
2278 			return (IBCM_FAILURE);
2279 		}
2280 
2281 		/* Now, unregister with IBMF */
2282 		ibmf_status = ibmf_unregister(
2283 		    &hcap->hca_port_info[port_index].port_ibmf_hdl, 0);
2284 		IBTF_DPRINTF_L4(cmlog, "ibcm_hca_fini_port: "
2285 		    "ibmf_unregister of port_num %x returned %x",
2286 		    port_index + 1, ibmf_status);
2287 
2288 		if (ibmf_status == IBMF_SUCCESS)
2289 			hcap->hca_port_info[port_index].port_ibmf_hdl = NULL;
2290 		else {
2291 			IBTF_DPRINTF_L2(cmlog, "ibcm_hca_fini_port: "
2292 			    "ibmf_unregister failed %d port_num %d",
2293 			    ibmf_status, port_index + 1);
2294 			return (IBCM_FAILURE);
2295 		}
2296 	}
2297 	return (IBCM_SUCCESS);
2298 }
2299 
2300 /*
2301  * ibcm_comm_est_handler():
2302  *	Check if the given channel is in ESTABLISHED state or not
2303  *
2304  * Arguments:
2305  *	eventp	- A pointer to an ibt_async_event_t struct
2306  *
2307  * Return values: NONE
2308  */
static void
ibcm_comm_est_handler(ibt_async_event_t *eventp)
{
	ibcm_state_data_t	*statep;

	IBTF_DPRINTF_L4(cmlog, "ibcm_comm_est_handler:");

	/*
	 * ev_chan_hdl carries the QP/EEC handle for a COM_EST event;
	 * without it there is no channel whose state we can look up.
	 */
	if (eventp->ev_chan_hdl == NULL) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_comm_est_handler: "
		    "both QP and EEC handles are NULL");
		return;
	}

	/* get the "statep" from qp/eec handles */
	IBCM_GET_CHAN_PRIVATE(eventp->ev_chan_hdl, statep);
	if (statep == NULL) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_comm_est_handler: statep is NULL");
		return;
	}

	mutex_enter(&statep->state_mutex);

	/* Done with the channel private data; release it under the lock. */
	IBCM_RELEASE_CHAN_PRIVATE(eventp->ev_chan_hdl);

	IBTF_DPRINTF_L4(cmlog, "ibcm_comm_est_handler: statep = %p", statep);

	/* Hold a reference so statep stays valid while the lock is dropped */
	IBCM_REF_CNT_INCR(statep);

	if ((statep->state == IBCM_STATE_REP_SENT) ||
	    (statep->state == IBCM_STATE_MRA_REP_RCVD)) {
		timeout_id_t	timer_val = statep->timerid;

		/* COM_EST arrived before the RTU; treat as established */
		statep->state = IBCM_STATE_TRANSIENT_ESTABLISHED;

		/*
		 * Cancel the pending timer.  untimeout() is called only
		 * after dropping state_mutex -- presumably to avoid
		 * deadlock with a timeout handler that takes the same
		 * lock (NOTE(review): confirm against the timeout code).
		 */
		if (timer_val) {
			statep->timerid = 0;
			mutex_exit(&statep->state_mutex);
			(void) untimeout(timer_val);
		} else
			mutex_exit(&statep->state_mutex);

		/* CM doesn't have RTU message here */
		ibcm_cep_state_rtu(statep, NULL);

	} else {
		if (statep->state == IBCM_STATE_ESTABLISHED ||
		    statep->state == IBCM_STATE_TRANSIENT_ESTABLISHED) {
			IBTF_DPRINTF_L4(cmlog, "ibcm_comm_est_handler: "
			    "Channel already in ESTABLISHED state");
		} else {
			/* An unexpected behavior from remote */
			IBTF_DPRINTF_L2(cmlog, "ibcm_comm_est_handler: "
			    "Unexpected in state = %d", statep->state);
		}
		mutex_exit(&statep->state_mutex);

		ibcm_insert_trace(statep, IBCM_TRACE_INCOMING_COMEST);
	}

	/* Drop the reference taken above, under the state lock. */
	mutex_enter(&statep->state_mutex);
	IBCM_REF_CNT_DECR(statep);
	mutex_exit(&statep->state_mutex);
}
2373 
2374 
2375 /*
2376  * ibcm_async_handler():
2377  *	CM's Async Handler
2378  *	(Handles ATTACH, DETACH, COM_EST events)
2379  *
2380  * Arguments:
2381  *	eventp	- A pointer to an ibt_async_event_t struct
2382  *
2383  * Return values: None
2384  *
2385  * NOTE : CM assumes that all HCA DR events are delivered sequentially
2386  * i.e., until ibcm_async_handler  completes for a given HCA DR, framework
2387  * shall not invoke ibcm_async_handler with another DR event for the same
2388  * HCA
2389  */
/* ARGSUSED */
void
ibcm_async_handler(void *clnt_hdl, ibt_hca_hdl_t hca_hdl,
    ibt_async_code_t code, ibt_async_event_t *eventp)
{
	ibcm_hca_info_t		*hcap;
	ibcm_port_up_t		*pup;

	IBTF_DPRINTF_L3(cmlog, "ibcm_async_handler: "
	    "clnt_hdl = %p, code = 0x%x, eventp = 0x%p",
	    clnt_hdl, code, eventp);

	mutex_enter(&ibcm_global_hca_lock);

	/* If fini is going to complete successfully, then return */
	if (ibcm_finit_state != IBCM_FINIT_IDLE) {

		/*
		 * This finit state implies one of the following:
		 * Init either didn't start or didn't complete OR
		 * Fini is about to return SUCCESS and release the global lock.
		 * In all these cases, it is safe to ignore the async.
		 */

		IBTF_DPRINTF_L2(cmlog, "ibcm_async_handler: ignoring event %x, "
		    "as either init didn't complete or fini about to succeed",
		    code);
		mutex_exit(&ibcm_global_hca_lock);
		return;
	}

	switch (code) {
	case IBT_EVENT_PORT_UP:
		/*
		 * Handled outside the global HCA lock: the service-record
		 * rewrite runs on the CM taskq, and this arm returns
		 * directly rather than falling through to the common
		 * mutex_exit at the bottom of the function.
		 */
		mutex_exit(&ibcm_global_hca_lock);
		pup = kmem_alloc(sizeof (ibcm_port_up_t), KM_SLEEP);
		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pup))
		pup->pup_hca_guid = eventp->ev_hca_guid;
		pup->pup_port = eventp->ev_port;
		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*pup))
		/* pup is freed by the taskq handler, not here */
		(void) taskq_dispatch(ibcm_taskq,
		    ibcm_service_record_rewrite_task, pup, TQ_SLEEP);
		ibcm_path_cache_purge();
		return;

	case IBT_HCA_ATTACH_EVENT:

		/* eventp->ev_hca_guid is the HCA GUID of interest */
		ibcm_hca_attach(eventp->ev_hca_guid);
		break;

	case IBT_HCA_DETACH_EVENT:

		/* eventp->ev_hca_guid is the HCA GUID of interest */
		if ((hcap = ibcm_find_hcap_entry(eventp->ev_hca_guid)) ==
		    NULL) {
			IBTF_DPRINTF_L2(cmlog, "ibcm_async_handler:"
			    " hca %llX doesn't exist", eventp->ev_hca_guid);
			break;
		}

		(void) ibcm_hca_detach(hcap);
		break;

	case IBT_EVENT_COM_EST_QP:
		/* eventp->ev_qp_hdl is the ibt_qp_hdl_t of interest */
	case IBT_EVENT_COM_EST_EEC:
		/* eventp->ev_eec_hdl is the ibt_eec_hdl_t of interest */
		ibcm_comm_est_handler(eventp);
		break;
	default:
		break;
	}

	/* Unblock, any blocked fini/init operations */
	mutex_exit(&ibcm_global_hca_lock);
}
2466