1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * I/O Controller functions for the Solaris COMSTAR SCSI RDMA Protocol
29  * Target (SRPT) port provider.
30  */
31 
#include <sys/types.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
36 #include <sys/atomic.h>
37 #include <sys/sysmacros.h>
38 #include <sys/ib/ibtl/ibti.h>
39 #include <sys/sdt.h>
40 
41 #include "srp.h"
42 #include "srpt_impl.h"
43 #include "srpt_ioc.h"
44 #include "srpt_stp.h"
45 #include "srpt_ch.h"
46 
47 /*
48  * srpt_ioc_srq_size - Tunable parameter that specifies the number
49  * of receive WQ entries that can be posted to the IOC shared
50  * receive queue.
51  */
52 uint32_t	srpt_ioc_srq_size = SRPT_DEFAULT_IOC_SRQ_SIZE;
53 extern uint16_t srpt_send_msg_depth;
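
/*
 * Both tunables can be overridden at boot time via /etc/system, for
 * example (values are illustrative, assuming the standard module name
 * "srpt"):
 *
 *	set srpt:srpt_ioc_srq_size = 2048
 *	set srpt:srpt_send_msg_depth = 64
 */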
54 
55 /* IOC profile capabilities mask must be big-endian */
56 typedef struct srpt_ioc_opcap_bits_s {
57 #if	defined(_BIT_FIELDS_LTOH)
58 	uint8_t		af:1,
59 			at:1,
60 			wf:1,
61 			wt:1,
62 			rf:1,
63 			rt:1,
64 			sf:1,
65 			st:1;
66 #elif	defined(_BIT_FIELDS_HTOL)
67 	uint8_t		st:1,
68 			sf:1,
69 			rt:1,
70 			rf:1,
71 			wt:1,
72 			wf:1,
73 			at:1,
74 			af:1;
75 #else
76 #error	One of _BIT_FIELDS_LTOH or _BIT_FIELDS_HTOL must be defined
77 #endif
78 } srpt_ioc_opcap_bits_t;
79 
80 typedef union {
81 	srpt_ioc_opcap_bits_t	bits;
82 	uint8_t			mask;
83 } srpt_ioc_opcap_mask_t;
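
/*
 * With either bit-field layout above, st occupies the most significant
 * bit of the mask and af the least.  For example, the mask built in
 * srpt_ioc_init_profile() (st, sf, rf and wf set) works out to:
 *
 *	0x80 | 0x40 | 0x10 | 0x04 = 0xd4
 */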
84 
85 /*
86  * vmem arena variables - values derived from iSER
87  */
88 #define	SRPT_MR_QUANTSIZE	0x400			/* 1K */
89 #define	SRPT_MIN_CHUNKSIZE	0x100000		/* 1MB */
90 
/*
 * Use less memory on 32-bit kernels, where kernel address space is
 * much more constrained.
 */
92 #ifdef _LP64
93 #define	SRPT_BUF_MR_CHUNKSIZE	0x1000000		/* 16MB */
94 #define	SRPT_BUF_POOL_MAX	0x40000000		/* 1GB */
95 #else
96 #define	SRPT_BUF_MR_CHUNKSIZE	0x400000		/* 4MB */
97 #define	SRPT_BUF_POOL_MAX	0x4000000		/* 64MB */
98 #endif
99 
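/*
 * Data buffer memory regions are registered with both local and remote
 * RDMA access enabled; the IU receive buffers registered in
 * srpt_ioc_init() need only local write access.
 */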
100 static ibt_mr_flags_t	srpt_dbuf_mr_flags =
101     IBT_MR_ENABLE_LOCAL_WRITE | IBT_MR_ENABLE_REMOTE_WRITE |
102     IBT_MR_ENABLE_REMOTE_READ;
103 
104 void srpt_ioc_ib_async_hdlr(void *clnt, ibt_hca_hdl_t hdl,
105 	ibt_async_code_t code, ibt_async_event_t *event);
106 
107 static struct ibt_clnt_modinfo_s srpt_ibt_modinfo = {
108 	IBTI_V_CURR,
109 	IBT_STORAGE_DEV,
110 	srpt_ioc_ib_async_hdlr,
111 	NULL,
112 	"srpt"
113 };
114 
115 static srpt_ioc_t *srpt_ioc_init(ib_guid_t guid);
116 static void srpt_ioc_fini(srpt_ioc_t *ioc);
117 
118 static srpt_vmem_pool_t *srpt_vmem_create(const char *name, srpt_ioc_t *ioc,
119     ib_memlen_t chunksize, uint64_t maxsize, ibt_mr_flags_t flags);
120 static void *srpt_vmem_alloc(srpt_vmem_pool_t *vm_pool, size_t size);
121 static int srpt_vmem_mr_compare(const void *a, const void *b);
static srpt_mr_t *srpt_vmem_chunk_alloc(srpt_vmem_pool_t *vm_pool,
    ib_memlen_t chunksize);
124 static void srpt_vmem_destroy(srpt_vmem_pool_t *vm_pool);
125 static void srpt_vmem_free(srpt_vmem_pool_t *vm_pool, void *vaddr, size_t size);
126 static srpt_mr_t *srpt_reg_mem(srpt_vmem_pool_t *vm_pool, ib_vaddr_t vaddr,
127     ib_memlen_t len);
128 static void srpt_vmem_chunk_free(srpt_vmem_pool_t *vm_pool, srpt_mr_t *mr);
129 static void srpt_dereg_mem(srpt_ioc_t *ioc, srpt_mr_t *mr);
130 static int srpt_vmem_mr(srpt_vmem_pool_t *vm_pool, void *vaddr, size_t size,
131     srpt_mr_t *mr);
132 
133 /*
134  * srpt_ioc_attach() - I/O Controller attach
135  *
136  * Attach to IBTF and initialize I/O controllers. The srpt_ctxt->sc_rwlock
137  * should be held outside of this call.
138  */
139 int
140 srpt_ioc_attach()
141 {
142 	int		status;
143 	int		hca_cnt;
144 	int		hca_ndx;
145 	ib_guid_t	*guid;
146 	srpt_ioc_t	*ioc;
147 
148 	ASSERT(srpt_ctxt != NULL);
149 
150 	/*
151 	 * Attach to IBTF and initialize a list of IB devices.  Each
152 	 * HCA will be represented by an I/O Controller.
153 	 */
154 	status = ibt_attach(&srpt_ibt_modinfo, srpt_ctxt->sc_dip,
	    srpt_ctxt, &srpt_ctxt->sc_ibt_hdl);
156 	if (status != DDI_SUCCESS) {
157 		SRPT_DPRINTF_L1("ioc_attach, ibt_attach failed (0x%x)",
158 		    status);
159 		return (DDI_FAILURE);
160 	}
161 
162 	hca_cnt = ibt_get_hca_list(&guid);
163 	if (hca_cnt < 1) {
164 		/*
165 		 * not a fatal error.  Service will be up and
166 		 * waiting for ATTACH events.
167 		 */
168 		SRPT_DPRINTF_L2("ioc_attach, no HCA found");
169 		return (DDI_SUCCESS);
170 	}
171 
172 	for (hca_ndx = 0; hca_ndx < hca_cnt; hca_ndx++) {
173 		SRPT_DPRINTF_L2("ioc_attach, adding I/O"
174 		    " Controller (%016llx)", (u_longlong_t)guid[hca_ndx]);
175 
176 		ioc = srpt_ioc_init(guid[hca_ndx]);
177 		if (ioc == NULL) {
178 			SRPT_DPRINTF_L1("ioc_attach, ioc_init GUID(%016llx)"
179 			    " failed", (u_longlong_t)guid[hca_ndx]);
180 			continue;
181 		}
182 		list_insert_tail(&srpt_ctxt->sc_ioc_list, ioc);
183 		SRPT_DPRINTF_L2("ioc_attach, I/O Controller ibt HCA hdl (%p)",
184 		    (void *)ioc->ioc_ibt_hdl);
185 		srpt_ctxt->sc_num_iocs++;
186 	}
187 
188 	ibt_free_hca_list(guid, hca_cnt);
189 	SRPT_DPRINTF_L3("ioc_attach, added %d I/O Controller(s)",
190 	    srpt_ctxt->sc_num_iocs);
191 	return (DDI_SUCCESS);
192 }
193 
194 /*
195  * srpt_ioc_detach() - I/O Controller detach
196  *
197  * srpt_ctxt->sc_rwlock should be held outside of this call.
198  */
199 void
200 srpt_ioc_detach()
201 {
202 	srpt_ioc_t	*ioc;
203 
204 	ASSERT(srpt_ctxt != NULL);
205 
206 	while ((ioc = list_head(&srpt_ctxt->sc_ioc_list)) != NULL) {
207 		list_remove(&srpt_ctxt->sc_ioc_list, ioc);
208 		SRPT_DPRINTF_L2("ioc_detach, removing I/O Controller(%p)"
209 		    " (%016llx), ibt_hdl(%p)",
210 		    (void *)ioc,
		    (u_longlong_t)ioc->ioc_guid,
212 		    (void *)ioc->ioc_ibt_hdl);
213 		srpt_ioc_fini(ioc);
214 	}
215 
216 	(void) ibt_detach(srpt_ctxt->sc_ibt_hdl);
217 	srpt_ctxt->sc_ibt_hdl = NULL;
218 }
219 
220 /*
221  * srpt_ioc_init() - I/O Controller initialization
222  *
 * Requires srpt_ctxt->sc_rwlock be held outside of call.
224  */
225 static srpt_ioc_t *
226 srpt_ioc_init(ib_guid_t guid)
227 {
228 	ibt_status_t		status;
229 	srpt_ioc_t		*ioc;
230 	ibt_hca_attr_t		hca_attr;
231 	uint_t			iu_ndx;
232 	uint_t			err_ndx;
233 	ibt_mr_attr_t		mr_attr;
234 	ibt_mr_desc_t		mr_desc;
235 	srpt_iu_t		*iu;
236 	ibt_srq_sizes_t		srq_attr;
237 	char			namebuf[32];
238 	size_t			iu_offset;
239 
240 	status = ibt_query_hca_byguid(guid, &hca_attr);
241 	if (status != IBT_SUCCESS) {
242 		SRPT_DPRINTF_L1("ioc_init, HCA query error (%d)",
243 		    status);
244 		return (NULL);
245 	}
246 
247 	ioc = srpt_ioc_get_locked(guid);
248 	if (ioc != NULL) {
249 		SRPT_DPRINTF_L1("ioc_init, HCA already exists");
250 		return (NULL);
251 	}
252 
253 	ioc = kmem_zalloc(sizeof (srpt_ioc_t), KM_SLEEP);
254 
255 	rw_init(&ioc->ioc_rwlock, NULL, RW_DRIVER, NULL);
256 	rw_enter(&ioc->ioc_rwlock, RW_WRITER);
257 
258 	bcopy(&hca_attr, &ioc->ioc_attr, sizeof (ibt_hca_attr_t));
259 
260 	SRPT_DPRINTF_L2("ioc_init, HCA max mr=%d, mrlen=%lld",
261 	    hca_attr.hca_max_memr, (u_longlong_t)hca_attr.hca_max_memr_len);
262 	ioc->ioc_guid   = guid;
263 
264 	status = ibt_open_hca(srpt_ctxt->sc_ibt_hdl, guid, &ioc->ioc_ibt_hdl);
265 	if (status != IBT_SUCCESS) {
266 		SRPT_DPRINTF_L1("ioc_init, IBT open failed (%d)", status);
267 		goto hca_open_err;
268 	}
269 
270 	status = ibt_alloc_pd(ioc->ioc_ibt_hdl, IBT_PD_NO_FLAGS,
271 	    &ioc->ioc_pd_hdl);
272 	if (status != IBT_SUCCESS) {
273 		SRPT_DPRINTF_L1("ioc_init, IBT create PD failed (%d)", status);
274 		goto pd_alloc_err;
275 	}
276 
277 	/*
278 	 * We require hardware support for SRQs.  We use a common SRQ to
279 	 * reduce channel memory consumption.
280 	 */
281 	if ((ioc->ioc_attr.hca_flags & IBT_HCA_SRQ) == 0) {
282 		SRPT_DPRINTF_L0("ioc_init, no SRQ capability, not supported");
283 		goto srq_alloc_err;
284 	}
285 
286 	SRPT_DPRINTF_L3("ioc_init, Using shared receive queues, max srq work"
287 	    " queue size(%d), def size = %d", ioc->ioc_attr.hca_max_srqs_sz,
288 	    srpt_ioc_srq_size);
289 	srq_attr.srq_wr_sz = min(srpt_ioc_srq_size,
290 	    ioc->ioc_attr.hca_max_srqs_sz);
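	/*
	 * Each receive WR posted to the SRQ carries a single SGE that
	 * covers one whole IU buffer (see srpt_ioc_post_recv_iu()), so
	 * a one-entry SGL suffices.
	 */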
291 	srq_attr.srq_sgl_sz = 1;
292 
293 	status = ibt_alloc_srq(ioc->ioc_ibt_hdl, IBT_SRQ_NO_FLAGS,
294 	    ioc->ioc_pd_hdl, &srq_attr, &ioc->ioc_srq_hdl,
295 	    &ioc->ioc_srq_attr);
296 	if (status != IBT_SUCCESS) {
297 		SRPT_DPRINTF_L1("ioc_init, IBT create SRQ failed(%d)", status);
298 		goto srq_alloc_err;
299 	}
300 
301 	SRPT_DPRINTF_L2("ioc_init, SRQ WR size(%d), SG size(%d)",
302 	    ioc->ioc_srq_attr.srq_wr_sz, ioc->ioc_srq_attr.srq_sgl_sz);
303 
304 	ibt_set_srq_private(ioc->ioc_srq_hdl, ioc);
305 
306 	/*
307 	 * Allocate a pool of SRP IU message buffers and post them to
308 	 * the I/O Controller SRQ.  We let the SRQ manage the free IU
309 	 * messages.
310 	 */
311 	ioc->ioc_num_iu_entries =
312 	    min(srq_attr.srq_wr_sz, srpt_ioc_srq_size) - 1;
313 
314 	ioc->ioc_iu_pool = kmem_zalloc(sizeof (srpt_iu_t) *
315 	    ioc->ioc_num_iu_entries, KM_SLEEP);
316 
317 	ioc->ioc_iu_bufs = kmem_alloc(SRPT_DEFAULT_SEND_MSG_SIZE *
318 	    ioc->ioc_num_iu_entries, KM_SLEEP);
319 
320 	if ((ioc->ioc_iu_pool == NULL) || (ioc->ioc_iu_bufs == NULL)) {
321 		SRPT_DPRINTF_L1("ioc_init, failed to allocate SRQ IUs");
322 		goto srq_iu_alloc_err;
323 	}
324 
325 	mr_attr.mr_vaddr = (ib_vaddr_t)(uintptr_t)ioc->ioc_iu_bufs;
326 	mr_attr.mr_len   = SRPT_DEFAULT_SEND_MSG_SIZE * ioc->ioc_num_iu_entries;
327 	mr_attr.mr_as    = NULL;
328 	mr_attr.mr_flags = IBT_MR_ENABLE_LOCAL_WRITE;
329 
330 	status = ibt_register_mr(ioc->ioc_ibt_hdl, ioc->ioc_pd_hdl,
331 	    &mr_attr, &ioc->ioc_iu_mr_hdl, &mr_desc);
332 	if (status != IBT_SUCCESS) {
333 		SRPT_DPRINTF_L1("ioc_init, IU buffer pool MR err(%d)",
334 		    status);
335 		goto srq_iu_alloc_err;
336 	}
337 
338 	for (iu_ndx = 0, iu = ioc->ioc_iu_pool; iu_ndx <
339 	    ioc->ioc_num_iu_entries; iu_ndx++, iu++) {
340 
341 		iu_offset = (iu_ndx * SRPT_DEFAULT_SEND_MSG_SIZE);
342 		iu->iu_buf = (void *)((uintptr_t)ioc->ioc_iu_bufs + iu_offset);
343 
344 		mutex_init(&iu->iu_lock, NULL, MUTEX_DRIVER, NULL);
345 
346 		iu->iu_sge.ds_va  = mr_desc.md_vaddr + iu_offset;
347 		iu->iu_sge.ds_key = mr_desc.md_lkey;
348 		iu->iu_sge.ds_len = SRPT_DEFAULT_SEND_MSG_SIZE;
349 		iu->iu_ioc	  = ioc;
350 		iu->iu_pool_ndx   = iu_ndx;
351 
352 		status = srpt_ioc_post_recv_iu(ioc, &ioc->ioc_iu_pool[iu_ndx]);
		if (status != IBT_SUCCESS) {
			SRPT_DPRINTF_L1("ioc_init, SRQ IU post err(%d)",
			    status);
			/* unwind below only covers indices 0..iu_ndx-1 */
			mutex_destroy(&iu->iu_lock);
			goto srq_iu_post_err;
		}
358 	}
359 
360 	/*
361 	 * Initialize the dbuf vmem arena
362 	 */
	(void) snprintf(namebuf, sizeof (namebuf),
	    "srpt_buf_pool_%016llX", (u_longlong_t)guid);
365 	ioc->ioc_dbuf_pool = srpt_vmem_create(namebuf, ioc,
366 	    SRPT_BUF_MR_CHUNKSIZE, SRPT_BUF_POOL_MAX, srpt_dbuf_mr_flags);
367 
368 	if (ioc->ioc_dbuf_pool == NULL) {
369 		goto stmf_db_alloc_err;
370 	}
371 
372 	/*
373 	 * Allocate the I/O Controller STMF data buffer allocator.  The
374 	 * data store will span all targets associated with this IOC.
375 	 */
376 	ioc->ioc_stmf_ds = stmf_alloc(STMF_STRUCT_DBUF_STORE, 0, 0);
377 	if (ioc->ioc_stmf_ds == NULL) {
		SRPT_DPRINTF_L1("ioc_init, STMF DBUF alloc failure for IOC");
379 		goto stmf_db_alloc_err;
380 	}
381 	ioc->ioc_stmf_ds->ds_alloc_data_buf = &srpt_ioc_ds_alloc_dbuf;
382 	ioc->ioc_stmf_ds->ds_free_data_buf  = &srpt_ioc_ds_free_dbuf;
383 	ioc->ioc_stmf_ds->ds_port_private   = ioc;
384 
385 	rw_exit(&ioc->ioc_rwlock);
386 	return (ioc);
387 
388 stmf_db_alloc_err:
389 	if (ioc->ioc_dbuf_pool != NULL) {
390 		srpt_vmem_destroy(ioc->ioc_dbuf_pool);
391 	}
392 
393 srq_iu_post_err:
394 	if (ioc->ioc_iu_mr_hdl != NULL) {
395 		status = ibt_deregister_mr(ioc->ioc_ibt_hdl,
396 		    ioc->ioc_iu_mr_hdl);
397 		if (status != IBT_SUCCESS) {
398 			SRPT_DPRINTF_L1("ioc_init, error deregistering"
399 			    " memory region (%d)", status);
400 		}
401 	}
402 	for (err_ndx = 0, iu = ioc->ioc_iu_pool; err_ndx < iu_ndx;
403 	    err_ndx++, iu++) {
404 		mutex_destroy(&iu->iu_lock);
405 	}
406 
407 srq_iu_alloc_err:
408 	if (ioc->ioc_iu_bufs != NULL) {
409 		kmem_free(ioc->ioc_iu_bufs, SRPT_DEFAULT_SEND_MSG_SIZE *
410 		    ioc->ioc_num_iu_entries);
411 	}
412 	if (ioc->ioc_iu_pool != NULL) {
413 		kmem_free(ioc->ioc_iu_pool,
414 		    sizeof (srpt_iu_t) * ioc->ioc_num_iu_entries);
415 	}
	if (ioc->ioc_srq_hdl != NULL) {
		status = ibt_free_srq(ioc->ioc_srq_hdl);
		if (status != IBT_SUCCESS) {
			SRPT_DPRINTF_L1("ioc_init, error freeing SRQ (%d)",
			    status);
		}
	}
424 
425 srq_alloc_err:
426 	status = ibt_free_pd(ioc->ioc_ibt_hdl, ioc->ioc_pd_hdl);
427 	if (status != IBT_SUCCESS) {
428 		SRPT_DPRINTF_L1("ioc_init, free PD error (%d)", status);
429 	}
430 
431 pd_alloc_err:
432 	status = ibt_close_hca(ioc->ioc_ibt_hdl);
433 	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_init, close HCA error (%d)", status);
435 	}
436 
437 hca_open_err:
438 	rw_exit(&ioc->ioc_rwlock);
439 	rw_destroy(&ioc->ioc_rwlock);
440 	kmem_free(ioc, sizeof (*ioc));
441 	return (NULL);
442 }
443 
444 /*
445  * srpt_ioc_fini() - I/O Controller Cleanup
446  *
447  * Requires srpt_ctxt->sc_rwlock be held outside of call.
448  */
449 static void
450 srpt_ioc_fini(srpt_ioc_t *ioc)
451 {
452 	int		status;
453 	int		ndx;
454 
455 	/*
456 	 * Note driver flows will have already taken all SRP
457 	 * services running on the I/O Controller off-line.
458 	 */
459 	rw_enter(&ioc->ioc_rwlock, RW_WRITER);
460 	if (ioc->ioc_ibt_hdl != NULL) {
461 		if (ioc->ioc_stmf_ds != NULL) {
462 			stmf_free(ioc->ioc_stmf_ds);
463 		}
464 
465 		if (ioc->ioc_srq_hdl != NULL) {
466 			SRPT_DPRINTF_L4("ioc_fini, freeing SRQ");
467 			status = ibt_free_srq(ioc->ioc_srq_hdl);
468 			if (status != IBT_SUCCESS) {
469 				SRPT_DPRINTF_L1("ioc_fini, free SRQ"
470 				    " error (%d)", status);
471 			}
472 		}
473 
474 		if (ioc->ioc_iu_mr_hdl != NULL) {
475 			status = ibt_deregister_mr(
476 			    ioc->ioc_ibt_hdl, ioc->ioc_iu_mr_hdl);
477 			if (status != IBT_SUCCESS) {
478 				SRPT_DPRINTF_L1("ioc_fini, error deregistering"
479 				    " memory region (%d)", status);
480 			}
481 		}
482 
483 		if (ioc->ioc_iu_bufs != NULL) {
484 			kmem_free(ioc->ioc_iu_bufs, SRPT_DEFAULT_SEND_MSG_SIZE *
485 			    ioc->ioc_num_iu_entries);
486 		}
487 
488 		if (ioc->ioc_iu_pool != NULL) {
489 			SRPT_DPRINTF_L4("ioc_fini, freeing IU entries");
490 			for (ndx = 0; ndx < ioc->ioc_num_iu_entries; ndx++) {
491 				mutex_destroy(&ioc->ioc_iu_pool[ndx].iu_lock);
492 			}
493 
494 			SRPT_DPRINTF_L4("ioc_fini, free IU pool struct");
495 			kmem_free(ioc->ioc_iu_pool,
496 			    sizeof (srpt_iu_t) * (ioc->ioc_num_iu_entries));
497 			ioc->ioc_iu_pool = NULL;
498 			ioc->ioc_num_iu_entries = 0;
499 		}
500 
501 		if (ioc->ioc_dbuf_pool != NULL) {
502 			srpt_vmem_destroy(ioc->ioc_dbuf_pool);
503 		}
504 
505 		if (ioc->ioc_pd_hdl != NULL) {
506 			status = ibt_free_pd(ioc->ioc_ibt_hdl,
507 			    ioc->ioc_pd_hdl);
508 			if (status != IBT_SUCCESS) {
509 				SRPT_DPRINTF_L1("ioc_fini, free PD"
510 				    " error (%d)", status);
511 			}
512 		}
513 
514 		status = ibt_close_hca(ioc->ioc_ibt_hdl);
515 		if (status != IBT_SUCCESS) {
			SRPT_DPRINTF_L1(
			    "ioc_fini, close HCA error (%d)", status);
518 		}
519 	}
520 	rw_exit(&ioc->ioc_rwlock);
521 	rw_destroy(&ioc->ioc_rwlock);
522 	kmem_free(ioc, sizeof (srpt_ioc_t));
523 }
524 
525 /*
526  * srpt_ioc_port_active() - I/O Controller port active
527  */
528 static void
529 srpt_ioc_port_active(ibt_async_event_t *event)
530 {
531 	ibt_status_t		status;
532 	srpt_ioc_t		*ioc;
533 	srpt_target_port_t	*tgt = NULL;
534 	boolean_t		online_target = B_FALSE;
535 	stmf_change_status_t	cstatus;
536 
537 	ASSERT(event != NULL);
538 
539 	SRPT_DPRINTF_L3("ioc_port_active event handler, invoked");
540 
541 	/*
542 	 * Find the HCA in question and if the HCA has completed
543 	 * initialization, and the SRP Target service for the
544 	 * the I/O Controller exists, then bind this port.
545 	 */
546 	ioc = srpt_ioc_get(event->ev_hca_guid);
547 
548 	if (ioc == NULL) {
549 		SRPT_DPRINTF_L2("ioc_port_active, I/O Controller not"
550 		    " active");
551 		return;
552 	}
553 
554 	tgt = ioc->ioc_tgt_port;
555 	if (tgt == NULL) {
		SRPT_DPRINTF_L2("ioc_port_active, I/O Controller target"
		    " undefined");
		return;
	}

562 	/*
563 	 * We take the target lock here to serialize this operation
564 	 * with any STMF initiated target state transitions.  If
565 	 * SRP is off-line then the service handle is NULL.
566 	 */
567 	mutex_enter(&tgt->tp_lock);
568 
569 	if (tgt->tp_ibt_svc_hdl != NULL) {
570 		status = srpt_ioc_svc_bind(tgt, event->ev_port);
571 		if ((status != IBT_SUCCESS) &&
572 		    (status != IBT_HCA_PORT_NOT_ACTIVE)) {
573 			SRPT_DPRINTF_L1("ioc_port_active, bind failed (%d)",
574 			    status);
575 		}
576 	} else {
577 		/* if we were offline because of no ports, try onlining now */
578 		if ((tgt->tp_num_active_ports == 0) &&
579 		    (tgt->tp_requested_state != tgt->tp_state) &&
580 		    (tgt->tp_requested_state == SRPT_TGT_STATE_ONLINE)) {
581 			online_target = B_TRUE;
582 			cstatus.st_completion_status = STMF_SUCCESS;
583 			cstatus.st_additional_info = "port active";
584 		}
585 	}
586 
587 	mutex_exit(&tgt->tp_lock);
588 
589 	if (online_target) {
590 		stmf_status_t	ret;
591 
592 		ret = stmf_ctl(STMF_CMD_LPORT_ONLINE, tgt->tp_lport, &cstatus);
593 
594 		if (ret == STMF_SUCCESS) {
595 			SRPT_DPRINTF_L1("ioc_port_active, port %d active, "
596 			    "target %016llx online requested", event->ev_port,
597 			    (u_longlong_t)ioc->ioc_guid);
598 		} else if (ret != STMF_ALREADY) {
599 			SRPT_DPRINTF_L1("ioc_port_active, port %d active, "
600 			    "target %016llx failed online request: %d",
601 			    event->ev_port, (u_longlong_t)ioc->ioc_guid,
602 			    (int)ret);
603 		}
604 	}
605 }
606 
607 /*
608  * srpt_ioc_port_down()
609  */
610 static void
611 srpt_ioc_port_down(ibt_async_event_t *event)
612 {
613 	srpt_ioc_t		*ioc;
614 	srpt_target_port_t	*tgt;
615 	srpt_channel_t		*ch;
616 	srpt_channel_t		*next_ch;
617 	boolean_t		offline_target = B_FALSE;
618 	stmf_change_status_t	cstatus;
619 
620 	SRPT_DPRINTF_L3("ioc_port_down event handler, invoked");
621 
622 	/*
623 	 * Find the HCA in question and if the HCA has completed
624 	 * initialization, and the SRP Target service for the
625 	 * the I/O Controller exists, then logout initiators
626 	 * through this port.
627 	 */
628 	ioc = srpt_ioc_get(event->ev_hca_guid);
629 
630 	if (ioc == NULL) {
631 		SRPT_DPRINTF_L2("ioc_port_down, I/O Controller not"
632 		    " active");
633 		return;
634 	}
635 
636 	/*
637 	 * We only have one target now, but we could go through all
638 	 * SCSI target ports if more are added.
639 	 */
640 	tgt = ioc->ioc_tgt_port;
641 	if (tgt == NULL) {
		SRPT_DPRINTF_L2("ioc_port_down, I/O Controller target"
		    " undefined");
644 		return;
645 	}
646 	mutex_enter(&tgt->tp_lock);
647 
648 	/*
649 	 * For all channel's logged in through this port, initiate a
650 	 * disconnect.
651 	 */
652 	mutex_enter(&tgt->tp_ch_list_lock);
653 	ch = list_head(&tgt->tp_ch_list);
654 	while (ch != NULL) {
655 		next_ch = list_next(&tgt->tp_ch_list, ch);
656 		if (ch->ch_session && (ch->ch_session->ss_hw_port ==
657 		    event->ev_port)) {
658 			srpt_ch_disconnect(ch);
659 		}
660 		ch = next_ch;
661 	}
662 	mutex_exit(&tgt->tp_ch_list_lock);
663 
664 	tgt->tp_num_active_ports--;
665 
666 	/* if we have no active ports, take the target offline */
667 	if ((tgt->tp_num_active_ports == 0) &&
668 	    (tgt->tp_state == SRPT_TGT_STATE_ONLINE)) {
669 		cstatus.st_completion_status = STMF_SUCCESS;
670 		cstatus.st_additional_info = "no ports active";
671 		offline_target = B_TRUE;
672 	}
673 
674 	mutex_exit(&tgt->tp_lock);
675 
676 	if (offline_target) {
677 		stmf_status_t	ret;
678 
679 		ret = stmf_ctl(STMF_CMD_LPORT_OFFLINE, tgt->tp_lport, &cstatus);
680 
681 		if (ret == STMF_SUCCESS) {
682 			SRPT_DPRINTF_L1("ioc_port_down, port %d down, target "
683 			    "%016llx offline requested", event->ev_port,
684 			    (u_longlong_t)ioc->ioc_guid);
685 		} else if (ret != STMF_ALREADY) {
686 			SRPT_DPRINTF_L1("ioc_port_down, port %d down, target "
687 			    "%016llx failed offline request: %d",
688 			    event->ev_port,
689 			    (u_longlong_t)ioc->ioc_guid, (int)ret);
690 		}
691 	}
692 }
693 
694 /*
695  * srpt_ioc_ib_async_hdlr - I/O Controller IB asynchronous events
696  */
697 /* ARGSUSED */
698 void
699 srpt_ioc_ib_async_hdlr(void *clnt, ibt_hca_hdl_t hdl,
700 	ibt_async_code_t code, ibt_async_event_t *event)
701 {
702 	srpt_ioc_t		*ioc;
703 	srpt_channel_t		*ch;
704 
705 	switch (code) {
706 	case IBT_EVENT_PORT_UP:
707 		srpt_ioc_port_active(event);
708 		break;
709 
710 	case IBT_ERROR_PORT_DOWN:
711 		srpt_ioc_port_down(event);
712 		break;
713 
714 	case IBT_HCA_ATTACH_EVENT:
715 		rw_enter(&srpt_ctxt->sc_rwlock, RW_WRITER);
716 		ioc = srpt_ioc_init(event->ev_hca_guid);
717 
718 		if (ioc == NULL) {
719 			rw_exit(&srpt_ctxt->sc_rwlock);
720 			SRPT_DPRINTF_L1("ib_async_hdlr, HCA_ATTACH"
721 			    " event failed to initialize HCA (0x%016llx)",
722 			    (u_longlong_t)event->ev_hca_guid);
723 			return;
724 		}
725 		SRPT_DPRINTF_L2("HCA_ATTACH_EVENT: I/O Controller"
726 		    " ibt hdl (%p)",
727 		    (void *)ioc->ioc_ibt_hdl);
728 
729 		rw_enter(&ioc->ioc_rwlock, RW_WRITER);
730 		ioc->ioc_tgt_port = srpt_stp_alloc_port(ioc, ioc->ioc_guid);
731 		if (ioc->ioc_tgt_port == NULL) {
732 			SRPT_DPRINTF_L1("ioc_ib_async_hdlr, alloc SCSI "
733 			    "target port error for HCA (0x%016llx)",
734 			    (u_longlong_t)event->ev_hca_guid);
735 			rw_exit(&ioc->ioc_rwlock);
736 			srpt_ioc_fini(ioc);
737 			rw_exit(&srpt_ctxt->sc_rwlock);
738 			return;
739 		}
740 
741 		/*
742 		 * New HCA added with default SCSI Target Port, SRP service
743 		 * will be started when SCSI Target Port is brought
744 		 * on-line by STMF.
745 		 */
746 		srpt_ctxt->sc_num_iocs++;
747 		list_insert_tail(&srpt_ctxt->sc_ioc_list, ioc);
748 
749 		rw_exit(&ioc->ioc_rwlock);
750 		rw_exit(&srpt_ctxt->sc_rwlock);
751 		break;
752 
753 	case IBT_HCA_DETACH_EVENT:
		SRPT_DPRINTF_L1(
		    "ioc_ib_async_hdlr, HCA_DETACH_EVENT received.");
756 		break;
757 
758 	case IBT_EVENT_EMPTY_CHAN:
759 		/* Channel in ERROR state is now empty */
760 		ch = (srpt_channel_t *)ibt_get_chan_private(event->ev_chan_hdl);
		SRPT_DPRINTF_L3(
		    "ioc_ib_async_hdlr, received empty channel error on %p",
		    (void *)ch);
764 		break;
765 
766 	default:
767 		SRPT_DPRINTF_L2("ioc_ib_async_hdlr, event not "
768 		    "handled (%d)", code);
769 		break;
770 	}
771 }
772 
773 /*
774  * srpt_ioc_svc_bind()
775  */
776 ibt_status_t
777 srpt_ioc_svc_bind(srpt_target_port_t *tgt, uint_t portnum)
778 {
779 	ibt_status_t		status;
780 	srpt_hw_port_t		*port;
781 	ibt_hca_portinfo_t	*portinfo;
782 	uint_t			qportinfo_sz;
783 	uint_t			qportnum;
784 	ib_gid_t		new_gid;
785 	srpt_ioc_t		*ioc;
786 	srpt_session_t		sess;
787 
788 	ASSERT(tgt != NULL);
789 	ASSERT(tgt->tp_ioc != NULL);
790 	ioc = tgt->tp_ioc;
791 
792 	if (tgt->tp_ibt_svc_hdl == NULL) {
793 		SRPT_DPRINTF_L2("ioc_svc_bind, NULL SCSI target port"
794 		    " service");
795 		return (IBT_INVALID_PARAM);
796 	}
797 
798 	if (portnum == 0 || portnum > tgt->tp_nports) {
799 		SRPT_DPRINTF_L2("ioc_svc_bind, bad port (%d)", portnum);
800 		return (IBT_INVALID_PARAM);
801 	}
802 	status = ibt_query_hca_ports(ioc->ioc_ibt_hdl, portnum,
803 	    &portinfo, &qportnum, &qportinfo_sz);
804 	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_svc_bind, query port %d error (%d)",
		    portnum, status);
807 		return (IBT_INVALID_PARAM);
808 	}
809 
810 	ASSERT(portinfo != NULL);
811 
812 	/*
813 	 * If port is not active do nothing, caller should attempt to bind
814 	 * after the port goes active.
815 	 */
816 	if (portinfo->p_linkstate != IBT_PORT_ACTIVE) {
817 		SRPT_DPRINTF_L2("ioc_svc_bind, port %d not in active state",
818 		    portnum);
819 		ibt_free_portinfo(portinfo, qportinfo_sz);
820 		return (IBT_HCA_PORT_NOT_ACTIVE);
821 	}
822 
823 	port    = &tgt->tp_hw_port[portnum-1];
824 	new_gid = portinfo->p_sgid_tbl[0];
825 	ibt_free_portinfo(portinfo, qportinfo_sz);
826 
827 	/*
828 	 * If previously bound and the port GID has changed,
829 	 * unbind the old GID.
830 	 */
831 	if (port->hwp_bind_hdl != NULL) {
832 		if (new_gid.gid_guid != port->hwp_gid.gid_guid ||
833 		    new_gid.gid_prefix != port->hwp_gid.gid_prefix) {
834 			SRPT_DPRINTF_L2("ioc_svc_bind, unregister current"
835 			    " bind");
836 			(void) ibt_unbind_service(tgt->tp_ibt_svc_hdl,
837 			    port->hwp_bind_hdl);
838 			port->hwp_bind_hdl = NULL;
839 		} else {
840 			SRPT_DPRINTF_L2("ioc_svc_bind, port %d already bound",
841 			    portnum);
842 		}
843 	}
844 
845 	/* bind the new port GID */
846 	if (port->hwp_bind_hdl == NULL) {
847 		SRPT_DPRINTF_L2("ioc_svc_bind, bind service, %016llx:%016llx",
848 		    (u_longlong_t)new_gid.gid_prefix,
849 		    (u_longlong_t)new_gid.gid_guid);
850 
851 		/*
852 		 * Pass SCSI Target Port as CM private data, the target will
853 		 * always exist while this service is bound.
854 		 */
855 		status = ibt_bind_service(tgt->tp_ibt_svc_hdl, new_gid, NULL,
856 		    tgt, &port->hwp_bind_hdl);
857 		if (status != IBT_SUCCESS && status != IBT_CM_SERVICE_EXISTS) {
858 			SRPT_DPRINTF_L1("ioc_svc_bind, bind error (%d)",
859 			    status);
860 			return (status);
861 		}
862 		port->hwp_gid.gid_prefix = new_gid.gid_prefix;
863 		port->hwp_gid.gid_guid = new_gid.gid_guid;
864 	}
865 
866 	/* port is now active */
867 	tgt->tp_num_active_ports++;
868 
	/* Set up a transient structure for the DTrace probe. */
870 	bzero(&sess, sizeof (srpt_session_t));
871 	ALIAS_STR(sess.ss_t_gid, new_gid.gid_prefix, new_gid.gid_guid);
872 	EUI_STR(sess.ss_t_name, tgt->tp_ibt_svc_id);
873 
874 	DTRACE_SRP_1(service__up, srpt_session_t, &sess);
875 
876 	return (IBT_SUCCESS);
877 }
878 
879 /*
880  * srpt_ioc_svc_unbind()
881  */
882 void
883 srpt_ioc_svc_unbind(srpt_target_port_t *tgt, uint_t portnum)
884 {
885 	srpt_hw_port_t		*port;
886 	srpt_session_t		sess;
887 	ibt_status_t		ret;
888 
889 	if (tgt == NULL) {
890 		SRPT_DPRINTF_L2("ioc_svc_unbind, SCSI target does not exist");
891 		return;
892 	}
893 
894 	if (portnum == 0 || portnum > tgt->tp_nports) {
895 		SRPT_DPRINTF_L2("ioc_svc_unbind, bad port (%d)", portnum);
896 		return;
897 	}
898 	port = &tgt->tp_hw_port[portnum-1];
899 
	/* Set up a transient structure for the DTrace probe. */
901 	bzero(&sess, sizeof (srpt_session_t));
902 	ALIAS_STR(sess.ss_t_gid, port->hwp_gid.gid_prefix,
903 	    port->hwp_gid.gid_guid);
904 	EUI_STR(sess.ss_t_name, tgt->tp_ibt_svc_id);
905 
906 	DTRACE_SRP_1(service__down, srpt_session_t, &sess);
907 
908 	if (tgt->tp_ibt_svc_hdl != NULL && port->hwp_bind_hdl != NULL) {
909 		SRPT_DPRINTF_L2("ioc_svc_unbind, unregister current bind");
910 		ret = ibt_unbind_service(tgt->tp_ibt_svc_hdl,
911 		    port->hwp_bind_hdl);
912 		if (ret != IBT_SUCCESS) {
913 			SRPT_DPRINTF_L1(
914 			    "ioc_svc_unbind, unregister port %d failed: %d",
915 			    portnum, ret);
916 		} else {
917 			port->hwp_bind_hdl = NULL;
918 			port->hwp_gid.gid_prefix = 0;
919 			port->hwp_gid.gid_guid = 0;
920 		}
921 	}
922 }
923 
924 /*
925  * srpt_ioc_svc_unbind_all()
926  */
927 void
928 srpt_ioc_svc_unbind_all(srpt_target_port_t *tgt)
929 {
930 	uint_t		portnum;
931 
932 	if (tgt == NULL) {
933 		SRPT_DPRINTF_L2("ioc_svc_unbind_all, NULL SCSI target port"
934 		    " specified");
935 		return;
936 	}
937 	for (portnum = 1; portnum <= tgt->tp_nports; portnum++) {
938 		srpt_ioc_svc_unbind(tgt, portnum);
939 	}
940 }
941 
942 /*
943  * srpt_ioc_get_locked()
944  *
 * Requires srpt_ctxt->sc_rwlock be held outside of call.
946  */
947 srpt_ioc_t *
948 srpt_ioc_get_locked(ib_guid_t guid)
949 {
950 	srpt_ioc_t	*ioc;
951 
952 	ioc = list_head(&srpt_ctxt->sc_ioc_list);
953 	while (ioc != NULL) {
954 		if (ioc->ioc_guid == guid) {
955 			break;
956 		}
957 		ioc = list_next(&srpt_ctxt->sc_ioc_list, ioc);
958 	}
959 	return (ioc);
960 }
961 
962 /*
963  * srpt_ioc_get()
964  */
965 srpt_ioc_t *
966 srpt_ioc_get(ib_guid_t guid)
967 {
968 	srpt_ioc_t	*ioc;
969 
970 	rw_enter(&srpt_ctxt->sc_rwlock, RW_READER);
971 	ioc = srpt_ioc_get_locked(guid);
972 	rw_exit(&srpt_ctxt->sc_rwlock);
973 	return (ioc);
974 }
975 
976 /*
977  * srpt_ioc_post_recv_iu()
978  */
979 ibt_status_t
980 srpt_ioc_post_recv_iu(srpt_ioc_t *ioc, srpt_iu_t *iu)
981 {
982 	ibt_status_t		status;
983 	ibt_recv_wr_t		wr;
984 	uint_t			posted;
985 
986 	ASSERT(ioc != NULL);
987 	ASSERT(iu != NULL);
988 
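	/*
	 * Stash the IU pointer in the 64-bit work request ID so the
	 * completion handler can recover the IU directly from the CQE.
	 */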
989 	wr.wr_id  = (ibt_wrid_t)(uintptr_t)iu;
990 	wr.wr_nds = 1;
991 	wr.wr_sgl = &iu->iu_sge;
992 	posted    = 0;
993 
994 	status = ibt_post_srq(ioc->ioc_srq_hdl, &wr, 1, &posted);
995 	if (status != IBT_SUCCESS) {
996 		SRPT_DPRINTF_L2("ioc_post_recv_iu, post error (%d)",
997 		    status);
998 	}
999 	return (status);
1000 }
1001 
1002 /*
1003  * srpt_ioc_repost_recv_iu()
1004  */
1005 void
1006 srpt_ioc_repost_recv_iu(srpt_ioc_t *ioc, srpt_iu_t *iu)
1007 {
1008 	srpt_channel_t		*ch;
1009 	ibt_status_t		status;
1010 
1011 	ASSERT(iu != NULL);
1012 	ASSERT(mutex_owned(&iu->iu_lock));
1013 
1014 	/*
1015 	 * Some additional sanity checks while in debug state, all STMF
1016 	 * related task activities should be complete prior to returning
1017 	 * this IU to the available pool.
1018 	 */
1019 	ASSERT(iu->iu_stmf_task == NULL);
1020 	ASSERT(iu->iu_sq_posted_cnt == 0);
1021 
1022 	ch = iu->iu_ch;
1023 	iu->iu_ch = NULL;
1024 	iu->iu_num_rdescs = 0;
1025 	iu->iu_rdescs = NULL;
1026 	iu->iu_tot_xfer_len = 0;
1027 	iu->iu_tag = 0;
1028 	iu->iu_flags = 0;
1029 	iu->iu_sq_posted_cnt = 0;
1030 
1031 	status = srpt_ioc_post_recv_iu(ioc, iu);
1032 
1033 	if (status != IBT_SUCCESS) {
1034 		/*
1035 		 * Very bad, we should initiate a shutdown of the I/O
1036 		 * Controller here, off-lining any targets associated
1037 		 * with this I/O Controller (and therefore disconnecting
1038 		 * any logins that remain).
1039 		 *
1040 		 * In practice this should never happen so we put
1041 		 * the code near the bottom of the implementation list.
1042 		 */
1043 		SRPT_DPRINTF_L0("ioc_repost_recv_iu, error RX IU (%d)",
1044 		    status);
1045 		ASSERT(0);
1046 	} else if (ch != NULL) {
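		/*
		 * Credit the channel's request-limit delta; this delta is
		 * reported back to the initiator in later SRP responses
		 * to replenish its request-limit credits.
		 */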
1047 		atomic_inc_32(&ch->ch_req_lim_delta);
1048 	}
1049 }
1050 
1051 /*
1052  * srpt_ioc_init_profile()
1053  *
1054  * SRP I/O Controller serialization lock must be held when this
1055  * routine is invoked.
1056  */
1057 void
1058 srpt_ioc_init_profile(srpt_ioc_t *ioc)
1059 {
1060 	srpt_ioc_opcap_mask_t		capmask = {0};
1061 
1062 	ASSERT(ioc != NULL);
1063 
1064 	ioc->ioc_profile.ioc_guid = h2b64(ioc->ioc_guid);
1065 	(void) memcpy(ioc->ioc_profile.ioc_id_string,
1066 	    "Solaris SRP Target 0.9a", 23);
1067 
1068 	/*
1069 	 * Note vendor ID and subsystem ID are 24 bit values.  Low order
1070 	 * 8 bits in vendor ID field is slot and is initialized to zero.
1071 	 * Low order 8 bits of subsystem ID is a reserved field and
1072 	 * initialized to zero.
1073 	 */
1074 	ioc->ioc_profile.ioc_vendorid =
1075 	    h2b32((uint32_t)(ioc->ioc_attr.hca_vendor_id << 8));
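	/* e.g. a 24-bit vendor ID of 0x123456 is stored as 0x12345600 */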
1076 	ioc->ioc_profile.ioc_deviceid =
1077 	    h2b32((uint32_t)ioc->ioc_attr.hca_device_id);
1078 	ioc->ioc_profile.ioc_device_ver =
1079 	    h2b16((uint16_t)ioc->ioc_attr.hca_version_id);
1080 	ioc->ioc_profile.ioc_subsys_vendorid =
1081 	    h2b32((uint32_t)(ioc->ioc_attr.hca_vendor_id << 8));
1082 	ioc->ioc_profile.ioc_subsys_id = h2b32(0);
1083 	ioc->ioc_profile.ioc_io_class = h2b16(SRP_REV_16A_IO_CLASS);
1084 	ioc->ioc_profile.ioc_io_subclass = h2b16(SRP_IO_SUBCLASS);
1085 	ioc->ioc_profile.ioc_protocol = h2b16(SRP_PROTOCOL);
1086 	ioc->ioc_profile.ioc_protocol_ver = h2b16(SRP_PROTOCOL_VERSION);
1087 	ioc->ioc_profile.ioc_send_msg_qdepth = h2b16(srpt_send_msg_depth);
1088 	ioc->ioc_profile.ioc_rdma_read_qdepth =
1089 	    ioc->ioc_attr.hca_max_rdma_out_chan;
1090 	ioc->ioc_profile.ioc_send_msg_sz = h2b32(SRPT_DEFAULT_SEND_MSG_SIZE);
1091 	ioc->ioc_profile.ioc_rdma_xfer_sz = h2b32(SRPT_DEFAULT_MAX_RDMA_SIZE);
1092 
1093 	capmask.bits.st = 1;	/* Messages can be sent to IOC */
1094 	capmask.bits.sf = 1;	/* Messages can be sent from IOC */
1095 	capmask.bits.rf = 1;	/* RDMA Reads can be sent from IOC */
1096 	capmask.bits.wf = 1;	/* RDMA Writes can be sent from IOC */
1097 	ioc->ioc_profile.ioc_ctrl_opcap_mask = capmask.mask;
1098 
1099 	/*
1100 	 * We currently only have one target, but if we had a list we would
1101 	 * go through that list and only count those that are ONLINE when
1102 	 * setting the services count and entries.
1103 	 */
1104 	if (ioc->ioc_tgt_port->tp_srp_enabled) {
1105 		ioc->ioc_profile.ioc_service_entries = 1;
1106 		ioc->ioc_svc.srv_id = h2b64(ioc->ioc_guid);
1107 		(void) snprintf((char *)ioc->ioc_svc.srv_name,
1108 		    IB_DM_MAX_SVC_NAME_LEN, "SRP.T10:%016llx",
1109 		    (u_longlong_t)ioc->ioc_guid);
1110 	} else {
1111 		ioc->ioc_profile.ioc_service_entries = 0;
1112 		ioc->ioc_svc.srv_id = 0;
1113 	}
1114 }
1115 
1116 /*
1117  * srpt_ioc_ds_alloc_dbuf()
1118  */
1119 /* ARGSUSED */
1120 stmf_data_buf_t *
1121 srpt_ioc_ds_alloc_dbuf(struct scsi_task *task, uint32_t size,
1122 	uint32_t *pminsize, uint32_t flags)
1123 {
1124 	srpt_iu_t		*iu;
1125 	srpt_ioc_t		*ioc;
1126 	srpt_ds_dbuf_t		*dbuf;
1127 	stmf_data_buf_t		*stmf_dbuf;
1128 	void			*buf;
1129 	srpt_mr_t		mr;
1130 
1131 	ASSERT(task != NULL);
1132 	iu  = task->task_port_private;
1133 	ioc = iu->iu_ioc;
1134 
1135 	SRPT_DPRINTF_L4("ioc_ds_alloc_dbuf, invoked ioc(%p)"
1136 	    " size(%d), flags(%x)",
1137 	    (void *)ioc, size, flags);
1138 
1139 	buf = srpt_vmem_alloc(ioc->ioc_dbuf_pool, size);
1140 	if (buf == NULL) {
1141 		return (NULL);
1142 	}
1143 
1144 	if (srpt_vmem_mr(ioc->ioc_dbuf_pool, buf, size, &mr) != 0) {
1145 		goto stmf_alloc_err;
1146 	}
1147 
1148 	stmf_dbuf = stmf_alloc(STMF_STRUCT_DATA_BUF, sizeof (srpt_ds_dbuf_t),
1149 	    0);
1150 	if (stmf_dbuf == NULL) {
1151 		SRPT_DPRINTF_L2("ioc_ds_alloc_dbuf, stmf_alloc failed");
1152 		goto stmf_alloc_err;
1153 	}
1154 
1155 	dbuf = stmf_dbuf->db_port_private;
1156 	dbuf->db_stmf_buf = stmf_dbuf;
1157 	dbuf->db_mr_hdl = mr.mr_hdl;
1158 	dbuf->db_ioc = ioc;
1159 	dbuf->db_sge.ds_va = mr.mr_va;
1160 	dbuf->db_sge.ds_key = mr.mr_lkey;
1161 	dbuf->db_sge.ds_len = size;
1162 
1163 	stmf_dbuf->db_buf_size = size;
1164 	stmf_dbuf->db_data_size = size;
1165 	stmf_dbuf->db_relative_offset = 0;
1166 	stmf_dbuf->db_flags = 0;
1167 	stmf_dbuf->db_xfer_status = 0;
1168 	stmf_dbuf->db_sglist_length = 1;
1169 	stmf_dbuf->db_sglist[0].seg_addr = buf;
1170 	stmf_dbuf->db_sglist[0].seg_length = size;
1171 
1172 	return (stmf_dbuf);
1173 
1177 stmf_alloc_err:
1178 	srpt_vmem_free(ioc->ioc_dbuf_pool, buf, size);
1179 
1180 	return (NULL);
1181 }
1182 
1183 void
1184 srpt_ioc_ds_free_dbuf(struct stmf_dbuf_store *ds,
1185 	stmf_data_buf_t *dbuf)
1186 {
1187 	srpt_ioc_t	*ioc;
1188 
1189 	SRPT_DPRINTF_L4("ioc_ds_free_dbuf, invoked buf (%p)",
1190 	    (void *)dbuf);
1191 	ioc = ds->ds_port_private;
1192 
1193 	srpt_vmem_free(ioc->ioc_dbuf_pool, dbuf->db_sglist[0].seg_addr,
1194 	    dbuf->db_buf_size);
1195 	stmf_free(dbuf);
1196 }
1197 
1198 /* Memory arena routines */
1199 
1200 static srpt_vmem_pool_t *
1201 srpt_vmem_create(const char *name, srpt_ioc_t *ioc, ib_memlen_t chunksize,
1202     uint64_t maxsize, ibt_mr_flags_t flags)
1203 {
1204 	srpt_mr_t		*chunk;
1205 	srpt_vmem_pool_t	*result;
1206 
1207 	ASSERT(chunksize <= maxsize);
1208 
1209 	result = kmem_zalloc(sizeof (srpt_vmem_pool_t), KM_SLEEP);
1210 
1211 	result->svp_ioc = ioc;
1212 	result->svp_chunksize = chunksize;
1213 	result->svp_max_size = maxsize;
1214 	result->svp_flags = flags;
1215 
1216 	rw_init(&result->svp_lock, NULL, RW_DRIVER, NULL);
1217 	avl_create(&result->svp_mr_list, srpt_vmem_mr_compare,
1218 	    sizeof (srpt_mr_t), offsetof(srpt_mr_t, mr_avl));
1219 
	chunk = srpt_vmem_chunk_alloc(result, chunksize);
	if (chunk == NULL) {
		avl_destroy(&result->svp_mr_list);
		rw_destroy(&result->svp_lock);
		kmem_free(result, sizeof (srpt_vmem_pool_t));
		return (NULL);
	}

	avl_add(&result->svp_mr_list, chunk);

	/* the chunk obtained may be smaller than requested */
	result->svp_total_size = chunk->mr_len;
1224 
1225 	result->svp_vmem = vmem_create(name,
1226 	    (void*)(uintptr_t)chunk->mr_va,
1227 	    (size_t)chunk->mr_len, SRPT_MR_QUANTSIZE,
1228 	    NULL, NULL, NULL, 0, VM_SLEEP);
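	/*
	 * Note: no import functions or source arena are supplied to
	 * vmem_create() above, so the arena grows only via the explicit
	 * vmem_add() calls made in srpt_vmem_alloc().
	 */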
1229 
1230 	return (result);
1231 }
1232 
1233 static void
1234 srpt_vmem_destroy(srpt_vmem_pool_t *vm_pool)
1235 {
1236 	srpt_mr_t		*chunk;
1237 	srpt_mr_t		*next;
1238 
1239 	rw_enter(&vm_pool->svp_lock, RW_WRITER);
1240 	vmem_destroy(vm_pool->svp_vmem);
1241 
1242 	chunk = avl_first(&vm_pool->svp_mr_list);
1243 
1244 	while (chunk != NULL) {
1245 		next = AVL_NEXT(&vm_pool->svp_mr_list, chunk);
1246 		avl_remove(&vm_pool->svp_mr_list, chunk);
1247 		srpt_vmem_chunk_free(vm_pool, chunk);
1248 		chunk = next;
1249 	}
1250 
1251 	avl_destroy(&vm_pool->svp_mr_list);
1252 
1253 	rw_exit(&vm_pool->svp_lock);
1254 	rw_destroy(&vm_pool->svp_lock);
1255 
1256 	kmem_free(vm_pool, sizeof (srpt_vmem_pool_t));
1257 }
1258 
1259 static void *
1260 srpt_vmem_alloc(srpt_vmem_pool_t *vm_pool, size_t size)
1261 {
1262 	void		*result;
1263 	srpt_mr_t	*next;
1264 	ib_memlen_t	chunklen;
1265 
1266 	ASSERT(vm_pool != NULL);
1267 
1268 	result = vmem_alloc(vm_pool->svp_vmem, size,
1269 	    VM_NOSLEEP | VM_FIRSTFIT);
1270 
1271 	if (result != NULL) {
1272 		/* memory successfully allocated */
1273 		return (result);
1274 	}
1275 
1276 	/* need more vmem */
1277 	rw_enter(&vm_pool->svp_lock, RW_WRITER);
1278 	chunklen = vm_pool->svp_chunksize;
1279 
1280 	if (vm_pool->svp_total_size >= vm_pool->svp_max_size) {
1281 		/* no more room to alloc */
1282 		rw_exit(&vm_pool->svp_lock);
1283 		return (NULL);
1284 	}
1285 
1286 	if ((vm_pool->svp_total_size + chunklen) > vm_pool->svp_max_size) {
1287 		chunklen = vm_pool->svp_max_size - vm_pool->svp_total_size;
1288 	}
1289 
1290 	next = srpt_vmem_chunk_alloc(vm_pool, chunklen);
1291 	if (next != NULL) {
1292 		/*
1293 		 * Note that the size of the chunk we got
1294 		 * may not be the size we requested.  Use the
1295 		 * length returned in the chunk itself.
1296 		 */
1297 		if (vmem_add(vm_pool->svp_vmem, (void*)(uintptr_t)next->mr_va,
1298 		    next->mr_len, VM_NOSLEEP) == NULL) {
1299 			srpt_vmem_chunk_free(vm_pool, next);
1300 			SRPT_DPRINTF_L2("vmem_add failed");
1301 		} else {
1302 			vm_pool->svp_total_size += next->mr_len;
1303 			avl_add(&vm_pool->svp_mr_list, next);
1304 		}
1305 	}
1306 
1307 	rw_exit(&vm_pool->svp_lock);
1308 
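	/*
	 * Retry whether or not the grow above succeeded; another thread
	 * may have added a chunk while svp_lock was held or awaited.
	 */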
1309 	result = vmem_alloc(vm_pool->svp_vmem, size, VM_NOSLEEP | VM_FIRSTFIT);
1310 
1311 	return (result);
1312 }
1313 
1314 static void
1315 srpt_vmem_free(srpt_vmem_pool_t *vm_pool, void *vaddr, size_t size)
1316 {
1317 	vmem_free(vm_pool->svp_vmem, vaddr, size);
1318 }
1319 
1320 static int
1321 srpt_vmem_mr(srpt_vmem_pool_t *vm_pool, void *vaddr, size_t size,
1322     srpt_mr_t *mr)
1323 {
1324 	avl_index_t		where;
1325 	ib_vaddr_t		mrva = (ib_vaddr_t)(uintptr_t)vaddr;
1326 	srpt_mr_t		chunk;
1327 	srpt_mr_t		*nearest;
1328 	ib_vaddr_t		chunk_end;
1329 	int			status = DDI_FAILURE;
1330 
1331 	rw_enter(&vm_pool->svp_lock, RW_READER);
1332 
1333 	chunk.mr_va = mrva;
1334 	nearest = avl_find(&vm_pool->svp_mr_list, &chunk, &where);
1335 
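	/*
	 * If no chunk starts exactly at mrva, the covering chunk, if
	 * one exists, is the chunk with the greatest base address
	 * below mrva.
	 */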
1336 	if (nearest == NULL) {
1337 		nearest = avl_nearest(&vm_pool->svp_mr_list, where,
1338 		    AVL_BEFORE);
1339 	}
1340 
1341 	if (nearest != NULL) {
1342 		/* Verify this chunk contains the specified address range */
1343 		ASSERT(nearest->mr_va <= mrva);
1344 
1345 		chunk_end = nearest->mr_va + nearest->mr_len;
1346 		if (chunk_end >= mrva + size) {
1347 			mr->mr_hdl = nearest->mr_hdl;
1348 			mr->mr_va = mrva;
1349 			mr->mr_len = size;
1350 			mr->mr_lkey = nearest->mr_lkey;
1351 			mr->mr_rkey = nearest->mr_rkey;
1352 			status = DDI_SUCCESS;
1353 		}
1354 	}
1355 
1356 	rw_exit(&vm_pool->svp_lock);
1357 	return (status);
1358 }
1359 
1360 static srpt_mr_t *
1361 srpt_vmem_chunk_alloc(srpt_vmem_pool_t *vm_pool, ib_memlen_t chunksize)
1362 {
1363 	void			*chunk = NULL;
1364 	srpt_mr_t		*result = NULL;
1365 
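	/*
	 * Large KM_NOSLEEP allocations can fail under memory pressure;
	 * keep halving the request until it succeeds or falls below
	 * SRPT_MIN_CHUNKSIZE.
	 */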
1366 	while ((chunk == NULL) && (chunksize >= SRPT_MIN_CHUNKSIZE)) {
1367 		chunk = kmem_alloc(chunksize, KM_NOSLEEP);
1368 		if (chunk == NULL) {
1369 			SRPT_DPRINTF_L2("srpt_vmem_chunk_alloc: "
1370 			    "failed to alloc chunk of %d, trying %d",
			    (int)chunksize, (int)(chunksize / 2));
1372 			chunksize /= 2;
1373 		}
1374 	}
1375 
1376 	if (chunk != NULL) {
1377 		result = srpt_reg_mem(vm_pool, (ib_vaddr_t)(uintptr_t)chunk,
1378 		    chunksize);
1379 		if (result == NULL) {
1380 			SRPT_DPRINTF_L2("srpt_vmem_chunk_alloc: "
1381 			    "chunk registration failed");
1382 			kmem_free(chunk, chunksize);
1383 		}
1384 	}
1385 
1386 	return (result);
1387 }
1388 
1389 static void
1390 srpt_vmem_chunk_free(srpt_vmem_pool_t *vm_pool, srpt_mr_t *mr)
1391 {
1392 	void			*chunk = (void *)(uintptr_t)mr->mr_va;
1393 	ib_memlen_t		chunksize = mr->mr_len;
1394 
1395 	srpt_dereg_mem(vm_pool->svp_ioc, mr);
1396 	kmem_free(chunk, chunksize);
1397 }
1398 
1399 static srpt_mr_t *
1400 srpt_reg_mem(srpt_vmem_pool_t *vm_pool, ib_vaddr_t vaddr, ib_memlen_t len)
1401 {
1402 	srpt_mr_t		*result = NULL;
1403 	ibt_mr_attr_t		mr_attr;
1404 	ibt_mr_desc_t		mr_desc;
1405 	ibt_status_t		status;
1406 	srpt_ioc_t		*ioc = vm_pool->svp_ioc;
1407 
1408 	result = kmem_zalloc(sizeof (srpt_mr_t), KM_NOSLEEP);
1409 	if (result == NULL) {
1410 		SRPT_DPRINTF_L2("srpt_reg_mem: failed to allocate");
1411 		return (NULL);
1412 	}
1413 
1414 	bzero(&mr_attr, sizeof (ibt_mr_attr_t));
1415 	bzero(&mr_desc, sizeof (ibt_mr_desc_t));
1416 
1417 	mr_attr.mr_vaddr = vaddr;
1418 	mr_attr.mr_len = len;
1419 	mr_attr.mr_as = NULL;
1420 	mr_attr.mr_flags = vm_pool->svp_flags;
1421 
1422 	status = ibt_register_mr(ioc->ioc_ibt_hdl, ioc->ioc_pd_hdl,
1423 	    &mr_attr, &result->mr_hdl, &mr_desc);
1424 	if (status != IBT_SUCCESS) {
1425 		SRPT_DPRINTF_L2("srpt_reg_mem: ibt_register_mr "
1426 		    "failed %d", status);
1427 		kmem_free(result, sizeof (srpt_mr_t));
1428 		return (NULL);
1429 	}
1430 
1431 	result->mr_va = mr_attr.mr_vaddr;
1432 	result->mr_len = mr_attr.mr_len;
1433 	result->mr_lkey = mr_desc.md_lkey;
1434 	result->mr_rkey = mr_desc.md_rkey;
1435 
1436 	return (result);
1437 }
1438 
1439 static void
1440 srpt_dereg_mem(srpt_ioc_t *ioc, srpt_mr_t *mr)
1441 {
1442 	ibt_status_t		status;
1443 
1444 	status = ibt_deregister_mr(ioc->ioc_ibt_hdl, mr->mr_hdl);
1445 	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("srpt_dereg_mem, error deregistering MR (%d)",
1447 		    status);
1448 	}
1449 	kmem_free(mr, sizeof (srpt_mr_t));
1450 }
1451 
1452 static int
1453 srpt_vmem_mr_compare(const void *a, const void *b)
1454 {
1455 	srpt_mr_t		*mr1 = (srpt_mr_t *)a;
1456 	srpt_mr_t		*mr2 = (srpt_mr_t *)b;
1457 
1458 	/* sort and match by virtual address */
1459 	if (mr1->mr_va < mr2->mr_va) {
1460 		return (-1);
1461 	} else if (mr1->mr_va > mr2->mr_va) {
1462 		return (1);
1463 	}
1464 
1465 	return (0);
1466 }
1467