1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * I/O Controller functions for the Solaris COMSTAR SCSI RDMA Protocol
29  * Target (SRPT) port provider.
30  */
31 
32 #include <sys/types.h>
33 #include <sys/ddi.h>
35 #include <sys/sunddi.h>
36 #include <sys/atomic.h>
37 #include <sys/sysmacros.h>
38 #include <sys/ib/ibtl/ibti.h>
39 #include <sys/sdt.h>
40 
41 #include "srp.h"
42 #include "srpt_impl.h"
43 #include "srpt_ioc.h"
44 #include "srpt_stp.h"
45 #include "srpt_ch.h"
46 
47 /*
48  * srpt_ioc_srq_size - Tunable parameter that specifies the number
49  * of receive WQ entries that can be posted to the IOC shared
50  * receive queue.
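 *
 * Like other srpt globals, this can typically be overridden at boot
 * time via /etc/system (the value shown is only an example):
 *
 *	set srpt:srpt_ioc_srq_size = 4096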
51  */
52 uint32_t	srpt_ioc_srq_size = SRPT_DEFAULT_IOC_SRQ_SIZE;
53 extern uint16_t srpt_send_msg_depth;
54 extern uint32_t	srpt_iu_size;
55 
56 /* IOC profile capabilities mask must be big-endian */
57 typedef struct srpt_ioc_opcap_bits_s {
58 #if	defined(_BIT_FIELDS_LTOH)
59 	uint8_t		af:1,
60 			at:1,
61 			wf:1,
62 			wt:1,
63 			rf:1,
64 			rt:1,
65 			sf:1,
66 			st:1;
67 #elif	defined(_BIT_FIELDS_HTOL)
68 	uint8_t		st:1,
69 			sf:1,
70 			rt:1,
71 			rf:1,
72 			wt:1,
73 			wf:1,
74 			at:1,
75 			af:1;
76 #else
77 #error	One of _BIT_FIELDS_LTOH or _BIT_FIELDS_HTOL must be defined
78 #endif
79 } srpt_ioc_opcap_bits_t;
80 
81 typedef union {
82 	srpt_ioc_opcap_bits_t	bits;
83 	uint8_t			mask;
84 } srpt_ioc_opcap_mask_t;
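
/*
 * Whichever bit-field layout is in effect, each flag occupies the same
 * position in the resulting byte: st = 0x80, sf = 0x40, rt = 0x20,
 * rf = 0x10, wt = 0x08, wf = 0x04, at = 0x02, af = 0x01.  For example,
 * srpt_ioc_init_profile() sets st, sf, rf and wf, yielding a capability
 * mask of 0x80 | 0x40 | 0x10 | 0x04 = 0xd4 on any host.
 */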
85 
86 /*
87  * vmem arena variables - values derived from iSER
88  */
89 #define	SRPT_MR_QUANTSIZE	0x400			/* 1K */
90 #define	SRPT_MIN_CHUNKSIZE	0x100000		/* 1MB */
91 
/* use less memory on 32-bit kernels, where address space is constrained */
93 #ifdef _LP64
94 #define	SRPT_BUF_MR_CHUNKSIZE	0x1000000		/* 16MB */
95 #define	SRPT_BUF_POOL_MAX	0x40000000		/* 1GB */
96 #else
97 #define	SRPT_BUF_MR_CHUNKSIZE	0x400000		/* 4MB */
98 #define	SRPT_BUF_POOL_MAX	0x4000000		/* 64MB */
99 #endif
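
/*
 * The dbuf arena starts with one registered chunk of
 * SRPT_BUF_MR_CHUNKSIZE bytes and grows a chunk at a time, halving a
 * failed chunk allocation down to SRPT_MIN_CHUNKSIZE, until the pool
 * reaches SRPT_BUF_POOL_MAX; see srpt_vmem_alloc() and
 * srpt_vmem_chunk_alloc().
 */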
100 
101 static ibt_mr_flags_t	srpt_dbuf_mr_flags =
102     IBT_MR_ENABLE_LOCAL_WRITE | IBT_MR_ENABLE_REMOTE_WRITE |
103     IBT_MR_ENABLE_REMOTE_READ;
104 
105 void srpt_ioc_ib_async_hdlr(void *clnt, ibt_hca_hdl_t hdl,
106 	ibt_async_code_t code, ibt_async_event_t *event);
107 
108 static struct ibt_clnt_modinfo_s srpt_ibt_modinfo = {
109 	IBTI_V_CURR,
110 	IBT_STORAGE_DEV,
111 	srpt_ioc_ib_async_hdlr,
112 	NULL,
113 	"srpt"
114 };
115 
116 static srpt_ioc_t *srpt_ioc_init(ib_guid_t guid);
117 static void srpt_ioc_fini(srpt_ioc_t *ioc);
118 
119 static srpt_vmem_pool_t *srpt_vmem_create(const char *name, srpt_ioc_t *ioc,
120     ib_memlen_t chunksize, uint64_t maxsize, ibt_mr_flags_t flags);
121 static void *srpt_vmem_alloc(srpt_vmem_pool_t *vm_pool, size_t size);
122 static int srpt_vmem_mr_compare(const void *a, const void *b);
static srpt_mr_t *srpt_vmem_chunk_alloc(srpt_vmem_pool_t *vm_pool,
124     ib_memlen_t chunksize);
125 static void srpt_vmem_destroy(srpt_vmem_pool_t *vm_pool);
126 static void srpt_vmem_free(srpt_vmem_pool_t *vm_pool, void *vaddr, size_t size);
127 static srpt_mr_t *srpt_reg_mem(srpt_vmem_pool_t *vm_pool, ib_vaddr_t vaddr,
128     ib_memlen_t len);
129 static void srpt_vmem_chunk_free(srpt_vmem_pool_t *vm_pool, srpt_mr_t *mr);
130 static void srpt_dereg_mem(srpt_ioc_t *ioc, srpt_mr_t *mr);
131 static int srpt_vmem_mr(srpt_vmem_pool_t *vm_pool, void *vaddr, size_t size,
132     srpt_mr_t *mr);
133 
134 /*
135  * srpt_ioc_attach() - I/O Controller attach
136  *
137  * Attach to IBTF and initialize I/O controllers. The srpt_ctxt->sc_rwlock
138  * should be held outside of this call.
139  */
140 int
srpt_ioc_attach(void)
142 {
143 	int		status;
144 	int		hca_cnt;
145 	int		hca_ndx;
146 	ib_guid_t	*guid;
147 	srpt_ioc_t	*ioc;
148 
149 	ASSERT(srpt_ctxt != NULL);
150 
151 	/*
152 	 * Attach to IBTF and initialize a list of IB devices.  Each
153 	 * HCA will be represented by an I/O Controller.
154 	 */
155 	status = ibt_attach(&srpt_ibt_modinfo, srpt_ctxt->sc_dip,
	    srpt_ctxt, &srpt_ctxt->sc_ibt_hdl);
157 	if (status != DDI_SUCCESS) {
158 		SRPT_DPRINTF_L1("ioc_attach, ibt_attach failed (0x%x)",
159 		    status);
160 		return (DDI_FAILURE);
161 	}
162 
163 	hca_cnt = ibt_get_hca_list(&guid);
164 	if (hca_cnt < 1) {
165 		/*
166 		 * not a fatal error.  Service will be up and
167 		 * waiting for ATTACH events.
168 		 */
169 		SRPT_DPRINTF_L2("ioc_attach, no HCA found");
170 		return (DDI_SUCCESS);
171 	}
172 
173 	for (hca_ndx = 0; hca_ndx < hca_cnt; hca_ndx++) {
174 		SRPT_DPRINTF_L2("ioc_attach, adding I/O"
175 		    " Controller (%016llx)", (u_longlong_t)guid[hca_ndx]);
176 
177 		ioc = srpt_ioc_init(guid[hca_ndx]);
178 		if (ioc == NULL) {
179 			SRPT_DPRINTF_L1("ioc_attach, ioc_init GUID(%016llx)"
180 			    " failed", (u_longlong_t)guid[hca_ndx]);
181 			continue;
182 		}
183 		list_insert_tail(&srpt_ctxt->sc_ioc_list, ioc);
184 		SRPT_DPRINTF_L2("ioc_attach, I/O Controller ibt HCA hdl (%p)",
185 		    (void *)ioc->ioc_ibt_hdl);
186 		srpt_ctxt->sc_num_iocs++;
187 	}
188 
189 	ibt_free_hca_list(guid, hca_cnt);
190 	SRPT_DPRINTF_L3("ioc_attach, added %d I/O Controller(s)",
191 	    srpt_ctxt->sc_num_iocs);
192 	return (DDI_SUCCESS);
193 }
194 
195 /*
196  * srpt_ioc_detach() - I/O Controller detach
197  *
198  * srpt_ctxt->sc_rwlock should be held outside of this call.
199  */
200 void
srpt_ioc_detach(void)
202 {
203 	srpt_ioc_t	*ioc;
204 
205 	ASSERT(srpt_ctxt != NULL);
206 
207 	while ((ioc = list_head(&srpt_ctxt->sc_ioc_list)) != NULL) {
208 		list_remove(&srpt_ctxt->sc_ioc_list, ioc);
209 		SRPT_DPRINTF_L2("ioc_detach, removing I/O Controller(%p)"
210 		    " (%016llx), ibt_hdl(%p)",
211 		    (void *)ioc,
212 		    ioc ? (u_longlong_t)ioc->ioc_guid : 0x0ll,
213 		    (void *)ioc->ioc_ibt_hdl);
214 		srpt_ioc_fini(ioc);
215 	}
216 
217 	(void) ibt_detach(srpt_ctxt->sc_ibt_hdl);
218 	srpt_ctxt->sc_ibt_hdl = NULL;
219 }
220 
221 /*
222  * srpt_ioc_init() - I/O Controller initialization
223  *
 * Requires srpt_ctxt->sc_rwlock be held outside of call.
225  */
226 static srpt_ioc_t *
227 srpt_ioc_init(ib_guid_t guid)
228 {
229 	ibt_status_t		status;
230 	srpt_ioc_t		*ioc;
231 	ibt_hca_attr_t		hca_attr;
232 	uint_t			iu_ndx;
233 	uint_t			err_ndx;
234 	ibt_mr_attr_t		mr_attr;
235 	ibt_mr_desc_t		mr_desc;
236 	srpt_iu_t		*iu;
237 	ibt_srq_sizes_t		srq_attr;
238 	char			namebuf[32];
239 	size_t			iu_offset;
240 	uint_t			srq_sz;
241 
242 	status = ibt_query_hca_byguid(guid, &hca_attr);
243 	if (status != IBT_SUCCESS) {
244 		SRPT_DPRINTF_L1("ioc_init, HCA query error (%d)",
245 		    status);
246 		return (NULL);
247 	}
248 
249 	ioc = srpt_ioc_get_locked(guid);
250 	if (ioc != NULL) {
251 		SRPT_DPRINTF_L1("ioc_init, HCA already exists");
252 		return (NULL);
253 	}
254 
255 	ioc = kmem_zalloc(sizeof (srpt_ioc_t), KM_SLEEP);
256 
257 	rw_init(&ioc->ioc_rwlock, NULL, RW_DRIVER, NULL);
258 	rw_enter(&ioc->ioc_rwlock, RW_WRITER);
259 
260 	bcopy(&hca_attr, &ioc->ioc_attr, sizeof (ibt_hca_attr_t));
261 
262 	SRPT_DPRINTF_L2("ioc_init, HCA max mr=%d, mrlen=%lld",
263 	    hca_attr.hca_max_memr, (u_longlong_t)hca_attr.hca_max_memr_len);
264 	ioc->ioc_guid   = guid;
265 
266 	status = ibt_open_hca(srpt_ctxt->sc_ibt_hdl, guid, &ioc->ioc_ibt_hdl);
267 	if (status != IBT_SUCCESS) {
268 		SRPT_DPRINTF_L1("ioc_init, IBT open failed (%d)", status);
269 		goto hca_open_err;
270 	}
271 
272 	status = ibt_alloc_pd(ioc->ioc_ibt_hdl, IBT_PD_NO_FLAGS,
273 	    &ioc->ioc_pd_hdl);
274 	if (status != IBT_SUCCESS) {
275 		SRPT_DPRINTF_L1("ioc_init, IBT create PD failed (%d)", status);
276 		goto pd_alloc_err;
277 	}
278 
279 	/*
280 	 * We require hardware support for SRQs.  We use a common SRQ to
281 	 * reduce channel memory consumption.
282 	 */
283 	if ((ioc->ioc_attr.hca_flags & IBT_HCA_SRQ) == 0) {
284 		SRPT_DPRINTF_L0(
285 		    "ioc_init, no SRQ capability, HCA not supported");
286 		goto srq_alloc_err;
287 	}
288 
289 	SRPT_DPRINTF_L3("ioc_init, Using shared receive queues, max srq work"
290 	    " queue size(%d), def size = %d", ioc->ioc_attr.hca_max_srqs_sz,
291 	    srpt_ioc_srq_size);
292 	srq_sz = srq_attr.srq_wr_sz = min(srpt_ioc_srq_size,
293 	    ioc->ioc_attr.hca_max_srqs_sz) - 1;
294 	srq_attr.srq_sgl_sz = 1;
295 
296 	status = ibt_alloc_srq(ioc->ioc_ibt_hdl, IBT_SRQ_NO_FLAGS,
297 	    ioc->ioc_pd_hdl, &srq_attr, &ioc->ioc_srq_hdl,
298 	    &ioc->ioc_srq_attr);
299 	if (status != IBT_SUCCESS) {
300 		SRPT_DPRINTF_L1("ioc_init, IBT create SRQ failed(%d)", status);
301 		goto srq_alloc_err;
302 	}
303 
304 	SRPT_DPRINTF_L2("ioc_init, Using SRQ size(%d), MAX SG size(%d)",
305 	    srq_sz, 1);
306 
307 	ibt_set_srq_private(ioc->ioc_srq_hdl, ioc);
308 
309 	/*
310 	 * Allocate a pool of SRP IU message buffers and post them to
311 	 * the I/O Controller SRQ.  We let the SRQ manage the free IU
312 	 * messages.
313 	 */
314 	ioc->ioc_num_iu_entries = srq_sz;
315 
316 	ioc->ioc_iu_pool = kmem_zalloc(sizeof (srpt_iu_t) *
317 	    ioc->ioc_num_iu_entries, KM_SLEEP);
318 
319 	ioc->ioc_iu_bufs = kmem_alloc(srpt_iu_size *
320 	    ioc->ioc_num_iu_entries, KM_SLEEP);
321 
322 	if ((ioc->ioc_iu_pool == NULL) || (ioc->ioc_iu_bufs == NULL)) {
323 		SRPT_DPRINTF_L1("ioc_init, failed to allocate SRQ IUs");
324 		goto srq_iu_alloc_err;
325 	}
326 
327 	mr_attr.mr_vaddr = (ib_vaddr_t)(uintptr_t)ioc->ioc_iu_bufs;
328 	mr_attr.mr_len   = srpt_iu_size * ioc->ioc_num_iu_entries;
329 	mr_attr.mr_as    = NULL;
330 	mr_attr.mr_flags = IBT_MR_ENABLE_LOCAL_WRITE;
331 
332 	status = ibt_register_mr(ioc->ioc_ibt_hdl, ioc->ioc_pd_hdl,
333 	    &mr_attr, &ioc->ioc_iu_mr_hdl, &mr_desc);
334 	if (status != IBT_SUCCESS) {
335 		SRPT_DPRINTF_L1("ioc_init, IU buffer pool MR err(%d)",
336 		    status);
337 		goto srq_iu_alloc_err;
338 	}
339 
340 	for (iu_ndx = 0, iu = ioc->ioc_iu_pool; iu_ndx <
341 	    ioc->ioc_num_iu_entries; iu_ndx++, iu++) {
342 
343 		iu_offset = (iu_ndx * srpt_iu_size);
344 		iu->iu_buf = (void *)((uintptr_t)ioc->ioc_iu_bufs + iu_offset);
345 
346 		mutex_init(&iu->iu_lock, NULL, MUTEX_DRIVER, NULL);
347 
348 		iu->iu_sge.ds_va  = mr_desc.md_vaddr + iu_offset;
349 		iu->iu_sge.ds_key = mr_desc.md_lkey;
350 		iu->iu_sge.ds_len = srpt_iu_size;
351 		iu->iu_ioc	  = ioc;
352 		iu->iu_pool_ndx   = iu_ndx;
353 
		status = srpt_ioc_post_recv_iu(ioc, &ioc->ioc_iu_pool[iu_ndx]);
		if (status != IBT_SUCCESS) {
			SRPT_DPRINTF_L1("ioc_init, SRQ IU post err(%d)",
			    status);
			/* the cleanup loop below stops short of this IU */
			mutex_destroy(&iu->iu_lock);
			goto srq_iu_post_err;
		}
360 	}
361 
362 	/*
363 	 * Initialize the dbuf vmem arena
364 	 */
	(void) snprintf(namebuf, sizeof (namebuf),
	    "srpt_buf_pool_%016llX", (u_longlong_t)guid);
367 	ioc->ioc_dbuf_pool = srpt_vmem_create(namebuf, ioc,
368 	    SRPT_BUF_MR_CHUNKSIZE, SRPT_BUF_POOL_MAX, srpt_dbuf_mr_flags);
369 
370 	if (ioc->ioc_dbuf_pool == NULL) {
371 		goto stmf_db_alloc_err;
372 	}
373 
374 	/*
375 	 * Allocate the I/O Controller STMF data buffer allocator.  The
376 	 * data store will span all targets associated with this IOC.
377 	 */
378 	ioc->ioc_stmf_ds = stmf_alloc(STMF_STRUCT_DBUF_STORE, 0, 0);
379 	if (ioc->ioc_stmf_ds == NULL) {
380 		SRPT_DPRINTF_L1("ioc_attach, STMF DBUF alloc failure for IOC");
381 		goto stmf_db_alloc_err;
382 	}
383 	ioc->ioc_stmf_ds->ds_alloc_data_buf = &srpt_ioc_ds_alloc_dbuf;
384 	ioc->ioc_stmf_ds->ds_free_data_buf  = &srpt_ioc_ds_free_dbuf;
385 	ioc->ioc_stmf_ds->ds_port_private   = ioc;
386 
387 	rw_exit(&ioc->ioc_rwlock);
388 	return (ioc);
389 
390 stmf_db_alloc_err:
391 	if (ioc->ioc_dbuf_pool != NULL) {
392 		srpt_vmem_destroy(ioc->ioc_dbuf_pool);
393 	}
394 
395 srq_iu_post_err:
396 	if (ioc->ioc_iu_mr_hdl != NULL) {
397 		status = ibt_deregister_mr(ioc->ioc_ibt_hdl,
398 		    ioc->ioc_iu_mr_hdl);
399 		if (status != IBT_SUCCESS) {
400 			SRPT_DPRINTF_L1("ioc_init, error deregistering"
401 			    " memory region (%d)", status);
402 		}
403 	}
404 	for (err_ndx = 0, iu = ioc->ioc_iu_pool; err_ndx < iu_ndx;
405 	    err_ndx++, iu++) {
406 		mutex_destroy(&iu->iu_lock);
407 	}
408 
409 srq_iu_alloc_err:
410 	if (ioc->ioc_iu_bufs != NULL) {
411 		kmem_free(ioc->ioc_iu_bufs, srpt_iu_size *
412 		    ioc->ioc_num_iu_entries);
413 	}
414 	if (ioc->ioc_iu_pool != NULL) {
415 		kmem_free(ioc->ioc_iu_pool,
416 		    sizeof (srpt_iu_t) * ioc->ioc_num_iu_entries);
417 	}
	if (ioc->ioc_srq_hdl != NULL) {
		status = ibt_free_srq(ioc->ioc_srq_hdl);
		if (status != IBT_SUCCESS) {
			SRPT_DPRINTF_L1("ioc_init, error freeing SRQ (%d)",
			    status);
		}
	}
426 
427 srq_alloc_err:
428 	status = ibt_free_pd(ioc->ioc_ibt_hdl, ioc->ioc_pd_hdl);
429 	if (status != IBT_SUCCESS) {
430 		SRPT_DPRINTF_L1("ioc_init, free PD error (%d)", status);
431 	}
432 
433 pd_alloc_err:
434 	status = ibt_close_hca(ioc->ioc_ibt_hdl);
435 	if (status != IBT_SUCCESS) {
436 		SRPT_DPRINTF_L1("ioc_init, close ioc error (%d)", status);
437 	}
438 
439 hca_open_err:
440 	rw_exit(&ioc->ioc_rwlock);
441 	rw_destroy(&ioc->ioc_rwlock);
442 	kmem_free(ioc, sizeof (*ioc));
443 	return (NULL);
444 }
445 
446 /*
447  * srpt_ioc_fini() - I/O Controller Cleanup
448  *
449  * Requires srpt_ctxt->sc_rwlock be held outside of call.
450  */
451 static void
452 srpt_ioc_fini(srpt_ioc_t *ioc)
453 {
454 	int		status;
455 	int		ndx;
456 
457 	/*
458 	 * Note driver flows will have already taken all SRP
459 	 * services running on the I/O Controller off-line.
460 	 */
461 	rw_enter(&ioc->ioc_rwlock, RW_WRITER);
462 	if (ioc->ioc_ibt_hdl != NULL) {
463 		if (ioc->ioc_stmf_ds != NULL) {
464 			stmf_free(ioc->ioc_stmf_ds);
465 		}
466 
467 		if (ioc->ioc_srq_hdl != NULL) {
468 			SRPT_DPRINTF_L4("ioc_fini, freeing SRQ");
469 			status = ibt_free_srq(ioc->ioc_srq_hdl);
470 			if (status != IBT_SUCCESS) {
471 				SRPT_DPRINTF_L1("ioc_fini, free SRQ"
472 				    " error (%d)", status);
473 			}
474 		}
475 
476 		if (ioc->ioc_iu_mr_hdl != NULL) {
477 			status = ibt_deregister_mr(
478 			    ioc->ioc_ibt_hdl, ioc->ioc_iu_mr_hdl);
479 			if (status != IBT_SUCCESS) {
480 				SRPT_DPRINTF_L1("ioc_fini, error deregistering"
481 				    " memory region (%d)", status);
482 			}
483 		}
484 
485 		if (ioc->ioc_iu_bufs != NULL) {
486 			kmem_free(ioc->ioc_iu_bufs, srpt_iu_size *
487 			    ioc->ioc_num_iu_entries);
488 		}
489 
490 		if (ioc->ioc_iu_pool != NULL) {
491 			SRPT_DPRINTF_L4("ioc_fini, freeing IU entries");
492 			for (ndx = 0; ndx < ioc->ioc_num_iu_entries; ndx++) {
493 				mutex_destroy(&ioc->ioc_iu_pool[ndx].iu_lock);
494 			}
495 
496 			SRPT_DPRINTF_L4("ioc_fini, free IU pool struct");
497 			kmem_free(ioc->ioc_iu_pool,
498 			    sizeof (srpt_iu_t) * (ioc->ioc_num_iu_entries));
499 			ioc->ioc_iu_pool = NULL;
500 			ioc->ioc_num_iu_entries = 0;
501 		}
502 
503 		if (ioc->ioc_dbuf_pool != NULL) {
504 			srpt_vmem_destroy(ioc->ioc_dbuf_pool);
505 		}
506 
507 		if (ioc->ioc_pd_hdl != NULL) {
508 			status = ibt_free_pd(ioc->ioc_ibt_hdl,
509 			    ioc->ioc_pd_hdl);
510 			if (status != IBT_SUCCESS) {
511 				SRPT_DPRINTF_L1("ioc_fini, free PD"
512 				    " error (%d)", status);
513 			}
514 		}
515 
516 		status = ibt_close_hca(ioc->ioc_ibt_hdl);
517 		if (status != IBT_SUCCESS) {
			SRPT_DPRINTF_L1(
			    "ioc_fini, close HCA error (%d)", status);
520 		}
521 	}
522 	rw_exit(&ioc->ioc_rwlock);
523 	rw_destroy(&ioc->ioc_rwlock);
524 	kmem_free(ioc, sizeof (srpt_ioc_t));
525 }
526 
527 /*
528  * srpt_ioc_port_active() - I/O Controller port active
529  */
530 static void
531 srpt_ioc_port_active(ibt_async_event_t *event)
532 {
533 	ibt_status_t		status;
534 	srpt_ioc_t		*ioc;
535 	srpt_target_port_t	*tgt = NULL;
536 	boolean_t		online_target = B_FALSE;
537 	stmf_change_status_t	cstatus;
538 
539 	ASSERT(event != NULL);
540 
541 	SRPT_DPRINTF_L3("ioc_port_active event handler, invoked");
542 
543 	/*
544 	 * Find the HCA in question and if the HCA has completed
545 	 * initialization, and the SRP Target service for the
546 	 * the I/O Controller exists, then bind this port.
547 	 */
548 	ioc = srpt_ioc_get(event->ev_hca_guid);
549 
550 	if (ioc == NULL) {
551 		SRPT_DPRINTF_L2("ioc_port_active, I/O Controller not"
552 		    " active");
553 		return;
554 	}
555 
556 	tgt = ioc->ioc_tgt_port;
	if (tgt == NULL) {
		SRPT_DPRINTF_L2("ioc_port_active, I/O Controller target"
		    " port undefined");
		return;
	}

564 	/*
565 	 * We take the target lock here to serialize this operation
566 	 * with any STMF initiated target state transitions.  If
567 	 * SRP is off-line then the service handle is NULL.
568 	 */
569 	mutex_enter(&tgt->tp_lock);
570 
571 	if (tgt->tp_ibt_svc_hdl != NULL) {
572 		status = srpt_ioc_svc_bind(tgt, event->ev_port);
573 		if ((status != IBT_SUCCESS) &&
574 		    (status != IBT_HCA_PORT_NOT_ACTIVE)) {
575 			SRPT_DPRINTF_L1("ioc_port_active, bind failed (%d)",
576 			    status);
577 		}
578 	} else {
579 		/* if we were offline because of no ports, try onlining now */
580 		if ((tgt->tp_num_active_ports == 0) &&
581 		    (tgt->tp_requested_state != tgt->tp_state) &&
582 		    (tgt->tp_requested_state == SRPT_TGT_STATE_ONLINE)) {
583 			online_target = B_TRUE;
584 			cstatus.st_completion_status = STMF_SUCCESS;
585 			cstatus.st_additional_info = "port active";
586 		}
587 	}
588 
589 	mutex_exit(&tgt->tp_lock);
590 
591 	if (online_target) {
592 		stmf_status_t	ret;
593 
594 		ret = stmf_ctl(STMF_CMD_LPORT_ONLINE, tgt->tp_lport, &cstatus);
595 
596 		if (ret == STMF_SUCCESS) {
597 			SRPT_DPRINTF_L1("ioc_port_active, port %d active, "
598 			    "target %016llx online requested", event->ev_port,
599 			    (u_longlong_t)ioc->ioc_guid);
600 		} else if (ret != STMF_ALREADY) {
601 			SRPT_DPRINTF_L1("ioc_port_active, port %d active, "
602 			    "target %016llx failed online request: %d",
603 			    event->ev_port, (u_longlong_t)ioc->ioc_guid,
604 			    (int)ret);
605 		}
606 	}
607 }
608 
609 /*
610  * srpt_ioc_port_down()
611  */
612 static void
613 srpt_ioc_port_down(ibt_async_event_t *event)
614 {
615 	srpt_ioc_t		*ioc;
616 	srpt_target_port_t	*tgt;
617 	srpt_channel_t		*ch;
618 	srpt_channel_t		*next_ch;
619 	boolean_t		offline_target = B_FALSE;
620 	stmf_change_status_t	cstatus;
621 
622 	SRPT_DPRINTF_L3("ioc_port_down event handler, invoked");
623 
624 	/*
625 	 * Find the HCA in question and if the HCA has completed
626 	 * initialization, and the SRP Target service for the
627 	 * the I/O Controller exists, then logout initiators
628 	 * through this port.
629 	 */
630 	ioc = srpt_ioc_get(event->ev_hca_guid);
631 
632 	if (ioc == NULL) {
633 		SRPT_DPRINTF_L2("ioc_port_down, I/O Controller not"
634 		    " active");
635 		return;
636 	}
637 
638 	/*
639 	 * We only have one target now, but we could go through all
640 	 * SCSI target ports if more are added.
641 	 */
642 	tgt = ioc->ioc_tgt_port;
643 	if (tgt == NULL) {
644 		SRPT_DPRINTF_L2("ioc_port_down, no I/O Controller target"
645 		    " undefined");
646 		return;
647 	}
648 	mutex_enter(&tgt->tp_lock);
649 
650 	/*
651 	 * For all channel's logged in through this port, initiate a
652 	 * disconnect.
653 	 */
654 	mutex_enter(&tgt->tp_ch_list_lock);
655 	ch = list_head(&tgt->tp_ch_list);
656 	while (ch != NULL) {
657 		next_ch = list_next(&tgt->tp_ch_list, ch);
658 		if (ch->ch_session && (ch->ch_session->ss_hw_port ==
659 		    event->ev_port)) {
660 			srpt_ch_disconnect(ch);
661 		}
662 		ch = next_ch;
663 	}
664 	mutex_exit(&tgt->tp_ch_list_lock);
665 
666 	tgt->tp_num_active_ports--;
667 
668 	/* if we have no active ports, take the target offline */
669 	if ((tgt->tp_num_active_ports == 0) &&
670 	    (tgt->tp_state == SRPT_TGT_STATE_ONLINE)) {
671 		cstatus.st_completion_status = STMF_SUCCESS;
672 		cstatus.st_additional_info = "no ports active";
673 		offline_target = B_TRUE;
674 	}
675 
676 	mutex_exit(&tgt->tp_lock);
677 
678 	if (offline_target) {
679 		stmf_status_t	ret;
680 
681 		ret = stmf_ctl(STMF_CMD_LPORT_OFFLINE, tgt->tp_lport, &cstatus);
682 
683 		if (ret == STMF_SUCCESS) {
684 			SRPT_DPRINTF_L1("ioc_port_down, port %d down, target "
685 			    "%016llx offline requested", event->ev_port,
686 			    (u_longlong_t)ioc->ioc_guid);
687 		} else if (ret != STMF_ALREADY) {
688 			SRPT_DPRINTF_L1("ioc_port_down, port %d down, target "
689 			    "%016llx failed offline request: %d",
690 			    event->ev_port,
691 			    (u_longlong_t)ioc->ioc_guid, (int)ret);
692 		}
693 	}
694 }
695 
696 /*
697  * srpt_ioc_ib_async_hdlr - I/O Controller IB asynchronous events
698  */
699 /* ARGSUSED */
700 void
701 srpt_ioc_ib_async_hdlr(void *clnt, ibt_hca_hdl_t hdl,
702 	ibt_async_code_t code, ibt_async_event_t *event)
703 {
704 	srpt_ioc_t		*ioc;
705 	srpt_channel_t		*ch;
706 
707 	switch (code) {
708 	case IBT_EVENT_PORT_UP:
709 		srpt_ioc_port_active(event);
710 		break;
711 
712 	case IBT_ERROR_PORT_DOWN:
713 		srpt_ioc_port_down(event);
714 		break;
715 
716 	case IBT_HCA_ATTACH_EVENT:
717 		rw_enter(&srpt_ctxt->sc_rwlock, RW_WRITER);
718 		ioc = srpt_ioc_init(event->ev_hca_guid);
719 
720 		if (ioc == NULL) {
721 			rw_exit(&srpt_ctxt->sc_rwlock);
722 			SRPT_DPRINTF_L1("ib_async_hdlr, HCA_ATTACH"
723 			    " event failed to initialize HCA (0x%016llx)",
724 			    (u_longlong_t)event->ev_hca_guid);
725 			return;
726 		}
727 		SRPT_DPRINTF_L2("HCA_ATTACH_EVENT: I/O Controller"
728 		    " ibt hdl (%p)",
729 		    (void *)ioc->ioc_ibt_hdl);
730 
731 		rw_enter(&ioc->ioc_rwlock, RW_WRITER);
732 		ioc->ioc_tgt_port = srpt_stp_alloc_port(ioc, ioc->ioc_guid);
733 		if (ioc->ioc_tgt_port == NULL) {
734 			SRPT_DPRINTF_L1("ioc_ib_async_hdlr, alloc SCSI "
735 			    "target port error for HCA (0x%016llx)",
736 			    (u_longlong_t)event->ev_hca_guid);
737 			rw_exit(&ioc->ioc_rwlock);
738 			srpt_ioc_fini(ioc);
739 			rw_exit(&srpt_ctxt->sc_rwlock);
740 			return;
741 		}
742 
743 		/*
744 		 * New HCA added with default SCSI Target Port, SRP service
745 		 * will be started when SCSI Target Port is brought
746 		 * on-line by STMF.
747 		 */
748 		srpt_ctxt->sc_num_iocs++;
749 		list_insert_tail(&srpt_ctxt->sc_ioc_list, ioc);
750 
751 		rw_exit(&ioc->ioc_rwlock);
752 		rw_exit(&srpt_ctxt->sc_rwlock);
753 		break;
754 
755 	case IBT_HCA_DETACH_EVENT:
756 		SRPT_DPRINTF_L1(
757 		    "ioc_iob_async_hdlr, HCA_DETACH_EVENT received.");
758 		break;
759 
760 	case IBT_EVENT_EMPTY_CHAN:
761 		/* Channel in ERROR state is now empty */
762 		ch = (srpt_channel_t *)ibt_get_chan_private(event->ev_chan_hdl);
763 		SRPT_DPRINTF_L3(
764 		    "ioc_iob_async_hdlr, received empty channel error on %p",
765 		    (void *)ch);
766 		break;
767 
768 	default:
769 		SRPT_DPRINTF_L2("ioc_ib_async_hdlr, event not "
770 		    "handled (%d)", code);
771 		break;
772 	}
773 }
774 
775 /*
776  * srpt_ioc_svc_bind()
777  */
778 ibt_status_t
779 srpt_ioc_svc_bind(srpt_target_port_t *tgt, uint_t portnum)
780 {
781 	ibt_status_t		status;
782 	srpt_hw_port_t		*port;
783 	ibt_hca_portinfo_t	*portinfo;
784 	uint_t			qportinfo_sz;
785 	uint_t			qportnum;
786 	ib_gid_t		new_gid;
787 	srpt_ioc_t		*ioc;
788 	srpt_session_t		sess;
789 
790 	ASSERT(tgt != NULL);
791 	ASSERT(tgt->tp_ioc != NULL);
792 	ioc = tgt->tp_ioc;
793 
794 	if (tgt->tp_ibt_svc_hdl == NULL) {
795 		SRPT_DPRINTF_L2("ioc_svc_bind, NULL SCSI target port"
796 		    " service");
797 		return (IBT_INVALID_PARAM);
798 	}
799 
800 	if (portnum == 0 || portnum > tgt->tp_nports) {
801 		SRPT_DPRINTF_L2("ioc_svc_bind, bad port (%d)", portnum);
802 		return (IBT_INVALID_PARAM);
803 	}
804 	status = ibt_query_hca_ports(ioc->ioc_ibt_hdl, portnum,
805 	    &portinfo, &qportnum, &qportinfo_sz);
806 	if (status != IBT_SUCCESS) {
807 		SRPT_DPRINTF_L1("ioc_svc_bind, query port error (%d)",
808 		    portnum);
809 		return (IBT_INVALID_PARAM);
810 	}
811 
812 	ASSERT(portinfo != NULL);
813 
814 	/*
815 	 * If port is not active do nothing, caller should attempt to bind
816 	 * after the port goes active.
817 	 */
818 	if (portinfo->p_linkstate != IBT_PORT_ACTIVE) {
819 		SRPT_DPRINTF_L2("ioc_svc_bind, port %d not in active state",
820 		    portnum);
821 		ibt_free_portinfo(portinfo, qportinfo_sz);
822 		return (IBT_HCA_PORT_NOT_ACTIVE);
823 	}
824 
825 	port    = &tgt->tp_hw_port[portnum-1];
826 	new_gid = portinfo->p_sgid_tbl[0];
827 	ibt_free_portinfo(portinfo, qportinfo_sz);
828 
829 	/*
830 	 * If previously bound and the port GID has changed,
831 	 * unbind the old GID.
832 	 */
833 	if (port->hwp_bind_hdl != NULL) {
834 		if (new_gid.gid_guid != port->hwp_gid.gid_guid ||
835 		    new_gid.gid_prefix != port->hwp_gid.gid_prefix) {
836 			SRPT_DPRINTF_L2("ioc_svc_bind, unregister current"
837 			    " bind");
838 			(void) ibt_unbind_service(tgt->tp_ibt_svc_hdl,
839 			    port->hwp_bind_hdl);
840 			port->hwp_bind_hdl = NULL;
841 		} else {
842 			SRPT_DPRINTF_L2("ioc_svc_bind, port %d already bound",
843 			    portnum);
844 		}
845 	}
846 
847 	/* bind the new port GID */
848 	if (port->hwp_bind_hdl == NULL) {
849 		SRPT_DPRINTF_L2("ioc_svc_bind, bind service, %016llx:%016llx",
850 		    (u_longlong_t)new_gid.gid_prefix,
851 		    (u_longlong_t)new_gid.gid_guid);
852 
853 		/*
854 		 * Pass SCSI Target Port as CM private data, the target will
855 		 * always exist while this service is bound.
856 		 */
857 		status = ibt_bind_service(tgt->tp_ibt_svc_hdl, new_gid, NULL,
858 		    tgt, &port->hwp_bind_hdl);
859 		if (status != IBT_SUCCESS && status != IBT_CM_SERVICE_EXISTS) {
860 			SRPT_DPRINTF_L1("ioc_svc_bind, bind error (%d)",
861 			    status);
862 			return (status);
863 		}
864 		port->hwp_gid.gid_prefix = new_gid.gid_prefix;
865 		port->hwp_gid.gid_guid = new_gid.gid_guid;
866 	}
867 
868 	/* port is now active */
869 	tgt->tp_num_active_ports++;
870 
	/* Set up a transient session structure for the DTrace probe. */
872 	bzero(&sess, sizeof (srpt_session_t));
873 	ALIAS_STR(sess.ss_t_gid, new_gid.gid_prefix, new_gid.gid_guid);
874 	EUI_STR(sess.ss_t_name, tgt->tp_ibt_svc_id);
875 
876 	DTRACE_SRP_1(service__up, srpt_session_t, &sess);
877 
878 	return (IBT_SUCCESS);
879 }
880 
881 /*
882  * srpt_ioc_svc_unbind()
883  */
884 void
885 srpt_ioc_svc_unbind(srpt_target_port_t *tgt, uint_t portnum)
886 {
887 	srpt_hw_port_t		*port;
888 	srpt_session_t		sess;
889 	ibt_status_t		ret;
890 
891 	if (tgt == NULL) {
892 		SRPT_DPRINTF_L2("ioc_svc_unbind, SCSI target does not exist");
893 		return;
894 	}
895 
896 	if (portnum == 0 || portnum > tgt->tp_nports) {
897 		SRPT_DPRINTF_L2("ioc_svc_unbind, bad port (%d)", portnum);
898 		return;
899 	}
900 	port = &tgt->tp_hw_port[portnum-1];
901 
	/* Set up a transient session structure for the DTrace probe. */
903 	bzero(&sess, sizeof (srpt_session_t));
904 	ALIAS_STR(sess.ss_t_gid, port->hwp_gid.gid_prefix,
905 	    port->hwp_gid.gid_guid);
906 	EUI_STR(sess.ss_t_name, tgt->tp_ibt_svc_id);
907 
908 	DTRACE_SRP_1(service__down, srpt_session_t, &sess);
909 
910 	if (tgt->tp_ibt_svc_hdl != NULL && port->hwp_bind_hdl != NULL) {
911 		SRPT_DPRINTF_L2("ioc_svc_unbind, unregister current bind");
912 		ret = ibt_unbind_service(tgt->tp_ibt_svc_hdl,
913 		    port->hwp_bind_hdl);
914 		if (ret != IBT_SUCCESS) {
915 			SRPT_DPRINTF_L1(
916 			    "ioc_svc_unbind, unregister port %d failed: %d",
917 			    portnum, ret);
918 		} else {
919 			port->hwp_bind_hdl = NULL;
920 			port->hwp_gid.gid_prefix = 0;
921 			port->hwp_gid.gid_guid = 0;
922 		}
923 	}
924 }
925 
926 /*
927  * srpt_ioc_svc_unbind_all()
928  */
929 void
930 srpt_ioc_svc_unbind_all(srpt_target_port_t *tgt)
931 {
932 	uint_t		portnum;
933 
934 	if (tgt == NULL) {
935 		SRPT_DPRINTF_L2("ioc_svc_unbind_all, NULL SCSI target port"
936 		    " specified");
937 		return;
938 	}
939 	for (portnum = 1; portnum <= tgt->tp_nports; portnum++) {
940 		srpt_ioc_svc_unbind(tgt, portnum);
941 	}
942 }
943 
944 /*
945  * srpt_ioc_get_locked()
946  *
 * Requires srpt_ctxt->sc_rwlock be held outside of call.
948  */
949 srpt_ioc_t *
950 srpt_ioc_get_locked(ib_guid_t guid)
951 {
952 	srpt_ioc_t	*ioc;
953 
954 	ioc = list_head(&srpt_ctxt->sc_ioc_list);
955 	while (ioc != NULL) {
956 		if (ioc->ioc_guid == guid) {
957 			break;
958 		}
959 		ioc = list_next(&srpt_ctxt->sc_ioc_list, ioc);
960 	}
961 	return (ioc);
962 }
963 
964 /*
965  * srpt_ioc_get()
966  */
967 srpt_ioc_t *
968 srpt_ioc_get(ib_guid_t guid)
969 {
970 	srpt_ioc_t	*ioc;
971 
972 	rw_enter(&srpt_ctxt->sc_rwlock, RW_READER);
973 	ioc = srpt_ioc_get_locked(guid);
974 	rw_exit(&srpt_ctxt->sc_rwlock);
975 	return (ioc);
976 }
977 
978 /*
979  * srpt_ioc_post_recv_iu()
980  */
981 ibt_status_t
982 srpt_ioc_post_recv_iu(srpt_ioc_t *ioc, srpt_iu_t *iu)
983 {
984 	ibt_status_t		status;
985 	ibt_recv_wr_t		wr;
986 	uint_t			posted;
987 
988 	ASSERT(ioc != NULL);
989 	ASSERT(iu != NULL);
990 
991 	wr.wr_id  = (ibt_wrid_t)(uintptr_t)iu;
992 	wr.wr_nds = 1;
993 	wr.wr_sgl = &iu->iu_sge;
994 	posted    = 0;
995 
996 	status = ibt_post_srq(ioc->ioc_srq_hdl, &wr, 1, &posted);
997 	if (status != IBT_SUCCESS) {
998 		SRPT_DPRINTF_L2("ioc_post_recv_iu, post error (%d)",
999 		    status);
1000 	}
1001 	return (status);
1002 }
1003 
1004 /*
1005  * srpt_ioc_repost_recv_iu()
1006  */
1007 void
1008 srpt_ioc_repost_recv_iu(srpt_ioc_t *ioc, srpt_iu_t *iu)
1009 {
1010 	srpt_channel_t		*ch;
1011 	ibt_status_t		status;
1012 
1013 	ASSERT(iu != NULL);
1014 	ASSERT(mutex_owned(&iu->iu_lock));
1015 
1016 	/*
1017 	 * Some additional sanity checks while in debug state, all STMF
1018 	 * related task activities should be complete prior to returning
1019 	 * this IU to the available pool.
1020 	 */
1021 	ASSERT(iu->iu_stmf_task == NULL);
1022 	ASSERT(iu->iu_sq_posted_cnt == 0);
1023 
1024 	ch = iu->iu_ch;
1025 	iu->iu_ch = NULL;
1026 	iu->iu_num_rdescs = 0;
1027 	iu->iu_rdescs = NULL;
1028 	iu->iu_tot_xfer_len = 0;
1029 	iu->iu_tag = 0;
1030 	iu->iu_flags = 0;
1031 	iu->iu_sq_posted_cnt = 0;
1032 
1033 	status = srpt_ioc_post_recv_iu(ioc, iu);
1034 
1035 	if (status != IBT_SUCCESS) {
1036 		/*
1037 		 * Very bad, we should initiate a shutdown of the I/O
1038 		 * Controller here, off-lining any targets associated
1039 		 * with this I/O Controller (and therefore disconnecting
1040 		 * any logins that remain).
1041 		 *
1042 		 * In practice this should never happen so we put
1043 		 * the code near the bottom of the implementation list.
1044 		 */
1045 		SRPT_DPRINTF_L0("ioc_repost_recv_iu, error RX IU (%d)",
1046 		    status);
1047 		ASSERT(0);
1048 	} else if (ch != NULL) {
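		/*
		 * Credit the SRP flow-control window; the accumulated
		 * delta is reported back to the initiator as REQUEST
		 * LIMIT DELTA in subsequent responses on this channel.
		 */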
1049 		atomic_inc_32(&ch->ch_req_lim_delta);
1050 	}
1051 }
1052 
1053 /*
1054  * srpt_ioc_init_profile()
1055  *
1056  * SRP I/O Controller serialization lock must be held when this
1057  * routine is invoked.
1058  */
1059 void
1060 srpt_ioc_init_profile(srpt_ioc_t *ioc)
1061 {
1062 	srpt_ioc_opcap_mask_t		capmask = {0};
1063 
1064 	ASSERT(ioc != NULL);
1065 
1066 	ioc->ioc_profile.ioc_guid = h2b64(ioc->ioc_guid);
1067 	(void) memcpy(ioc->ioc_profile.ioc_id_string,
1068 	    "Solaris SRP Target 0.9a", 23);
1069 
1070 	/*
1071 	 * Note vendor ID and subsystem ID are 24 bit values.  Low order
1072 	 * 8 bits in vendor ID field is slot and is initialized to zero.
1073 	 * Low order 8 bits of subsystem ID is a reserved field and
1074 	 * initialized to zero.
1075 	 */
1076 	ioc->ioc_profile.ioc_vendorid =
1077 	    h2b32((uint32_t)(ioc->ioc_attr.hca_vendor_id << 8));
1078 	ioc->ioc_profile.ioc_deviceid =
1079 	    h2b32((uint32_t)ioc->ioc_attr.hca_device_id);
1080 	ioc->ioc_profile.ioc_device_ver =
1081 	    h2b16((uint16_t)ioc->ioc_attr.hca_version_id);
1082 	ioc->ioc_profile.ioc_subsys_vendorid =
1083 	    h2b32((uint32_t)(ioc->ioc_attr.hca_vendor_id << 8));
1084 	ioc->ioc_profile.ioc_subsys_id = h2b32(0);
1085 	ioc->ioc_profile.ioc_io_class = h2b16(SRP_REV_16A_IO_CLASS);
1086 	ioc->ioc_profile.ioc_io_subclass = h2b16(SRP_IO_SUBCLASS);
1087 	ioc->ioc_profile.ioc_protocol = h2b16(SRP_PROTOCOL);
1088 	ioc->ioc_profile.ioc_protocol_ver = h2b16(SRP_PROTOCOL_VERSION);
1089 	ioc->ioc_profile.ioc_send_msg_qdepth = h2b16(srpt_send_msg_depth);
1090 	ioc->ioc_profile.ioc_rdma_read_qdepth =
1091 	    ioc->ioc_attr.hca_max_rdma_out_chan;
1092 	ioc->ioc_profile.ioc_send_msg_sz = h2b32(srpt_iu_size);
1093 	ioc->ioc_profile.ioc_rdma_xfer_sz = h2b32(SRPT_DEFAULT_MAX_RDMA_SIZE);
1094 
1095 	capmask.bits.st = 1;	/* Messages can be sent to IOC */
1096 	capmask.bits.sf = 1;	/* Messages can be sent from IOC */
1097 	capmask.bits.rf = 1;	/* RDMA Reads can be sent from IOC */
1098 	capmask.bits.wf = 1;	/* RDMA Writes can be sent from IOC */
1099 	ioc->ioc_profile.ioc_ctrl_opcap_mask = capmask.mask;
1100 
1101 	/*
1102 	 * We currently only have one target, but if we had a list we would
1103 	 * go through that list and only count those that are ONLINE when
1104 	 * setting the services count and entries.
1105 	 */
1106 	if (ioc->ioc_tgt_port->tp_srp_enabled) {
1107 		ioc->ioc_profile.ioc_service_entries = 1;
1108 		ioc->ioc_svc.srv_id = h2b64(ioc->ioc_guid);
1109 		(void) snprintf((char *)ioc->ioc_svc.srv_name,
1110 		    IB_DM_MAX_SVC_NAME_LEN, "SRP.T10:%016llx",
1111 		    (u_longlong_t)ioc->ioc_guid);
1112 	} else {
1113 		ioc->ioc_profile.ioc_service_entries = 0;
1114 		ioc->ioc_svc.srv_id = 0;
1115 	}
1116 }
1117 
1118 /*
1119  * srpt_ioc_ds_alloc_dbuf()
1120  */
1121 /* ARGSUSED */
1122 stmf_data_buf_t *
1123 srpt_ioc_ds_alloc_dbuf(struct scsi_task *task, uint32_t size,
1124 	uint32_t *pminsize, uint32_t flags)
1125 {
1126 	srpt_iu_t		*iu;
1127 	srpt_ioc_t		*ioc;
1128 	srpt_ds_dbuf_t		*dbuf;
1129 	stmf_data_buf_t		*stmf_dbuf;
1130 	void			*buf;
1131 	srpt_mr_t		mr;
1132 
1133 	ASSERT(task != NULL);
1134 	iu  = task->task_port_private;
1135 	ioc = iu->iu_ioc;
1136 
1137 	SRPT_DPRINTF_L4("ioc_ds_alloc_dbuf, invoked ioc(%p)"
1138 	    " size(%d), flags(%x)",
1139 	    (void *)ioc, size, flags);
1140 
1141 	buf = srpt_vmem_alloc(ioc->ioc_dbuf_pool, size);
1142 	if (buf == NULL) {
1143 		return (NULL);
1144 	}
1145 
1146 	if (srpt_vmem_mr(ioc->ioc_dbuf_pool, buf, size, &mr) != 0) {
1147 		goto stmf_alloc_err;
1148 	}
1149 
1150 	stmf_dbuf = stmf_alloc(STMF_STRUCT_DATA_BUF, sizeof (srpt_ds_dbuf_t),
1151 	    0);
1152 	if (stmf_dbuf == NULL) {
1153 		SRPT_DPRINTF_L2("ioc_ds_alloc_dbuf, stmf_alloc failed");
1154 		goto stmf_alloc_err;
1155 	}
1156 
1157 	dbuf = stmf_dbuf->db_port_private;
1158 	dbuf->db_stmf_buf = stmf_dbuf;
1159 	dbuf->db_mr_hdl = mr.mr_hdl;
1160 	dbuf->db_ioc = ioc;
1161 	dbuf->db_sge.ds_va = mr.mr_va;
1162 	dbuf->db_sge.ds_key = mr.mr_lkey;
1163 	dbuf->db_sge.ds_len = size;
1164 
1165 	stmf_dbuf->db_buf_size = size;
1166 	stmf_dbuf->db_data_size = size;
1167 	stmf_dbuf->db_relative_offset = 0;
1168 	stmf_dbuf->db_flags = 0;
1169 	stmf_dbuf->db_xfer_status = 0;
1170 	stmf_dbuf->db_sglist_length = 1;
1171 	stmf_dbuf->db_sglist[0].seg_addr = buf;
1172 	stmf_dbuf->db_sglist[0].seg_length = size;
1173 
1174 	return (stmf_dbuf);
1175 
1179 stmf_alloc_err:
1180 	srpt_vmem_free(ioc->ioc_dbuf_pool, buf, size);
1181 
1182 	return (NULL);
1183 }
1184 
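/*
 * srpt_ioc_ds_free_dbuf()
 */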
1185 void
1186 srpt_ioc_ds_free_dbuf(struct stmf_dbuf_store *ds,
1187 	stmf_data_buf_t *dbuf)
1188 {
1189 	srpt_ioc_t	*ioc;
1190 
1191 	SRPT_DPRINTF_L4("ioc_ds_free_dbuf, invoked buf (%p)",
1192 	    (void *)dbuf);
1193 	ioc = ds->ds_port_private;
1194 
1195 	srpt_vmem_free(ioc->ioc_dbuf_pool, dbuf->db_sglist[0].seg_addr,
1196 	    dbuf->db_buf_size);
1197 	stmf_free(dbuf);
1198 }
1199 
1200 /* Memory arena routines */
1201 
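/*
 * srpt_vmem_create()
 *
 * Create a vmem arena backed by IB-registered memory.  An initial chunk
 * of 'chunksize' bytes is allocated and registered; the arena may grow,
 * one chunk at a time, to at most 'maxsize' bytes.
 */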
1202 static srpt_vmem_pool_t *
1203 srpt_vmem_create(const char *name, srpt_ioc_t *ioc, ib_memlen_t chunksize,
1204     uint64_t maxsize, ibt_mr_flags_t flags)
1205 {
1206 	srpt_mr_t		*chunk;
1207 	srpt_vmem_pool_t	*result;
1208 
1209 	ASSERT(chunksize <= maxsize);
1210 
1211 	result = kmem_zalloc(sizeof (srpt_vmem_pool_t), KM_SLEEP);
1212 
1213 	result->svp_ioc = ioc;
1214 	result->svp_chunksize = chunksize;
1215 	result->svp_max_size = maxsize;
1216 	result->svp_flags = flags;
1217 
1218 	rw_init(&result->svp_lock, NULL, RW_DRIVER, NULL);
1219 	avl_create(&result->svp_mr_list, srpt_vmem_mr_compare,
1220 	    sizeof (srpt_mr_t), offsetof(srpt_mr_t, mr_avl));
1221 
	chunk = srpt_vmem_chunk_alloc(result, chunksize);
	if (chunk == NULL) {
		/* initial chunk allocation or registration failed */
		avl_destroy(&result->svp_mr_list);
		rw_destroy(&result->svp_lock);
		kmem_free(result, sizeof (srpt_vmem_pool_t));
		return (NULL);
	}

	avl_add(&result->svp_mr_list, chunk);
	result->svp_total_size = chunksize;

	result->svp_vmem = vmem_create(name,
	    (void *)(uintptr_t)chunk->mr_va,
	    (size_t)chunk->mr_len, SRPT_MR_QUANTSIZE,
	    NULL, NULL, NULL, 0, VM_SLEEP);
1231 
1232 	return (result);
1233 }
1234 
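/*
 * srpt_vmem_destroy()
 *
 * Destroy the arena, deregister and free each backing chunk, and free
 * the pool itself.
 */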
1235 static void
1236 srpt_vmem_destroy(srpt_vmem_pool_t *vm_pool)
1237 {
1238 	srpt_mr_t		*chunk;
1239 	srpt_mr_t		*next;
1240 
1241 	rw_enter(&vm_pool->svp_lock, RW_WRITER);
1242 	vmem_destroy(vm_pool->svp_vmem);
1243 
1244 	chunk = avl_first(&vm_pool->svp_mr_list);
1245 
1246 	while (chunk != NULL) {
1247 		next = AVL_NEXT(&vm_pool->svp_mr_list, chunk);
1248 		avl_remove(&vm_pool->svp_mr_list, chunk);
1249 		srpt_vmem_chunk_free(vm_pool, chunk);
1250 		chunk = next;
1251 	}
1252 
1253 	avl_destroy(&vm_pool->svp_mr_list);
1254 
1255 	rw_exit(&vm_pool->svp_lock);
1256 	rw_destroy(&vm_pool->svp_lock);
1257 
1258 	kmem_free(vm_pool, sizeof (srpt_vmem_pool_t));
1259 }
1260 
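/*
 * srpt_vmem_alloc()
 *
 * Allocate 'size' bytes from the pool's arena.  If the arena is
 * exhausted, try to grow it by one chunk (bounded by the pool maximum)
 * and retry the allocation once.
 */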
1261 static void *
1262 srpt_vmem_alloc(srpt_vmem_pool_t *vm_pool, size_t size)
1263 {
1264 	void		*result;
1265 	srpt_mr_t	*next;
1266 	ib_memlen_t	chunklen;
1267 
1268 	ASSERT(vm_pool != NULL);
1269 
1270 	result = vmem_alloc(vm_pool->svp_vmem, size,
1271 	    VM_NOSLEEP | VM_FIRSTFIT);
1272 
1273 	if (result != NULL) {
1274 		/* memory successfully allocated */
1275 		return (result);
1276 	}
1277 
1278 	/* need more vmem */
1279 	rw_enter(&vm_pool->svp_lock, RW_WRITER);
1280 	chunklen = vm_pool->svp_chunksize;
1281 
1282 	if (vm_pool->svp_total_size >= vm_pool->svp_max_size) {
1283 		/* no more room to alloc */
1284 		rw_exit(&vm_pool->svp_lock);
1285 		return (NULL);
1286 	}
1287 
1288 	if ((vm_pool->svp_total_size + chunklen) > vm_pool->svp_max_size) {
1289 		chunklen = vm_pool->svp_max_size - vm_pool->svp_total_size;
1290 	}
1291 
1292 	next = srpt_vmem_chunk_alloc(vm_pool, chunklen);
1293 	if (next != NULL) {
1294 		/*
1295 		 * Note that the size of the chunk we got
1296 		 * may not be the size we requested.  Use the
1297 		 * length returned in the chunk itself.
1298 		 */
1299 		if (vmem_add(vm_pool->svp_vmem, (void*)(uintptr_t)next->mr_va,
1300 		    next->mr_len, VM_NOSLEEP) == NULL) {
1301 			srpt_vmem_chunk_free(vm_pool, next);
1302 			SRPT_DPRINTF_L2("vmem_add failed");
1303 		} else {
1304 			vm_pool->svp_total_size += next->mr_len;
1305 			avl_add(&vm_pool->svp_mr_list, next);
1306 		}
1307 	}
1308 
1309 	rw_exit(&vm_pool->svp_lock);
1310 
1311 	result = vmem_alloc(vm_pool->svp_vmem, size, VM_NOSLEEP | VM_FIRSTFIT);
1312 
1313 	return (result);
1314 }
1315 
1316 static void
1317 srpt_vmem_free(srpt_vmem_pool_t *vm_pool, void *vaddr, size_t size)
1318 {
1319 	vmem_free(vm_pool->svp_vmem, vaddr, size);
1320 }
1321 
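/*
 * srpt_vmem_mr()
 *
 * Find the registered chunk covering [vaddr, vaddr + size) and return
 * its registration information in 'mr'.  Chunks are kept in an AVL tree
 * ordered by starting virtual address, so the covering chunk, if any,
 * is the nearest entry at or before 'vaddr'.
 */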
1322 static int
1323 srpt_vmem_mr(srpt_vmem_pool_t *vm_pool, void *vaddr, size_t size,
1324     srpt_mr_t *mr)
1325 {
1326 	avl_index_t		where;
1327 	ib_vaddr_t		mrva = (ib_vaddr_t)(uintptr_t)vaddr;
1328 	srpt_mr_t		chunk;
1329 	srpt_mr_t		*nearest;
1330 	ib_vaddr_t		chunk_end;
1331 	int			status = DDI_FAILURE;
1332 
1333 	rw_enter(&vm_pool->svp_lock, RW_READER);
1334 
1335 	chunk.mr_va = mrva;
1336 	nearest = avl_find(&vm_pool->svp_mr_list, &chunk, &where);
1337 
1338 	if (nearest == NULL) {
1339 		nearest = avl_nearest(&vm_pool->svp_mr_list, where,
1340 		    AVL_BEFORE);
1341 	}
1342 
1343 	if (nearest != NULL) {
1344 		/* Verify this chunk contains the specified address range */
1345 		ASSERT(nearest->mr_va <= mrva);
1346 
1347 		chunk_end = nearest->mr_va + nearest->mr_len;
1348 		if (chunk_end >= mrva + size) {
1349 			mr->mr_hdl = nearest->mr_hdl;
1350 			mr->mr_va = mrva;
1351 			mr->mr_len = size;
1352 			mr->mr_lkey = nearest->mr_lkey;
1353 			mr->mr_rkey = nearest->mr_rkey;
1354 			status = DDI_SUCCESS;
1355 		}
1356 	}
1357 
1358 	rw_exit(&vm_pool->svp_lock);
1359 	return (status);
1360 }
1361 
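/*
 * srpt_vmem_chunk_alloc()
 *
 * Allocate and register a new chunk of backing memory, halving the
 * requested size on allocation failure until it drops below
 * SRPT_MIN_CHUNKSIZE.
 */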
1362 static srpt_mr_t *
1363 srpt_vmem_chunk_alloc(srpt_vmem_pool_t *vm_pool, ib_memlen_t chunksize)
1364 {
1365 	void			*chunk = NULL;
1366 	srpt_mr_t		*result = NULL;
1367 
1368 	while ((chunk == NULL) && (chunksize >= SRPT_MIN_CHUNKSIZE)) {
1369 		chunk = kmem_alloc(chunksize, KM_NOSLEEP);
1370 		if (chunk == NULL) {
1371 			SRPT_DPRINTF_L2("srpt_vmem_chunk_alloc: "
1372 			    "failed to alloc chunk of %d, trying %d",
1373 			    (int)chunksize, (int)chunksize/2);
1374 			chunksize /= 2;
1375 		}
1376 	}
1377 
1378 	if (chunk != NULL) {
1379 		result = srpt_reg_mem(vm_pool, (ib_vaddr_t)(uintptr_t)chunk,
1380 		    chunksize);
1381 		if (result == NULL) {
1382 			SRPT_DPRINTF_L2("srpt_vmem_chunk_alloc: "
1383 			    "chunk registration failed");
1384 			kmem_free(chunk, chunksize);
1385 		}
1386 	}
1387 
1388 	return (result);
1389 }
1390 
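/*
 * srpt_vmem_chunk_free()
 *
 * Deregister a chunk's memory region and free its backing memory.
 */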
1391 static void
1392 srpt_vmem_chunk_free(srpt_vmem_pool_t *vm_pool, srpt_mr_t *mr)
1393 {
1394 	void			*chunk = (void *)(uintptr_t)mr->mr_va;
1395 	ib_memlen_t		chunksize = mr->mr_len;
1396 
1397 	srpt_dereg_mem(vm_pool->svp_ioc, mr);
1398 	kmem_free(chunk, chunksize);
1399 }
1400 
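/*
 * srpt_reg_mem()
 *
 * Register a memory range with the IOC's HCA, returning a chunk
 * descriptor holding the registration handle and keys, or NULL on
 * failure.
 */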
1401 static srpt_mr_t *
1402 srpt_reg_mem(srpt_vmem_pool_t *vm_pool, ib_vaddr_t vaddr, ib_memlen_t len)
1403 {
1404 	srpt_mr_t		*result = NULL;
1405 	ibt_mr_attr_t		mr_attr;
1406 	ibt_mr_desc_t		mr_desc;
1407 	ibt_status_t		status;
1408 	srpt_ioc_t		*ioc = vm_pool->svp_ioc;
1409 
1410 	result = kmem_zalloc(sizeof (srpt_mr_t), KM_NOSLEEP);
1411 	if (result == NULL) {
1412 		SRPT_DPRINTF_L2("srpt_reg_mem: failed to allocate");
1413 		return (NULL);
1414 	}
1415 
1416 	bzero(&mr_attr, sizeof (ibt_mr_attr_t));
1417 	bzero(&mr_desc, sizeof (ibt_mr_desc_t));
1418 
1419 	mr_attr.mr_vaddr = vaddr;
1420 	mr_attr.mr_len = len;
1421 	mr_attr.mr_as = NULL;
1422 	mr_attr.mr_flags = vm_pool->svp_flags;
1423 
1424 	status = ibt_register_mr(ioc->ioc_ibt_hdl, ioc->ioc_pd_hdl,
1425 	    &mr_attr, &result->mr_hdl, &mr_desc);
1426 	if (status != IBT_SUCCESS) {
1427 		SRPT_DPRINTF_L2("srpt_reg_mem: ibt_register_mr "
1428 		    "failed %d", status);
1429 		kmem_free(result, sizeof (srpt_mr_t));
1430 		return (NULL);
1431 	}
1432 
1433 	result->mr_va = mr_attr.mr_vaddr;
1434 	result->mr_len = mr_attr.mr_len;
1435 	result->mr_lkey = mr_desc.md_lkey;
1436 	result->mr_rkey = mr_desc.md_rkey;
1437 
1438 	return (result);
1439 }
1440 
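/*
 * srpt_dereg_mem()
 *
 * Deregister a chunk's memory region with IBTF and free the chunk
 * descriptor.
 */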
1441 static void
1442 srpt_dereg_mem(srpt_ioc_t *ioc, srpt_mr_t *mr)
1443 {
1444 	ibt_status_t		status;
1445 
1446 	status = ibt_deregister_mr(ioc->ioc_ibt_hdl, mr->mr_hdl);
1447 	if (status != IBT_SUCCESS) {
1448 		SRPT_DPRINTF_L1("ioc_fini, error deregistering MR (%d)",
1449 		    status);
1450 	}
1451 	kmem_free(mr, sizeof (srpt_mr_t));
1452 }
1453 
1454 static int
1455 srpt_vmem_mr_compare(const void *a, const void *b)
1456 {
1457 	srpt_mr_t		*mr1 = (srpt_mr_t *)a;
1458 	srpt_mr_t		*mr2 = (srpt_mr_t *)b;
1459 
1460 	/* sort and match by virtual address */
1461 	if (mr1->mr_va < mr2->mr_va) {
1462 		return (-1);
1463 	} else if (mr1->mr_va > mr2->mr_va) {
1464 		return (1);
1465 	}
1466 
1467 	return (0);
1468 }
1469