1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/conf.h>
27 #include <sys/file.h>
28 #include <sys/ddi.h>
29 #include <sys/sunddi.h>
30 #include <sys/modctl.h>
31 #include <sys/scsi/scsi.h>
32 #include <sys/scsi/impl/scsi_reset_notify.h>
33 #include <sys/disp.h>
34 #include <sys/byteorder.h>
35 #include <sys/pathname.h>
36 #include <sys/atomic.h>
37 #include <sys/nvpair.h>
38 #include <sys/fs/zfs.h>
39 #include <sys/sdt.h>
40 #include <sys/dkio.h>
41 #include <sys/zfs_ioctl.h>
42 
43 #include <stmf.h>
44 #include <lpif.h>
45 #include <stmf_ioctl.h>
46 #include <stmf_sbd.h>
47 #include <sbd_impl.h>
48 #include <stmf_sbd_ioctl.h>
49 
/*
 * Compare the first 9 bytes of `zvol` with the "/dev/zvol" device prefix.
 *
 * NOTE: the result follows strncmp() conventions, i.e. it is 0 when the
 * path DOES start with the prefix and non-zero when it does not, so callers
 * must test the result against 0.
 */
#define	SBD_IS_ZVOL(zvol)	(strncmp("/dev/zvol", (zvol), 9))
51 
/* sbd_pgr_* routines are declared extern: implemented outside this file. */
extern sbd_status_t sbd_pgr_meta_init(sbd_lu_t *sl);
extern sbd_status_t sbd_pgr_meta_load(sbd_lu_t *sl);

/*
 * Driver entry points local to this file; they are wired into sbd_cb_ops
 * and sbd_ops below.
 */
static int sbd_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
    void **result);
static int sbd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
static int sbd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
static int sbd_open(dev_t *devp, int flag, int otype, cred_t *credp);
static int sbd_close(dev_t dev, int flag, int otype, cred_t *credp);
static int stmf_sbd_ioctl(dev_t dev, int cmd, intptr_t data, int mode,
    cred_t *credp, int *rval);

/* LU provider callback registered with STMF in _init() (lp_cb). */
void sbd_lp_cb(stmf_lu_provider_t *lp, int cmd, void *arg, uint32_t flags);

/*
 * LU management operations dispatched from stmf_sbd_ioctl().  Each takes the
 * user-supplied request structure, its size, and a pointer through which a
 * provider-specific error code is returned.
 */
int sbd_create_register_lu(sbd_create_and_reg_lu_t *slu, int struct_sz,
    uint32_t *err_ret);
int sbd_import_lu(sbd_import_lu_t *ilu, int struct_sz, uint32_t *err_ret,
    int no_register, sbd_lu_t **slr);
int sbd_delete_lu(sbd_delete_lu_t *dlu, int struct_sz, uint32_t *err_ret);
int sbd_modify_lu(sbd_modify_lu_t *mlu, int struct_sz, uint32_t *err_ret);
int sbd_get_lu_props(sbd_lu_props_t *islp, uint32_t islp_sz,
    sbd_lu_props_t *oslp, uint32_t oslp_sz, uint32_t *err_ret);

/*
 * Helpers for metadata kept via ZFS; sbd_read_meta()/sbd_write_meta() call
 * the read/write variants when SL_ZFS_META is set on the LU.
 */
char *sbd_get_zvol_name(sbd_lu_t *sl);
sbd_status_t sbd_create_zfs_meta_object(sbd_lu_t *sl);
sbd_status_t sbd_open_zfs_meta(sbd_lu_t *sl);
sbd_status_t sbd_read_zfs_meta(sbd_lu_t *sl, uint8_t *buf, uint64_t sz,
    uint64_t off);
sbd_status_t sbd_write_zfs_meta(sbd_lu_t *sl, uint8_t *buf, uint64_t sz,
    uint64_t off);
int sbd_is_zvol(char *path);
int sbd_zvolget(char *zvol_name, char **comstarprop);
int sbd_zvolset(char *zvol_name, char *comstarprop);
char sbd_ctoi(char c);
83 
/* LDI identity obtained in _init() and released in _fini(). */
static ldi_ident_t	sbd_zfs_ident;
/* LU provider handle allocated and registered with STMF in _init(). */
static stmf_lu_provider_t *sbd_lp;
/* Head of the singly linked (sl_next) list of LUs; walked under sbd_lock. */
static sbd_lu_t		*sbd_lu_list = NULL;
/* Global lock taken around every traversal/update of sbd_lu_list. */
static kmutex_t		sbd_lock;
/* devinfo node recorded by sbd_attach(). */
static dev_info_t	*sbd_dip;
/*
 * Number of LUs; consulted by _fini() and the GET_LU_LIST ioctl.
 * NOTE(review): the code that updates it is not in this part of the file.
 */
static uint32_t		sbd_lu_count = 0;
/* Space-padded default identification strings. */
char sbd_vendor_id[]	= "SUN     ";
char sbd_product_id[]	= "COMSTAR         ";
char sbd_revision[]	= "1.0 ";
/* Provider name handed to STMF via lp_name. */
static char sbd_name[] = "sbd";
94 
/*
 * Character device entry points.  Only open, close and ioctl are
 * implemented by this driver; the remaining slots are filled with the
 * nodev/nochpoll/ddi_prop_op placeholders.
 */
static struct cb_ops sbd_cb_ops = {
	sbd_open,			/* open */
	sbd_close,			/* close */
	nodev,				/* strategy */
	nodev,				/* print */
	nodev,				/* dump */
	nodev,				/* read */
	nodev,				/* write */
	stmf_sbd_ioctl,			/* ioctl */
	nodev,				/* devmap */
	nodev,				/* mmap */
	nodev,				/* segmap */
	nochpoll,			/* chpoll */
	ddi_prop_op,			/* cb_prop_op */
	0,				/* streamtab */
	D_NEW | D_MP,			/* cb_flag */
	CB_REV,				/* rev */
	nodev,				/* aread */
	nodev				/* awrite */
};
115 
/*
 * Device operations vector.  getinfo/attach/detach are provided by this
 * file; identify and probe are nulldev, reset is nodev, and there are no
 * bus or power operations.
 */
static struct dev_ops sbd_ops = {
	DEVO_REV,		/* devo_rev (per dev_ops(9S) field order) */
	0,			/* devo_refcnt (per dev_ops(9S) field order) */
	sbd_getinfo,
	nulldev,		/* identify */
	nulldev,		/* probe */
	sbd_attach,
	sbd_detach,
	nodev,			/* reset */
	&sbd_cb_ops,
	NULL,			/* bus_ops */
	NULL			/* power */
};
129 
/* Description string placed in the modldrv structure below. */
#define	SBD_NAME	"COMSTAR SBD"

/* Loadable-driver linkage: ties the description to sbd_ops. */
static struct modldrv modldrv = {
	&mod_driverops,
	SBD_NAME,
	&sbd_ops
};

/* Module linkage passed to mod_install()/mod_remove()/mod_info(). */
static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};
143 
144 int
145 _init(void)
146 {
147 	int ret;
148 
149 	ret = mod_install(&modlinkage);
150 	if (ret)
151 		return (ret);
152 	sbd_lp = (stmf_lu_provider_t *)stmf_alloc(STMF_STRUCT_LU_PROVIDER,
153 	    0, 0);
154 	sbd_lp->lp_lpif_rev = LPIF_REV_1;
155 	sbd_lp->lp_instance = 0;
156 	sbd_lp->lp_name = sbd_name;
157 	sbd_lp->lp_cb = sbd_lp_cb;
158 	sbd_zfs_ident = ldi_ident_from_anon();
159 
160 	if (stmf_register_lu_provider(sbd_lp) != STMF_SUCCESS) {
161 		(void) mod_remove(&modlinkage);
162 		stmf_free(sbd_lp);
163 		return (EINVAL);
164 	}
165 	mutex_init(&sbd_lock, NULL, MUTEX_DRIVER, NULL);
166 	return (0);
167 }
168 
169 int
170 _fini(void)
171 {
172 	int ret;
173 
174 	/*
175 	 * If we have registered lus, then make sure they are all offline
176 	 * if so then deregister them. This should drop the sbd_lu_count
177 	 * to zero.
178 	 */
179 	if (sbd_lu_count) {
180 		sbd_lu_t *slu;
181 
182 		/* See if all of them are offline */
183 		mutex_enter(&sbd_lock);
184 		for (slu = sbd_lu_list; slu != NULL; slu = slu->sl_next) {
185 			if ((slu->sl_state != STMF_STATE_OFFLINE) ||
186 			    slu->sl_state_not_acked) {
187 				mutex_exit(&sbd_lock);
188 				return (EBUSY);
189 			}
190 		}
191 		mutex_exit(&sbd_lock);
192 
193 #if 0
194 		/* ok start deregistering them */
195 		while (sbd_lu_list) {
196 			sbd_store_t *sst = sbd_lu_list->sl_sst;
197 			if (sst->sst_deregister_lu(sst) != STMF_SUCCESS)
198 				return (EBUSY);
199 		}
200 #endif
201 		return (EBUSY);
202 	}
203 	if (stmf_deregister_lu_provider(sbd_lp) != STMF_SUCCESS)
204 		return (EBUSY);
205 	ret = mod_remove(&modlinkage);
206 	if (ret != 0) {
207 		(void) stmf_register_lu_provider(sbd_lp);
208 		return (ret);
209 	}
210 	stmf_free(sbd_lp);
211 	mutex_destroy(&sbd_lock);
212 	ldi_ident_release(sbd_zfs_ident);
213 	return (0);
214 }
215 
216 int
217 _info(struct modinfo *modinfop)
218 {
219 	return (mod_info(&modlinkage, modinfop));
220 }
221 
222 /* ARGSUSED */
223 static int
224 sbd_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
225 {
226 	switch (cmd) {
227 	case DDI_INFO_DEVT2DEVINFO:
228 		*result = sbd_dip;
229 		break;
230 	case DDI_INFO_DEVT2INSTANCE:
231 		*result = (void *)(uintptr_t)ddi_get_instance(sbd_dip);
232 		break;
233 	default:
234 		return (DDI_FAILURE);
235 	}
236 
237 	return (DDI_SUCCESS);
238 }
239 
240 static int
241 sbd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
242 {
243 	switch (cmd) {
244 	case DDI_ATTACH:
245 		sbd_dip = dip;
246 
247 		if (ddi_create_minor_node(dip, "admin", S_IFCHR, 0,
248 		    DDI_NT_STMF_LP, 0) != DDI_SUCCESS) {
249 			break;
250 		}
251 		ddi_report_dev(dip);
252 		return (DDI_SUCCESS);
253 	}
254 
255 	return (DDI_FAILURE);
256 }
257 
258 static int
259 sbd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
260 {
261 	switch (cmd) {
262 	case DDI_DETACH:
263 		ddi_remove_minor_node(dip, 0);
264 		return (DDI_SUCCESS);
265 	}
266 
267 	return (DDI_FAILURE);
268 }
269 
270 /* ARGSUSED */
271 static int
272 sbd_open(dev_t *devp, int flag, int otype, cred_t *credp)
273 {
274 	if (otype != OTYP_CHR)
275 		return (EINVAL);
276 	return (0);
277 }
278 
279 /* ARGSUSED */
280 static int
281 sbd_close(dev_t dev, int flag, int otype, cred_t *credp)
282 {
283 	return (0);
284 }
285 
286 /* ARGSUSED */
287 static int
288 stmf_sbd_ioctl(dev_t dev, int cmd, intptr_t data, int mode,
289 	cred_t *credp, int *rval)
290 {
291 	stmf_iocdata_t		*iocd;
292 	void			*ibuf	= NULL;
293 	void			*obuf	= NULL;
294 	sbd_lu_t		*nsl;
295 	int			i;
296 	int			ret;
297 
298 	if (drv_priv(credp) != 0) {
299 		return (EPERM);
300 	}
301 
302 	ret = stmf_copyin_iocdata(data, mode, &iocd, &ibuf, &obuf);
303 	if (ret)
304 		return (ret);
305 	iocd->stmf_error = 0;
306 
307 	switch (cmd) {
308 	case SBD_IOCTL_CREATE_AND_REGISTER_LU:
309 		if (iocd->stmf_ibuf_size <
310 		    (sizeof (sbd_create_and_reg_lu_t) - 8)) {
311 			ret = EFAULT;
312 			break;
313 		}
314 		if ((iocd->stmf_obuf_size == 0) ||
315 		    (iocd->stmf_obuf_size > iocd->stmf_ibuf_size)) {
316 			ret = EINVAL;
317 			break;
318 		}
319 		ret = sbd_create_register_lu((sbd_create_and_reg_lu_t *)
320 		    ibuf, iocd->stmf_ibuf_size, &iocd->stmf_error);
321 		bcopy(ibuf, obuf, iocd->stmf_obuf_size);
322 		break;
323 	case SBD_IOCTL_IMPORT_LU:
324 		if (iocd->stmf_ibuf_size <
325 		    (sizeof (sbd_import_lu_t) - 8)) {
326 			ret = EFAULT;
327 			break;
328 		}
329 		if ((iocd->stmf_obuf_size == 0) ||
330 		    (iocd->stmf_obuf_size > iocd->stmf_ibuf_size)) {
331 			ret = EINVAL;
332 			break;
333 		}
334 		ret = sbd_import_lu((sbd_import_lu_t *)ibuf,
335 		    iocd->stmf_ibuf_size, &iocd->stmf_error, 0, NULL);
336 		bcopy(ibuf, obuf, iocd->stmf_obuf_size);
337 		break;
338 	case SBD_IOCTL_DELETE_LU:
339 		if (iocd->stmf_ibuf_size < (sizeof (sbd_delete_lu_t) - 8)) {
340 			ret = EFAULT;
341 			break;
342 		}
343 		if (iocd->stmf_obuf_size) {
344 			ret = EINVAL;
345 			break;
346 		}
347 		ret = sbd_delete_lu((sbd_delete_lu_t *)ibuf,
348 		    iocd->stmf_ibuf_size, &iocd->stmf_error);
349 		break;
350 	case SBD_IOCTL_MODIFY_LU:
351 		if (iocd->stmf_ibuf_size < (sizeof (sbd_modify_lu_t) - 8)) {
352 			ret = EFAULT;
353 			break;
354 		}
355 		if (iocd->stmf_obuf_size) {
356 			ret = EINVAL;
357 			break;
358 		}
359 		ret = sbd_modify_lu((sbd_modify_lu_t *)ibuf,
360 		    iocd->stmf_ibuf_size, &iocd->stmf_error);
361 		break;
362 	case SBD_IOCTL_GET_LU_PROPS:
363 		if (iocd->stmf_ibuf_size < (sizeof (sbd_lu_props_t) - 8)) {
364 			ret = EFAULT;
365 			break;
366 		}
367 		if (iocd->stmf_obuf_size < sizeof (sbd_lu_props_t)) {
368 			ret = EINVAL;
369 			break;
370 		}
371 		ret = sbd_get_lu_props((sbd_lu_props_t *)ibuf,
372 		    iocd->stmf_ibuf_size, (sbd_lu_props_t *)obuf,
373 		    iocd->stmf_obuf_size, &iocd->stmf_error);
374 		break;
375 	case SBD_IOCTL_GET_LU_LIST:
376 		mutex_enter(&sbd_lock);
377 		iocd->stmf_obuf_max_nentries = sbd_lu_count;
378 		iocd->stmf_obuf_nentries = min((iocd->stmf_obuf_size >> 4),
379 		    sbd_lu_count);
380 		for (nsl = sbd_lu_list, i = 0; nsl &&
381 		    (i < iocd->stmf_obuf_nentries); i++, nsl = nsl->sl_next) {
382 			bcopy(nsl->sl_device_id + 4,
383 			    &(((uint8_t *)obuf)[i << 4]), 16);
384 		}
385 		mutex_exit(&sbd_lock);
386 		ret = 0;
387 		iocd->stmf_error = 0;
388 		break;
389 	default:
390 		ret = ENOTTY;
391 	}
392 
393 	if (ret == 0) {
394 		ret = stmf_copyout_iocdata(data, mode, iocd, obuf);
395 	} else if (iocd->stmf_error) {
396 		(void) stmf_copyout_iocdata(data, mode, iocd, obuf);
397 	}
398 	if (obuf) {
399 		kmem_free(obuf, iocd->stmf_obuf_size);
400 		obuf = NULL;
401 	}
402 	if (ibuf) {
403 		kmem_free(ibuf, iocd->stmf_ibuf_size);
404 		ibuf = NULL;
405 	}
406 	kmem_free(iocd, sizeof (stmf_iocdata_t));
407 	return (ret);
408 }
409 
410 /* ARGSUSED */
411 void
412 sbd_lp_cb(stmf_lu_provider_t *lp, int cmd, void *arg, uint32_t flags)
413 {
414 	nvpair_t	*np;
415 	char		*s;
416 	sbd_import_lu_t *ilu;
417 	uint32_t	ilu_sz;
418 	uint32_t	struct_sz;
419 	uint32_t	err_ret;
420 	int		iret;
421 
422 	if ((cmd != STMF_PROVIDER_DATA_UPDATED) || (arg == NULL)) {
423 		return;
424 	}
425 
426 	if ((flags & (STMF_PCB_STMF_ONLINING | STMF_PCB_PREG_COMPLETE)) == 0) {
427 		return;
428 	}
429 
430 	np = NULL;
431 	ilu_sz = 1024;
432 	ilu = (sbd_import_lu_t *)kmem_zalloc(ilu_sz, KM_SLEEP);
433 	while ((np = nvlist_next_nvpair((nvlist_t *)arg, np)) != NULL) {
434 		if (nvpair_type(np) != DATA_TYPE_STRING) {
435 			continue;
436 		}
437 		if (nvpair_value_string(np, &s) != 0) {
438 			continue;
439 		}
440 		struct_sz = max(8, strlen(s) + 1);
441 		struct_sz += sizeof (sbd_import_lu_t) - 8;
442 		if (struct_sz > ilu_sz) {
443 			kmem_free(ilu, ilu_sz);
444 			ilu_sz = struct_sz + 32;
445 			ilu = (sbd_import_lu_t *)kmem_zalloc(ilu_sz, KM_SLEEP);
446 		}
447 		ilu->ilu_struct_size = struct_sz;
448 		(void) strcpy(ilu->ilu_meta_fname, s);
449 		iret = sbd_import_lu(ilu, struct_sz, &err_ret, 0, NULL);
450 		if (iret) {
451 			stmf_trace(0, "sbd_lp_cb: import_lu failed, ret = %d, "
452 			    "err_ret = %d", iret, err_ret);
453 		} else {
454 			stmf_trace(0, "Imported the LU %s", nvpair_name(np));
455 		}
456 	}
457 
458 	if (ilu) {
459 		kmem_free(ilu, ilu_sz);
460 		ilu = NULL;
461 	}
462 }
463 
464 sbd_status_t
465 sbd_link_lu(sbd_lu_t *sl)
466 {
467 	sbd_lu_t *nsl;
468 
469 	mutex_enter(&sbd_lock);
470 	mutex_enter(&sl->sl_lock);
471 	ASSERT(sl->sl_trans_op != SL_OP_NONE);
472 
473 	if (sl->sl_flags & SL_LINKED) {
474 		mutex_exit(&sbd_lock);
475 		mutex_exit(&sl->sl_lock);
476 		return (SBD_ALREADY);
477 	}
478 	for (nsl = sbd_lu_list; nsl; nsl = nsl->sl_next) {
479 		if (strcmp(nsl->sl_name, sl->sl_name) == 0)
480 			break;
481 	}
482 	if (nsl) {
483 		mutex_exit(&sbd_lock);
484 		mutex_exit(&sl->sl_lock);
485 		return (SBD_ALREADY);
486 	}
487 	sl->sl_next = sbd_lu_list;
488 	sbd_lu_list = sl;
489 	sl->sl_flags |= SL_LINKED;
490 	mutex_exit(&sbd_lock);
491 	mutex_exit(&sl->sl_lock);
492 	return (SBD_SUCCESS);
493 }
494 
495 void
496 sbd_unlink_lu(sbd_lu_t *sl)
497 {
498 	sbd_lu_t **ppnsl;
499 
500 	mutex_enter(&sbd_lock);
501 	mutex_enter(&sl->sl_lock);
502 	ASSERT(sl->sl_trans_op != SL_OP_NONE);
503 
504 	ASSERT(sl->sl_flags & SL_LINKED);
505 	for (ppnsl = &sbd_lu_list; *ppnsl; ppnsl = &((*ppnsl)->sl_next)) {
506 		if (*ppnsl == sl)
507 			break;
508 	}
509 	ASSERT(*ppnsl);
510 	*ppnsl = (*ppnsl)->sl_next;
511 	sl->sl_flags &= ~SL_LINKED;
512 	mutex_exit(&sbd_lock);
513 	mutex_exit(&sl->sl_lock);
514 }
515 
516 sbd_status_t
517 sbd_find_and_lock_lu(uint8_t *guid, uint8_t *meta_name, uint8_t op,
518     sbd_lu_t **ppsl)
519 {
520 	sbd_lu_t *sl;
521 	int found = 0;
522 	sbd_status_t sret;
523 
524 	mutex_enter(&sbd_lock);
525 	for (sl = sbd_lu_list; sl; sl = sl->sl_next) {
526 		if (guid) {
527 			found = bcmp(sl->sl_device_id + 4, guid, 16) == 0;
528 		} else {
529 			found = strcmp(sl->sl_name, (char *)meta_name) == 0;
530 		}
531 		if (found)
532 			break;
533 	}
534 	if (!found) {
535 		mutex_exit(&sbd_lock);
536 		return (SBD_NOT_FOUND);
537 	}
538 	mutex_enter(&sl->sl_lock);
539 	if (sl->sl_trans_op == SL_OP_NONE) {
540 		sl->sl_trans_op = op;
541 		*ppsl = sl;
542 		sret = SBD_SUCCESS;
543 	} else {
544 		sret = SBD_BUSY;
545 	}
546 	mutex_exit(&sl->sl_lock);
547 	mutex_exit(&sbd_lock);
548 	return (sret);
549 }
550 
551 sbd_status_t
552 sbd_read_meta(sbd_lu_t *sl, uint64_t offset, uint64_t size, uint8_t *buf)
553 {
554 	uint64_t	meta_align;
555 	uint64_t	starting_off;
556 	uint64_t	data_off;
557 	uint64_t	ending_off;
558 	uint64_t	io_size;
559 	uint8_t		*io_buf;
560 	vnode_t		*vp;
561 	sbd_status_t	ret;
562 	ssize_t		resid;
563 	int		vret;
564 
565 	ASSERT(sl->sl_flags & SL_META_OPENED);
566 	if (sl->sl_flags & SL_SHARED_META) {
567 		meta_align = (((uint64_t)1) << sl->sl_data_blocksize_shift) - 1;
568 		vp = sl->sl_data_vp;
569 		ASSERT(vp);
570 	} else {
571 		meta_align = (((uint64_t)1) << sl->sl_meta_blocksize_shift) - 1;
572 		if ((sl->sl_flags & SL_ZFS_META) == 0) {
573 			vp = sl->sl_meta_vp;
574 			ASSERT(vp);
575 		}
576 	}
577 	starting_off = offset & ~(meta_align);
578 	data_off = offset & meta_align;
579 	ending_off = (offset + size + meta_align) & (~meta_align);
580 	if (ending_off > sl->sl_meta_size_used) {
581 		bzero(buf, size);
582 		if (starting_off >= sl->sl_meta_size_used) {
583 			return (SBD_SUCCESS);
584 		}
585 		ending_off = (sl->sl_meta_size_used + meta_align) &
586 		    (~meta_align);
587 		if (size > (ending_off - (starting_off + data_off))) {
588 			size = ending_off - (starting_off + data_off);
589 		}
590 	}
591 	io_size = ending_off - starting_off;
592 	io_buf = (uint8_t *)kmem_zalloc(io_size, KM_SLEEP);
593 	ASSERT((starting_off + io_size) <= sl->sl_total_meta_size);
594 
595 	if (sl->sl_flags & SL_ZFS_META) {
596 		if ((ret = sbd_read_zfs_meta(sl, io_buf, io_size,
597 		    starting_off)) != SBD_SUCCESS) {
598 			goto sbd_read_meta_failure;
599 		}
600 	} else {
601 		vret = vn_rdwr(UIO_READ, vp, (caddr_t)io_buf, (ssize_t)io_size,
602 		    (offset_t)starting_off, UIO_SYSSPACE, FRSYNC,
603 		    RLIM64_INFINITY, CRED(), &resid);
604 
605 		if (vret || resid) {
606 			ret = SBD_FILEIO_FAILURE | vret;
607 			goto sbd_read_meta_failure;
608 		}
609 	}
610 
611 	bcopy(io_buf + data_off, buf, size);
612 	ret = SBD_SUCCESS;
613 
614 sbd_read_meta_failure:
615 	kmem_free(io_buf, io_size);
616 	return (ret);
617 }
618 
619 sbd_status_t
620 sbd_write_meta(sbd_lu_t *sl, uint64_t offset, uint64_t size, uint8_t *buf)
621 {
622 	uint64_t	meta_align;
623 	uint64_t	starting_off;
624 	uint64_t	data_off;
625 	uint64_t	ending_off;
626 	uint64_t	io_size;
627 	uint8_t		*io_buf;
628 	vnode_t		*vp;
629 	sbd_status_t	ret;
630 	ssize_t		resid;
631 	int		vret;
632 
633 	ASSERT(sl->sl_flags & SL_META_OPENED);
634 	if (sl->sl_flags & SL_SHARED_META) {
635 		meta_align = (((uint64_t)1) << sl->sl_data_blocksize_shift) - 1;
636 		vp = sl->sl_data_vp;
637 		ASSERT(vp);
638 	} else {
639 		meta_align = (((uint64_t)1) << sl->sl_meta_blocksize_shift) - 1;
640 		if ((sl->sl_flags & SL_ZFS_META) == 0) {
641 			vp = sl->sl_meta_vp;
642 			ASSERT(vp);
643 		}
644 	}
645 	starting_off = offset & ~(meta_align);
646 	data_off = offset & meta_align;
647 	ending_off = (offset + size + meta_align) & (~meta_align);
648 	io_size = ending_off - starting_off;
649 	io_buf = (uint8_t *)kmem_zalloc(io_size, KM_SLEEP);
650 	ret = sbd_read_meta(sl, starting_off, io_size, io_buf);
651 	if (ret != SBD_SUCCESS) {
652 		goto sbd_write_meta_failure;
653 	}
654 	bcopy(buf, io_buf + data_off, size);
655 	if (sl->sl_flags & SL_ZFS_META) {
656 		if ((ret = sbd_write_zfs_meta(sl, io_buf, io_size,
657 		    starting_off)) != SBD_SUCCESS) {
658 			goto sbd_write_meta_failure;
659 		}
660 	} else {
661 		vret = vn_rdwr(UIO_WRITE, vp, (caddr_t)io_buf, (ssize_t)io_size,
662 		    (offset_t)starting_off, UIO_SYSSPACE, FDSYNC,
663 		    RLIM64_INFINITY, CRED(), &resid);
664 
665 		if (vret || resid) {
666 			ret = SBD_FILEIO_FAILURE | vret;
667 			goto sbd_write_meta_failure;
668 		}
669 	}
670 
671 	ret = SBD_SUCCESS;
672 
673 sbd_write_meta_failure:
674 	kmem_free(io_buf, io_size);
675 	return (ret);
676 }
677 
/*
 * Return the 8-bit additive checksum (sum modulo 256) of the first `size`
 * bytes of `buf`.  A zero or negative `size` yields 0.
 */
uint8_t
sbd_calc_sum(uint8_t *buf, int size)
{
	uint8_t total = 0;
	int idx;

	/* Addition modulo 256 is order independent. */
	for (idx = 0; idx < size; idx++)
		total += buf[idx];

	return (total);
}
688 
689 uint8_t
690 sbd_calc_section_sum(sm_section_hdr_t *sm, uint32_t sz)
691 {
692 	uint8_t s, o;
693 
694 	o = sm->sms_chksum;
695 	sm->sms_chksum = 0;
696 	s = sbd_calc_sum((uint8_t *)sm, sz);
697 	sm->sms_chksum = o;
698 
699 	return (s);
700 }
701 
/*
 * Bounded string length: returns the index of the first NUL byte in `str`,
 * or `maxlen` if none is found within the first `maxlen` bytes.
 */
uint32_t
sbd_strlen(char *str, uint32_t maxlen)
{
	uint32_t len = 0;

	while ((len < maxlen) && (str[len] != 0))
		len++;

	return (len);
}
713 
714 void
715 sbd_swap_meta_start(sbd_meta_start_t *sm)
716 {
717 	if (sm->sm_magic == SBD_MAGIC)
718 		return;
719 	sm->sm_magic		= BSWAP_64(sm->sm_magic);
720 	sm->sm_meta_size	= BSWAP_64(sm->sm_meta_size);
721 	sm->sm_meta_size_used	= BSWAP_64(sm->sm_meta_size_used);
722 	sm->sm_ver_major	= BSWAP_16(sm->sm_ver_major);
723 	sm->sm_ver_minor	= BSWAP_16(sm->sm_ver_minor);
724 	sm->sm_ver_subminor	= BSWAP_16(sm->sm_ver_subminor);
725 }
726 
727 void
728 sbd_swap_section_hdr(sm_section_hdr_t *sm)
729 {
730 	if (sm->sms_data_order == SMS_DATA_ORDER)
731 		return;
732 	sm->sms_offset		= BSWAP_64(sm->sms_offset);
733 	sm->sms_size		= BSWAP_32(sm->sms_size);
734 	sm->sms_id		= BSWAP_16(sm->sms_id);
735 	sm->sms_chksum		+= SMS_DATA_ORDER - sm->sms_data_order;
736 	sm->sms_data_order	= SMS_DATA_ORDER;
737 }
738 
739 void
740 sbd_swap_lu_info_1_0(sbd_lu_info_1_0_t *sli)
741 {
742 	sbd_swap_section_hdr(&sli->sli_sms_header);
743 	if (sli->sli_data_order == SMS_DATA_ORDER)
744 		return;
745 	sli->sli_sms_header.sms_chksum	+= SMS_DATA_ORDER - sli->sli_data_order;
746 	sli->sli_data_order		= SMS_DATA_ORDER;
747 	sli->sli_total_store_size	= BSWAP_64(sli->sli_total_store_size);
748 	sli->sli_total_meta_size	= BSWAP_64(sli->sli_total_meta_size);
749 	sli->sli_lu_data_offset		= BSWAP_64(sli->sli_lu_data_offset);
750 	sli->sli_lu_data_size		= BSWAP_64(sli->sli_lu_data_size);
751 	sli->sli_flags			= BSWAP_32(sli->sli_flags);
752 	sli->sli_blocksize		= BSWAP_16(sli->sli_blocksize);
753 }
754 
755 void
756 sbd_swap_lu_info_1_1(sbd_lu_info_1_1_t *sli)
757 {
758 	sbd_swap_section_hdr(&sli->sli_sms_header);
759 	if (sli->sli_data_order == SMS_DATA_ORDER)
760 		return;
761 	sli->sli_sms_header.sms_chksum	+= SMS_DATA_ORDER - sli->sli_data_order;
762 	sli->sli_data_order		= SMS_DATA_ORDER;
763 	sli->sli_flags			= BSWAP_32(sli->sli_flags);
764 	sli->sli_lu_size		= BSWAP_64(sli->sli_lu_size);
765 	sli->sli_meta_fname_offset	= BSWAP_16(sli->sli_meta_fname_offset);
766 	sli->sli_data_fname_offset	= BSWAP_16(sli->sli_data_fname_offset);
767 	sli->sli_serial_offset		= BSWAP_16(sli->sli_serial_offset);
768 	sli->sli_alias_offset		= BSWAP_16(sli->sli_alias_offset);
769 }
770 
771 sbd_status_t
772 sbd_load_section_hdr(sbd_lu_t *sl, sm_section_hdr_t *sms)
773 {
774 	sm_section_hdr_t	h;
775 	uint64_t		st;
776 	sbd_status_t 		ret;
777 
778 	for (st = sl->sl_meta_offset + sizeof (sbd_meta_start_t);
779 	    st < sl->sl_meta_size_used; st += h.sms_size) {
780 		if ((ret = sbd_read_meta(sl, st, sizeof (sm_section_hdr_t),
781 		    (uint8_t *)&h)) != SBD_SUCCESS) {
782 			return (ret);
783 		}
784 		if (h.sms_data_order != SMS_DATA_ORDER) {
785 			sbd_swap_section_hdr(&h);
786 		}
787 		if ((h.sms_data_order != SMS_DATA_ORDER) ||
788 		    (h.sms_offset != st) || (h.sms_size < sizeof (h)) ||
789 		    ((st + h.sms_size) > sl->sl_meta_size_used)) {
790 			return (SBD_META_CORRUPTED);
791 		}
792 		if (h.sms_id == sms->sms_id) {
793 			bcopy(&h, sms, sizeof (h));
794 			return (SBD_SUCCESS);
795 		}
796 	}
797 
798 	return (SBD_NOT_FOUND);
799 }
800 
801 sbd_status_t
802 sbd_load_meta_start(sbd_lu_t *sl)
803 {
804 	sbd_meta_start_t *sm;
805 	sbd_status_t ret;
806 
807 	/* Fake meta params initially */
808 	sl->sl_total_meta_size = (uint64_t)-1;
809 	sl->sl_meta_size_used = sl->sl_meta_offset + sizeof (sbd_meta_start_t);
810 
811 	sm = kmem_zalloc(sizeof (*sm), KM_SLEEP);
812 	ret = sbd_read_meta(sl, sl->sl_meta_offset, sizeof (*sm),
813 	    (uint8_t *)sm);
814 	if (ret != SBD_SUCCESS) {
815 		goto load_meta_start_failed;
816 	}
817 
818 	if (sm->sm_magic != SBD_MAGIC) {
819 		sbd_swap_meta_start(sm);
820 	}
821 
822 	if ((sm->sm_magic != SBD_MAGIC) || (sbd_calc_sum((uint8_t *)sm,
823 	    sizeof (*sm) - 1) != sm->sm_chksum)) {
824 		ret = SBD_META_CORRUPTED;
825 		goto load_meta_start_failed;
826 	}
827 
828 	if (sm->sm_ver_major != SBD_VER_MAJOR) {
829 		ret = SBD_NOT_SUPPORTED;
830 		goto load_meta_start_failed;
831 	}
832 
833 	sl->sl_total_meta_size = sm->sm_meta_size;
834 	sl->sl_meta_size_used = sm->sm_meta_size_used;
835 	ret = SBD_SUCCESS;
836 
837 load_meta_start_failed:
838 	kmem_free(sm, sizeof (*sm));
839 	return (ret);
840 }
841 
842 sbd_status_t
843 sbd_write_meta_start(sbd_lu_t *sl, uint64_t meta_size, uint64_t meta_size_used)
844 {
845 	sbd_meta_start_t *sm;
846 	sbd_status_t ret;
847 
848 	sm = (sbd_meta_start_t *)kmem_zalloc(sizeof (sbd_meta_start_t),
849 	    KM_SLEEP);
850 
851 	sm->sm_magic = SBD_MAGIC;
852 	sm->sm_meta_size = meta_size;
853 	sm->sm_meta_size_used = meta_size_used;
854 	sm->sm_ver_major = SBD_VER_MAJOR;
855 	sm->sm_ver_minor = SBD_VER_MINOR;
856 	sm->sm_ver_subminor = SBD_VER_SUBMINOR;
857 	sm->sm_chksum = sbd_calc_sum((uint8_t *)sm, sizeof (*sm) - 1);
858 
859 	ret = sbd_write_meta(sl, sl->sl_meta_offset, sizeof (*sm),
860 	    (uint8_t *)sm);
861 	kmem_free(sm, sizeof (*sm));
862 
863 	return (ret);
864 }
865 
866 sbd_status_t
867 sbd_read_meta_section(sbd_lu_t *sl, sm_section_hdr_t **ppsms, uint16_t sms_id)
868 {
869 	sbd_status_t ret;
870 	sm_section_hdr_t sms;
871 	int alloced = 0;
872 
873 	if (((*ppsms) == NULL) || ((*ppsms)->sms_offset == 0)) {
874 		bzero(&sms, sizeof (sm_section_hdr_t));
875 		sms.sms_id = sms_id;
876 		if ((ret = sbd_load_section_hdr(sl, &sms)) != SBD_SUCCESS) {
877 			return (ret);
878 		} else {
879 			if ((*ppsms) == NULL) {
880 				*ppsms = (sm_section_hdr_t *)kmem_zalloc(
881 				    sms.sms_size, KM_SLEEP);
882 				alloced = 1;
883 			}
884 			bcopy(&sms, *ppsms, sizeof (sm_section_hdr_t));
885 		}
886 	}
887 
888 	ret = sbd_read_meta(sl, (*ppsms)->sms_offset, (*ppsms)->sms_size,
889 	    (uint8_t *)(*ppsms));
890 	if (ret == SBD_SUCCESS) {
891 		uint8_t s;
892 		if ((*ppsms)->sms_data_order != SMS_DATA_ORDER)
893 			sbd_swap_section_hdr(*ppsms);
894 		if ((*ppsms)->sms_id != SMS_ID_UNUSED) {
895 			s = sbd_calc_section_sum(*ppsms, (*ppsms)->sms_size);
896 			if (s != (*ppsms)->sms_chksum)
897 				ret = SBD_META_CORRUPTED;
898 		}
899 	}
900 
901 	if ((ret != SBD_SUCCESS) && alloced)
902 		kmem_free(*ppsms, sms.sms_size);
903 	return (ret);
904 }
905 
/*
 * Write a metadata section, placing it in the metadata area if it does not
 * yet have a usable location.
 *
 * sl	LU whose metadata is written.
 * sms	Section to write.  sms_id, sms_size and sms_data_order must be set
 *	by the caller.  sms_offset is 0 when the location is unknown and is
 *	updated here; sms_chksum is (re)computed here when the section is
 *	newly placed or located by id.
 *
 * Outline:
 *  1. If sms_offset is set, the on-disk header at that offset is read.  A
 *     different id is rejected with SBD_INVALID_ARG.  If the size is
 *     unchanged the section is overwritten in place.  Otherwise the old
 *     copy is marked SMS_ID_UNUSED and a new location is sought.
 *  2. If sms_offset is 0, an existing section with the same id is looked
 *     up; when found, its offset is adopted and step 1 is retried.
 *  3. Otherwise the section list is scanned for a run of unused sections
 *     that either fits exactly or leaves room for a trailing unused
 *     header.  Failing that, the section is appended at the end of the
 *     used (or trailing unused) area when there is room, or
 *     unconditionally when metadata is not shared with the data store.
 *  4. The section (plus a trailing unused header for any leftover space)
 *     is written in one go, and the meta start record is rewritten if the
 *     used size grew.
 *
 * Returns SBD_SUCCESS, SBD_INVALID_ARG, SBD_META_CORRUPTED (zero-sized
 * section encountered), SBD_ALLOC_FAILURE (no room), or an I/O error from
 * the underlying read/write routines.
 */
sbd_status_t
sbd_write_meta_section(sbd_lu_t *sl, sm_section_hdr_t *sms)
{
	sm_section_hdr_t t;
	uint64_t off, s;
	uint64_t unused_start;
	sbd_status_t ret;
	uint8_t *cb;
	int update_meta_start = 0;

write_meta_section_again:
	if (sms->sms_offset) {
		/* Verify that size has not changed */
		ret = sbd_read_meta(sl, sms->sms_offset, sizeof (t),
		    (uint8_t *)&t);
		if (ret != SBD_SUCCESS)
			return (ret);
		if (t.sms_data_order != SMS_DATA_ORDER) {
			sbd_swap_section_hdr(&t);
		}
		if (t.sms_id != sms->sms_id) {
			return (SBD_INVALID_ARG);
		}
		if (t.sms_size == sms->sms_size) {
			/* Same size: overwrite the section where it is. */
			return (sbd_write_meta(sl, sms->sms_offset,
			    sms->sms_size, (uint8_t *)sms));
		}
		/* Size changed: retire the old copy and relocate below. */
		t.sms_id = SMS_ID_UNUSED;
		/*
		 * For unused sections we only use chksum of the header. for
		 * all other sections, the chksum is for the entire section.
		 */
		t.sms_chksum = sbd_calc_section_sum(&t, sizeof (t));
		ret = sbd_write_meta(sl, t.sms_offset, sizeof (t),
		    (uint8_t *)&t);
		if (ret != SBD_SUCCESS)
			return (ret);
		sms->sms_offset = 0;
	} else {
		/* No offset yet: see whether the section already exists. */
		t.sms_id = sms->sms_id;
		t.sms_data_order = SMS_DATA_ORDER;
		ret = sbd_load_section_hdr(sl, &t);
		if (ret == SBD_SUCCESS) {
			sms->sms_offset = t.sms_offset;
			sms->sms_chksum =
			    sbd_calc_section_sum(sms, sms->sms_size);
			goto write_meta_section_again;
		} else if (ret != SBD_NOT_FOUND) {
			return (ret);
		}
	}

	/*
	 * At this point we know that section does not already exist.
	 * find space large enough to hold the section or grow meta if
	 * possible.
	 */
	unused_start = 0;
	s = 0;
	for (off = sl->sl_meta_offset + sizeof (sbd_meta_start_t);
	    off < sl->sl_meta_size_used; off += t.sms_size) {
		ret = sbd_read_meta(sl, off, sizeof (t), (uint8_t *)&t);
		if (ret != SBD_SUCCESS)
			return (ret);
		if (t.sms_data_order != SMS_DATA_ORDER)
			sbd_swap_section_hdr(&t);
		/* A zero size would keep this loop from advancing. */
		if (t.sms_size == 0)
			return (SBD_META_CORRUPTED);
		if (t.sms_id == SMS_ID_UNUSED) {
			if (unused_start == 0)
				unused_start = off;
			/* Total length of the current run of unused space. */
			s = t.sms_size - unused_start + off;
			/*
			 * Usable if it fits exactly, or if the leftover can
			 * hold at least an unused-section header.
			 */
			if ((s == sms->sms_size) || (s >= (sms->sms_size +
			    sizeof (t)))) {
				break;
			} else {
				s = 0;
			}
		} else {
			unused_start = 0;
		}
	}

	/*
	 * Place the section at the start of the last unused run seen (which,
	 * if the loop ran to completion, is a trailing run), or at the end
	 * of the used area when there is none.
	 */
	off = (unused_start == 0) ? sl->sl_meta_size_used : unused_start;
	if (s == 0) {
		s = sl->sl_total_meta_size - off;
		/* Lets see if we can expand the metadata */
		if (s >= sms->sms_size || !(sl->sl_flags & SL_SHARED_META)) {
			s = sms->sms_size;
			update_meta_start = 1;
		} else {
			s = 0;
		}
	}

	if (s == 0)
		return (SBD_ALLOC_FAILURE);

	sms->sms_offset = off;
	sms->sms_chksum = sbd_calc_section_sum(sms, sms->sms_size);
	/*
	 * Since we may have to write more than one section (current +
	 * any unused), use a combined buffer.
	 */
	cb = kmem_zalloc(s, KM_SLEEP);
	bcopy(sms, cb, sms->sms_size);
	if (s > sms->sms_size) {
		/* Describe the leftover space with an unused header. */
		t.sms_offset = off + sms->sms_size;
		t.sms_size = s - sms->sms_size;
		t.sms_id = SMS_ID_UNUSED;
		t.sms_data_order = SMS_DATA_ORDER;
		t.sms_chksum = sbd_calc_section_sum(&t, sizeof (t));
		bcopy(&t, cb + sms->sms_size, sizeof (t));
	}
	ret = sbd_write_meta(sl, off, s, cb);
	kmem_free(cb, s);
	if (ret != SBD_SUCCESS)
		return (ret);

	if (update_meta_start) {
		uint64_t old_sz_used = sl->sl_meta_size_used; /* save a copy */
		sl->sl_meta_size_used = off + s;
		s = sl->sl_total_meta_size; /* save a copy */
		if (sl->sl_total_meta_size < sl->sl_meta_size_used) {
			/* Grow the total size, rounded up to a meta block. */
			uint64_t meta_align =
			    (((uint64_t)1) << sl->sl_meta_blocksize_shift) - 1;
			sl->sl_total_meta_size = (sl->sl_meta_size_used +
			    meta_align) & (~meta_align);
		}
		ret = sbd_write_meta_start(sl, sl->sl_total_meta_size,
		    sl->sl_meta_size_used);
		if (ret != SBD_SUCCESS) {
			/* Roll the in-core sizes back to the saved values. */
			sl->sl_meta_size_used = old_sz_used;
			sl->sl_total_meta_size = s;
		}
	}
	return (ret);
}
1044 
1045 sbd_status_t
1046 sbd_write_lu_info(sbd_lu_t *sl)
1047 {
1048 	sbd_lu_info_1_1_t *sli;
1049 	int s;
1050 	uint8_t *p;
1051 	char *zvol_name = NULL;
1052 	sbd_status_t ret;
1053 
1054 	mutex_enter(&sl->sl_lock);
1055 
1056 	s = sl->sl_serial_no_size;
1057 	if ((sl->sl_flags & (SL_SHARED_META | SL_ZFS_META)) == 0) {
1058 		if (sl->sl_data_filename) {
1059 			s += strlen(sl->sl_data_filename) + 1;
1060 		}
1061 	}
1062 	if (sl->sl_flags & SL_ZFS_META) {
1063 		zvol_name = sbd_get_zvol_name(sl);
1064 		s += strlen(zvol_name) + 1;
1065 	}
1066 	if (sl->sl_alias) {
1067 		s += strlen(sl->sl_alias) + 1;
1068 	}
1069 	sli = (sbd_lu_info_1_1_t *)kmem_zalloc(sizeof (*sli) + s, KM_SLEEP);
1070 	p = sli->sli_buf;
1071 	if ((sl->sl_flags & (SL_SHARED_META | SL_ZFS_META)) == 0) {
1072 		sli->sli_flags |= SLI_SEPARATE_META;
1073 		(void) strcpy((char *)p, sl->sl_data_filename);
1074 		sli->sli_data_fname_offset =
1075 		    (uintptr_t)p - (uintptr_t)sli->sli_buf;
1076 		sli->sli_flags |= SLI_DATA_FNAME_VALID;
1077 		p += strlen(sl->sl_data_filename) + 1;
1078 	}
1079 	if (sl->sl_flags & SL_ZFS_META) {
1080 		(void) strcpy((char *)p, zvol_name);
1081 		sli->sli_meta_fname_offset =
1082 		    (uintptr_t)p - (uintptr_t)sli->sli_buf;
1083 		sli->sli_flags |= SLI_META_FNAME_VALID | SLI_ZFS_META;
1084 		p += strlen(zvol_name) + 1;
1085 		kmem_free(zvol_name, strlen(zvol_name) + 1);
1086 		zvol_name = NULL;
1087 	}
1088 	if (sl->sl_alias) {
1089 		(void) strcpy((char *)p, sl->sl_alias);
1090 		sli->sli_alias_offset =
1091 		    (uintptr_t)p - (uintptr_t)sli->sli_buf;
1092 		sli->sli_flags |= SLI_ALIAS_VALID;
1093 		p += strlen(sl->sl_alias) + 1;
1094 	}
1095 	if (sl->sl_flags & SL_WRITE_PROTECTED) {
1096 		sli->sli_flags |= SLI_WRITE_PROTECTED;
1097 	}
1098 	if (sl->sl_flags & SL_SAVED_WRITE_CACHE_DISABLE) {
1099 		sli->sli_flags |= SLI_WRITEBACK_CACHE_DISABLE;
1100 	}
1101 	if (sl->sl_flags & SL_VID_VALID) {
1102 		bcopy(sl->sl_vendor_id, sli->sli_vid, 8);
1103 		sli->sli_flags |= SLI_VID_VALID;
1104 	}
1105 	if (sl->sl_flags & SL_PID_VALID) {
1106 		bcopy(sl->sl_product_id, sli->sli_pid, 16);
1107 		sli->sli_flags |= SLI_PID_VALID;
1108 	}
1109 	if (sl->sl_flags & SL_REV_VALID) {
1110 		bcopy(sl->sl_revision, sli->sli_rev, 4);
1111 		sli->sli_flags |= SLI_REV_VALID;
1112 	}
1113 	if (sl->sl_serial_no_size) {
1114 		bcopy(sl->sl_serial_no, p, sl->sl_serial_no_size);
1115 		sli->sli_serial_size = sl->sl_serial_no_size;
1116 		sli->sli_serial_offset =
1117 		    (uintptr_t)p - (uintptr_t)sli->sli_buf;
1118 		sli->sli_flags |= SLI_SERIAL_VALID;
1119 		p += sli->sli_serial_size;
1120 	}
1121 	sli->sli_lu_size = sl->sl_lu_size;
1122 	sli->sli_data_blocksize_shift = sl->sl_data_blocksize_shift;
1123 	sli->sli_data_order = SMS_DATA_ORDER;
1124 	bcopy(sl->sl_device_id, sli->sli_device_id, 20);
1125 
1126 	sli->sli_sms_header.sms_size = sizeof (*sli) + s;
1127 	sli->sli_sms_header.sms_id = SMS_ID_LU_INFO_1_1;
1128 	sli->sli_sms_header.sms_data_order = SMS_DATA_ORDER;
1129 
1130 	mutex_exit(&sl->sl_lock);
1131 	ret = sbd_write_meta_section(sl, (sm_section_hdr_t *)sli);
1132 	kmem_free(sli, sizeof (*sli) + s);
1133 	return (ret);
1134 }
1135 
1136 int
1137 sbd_populate_and_register_lu(sbd_lu_t *sl, uint32_t *err_ret)
1138 {
1139 	stmf_lu_t *lu = sl->sl_lu;
1140 	stmf_status_t ret;
1141 
1142 	lu->lu_id = (scsi_devid_desc_t *)sl->sl_device_id;
1143 	if (sl->sl_alias) {
1144 		lu->lu_alias = sl->sl_alias;
1145 	} else {
1146 		lu->lu_alias = sl->sl_name;
1147 	}
1148 	lu->lu_lp = sbd_lp;
1149 	lu->lu_task_alloc = sbd_task_alloc;
1150 	lu->lu_new_task = sbd_new_task;
1151 	lu->lu_dbuf_xfer_done = sbd_dbuf_xfer_done;
1152 	lu->lu_send_status_done = sbd_send_status_done;
1153 	lu->lu_task_free = sbd_task_free;
1154 	lu->lu_abort = sbd_abort;
1155 	lu->lu_ctl = sbd_ctl;
1156 	lu->lu_info = sbd_info;
1157 	sl->sl_state = STMF_STATE_OFFLINE;
1158 
1159 	if ((ret = stmf_register_lu(lu)) != STMF_SUCCESS) {
1160 		stmf_trace(0, "Failed to register with framework, ret=%llx",
1161 		    ret);
1162 		if (ret == STMF_ALREADY) {
1163 			*err_ret = SBD_RET_GUID_ALREADY_REGISTERED;
1164 		}
1165 		return (EIO);
1166 	}
1167 
1168 	*err_ret = 0;
1169 	return (0);
1170 }
1171 
1172 int
1173 sbd_open_data_file(sbd_lu_t *sl, uint32_t *err_ret, int lu_size_valid,
1174     int vp_valid, int keep_open)
1175 {
1176 	int ret;
1177 	int flag;
1178 	ulong_t	nbits;
1179 	uint64_t supported_size;
1180 	vattr_t vattr;
1181 	enum vtype vt;
1182 
1183 	mutex_enter(&sl->sl_lock);
1184 	if (vp_valid) {
1185 		goto odf_over_open;
1186 	}
1187 	if (sl->sl_data_filename[0] != '/') {
1188 		*err_ret = SBD_RET_DATA_PATH_NOT_ABSOLUTE;
1189 		mutex_exit(&sl->sl_lock);
1190 		return (EINVAL);
1191 	}
1192 	if ((ret = lookupname(sl->sl_data_filename, UIO_SYSSPACE, FOLLOW,
1193 	    NULLVPP, &sl->sl_data_vp)) != 0) {
1194 		*err_ret = SBD_RET_DATA_FILE_LOOKUP_FAILED;
1195 		mutex_exit(&sl->sl_lock);
1196 		return (ret);
1197 	}
1198 	sl->sl_data_vtype = vt = sl->sl_data_vp->v_type;
1199 	VN_RELE(sl->sl_data_vp);
1200 	if ((vt != VREG) && (vt != VCHR) && (vt != VBLK)) {
1201 		*err_ret = SBD_RET_WRONG_DATA_FILE_TYPE;
1202 		mutex_exit(&sl->sl_lock);
1203 		return (EINVAL);
1204 	}
1205 	if (sl->sl_flags & SL_WRITE_PROTECTED) {
1206 		flag = FREAD | FOFFMAX;
1207 	} else {
1208 		flag = FREAD | FWRITE | FOFFMAX | FEXCL;
1209 	}
1210 	if ((ret = vn_open(sl->sl_data_filename, UIO_SYSSPACE, flag, 0,
1211 	    &sl->sl_data_vp, 0, 0)) != 0) {
1212 		*err_ret = SBD_RET_DATA_FILE_OPEN_FAILED;
1213 		mutex_exit(&sl->sl_lock);
1214 		return (ret);
1215 	}
1216 odf_over_open:
1217 	vattr.va_mask = AT_SIZE;
1218 	if ((ret = VOP_GETATTR(sl->sl_data_vp, &vattr, 0, CRED(), NULL)) != 0) {
1219 		*err_ret = SBD_RET_DATA_FILE_GETATTR_FAILED;
1220 		goto odf_close_data_and_exit;
1221 	}
1222 	if ((vt != VREG) && (vattr.va_size == 0)) {
1223 		/*
1224 		 * Its a zero byte block or char device. This cannot be
1225 		 * a raw disk.
1226 		 */
1227 		*err_ret = SBD_RET_WRONG_DATA_FILE_TYPE;
1228 		ret = EINVAL;
1229 		goto odf_close_data_and_exit;
1230 	}
1231 	/* sl_data_readable size includes any metadata. */
1232 	sl->sl_data_readable_size = vattr.va_size;
1233 	if (VOP_PATHCONF(sl->sl_data_vp, _PC_FILESIZEBITS, &nbits,
1234 	    CRED(), NULL) != 0) {
1235 		nbits = 0;
1236 	}
1237 	/* nbits cannot be greater than 64 */
1238 	sl->sl_data_fs_nbits = (uint8_t)nbits;
1239 	if (lu_size_valid) {
1240 		sl->sl_total_data_size = sl->sl_lu_size;
1241 		if (sl->sl_flags & SL_SHARED_META) {
1242 			sl->sl_total_data_size += SHARED_META_DATA_SIZE;
1243 		}
1244 		if ((nbits > 0) && (nbits < 64)) {
1245 			/*
1246 			 * The expression below is correct only if nbits is
1247 			 * positive and less than 64.
1248 			 */
1249 			supported_size = (((uint64_t)1) << nbits) - 1;
1250 			if (sl->sl_total_data_size > supported_size) {
1251 				*err_ret = SBD_RET_SIZE_NOT_SUPPORTED_BY_FS;
1252 				ret = EINVAL;
1253 				goto odf_close_data_and_exit;
1254 			}
1255 		}
1256 	} else {
1257 		sl->sl_total_data_size = vattr.va_size;
1258 		if (sl->sl_flags & SL_SHARED_META) {
1259 			if (vattr.va_size > SHARED_META_DATA_SIZE) {
1260 				sl->sl_lu_size = vattr.va_size -
1261 				    SHARED_META_DATA_SIZE;
1262 			} else {
1263 				*err_ret = SBD_RET_FILE_SIZE_ERROR;
1264 				ret = EINVAL;
1265 				goto odf_close_data_and_exit;
1266 			}
1267 		} else {
1268 			sl->sl_lu_size = vattr.va_size;
1269 		}
1270 	}
1271 	if (sl->sl_lu_size < SBD_MIN_LU_SIZE) {
1272 		*err_ret = SBD_RET_FILE_SIZE_ERROR;
1273 		ret = EINVAL;
1274 		goto odf_close_data_and_exit;
1275 	}
1276 	if (sl->sl_lu_size &
1277 	    ((((uint64_t)1) << sl->sl_data_blocksize_shift) - 1)) {
1278 		*err_ret = SBD_RET_FILE_ALIGN_ERROR;
1279 		ret = EINVAL;
1280 		goto odf_close_data_and_exit;
1281 	}
1282 	sl->sl_flags |= SL_MEDIA_LOADED;
1283 	mutex_exit(&sl->sl_lock);
1284 	return (0);
1285 
1286 odf_close_data_and_exit:
1287 	if (!keep_open) {
1288 		(void) VOP_CLOSE(sl->sl_data_vp, flag, 1, 0, CRED(), NULL);
1289 		VN_RELE(sl->sl_data_vp);
1290 	}
1291 	mutex_exit(&sl->sl_lock);
1292 	return (ret);
1293 }
1294 
1295 int
1296 sbd_close_delete_lu(sbd_lu_t *sl, int ret)
1297 {
1298 	int flag;
1299 
1300 	if (((sl->sl_flags & SL_SHARED_META) == 0) &&
1301 	    (sl->sl_flags & SL_META_OPENED)) {
1302 		if (sl->sl_flags & SL_ZFS_META) {
1303 			rw_destroy(&sl->sl_zfs_meta_lock);
1304 			if (sl->sl_zfs_meta) {
1305 				kmem_free(sl->sl_zfs_meta, ZAP_MAXVALUELEN / 2);
1306 			}
1307 		} else {
1308 			flag = FREAD | FWRITE | FOFFMAX | FEXCL;
1309 			(void) VOP_CLOSE(sl->sl_meta_vp, flag, 1, 0,
1310 			    CRED(), NULL);
1311 			VN_RELE(sl->sl_meta_vp);
1312 		}
1313 		sl->sl_flags &= ~SL_META_OPENED;
1314 	}
1315 	if (sl->sl_flags & SL_MEDIA_LOADED) {
1316 		if (sl->sl_flags & SL_WRITE_PROTECTED) {
1317 			flag = FREAD | FOFFMAX;
1318 		} else {
1319 			flag = FREAD | FWRITE | FOFFMAX | FEXCL;
1320 		}
1321 		(void) VOP_CLOSE(sl->sl_data_vp, flag, 1, 0, CRED(), NULL);
1322 		VN_RELE(sl->sl_data_vp);
1323 		sl->sl_flags &= ~SL_MEDIA_LOADED;
1324 		if (sl->sl_flags & SL_SHARED_META) {
1325 			sl->sl_flags &= ~SL_META_OPENED;
1326 		}
1327 	}
1328 	if (sl->sl_flags & SL_LINKED)
1329 		sbd_unlink_lu(sl);
1330 	mutex_destroy(&sl->sl_lock);
1331 	rw_destroy(&sl->sl_pgr->pgr_lock);
1332 	if (sl->sl_serial_no_alloc_size) {
1333 		kmem_free(sl->sl_serial_no, sl->sl_serial_no_alloc_size);
1334 	}
1335 	if (sl->sl_data_fname_alloc_size) {
1336 		kmem_free(sl->sl_data_filename, sl->sl_data_fname_alloc_size);
1337 	}
1338 	if (sl->sl_alias_alloc_size) {
1339 		kmem_free(sl->sl_alias, sl->sl_alias_alloc_size);
1340 	}
1341 	stmf_free(sl->sl_lu);
1342 	return (ret);
1343 }
1344 
1345 int
1346 sbd_create_register_lu(sbd_create_and_reg_lu_t *slu, int struct_sz,
1347     uint32_t *err_ret)
1348 {
1349 	char *namebuf;
1350 	sbd_lu_t *sl;
1351 	stmf_lu_t *lu;
1352 	sbd_status_t sret;
1353 	char *p;
1354 	int sz;
1355 	int alloc_sz;
1356 	int ret = EIO;
1357 	int flag;
1358 	int wcd = 0;
1359 	enum vtype vt;
1360 
1361 	sz = struct_sz - sizeof (sbd_create_and_reg_lu_t) + 8 + 1;
1362 
1363 	*err_ret = 0;
1364 
1365 	/* Lets validate various offsets */
1366 	if (((slu->slu_meta_fname_valid) &&
1367 	    (slu->slu_meta_fname_off >= sz)) ||
1368 	    (slu->slu_data_fname_off >= sz) ||
1369 	    ((slu->slu_alias_valid) &&
1370 	    (slu->slu_alias_off >= sz)) ||
1371 	    ((slu->slu_serial_valid) &&
1372 	    ((slu->slu_serial_off + slu->slu_serial_size) >= sz))) {
1373 		return (EINVAL);
1374 	}
1375 
1376 	namebuf = kmem_zalloc(sz, KM_SLEEP);
1377 	bcopy(slu->slu_buf, namebuf, sz - 1);
1378 	namebuf[sz - 1] = 0;
1379 
1380 	alloc_sz = sizeof (sbd_lu_t) + sizeof (sbd_pgr_t);
1381 	if (slu->slu_meta_fname_valid) {
1382 		alloc_sz += strlen(namebuf + slu->slu_meta_fname_off) + 1;
1383 	}
1384 	alloc_sz += strlen(namebuf + slu->slu_data_fname_off) + 1;
1385 	if (slu->slu_alias_valid) {
1386 		alloc_sz += strlen(namebuf + slu->slu_alias_off) + 1;
1387 	}
1388 	if (slu->slu_serial_valid) {
1389 		alloc_sz += slu->slu_serial_size;
1390 	}
1391 
1392 	lu = (stmf_lu_t *)stmf_alloc(STMF_STRUCT_STMF_LU, alloc_sz, 0);
1393 	if (lu == NULL) {
1394 		kmem_free(namebuf, sz);
1395 		return (ENOMEM);
1396 	}
1397 	sl = (sbd_lu_t *)lu->lu_provider_private;
1398 	bzero(sl, alloc_sz);
1399 	sl->sl_lu = lu;
1400 	sl->sl_alloc_size = alloc_sz;
1401 	sl->sl_pgr = (sbd_pgr_t *)(sl + 1);
1402 	rw_init(&sl->sl_pgr->pgr_lock, NULL, RW_DRIVER, NULL);
1403 	mutex_init(&sl->sl_lock, NULL, MUTEX_DRIVER, NULL);
1404 	p = ((char *)sl) + sizeof (sbd_lu_t) + sizeof (sbd_pgr_t);
1405 	sl->sl_data_filename = p;
1406 	(void) strcpy(sl->sl_data_filename, namebuf + slu->slu_data_fname_off);
1407 	p += strlen(sl->sl_data_filename) + 1;
1408 	sl->sl_meta_offset = SBD_META_OFFSET;
1409 	if (slu->slu_meta_fname_valid) {
1410 		sl->sl_alias = sl->sl_name = sl->sl_meta_filename = p;
1411 		(void) strcpy(sl->sl_meta_filename, namebuf +
1412 		    slu->slu_meta_fname_off);
1413 		p += strlen(sl->sl_meta_filename) + 1;
1414 	} else {
1415 		sl->sl_alias = sl->sl_name = sl->sl_data_filename;
1416 		if (sbd_is_zvol(sl->sl_data_filename)) {
1417 			sl->sl_flags |= SL_ZFS_META;
1418 			sl->sl_meta_offset = 0;
1419 		} else {
1420 			sl->sl_flags |= SL_SHARED_META;
1421 			sl->sl_data_offset = SHARED_META_DATA_SIZE;
1422 			sl->sl_total_meta_size = SHARED_META_DATA_SIZE;
1423 			sl->sl_meta_size_used = 0;
1424 		}
1425 	}
1426 	if (slu->slu_alias_valid) {
1427 		sl->sl_alias = p;
1428 		(void) strcpy(p, namebuf + slu->slu_alias_off);
1429 		p += strlen(sl->sl_alias) + 1;
1430 	}
1431 	if (slu->slu_serial_valid) {
1432 		sl->sl_serial_no = (uint8_t *)p;
1433 		bcopy(namebuf + slu->slu_serial_off, sl->sl_serial_no,
1434 		    slu->slu_serial_size);
1435 		sl->sl_serial_no_size = slu->slu_serial_size;
1436 		p += slu->slu_serial_size;
1437 	}
1438 	kmem_free(namebuf, sz);
1439 	if (slu->slu_vid_valid) {
1440 		bcopy(slu->slu_vid, sl->sl_vendor_id, 8);
1441 		sl->sl_flags |= SL_VID_VALID;
1442 	}
1443 	if (slu->slu_pid_valid) {
1444 		bcopy(slu->slu_pid, sl->sl_product_id, 16);
1445 		sl->sl_flags |= SL_PID_VALID;
1446 	}
1447 	if (slu->slu_rev_valid) {
1448 		bcopy(slu->slu_rev, sl->sl_revision, 4);
1449 		sl->sl_flags |= SL_REV_VALID;
1450 	}
1451 	if (slu->slu_write_protected) {
1452 		sl->sl_flags |= SL_WRITE_PROTECTED;
1453 	}
1454 	if (slu->slu_writeback_cache_disable) {
1455 		sl->sl_flags |= SL_WRITEBACK_CACHE_DISABLE |
1456 		    SL_SAVED_WRITE_CACHE_DISABLE;
1457 	}
1458 
1459 	if (slu->slu_blksize_valid) {
1460 		if ((slu->slu_blksize & (slu->slu_blksize - 1)) ||
1461 		    (slu->slu_blksize > (32 * 1024)) ||
1462 		    (slu->slu_blksize == 0)) {
1463 			*err_ret = SBD_RET_INVALID_BLKSIZE;
1464 			ret = EINVAL;
1465 			goto scm_err_out;
1466 		}
1467 		while ((1 << sl->sl_data_blocksize_shift) != slu->slu_blksize) {
1468 			sl->sl_data_blocksize_shift++;
1469 		}
1470 	} else {
1471 		sl->sl_data_blocksize_shift = 9;	/* 512 by default */
1472 		slu->slu_blksize = 512;
1473 	}
1474 
1475 	/* Now lets start creating meta */
1476 	sl->sl_trans_op = SL_OP_CREATE_REGISTER_LU;
1477 	if (sbd_link_lu(sl) != SBD_SUCCESS) {
1478 		*err_ret = SBD_RET_FILE_ALREADY_REGISTERED;
1479 		ret = EALREADY;
1480 		goto scm_err_out;
1481 	}
1482 
1483 	/* 1st focus on the data store */
1484 	if (slu->slu_lu_size_valid) {
1485 		sl->sl_lu_size = slu->slu_lu_size;
1486 	}
1487 	ret = sbd_open_data_file(sl, err_ret, slu->slu_lu_size_valid, 0, 0);
1488 	slu->slu_ret_filesize_nbits = sl->sl_data_fs_nbits;
1489 	slu->slu_lu_size = sl->sl_lu_size;
1490 	if (ret) {
1491 		goto scm_err_out;
1492 	}
1493 
1494 	/*
1495 	 * set write cache disable on the device
1496 	 * if it fails, we'll support it using sync/flush
1497 	 */
1498 	if (slu->slu_writeback_cache_disable) {
1499 		(void) sbd_wcd_set(1, sl);
1500 		wcd = 1;
1501 	/*
1502 	 * Attempt to set it to enable, if that fails and it was explicitly set
1503 	 * return an error, otherwise get the current setting and use that
1504 	 */
1505 	} else {
1506 		sret = sbd_wcd_set(0, sl);
1507 		if (slu->slu_writeback_cache_disable_valid &&
1508 		    sret != SBD_SUCCESS) {
1509 			*err_ret = SBD_RET_WRITE_CACHE_SET_FAILED;
1510 			ret = EFAULT;
1511 			goto scm_err_out;
1512 		}
1513 		if (sret != SBD_SUCCESS) {
1514 			sbd_wcd_get(&wcd, sl);
1515 		}
1516 	}
1517 
1518 	if (wcd) {
1519 		sl->sl_flags |= SL_WRITEBACK_CACHE_DISABLE |
1520 		    SL_SAVED_WRITE_CACHE_DISABLE;
1521 	}
1522 
1523 	if (sl->sl_flags & SL_SHARED_META) {
1524 		goto over_meta_open;
1525 	}
1526 	if (sl->sl_flags & SL_ZFS_META) {
1527 		if (sbd_create_zfs_meta_object(sl) != SBD_SUCCESS) {
1528 			*err_ret = SBD_RET_ZFS_META_CREATE_FAILED;
1529 			ret = ENOMEM;
1530 			goto scm_err_out;
1531 		}
1532 		sl->sl_meta_blocksize_shift = 0;
1533 		goto over_meta_create;
1534 	}
1535 	if ((ret = lookupname(sl->sl_meta_filename, UIO_SYSSPACE, FOLLOW,
1536 	    NULLVPP, &sl->sl_meta_vp)) != 0) {
1537 		*err_ret = SBD_RET_META_FILE_LOOKUP_FAILED;
1538 		goto scm_err_out;
1539 	}
1540 	sl->sl_meta_vtype = vt = sl->sl_meta_vp->v_type;
1541 	VN_RELE(sl->sl_meta_vp);
1542 	if ((vt != VREG) && (vt != VCHR) && (vt != VBLK)) {
1543 		*err_ret = SBD_RET_WRONG_META_FILE_TYPE;
1544 		ret = EINVAL;
1545 		goto scm_err_out;
1546 	}
1547 	if (vt == VREG) {
1548 		sl->sl_meta_blocksize_shift = 0;
1549 	} else {
1550 		sl->sl_meta_blocksize_shift = 9;
1551 	}
1552 	flag = FREAD | FWRITE | FOFFMAX | FEXCL;
1553 	if ((ret = vn_open(sl->sl_meta_filename, UIO_SYSSPACE, flag, 0,
1554 	    &sl->sl_meta_vp, 0, 0)) != 0) {
1555 		*err_ret = SBD_RET_META_FILE_OPEN_FAILED;
1556 		goto scm_err_out;
1557 	}
1558 over_meta_create:
1559 	sl->sl_total_meta_size = sl->sl_meta_offset + sizeof (sbd_meta_start_t);
1560 	sl->sl_total_meta_size +=
1561 	    (((uint64_t)1) << sl->sl_meta_blocksize_shift) - 1;
1562 	sl->sl_total_meta_size &=
1563 	    ~((((uint64_t)1) << sl->sl_meta_blocksize_shift) - 1);
1564 	sl->sl_meta_size_used = 0;
1565 over_meta_open:
1566 	sl->sl_flags |= SL_META_OPENED;
1567 
1568 	sl->sl_device_id[3] = 16;
1569 	if (slu->slu_guid_valid) {
1570 		sl->sl_device_id[0] = 0xf1;
1571 		sl->sl_device_id[1] = 3;
1572 		sl->sl_device_id[2] = 0;
1573 		bcopy(slu->slu_guid, sl->sl_device_id + 4, 16);
1574 	} else {
1575 		if (!slu->slu_company_id_valid)
1576 			slu->slu_company_id = COMPANY_ID_SUN;
1577 		if (stmf_scsilib_uniq_lu_id(slu->slu_company_id,
1578 		    (scsi_devid_desc_t *)&sl->sl_device_id[0]) !=
1579 		    STMF_SUCCESS) {
1580 			*err_ret = SBD_RET_META_CREATION_FAILED;
1581 			ret = EIO;
1582 			goto scm_err_out;
1583 		}
1584 		bcopy(sl->sl_device_id + 4, slu->slu_guid, 16);
1585 	}
1586 
1587 	/* Lets create the meta now */
1588 	if (sbd_write_meta_start(sl, sl->sl_total_meta_size,
1589 	    sizeof (sbd_meta_start_t)) != SBD_SUCCESS) {
1590 		*err_ret = SBD_RET_META_CREATION_FAILED;
1591 		ret = EIO;
1592 		goto scm_err_out;
1593 	}
1594 	sl->sl_meta_size_used = sl->sl_meta_offset + sizeof (sbd_meta_start_t);
1595 
1596 	if (sbd_write_lu_info(sl) != SBD_SUCCESS) {
1597 		*err_ret = SBD_RET_META_CREATION_FAILED;
1598 		ret = EIO;
1599 		goto scm_err_out;
1600 	}
1601 
1602 	if (sbd_pgr_meta_init(sl) != SBD_SUCCESS) {
1603 		*err_ret = SBD_RET_META_CREATION_FAILED;
1604 		ret = EIO;
1605 		goto scm_err_out;
1606 	}
1607 
1608 	ret = sbd_populate_and_register_lu(sl, err_ret);
1609 	if (ret) {
1610 		goto scm_err_out;
1611 	}
1612 
1613 	sl->sl_trans_op = SL_OP_NONE;
1614 	atomic_add_32(&sbd_lu_count, 1);
1615 	return (0);
1616 
1617 scm_err_out:
1618 	return (sbd_close_delete_lu(sl, ret));
1619 }
1620 
1621 int
1622 sbd_load_sli_1_0(sbd_lu_t *sl, uint32_t *err_ret)
1623 {
1624 	sbd_lu_info_1_0_t *sli = NULL;
1625 	sbd_status_t sret;
1626 
1627 	sret = sbd_read_meta_section(sl, (sm_section_hdr_t **)&sli,
1628 	    SMS_ID_LU_INFO_1_0);
1629 
1630 	if (sret != SBD_SUCCESS) {
1631 		*err_ret = SBD_RET_NO_META;
1632 		return (EIO);
1633 	}
1634 	if (sli->sli_data_order != SMS_DATA_ORDER) {
1635 		sbd_swap_lu_info_1_0(sli);
1636 		if (sli->sli_data_order != SMS_DATA_ORDER) {
1637 			kmem_free(sli, sli->sli_sms_header.sms_size);
1638 			*err_ret = SBD_RET_NO_META;
1639 			return (EIO);
1640 		}
1641 	}
1642 
1643 	sl->sl_flags |= SL_SHARED_META;
1644 	sl->sl_data_blocksize_shift = 9;
1645 	sl->sl_data_offset = SHARED_META_DATA_SIZE;
1646 	sl->sl_lu_size = sli->sli_total_store_size - SHARED_META_DATA_SIZE;
1647 	sl->sl_total_data_size = SHARED_META_DATA_SIZE + sl->sl_lu_size;
1648 	bcopy(sli->sli_lu_devid, sl->sl_device_id, 20);
1649 
1650 	kmem_free(sli, sli->sli_sms_header.sms_size);
1651 	return (0);
1652 }
1653 
1654 int
1655 sbd_import_lu(sbd_import_lu_t *ilu, int struct_sz, uint32_t *err_ret,
1656     int no_register, sbd_lu_t **slr)
1657 {
1658 	stmf_lu_t *lu;
1659 	sbd_lu_t *sl;
1660 	sbd_lu_info_1_1_t *sli = NULL;
1661 	int asz;
1662 	int ret = 0;
1663 	int flag;
1664 	int wcd = 0;
1665 	int data_opened;
1666 	uint16_t sli_buf_sz;
1667 	uint8_t *sli_buf_copy = NULL;
1668 	enum vtype vt;
1669 	sbd_status_t sret;
1670 
1671 	if (no_register && slr == NULL) {
1672 		return (EINVAL);
1673 	}
1674 	ilu->ilu_meta_fname[struct_sz - sizeof (*ilu) + 8 - 1] = 0;
1675 	asz = strlen(ilu->ilu_meta_fname) + 1;
1676 
1677 	lu = (stmf_lu_t *)stmf_alloc(STMF_STRUCT_STMF_LU,
1678 	    sizeof (sbd_lu_t) + sizeof (sbd_pgr_t) + asz, 0);
1679 	if (lu == NULL) {
1680 		return (ENOMEM);
1681 	}
1682 	sl = (sbd_lu_t *)lu->lu_provider_private;
1683 	bzero(sl, sizeof (*sl));
1684 	sl->sl_lu = lu;
1685 	sl->sl_pgr = (sbd_pgr_t *)(sl + 1);
1686 	sl->sl_meta_filename = ((char *)sl) + sizeof (*sl) + sizeof (sbd_pgr_t);
1687 	(void) strcpy(sl->sl_meta_filename, ilu->ilu_meta_fname);
1688 	sl->sl_name = sl->sl_meta_filename;
1689 	rw_init(&sl->sl_pgr->pgr_lock, NULL, RW_DRIVER, NULL);
1690 	mutex_init(&sl->sl_lock, NULL, MUTEX_DRIVER, NULL);
1691 	sl->sl_trans_op = SL_OP_IMPORT_LU;
1692 	/* we're only loading the metadata */
1693 	if (!no_register) {
1694 		if (sbd_link_lu(sl) != SBD_SUCCESS) {
1695 			*err_ret = SBD_RET_FILE_ALREADY_REGISTERED;
1696 			ret = EALREADY;
1697 			goto sim_err_out;
1698 		}
1699 	}
1700 	if ((ret = lookupname(sl->sl_meta_filename, UIO_SYSSPACE, FOLLOW,
1701 	    NULLVPP, &sl->sl_meta_vp)) != 0) {
1702 		*err_ret = SBD_RET_META_FILE_LOOKUP_FAILED;
1703 		goto sim_err_out;
1704 	}
1705 	if (sbd_is_zvol(sl->sl_meta_filename)) {
1706 		sl->sl_flags |= SL_ZFS_META;
1707 		sl->sl_data_filename = sl->sl_meta_filename;
1708 	}
1709 	sl->sl_meta_vtype = vt = sl->sl_meta_vp->v_type;
1710 	VN_RELE(sl->sl_meta_vp);
1711 	if ((vt != VREG) && (vt != VCHR) && (vt != VBLK)) {
1712 		*err_ret = SBD_RET_WRONG_META_FILE_TYPE;
1713 		ret = EINVAL;
1714 		goto sim_err_out;
1715 	}
1716 	if (sl->sl_flags & SL_ZFS_META) {
1717 		if (sbd_open_zfs_meta(sl) != SBD_SUCCESS) {
1718 			/* let see if metadata is in the 64k block */
1719 			sl->sl_flags &= ~SL_ZFS_META;
1720 		}
1721 	}
1722 	if (!(sl->sl_flags & SL_ZFS_META)) {
1723 		/* metadata is always writable */
1724 		flag = FREAD | FWRITE | FOFFMAX | FEXCL;
1725 		if ((ret = vn_open(sl->sl_meta_filename, UIO_SYSSPACE, flag, 0,
1726 		    &sl->sl_meta_vp, 0, 0)) != 0) {
1727 			*err_ret = SBD_RET_META_FILE_OPEN_FAILED;
1728 			goto sim_err_out;
1729 		}
1730 	}
1731 	if ((sl->sl_flags & SL_ZFS_META) || (vt == VREG)) {
1732 		sl->sl_meta_blocksize_shift = 0;
1733 	} else {
1734 		sl->sl_meta_blocksize_shift = 9;
1735 	}
1736 	sl->sl_meta_offset = (sl->sl_flags & SL_ZFS_META) ? 0 : SBD_META_OFFSET;
1737 	sl->sl_flags |= SL_META_OPENED;
1738 
1739 	sret = sbd_load_meta_start(sl);
1740 	if (sret != SBD_SUCCESS) {
1741 		if (sret == SBD_META_CORRUPTED) {
1742 			*err_ret = SBD_RET_NO_META;
1743 		} else if (sret == SBD_NOT_SUPPORTED) {
1744 			*err_ret = SBD_RET_VERSION_NOT_SUPPORTED;
1745 		} else {
1746 			*err_ret = SBD_RET_NO_META;
1747 		}
1748 		ret = EINVAL;
1749 		goto sim_err_out;
1750 	}
1751 
1752 	/* Now lets see if we can read the most recent LU info */
1753 	sret = sbd_read_meta_section(sl, (sm_section_hdr_t **)&sli,
1754 	    SMS_ID_LU_INFO_1_1);
1755 	if ((sret == SBD_NOT_FOUND) && ((sl->sl_flags & SL_ZFS_META) == 0)) {
1756 		ret = sbd_load_sli_1_0(sl, err_ret);
1757 		if (ret)
1758 			goto sim_err_out;
1759 		goto sim_sli_loaded;
1760 	}
1761 	if (sret != SBD_SUCCESS) {
1762 		*err_ret = SBD_RET_NO_META;
1763 		ret = EIO;
1764 		goto sim_err_out;
1765 	}
1766 	/* load sli 1.1 */
1767 	if (sli->sli_data_order != SMS_DATA_ORDER) {
1768 		sbd_swap_lu_info_1_1(sli);
1769 		if (sli->sli_data_order != SMS_DATA_ORDER) {
1770 			*err_ret = SBD_RET_NO_META;
1771 			ret = EIO;
1772 			goto sim_err_out;
1773 		}
1774 	}
1775 
1776 	sli_buf_sz = sli->sli_sms_header.sms_size -
1777 	    sizeof (sbd_lu_info_1_1_t) + 8;
1778 	sli_buf_copy = kmem_alloc(sli_buf_sz + 1, KM_SLEEP);
1779 	bcopy(sli->sli_buf, sli_buf_copy, sli_buf_sz);
1780 	sli_buf_copy[sli_buf_sz] = 0;
1781 
1782 	/* Make sure all the offsets are within limits */
1783 	if (((sli->sli_flags & SLI_META_FNAME_VALID) &&
1784 	    (sli->sli_meta_fname_offset > sli_buf_sz)) ||
1785 	    ((sli->sli_flags & SLI_DATA_FNAME_VALID) &&
1786 	    (sli->sli_data_fname_offset > sli_buf_sz)) ||
1787 	    ((sli->sli_flags & SLI_SERIAL_VALID) &&
1788 	    ((sli->sli_serial_offset + sli->sli_serial_size) > sli_buf_sz)) ||
1789 	    ((sli->sli_flags & SLI_ALIAS_VALID) &&
1790 	    (sli->sli_alias_offset > sli_buf_sz))) {
1791 		*err_ret = SBD_RET_NO_META;
1792 		ret = EIO;
1793 		goto sim_err_out;
1794 	}
1795 
1796 	if (sl->sl_flags & SL_ZFS_META) {
1797 		/* Verify that its the right zfs node and not some clone */
1798 		int same_zvol;
1799 		char *zvol_name = sbd_get_zvol_name(sl);
1800 
1801 		if ((sli->sli_flags & (SLI_ZFS_META |
1802 		    SLI_META_FNAME_VALID)) == 0) {
1803 			*err_ret = SBD_RET_NO_META;
1804 			ret = EIO;
1805 			kmem_free(zvol_name, strlen(zvol_name) + 1);
1806 			goto sim_err_out;
1807 		}
1808 		if (strcmp(zvol_name, (char *)sli_buf_copy +
1809 		    sli->sli_meta_fname_offset) != 0)
1810 			same_zvol = 0;
1811 		else
1812 			same_zvol = 1;
1813 		kmem_free(zvol_name, strlen(zvol_name) + 1);
1814 		if (!same_zvol) {
1815 			*err_ret = SBD_ZVOL_META_NAME_MISMATCH;
1816 			ret = EINVAL;
1817 			goto sim_err_out;
1818 		}
1819 	}
1820 	sl->sl_lu_size = sli->sli_lu_size;
1821 	sl->sl_data_blocksize_shift = sli->sli_data_blocksize_shift;
1822 	bcopy(sli->sli_device_id, sl->sl_device_id, 20);
1823 	if (sli->sli_flags & SLI_SERIAL_VALID) {
1824 		sl->sl_serial_no_size = sl->sl_serial_no_alloc_size =
1825 		    sli->sli_serial_size;
1826 		sl->sl_serial_no = kmem_zalloc(sli->sli_serial_size, KM_SLEEP);
1827 		bcopy(sli_buf_copy + sli->sli_serial_offset, sl->sl_serial_no,
1828 		    sl->sl_serial_no_size);
1829 	}
1830 	if (sli->sli_flags & SLI_SEPARATE_META) {
1831 		sl->sl_total_data_size = sl->sl_lu_size;
1832 		if (sli->sli_flags & SLI_DATA_FNAME_VALID) {
1833 			sl->sl_data_fname_alloc_size = strlen((char *)
1834 			    sli_buf_copy + sli->sli_data_fname_offset) + 1;
1835 			sl->sl_data_filename = kmem_zalloc(
1836 			    sl->sl_data_fname_alloc_size, KM_SLEEP);
1837 			(void) strcpy(sl->sl_data_filename,
1838 			    (char *)sli_buf_copy + sli->sli_data_fname_offset);
1839 		}
1840 	} else {
1841 		if (sl->sl_flags & SL_ZFS_META) {
1842 			sl->sl_total_data_size = sl->sl_lu_size;
1843 			sl->sl_data_offset = 0;
1844 		} else {
1845 			sl->sl_total_data_size =
1846 			    sl->sl_lu_size + SHARED_META_DATA_SIZE;
1847 			sl->sl_data_offset = SHARED_META_DATA_SIZE;
1848 			sl->sl_flags |= SL_SHARED_META;
1849 		}
1850 	}
1851 	if (sli->sli_flags & SLI_ALIAS_VALID) {
1852 		sl->sl_alias_alloc_size = strlen((char *)sli_buf_copy +
1853 		    sli->sli_alias_offset) + 1;
1854 		sl->sl_alias = kmem_alloc(sl->sl_alias_alloc_size, KM_SLEEP);
1855 		(void) strcpy(sl->sl_alias, (char *)sli_buf_copy +
1856 		    sli->sli_alias_offset);
1857 	}
1858 	if (sli->sli_flags & SLI_WRITE_PROTECTED) {
1859 		sl->sl_flags |= SL_WRITE_PROTECTED;
1860 	}
1861 	if (sli->sli_flags & SLI_VID_VALID) {
1862 		sl->sl_flags |= SL_VID_VALID;
1863 		bcopy(sli->sli_vid, sl->sl_vendor_id, 8);
1864 	}
1865 	if (sli->sli_flags & SLI_PID_VALID) {
1866 		sl->sl_flags |= SL_PID_VALID;
1867 		bcopy(sli->sli_pid, sl->sl_product_id, 16);
1868 	}
1869 	if (sli->sli_flags & SLI_REV_VALID) {
1870 		sl->sl_flags |= SL_REV_VALID;
1871 		bcopy(sli->sli_rev, sl->sl_revision, 4);
1872 	}
1873 	if (sli->sli_flags & SLI_WRITEBACK_CACHE_DISABLE) {
1874 		sl->sl_flags |= SL_WRITEBACK_CACHE_DISABLE;
1875 	}
1876 sim_sli_loaded:
1877 	if ((sl->sl_flags & SL_SHARED_META) == 0) {
1878 		data_opened = 0;
1879 	} else {
1880 		data_opened = 1;
1881 		sl->sl_data_filename = sl->sl_meta_filename;
1882 		sl->sl_data_vp = sl->sl_meta_vp;
1883 		sl->sl_data_vtype = sl->sl_meta_vtype;
1884 	}
1885 
1886 	sret = sbd_pgr_meta_load(sl);
1887 	if (sret != SBD_SUCCESS) {
1888 		*err_ret = SBD_RET_NO_META;
1889 		ret = EIO;
1890 		goto sim_err_out;
1891 	}
1892 
1893 	ret = sbd_open_data_file(sl, err_ret, 1, data_opened, 0);
1894 	if (ret)
1895 		goto sim_err_out;
1896 
1897 	/*
1898 	 * set write cache disable on the device
1899 	 * Note: this shouldn't fail on import unless the cache capabilities
1900 	 * of the device changed. If that happened, modify will need to
1901 	 * be used to set the cache flag appropriately after import is done.
1902 	 */
1903 	if (sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) {
1904 		(void) sbd_wcd_set(1, sl);
1905 		wcd = 1;
1906 	/*
1907 	 * if not explicitly set, attempt to set it to enable, if that fails
1908 	 * get the current setting and use that
1909 	 */
1910 	} else {
1911 		sret = sbd_wcd_set(0, sl);
1912 		if (sret != SBD_SUCCESS) {
1913 			sbd_wcd_get(&wcd, sl);
1914 		}
1915 	}
1916 
1917 	if (wcd) {
1918 		sl->sl_flags |= SL_WRITEBACK_CACHE_DISABLE |
1919 		    SL_SAVED_WRITE_CACHE_DISABLE;
1920 	}
1921 
1922 	/* we're only loading the metadata */
1923 	if (!no_register) {
1924 		ret = sbd_populate_and_register_lu(sl, err_ret);
1925 		if (ret)
1926 			goto sim_err_out;
1927 		atomic_add_32(&sbd_lu_count, 1);
1928 	}
1929 
1930 	bcopy(sl->sl_device_id + 4, ilu->ilu_ret_guid, 16);
1931 	sl->sl_trans_op = SL_OP_NONE;
1932 	if (sli) {
1933 		kmem_free(sli, sli->sli_sms_header.sms_size);
1934 		sli = NULL;
1935 	}
1936 	if (sli_buf_copy) {
1937 		kmem_free(sli_buf_copy, sli_buf_sz + 1);
1938 		sli_buf_copy = NULL;
1939 	}
1940 	if (no_register) {
1941 		*slr = sl;
1942 	}
1943 	return (0);
1944 
1945 sim_err_out:
1946 	if (sli) {
1947 		kmem_free(sli, sli->sli_sms_header.sms_size);
1948 		sli = NULL;
1949 	}
1950 	if (sli_buf_copy) {
1951 		kmem_free(sli_buf_copy, sli_buf_sz + 1);
1952 		sli_buf_copy = NULL;
1953 	}
1954 	return (sbd_close_delete_lu(sl, ret));
1955 }
1956 
1957 int
1958 sbd_modify_lu(sbd_modify_lu_t *mlu, int struct_sz, uint32_t *err_ret)
1959 {
1960 	sbd_lu_t *sl = NULL;
1961 	int alias_sz;
1962 	int ret = 0;
1963 	sbd_it_data_t *it;
1964 	sbd_status_t sret;
1965 	uint64_t old_size;
1966 	int modify_unregistered = 0;
1967 	int ua = 0;
1968 	sbd_import_lu_t *ilu;
1969 	stmf_lu_t *lu;
1970 	uint32_t ilu_sz;
1971 	uint32_t sz;
1972 
1973 	sz = struct_sz - sizeof (*mlu) + 8 + 1;
1974 
1975 	/* if there is data in the buf, null terminate it */
1976 	if (struct_sz > sizeof (*mlu)) {
1977 		mlu->mlu_buf[struct_sz - sizeof (*mlu) + 8 - 1] = 0;
1978 	}
1979 
1980 	*err_ret = 0;
1981 
1982 	/* Lets validate offsets */
1983 	if (((mlu->mlu_alias_valid) &&
1984 	    (mlu->mlu_alias_off >= sz)) ||
1985 	    (mlu->mlu_by_fname) &&
1986 	    (mlu->mlu_fname_off >= sz)) {
1987 		return (EINVAL);
1988 	}
1989 
1990 	/*
1991 	 * We'll look for the device but if we don't find it registered,
1992 	 * we'll still try to modify the unregistered device.
1993 	 */
1994 	if (mlu->mlu_by_guid) {
1995 		sret = sbd_find_and_lock_lu(mlu->mlu_input_guid, NULL,
1996 		    SL_OP_MODIFY_LU, &sl);
1997 	} else if (mlu->mlu_by_fname) {
1998 		sret = sbd_find_and_lock_lu(NULL,
1999 		    (uint8_t *)&(mlu->mlu_buf[mlu->mlu_fname_off]),
2000 		    SL_OP_MODIFY_LU, &sl);
2001 	} else {
2002 		return (EINVAL);
2003 	}
2004 
2005 
2006 	if (sret != SBD_SUCCESS) {
2007 		if (sret == SBD_BUSY) {
2008 			*err_ret = SBD_RET_LU_BUSY;
2009 			return (EBUSY);
2010 		} else if (sret != SBD_NOT_FOUND) {
2011 			return (EIO);
2012 		} else if (!mlu->mlu_by_fname) {
2013 			return (EINVAL);
2014 		}
2015 		/* Okay, try to import the device */
2016 		struct_sz = max(8, strlen(&(mlu->mlu_buf[mlu->mlu_fname_off]))
2017 		    + 1);
2018 		struct_sz += sizeof (sbd_import_lu_t) - 8;
2019 		ilu_sz = struct_sz;
2020 		ilu = (sbd_import_lu_t *)kmem_zalloc(ilu_sz, KM_SLEEP);
2021 		ilu->ilu_struct_size = struct_sz;
2022 		(void) strcpy(ilu->ilu_meta_fname,
2023 		    &(mlu->mlu_buf[mlu->mlu_fname_off]));
2024 		ret = sbd_import_lu(ilu, struct_sz, err_ret, 1, &sl);
2025 		kmem_free(ilu, ilu_sz);
2026 		if (ret != SBD_SUCCESS) {
2027 			return (ENOENT);
2028 		}
2029 		modify_unregistered = 1;
2030 	}
2031 
2032 	/* check for write cache change */
2033 	if (mlu->mlu_writeback_cache_disable_valid) {
2034 		/* set wce on device */
2035 		sret = sbd_wcd_set(mlu->mlu_writeback_cache_disable, sl);
2036 		if (!mlu->mlu_writeback_cache_disable && sret != SBD_SUCCESS) {
2037 			*err_ret = SBD_RET_WRITE_CACHE_SET_FAILED;
2038 			ret = EFAULT;
2039 			goto smm_err_out;
2040 		}
2041 		mutex_enter(&sl->sl_lock);
2042 		if (!mlu->mlu_writeback_cache_disable) {
2043 			if (sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) {
2044 				ua = 1;
2045 				sl->sl_flags &= ~SL_WRITEBACK_CACHE_DISABLE;
2046 				sl->sl_flags &= ~SL_SAVED_WRITE_CACHE_DISABLE;
2047 			}
2048 		} else {
2049 			if ((sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) == 0) {
2050 				ua = 1;
2051 				sl->sl_flags |= SL_WRITEBACK_CACHE_DISABLE;
2052 				sl->sl_flags |= SL_SAVED_WRITE_CACHE_DISABLE;
2053 			}
2054 		}
2055 		for (it = sl->sl_it_list; ua && it != NULL;
2056 		    it = it->sbd_it_next) {
2057 			it->sbd_it_ua_conditions |=
2058 			    SBD_UA_MODE_PARAMETERS_CHANGED;
2059 		}
2060 		mutex_exit(&sl->sl_lock);
2061 	}
2062 	ua = 0;
2063 
2064 	if (mlu->mlu_alias_valid) {
2065 		alias_sz = strlen((char *)mlu->mlu_buf +
2066 		    mlu->mlu_alias_off) + 1;
2067 		/*
2068 		 * Use the allocated buffer or alloc a new one.
2069 		 * Don't copy into sl_alias if sl_alias_alloc_size is 0
2070 		 * otherwise or you'll be writing over the data/metadata
2071 		 * filename.
2072 		 */
2073 		mutex_enter(&sl->sl_lock);
2074 		if (sl->sl_alias_alloc_size > 0 &&
2075 		    sl->sl_alias_alloc_size < alias_sz) {
2076 			kmem_free(sl->sl_alias,
2077 			    sl->sl_alias_alloc_size);
2078 			sl->sl_alias_alloc_size = 0;
2079 		}
2080 		if (sl->sl_alias_alloc_size == 0) {
2081 			sl->sl_alias = kmem_alloc(alias_sz, KM_SLEEP);
2082 			sl->sl_alias_alloc_size = alias_sz;
2083 		}
2084 		(void) strcpy(sl->sl_alias, (char *)mlu->mlu_buf +
2085 		    mlu->mlu_alias_off);
2086 		lu = sl->sl_lu;
2087 		lu->lu_alias = sl->sl_alias;
2088 		mutex_exit(&sl->sl_lock);
2089 	}
2090 
2091 
2092 	if (mlu->mlu_write_protected_valid) {
2093 		mutex_enter(&sl->sl_lock);
2094 		if (mlu->mlu_write_protected) {
2095 			if ((sl->sl_flags & SL_WRITE_PROTECTED) == 0) {
2096 				ua = 1;
2097 				sl->sl_flags |= SL_WRITE_PROTECTED;
2098 			}
2099 		} else {
2100 			if (sl->sl_flags & SL_WRITE_PROTECTED) {
2101 				ua = 1;
2102 				sl->sl_flags &= ~SL_WRITE_PROTECTED;
2103 			}
2104 		}
2105 		for (it = sl->sl_it_list; ua && it != NULL;
2106 		    it = it->sbd_it_next) {
2107 			it->sbd_it_ua_conditions |=
2108 			    SBD_UA_MODE_PARAMETERS_CHANGED;
2109 		}
2110 		mutex_exit(&sl->sl_lock);
2111 	}
2112 
2113 	if (mlu->mlu_lu_size_valid) {
2114 		/*
2115 		 * validate lu size and set
2116 		 * For open file only (registered lu)
2117 		 */
2118 		mutex_enter(&sl->sl_lock);
2119 		old_size = sl->sl_lu_size;
2120 		sl->sl_lu_size = mlu->mlu_lu_size;
2121 		mutex_exit(&sl->sl_lock);
2122 		ret = sbd_open_data_file(sl, err_ret, 1, 1, 1);
2123 		if (ret) {
2124 			mutex_enter(&sl->sl_lock);
2125 			sl->sl_lu_size = old_size;
2126 			mutex_exit(&sl->sl_lock);
2127 			goto smm_err_out;
2128 		}
2129 		if (old_size != mlu->mlu_lu_size) {
2130 			mutex_enter(&sl->sl_lock);
2131 			for (it = sl->sl_it_list; it != NULL;
2132 			    it = it->sbd_it_next) {
2133 				it->sbd_it_ua_conditions |=
2134 				    SBD_UA_CAPACITY_CHANGED;
2135 			}
2136 			mutex_exit(&sl->sl_lock);
2137 		}
2138 	}
2139 
2140 	if (sbd_write_lu_info(sl) != SBD_SUCCESS) {
2141 		*err_ret = SBD_RET_META_CREATION_FAILED;
2142 		ret = EIO;
2143 	}
2144 
2145 smm_err_out:
2146 	if (modify_unregistered) {
2147 		(void) sbd_close_delete_lu(sl, 0);
2148 	} else {
2149 		sl->sl_trans_op = SL_OP_NONE;
2150 	}
2151 	return (ret);
2152 }
2153 
2154 /* ARGSUSED */
2155 int
2156 sbd_delete_locked_lu(sbd_lu_t *sl, uint32_t *err_ret,
2157     stmf_state_change_info_t *ssi)
2158 {
2159 	int i;
2160 
2161 	if ((sl->sl_state == STMF_STATE_OFFLINE) &&
2162 	    !sl->sl_state_not_acked) {
2163 		goto sdl_do_dereg;
2164 	}
2165 
2166 	if ((sl->sl_state != STMF_STATE_ONLINE) ||
2167 	    sl->sl_state_not_acked) {
2168 		return (EBUSY);
2169 	}
2170 	if (stmf_ctl(STMF_CMD_LU_OFFLINE, sl->sl_lu, ssi) != STMF_SUCCESS) {
2171 		return (EBUSY);
2172 	}
2173 
2174 	for (i = 0; i < 500; i++) {
2175 		if (sl->sl_state == STMF_STATE_OFFLINE)
2176 			break;
2177 		delay(drv_usectohz(10000));
2178 	}
2179 
2180 	if ((sl->sl_state == STMF_STATE_OFFLINE) &&
2181 	    !sl->sl_state_not_acked) {
2182 		goto sdl_do_dereg;
2183 	}
2184 
2185 	return (EBUSY);
2186 sdl_do_dereg:;
2187 	if (stmf_deregister_lu(sl->sl_lu) != STMF_SUCCESS)
2188 		return (EBUSY);
2189 	atomic_add_32(&sbd_lu_count, -1);
2190 
2191 	return (sbd_close_delete_lu(sl, 0));
2192 }
2193 
2194 int
2195 sbd_delete_lu(sbd_delete_lu_t *dlu, int struct_sz, uint32_t *err_ret)
2196 {
2197 	sbd_lu_t *sl;
2198 	sbd_status_t sret;
2199 	stmf_state_change_info_t ssi;
2200 	int ret;
2201 
2202 	if (dlu->dlu_by_meta_name) {
2203 		((char *)dlu)[struct_sz - 1] = 0;
2204 		sret = sbd_find_and_lock_lu(NULL, dlu->dlu_meta_name,
2205 		    SL_OP_DELETE_LU, &sl);
2206 	} else {
2207 		sret = sbd_find_and_lock_lu(dlu->dlu_guid, NULL,
2208 		    SL_OP_DELETE_LU, &sl);
2209 	}
2210 	if (sret != SBD_SUCCESS) {
2211 		if (sret == SBD_BUSY) {
2212 			*err_ret = SBD_RET_LU_BUSY;
2213 			return (EBUSY);
2214 		} else if (sret == SBD_NOT_FOUND) {
2215 			*err_ret = SBD_RET_NOT_FOUND;
2216 			return (ENOENT);
2217 		}
2218 		return (EIO);
2219 	}
2220 
2221 	ssi.st_rflags = STMF_RFLAG_USER_REQUEST;
2222 	ssi.st_additional_info = "sbd_delete_lu call (ioctl)";
2223 	ret = sbd_delete_locked_lu(sl, err_ret, &ssi);
2224 
2225 	if (ret) {
2226 		/* Once its locked, no need to grab mutex again */
2227 		sl->sl_trans_op = SL_OP_NONE;
2228 	}
2229 	return (ret);
2230 }
2231 
2232 sbd_status_t
2233 sbd_data_read(sbd_lu_t *sl, uint64_t offset, uint64_t size, uint8_t *buf)
2234 {
2235 	int ret;
2236 	long resid;
2237 
2238 	if ((offset + size) > sl->sl_lu_size) {
2239 		return (SBD_IO_PAST_EOF);
2240 	}
2241 
2242 	offset += sl->sl_data_offset;
2243 
2244 	if ((offset + size) > sl->sl_data_readable_size) {
2245 		uint64_t store_end;
2246 		if (offset > sl->sl_data_readable_size) {
2247 			bzero(buf, size);
2248 			return (SBD_SUCCESS);
2249 		}
2250 		store_end = sl->sl_data_readable_size - offset;
2251 		bzero(buf + store_end, size - store_end);
2252 		size = store_end;
2253 	}
2254 
2255 	DTRACE_PROBE4(backing__store__read__start, sbd_lu_t *, sl,
2256 	    uint8_t *, buf, uint64_t, size, uint64_t, offset);
2257 
2258 	ret = vn_rdwr(UIO_READ, sl->sl_data_vp, (caddr_t)buf, (ssize_t)size,
2259 	    (offset_t)offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, CRED(),
2260 	    &resid);
2261 
2262 	DTRACE_PROBE5(backing__store__read__end, sbd_lu_t *, sl,
2263 	    uint8_t *, buf, uint64_t, size, uint64_t, offset,
2264 	    int, ret);
2265 
2266 over_sl_data_read:
2267 	if (ret || resid) {
2268 		stmf_trace(0, "UIO_READ failed, ret = %d, resid = %d", ret,
2269 		    resid);
2270 		return (SBD_FAILURE);
2271 	}
2272 
2273 	return (SBD_SUCCESS);
2274 }
2275 
2276 sbd_status_t
2277 sbd_data_write(sbd_lu_t *sl, uint64_t offset, uint64_t size, uint8_t *buf)
2278 {
2279 	int ret;
2280 	long resid;
2281 	sbd_status_t sret = SBD_SUCCESS;
2282 	int ioflag;
2283 
2284 	if ((offset + size) > sl->sl_lu_size) {
2285 		return (SBD_IO_PAST_EOF);
2286 	}
2287 
2288 	offset += sl->sl_data_offset;
2289 
2290 	if ((sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) &&
2291 	    (sl->sl_flags & SL_FLUSH_ON_DISABLED_WRITECACHE)) {
2292 		ioflag = FSYNC;
2293 	} else {
2294 		ioflag = 0;
2295 	}
2296 
2297 	DTRACE_PROBE4(backing__store__write__start, sbd_lu_t *, sl,
2298 	    uint8_t *, buf, uint64_t, size, uint64_t, offset);
2299 
2300 	ret = vn_rdwr(UIO_WRITE, sl->sl_data_vp, (caddr_t)buf, (ssize_t)size,
2301 	    (offset_t)offset, UIO_SYSSPACE, ioflag, RLIM64_INFINITY, CRED(),
2302 	    &resid);
2303 
2304 	DTRACE_PROBE5(backing__store__write__end, sbd_lu_t *, sl,
2305 	    uint8_t *, buf, uint64_t, size, uint64_t, offset,
2306 	    int, ret);
2307 
2308 	if ((ret == 0) && (resid == 0) &&
2309 	    (sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE) &&
2310 	    (sl->sl_flags & SL_FLUSH_ON_DISABLED_WRITECACHE)) {
2311 		sret = sbd_flush_data_cache(sl, 1);
2312 	}
2313 over_sl_data_write:
2314 
2315 	if ((ret || resid) || (sret != SBD_SUCCESS)) {
2316 		return (SBD_FAILURE);
2317 	} else if ((offset + size) > sl->sl_data_readable_size) {
2318 		uint64_t old_size, new_size;
2319 
2320 		do {
2321 			old_size = sl->sl_data_readable_size;
2322 			if ((offset + size) <= old_size)
2323 				break;
2324 			new_size = offset + size;
2325 		} while (atomic_cas_64(&sl->sl_data_readable_size, old_size,
2326 		    new_size) != old_size);
2327 	}
2328 
2329 	return (SBD_SUCCESS);
2330 }
2331 
2332 int
2333 sbd_get_lu_props(sbd_lu_props_t *islp, uint32_t islp_sz,
2334     sbd_lu_props_t *oslp, uint32_t oslp_sz, uint32_t *err_ret)
2335 {
2336 	sbd_status_t sret;
2337 	sbd_lu_t *sl = NULL;
2338 	uint32_t sz;
2339 	uint16_t off;
2340 
2341 	if (islp->slp_input_guid) {
2342 		sret = sbd_find_and_lock_lu(islp->slp_guid, NULL,
2343 		    SL_OP_LU_PROPS, &sl);
2344 	} else {
2345 		((char *)islp)[islp_sz - 1] = 0;
2346 		sret = sbd_find_and_lock_lu(NULL, islp->slp_buf,
2347 		    SL_OP_LU_PROPS, &sl);
2348 	}
2349 	if (sret != SBD_SUCCESS) {
2350 		if (sret == SBD_BUSY) {
2351 			*err_ret = SBD_RET_LU_BUSY;
2352 			return (EBUSY);
2353 		} else if (sret == SBD_NOT_FOUND) {
2354 			*err_ret = SBD_RET_NOT_FOUND;
2355 			return (ENOENT);
2356 		}
2357 		return (EIO);
2358 	}
2359 
2360 	sz = strlen(sl->sl_name) + 1;
2361 	if ((sl->sl_flags & (SL_ZFS_META | SL_SHARED_META)) == 0) {
2362 		if (sl->sl_data_filename) {
2363 			sz += strlen(sl->sl_data_filename) + 1;
2364 		}
2365 	}
2366 	sz += sl->sl_serial_no_size;
2367 	if (sl->sl_alias) {
2368 		sz += strlen(sl->sl_alias) + 1;
2369 	}
2370 
2371 	bzero(oslp, sizeof (*oslp) - 8);
2372 	oslp->slp_buf_size_needed = sz;
2373 
2374 	if (sz > (oslp_sz - sizeof (*oslp) + 8)) {
2375 		sl->sl_trans_op = SL_OP_NONE;
2376 		*err_ret = SBD_RET_INSUFFICIENT_BUF_SPACE;
2377 		return (ENOMEM);
2378 	}
2379 
2380 	off = 0;
2381 	(void) strcpy((char *)oslp->slp_buf, sl->sl_name);
2382 	oslp->slp_meta_fname_off = off;
2383 	off += strlen(sl->sl_name) + 1;
2384 	if ((sl->sl_flags & (SL_ZFS_META | SL_SHARED_META)) == 0) {
2385 		oslp->slp_meta_fname_valid = 1;
2386 		oslp->slp_separate_meta = 1;
2387 		if (sl->sl_data_filename) {
2388 			oslp->slp_data_fname_valid = 1;
2389 			oslp->slp_data_fname_off = off;
2390 			(void) strcpy((char *)&oslp->slp_buf[off],
2391 			    sl->sl_data_filename);
2392 			off += strlen(sl->sl_data_filename) + 1;
2393 		}
2394 	} else {
2395 		oslp->slp_data_fname_valid = 1;
2396 		oslp->slp_data_fname_off = oslp->slp_meta_fname_off;
2397 		if (sl->sl_flags & SL_ZFS_META) {
2398 			oslp->slp_zfs_meta = 1;
2399 		}
2400 	}
2401 	if (sl->sl_alias) {
2402 		oslp->slp_alias_valid = 1;
2403 		oslp->slp_alias_off = off;
2404 		(void) strcpy((char *)&oslp->slp_buf[off], sl->sl_alias);
2405 		off += strlen(sl->sl_alias) + 1;
2406 	}
2407 	if (sl->sl_serial_no_size) {
2408 		oslp->slp_serial_off = off;
2409 		bcopy(sl->sl_serial_no, &oslp->slp_buf[off],
2410 		    sl->sl_serial_no_size);
2411 		oslp->slp_serial_size = sl->sl_serial_no_size;
2412 		oslp->slp_serial_valid = 1;
2413 		off += sl->sl_serial_no_size;
2414 	}
2415 
2416 	oslp->slp_lu_size = sl->sl_lu_size;
2417 	oslp->slp_blksize = ((uint16_t)1) << sl->sl_data_blocksize_shift;
2418 
2419 	if (sl->sl_flags & SL_VID_VALID) {
2420 		oslp->slp_lu_vid = 1;
2421 		bcopy(sl->sl_vendor_id, oslp->slp_vid, 8);
2422 	} else {
2423 		bcopy(sbd_vendor_id, oslp->slp_vid, 8);
2424 	}
2425 	if (sl->sl_flags & SL_PID_VALID) {
2426 		oslp->slp_lu_pid = 1;
2427 		bcopy(sl->sl_product_id, oslp->slp_pid, 16);
2428 	} else {
2429 		bcopy(sbd_product_id, oslp->slp_pid, 16);
2430 	}
2431 	if (sl->sl_flags & SL_REV_VALID) {
2432 		oslp->slp_lu_rev = 1;
2433 		bcopy(sl->sl_revision, oslp->slp_rev, 4);
2434 	} else {
2435 		bcopy(sbd_revision, oslp->slp_rev, 4);
2436 	}
2437 	bcopy(sl->sl_device_id + 4, oslp->slp_guid, 16);
2438 
2439 	if (sl->sl_flags & SL_WRITEBACK_CACHE_DISABLE)
2440 		oslp->slp_writeback_cache_disable_cur = 1;
2441 	if (sl->sl_flags & SL_SAVED_WRITE_CACHE_DISABLE)
2442 		oslp->slp_writeback_cache_disable_saved = 1;
2443 	if (sl->sl_flags & SL_WRITE_PROTECTED)
2444 		oslp->slp_write_protected = 1;
2445 
2446 	sl->sl_trans_op = SL_OP_NONE;
2447 
2448 	return (0);
2449 }
2450 
2451 char *
2452 sbd_get_zvol_name(sbd_lu_t *sl)
2453 {
2454 	char *src;
2455 	char *p;
2456 
2457 	if (sl->sl_data_filename)
2458 		src = sl->sl_data_filename;
2459 	else
2460 		src = sl->sl_meta_filename;
2461 	/* There has to be a better way */
2462 	if (SBD_IS_ZVOL(src) != 0) {
2463 		ASSERT(0);
2464 	}
2465 	src += 14;
2466 	if (*src == '/')
2467 		src++;
2468 	p = (char *)kmem_alloc(strlen(src) + 1, KM_SLEEP);
2469 	(void) strcpy(p, src);
2470 	return (p);
2471 }
2472 
2473 /*
2474  * this function creates a local metadata zvol property
2475  */
2476 sbd_status_t
2477 sbd_create_zfs_meta_object(sbd_lu_t *sl)
2478 {
2479 	/*
2480 	 * -allocate 1/2 the property size, the zfs property
2481 	 *  is 8k in size and stored as ascii hex string, all
2482 	 *  we needed is 4k buffer to store the binary data.
2483 	 * -initialize reader/write lock
2484 	 */
2485 	if ((sl->sl_zfs_meta = kmem_zalloc(ZAP_MAXVALUELEN / 2, KM_SLEEP))
2486 	    == NULL)
2487 		return (SBD_FAILURE);
2488 	rw_init(&sl->sl_zfs_meta_lock, NULL, RW_DRIVER, NULL);
2489 	return (SBD_SUCCESS);
2490 }
2491 
/*
 * Convert one ascii hex digit ('0'-'9', 'A'-'F', 'a'-'f') to its value
 * 0-15.  Any other character yields -1.
 */
char
sbd_ctoi(char c)
{
	if ((c >= '0') && (c <= '9'))
		return (c - '0');
	if ((c >= 'A') && (c <= 'F'))
		return (c - 'A' + 10);
	if ((c >= 'a') && (c <= 'f'))
		return (c - 'a' + 10);
	return (-1);
}
2505 
2506 /*
2507  * read zvol property and convert to binary
2508  */
2509 sbd_status_t
2510 sbd_open_zfs_meta(sbd_lu_t *sl)
2511 {
2512 	char		*meta = NULL, cl, ch;
2513 	int		i;
2514 	char		*tmp, *ptr;
2515 	uint64_t	rc = SBD_SUCCESS;
2516 	int		len;
2517 	char		*file;
2518 
2519 	if (sbd_create_zfs_meta_object(sl) == SBD_FAILURE)
2520 		return (SBD_FAILURE);
2521 
2522 	rw_enter(&sl->sl_zfs_meta_lock, RW_WRITER);
2523 	file = sbd_get_zvol_name(sl);
2524 	if (sbd_zvolget(file, &meta)) {
2525 		rc = SBD_FAILURE;
2526 		goto done;
2527 	}
2528 	tmp = meta;
2529 	/* convert ascii hex to binary meta */
2530 	len = strlen(meta);
2531 	ptr = sl->sl_zfs_meta;
2532 	for (i = 0; i < len; i += 2) {
2533 		ch = sbd_ctoi(*tmp++);
2534 		cl = sbd_ctoi(*tmp++);
2535 		if (ch == -1 || cl == -1) {
2536 			rc = SBD_FAILURE;
2537 			break;
2538 		}
2539 		*ptr++ = (ch << 4) + cl;
2540 	}
2541 done:
2542 	rw_exit(&sl->sl_zfs_meta_lock);
2543 	if (meta)
2544 		kmem_free(meta, len + 1);
2545 	kmem_free(file, strlen(file) + 1);
2546 	return (rc);
2547 }
2548 
2549 sbd_status_t
2550 sbd_read_zfs_meta(sbd_lu_t *sl, uint8_t *buf, uint64_t sz, uint64_t off)
2551 {
2552 	ASSERT(sl->sl_zfs_meta);
2553 	rw_enter(&sl->sl_zfs_meta_lock, RW_READER);
2554 	bcopy(&sl->sl_zfs_meta[off], buf, sz);
2555 	rw_exit(&sl->sl_zfs_meta_lock);
2556 	return (SBD_SUCCESS);
2557 }
2558 
2559 sbd_status_t
2560 sbd_write_zfs_meta(sbd_lu_t *sl, uint8_t *buf, uint64_t sz, uint64_t off)
2561 {
2562 	char		*ptr, *ah_meta;
2563 	char		*dp = NULL;
2564 	int		i, num;
2565 	char		*file;
2566 
2567 	ASSERT(sl->sl_zfs_meta);
2568 	if ((off + sz) > (ZAP_MAXVALUELEN / 2 - 1)) {
2569 		return (SBD_META_CORRUPTED);
2570 	}
2571 	ptr = ah_meta = kmem_zalloc(ZAP_MAXVALUELEN, KM_SLEEP);
2572 	rw_enter(&sl->sl_zfs_meta_lock, RW_WRITER);
2573 	bcopy(buf, &sl->sl_zfs_meta[off], sz);
2574 	/* convert local copy to ascii hex */
2575 	dp = sl->sl_zfs_meta;
2576 	for (i = 0; i < sl->sl_total_meta_size; i++, dp++) {
2577 		num = ((*dp) >> 4) & 0xF;
2578 		*ah_meta++ = (num < 10) ? (num + '0') : (num + ('a' - 10));
2579 		num = (*dp) & 0xF;
2580 		*ah_meta++ = (num < 10) ? (num + '0') : (num + ('a' - 10));
2581 	}
2582 	*ah_meta = NULL;
2583 	file = sbd_get_zvol_name(sl);
2584 	if (sbd_zvolset(file, (char *)ptr)) {
2585 		rw_exit(&sl->sl_zfs_meta_lock);
2586 		kmem_free(ptr, ZAP_MAXVALUELEN);
2587 		kmem_free(file, strlen(file) + 1);
2588 		return (SBD_META_CORRUPTED);
2589 	}
2590 	rw_exit(&sl->sl_zfs_meta_lock);
2591 	kmem_free(ptr, ZAP_MAXVALUELEN);
2592 	kmem_free(file, strlen(file) + 1);
2593 	return (SBD_SUCCESS);
2594 }
2595 
/*
 * Return 1 if 'path' matches the SBD_IS_ZVOL() prefix test (the macro
 * evaluates to 0 on a match), otherwise 0.
 */
int
sbd_is_zvol(char *path)
{
	return ((SBD_IS_ZVOL(path) == 0) ? 1 : 0);
}
2606 
2607 /*
2608  * set write cache disable
2609  * wcd - 1 = disable, 0 = enable
2610  */
2611 sbd_status_t
2612 sbd_wcd_set(int wcd, sbd_lu_t *sl)
2613 {
2614 	/* translate to wce bit */
2615 	int wce = wcd ? 0 : 1;
2616 	int ret;
2617 	sbd_status_t sret = SBD_SUCCESS;
2618 
2619 	mutex_enter(&sl->sl_lock);
2620 	sl->sl_flags &= ~SL_WRITEBACK_CACHE_SET_UNSUPPORTED;
2621 
2622 	if (sl->sl_data_vp->v_type == VREG) {
2623 		sl->sl_flags |= SL_FLUSH_ON_DISABLED_WRITECACHE;
2624 		goto done;
2625 	}
2626 
2627 	ret = VOP_IOCTL(sl->sl_data_vp, DKIOCSETWCE, (intptr_t)&wce, FKIOCTL,
2628 	    kcred, NULL, NULL);
2629 	if (ret == 0) {
2630 		sl->sl_flags &= ~SL_WRITEBACK_CACHE_SET_UNSUPPORTED;
2631 		sl->sl_flags &= ~SL_FLUSH_ON_DISABLED_WRITECACHE;
2632 	} else {
2633 		sl->sl_flags |= SL_WRITEBACK_CACHE_SET_UNSUPPORTED;
2634 		sl->sl_flags |= SL_FLUSH_ON_DISABLED_WRITECACHE;
2635 		sret = SBD_FAILURE;
2636 		goto done;
2637 	}
2638 
2639 done:
2640 	mutex_exit(&sl->sl_lock);
2641 	return (sret);
2642 }
2643 
2644 /*
2645  * get write cache disable
2646  * wcd - 1 = disable, 0 = enable
2647  */
2648 void
2649 sbd_wcd_get(int *wcd, sbd_lu_t *sl)
2650 {
2651 	int wce;
2652 	int ret;
2653 
2654 	if (sl->sl_data_vp->v_type == VREG) {
2655 		*wcd = 0;
2656 		return;
2657 	}
2658 
2659 	ret = VOP_IOCTL(sl->sl_data_vp, DKIOCGETWCE, (intptr_t)&wce, FKIOCTL,
2660 	    kcred, NULL, NULL);
2661 	/* if write cache get failed, assume disabled */
2662 	if (ret) {
2663 		*wcd = 1;
2664 	} else {
2665 		/* translate to wcd bit */
2666 		*wcd = wce ? 0 : 1;
2667 	}
2668 }
2669 
2670 int
2671 sbd_zvolget(char *zvol_name, char **comstarprop)
2672 {
2673 	ldi_handle_t	zfs_lh;
2674 	nvlist_t	*nv = NULL, *nv2;
2675 	zfs_cmd_t	*zc;
2676 	char		*ptr;
2677 	int size = 1024;
2678 	int unused;
2679 	int rc;
2680 
2681 	if ((rc = ldi_open_by_name("/dev/zfs", FREAD | FWRITE, kcred,
2682 	    &zfs_lh, sbd_zfs_ident)) != 0) {
2683 		cmn_err(CE_WARN, "ldi_open %d", rc);
2684 		return (ENXIO);
2685 	}
2686 
2687 	zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
2688 	(void) strlcpy(zc->zc_name, zvol_name, sizeof (zc->zc_name));
2689 again:
2690 	zc->zc_nvlist_dst = (uint64_t)(intptr_t)kmem_alloc(size,
2691 	    KM_SLEEP);
2692 	zc->zc_nvlist_dst_size = size;
2693 	rc = ldi_ioctl(zfs_lh, ZFS_IOC_OBJSET_STATS, (intptr_t)zc,
2694 	    FKIOCTL, kcred, &unused);
2695 	/*
2696 	 * ENOMEM means the list is larger than what we've allocated
2697 	 * ldi_ioctl will fail with ENOMEM only once
2698 	 */
2699 	if (rc == ENOMEM) {
2700 		int newsize;
2701 		newsize = zc->zc_nvlist_dst_size;
2702 		kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst, size);
2703 		size = newsize;
2704 		goto again;
2705 	} else if (rc != 0) {
2706 		goto out;
2707 	}
2708 	rc = nvlist_unpack((char *)(uintptr_t)zc->zc_nvlist_dst,
2709 	    zc->zc_nvlist_dst_size, &nv, 0);
2710 	ASSERT(rc == 0);	/* nvlist_unpack should not fail */
2711 	if ((rc = nvlist_lookup_nvlist(nv, "stmf_sbd_lu", &nv2)) == 0) {
2712 		rc = nvlist_lookup_string(nv2, ZPROP_VALUE, &ptr);
2713 		if (rc != 0) {
2714 			cmn_err(CE_WARN, "couldn't get value");
2715 		} else {
2716 			*comstarprop = kmem_alloc(strlen(ptr) + 1,
2717 			    KM_SLEEP);
2718 			(void) strcpy(*comstarprop, ptr);
2719 		}
2720 	}
2721 out:
2722 	if (nv != NULL)
2723 		nvlist_free(nv);
2724 	kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst, size);
2725 	kmem_free(zc, sizeof (zfs_cmd_t));
2726 	(void) ldi_close(zfs_lh, FREAD|FWRITE, kcred);
2727 
2728 	return (rc);
2729 }
2730 
2731 int
2732 sbd_zvolset(char *zvol_name, char *comstarprop)
2733 {
2734 	ldi_handle_t	zfs_lh;
2735 	nvlist_t	*nv;
2736 	char		*packed = NULL;
2737 	size_t		len;
2738 	zfs_cmd_t	*zc;
2739 	int unused;
2740 	int rc;
2741 
2742 	if ((rc = ldi_open_by_name("/dev/zfs", FREAD | FWRITE, kcred,
2743 	    &zfs_lh, sbd_zfs_ident)) != 0) {
2744 		cmn_err(CE_WARN, "ldi_open %d", rc);
2745 		return (ENXIO);
2746 	}
2747 	(void) nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP);
2748 	(void) nvlist_add_string(nv, "stmf_sbd_lu", comstarprop);
2749 	if ((rc = nvlist_pack(nv, &packed, &len, NV_ENCODE_NATIVE, KM_SLEEP))) {
2750 		goto out;
2751 	}
2752 
2753 	zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
2754 	(void) strlcpy(zc->zc_name, zvol_name, sizeof (zc->zc_name));
2755 	zc->zc_nvlist_src = (uint64_t)(intptr_t)packed;
2756 	zc->zc_nvlist_src_size = len;
2757 	rc = ldi_ioctl(zfs_lh, ZFS_IOC_SET_PROP, (intptr_t)zc,
2758 	    FKIOCTL, kcred, &unused);
2759 	if (rc != 0) {
2760 		cmn_err(CE_NOTE, "ioctl failed %d", rc);
2761 	}
2762 	kmem_free(zc, sizeof (zfs_cmd_t));
2763 out:
2764 	nvlist_free(nv);
2765 	(void) ldi_close(zfs_lh, FREAD|FWRITE, kcred);
2766 	return (rc);
2767 }
2768