xref: /freebsd/sys/cam/ctl/ctl_backend_block.c (revision 315ee00f)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2003 Silicon Graphics International Corp.
5  * Copyright (c) 2009-2011 Spectra Logic Corporation
6  * Copyright (c) 2012,2021 The FreeBSD Foundation
7  * Copyright (c) 2014-2021 Alexander Motin <mav@FreeBSD.org>
8  * All rights reserved.
9  *
10  * Portions of this software were developed by Edward Tomasz Napierala
11  * under sponsorship from the FreeBSD Foundation.
12  *
13  * Portions of this software were developed by Ka Ho Ng <khng@FreeBSD.org>
14  * under sponsorship from the FreeBSD Foundation.
15  *
16  * Redistribution and use in source and binary forms, with or without
17  * modification, are permitted provided that the following conditions
18  * are met:
19  * 1. Redistributions of source code must retain the above copyright
20  *    notice, this list of conditions, and the following disclaimer,
21  *    without modification.
22  * 2. Redistributions in binary form must reproduce at minimum a disclaimer
23  *    substantially similar to the "NO WARRANTY" disclaimer below
24  *    ("Disclaimer") and any redistribution must be conditioned upon
25  *    including a substantially similar Disclaimer requirement for further
26  *    binary redistribution.
27  *
28  * NO WARRANTY
29  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
37  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
38  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39  * POSSIBILITY OF SUCH DAMAGES.
40  *
41  * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_backend_block.c#5 $
42  */
43 /*
44  * CAM Target Layer driver backend for block devices.
45  *
46  * Author: Ken Merry <ken@FreeBSD.org>
47  */
48 #include <sys/cdefs.h>
49 #include <sys/param.h>
50 #include <sys/systm.h>
51 #include <sys/kernel.h>
52 #include <sys/types.h>
53 #include <sys/kthread.h>
54 #include <sys/bio.h>
55 #include <sys/fcntl.h>
56 #include <sys/limits.h>
57 #include <sys/lock.h>
58 #include <sys/mutex.h>
59 #include <sys/condvar.h>
60 #include <sys/malloc.h>
61 #include <sys/conf.h>
62 #include <sys/ioccom.h>
63 #include <sys/queue.h>
64 #include <sys/sbuf.h>
65 #include <sys/endian.h>
66 #include <sys/uio.h>
67 #include <sys/buf.h>
68 #include <sys/taskqueue.h>
69 #include <sys/vnode.h>
70 #include <sys/namei.h>
71 #include <sys/mount.h>
72 #include <sys/disk.h>
73 #include <sys/fcntl.h>
74 #include <sys/filedesc.h>
75 #include <sys/filio.h>
76 #include <sys/proc.h>
77 #include <sys/pcpu.h>
78 #include <sys/module.h>
79 #include <sys/sdt.h>
80 #include <sys/devicestat.h>
81 #include <sys/sysctl.h>
82 #include <sys/nv.h>
83 #include <sys/dnv.h>
84 #include <sys/sx.h>
85 #include <sys/unistd.h>
86 
87 #include <geom/geom.h>
88 
89 #include <cam/cam.h>
90 #include <cam/scsi/scsi_all.h>
91 #include <cam/scsi/scsi_da.h>
92 #include <cam/ctl/ctl_io.h>
93 #include <cam/ctl/ctl.h>
94 #include <cam/ctl/ctl_backend.h>
95 #include <cam/ctl/ctl_ioctl.h>
96 #include <cam/ctl/ctl_ha.h>
97 #include <cam/ctl/ctl_scsi_all.h>
98 #include <cam/ctl/ctl_private.h>
99 #include <cam/ctl/ctl_error.h>
100 
101 /*
102  * The idea here is to allocate enough S/G space to handle at least 1MB I/Os.
103  * On systems with a small maxphys it can be 8 128KB segments.  On large
104  * systems it can be up to 8 1MB segments.  I/Os larger than that will be split.
105  */
106 #define	CTLBLK_MAX_SEGS		8
107 #define	CTLBLK_HALF_SEGS	(CTLBLK_MAX_SEGS / 2)
108 #define	CTLBLK_MIN_SEG		(128 * 1024)
109 #define	CTLBLK_MAX_SEG		MIN(1024 * 1024, MAX(CTLBLK_MIN_SEG, maxphys))
110 #define	CTLBLK_MAX_IO_SIZE	(CTLBLK_MAX_SEG * CTLBLK_MAX_SEGS)
111 
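/*
 * For example: with maxphys of 128KB, CTLBLK_MAX_SEG clamps to
 * CTLBLK_MIN_SEG (128KB) and CTLBLK_MAX_IO_SIZE is 8 * 128KB = 1MB;
 * with maxphys of 1MB or more it is 8 * 1MB = 8MB.
 */
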
112 #ifdef CTLBLK_DEBUG
113 #define DPRINTF(fmt, args...) \
114     printf("cbb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
115 #else
116 #define DPRINTF(fmt, args...) do {} while(0)
117 #endif
118 
119 #define PRIV(io)	\
120     ((struct ctl_ptr_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_BACKEND])
121 #define ARGS(io)	\
122     ((struct ctl_lba_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_LBA_LEN])
123 
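/*
 * PRIV() and ARGS() overlay two slots of the per-I/O private area:
 * ARGS() holds the LBA/length/flags decoded from the CDB, while PRIV()
 * carries the backend's beio pointer and chunk progress counter.
 */
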
124 SDT_PROVIDER_DEFINE(cbb);
125 
126 typedef enum {
127 	CTL_BE_BLOCK_LUN_UNCONFIGURED	= 0x01,
128 	CTL_BE_BLOCK_LUN_WAITING	= 0x04,
129 } ctl_be_block_lun_flags;
130 
131 typedef enum {
132 	CTL_BE_BLOCK_NONE,
133 	CTL_BE_BLOCK_DEV,
134 	CTL_BE_BLOCK_FILE
135 } ctl_be_block_type;
136 
137 struct ctl_be_block_filedata {
138 	struct ucred *cred;
139 };
140 
141 union ctl_be_block_bedata {
142 	struct ctl_be_block_filedata file;
143 };
144 
145 struct ctl_be_block_io;
146 struct ctl_be_block_lun;
147 
148 typedef void (*cbb_dispatch_t)(struct ctl_be_block_lun *be_lun,
149 			       struct ctl_be_block_io *beio);
150 typedef uint64_t (*cbb_getattr_t)(struct ctl_be_block_lun *be_lun,
151 				  const char *attrname);
152 
153 /*
154  * Backend LUN structure.  There is a 1:1 mapping between a block device
155  * and a backend block LUN, and between a backend block LUN and a CTL LUN.
156  */
157 struct ctl_be_block_lun {
158 	struct ctl_be_lun cbe_lun;		/* Must be first element. */
159 	struct ctl_lun_create_params params;
160 	char *dev_path;
161 	ctl_be_block_type dev_type;
162 	struct vnode *vn;
163 	union ctl_be_block_bedata backend;
164 	cbb_dispatch_t dispatch;
165 	cbb_dispatch_t lun_flush;
166 	cbb_dispatch_t unmap;
167 	cbb_dispatch_t get_lba_status;
168 	cbb_getattr_t getattr;
169 	uint64_t size_blocks;
170 	uint64_t size_bytes;
171 	struct ctl_be_block_softc *softc;
172 	struct devstat *disk_stats;
173 	ctl_be_block_lun_flags flags;
174 	SLIST_ENTRY(ctl_be_block_lun) links;
175 	struct taskqueue *io_taskqueue;
176 	struct task io_task;
177 	int num_threads;
178 	STAILQ_HEAD(, ctl_io_hdr) input_queue;
179 	STAILQ_HEAD(, ctl_io_hdr) config_read_queue;
180 	STAILQ_HEAD(, ctl_io_hdr) config_write_queue;
181 	STAILQ_HEAD(, ctl_io_hdr) datamove_queue;
182 	struct mtx_padalign io_lock;
183 	struct mtx_padalign queue_lock;
184 };
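
/*
 * Note: queue_lock protects the four I/O queues above, while io_lock
 * protects the devstat counters and per-beio bio completion accounting.
 */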
185 
186 /*
187  * Overall softc structure for the block backend module.
188  */
189 struct ctl_be_block_softc {
190 	struct sx			 modify_lock;
191 	struct mtx			 lock;
192 	int				 num_luns;
193 	SLIST_HEAD(, ctl_be_block_lun)	 lun_list;
194 	uma_zone_t			 beio_zone;
195 	uma_zone_t			 bufmin_zone;
196 	uma_zone_t			 bufmax_zone;
197 };
198 
199 static struct ctl_be_block_softc backend_block_softc;
200 
201 /*
202  * Per-I/O information.
203  */
204 struct ctl_be_block_io {
205 	union ctl_io			*io;
206 	struct ctl_sg_entry		sg_segs[CTLBLK_MAX_SEGS];
207 	struct iovec			xiovecs[CTLBLK_MAX_SEGS];
208 	int				refcnt;
209 	int				bio_cmd;
210 	int				two_sglists;
211 	int				num_segs;
212 	int				num_bios_sent;
213 	int				num_bios_done;
214 	int				send_complete;
215 	int				first_error;
216 	uint64_t			first_error_offset;
217 	struct bintime			ds_t0;
218 	devstat_tag_type		ds_tag_type;
219 	devstat_trans_flags		ds_trans_type;
220 	uint64_t			io_len;
221 	uint64_t			io_offset;
222 	int				io_arg;
223 	struct ctl_be_block_softc	*softc;
224 	struct ctl_be_block_lun		*lun;
225 	void (*beio_cont)(struct ctl_be_block_io *beio); /* to continue processing */
226 };
227 
228 extern struct ctl_softc *control_softc;
229 
230 static int cbb_num_threads = 32;
231 SYSCTL_NODE(_kern_cam_ctl, OID_AUTO, block, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
232 	    "CAM Target Layer Block Backend");
233 SYSCTL_INT(_kern_cam_ctl_block, OID_AUTO, num_threads, CTLFLAG_RWTUN,
234            &cbb_num_threads, 0, "Number of threads per backing file");
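
/*
 * Since the OID is CTLFLAG_RWTUN, this can be set as a loader tunable or
 * changed at runtime, e.g.:  sysctl kern.cam.ctl.block.num_threads=16
 */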
235 
236 static struct ctl_be_block_io *ctl_alloc_beio(struct ctl_be_block_softc *softc);
237 static void ctl_free_beio(struct ctl_be_block_io *beio);
238 static void ctl_complete_beio(struct ctl_be_block_io *beio);
239 static int ctl_be_block_move_done(union ctl_io *io, bool samethr);
240 static void ctl_be_block_biodone(struct bio *bio);
241 static void ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
242 				    struct ctl_be_block_io *beio);
243 static void ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
244 				       struct ctl_be_block_io *beio);
245 static void ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun,
246 				  struct ctl_be_block_io *beio);
247 static uint64_t ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun,
248 					 const char *attrname);
249 static void ctl_be_block_unmap_file(struct ctl_be_block_lun *be_lun,
250 				    struct ctl_be_block_io *beio);
251 static void ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
252 				   struct ctl_be_block_io *beio);
253 static void ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
254 				   struct ctl_be_block_io *beio);
255 static void ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
256 				      struct ctl_be_block_io *beio);
257 static uint64_t ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun,
258 					 const char *attrname);
259 static void ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun,
260 				    union ctl_io *io);
261 static void ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
262 				    union ctl_io *io);
263 static void ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
264 				  union ctl_io *io);
265 static void ctl_be_block_worker(void *context, int pending);
266 static int ctl_be_block_submit(union ctl_io *io);
267 static int ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
268 				   int flag, struct thread *td);
269 static int ctl_be_block_open_file(struct ctl_be_block_lun *be_lun,
270 				  struct ctl_lun_req *req);
271 static int ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun,
272 				 struct ctl_lun_req *req);
273 static int ctl_be_block_close(struct ctl_be_block_lun *be_lun);
274 static int ctl_be_block_open(struct ctl_be_block_lun *be_lun,
275 			     struct ctl_lun_req *req);
276 static int ctl_be_block_create(struct ctl_be_block_softc *softc,
277 			       struct ctl_lun_req *req);
278 static int ctl_be_block_rm(struct ctl_be_block_softc *softc,
279 			   struct ctl_lun_req *req);
280 static int ctl_be_block_modify(struct ctl_be_block_softc *softc,
281 			   struct ctl_lun_req *req);
282 static void ctl_be_block_lun_shutdown(struct ctl_be_lun *cbe_lun);
283 static int ctl_be_block_config_write(union ctl_io *io);
284 static int ctl_be_block_config_read(union ctl_io *io);
285 static int ctl_be_block_lun_info(struct ctl_be_lun *cbe_lun, struct sbuf *sb);
286 static uint64_t ctl_be_block_lun_attr(struct ctl_be_lun *cbe_lun, const char *attrname);
287 static int ctl_be_block_init(void);
288 static int ctl_be_block_shutdown(void);
289 
290 static struct ctl_backend_driver ctl_be_block_driver =
291 {
292 	.name = "block",
293 	.flags = CTL_BE_FLAG_HAS_CONFIG,
294 	.init = ctl_be_block_init,
295 	.shutdown = ctl_be_block_shutdown,
296 	.data_submit = ctl_be_block_submit,
297 	.config_read = ctl_be_block_config_read,
298 	.config_write = ctl_be_block_config_write,
299 	.ioctl = ctl_be_block_ioctl,
300 	.lun_info = ctl_be_block_lun_info,
301 	.lun_attr = ctl_be_block_lun_attr
302 };
303 
304 MALLOC_DEFINE(M_CTLBLK, "ctlblock", "Memory used for CTL block backend");
305 CTL_BACKEND_DECLARE(cbb, ctl_be_block_driver);
306 
307 static void
308 ctl_alloc_seg(struct ctl_be_block_softc *softc, struct ctl_sg_entry *sg,
309     size_t len)
310 {
311 
312 	if (len <= CTLBLK_MIN_SEG) {
313 		sg->addr = uma_zalloc(softc->bufmin_zone, M_WAITOK);
314 	} else {
315 		KASSERT(len <= CTLBLK_MAX_SEG,
316 		    ("Too large alloc %zu > %lu", len, CTLBLK_MAX_SEG));
317 		sg->addr = uma_zalloc(softc->bufmax_zone, M_WAITOK);
318 	}
319 	sg->len = len;
320 }
321 
322 static void
323 ctl_free_seg(struct ctl_be_block_softc *softc, struct ctl_sg_entry *sg)
324 {
325 
326 	if (sg->len <= CTLBLK_MIN_SEG) {
327 		uma_zfree(softc->bufmin_zone, sg->addr);
328 	} else {
329 		KASSERT(sg->len <= CTLBLK_MAX_SEG,
330 		    ("Too large free %zu > %lu", sg->len, CTLBLK_MAX_SEG));
331 		uma_zfree(softc->bufmax_zone, sg->addr);
332 	}
333 }
334 
335 static struct ctl_be_block_io *
336 ctl_alloc_beio(struct ctl_be_block_softc *softc)
337 {
338 	struct ctl_be_block_io *beio;
339 
340 	beio = uma_zalloc(softc->beio_zone, M_WAITOK | M_ZERO);
341 	beio->softc = softc;
342 	beio->refcnt = 1;
343 	return (beio);
344 }
345 
346 static void
347 ctl_real_free_beio(struct ctl_be_block_io *beio)
348 {
349 	struct ctl_be_block_softc *softc = beio->softc;
350 	int i;
351 
352 	for (i = 0; i < beio->num_segs; i++) {
353 		ctl_free_seg(softc, &beio->sg_segs[i]);
354 
355 		/* For compare we had two equal S/G lists. */
356 		if (beio->two_sglists) {
357 			ctl_free_seg(softc,
358 			    &beio->sg_segs[i + CTLBLK_HALF_SEGS]);
359 		}
360 	}
361 
362 	uma_zfree(softc->beio_zone, beio);
363 }
364 
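/*
 * Adjust the beio reference count; the reference that drops the count to
 * zero frees it.  This is also used as the kern_data_ref callback, so CTL
 * can keep the data buffers alive past backend completion.
 */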
365 static void
366 ctl_refcnt_beio(void *arg, int diff)
367 {
368 	struct ctl_be_block_io *beio = arg;
369 
370 	if (atomic_fetchadd_int(&beio->refcnt, diff) + diff == 0)
371 		ctl_real_free_beio(beio);
372 }
373 
374 static void
375 ctl_free_beio(struct ctl_be_block_io *beio)
376 {
377 
378 	ctl_refcnt_beio(beio, -1);
379 }
380 
381 static void
382 ctl_complete_beio(struct ctl_be_block_io *beio)
383 {
384 	union ctl_io *io = beio->io;
385 
386 	if (beio->beio_cont != NULL) {
387 		beio->beio_cont(beio);
388 	} else {
389 		ctl_free_beio(beio);
390 		ctl_data_submit_done(io);
391 	}
392 }
393 
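/*
 * Byte-wise compare; returns the offset of the first difference, or
 * size if the buffers are equal.
 */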
394 static size_t
395 cmp(uint8_t *a, uint8_t *b, size_t size)
396 {
397 	size_t i;
398 
399 	for (i = 0; i < size; i++) {
400 		if (a[i] != b[i])
401 			break;
402 	}
403 	return (i);
404 }
405 
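/*
 * Compare the two halves of the S/G list (backend data in the first
 * half, initiator data in the second).  On a mismatch, report
 * MISCOMPARE sense with the byte offset of the first difference in the
 * INFORMATION field.
 */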
406 static void
407 ctl_be_block_compare(union ctl_io *io)
408 {
409 	struct ctl_be_block_io *beio;
410 	uint64_t off, res;
411 	int i;
412 	uint8_t info[8];
413 
414 	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
415 	off = 0;
416 	for (i = 0; i < beio->num_segs; i++) {
417 		res = cmp(beio->sg_segs[i].addr,
418 		    beio->sg_segs[i + CTLBLK_HALF_SEGS].addr,
419 		    beio->sg_segs[i].len);
420 		off += res;
421 		if (res < beio->sg_segs[i].len)
422 			break;
423 	}
424 	if (i < beio->num_segs) {
425 		scsi_u64to8b(off, info);
426 		ctl_set_sense(&io->scsiio, /*current_error*/ 1,
427 		    /*sense_key*/ SSD_KEY_MISCOMPARE,
428 		    /*asc*/ 0x1D, /*ascq*/ 0x00,
429 		    /*type*/ SSD_ELEM_INFO,
430 		    /*size*/ sizeof(info), /*data*/ &info,
431 		    /*type*/ SSD_ELEM_NONE);
432 	} else
433 		ctl_set_success(&io->scsiio);
434 }
435 
436 static int
437 ctl_be_block_move_done(union ctl_io *io, bool samethr)
438 {
439 	struct ctl_be_block_io *beio;
440 	struct ctl_be_block_lun *be_lun;
441 	struct ctl_lba_len_flags *lbalen;
442 
443 	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
444 
445 	DPRINTF("entered\n");
446 	io->scsiio.kern_rel_offset += io->scsiio.kern_data_len;
447 
448 	/*
449 	 * We set status at this point for read and compare commands.
450 	 */
451 	if ((io->io_hdr.flags & CTL_FLAG_ABORT) == 0 &&
452 	    (io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE) {
453 		lbalen = ARGS(io);
454 		if (lbalen->flags & CTL_LLF_READ) {
455 			ctl_set_success(&io->scsiio);
456 		} else if (lbalen->flags & CTL_LLF_COMPARE) {
457 			/* We have two data blocks ready for comparison. */
458 			ctl_be_block_compare(io);
459 		}
460 	}
461 
462 	/*
463 	 * If this is a read, or a write with errors, it is done.
464 	 */
465 	if ((beio->bio_cmd == BIO_READ)
466 	 || ((io->io_hdr.flags & CTL_FLAG_ABORT) != 0)
467 	 || ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE)) {
468 		ctl_complete_beio(beio);
469 		return (0);
470 	}
471 
472 	/*
473 	 * At this point, we have a write and the DMA completed successfully.
474 	 * If we were called synchronously in the original thread then just
475 	 * dispatch, otherwise we now have to queue it to the task queue to
476 	 * execute the backend I/O.  That is because we do blocking
477 	 * memory allocations, and in the file backing case, blocking I/O.
478 	 * This move done routine is generally called in the SIM's
479 	 * interrupt context, and therefore we cannot block.
480 	 */
481 	be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io);
482 	if (samethr) {
483 		be_lun->dispatch(be_lun, beio);
484 	} else {
485 		mtx_lock(&be_lun->queue_lock);
486 		STAILQ_INSERT_TAIL(&be_lun->datamove_queue, &io->io_hdr, links);
487 		mtx_unlock(&be_lun->queue_lock);
488 		taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
489 	}
490 	return (0);
491 }
492 
493 static void
494 ctl_be_block_biodone(struct bio *bio)
495 {
496 	struct ctl_be_block_io *beio = bio->bio_caller1;
497 	struct ctl_be_block_lun *be_lun = beio->lun;
498 	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
499 	union ctl_io *io;
500 	int error;
501 
502 	io = beio->io;
503 
504 	DPRINTF("entered\n");
505 
506 	error = bio->bio_error;
507 	mtx_lock(&be_lun->io_lock);
508 	if (error != 0 &&
509 	    (beio->first_error == 0 ||
510 	     bio->bio_offset < beio->first_error_offset)) {
511 		beio->first_error = error;
512 		beio->first_error_offset = bio->bio_offset;
513 	}
514 
515 	beio->num_bios_done++;
516 
517 	/*
518 	 * XXX KDM will this cause WITNESS to complain?  Holding a lock
519 	 * during the free might cause it to complain.
520 	 */
521 	g_destroy_bio(bio);
522 
523 	/*
524 	 * If the send complete bit isn't set, or we aren't the last I/O to
525 	 * complete, then we're done.
526 	 */
527 	if ((beio->send_complete == 0)
528 	 || (beio->num_bios_done < beio->num_bios_sent)) {
529 		mtx_unlock(&be_lun->io_lock);
530 		return;
531 	}
532 
533 	/*
534 	 * At this point, we've verified that we are the last I/O to
535 	 * complete, so it's safe to drop the lock.
536 	 */
537 	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
538 	    beio->ds_tag_type, beio->ds_trans_type,
539 	    /*now*/ NULL, /*then*/&beio->ds_t0);
540 	mtx_unlock(&be_lun->io_lock);
541 
542 	/*
543 	 * If there are any errors from the backing device, we fail the
544 	 * entire I/O with a medium error.
545 	 */
546 	error = beio->first_error;
547 	if (error != 0) {
548 		if (error == EOPNOTSUPP) {
549 			ctl_set_invalid_opcode(&io->scsiio);
550 		} else if (error == ENOSPC || error == EDQUOT) {
551 			ctl_set_space_alloc_fail(&io->scsiio);
552 		} else if (error == EROFS || error == EACCES) {
553 			ctl_set_hw_write_protected(&io->scsiio);
554 		} else if (beio->bio_cmd == BIO_FLUSH) {
555 			/* XXX KDM is there a better error here? */
556 			ctl_set_internal_failure(&io->scsiio,
557 						 /*sks_valid*/ 1,
558 						 /*retry_count*/ 0xbad2);
559 		} else {
560 			ctl_set_medium_error(&io->scsiio,
561 			    beio->bio_cmd == BIO_READ);
562 		}
563 		ctl_complete_beio(beio);
564 		return;
565 	}
566 
567 	/*
568 	 * If this is a write, a flush, a delete or verify, we're all done.
569 	 * If this is a read, we can now send the data to the user.
570 	 */
571 	if ((beio->bio_cmd == BIO_WRITE)
572 	 || (beio->bio_cmd == BIO_FLUSH)
573 	 || (beio->bio_cmd == BIO_DELETE)
574 	 || (ARGS(io)->flags & CTL_LLF_VERIFY)) {
575 		ctl_set_success(&io->scsiio);
576 		ctl_complete_beio(beio);
577 	} else {
578 		if ((ARGS(io)->flags & CTL_LLF_READ) &&
579 		    beio->beio_cont == NULL) {
580 			ctl_set_success(&io->scsiio);
581 			if (cbe_lun->serseq >= CTL_LUN_SERSEQ_SOFT)
582 				ctl_serseq_done(io);
583 		}
584 		ctl_datamove(io);
585 	}
586 }
587 
588 static void
589 ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
590 			struct ctl_be_block_io *beio)
591 {
592 	union ctl_io *io = beio->io;
593 	struct mount *mountpoint;
594 	int error;
595 
596 	DPRINTF("entered\n");
597 
598 	binuptime(&beio->ds_t0);
599 	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
600 
601 	(void) vn_start_write(be_lun->vn, &mountpoint, V_WAIT);
602 
603 	vn_lock(be_lun->vn, vn_lktype_write(mountpoint, be_lun->vn) |
604 	    LK_RETRY);
605 	error = VOP_FSYNC(be_lun->vn, beio->io_arg ? MNT_NOWAIT : MNT_WAIT,
606 	    curthread);
607 	VOP_UNLOCK(be_lun->vn);
608 
609 	vn_finished_write(mountpoint);
610 
611 	mtx_lock(&be_lun->io_lock);
612 	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
613 	    beio->ds_tag_type, beio->ds_trans_type,
614 	    /*now*/ NULL, /*then*/&beio->ds_t0);
615 	mtx_unlock(&be_lun->io_lock);
616 
617 	if (error == 0)
618 		ctl_set_success(&io->scsiio);
619 	else {
620 		/* XXX KDM is there a better error here? */
621 		ctl_set_internal_failure(&io->scsiio,
622 					 /*sks_valid*/ 1,
623 					 /*retry_count*/ 0xbad1);
624 	}
625 
626 	ctl_complete_beio(beio);
627 }
628 
629 SDT_PROBE_DEFINE1(cbb, , read, file_start, "uint64_t");
630 SDT_PROBE_DEFINE1(cbb, , write, file_start, "uint64_t");
631 SDT_PROBE_DEFINE1(cbb, , read, file_done, "uint64_t");
632 SDT_PROBE_DEFINE1(cbb, , write, file_done, "uint64_t");
633 
634 static void
635 ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
636 			   struct ctl_be_block_io *beio)
637 {
638 	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
639 	struct ctl_be_block_filedata *file_data;
640 	union ctl_io *io;
641 	struct uio xuio;
642 	struct iovec *xiovec;
643 	size_t s;
644 	int error, flags, i;
645 
646 	DPRINTF("entered\n");
647 
648 	file_data = &be_lun->backend.file;
649 	io = beio->io;
650 	flags = 0;
651 	if (ARGS(io)->flags & CTL_LLF_DPO)
652 		flags |= IO_DIRECT;
653 	if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA)
654 		flags |= IO_SYNC;
655 
656 	bzero(&xuio, sizeof(xuio));
657 	if (beio->bio_cmd == BIO_READ) {
658 		SDT_PROBE0(cbb, , read, file_start);
659 		xuio.uio_rw = UIO_READ;
660 	} else {
661 		SDT_PROBE0(cbb, , write, file_start);
662 		xuio.uio_rw = UIO_WRITE;
663 	}
664 	xuio.uio_offset = beio->io_offset;
665 	xuio.uio_resid = beio->io_len;
666 	xuio.uio_segflg = UIO_SYSSPACE;
667 	xuio.uio_iov = beio->xiovecs;
668 	xuio.uio_iovcnt = beio->num_segs;
669 	xuio.uio_td = curthread;
670 
671 	for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
672 		xiovec->iov_base = beio->sg_segs[i].addr;
673 		xiovec->iov_len = beio->sg_segs[i].len;
674 	}
675 
676 	binuptime(&beio->ds_t0);
677 	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
678 
679 	if (beio->bio_cmd == BIO_READ) {
680 		vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
681 
682 		if (beio->beio_cont == NULL &&
683 		    cbe_lun->serseq == CTL_LUN_SERSEQ_SOFT)
684 			ctl_serseq_done(io);
685 		/*
686 		 * UFS pays attention to IO_DIRECT for reads.  If the
687 		 * DIRECTIO option is configured into the kernel, it calls
688 		 * ffs_rawread().  But that only works for single-segment
689 		 * uios with user space addresses.  In our case, with a
690 		 * kernel uio, it still reads into the buffer cache, but it
691 		 * will just try to release the buffer from the cache later
692 		 * on in ffs_read().
693 		 *
694 		 * ZFS does not pay attention to IO_DIRECT for reads.
695 		 *
696 		 * UFS does not pay attention to IO_SYNC for reads.
697 		 *
698 		 * ZFS pays attention to IO_SYNC (which translates into the
699 		 * Solaris define FRSYNC for zfs_read()) for reads.  It
700 		 * attempts to sync the file before reading.
701 		 */
702 		error = VOP_READ(be_lun->vn, &xuio, flags, file_data->cred);
703 
704 		VOP_UNLOCK(be_lun->vn);
705 		SDT_PROBE0(cbb, , read, file_done);
706 		if (error == 0 && xuio.uio_resid > 0) {
707 			/*
708 			 * If we read less than requested (EOF), then
709 			 * we should zero the rest of the buffer.
710 			 */
711 			s = beio->io_len - xuio.uio_resid;
712 			for (i = 0; i < beio->num_segs; i++) {
713 				if (s >= beio->sg_segs[i].len) {
714 					s -= beio->sg_segs[i].len;
715 					continue;
716 				}
717 				bzero((uint8_t *)beio->sg_segs[i].addr + s,
718 				    beio->sg_segs[i].len - s);
719 				s = 0;
720 			}
721 		}
722 	} else {
723 		struct mount *mountpoint;
724 
725 		(void)vn_start_write(be_lun->vn, &mountpoint, V_WAIT);
726 		vn_lock(be_lun->vn, vn_lktype_write(mountpoint,
727 		    be_lun->vn) | LK_RETRY);
728 
729 		/*
730 		 * UFS pays attention to IO_DIRECT for writes.  The write
731 		 * is done asynchronously.  (Normally the write would just
732 		 * get put into the cache.)
733 		 *
734 		 * UFS pays attention to IO_SYNC for writes.  It will
735 		 * attempt to write the buffer out synchronously if that
736 		 * flag is set.
737 		 *
738 		 * ZFS does not pay attention to IO_DIRECT for writes.
739 		 *
740 		 * ZFS pays attention to IO_SYNC (a.k.a. FSYNC or FRSYNC)
741 		 * for writes.  It will flush the transaction from the
742 		 * cache before returning.
743 		 */
744 		error = VOP_WRITE(be_lun->vn, &xuio, flags, file_data->cred);
745 		VOP_UNLOCK(be_lun->vn);
746 
747 		vn_finished_write(mountpoint);
748 		SDT_PROBE0(cbb, , write, file_done);
749 	}
750 
751 	mtx_lock(&be_lun->io_lock);
752 	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
753 	    beio->ds_tag_type, beio->ds_trans_type,
754 	    /*now*/ NULL, /*then*/&beio->ds_t0);
755 	mtx_unlock(&be_lun->io_lock);
756 
757 	/*
758 	 * If we got an error, set the sense data to "MEDIUM ERROR" and
759 	 * return the I/O to the user.
760 	 */
761 	if (error != 0) {
762 		if (error == ENOSPC || error == EDQUOT) {
763 			ctl_set_space_alloc_fail(&io->scsiio);
764 		} else if (error == EROFS || error == EACCES) {
765 			ctl_set_hw_write_protected(&io->scsiio);
766 		} else {
767 			ctl_set_medium_error(&io->scsiio,
768 			    beio->bio_cmd == BIO_READ);
769 		}
770 		ctl_complete_beio(beio);
771 		return;
772 	}
773 
774 	/*
775 	 * If this is a write or a verify, we're all done.
776 	 * If this is a read, we can now send the data to the user.
777 	 */
778 	if ((beio->bio_cmd == BIO_WRITE) ||
779 	    (ARGS(io)->flags & CTL_LLF_VERIFY)) {
780 		ctl_set_success(&io->scsiio);
781 		ctl_complete_beio(beio);
782 	} else {
783 		if ((ARGS(io)->flags & CTL_LLF_READ) &&
784 		    beio->beio_cont == NULL) {
785 			ctl_set_success(&io->scsiio);
786 			if (cbe_lun->serseq > CTL_LUN_SERSEQ_SOFT)
787 				ctl_serseq_done(io);
788 		}
789 		ctl_datamove(io);
790 	}
791 }
792 
793 static void
794 ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun,
795 			struct ctl_be_block_io *beio)
796 {
797 	union ctl_io *io = beio->io;
798 	struct ctl_lba_len_flags *lbalen = ARGS(io);
799 	struct scsi_get_lba_status_data *data;
800 	off_t roff, off;
801 	int error, status;
802 
803 	DPRINTF("entered\n");
804 
805 	off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize;
806 	vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
807 	error = VOP_IOCTL(be_lun->vn, FIOSEEKHOLE, &off,
808 	    0, curthread->td_ucred, curthread);
809 	if (error == 0 && off > roff)
810 		status = 0;	/* mapped up to off */
811 	else {
812 		error = VOP_IOCTL(be_lun->vn, FIOSEEKDATA, &off,
813 		    0, curthread->td_ucred, curthread);
814 		if (error == 0 && off > roff)
815 			status = 1;	/* deallocated up to off */
816 		else {
817 			status = 0;	/* unknown up to the end */
818 			off = be_lun->size_bytes;
819 		}
820 	}
821 	VOP_UNLOCK(be_lun->vn);
822 
823 	data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
824 	scsi_u64to8b(lbalen->lba, data->descr[0].addr);
825 	scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize -
826 	    lbalen->lba), data->descr[0].length);
827 	data->descr[0].status = status;
828 
829 	ctl_complete_beio(beio);
830 }
831 
832 static uint64_t
833 ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun, const char *attrname)
834 {
835 	struct vattr		vattr;
836 	struct statfs		statfs;
837 	uint64_t		val;
838 	int			error;
839 
840 	val = UINT64_MAX;
841 	if (be_lun->vn == NULL)
842 		return (val);
843 	vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
844 	if (strcmp(attrname, "blocksused") == 0) {
845 		error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
846 		if (error == 0)
847 			val = vattr.va_bytes / be_lun->cbe_lun.blocksize;
848 	}
849 	if (strcmp(attrname, "blocksavail") == 0 &&
850 	    !VN_IS_DOOMED(be_lun->vn)) {
851 		error = VFS_STATFS(be_lun->vn->v_mount, &statfs);
852 		if (error == 0)
853 			val = statfs.f_bavail * statfs.f_bsize /
854 			    be_lun->cbe_lun.blocksize;
855 	}
856 	VOP_UNLOCK(be_lun->vn);
857 	return (val);
858 }
859 
860 static void
861 ctl_be_block_unmap_file(struct ctl_be_block_lun *be_lun,
862 		        struct ctl_be_block_io *beio)
863 {
864 	struct ctl_be_block_filedata *file_data;
865 	union ctl_io *io;
866 	struct ctl_ptr_len_flags *ptrlen;
867 	struct scsi_unmap_desc *buf, *end;
868 	struct mount *mp;
869 	off_t off, len;
870 	int error;
871 
872 	io = beio->io;
873 	file_data = &be_lun->backend.file;
874 	mp = NULL;
875 	error = 0;
876 
877 	binuptime(&beio->ds_t0);
878 	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
879 
880 	(void)vn_start_write(be_lun->vn, &mp, V_WAIT);
881 	vn_lock(be_lun->vn, vn_lktype_write(mp, be_lun->vn) | LK_RETRY);
882 	if (beio->io_offset == -1) {
883 		beio->io_len = 0;
884 		ptrlen = (struct ctl_ptr_len_flags *)
885 		    &io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
886 		buf = (struct scsi_unmap_desc *)ptrlen->ptr;
887 		end = buf + ptrlen->len / sizeof(*buf);
888 		for (; buf < end; buf++) {
889 			off = (off_t)scsi_8btou64(buf->lba) *
890 			    be_lun->cbe_lun.blocksize;
891 			len = (off_t)scsi_4btoul(buf->length) *
892 			    be_lun->cbe_lun.blocksize;
893 			beio->io_len += len;
894 			error = vn_deallocate(be_lun->vn, &off, &len,
895 			    0, IO_NOMACCHECK | IO_NODELOCKED, file_data->cred,
896 			    NOCRED);
897 			if (error != 0)
898 				break;
899 		}
900 	} else {
901 		/* WRITE_SAME */
902 		off = beio->io_offset;
903 		len = beio->io_len;
904 		error = vn_deallocate(be_lun->vn, &off, &len, 0,
905 		    IO_NOMACCHECK | IO_NODELOCKED, file_data->cred, NOCRED);
906 	}
907 	VOP_UNLOCK(be_lun->vn);
908 	vn_finished_write(mp);
909 
910 	mtx_lock(&be_lun->io_lock);
911 	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
912 	    beio->ds_tag_type, beio->ds_trans_type,
913 	    /*now*/ NULL, /*then*/&beio->ds_t0);
914 	mtx_unlock(&be_lun->io_lock);
915 
916 	/*
917 	 * If we got an error, set the sense data to "MEDIUM ERROR" and
918 	 * return the I/O to the user.
919 	 */
920 	switch (error) {
921 	case 0:
922 		ctl_set_success(&io->scsiio);
923 		break;
924 	case ENOSPC:
925 	case EDQUOT:
926 		ctl_set_space_alloc_fail(&io->scsiio);
927 		break;
928 	case EROFS:
929 	case EACCES:
930 		ctl_set_hw_write_protected(&io->scsiio);
931 		break;
932 	default:
933 		ctl_set_medium_error(&io->scsiio, false);
934 	}
935 	ctl_complete_beio(beio);
936 }
937 
938 static void
939 ctl_be_block_dispatch_zvol(struct ctl_be_block_lun *be_lun,
940 			   struct ctl_be_block_io *beio)
941 {
942 	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
943 	union ctl_io *io;
944 	struct cdevsw *csw;
945 	struct cdev *dev;
946 	struct uio xuio;
947 	struct iovec *xiovec;
948 	int error, flags, i, ref;
949 
950 	DPRINTF("entered\n");
951 
952 	io = beio->io;
953 	flags = 0;
954 	if (ARGS(io)->flags & CTL_LLF_DPO)
955 		flags |= IO_DIRECT;
956 	if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA)
957 		flags |= IO_SYNC;
958 
959 	bzero(&xuio, sizeof(xuio));
960 	if (beio->bio_cmd == BIO_READ) {
961 		SDT_PROBE0(cbb, , read, file_start);
962 		xuio.uio_rw = UIO_READ;
963 	} else {
964 		SDT_PROBE0(cbb, , write, file_start);
965 		xuio.uio_rw = UIO_WRITE;
966 	}
967 	xuio.uio_offset = beio->io_offset;
968 	xuio.uio_resid = beio->io_len;
969 	xuio.uio_segflg = UIO_SYSSPACE;
970 	xuio.uio_iov = beio->xiovecs;
971 	xuio.uio_iovcnt = beio->num_segs;
972 	xuio.uio_td = curthread;
973 
974 	for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
975 		xiovec->iov_base = beio->sg_segs[i].addr;
976 		xiovec->iov_len = beio->sg_segs[i].len;
977 	}
978 
979 	binuptime(&beio->ds_t0);
980 	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);
981 
982 	csw = devvn_refthread(be_lun->vn, &dev, &ref);
983 	if (csw) {
984 		if (beio->bio_cmd == BIO_READ) {
985 			if (beio->beio_cont == NULL &&
986 			    cbe_lun->serseq == CTL_LUN_SERSEQ_SOFT)
987 				ctl_serseq_done(io);
988 			error = csw->d_read(dev, &xuio, flags);
989 		} else
990 			error = csw->d_write(dev, &xuio, flags);
991 		dev_relthread(dev, ref);
992 	} else
993 		error = ENXIO;
994 
995 	if (beio->bio_cmd == BIO_READ)
996 		SDT_PROBE0(cbb, , read, file_done);
997 	else
998 		SDT_PROBE0(cbb, , write, file_done);
999 
1000 	mtx_lock(&be_lun->io_lock);
1001 	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
1002 	    beio->ds_tag_type, beio->ds_trans_type,
1003 	    /*now*/ NULL, /*then*/&beio->ds_t0);
1004 	mtx_unlock(&be_lun->io_lock);
1005 
1006 	/*
1007 	 * If we got an error, set the sense data to "MEDIUM ERROR" and
1008 	 * return the I/O to the user.
1009 	 */
1010 	if (error != 0) {
1011 		if (error == ENOSPC || error == EDQUOT) {
1012 			ctl_set_space_alloc_fail(&io->scsiio);
1013 		} else if (error == EROFS || error == EACCES) {
1014 			ctl_set_hw_write_protected(&io->scsiio);
1015 		} else {
1016 			ctl_set_medium_error(&io->scsiio,
1017 			    beio->bio_cmd == BIO_READ);
1018 		}
1019 		ctl_complete_beio(beio);
1020 		return;
1021 	}
1022 
1023 	/*
1024 	 * If this is a write or a verify, we're all done.
1025 	 * If this is a read, we can now send the data to the user.
1026 	 */
1027 	if ((beio->bio_cmd == BIO_WRITE) ||
1028 	    (ARGS(io)->flags & CTL_LLF_VERIFY)) {
1029 		ctl_set_success(&io->scsiio);
1030 		ctl_complete_beio(beio);
1031 	} else {
1032 		if ((ARGS(io)->flags & CTL_LLF_READ) &&
1033 		    beio->beio_cont == NULL) {
1034 			ctl_set_success(&io->scsiio);
1035 			if (cbe_lun->serseq > CTL_LUN_SERSEQ_SOFT)
1036 				ctl_serseq_done(io);
1037 		}
1038 		ctl_datamove(io);
1039 	}
1040 }
1041 
1042 static void
1043 ctl_be_block_gls_zvol(struct ctl_be_block_lun *be_lun,
1044 			struct ctl_be_block_io *beio)
1045 {
1046 	union ctl_io *io = beio->io;
1047 	struct cdevsw *csw;
1048 	struct cdev *dev;
1049 	struct ctl_lba_len_flags *lbalen = ARGS(io);
1050 	struct scsi_get_lba_status_data *data;
1051 	off_t roff, off;
1052 	int error, ref, status;
1053 
1054 	DPRINTF("entered\n");
1055 
1056 	csw = devvn_refthread(be_lun->vn, &dev, &ref);
1057 	if (csw == NULL) {
1058 		status = 0;	/* unknown up to the end */
1059 		off = be_lun->size_bytes;
1060 		goto done;
1061 	}
1062 	off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize;
1063 	error = csw->d_ioctl(dev, FIOSEEKHOLE, (caddr_t)&off, FREAD,
1064 	    curthread);
1065 	if (error == 0 && off > roff)
1066 		status = 0;	/* mapped up to off */
1067 	else {
1068 		error = csw->d_ioctl(dev, FIOSEEKDATA, (caddr_t)&off, FREAD,
1069 		    curthread);
1070 		if (error == 0 && off > roff)
1071 			status = 1;	/* deallocated up to off */
1072 		else {
1073 			status = 0;	/* unknown up to the end */
1074 			off = be_lun->size_bytes;
1075 		}
1076 	}
1077 	dev_relthread(dev, ref);
1078 
1079 done:
1080 	data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
1081 	scsi_u64to8b(lbalen->lba, data->descr[0].addr);
1082 	scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize -
1083 	    lbalen->lba), data->descr[0].length);
1084 	data->descr[0].status = status;
1085 
1086 	ctl_complete_beio(beio);
1087 }
1088 
1089 static void
1090 ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
1091 		       struct ctl_be_block_io *beio)
1092 {
1093 	struct bio *bio;
1094 	struct cdevsw *csw;
1095 	struct cdev *dev;
1096 	int ref;
1097 
1098 	DPRINTF("entered\n");
1099 
1100 	/* This can't fail, it's a blocking allocation. */
1101 	bio = g_alloc_bio();
1102 
1103 	bio->bio_cmd	    = BIO_FLUSH;
1104 	bio->bio_offset	    = 0;
1105 	bio->bio_data	    = 0;
1106 	bio->bio_done	    = ctl_be_block_biodone;
1107 	bio->bio_caller1    = beio;
1108 	bio->bio_pblkno	    = 0;
1109 
1110 	/*
1111 	 * We don't need to acquire the LUN lock here, because we are only
1112 	 * sending one bio, and so there is no other context to synchronize
1113 	 * with.
1114 	 */
1115 	beio->num_bios_sent = 1;
1116 	beio->send_complete = 1;
1117 
1118 	binuptime(&beio->ds_t0);
1119 	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
1120 
1121 	csw = devvn_refthread(be_lun->vn, &dev, &ref);
1122 	if (csw) {
1123 		bio->bio_dev = dev;
1124 		csw->d_strategy(bio);
1125 		dev_relthread(dev, ref);
1126 	} else {
1127 		bio->bio_error = ENXIO;
1128 		ctl_be_block_biodone(bio);
1129 	}
1130 }
1131 
1132 static void
1133 ctl_be_block_unmap_dev_range(struct ctl_be_block_lun *be_lun,
1134 		       struct ctl_be_block_io *beio,
1135 		       uint64_t off, uint64_t len, int last)
1136 {
1137 	struct bio *bio;
1138 	uint64_t maxlen;
1139 	struct cdevsw *csw;
1140 	struct cdev *dev;
1141 	int ref;
1142 
1143 	csw = devvn_refthread(be_lun->vn, &dev, &ref);
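	/* Largest multiple of the blocksize that does not exceed LONG_MAX. */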
1144 	maxlen = LONG_MAX - (LONG_MAX % be_lun->cbe_lun.blocksize);
1145 	while (len > 0) {
1146 		bio = g_alloc_bio();
1147 		bio->bio_cmd	    = BIO_DELETE;
1148 		bio->bio_dev	    = dev;
1149 		bio->bio_offset	    = off;
1150 		bio->bio_length	    = MIN(len, maxlen);
1151 		bio->bio_data	    = 0;
1152 		bio->bio_done	    = ctl_be_block_biodone;
1153 		bio->bio_caller1    = beio;
1154 		bio->bio_pblkno     = off / be_lun->cbe_lun.blocksize;
1155 
1156 		off += bio->bio_length;
1157 		len -= bio->bio_length;
1158 
1159 		mtx_lock(&be_lun->io_lock);
1160 		beio->num_bios_sent++;
1161 		if (last && len == 0)
1162 			beio->send_complete = 1;
1163 		mtx_unlock(&be_lun->io_lock);
1164 
1165 		if (csw) {
1166 			csw->d_strategy(bio);
1167 		} else {
1168 			bio->bio_error = ENXIO;
1169 			ctl_be_block_biodone(bio);
1170 		}
1171 	}
1172 	if (csw)
1173 		dev_relthread(dev, ref);
1174 }
1175 
1176 static void
1177 ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
1178 		       struct ctl_be_block_io *beio)
1179 {
1180 	union ctl_io *io;
1181 	struct ctl_ptr_len_flags *ptrlen;
1182 	struct scsi_unmap_desc *buf, *end;
1183 	uint64_t len;
1184 
1185 	io = beio->io;
1186 
1187 	DPRINTF("entered\n");
1188 
1189 	binuptime(&beio->ds_t0);
1190 	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
1191 
1192 	if (beio->io_offset == -1) {
1193 		beio->io_len = 0;
1194 		ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
1195 		buf = (struct scsi_unmap_desc *)ptrlen->ptr;
1196 		end = buf + ptrlen->len / sizeof(*buf);
1197 		for (; buf < end; buf++) {
1198 			len = (uint64_t)scsi_4btoul(buf->length) *
1199 			    be_lun->cbe_lun.blocksize;
1200 			beio->io_len += len;
1201 			ctl_be_block_unmap_dev_range(be_lun, beio,
1202 			    scsi_8btou64(buf->lba) * be_lun->cbe_lun.blocksize,
1203 			    len, (end - buf < 2) ? TRUE : FALSE);
1204 		}
1205 	} else
1206 		ctl_be_block_unmap_dev_range(be_lun, beio,
1207 		    beio->io_offset, beio->io_len, TRUE);
1208 }
1209 
1210 static void
1211 ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
1212 			  struct ctl_be_block_io *beio)
1213 {
1214 	TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue);
1215 	struct bio *bio;
1216 	struct cdevsw *csw;
1217 	struct cdev *dev;
1218 	off_t cur_offset;
1219 	int i, max_iosize, ref;
1220 
1221 	DPRINTF("entered\n");
1222 	csw = devvn_refthread(be_lun->vn, &dev, &ref);
1223 
1224 	/*
1225 	 * We have to limit our I/O size to the maximum supported by the
1226 	 * backend device.
1227 	 */
1228 	if (csw) {
1229 		max_iosize = dev->si_iosize_max;
1230 		if (max_iosize <= 0)
1231 			max_iosize = DFLTPHYS;
1232 	} else
1233 		max_iosize = maxphys;
1234 
1235 	cur_offset = beio->io_offset;
1236 	for (i = 0; i < beio->num_segs; i++) {
1237 		size_t cur_size;
1238 		uint8_t *cur_ptr;
1239 
1240 		cur_size = beio->sg_segs[i].len;
1241 		cur_ptr = beio->sg_segs[i].addr;
1242 
1243 		while (cur_size > 0) {
1244 			/* This can't fail, it's a blocking allocation. */
1245 			bio = g_alloc_bio();
1246 
1247 			KASSERT(bio != NULL, ("g_alloc_bio() failed!\n"));
1248 
1249 			bio->bio_cmd = beio->bio_cmd;
1250 			bio->bio_dev = dev;
1251 			bio->bio_caller1 = beio;
1252 			bio->bio_length = min(cur_size, max_iosize);
1253 			bio->bio_offset = cur_offset;
1254 			bio->bio_data = cur_ptr;
1255 			bio->bio_done = ctl_be_block_biodone;
1256 			bio->bio_pblkno = cur_offset / be_lun->cbe_lun.blocksize;
1257 
1258 			cur_offset += bio->bio_length;
1259 			cur_ptr += bio->bio_length;
1260 			cur_size -= bio->bio_length;
1261 
1262 			TAILQ_INSERT_TAIL(&queue, bio, bio_queue);
1263 			beio->num_bios_sent++;
1264 		}
1265 	}
1266 	beio->send_complete = 1;
1267 	binuptime(&beio->ds_t0);
1268 	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);
1269 
1270 	/*
1271 	 * Fire off all allocated requests!
1272 	 */
1273 	while ((bio = TAILQ_FIRST(&queue)) != NULL) {
1274 		TAILQ_REMOVE(&queue, bio, bio_queue);
1275 		if (csw)
1276 			csw->d_strategy(bio);
1277 		else {
1278 			bio->bio_error = ENXIO;
1279 			ctl_be_block_biodone(bio);
1280 		}
1281 	}
1282 	if (csw)
1283 		dev_relthread(dev, ref);
1284 }
1285 
1286 static uint64_t
1287 ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun, const char *attrname)
1288 {
1289 	struct diocgattr_arg	arg;
1290 	struct cdevsw *csw;
1291 	struct cdev *dev;
1292 	int error, ref;
1293 
1294 	csw = devvn_refthread(be_lun->vn, &dev, &ref);
1295 	if (csw == NULL)
1296 		return (UINT64_MAX);
1297 	strlcpy(arg.name, attrname, sizeof(arg.name));
1298 	arg.len = sizeof(arg.value.off);
1299 	if (csw->d_ioctl) {
1300 		error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD,
1301 		    curthread);
1302 	} else
1303 		error = ENODEV;
1304 	dev_relthread(dev, ref);
1305 	if (error != 0)
1306 		return (UINT64_MAX);
1307 	return (arg.value.off);
1308 }
1309 
1310 static void
1311 ctl_be_block_cw_dispatch_sync(struct ctl_be_block_lun *be_lun,
1312 			    union ctl_io *io)
1313 {
1314 	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1315 	struct ctl_be_block_io *beio;
1316 	struct ctl_lba_len_flags *lbalen;
1317 
1318 	DPRINTF("entered\n");
1319 	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
1320 	lbalen = (struct ctl_lba_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
1321 
1322 	beio->io_len = lbalen->len * cbe_lun->blocksize;
1323 	beio->io_offset = lbalen->lba * cbe_lun->blocksize;
1324 	beio->io_arg = (lbalen->flags & SSC_IMMED) != 0;
1325 	beio->bio_cmd = BIO_FLUSH;
1326 	beio->ds_trans_type = DEVSTAT_NO_DATA;
1327 	DPRINTF("SYNC\n");
1328 	be_lun->lun_flush(be_lun, beio);
1329 }
1330 
1331 static void
1332 ctl_be_block_cw_done_ws(struct ctl_be_block_io *beio)
1333 {
1334 	union ctl_io *io;
1335 
1336 	io = beio->io;
1337 	ctl_free_beio(beio);
1338 	if ((io->io_hdr.flags & CTL_FLAG_ABORT) ||
1339 	    ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
1340 	     (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) {
1341 		ctl_config_write_done(io);
1342 		return;
1343 	}
1344 
1345 	ctl_be_block_config_write(io);
1346 }
1347 
1348 static void
1349 ctl_be_block_cw_dispatch_ws(struct ctl_be_block_lun *be_lun,
1350 			    union ctl_io *io)
1351 {
1352 	struct ctl_be_block_softc *softc = be_lun->softc;
1353 	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1354 	struct ctl_be_block_io *beio;
1355 	struct ctl_lba_len_flags *lbalen;
1356 	uint64_t len_left, lba;
1357 	uint32_t pb, pbo, adj;
1358 	int i, seglen;
1359 	uint8_t *buf, *end;
1360 
1361 	DPRINTF("entered\n");
1362 
1363 	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
1364 	lbalen = ARGS(io);
1365 
1366 	if (lbalen->flags & ~(SWS_LBDATA | SWS_UNMAP | SWS_ANCHOR | SWS_NDOB) ||
1367 	    (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR) && be_lun->unmap == NULL)) {
1368 		ctl_free_beio(beio);
1369 		ctl_set_invalid_field(&io->scsiio,
1370 				      /*sks_valid*/ 1,
1371 				      /*command*/ 1,
1372 				      /*field*/ 1,
1373 				      /*bit_valid*/ 0,
1374 				      /*bit*/ 0);
1375 		ctl_config_write_done(io);
1376 		return;
1377 	}
1378 
1379 	if (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR)) {
1380 		beio->io_offset = lbalen->lba * cbe_lun->blocksize;
1381 		beio->io_len = (uint64_t)lbalen->len * cbe_lun->blocksize;
1382 		beio->bio_cmd = BIO_DELETE;
1383 		beio->ds_trans_type = DEVSTAT_FREE;
1384 
1385 		be_lun->unmap(be_lun, beio);
1386 		return;
1387 	}
1388 
1389 	beio->bio_cmd = BIO_WRITE;
1390 	beio->ds_trans_type = DEVSTAT_WRITE;
1391 
1392 	DPRINTF("WRITE SAME at LBA %jx len %u\n",
1393 	       (uintmax_t)lbalen->lba, lbalen->len);
1394 
1395 	pb = cbe_lun->blocksize << be_lun->cbe_lun.pblockexp;
1396 	if (be_lun->cbe_lun.pblockoff > 0)
1397 		pbo = pb - cbe_lun->blocksize * be_lun->cbe_lun.pblockoff;
1398 	else
1399 		pbo = 0;
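	/*
	 * Below, each chunk is trimmed so that chunk boundaries fall on
	 * physical block boundaries where possible, helping the backing
	 * store avoid read-modify-write cycles.
	 */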
1400 	len_left = (uint64_t)lbalen->len * cbe_lun->blocksize;
1401 	for (i = 0, lba = 0; i < CTLBLK_MAX_SEGS && len_left > 0; i++) {
1402 		/*
1403 		 * Set up the S/G entry for this chunk.
1404 		 */
1405 		seglen = MIN(CTLBLK_MAX_SEG, len_left);
1406 		if (pb > cbe_lun->blocksize) {
1407 			adj = ((lbalen->lba + lba) * cbe_lun->blocksize +
1408 			    seglen - pbo) % pb;
1409 			if (seglen > adj)
1410 				seglen -= adj;
1411 			else
1412 				seglen -= seglen % cbe_lun->blocksize;
1413 		} else
1414 			seglen -= seglen % cbe_lun->blocksize;
1415 		ctl_alloc_seg(softc, &beio->sg_segs[i], seglen);
1416 
1417 		DPRINTF("segment %d addr %p len %zd\n", i,
1418 			beio->sg_segs[i].addr, beio->sg_segs[i].len);
1419 
1420 		beio->num_segs++;
1421 		len_left -= seglen;
1422 
1423 		buf = beio->sg_segs[i].addr;
1424 		end = buf + seglen;
1425 		for (; buf < end; buf += cbe_lun->blocksize) {
1426 			if (lbalen->flags & SWS_NDOB) {
1427 				memset(buf, 0, cbe_lun->blocksize);
1428 			} else {
1429 				memcpy(buf, io->scsiio.kern_data_ptr,
1430 				    cbe_lun->blocksize);
1431 			}
1432 			if (lbalen->flags & SWS_LBDATA)
1433 				scsi_ulto4b(lbalen->lba + lba, buf);
1434 			lba++;
1435 		}
1436 	}
1437 
1438 	beio->io_offset = lbalen->lba * cbe_lun->blocksize;
1439 	beio->io_len = lba * cbe_lun->blocksize;
1440 
1441 	/* We cannot do it all in one run.  Correct state and schedule a rerun. */
1442 	if (len_left > 0) {
1443 		lbalen->lba += lba;
1444 		lbalen->len -= lba;
1445 		beio->beio_cont = ctl_be_block_cw_done_ws;
1446 	}
1447 
1448 	be_lun->dispatch(be_lun, beio);
1449 }
1450 
1451 static void
1452 ctl_be_block_cw_dispatch_unmap(struct ctl_be_block_lun *be_lun,
1453 			    union ctl_io *io)
1454 {
1455 	struct ctl_be_block_io *beio;
1456 	struct ctl_ptr_len_flags *ptrlen;
1457 
1458 	DPRINTF("entered\n");
1459 
1460 	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
1461 	ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
1462 
1463 	if ((ptrlen->flags & ~SU_ANCHOR) != 0 || be_lun->unmap == NULL) {
1464 		ctl_free_beio(beio);
1465 		ctl_set_invalid_field(&io->scsiio,
1466 				      /*sks_valid*/ 0,
1467 				      /*command*/ 1,
1468 				      /*field*/ 0,
1469 				      /*bit_valid*/ 0,
1470 				      /*bit*/ 0);
1471 		ctl_config_write_done(io);
1472 		return;
1473 	}
1474 
1475 	beio->io_len = 0;
1476 	beio->io_offset = -1;
1477 	beio->bio_cmd = BIO_DELETE;
1478 	beio->ds_trans_type = DEVSTAT_FREE;
1479 	DPRINTF("UNMAP\n");
1480 	be_lun->unmap(be_lun, beio);
1481 }
1482 
1483 static void
1484 ctl_be_block_cr_done(struct ctl_be_block_io *beio)
1485 {
1486 	union ctl_io *io;
1487 
1488 	io = beio->io;
1489 	ctl_free_beio(beio);
1490 	ctl_config_read_done(io);
1491 }
1492 
1493 static void
1494 ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun,
1495 			 union ctl_io *io)
1496 {
1497 	struct ctl_be_block_io *beio;
1498 	struct ctl_be_block_softc *softc;
1499 
1500 	DPRINTF("entered\n");
1501 
1502 	softc = be_lun->softc;
1503 	beio = ctl_alloc_beio(softc);
1504 	beio->io = io;
1505 	beio->lun = be_lun;
1506 	beio->beio_cont = ctl_be_block_cr_done;
1507 	PRIV(io)->ptr = (void *)beio;
1508 
1509 	switch (io->scsiio.cdb[0]) {
1510 	case SERVICE_ACTION_IN:		/* GET LBA STATUS */
1511 		beio->bio_cmd = -1;
1512 		beio->ds_trans_type = DEVSTAT_NO_DATA;
1513 		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
1514 		beio->io_len = 0;
1515 		if (be_lun->get_lba_status)
1516 			be_lun->get_lba_status(be_lun, beio);
1517 		else
1518 			ctl_be_block_cr_done(beio);
1519 		break;
1520 	default:
1521 		panic("Unhandled CDB type %#x", io->scsiio.cdb[0]);
1522 		break;
1523 	}
1524 }
1525 
1526 static void
1527 ctl_be_block_cw_done(struct ctl_be_block_io *beio)
1528 {
1529 	union ctl_io *io;
1530 
1531 	io = beio->io;
1532 	ctl_free_beio(beio);
1533 	ctl_config_write_done(io);
1534 }
1535 
1536 static void
1537 ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
1538 			 union ctl_io *io)
1539 {
1540 	struct ctl_be_block_io *beio;
1541 	struct ctl_be_block_softc *softc;
1542 
1543 	DPRINTF("entered\n");
1544 
1545 	softc = be_lun->softc;
1546 	beio = ctl_alloc_beio(softc);
1547 	beio->io = io;
1548 	beio->lun = be_lun;
1549 	beio->beio_cont = ctl_be_block_cw_done;
1550 	switch (io->scsiio.tag_type) {
1551 	case CTL_TAG_ORDERED:
1552 		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
1553 		break;
1554 	case CTL_TAG_HEAD_OF_QUEUE:
1555 		beio->ds_tag_type = DEVSTAT_TAG_HEAD;
1556 		break;
1557 	case CTL_TAG_UNTAGGED:
1558 	case CTL_TAG_SIMPLE:
1559 	case CTL_TAG_ACA:
1560 	default:
1561 		beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
1562 		break;
1563 	}
1564 	PRIV(io)->ptr = (void *)beio;
1565 
1566 	switch (io->scsiio.cdb[0]) {
1567 	case SYNCHRONIZE_CACHE:
1568 	case SYNCHRONIZE_CACHE_16:
1569 		ctl_be_block_cw_dispatch_sync(be_lun, io);
1570 		break;
1571 	case WRITE_SAME_10:
1572 	case WRITE_SAME_16:
1573 		ctl_be_block_cw_dispatch_ws(be_lun, io);
1574 		break;
1575 	case UNMAP:
1576 		ctl_be_block_cw_dispatch_unmap(be_lun, io);
1577 		break;
1578 	default:
1579 		panic("Unhandled CDB type %#x", io->scsiio.cdb[0]);
1580 		break;
1581 	}
1582 }
1583 
1584 SDT_PROBE_DEFINE1(cbb, , read, start, "uint64_t");
1585 SDT_PROBE_DEFINE1(cbb, , write, start, "uint64_t");
1586 SDT_PROBE_DEFINE1(cbb, , read, alloc_done, "uint64_t");
1587 SDT_PROBE_DEFINE1(cbb, , write, alloc_done, "uint64_t");
1588 
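/*
 * Continuation for I/Os larger than CTLBLK_MAX_IO_SIZE: free the
 * completed beio and, unless the chunk failed or was aborted, requeue
 * the ctl_io on the input queue to dispatch the next chunk.
 */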
1589 static void
1590 ctl_be_block_next(struct ctl_be_block_io *beio)
1591 {
1592 	struct ctl_be_block_lun *be_lun;
1593 	union ctl_io *io;
1594 
1595 	io = beio->io;
1596 	be_lun = beio->lun;
1597 	ctl_free_beio(beio);
1598 	if ((io->io_hdr.flags & CTL_FLAG_ABORT) ||
1599 	    ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
1600 	     (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) {
1601 		ctl_data_submit_done(io);
1602 		return;
1603 	}
1604 
1605 	io->io_hdr.status &= ~CTL_STATUS_MASK;
1606 	io->io_hdr.status |= CTL_STATUS_NONE;
1607 
1608 	mtx_lock(&be_lun->queue_lock);
1609 	STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
1610 	mtx_unlock(&be_lun->queue_lock);
1611 	taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
1612 }
1613 
1614 static void
1615 ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
1616 			   union ctl_io *io)
1617 {
1618 	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1619 	struct ctl_be_block_io *beio;
1620 	struct ctl_be_block_softc *softc;
1621 	struct ctl_lba_len_flags *lbalen;
1622 	struct ctl_ptr_len_flags *bptrlen;
1623 	uint64_t len_left, lbas;
1624 	int i;
1625 
1626 	softc = be_lun->softc;
1627 
1628 	DPRINTF("entered\n");
1629 
1630 	lbalen = ARGS(io);
1631 	if (lbalen->flags & CTL_LLF_WRITE) {
1632 		SDT_PROBE0(cbb, , write, start);
1633 	} else {
1634 		SDT_PROBE0(cbb, , read, start);
1635 	}
1636 
1637 	beio = ctl_alloc_beio(softc);
1638 	beio->io = io;
1639 	beio->lun = be_lun;
1640 	bptrlen = PRIV(io);
1641 	bptrlen->ptr = (void *)beio;
1642 
1643 	switch (io->scsiio.tag_type) {
1644 	case CTL_TAG_ORDERED:
1645 		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
1646 		break;
1647 	case CTL_TAG_HEAD_OF_QUEUE:
1648 		beio->ds_tag_type = DEVSTAT_TAG_HEAD;
1649 		break;
1650 	case CTL_TAG_UNTAGGED:
1651 	case CTL_TAG_SIMPLE:
1652 	case CTL_TAG_ACA:
1653 	default:
1654 		beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
1655 		break;
1656 	}
1657 
1658 	if (lbalen->flags & CTL_LLF_WRITE) {
1659 		beio->bio_cmd = BIO_WRITE;
1660 		beio->ds_trans_type = DEVSTAT_WRITE;
1661 	} else {
1662 		beio->bio_cmd = BIO_READ;
1663 		beio->ds_trans_type = DEVSTAT_READ;
1664 	}
1665 
1666 	DPRINTF("%s at LBA %jx len %u @%ju\n",
1667 	       (beio->bio_cmd == BIO_READ) ? "READ" : "WRITE",
1668 	       (uintmax_t)lbalen->lba, lbalen->len, bptrlen->len);
1669 	lbas = CTLBLK_MAX_IO_SIZE;
1670 	if (lbalen->flags & CTL_LLF_COMPARE) {
1671 		beio->two_sglists = 1;
1672 		lbas /= 2;
1673 	}
1674 	lbas = MIN(lbalen->len - bptrlen->len, lbas / cbe_lun->blocksize);
1675 	beio->io_offset = (lbalen->lba + bptrlen->len) * cbe_lun->blocksize;
1676 	beio->io_len = lbas * cbe_lun->blocksize;
1677 	bptrlen->len += lbas;
1678 
1679 	for (i = 0, len_left = beio->io_len; len_left > 0; i++) {
1680 		KASSERT(i < CTLBLK_MAX_SEGS, ("Too many segs (%d >= %d)",
1681 		    i, CTLBLK_MAX_SEGS));
1682 
1683 		/*
1684 		 * Set up the S/G entry for this chunk.
1685 		 */
1686 		ctl_alloc_seg(softc, &beio->sg_segs[i],
1687 		    MIN(CTLBLK_MAX_SEG, len_left));
1688 
1689 		DPRINTF("segment %d addr %p len %zd\n", i,
1690 			beio->sg_segs[i].addr, beio->sg_segs[i].len);
1691 
1692 		/* Set up second segment for compare operation. */
1693 		if (beio->two_sglists) {
1694 			ctl_alloc_seg(softc,
1695 			    &beio->sg_segs[i + CTLBLK_HALF_SEGS],
1696 			    beio->sg_segs[i].len);
1697 		}
1698 
1699 		beio->num_segs++;
1700 		len_left -= beio->sg_segs[i].len;
1701 	}
1702 	if (bptrlen->len < lbalen->len)
1703 		beio->beio_cont = ctl_be_block_next;
1704 	io->scsiio.be_move_done = ctl_be_block_move_done;
1705 	/* For compare we have separate S/G lists for read and datamove. */
1706 	if (beio->two_sglists)
1707 		io->scsiio.kern_data_ptr = (uint8_t *)&beio->sg_segs[CTLBLK_HALF_SEGS];
1708 	else
1709 		io->scsiio.kern_data_ptr = (uint8_t *)beio->sg_segs;
1710 	io->scsiio.kern_data_len = beio->io_len;
1711 	io->scsiio.kern_sg_entries = beio->num_segs;
1712 	io->scsiio.kern_data_ref = ctl_refcnt_beio;
1713 	io->scsiio.kern_data_arg = beio;
1714 	io->io_hdr.flags |= CTL_FLAG_ALLOCATED;
1715 
1716 	/*
1717 	 * For the read case, we need to read the data into our buffers and
1718 	 * then we can send it back to the user.  For the write case, we
1719 	 * need to get the data from the user first.
1720 	 */
1721 	if (beio->bio_cmd == BIO_READ) {
1722 		SDT_PROBE0(cbb, , read, alloc_done);
1723 		be_lun->dispatch(be_lun, beio);
1724 	} else {
1725 		SDT_PROBE0(cbb, , write, alloc_done);
1726 		ctl_datamove(io);
1727 	}
1728 }
1729 
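/*
 * Taskqueue worker: drains the queues in priority order (datamove
 * first, then config write, config read, and finally new input) until
 * all are empty.
 */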
1730 static void
1731 ctl_be_block_worker(void *context, int pending)
1732 {
1733 	struct ctl_be_block_lun *be_lun = (struct ctl_be_block_lun *)context;
1734 	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1735 	union ctl_io *io;
1736 	struct ctl_be_block_io *beio;
1737 
1738 	DPRINTF("entered\n");
1739 	/*
1740 	 * Fetch and process I/Os from all queues.  If we detect the LUN
1741 	 * CTL_LUN_FLAG_NO_MEDIA status here, it is the result of a race, so
1742 	 * make the response maximally opaque to avoid confusing the initiator.
1743 	 */
1744 	for (;;) {
1745 		mtx_lock(&be_lun->queue_lock);
1746 		io = (union ctl_io *)STAILQ_FIRST(&be_lun->datamove_queue);
1747 		if (io != NULL) {
1748 			DPRINTF("datamove queue\n");
1749 			STAILQ_REMOVE_HEAD(&be_lun->datamove_queue, links);
1750 			mtx_unlock(&be_lun->queue_lock);
1751 			beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
1752 			if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
1753 				ctl_set_busy(&io->scsiio);
1754 				ctl_complete_beio(beio);
1755 				continue;
1756 			}
1757 			be_lun->dispatch(be_lun, beio);
1758 			continue;
1759 		}
1760 		io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_write_queue);
1761 		if (io != NULL) {
1762 			DPRINTF("config write queue\n");
1763 			STAILQ_REMOVE_HEAD(&be_lun->config_write_queue, links);
1764 			mtx_unlock(&be_lun->queue_lock);
1765 			if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
1766 				ctl_set_busy(&io->scsiio);
1767 				ctl_config_write_done(io);
1768 				continue;
1769 			}
1770 			ctl_be_block_cw_dispatch(be_lun, io);
1771 			continue;
1772 		}
1773 		io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_read_queue);
1774 		if (io != NULL) {
1775 			DPRINTF("config read queue\n");
1776 			STAILQ_REMOVE_HEAD(&be_lun->config_read_queue, links);
1777 			mtx_unlock(&be_lun->queue_lock);
1778 			if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
1779 				ctl_set_busy(&io->scsiio);
1780 				ctl_config_read_done(io);
1781 				continue;
1782 			}
1783 			ctl_be_block_cr_dispatch(be_lun, io);
1784 			continue;
1785 		}
1786 		io = (union ctl_io *)STAILQ_FIRST(&be_lun->input_queue);
1787 		if (io != NULL) {
1788 			DPRINTF("input queue\n");
1789 			STAILQ_REMOVE_HEAD(&be_lun->input_queue, links);
1790 			mtx_unlock(&be_lun->queue_lock);
1791 			if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
1792 				ctl_set_busy(&io->scsiio);
1793 				ctl_data_submit_done(io);
1794 				continue;
1795 			}
1796 			ctl_be_block_dispatch(be_lun, io);
1797 			continue;
1798 		}
1799 
1800 		/*
1801 		 * If we get here, there is no work left in the queues, so
1802 		 * just break out and let the task queue go to sleep.
1803 		 */
1804 		mtx_unlock(&be_lun->queue_lock);
1805 		break;
1806 	}
1807 }
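/*
 * Editorial note: the worker above drains the queues in a deliberate
 * priority order -- datamove completions first, then config writes and
 * config reads, and only then new I/O from the input queue -- which
 * presumably lets I/Os already in flight complete before fresh work is
 * dispatched.
 */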
1808 
1809 /*
1810  * Entry point from CTL to the backend for I/O.  We queue everything to a
1811  * work thread, so this just puts the I/O on a queue and wakes up the
1812  * thread.
1813  */
1814 static int
1815 ctl_be_block_submit(union ctl_io *io)
1816 {
1817 	struct ctl_be_block_lun *be_lun;
1818 
1819 	DPRINTF("entered\n");
1820 
1821 	be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io);
1822 
1823 	KASSERT(io->io_hdr.io_type == CTL_IO_SCSI,
1824 	    ("%s: unexpected I/O type %x", __func__, io->io_hdr.io_type));
1825 
1826 	PRIV(io)->len = 0;
1827 
1828 	mtx_lock(&be_lun->queue_lock);
1829 	STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
1830 	mtx_unlock(&be_lun->queue_lock);
1831 	taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
1832 
1833 	return (CTL_RETVAL_COMPLETE);
1834 }
1835 
1836 static int
1837 ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
1838 			int flag, struct thread *td)
1839 {
1840 	struct ctl_be_block_softc *softc = &backend_block_softc;
1841 	int error;
1842 
1843 	error = 0;
1844 	switch (cmd) {
1845 	case CTL_LUN_REQ: {
1846 		struct ctl_lun_req *lun_req;
1847 
1848 		lun_req = (struct ctl_lun_req *)addr;
1849 
1850 		switch (lun_req->reqtype) {
1851 		case CTL_LUNREQ_CREATE:
1852 			error = ctl_be_block_create(softc, lun_req);
1853 			break;
1854 		case CTL_LUNREQ_RM:
1855 			error = ctl_be_block_rm(softc, lun_req);
1856 			break;
1857 		case CTL_LUNREQ_MODIFY:
1858 			error = ctl_be_block_modify(softc, lun_req);
1859 			break;
1860 		default:
1861 			lun_req->status = CTL_LUN_ERROR;
1862 			snprintf(lun_req->error_str, sizeof(lun_req->error_str),
1863 				 "invalid LUN request type %d",
1864 				 lun_req->reqtype);
1865 			break;
1866 		}
1867 		break;
1868 	}
1869 	default:
1870 		error = ENOTTY;
1871 		break;
1872 	}
1873 
1874 	return (error);
1875 }
1876 
1877 static int
1878 ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
1879 {
1880 	struct ctl_be_lun *cbe_lun;
1881 	struct ctl_be_block_filedata *file_data;
1882 	struct ctl_lun_create_params *params;
1883 	const char		     *value;
1884 	struct vattr		      vattr;
1885 	off_t			      ps, pss, po, pos, us, uss, uo, uos;
1886 	int			      error;
1887 	long			      pconf;
1888 
1889 	cbe_lun = &be_lun->cbe_lun;
1890 	file_data = &be_lun->backend.file;
1891 	params = &be_lun->params;
1892 
1893 	be_lun->dev_type = CTL_BE_BLOCK_FILE;
1894 	be_lun->dispatch = ctl_be_block_dispatch_file;
1895 	be_lun->lun_flush = ctl_be_block_flush_file;
1896 	be_lun->get_lba_status = ctl_be_block_gls_file;
1897 	be_lun->getattr = ctl_be_block_getattr_file;
1898 	be_lun->unmap = ctl_be_block_unmap_file;
1899 	cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP;
1900 
1901 	error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
1902 	if (error != 0) {
1903 		snprintf(req->error_str, sizeof(req->error_str),
1904 			 "error calling VOP_GETATTR() for file %s",
1905 			 be_lun->dev_path);
1906 		return (error);
1907 	}
1908 
1909 	error = VOP_PATHCONF(be_lun->vn, _PC_DEALLOC_PRESENT, &pconf);
1910 	if (error != 0) {
1911 		snprintf(req->error_str, sizeof(req->error_str),
1912 		    "error calling VOP_PATHCONF() for file %s",
1913 		    be_lun->dev_path);
1914 		return (error);
1915 	}
1916 	if (pconf == 1)
1917 		cbe_lun->flags |= CTL_LUN_FLAG_UNMAP;
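	/*
	 * Editorial note: _PC_DEALLOC_PRESENT reports whether the backing
	 * filesystem supports hole punching (the machinery behind
	 * fspacectl(2)), which is what allows UNMAP to be honored for a
	 * file-backed LUN.
	 */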
1918 
1919 	file_data->cred = crhold(curthread->td_ucred);
1920 	if (params->lun_size_bytes != 0)
1921 		be_lun->size_bytes = params->lun_size_bytes;
1922 	else
1923 		be_lun->size_bytes = vattr.va_size;
1924 
1925 	/*
1926 	 * For files we can use any logical block size.  Prefer 512 bytes
1927 	 * for compatibility reasons.  If the file's vattr.va_blocksize
1928 	 * (preferred I/O block size) is larger than and a multiple of the
1929 	 * chosen logical block size, report it as the physical block size.
1930 	 */
1931 	if (params->blocksize_bytes != 0)
1932 		cbe_lun->blocksize = params->blocksize_bytes;
1933 	else if (cbe_lun->lun_type == T_CDROM)
1934 		cbe_lun->blocksize = 2048;
1935 	else
1936 		cbe_lun->blocksize = 512;
1937 	be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
1938 	cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
1939 	    0 : (be_lun->size_blocks - 1);
1940 
1941 	us = ps = vattr.va_blocksize;
1942 	uo = po = 0;
1943 
1944 	value = dnvlist_get_string(cbe_lun->options, "pblocksize", NULL);
1945 	if (value != NULL)
1946 		ctl_expand_number(value, &ps);
1947 	value = dnvlist_get_string(cbe_lun->options, "pblockoffset", NULL);
1948 	if (value != NULL)
1949 		ctl_expand_number(value, &po);
1950 	pss = ps / cbe_lun->blocksize;
1951 	pos = po / cbe_lun->blocksize;
1952 	if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) &&
1953 	    ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) {
1954 		cbe_lun->pblockexp = fls(pss) - 1;
1955 		cbe_lun->pblockoff = (pss - pos) % pss;
1956 	}
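	/*
	 * Worked example (editorial): with a 512-byte logical block size
	 * and pblocksize=4096, pblockoffset=0, we get pss = 8 and pos = 0;
	 * the checks above pass, so pblockexp = fls(8) - 1 = 3 and
	 * pblockoff = 0, i.e. one 4KB physical block spans 2^3 logical
	 * blocks.
	 */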
1957 
1958 	value = dnvlist_get_string(cbe_lun->options, "ublocksize", NULL);
1959 	if (value != NULL)
1960 		ctl_expand_number(value, &us);
1961 	value = dnvlist_get_string(cbe_lun->options, "ublockoffset", NULL);
1962 	if (value != NULL)
1963 		ctl_expand_number(value, &uo);
1964 	uss = us / cbe_lun->blocksize;
1965 	uos = uo / cbe_lun->blocksize;
1966 	if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) &&
1967 	    ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) {
1968 		cbe_lun->ublockexp = fls(uss) - 1;
1969 		cbe_lun->ublockoff = (uss - uos) % uss;
1970 	}
1971 
1972 	/*
1973 	 * Sanity check.  The media size has to be at least one
1974 	 * sector long.
1975 	 */
1976 	if (be_lun->size_bytes < cbe_lun->blocksize) {
1977 		error = EINVAL;
1978 		snprintf(req->error_str, sizeof(req->error_str),
1979 			 "file %s size %ju < block size %u", be_lun->dev_path,
1980 			 (uintmax_t)be_lun->size_bytes, cbe_lun->blocksize);
1981 	}
1982 
1983 	cbe_lun->opttxferlen = CTLBLK_MAX_IO_SIZE / cbe_lun->blocksize;
1984 	return (error);
1985 }
1986 
1987 static int
1988 ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
1989 {
1990 	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1991 	struct ctl_lun_create_params *params;
1992 	struct cdevsw		     *csw;
1993 	struct cdev		     *dev;
1994 	const char		     *value;
1995 	int			      error, atomic, maxio, ref, unmap, tmp;
1996 	off_t			      ps, pss, po, pos, us, uss, uo, uos, otmp;
1997 
1998 	params = &be_lun->params;
1999 
2000 	be_lun->dev_type = CTL_BE_BLOCK_DEV;
2001 	csw = devvn_refthread(be_lun->vn, &dev, &ref);
2002 	if (csw == NULL)
2003 		return (ENXIO);
2004 	if (strcmp(csw->d_name, "zvol") == 0) {
2005 		be_lun->dispatch = ctl_be_block_dispatch_zvol;
2006 		be_lun->get_lba_status = ctl_be_block_gls_zvol;
2007 		atomic = maxio = CTLBLK_MAX_IO_SIZE;
2008 	} else {
2009 		be_lun->dispatch = ctl_be_block_dispatch_dev;
2010 		be_lun->get_lba_status = NULL;
2011 		atomic = 0;
2012 		maxio = dev->si_iosize_max;
2013 		if (maxio <= 0)
2014 			maxio = DFLTPHYS;
2015 		if (maxio > CTLBLK_MAX_SEG)
2016 			maxio = CTLBLK_MAX_SEG;
2017 	}
2018 	be_lun->lun_flush = ctl_be_block_flush_dev;
2019 	be_lun->getattr = ctl_be_block_getattr_dev;
2020 	be_lun->unmap = ctl_be_block_unmap_dev;
2021 
2022 	if (!csw->d_ioctl) {
2023 		dev_relthread(dev, ref);
2024 		snprintf(req->error_str, sizeof(req->error_str),
2025 			 "no d_ioctl for device %s!", be_lun->dev_path);
2026 		return (ENODEV);
2027 	}
2028 
2029 	error = csw->d_ioctl(dev, DIOCGSECTORSIZE, (caddr_t)&tmp, FREAD,
2030 			       curthread);
2031 	if (error) {
2032 		dev_relthread(dev, ref);
2033 		snprintf(req->error_str, sizeof(req->error_str),
2034 			 "error %d returned for DIOCGSECTORSIZE ioctl "
2035 			 "on %s!", error, be_lun->dev_path);
2036 		return (error);
2037 	}
2038 
2039 	/*
2040 	 * If the user has asked for a blocksize that is greater than the
2041 	 * backing device's blocksize, we can do it only if the requested
2042 	 * blocksize is an even multiple of the underlying device's
2043 	 * blocksize.
2044 	 */
2045 	if ((params->blocksize_bytes != 0) &&
2046 	    (params->blocksize_bytes >= tmp)) {
2047 		if (params->blocksize_bytes % tmp == 0) {
2048 			cbe_lun->blocksize = params->blocksize_bytes;
2049 		} else {
2050 			dev_relthread(dev, ref);
2051 			snprintf(req->error_str, sizeof(req->error_str),
2052 				 "requested blocksize %u is not an even "
2053 				 "multiple of backing device blocksize %u",
2054 				 params->blocksize_bytes, tmp);
2055 			return (EINVAL);
2056 		}
2057 	} else if (params->blocksize_bytes != 0) {
2058 		dev_relthread(dev, ref);
2059 		snprintf(req->error_str, sizeof(req->error_str),
2060 			 "requested blocksize %u < backing device "
2061 			 "blocksize %u", params->blocksize_bytes, tmp);
2062 		return (EINVAL);
2063 	} else if (cbe_lun->lun_type == T_CDROM)
2064 		cbe_lun->blocksize = MAX(tmp, 2048);
2065 	else
2066 		cbe_lun->blocksize = tmp;
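	/*
	 * Worked example (editorial): on a device reporting 512-byte
	 * sectors, a requested blocksize of 4096 is accepted since
	 * 4096 % 512 == 0; on a 4096-byte-sector device, a requested
	 * blocksize of 1024 fails the first test (1024 < 4096) and is
	 * rejected as smaller than the backing device's blocksize.
	 */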
2067 
2068 	error = csw->d_ioctl(dev, DIOCGMEDIASIZE, (caddr_t)&otmp, FREAD,
2069 			     curthread);
2070 	if (error) {
2071 		dev_relthread(dev, ref);
2072 		snprintf(req->error_str, sizeof(req->error_str),
2073 			 "error %d returned for DIOCGMEDIASIZE "
2074 			 "ioctl on %s!", error,
2075 			 be_lun->dev_path);
2076 		return (error);
2077 	}
2078 
2079 	if (params->lun_size_bytes != 0) {
2080 		if (params->lun_size_bytes > otmp) {
2081 			dev_relthread(dev, ref);
2082 			snprintf(req->error_str, sizeof(req->error_str),
2083 				 "requested LUN size %ju > backing device "
2084 				 "size %ju",
2085 				 (uintmax_t)params->lun_size_bytes,
2086 				 (uintmax_t)otmp);
2087 			return (EINVAL);
2088 		}
2089 
2090 		be_lun->size_bytes = params->lun_size_bytes;
2091 	} else
2092 		be_lun->size_bytes = otmp;
2093 	be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
2094 	cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
2095 	    0 : (be_lun->size_blocks - 1);
2096 
2097 	error = csw->d_ioctl(dev, DIOCGSTRIPESIZE, (caddr_t)&ps, FREAD,
2098 	    curthread);
2099 	if (error)
2100 		ps = po = 0;
2101 	else {
2102 		error = csw->d_ioctl(dev, DIOCGSTRIPEOFFSET, (caddr_t)&po,
2103 		    FREAD, curthread);
2104 		if (error)
2105 			po = 0;
2106 	}
2107 	us = ps;
2108 	uo = po;
2109 
2110 	value = dnvlist_get_string(cbe_lun->options, "pblocksize", NULL);
2111 	if (value != NULL)
2112 		ctl_expand_number(value, &ps);
2113 	value = dnvlist_get_string(cbe_lun->options, "pblockoffset", NULL);
2114 	if (value != NULL)
2115 		ctl_expand_number(value, &po);
2116 	pss = ps / cbe_lun->blocksize;
2117 	pos = po / cbe_lun->blocksize;
2118 	if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) &&
2119 	    ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) {
2120 		cbe_lun->pblockexp = fls(pss) - 1;
2121 		cbe_lun->pblockoff = (pss - pos) % pss;
2122 	}
2123 
2124 	value = dnvlist_get_string(cbe_lun->options, "ublocksize", NULL);
2125 	if (value != NULL)
2126 		ctl_expand_number(value, &us);
2127 	value = dnvlist_get_string(cbe_lun->options, "ublockoffset", NULL);
2128 	if (value != NULL)
2129 		ctl_expand_number(value, &uo);
2130 	uss = us / cbe_lun->blocksize;
2131 	uos = uo / cbe_lun->blocksize;
2132 	if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) &&
2133 	    ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) {
2134 		cbe_lun->ublockexp = fls(uss) - 1;
2135 		cbe_lun->ublockoff = (uss - uos) % uss;
2136 	}
2137 
2138 	cbe_lun->atomicblock = atomic / cbe_lun->blocksize;
2139 	cbe_lun->opttxferlen = maxio / cbe_lun->blocksize;
2140 
2141 	if (be_lun->dispatch == ctl_be_block_dispatch_zvol) {
2142 		unmap = 1;
2143 	} else {
2144 		struct diocgattr_arg	arg;
2145 
2146 		strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name));
2147 		arg.len = sizeof(arg.value.i);
2148 		error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD,
2149 		    curthread);
2150 		unmap = (error == 0) ? arg.value.i : 0;
2151 	}
2152 	value = dnvlist_get_string(cbe_lun->options, "unmap", NULL);
2153 	if (value != NULL)
2154 		unmap = (strcmp(value, "on") == 0);
2155 	if (unmap)
2156 		cbe_lun->flags |= CTL_LUN_FLAG_UNMAP;
2157 	else
2158 		cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP;
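	/*
	 * Illustrative usage (hedged; the ctladm syntax is assumed, only
	 * the "unmap" option name comes from the code above): UNMAP can be
	 * forced off per LUN at creation time, e.g.
	 *
	 *	ctladm create -b block -o file=/dev/da0 -o unmap=off
	 *
	 * overriding whatever GEOM::candelete reported.
	 */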
2159 
2160 	dev_relthread(dev, ref);
2161 	return (0);
2162 }
2163 
2164 static int
2165 ctl_be_block_close(struct ctl_be_block_lun *be_lun)
2166 {
2167 	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
2168 	int flags;
2169 
2170 	if (be_lun->vn) {
2171 		flags = FREAD;
2172 		if ((cbe_lun->flags & CTL_LUN_FLAG_READONLY) == 0)
2173 			flags |= FWRITE;
2174 		(void)vn_close(be_lun->vn, flags, NOCRED, curthread);
2175 		be_lun->vn = NULL;
2176 
2177 		switch (be_lun->dev_type) {
2178 		case CTL_BE_BLOCK_DEV:
2179 			break;
2180 		case CTL_BE_BLOCK_FILE:
2181 			if (be_lun->backend.file.cred != NULL) {
2182 				crfree(be_lun->backend.file.cred);
2183 				be_lun->backend.file.cred = NULL;
2184 			}
2185 			break;
2186 		case CTL_BE_BLOCK_NONE:
2187 			break;
2188 		default:
2189 			panic("Unexpected backend type %d", be_lun->dev_type);
2190 			break;
2191 		}
2192 		be_lun->dev_type = CTL_BE_BLOCK_NONE;
2193 	}
2194 	return (0);
2195 }
2196 
2197 static int
2198 ctl_be_block_open(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
2199 {
2200 	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
2201 	struct nameidata nd;
2202 	const char	*value;
2203 	int		 error, flags;
2204 
2205 	error = 0;
2206 	if (rootvnode == NULL) {
2207 		snprintf(req->error_str, sizeof(req->error_str),
2208 			 "Root filesystem is not mounted");
2209 		return (1);
2210 	}
2211 	pwd_ensure_dirs();
2212 
2213 	value = dnvlist_get_string(cbe_lun->options, "file", NULL);
2214 	if (value == NULL) {
2215 		snprintf(req->error_str, sizeof(req->error_str),
2216 			 "no file argument specified");
2217 		return (1);
2218 	}
2219 	free(be_lun->dev_path, M_CTLBLK);
2220 	be_lun->dev_path = strdup(value, M_CTLBLK);
2221 
2222 	flags = FREAD;
2223 	value = dnvlist_get_string(cbe_lun->options, "readonly", NULL);
2224 	if (value != NULL) {
2225 		if (strcmp(value, "on") != 0)
2226 			flags |= FWRITE;
2227 	} else if (cbe_lun->lun_type == T_DIRECT)
2228 		flags |= FWRITE;
2229 
2230 again:
2231 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, be_lun->dev_path);
2232 	error = vn_open(&nd, &flags, 0, NULL);
2233 	if ((error == EROFS || error == EACCES) && (flags & FWRITE)) {
2234 		flags &= ~FWRITE;
2235 		goto again;
2236 	}
2237 	if (error) {
2238 		/*
2239 		 * If the user doesn't give us a fully qualified path,
2240 		 * retrying under /dev/ (e.g. "md0" becomes "/dev/md0") is
2241 		 * the only reasonable guess we can make.  To specify a
2242 		 * plain file, the user must give the full path.
2243 		 */
2244 		if (be_lun->dev_path[0] != '/') {
2245 			char *dev_name;
2246 
2247 			asprintf(&dev_name, M_CTLBLK, "/dev/%s",
2248 				be_lun->dev_path);
2249 			free(be_lun->dev_path, M_CTLBLK);
2250 			be_lun->dev_path = dev_name;
2251 			goto again;
2252 		}
2253 		snprintf(req->error_str, sizeof(req->error_str),
2254 		    "error opening %s: %d", be_lun->dev_path, error);
2255 		return (error);
2256 	}
2257 	if (flags & FWRITE)
2258 		cbe_lun->flags &= ~CTL_LUN_FLAG_READONLY;
2259 	else
2260 		cbe_lun->flags |= CTL_LUN_FLAG_READONLY;
2261 
2262 	NDFREE_PNBUF(&nd);
2263 	be_lun->vn = nd.ni_vp;
2264 
2265 	/* We only support disks and files. */
2266 	if (vn_isdisk_error(be_lun->vn, &error)) {
2267 		error = ctl_be_block_open_dev(be_lun, req);
2268 	} else if (be_lun->vn->v_type == VREG) {
2269 		error = ctl_be_block_open_file(be_lun, req);
2270 	} else {
2271 		error = EINVAL;
2272 		snprintf(req->error_str, sizeof(req->error_str),
2273 			 "%s is not a disk or plain file", be_lun->dev_path);
2274 	}
2275 	VOP_UNLOCK(be_lun->vn);
2276 
2277 	if (error != 0)
2278 		ctl_be_block_close(be_lun);
2279 	cbe_lun->serseq = CTL_LUN_SERSEQ_OFF;
2280 	if (be_lun->dispatch != ctl_be_block_dispatch_dev)
2281 		cbe_lun->serseq = CTL_LUN_SERSEQ_SOFT;
2282 	value = dnvlist_get_string(cbe_lun->options, "serseq", NULL);
2283 	if (value != NULL && strcmp(value, "on") == 0)
2284 		cbe_lun->serseq = CTL_LUN_SERSEQ_ON;
2285 	else if (value != NULL && strcmp(value, "read") == 0)
2286 		cbe_lun->serseq = CTL_LUN_SERSEQ_READ;
2287 	else if (value != NULL && strcmp(value, "soft") == 0)
2288 		cbe_lun->serseq = CTL_LUN_SERSEQ_SOFT;
2289 	else if (value != NULL && strcmp(value, "off") == 0)
2290 		cbe_lun->serseq = CTL_LUN_SERSEQ_OFF;
2291 	return (0);
2292 }
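/*
 * Editorial note: the "serseq" option above accepts "on", "read", "soft"
 * and "off"; any other value keeps the default derived from the dispatch
 * method (SERSEQ_OFF for raw device dispatch, SERSEQ_SOFT otherwise).
 */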
2293 
2294 static int
2295 ctl_be_block_create(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
2296 {
2297 	struct ctl_be_lun *cbe_lun;
2298 	struct ctl_be_block_lun *be_lun;
2299 	struct ctl_lun_create_params *params;
2301 	char tmpstr[32];
2302 	const char *value;
2303 	int retval, num_threads;
2304 	int tmp_num_threads;
2305 
2306 	params = &req->reqdata.create;
2307 	retval = 0;
2308 	req->status = CTL_LUN_OK;
2309 
2310 	be_lun = malloc(sizeof(*be_lun), M_CTLBLK, M_ZERO | M_WAITOK);
2311 	cbe_lun = &be_lun->cbe_lun;
2312 	be_lun->params = req->reqdata.create;
2313 	be_lun->softc = softc;
2314 	STAILQ_INIT(&be_lun->input_queue);
2315 	STAILQ_INIT(&be_lun->config_read_queue);
2316 	STAILQ_INIT(&be_lun->config_write_queue);
2317 	STAILQ_INIT(&be_lun->datamove_queue);
2318 	mtx_init(&be_lun->io_lock, "ctlblock io", NULL, MTX_DEF);
2319 	mtx_init(&be_lun->queue_lock, "ctlblock queue", NULL, MTX_DEF);
2320 	cbe_lun->options = nvlist_clone(req->args_nvl);
2321 
2322 	if (params->flags & CTL_LUN_FLAG_DEV_TYPE)
2323 		cbe_lun->lun_type = params->device_type;
2324 	else
2325 		cbe_lun->lun_type = T_DIRECT;
2326 	be_lun->flags = 0;
2327 	cbe_lun->flags = 0;
2328 	value = dnvlist_get_string(cbe_lun->options, "ha_role", NULL);
2329 	if (value != NULL) {
2330 		if (strcmp(value, "primary") == 0)
2331 			cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
2332 	} else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF)
2333 		cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
2334 
2335 	if (cbe_lun->lun_type == T_DIRECT ||
2336 	    cbe_lun->lun_type == T_CDROM) {
2337 		be_lun->size_bytes = params->lun_size_bytes;
2338 		if (params->blocksize_bytes != 0)
2339 			cbe_lun->blocksize = params->blocksize_bytes;
2340 		else if (cbe_lun->lun_type == T_CDROM)
2341 			cbe_lun->blocksize = 2048;
2342 		else
2343 			cbe_lun->blocksize = 512;
2344 		be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
2345 		cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
2346 		    0 : (be_lun->size_blocks - 1);
2347 
2348 		if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) ||
2349 		    control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) {
2350 			retval = ctl_be_block_open(be_lun, req);
2351 			if (retval != 0) {
2352 				retval = 0;
2353 				req->status = CTL_LUN_WARNING;
2354 			}
2355 		}
2356 		num_threads = cbb_num_threads;
2357 	} else {
2358 		num_threads = 1;
2359 	}
2360 
2361 	value = dnvlist_get_string(cbe_lun->options, "num_threads", NULL);
2362 	if (value != NULL) {
2363 		tmp_num_threads = strtol(value, NULL, 0);
2364 
2365 		/*
2366 		 * We don't let the user specify less than one
2367 		 * thread, but hope he's clueful enough not to
2368 		 * specify 1000 threads.
2369 		 */
2370 		if (tmp_num_threads < 1) {
2371 			snprintf(req->error_str, sizeof(req->error_str),
2372 				 "invalid number of threads %s",
2373 				 value);
2374 			goto bailout_error;
2375 		}
2376 		num_threads = tmp_num_threads;
2377 	}
2378 
2379 	if (be_lun->vn == NULL)
2380 		cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
2381 	/* Tell the user the blocksize we ended up using */
2382 	params->lun_size_bytes = be_lun->size_bytes;
2383 	params->blocksize_bytes = cbe_lun->blocksize;
2384 	if (params->flags & CTL_LUN_FLAG_ID_REQ) {
2385 		cbe_lun->req_lun_id = params->req_lun_id;
2386 		cbe_lun->flags |= CTL_LUN_FLAG_ID_REQ;
2387 	} else
2388 		cbe_lun->req_lun_id = 0;
2389 
2390 	cbe_lun->lun_shutdown = ctl_be_block_lun_shutdown;
2391 	cbe_lun->be = &ctl_be_block_driver;
2392 
2393 	if ((params->flags & CTL_LUN_FLAG_SERIAL_NUM) == 0) {
2394 		snprintf(tmpstr, sizeof(tmpstr), "MYSERIAL%04d",
2395 			 softc->num_luns);
2396 		strncpy((char *)cbe_lun->serial_num, tmpstr,
2397 			MIN(sizeof(cbe_lun->serial_num), sizeof(tmpstr)));
2398 
2399 		/* Tell the user what we used for a serial number */
2400 		strncpy((char *)params->serial_num, tmpstr,
2401 			MIN(sizeof(params->serial_num), sizeof(tmpstr)));
2402 	} else {
2403 		strncpy((char *)cbe_lun->serial_num, params->serial_num,
2404 			MIN(sizeof(cbe_lun->serial_num),
2405 			sizeof(params->serial_num)));
2406 	}
2407 	if ((params->flags & CTL_LUN_FLAG_DEVID) == 0) {
2408 		snprintf(tmpstr, sizeof(tmpstr), "MYDEVID%04d", softc->num_luns);
2409 		strncpy((char *)cbe_lun->device_id, tmpstr,
2410 			MIN(sizeof(cbe_lun->device_id), sizeof(tmpstr)));
2411 
2412 		/* Tell the user what we used for a device ID */
2413 		strncpy((char *)params->device_id, tmpstr,
2414 			MIN(sizeof(params->device_id), sizeof(tmpstr)));
2415 	} else {
2416 		strncpy((char *)cbe_lun->device_id, params->device_id,
2417 			MIN(sizeof(cbe_lun->device_id),
2418 			    sizeof(params->device_id)));
2419 	}
2420 
2421 	TASK_INIT(&be_lun->io_task, /*priority*/0, ctl_be_block_worker, be_lun);
2422 
2423 	be_lun->io_taskqueue = taskqueue_create("ctlblocktq", M_WAITOK,
2424 	    taskqueue_thread_enqueue, /*context*/&be_lun->io_taskqueue);
2425 
2426 	if (be_lun->io_taskqueue == NULL) {
2427 		snprintf(req->error_str, sizeof(req->error_str),
2428 			 "unable to create taskqueue");
2429 		goto bailout_error;
2430 	}
2431 
2432 	/*
2433 	 * Note that we start the same number of threads by default for
2434 	 * both the file case and the block device case.  For the file
2435 	 * case, we need multiple threads to allow concurrency, because the
2436 	 * vnode interface is designed to be a blocking interface.  For the
2437 	 * block device case, ZFS zvols at least will block the caller's
2438 	 * context in many instances, and so we need multiple threads to
2439 	 * overcome that problem.  Other block devices don't need as many
2440 	 * threads, but the extra threads shouldn't cause problems either.
2441 	 *
2442 	 * If the user wants just a single thread for a block device, they
2443 	 * can specify that when the LUN is created, or change the
2444 	 * tunable/sysctl to alter the default number of threads.
2445 	 */
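	/*
	 * Illustrative usage (hedged; the "num_threads" option name and
	 * cbb_num_threads default come from this file, the sysctl path is
	 * assumed): a per-LUN "-o num_threads=4" overrides the default at
	 * creation time, while the kern.cam.ctl.block.num_threads
	 * tunable/sysctl changes the default for subsequently created LUNs.
	 */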
2446 	retval = taskqueue_start_threads_in_proc(&be_lun->io_taskqueue,
2447 					 /*num threads*/num_threads,
2448 					 /*priority*/PUSER,
2449 					 /*proc*/control_softc->ctl_proc,
2450 					 /*thread name*/"block");
2451 
2452 	if (retval != 0)
2453 		goto bailout_error;
2454 
2455 	be_lun->num_threads = num_threads;
2456 
2457 	retval = ctl_add_lun(&be_lun->cbe_lun);
2458 	if (retval != 0) {
2459 		snprintf(req->error_str, sizeof(req->error_str),
2460 			 "ctl_add_lun() returned error %d, see dmesg for "
2461 			 "details", retval);
2462 		retval = 0;
2463 		goto bailout_error;
2464 	}
2465 
2466 	be_lun->disk_stats = devstat_new_entry("cbb", cbe_lun->lun_id,
2467 					       cbe_lun->blocksize,
2468 					       DEVSTAT_ALL_SUPPORTED,
2469 					       cbe_lun->lun_type
2470 					       | DEVSTAT_TYPE_IF_OTHER,
2471 					       DEVSTAT_PRIORITY_OTHER);
2472 
2473 	mtx_lock(&softc->lock);
2474 	softc->num_luns++;
2475 	SLIST_INSERT_HEAD(&softc->lun_list, be_lun, links);
2476 	mtx_unlock(&softc->lock);
2477 
2478 	params->req_lun_id = cbe_lun->lun_id;
2479 
2480 	return (retval);
2481 
2482 bailout_error:
2483 	req->status = CTL_LUN_ERROR;
2484 
2485 	if (be_lun->io_taskqueue != NULL)
2486 		taskqueue_free(be_lun->io_taskqueue);
2487 	ctl_be_block_close(be_lun);
2488 	if (be_lun->dev_path != NULL)
2489 		free(be_lun->dev_path, M_CTLBLK);
2490 	nvlist_destroy(cbe_lun->options);
2491 	mtx_destroy(&be_lun->queue_lock);
2492 	mtx_destroy(&be_lun->io_lock);
2493 	free(be_lun, M_CTLBLK);
2494 
2495 	return (retval);
2496 }
2497 
2498 static int
2499 ctl_be_block_rm(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
2500 {
2501 	struct ctl_lun_rm_params *params;
2502 	struct ctl_be_block_lun *be_lun;
2503 	struct ctl_be_lun *cbe_lun;
2504 	int retval;
2505 
2506 	params = &req->reqdata.rm;
2507 
2508 	sx_xlock(&softc->modify_lock);
2509 	mtx_lock(&softc->lock);
2510 	SLIST_FOREACH(be_lun, &softc->lun_list, links) {
2511 		if (be_lun->cbe_lun.lun_id == params->lun_id) {
2512 			SLIST_REMOVE(&softc->lun_list, be_lun,
2513 			    ctl_be_block_lun, links);
2514 			softc->num_luns--;
2515 			break;
2516 		}
2517 	}
2518 	mtx_unlock(&softc->lock);
2519 	sx_xunlock(&softc->modify_lock);
2520 	if (be_lun == NULL) {
2521 		snprintf(req->error_str, sizeof(req->error_str),
2522 			 "LUN %u is not managed by the block backend",
2523 			 params->lun_id);
2524 		goto bailout_error;
2525 	}
2526 	cbe_lun = &be_lun->cbe_lun;
2527 
2528 	if (be_lun->vn != NULL) {
2529 		cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
2530 		ctl_lun_no_media(cbe_lun);
2531 		taskqueue_drain_all(be_lun->io_taskqueue);
2532 		ctl_be_block_close(be_lun);
2533 	}
2534 
2535 	mtx_lock(&softc->lock);
2536 	be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING;
2537 	mtx_unlock(&softc->lock);
2538 
2539 	retval = ctl_remove_lun(cbe_lun);
2540 	if (retval != 0) {
2541 		snprintf(req->error_str, sizeof(req->error_str),
2542 			 "error %d returned from ctl_remove_lun() for "
2543 			 "LUN %d", retval, params->lun_id);
2544 		mtx_lock(&softc->lock);
2545 		be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
2546 		mtx_unlock(&softc->lock);
2547 		goto bailout_error;
2548 	}
2549 
2550 	mtx_lock(&softc->lock);
2551 	while ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) {
2552 		retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblockrm", 0);
2553 		if (retval == EINTR)
2554 			break;
2555 	}
2556 	be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
2557 	if (be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) {
2558 		mtx_unlock(&softc->lock);
2559 		free(be_lun, M_CTLBLK);
2560 	} else {
2561 		mtx_unlock(&softc->lock);
2562 		return (EINTR);
2563 	}
2564 
2565 	req->status = CTL_LUN_OK;
2566 	return (0);
2567 
2568 bailout_error:
2569 	req->status = CTL_LUN_ERROR;
2570 	return (0);
2571 }
2572 
2573 static int
2574 ctl_be_block_modify(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
2575 {
2576 	struct ctl_lun_modify_params *params;
2577 	struct ctl_be_block_lun *be_lun;
2578 	struct ctl_be_lun *cbe_lun;
2579 	const char *value;
2580 	uint64_t oldsize;
2581 	int error, wasprim;
2582 
2583 	params = &req->reqdata.modify;
2584 
2585 	sx_xlock(&softc->modify_lock);
2586 	mtx_lock(&softc->lock);
2587 	SLIST_FOREACH(be_lun, &softc->lun_list, links) {
2588 		if (be_lun->cbe_lun.lun_id == params->lun_id)
2589 			break;
2590 	}
2591 	mtx_unlock(&softc->lock);
2592 	if (be_lun == NULL) {
2593 		snprintf(req->error_str, sizeof(req->error_str),
2594 			 "LUN %u is not managed by the block backend",
2595 			 params->lun_id);
2596 		goto bailout_error;
2597 	}
2598 	cbe_lun = &be_lun->cbe_lun;
2599 
2600 	if (params->lun_size_bytes != 0)
2601 		be_lun->params.lun_size_bytes = params->lun_size_bytes;
2602 
2603 	if (req->args_nvl != NULL) {
2604 		nvlist_destroy(cbe_lun->options);
2605 		cbe_lun->options = nvlist_clone(req->args_nvl);
2606 	}
2607 
2608 	wasprim = (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY);
2609 	value = dnvlist_get_string(cbe_lun->options, "ha_role", NULL);
2610 	if (value != NULL) {
2611 		if (strcmp(value, "primary") == 0)
2612 			cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
2613 		else
2614 			cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY;
2615 	} else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF)
2616 		cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
2617 	else
2618 		cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY;
2619 	if (wasprim != (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)) {
2620 		if (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)
2621 			ctl_lun_primary(cbe_lun);
2622 		else
2623 			ctl_lun_secondary(cbe_lun);
2624 	}
2625 
2626 	oldsize = be_lun->size_blocks;
2627 	if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) ||
2628 	    control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) {
2629 		if (be_lun->vn == NULL)
2630 			error = ctl_be_block_open(be_lun, req);
2631 		else if (vn_isdisk_error(be_lun->vn, &error))
2632 			error = ctl_be_block_open_dev(be_lun, req);
2633 		else if (be_lun->vn->v_type == VREG) {
2634 			vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
2635 			error = ctl_be_block_open_file(be_lun, req);
2636 			VOP_UNLOCK(be_lun->vn);
2637 		} else
2638 			error = EINVAL;
2639 		if ((cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) &&
2640 		    be_lun->vn != NULL) {
2641 			cbe_lun->flags &= ~CTL_LUN_FLAG_NO_MEDIA;
2642 			ctl_lun_has_media(cbe_lun);
2643 		} else if ((cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) == 0 &&
2644 		    be_lun->vn == NULL) {
2645 			cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
2646 			ctl_lun_no_media(cbe_lun);
2647 		}
2648 		cbe_lun->flags &= ~CTL_LUN_FLAG_EJECTED;
2649 	} else {
2650 		if (be_lun->vn != NULL) {
2651 			cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
2652 			ctl_lun_no_media(cbe_lun);
2653 			taskqueue_drain_all(be_lun->io_taskqueue);
2654 			error = ctl_be_block_close(be_lun);
2655 		} else
2656 			error = 0;
2657 	}
2658 	if (be_lun->size_blocks != oldsize)
2659 		ctl_lun_capacity_changed(cbe_lun);
2660 
2661 	/* Tell the user the exact size we ended up using */
2662 	params->lun_size_bytes = be_lun->size_bytes;
2663 
2664 	sx_xunlock(&softc->modify_lock);
2665 	req->status = error ? CTL_LUN_WARNING : CTL_LUN_OK;
2666 	return (0);
2667 
2668 bailout_error:
2669 	sx_xunlock(&softc->modify_lock);
2670 	req->status = CTL_LUN_ERROR;
2671 	return (0);
2672 }
2673 
2674 static void
2675 ctl_be_block_lun_shutdown(struct ctl_be_lun *cbe_lun)
2676 {
2677 	struct ctl_be_block_lun *be_lun = (struct ctl_be_block_lun *)cbe_lun;
2678 	struct ctl_be_block_softc *softc = be_lun->softc;
2679 
2680 	taskqueue_drain_all(be_lun->io_taskqueue);
2681 	taskqueue_free(be_lun->io_taskqueue);
2682 	if (be_lun->disk_stats != NULL)
2683 		devstat_remove_entry(be_lun->disk_stats);
2684 	nvlist_destroy(be_lun->cbe_lun.options);
2685 	free(be_lun->dev_path, M_CTLBLK);
2686 	mtx_destroy(&be_lun->queue_lock);
2687 	mtx_destroy(&be_lun->io_lock);
2688 
2689 	mtx_lock(&softc->lock);
2690 	be_lun->flags |= CTL_BE_BLOCK_LUN_UNCONFIGURED;
2691 	if (be_lun->flags & CTL_BE_BLOCK_LUN_WAITING)
2692 		wakeup(be_lun);
2693 	else
2694 		free(be_lun, M_CTLBLK);
2695 	mtx_unlock(&softc->lock);
2696 }
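/*
 * Editorial summary of the removal handshake: ctl_be_block_rm() sets
 * CTL_BE_BLOCK_LUN_WAITING and msleep()s on the be_lun; once CTL finishes
 * tearing the LUN down it calls ctl_be_block_lun_shutdown() above, which
 * sets CTL_BE_BLOCK_LUN_UNCONFIGURED and either wakes the waiter (which
 * then frees the structure) or frees it directly if no one is waiting.
 */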
2697 
2698 static int
2699 ctl_be_block_config_write(union ctl_io *io)
2700 {
2701 	struct ctl_be_block_lun *be_lun;
2702 	struct ctl_be_lun *cbe_lun;
2703 	int retval;
2704 
2705 	DPRINTF("entered\n");
2706 
2707 	cbe_lun = CTL_BACKEND_LUN(io);
2708 	be_lun = (struct ctl_be_block_lun *)cbe_lun;
2709 
2710 	retval = 0;
2711 	switch (io->scsiio.cdb[0]) {
2712 	case SYNCHRONIZE_CACHE:
2713 	case SYNCHRONIZE_CACHE_16:
2714 	case WRITE_SAME_10:
2715 	case WRITE_SAME_16:
2716 	case UNMAP:
2717 		/*
2718 		 * The upper level CTL code will filter out any CDBs with
2719 		 * the immediate bit set and return the proper error.
2720 		 *
2721 		 * We don't really need to worry about what LBA range the
2722 		 * user asked to be synced out.  When they issue a sync
2723 		 * cache command, we'll sync out the whole thing.
2724 		 */
2725 		mtx_lock(&be_lun->queue_lock);
2726 		STAILQ_INSERT_TAIL(&be_lun->config_write_queue, &io->io_hdr,
2727 				   links);
2728 		mtx_unlock(&be_lun->queue_lock);
2729 		taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
2730 		break;
2731 	case START_STOP_UNIT: {
2732 		struct scsi_start_stop_unit *cdb;
2733 		struct ctl_lun_req req;
2734 
2735 		cdb = (struct scsi_start_stop_unit *)io->scsiio.cdb;
2736 		if ((cdb->how & SSS_PC_MASK) != 0) {
2737 			ctl_set_success(&io->scsiio);
2738 			ctl_config_write_done(io);
2739 			break;
2740 		}
2741 		if (cdb->how & SSS_START) {
2742 			if ((cdb->how & SSS_LOEJ) && be_lun->vn == NULL) {
2743 				retval = ctl_be_block_open(be_lun, &req);
2744 				cbe_lun->flags &= ~CTL_LUN_FLAG_EJECTED;
2745 				if (retval == 0) {
2746 					cbe_lun->flags &= ~CTL_LUN_FLAG_NO_MEDIA;
2747 					ctl_lun_has_media(cbe_lun);
2748 				} else {
2749 					cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
2750 					ctl_lun_no_media(cbe_lun);
2751 				}
2752 			}
2753 			ctl_start_lun(cbe_lun);
2754 		} else {
2755 			ctl_stop_lun(cbe_lun);
2756 			if (cdb->how & SSS_LOEJ) {
2757 				cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
2758 				cbe_lun->flags |= CTL_LUN_FLAG_EJECTED;
2759 				ctl_lun_ejected(cbe_lun);
2760 				if (be_lun->vn != NULL)
2761 					ctl_be_block_close(be_lun);
2762 			}
2763 		}
2764 
2765 		ctl_set_success(&io->scsiio);
2766 		ctl_config_write_done(io);
2767 		break;
2768 	}
2769 	case PREVENT_ALLOW:
2770 		ctl_set_success(&io->scsiio);
2771 		ctl_config_write_done(io);
2772 		break;
2773 	default:
2774 		ctl_set_invalid_opcode(&io->scsiio);
2775 		ctl_config_write_done(io);
2776 		retval = CTL_RETVAL_COMPLETE;
2777 		break;
2778 	}
2779 
2780 	return (retval);
2781 }
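/*
 * Editorial note on START STOP UNIT above: SSS_LOEJ together with
 * SSS_START maps a "load" onto reopening the backing store, while
 * SSS_LOEJ without SSS_START maps an "eject" onto closing it, so a
 * removable-media LUN tracks media presence through the NO_MEDIA and
 * EJECTED flags.
 */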
2782 
2783 static int
2784 ctl_be_block_config_read(union ctl_io *io)
2785 {
2786 	struct ctl_be_block_lun *be_lun;
2787 	int retval = 0;
2788 
2789 	DPRINTF("entered\n");
2790 
2791 	be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io);
2792 
2793 	switch (io->scsiio.cdb[0]) {
2794 	case SERVICE_ACTION_IN:
2795 		if (io->scsiio.cdb[1] == SGLS_SERVICE_ACTION) {
2796 			mtx_lock(&be_lun->queue_lock);
2797 			STAILQ_INSERT_TAIL(&be_lun->config_read_queue,
2798 			    &io->io_hdr, links);
2799 			mtx_unlock(&be_lun->queue_lock);
2800 			taskqueue_enqueue(be_lun->io_taskqueue,
2801 			    &be_lun->io_task);
2802 			retval = CTL_RETVAL_QUEUED;
2803 			break;
2804 		}
2805 		ctl_set_invalid_field(&io->scsiio,
2806 				      /*sks_valid*/ 1,
2807 				      /*command*/ 1,
2808 				      /*field*/ 1,
2809 				      /*bit_valid*/ 1,
2810 				      /*bit*/ 4);
2811 		ctl_config_read_done(io);
2812 		retval = CTL_RETVAL_COMPLETE;
2813 		break;
2814 	default:
2815 		ctl_set_invalid_opcode(&io->scsiio);
2816 		ctl_config_read_done(io);
2817 		retval = CTL_RETVAL_COMPLETE;
2818 		break;
2819 	}
2820 
2821 	return (retval);
2822 }
2823 
2824 static int
2825 ctl_be_block_lun_info(struct ctl_be_lun *cbe_lun, struct sbuf *sb)
2826 {
2827 	struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)cbe_lun;
2828 	int retval;
2829 
2830 	retval = sbuf_printf(sb, "\t<num_threads>");
2831 	if (retval != 0)
2832 		goto bailout;
2833 	retval = sbuf_printf(sb, "%d", lun->num_threads);
2834 	if (retval != 0)
2835 		goto bailout;
2836 	retval = sbuf_printf(sb, "</num_threads>\n");
2837 
2838 bailout:
2839 	return (retval);
2840 }
2841 
2842 static uint64_t
2843 ctl_be_block_lun_attr(struct ctl_be_lun *cbe_lun, const char *attrname)
2844 {
2845 	struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)cbe_lun;
2846 
2847 	if (lun->getattr == NULL)
2848 		return (UINT64_MAX);
2849 	return (lun->getattr(lun, attrname));
2850 }
2851 
2852 static int
2853 ctl_be_block_init(void)
2854 {
2855 	struct ctl_be_block_softc *softc = &backend_block_softc;
2856 
2857 	sx_init(&softc->modify_lock, "ctlblock modify");
2858 	mtx_init(&softc->lock, "ctlblock", NULL, MTX_DEF);
2859 	softc->beio_zone = uma_zcreate("beio", sizeof(struct ctl_be_block_io),
2860 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
2861 	softc->bufmin_zone = uma_zcreate("ctlblockmin", CTLBLK_MIN_SEG,
2862 	    NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0);
2863 	if (CTLBLK_MIN_SEG < CTLBLK_MAX_SEG)
2864 		softc->bufmax_zone = uma_zcreate("ctlblockmax", CTLBLK_MAX_SEG,
2865 		    NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0);
2866 	SLIST_INIT(&softc->lun_list);
2867 	return (0);
2868 }
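/*
 * Editorial note: the UMA zones created above back the per-I/O state
 * (beio) and the scatter/gather data buffers; "ctlblockmax" exists only
 * when CTLBLK_MAX_SEG is actually larger than CTLBLK_MIN_SEG, matching
 * the conditional teardown in ctl_be_block_shutdown() below.
 */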
2869 
2870 static int
2871 ctl_be_block_shutdown(void)
2872 {
2873 	struct ctl_be_block_softc *softc = &backend_block_softc;
2874 	struct ctl_be_block_lun *lun;
2875 
2876 	mtx_lock(&softc->lock);
2877 	while ((lun = SLIST_FIRST(&softc->lun_list)) != NULL) {
2878 		SLIST_REMOVE_HEAD(&softc->lun_list, links);
2879 		softc->num_luns--;
2880 		/*
2881 		 * Drop our lock here.  Since ctl_remove_lun() can call
2882 		 * back into us, this could lead to a recursive
2883 		 * lock of the same mutex, which would cause a hang.
2884 		 */
2885 		mtx_unlock(&softc->lock);
2886 		ctl_remove_lun(&lun->cbe_lun);
2887 		mtx_lock(&softc->lock);
2888 	}
2889 	mtx_unlock(&softc->lock);
2890 	uma_zdestroy(softc->bufmin_zone);
2891 	if (CTLBLK_MIN_SEG < CTLBLK_MAX_SEG)
2892 		uma_zdestroy(softc->bufmax_zone);
2893 	uma_zdestroy(softc->beio_zone);
2894 	mtx_destroy(&softc->lock);
2895 	sx_destroy(&softc->modify_lock);
2896 	return (0);
2897 }
2898