/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2003 Silicon Graphics International Corp.
 * Copyright (c) 2009-2011 Spectra Logic Corporation
 * Copyright (c) 2012,2021 The FreeBSD Foundation
 * Copyright (c) 2014-2015 Alexander Motin <mav@FreeBSD.org>
 * All rights reserved.
 *
 * Portions of this software were developed by Edward Tomasz Napierala
 * under sponsorship from the FreeBSD Foundation.
 *
 * Portions of this software were developed by Ka Ho Ng <khng@FreeBSD.org>
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions, and the following disclaimer,
 *    without modification.
 * 2. Redistributions in binary form must reproduce at minimum a disclaimer
 *    substantially similar to the "NO WARRANTY" disclaimer below
 *    ("Disclaimer") and any redistribution must be conditioned upon
 *    including a substantially similar Disclaimer requirement for further
 *    binary redistribution.
 *
 * NO WARRANTY
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGES.
 *
 * $Id: //depot/users/kenm/FreeBSD-test2/sys/cam/ctl/ctl_backend_block.c#5 $
 */
/*
 * CAM Target Layer driver backend for block devices.
 *
 * Author: Ken Merry <ken@FreeBSD.org>
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/types.h>
#include <sys/kthread.h>
#include <sys/bio.h>
#include <sys/fcntl.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/condvar.h>
#include <sys/malloc.h>
#include <sys/conf.h>
#include <sys/ioccom.h>
#include <sys/queue.h>
#include <sys/sbuf.h>
#include <sys/endian.h>
#include <sys/uio.h>
#include <sys/buf.h>
#include <sys/taskqueue.h>
#include <sys/vnode.h>
#include <sys/namei.h>
#include <sys/mount.h>
#include <sys/disk.h>
#include <sys/fcntl.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/proc.h>
#include <sys/pcpu.h>
#include <sys/module.h>
#include <sys/sdt.h>
#include <sys/devicestat.h>
#include <sys/sysctl.h>
#include <sys/nv.h>
#include <sys/dnv.h>
#include <sys/sx.h>
#include <sys/unistd.h>

#include <geom/geom.h>

#include <cam/cam.h>
#include <cam/scsi/scsi_all.h>
#include <cam/scsi/scsi_da.h>
#include <cam/ctl/ctl_io.h>
#include <cam/ctl/ctl.h>
#include <cam/ctl/ctl_backend.h>
#include <cam/ctl/ctl_ioctl.h>
#include <cam/ctl/ctl_ha.h>
#include <cam/ctl/ctl_scsi_all.h>
#include <cam/ctl/ctl_private.h>
#include <cam/ctl/ctl_error.h>

/*
 * The idea here is that we'll allocate enough S/G space to hold a 1MB
 * I/O.  If we get an I/O larger than that, we'll split it.
 */
#define	CTLBLK_HALF_IO_SIZE	(512 * 1024)
#define	CTLBLK_MAX_IO_SIZE	(CTLBLK_HALF_IO_SIZE * 2)
#define	CTLBLK_MIN_SEG		(128 * 1024)
#define	CTLBLK_MAX_SEG		MIN(CTLBLK_HALF_IO_SIZE, maxphys)
#define	CTLBLK_HALF_SEGS	MAX(CTLBLK_HALF_IO_SIZE / CTLBLK_MIN_SEG, 1)
#define	CTLBLK_MAX_SEGS		(CTLBLK_HALF_SEGS * 2)
#define	CTLBLK_NUM_SEGS		(CTLBLK_MAX_IO_SIZE / CTLBLK_MAX_SEG)
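/*
 * Illustrative geometry (not normative; it depends on maxphys): with
 * maxphys of 128KB, CTLBLK_MAX_SEG is 128KB, so a full 1MB I/O uses
 * CTLBLK_NUM_SEGS = 8 of the CTLBLK_MAX_SEGS = 8 available S/G entries;
 * with maxphys of 1MB, CTLBLK_MAX_SEG is 512KB and the same I/O needs
 * only 2 segments.
 */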

#ifdef CTLBLK_DEBUG
#define DPRINTF(fmt, args...) \
    printf("cbb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
#else
#define DPRINTF(fmt, args...) do {} while(0)
#endif

#define PRIV(io)	\
    ((struct ctl_ptr_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_BACKEND])
#define ARGS(io)	\
    ((struct ctl_lba_len_flags *)&(io)->io_hdr.ctl_private[CTL_PRIV_LBA_LEN])
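/*
 * PRIV(io) overlays this backend's per-I/O state (the beio pointer plus
 * a running count of blocks already issued) on the CTL private area;
 * ARGS(io) overlays the LBA/length/flags that CTL decoded from the CDB.
 */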

SDT_PROVIDER_DEFINE(cbb);

typedef enum {
	CTL_BE_BLOCK_LUN_UNCONFIGURED	= 0x01,
	CTL_BE_BLOCK_LUN_WAITING	= 0x04,
} ctl_be_block_lun_flags;

typedef enum {
	CTL_BE_BLOCK_NONE,
	CTL_BE_BLOCK_DEV,
	CTL_BE_BLOCK_FILE
} ctl_be_block_type;

struct ctl_be_block_filedata {
	struct ucred *cred;
};

union ctl_be_block_bedata {
	struct ctl_be_block_filedata file;
};

struct ctl_be_block_io;
struct ctl_be_block_lun;

typedef void (*cbb_dispatch_t)(struct ctl_be_block_lun *be_lun,
			       struct ctl_be_block_io *beio);
typedef uint64_t (*cbb_getattr_t)(struct ctl_be_block_lun *be_lun,
				  const char *attrname);

/*
 * Backend LUN structure.  There is a 1:1 mapping between a block device
 * and a backend block LUN, and between a backend block LUN and a CTL LUN.
 */
struct ctl_be_block_lun {
	struct ctl_be_lun cbe_lun;		/* Must be first element. */
	struct ctl_lun_create_params params;
	char *dev_path;
	ctl_be_block_type dev_type;
	struct vnode *vn;
	union ctl_be_block_bedata backend;
	cbb_dispatch_t dispatch;
	cbb_dispatch_t lun_flush;
	cbb_dispatch_t unmap;
	cbb_dispatch_t get_lba_status;
	cbb_getattr_t getattr;
	uint64_t size_blocks;
	uint64_t size_bytes;
	struct ctl_be_block_softc *softc;
	struct devstat *disk_stats;
	ctl_be_block_lun_flags flags;
	SLIST_ENTRY(ctl_be_block_lun) links;
	struct taskqueue *io_taskqueue;
	struct task io_task;
	int num_threads;
	STAILQ_HEAD(, ctl_io_hdr) input_queue;
	STAILQ_HEAD(, ctl_io_hdr) config_read_queue;
	STAILQ_HEAD(, ctl_io_hdr) config_write_queue;
	STAILQ_HEAD(, ctl_io_hdr) datamove_queue;
	struct mtx_padalign io_lock;
	struct mtx_padalign queue_lock;
};
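/*
 * Lock usage: queue_lock protects the four STAILQs above; io_lock
 * protects the per-beio bio completion counters and serializes devstat
 * updates.
 */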

/*
 * Overall softc structure for the block backend module.
 */
struct ctl_be_block_softc {
	struct sx			 modify_lock;
	struct mtx			 lock;
	int				 num_luns;
	SLIST_HEAD(, ctl_be_block_lun)	 lun_list;
	uma_zone_t			 beio_zone;
	uma_zone_t			 bufmin_zone;
	uma_zone_t			 bufmax_zone;
};

static struct ctl_be_block_softc backend_block_softc;

/*
 * Per-I/O information.
 */
struct ctl_be_block_io {
	union ctl_io			*io;
	struct ctl_sg_entry		sg_segs[CTLBLK_MAX_SEGS];
	struct iovec			xiovecs[CTLBLK_MAX_SEGS];
	int				refcnt;
	int				bio_cmd;
	int				two_sglists;
	int				num_segs;
	int				num_bios_sent;
	int				num_bios_done;
	int				send_complete;
	int				first_error;
	uint64_t			first_error_offset;
	struct bintime			ds_t0;
	devstat_tag_type		ds_tag_type;
	devstat_trans_flags		ds_trans_type;
	uint64_t			io_len;
	uint64_t			io_offset;
	int				io_arg;
	struct ctl_be_block_softc	*softc;
	struct ctl_be_block_lun		*lun;
	void (*beio_cont)(struct ctl_be_block_io *beio); /* to continue processing */
};

extern struct ctl_softc *control_softc;

static int cbb_num_threads = 32;
SYSCTL_NODE(_kern_cam_ctl, OID_AUTO, block, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
	    "CAM Target Layer Block Backend");
SYSCTL_INT(_kern_cam_ctl_block, OID_AUTO, num_threads, CTLFLAG_RWTUN,
	   &cbb_num_threads, 0, "Number of threads per backing file");
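/*
 * cbb_num_threads is exposed as the sysctl and loader tunable
 * kern.cam.ctl.block.num_threads, e.g.
 *	sysctl kern.cam.ctl.block.num_threads=32
 */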

static struct ctl_be_block_io *ctl_alloc_beio(struct ctl_be_block_softc *softc);
static void ctl_free_beio(struct ctl_be_block_io *beio);
static void ctl_complete_beio(struct ctl_be_block_io *beio);
static int ctl_be_block_move_done(union ctl_io *io, bool samethr);
static void ctl_be_block_biodone(struct bio *bio);
static void ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
				    struct ctl_be_block_io *beio);
static void ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
				       struct ctl_be_block_io *beio);
static void ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun,
				  struct ctl_be_block_io *beio);
static uint64_t ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun,
					  const char *attrname);
static void ctl_be_block_unmap_file(struct ctl_be_block_lun *be_lun,
				    struct ctl_be_block_io *beio);
static void ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
				   struct ctl_be_block_io *beio);
static void ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
				   struct ctl_be_block_io *beio);
static void ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
				      struct ctl_be_block_io *beio);
static uint64_t ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun,
					 const char *attrname);
static void ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun,
				     union ctl_io *io);
static void ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
				     union ctl_io *io);
static void ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
				  union ctl_io *io);
static void ctl_be_block_worker(void *context, int pending);
static int ctl_be_block_submit(union ctl_io *io);
static int ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
			      int flag, struct thread *td);
static int ctl_be_block_open_file(struct ctl_be_block_lun *be_lun,
				  struct ctl_lun_req *req);
static int ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun,
				 struct ctl_lun_req *req);
static int ctl_be_block_close(struct ctl_be_block_lun *be_lun);
static int ctl_be_block_open(struct ctl_be_block_lun *be_lun,
			     struct ctl_lun_req *req);
static int ctl_be_block_create(struct ctl_be_block_softc *softc,
			       struct ctl_lun_req *req);
static int ctl_be_block_rm(struct ctl_be_block_softc *softc,
			   struct ctl_lun_req *req);
static int ctl_be_block_modify(struct ctl_be_block_softc *softc,
			   struct ctl_lun_req *req);
static void ctl_be_block_lun_shutdown(struct ctl_be_lun *cbe_lun);
static int ctl_be_block_config_write(union ctl_io *io);
static int ctl_be_block_config_read(union ctl_io *io);
static int ctl_be_block_lun_info(struct ctl_be_lun *cbe_lun, struct sbuf *sb);
static uint64_t ctl_be_block_lun_attr(struct ctl_be_lun *cbe_lun, const char *attrname);
static int ctl_be_block_init(void);
static int ctl_be_block_shutdown(void);

static struct ctl_backend_driver ctl_be_block_driver =
{
	.name = "block",
	.flags = CTL_BE_FLAG_HAS_CONFIG,
	.init = ctl_be_block_init,
	.shutdown = ctl_be_block_shutdown,
	.data_submit = ctl_be_block_submit,
	.config_read = ctl_be_block_config_read,
	.config_write = ctl_be_block_config_write,
	.ioctl = ctl_be_block_ioctl,
	.lun_info = ctl_be_block_lun_info,
	.lun_attr = ctl_be_block_lun_attr
};

MALLOC_DEFINE(M_CTLBLK, "ctlblock", "Memory used for CTL block backend");
CTL_BACKEND_DECLARE(cbb, ctl_be_block_driver);

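/*
 * Segment buffers come from two UMA zones: bufmin_zone for segments up
 * to CTLBLK_MIN_SEG bytes and bufmax_zone for anything larger, up to
 * CTLBLK_MAX_SEG bytes.
 */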
static void
ctl_alloc_seg(struct ctl_be_block_softc *softc, struct ctl_sg_entry *sg,
    size_t len)
{

	if (len <= CTLBLK_MIN_SEG) {
		sg->addr = uma_zalloc(softc->bufmin_zone, M_WAITOK);
	} else {
		KASSERT(len <= CTLBLK_MAX_SEG,
		    ("Too large alloc %zu > %lu", len, CTLBLK_MAX_SEG));
		sg->addr = uma_zalloc(softc->bufmax_zone, M_WAITOK);
	}
	sg->len = len;
}

static void
ctl_free_seg(struct ctl_be_block_softc *softc, struct ctl_sg_entry *sg)
{

	if (sg->len <= CTLBLK_MIN_SEG) {
		uma_zfree(softc->bufmin_zone, sg->addr);
	} else {
		KASSERT(sg->len <= CTLBLK_MAX_SEG,
		    ("Too large free %zu > %lu", sg->len, CTLBLK_MAX_SEG));
		uma_zfree(softc->bufmax_zone, sg->addr);
	}
}

static struct ctl_be_block_io *
ctl_alloc_beio(struct ctl_be_block_softc *softc)
{
	struct ctl_be_block_io *beio;

	beio = uma_zalloc(softc->beio_zone, M_WAITOK | M_ZERO);
	beio->softc = softc;
	beio->refcnt = 1;
	return (beio);
}

static void
ctl_real_free_beio(struct ctl_be_block_io *beio)
{
	struct ctl_be_block_softc *softc = beio->softc;
	int i;

	for (i = 0; i < beio->num_segs; i++) {
		ctl_free_seg(softc, &beio->sg_segs[i]);

		/* For compare we had two equal S/G lists. */
		if (beio->two_sglists) {
			ctl_free_seg(softc,
			    &beio->sg_segs[i + CTLBLK_HALF_SEGS]);
		}
	}

	uma_zfree(softc->beio_zone, beio);
}

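/*
 * Adjust the beio reference count by "diff"; whoever drops the last
 * reference frees the beio.  Besides backing ctl_free_beio() below,
 * this is registered as the kern_data_ref callback in
 * ctl_be_block_dispatch() so that CTL can hold additional references
 * while data is in flight.
 */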
static void
ctl_refcnt_beio(void *arg, int diff)
{
	struct ctl_be_block_io *beio = arg;

	if (atomic_fetchadd_int(&beio->refcnt, diff) + diff == 0)
		ctl_real_free_beio(beio);
}

static void
ctl_free_beio(struct ctl_be_block_io *beio)
{

	ctl_refcnt_beio(beio, -1);
}

static void
ctl_complete_beio(struct ctl_be_block_io *beio)
{
	union ctl_io *io = beio->io;

	if (beio->beio_cont != NULL) {
		beio->beio_cont(beio);
	} else {
		ctl_free_beio(beio);
		ctl_data_submit_done(io);
	}
}

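/*
 * Return the number of leading bytes at which the two buffers match;
 * the result equals "size" when they are identical.
 */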
static size_t
cmp(uint8_t *a, uint8_t *b, size_t size)
{
	size_t i;

	for (i = 0; i < size; i++) {
		if (a[i] != b[i])
			break;
	}
	return (i);
}

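/*
 * Compare the first half of the S/G list (data read from the backend)
 * with the second half (data received from the initiator).  On a
 * mismatch, report MISCOMPARE and put the byte offset of the first
 * difference into the sense INFORMATION field.
 */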
static void
ctl_be_block_compare(union ctl_io *io)
{
	struct ctl_be_block_io *beio;
	uint64_t off, res;
	int i;
	uint8_t info[8];

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
	off = 0;
	for (i = 0; i < beio->num_segs; i++) {
		res = cmp(beio->sg_segs[i].addr,
		    beio->sg_segs[i + CTLBLK_HALF_SEGS].addr,
		    beio->sg_segs[i].len);
		off += res;
		if (res < beio->sg_segs[i].len)
			break;
	}
	if (i < beio->num_segs) {
		scsi_u64to8b(off, info);
		ctl_set_sense(&io->scsiio, /*current_error*/ 1,
		    /*sense_key*/ SSD_KEY_MISCOMPARE,
		    /*asc*/ 0x1D, /*ascq*/ 0x00,
		    /*type*/ SSD_ELEM_INFO,
		    /*size*/ sizeof(info), /*data*/ &info,
		    /*type*/ SSD_ELEM_NONE);
	} else
		ctl_set_success(&io->scsiio);
}

static int
ctl_be_block_move_done(union ctl_io *io, bool samethr)
{
	struct ctl_be_block_io *beio;
	struct ctl_be_block_lun *be_lun;
	struct ctl_lba_len_flags *lbalen;

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;

	DPRINTF("entered\n");
	io->scsiio.kern_rel_offset += io->scsiio.kern_data_len;

	/*
	 * We set status at this point for read and compare commands.
	 */
	if ((io->io_hdr.flags & CTL_FLAG_ABORT) == 0 &&
	    (io->io_hdr.status & CTL_STATUS_MASK) == CTL_STATUS_NONE) {
		lbalen = ARGS(io);
		if (lbalen->flags & CTL_LLF_READ) {
			ctl_set_success(&io->scsiio);
		} else if (lbalen->flags & CTL_LLF_COMPARE) {
			/* We have two data blocks ready for comparison. */
			ctl_be_block_compare(io);
		}
	}

	/*
	 * If this is a read, or a write with errors, it is done.
	 */
	if ((beio->bio_cmd == BIO_READ)
	 || ((io->io_hdr.flags & CTL_FLAG_ABORT) != 0)
	 || ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE)) {
		ctl_complete_beio(beio);
		return (0);
	}

	/*
	 * At this point, we have a write and the DMA completed successfully.
	 * If we were called synchronously in the original thread then just
	 * dispatch, otherwise we now have to queue it to the task queue to
	 * execute the backend I/O.  That is because we do blocking
	 * memory allocations, and in the file backing case, blocking I/O.
	 * This move done routine is generally called in the SIM's
	 * interrupt context, and therefore we cannot block.
	 */
	be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io);
	if (samethr) {
		be_lun->dispatch(be_lun, beio);
	} else {
		mtx_lock(&be_lun->queue_lock);
		STAILQ_INSERT_TAIL(&be_lun->datamove_queue, &io->io_hdr, links);
		mtx_unlock(&be_lun->queue_lock);
		taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
	}
	return (0);
}

static void
ctl_be_block_biodone(struct bio *bio)
{
	struct ctl_be_block_io *beio = bio->bio_caller1;
	struct ctl_be_block_lun *be_lun = beio->lun;
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	union ctl_io *io;
	int error;

	io = beio->io;

	DPRINTF("entered\n");

	error = bio->bio_error;
	mtx_lock(&be_lun->io_lock);
	if (error != 0 &&
	    (beio->first_error == 0 ||
	     bio->bio_offset < beio->first_error_offset)) {
		beio->first_error = error;
		beio->first_error_offset = bio->bio_offset;
	}

	beio->num_bios_done++;

	/*
	 * XXX KDM will this cause WITNESS to complain?  Holding a lock
	 * during the free might cause it to complain.
	 */
	g_destroy_bio(bio);

	/*
	 * If the send complete bit isn't set, or we aren't the last I/O to
	 * complete, then we're done.
	 */
	if ((beio->send_complete == 0)
	 || (beio->num_bios_done < beio->num_bios_sent)) {
		mtx_unlock(&be_lun->io_lock);
		return;
	}

	/*
	 * At this point, we've verified that we are the last I/O to
	 * complete, so it's safe to drop the lock.
	 */
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	/*
	 * If there are any errors from the backing device, we fail the
	 * entire I/O with a medium error.
	 */
	error = beio->first_error;
	if (error != 0) {
		if (error == EOPNOTSUPP) {
			ctl_set_invalid_opcode(&io->scsiio);
		} else if (error == ENOSPC || error == EDQUOT) {
			ctl_set_space_alloc_fail(&io->scsiio);
		} else if (error == EROFS || error == EACCES) {
			ctl_set_hw_write_protected(&io->scsiio);
		} else if (beio->bio_cmd == BIO_FLUSH) {
			/* XXX KDM is there a better error here? */
			ctl_set_internal_failure(&io->scsiio,
						 /*sks_valid*/ 1,
						 /*retry_count*/ 0xbad2);
		} else {
			ctl_set_medium_error(&io->scsiio,
			    beio->bio_cmd == BIO_READ);
		}
		ctl_complete_beio(beio);
		return;
	}

	/*
	 * If this is a write, a flush, a delete or verify, we're all done.
	 * If this is a read, we can now send the data to the user.
	 */
	if ((beio->bio_cmd == BIO_WRITE)
	 || (beio->bio_cmd == BIO_FLUSH)
	 || (beio->bio_cmd == BIO_DELETE)
	 || (ARGS(io)->flags & CTL_LLF_VERIFY)) {
		ctl_set_success(&io->scsiio);
		ctl_complete_beio(beio);
	} else {
		if ((ARGS(io)->flags & CTL_LLF_READ) &&
		    beio->beio_cont == NULL) {
			ctl_set_success(&io->scsiio);
			if (cbe_lun->serseq >= CTL_LUN_SERSEQ_SOFT)
				ctl_serseq_done(io);
		}
		ctl_datamove(io);
	}
}

static void
ctl_be_block_flush_file(struct ctl_be_block_lun *be_lun,
			struct ctl_be_block_io *beio)
{
	union ctl_io *io = beio->io;
	struct mount *mountpoint;
	int error;

	DPRINTF("entered\n");

	binuptime(&beio->ds_t0);
	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);

	(void) vn_start_write(be_lun->vn, &mountpoint, V_WAIT);

	vn_lock(be_lun->vn, vn_lktype_write(mountpoint, be_lun->vn) |
	    LK_RETRY);
	error = VOP_FSYNC(be_lun->vn, beio->io_arg ? MNT_NOWAIT : MNT_WAIT,
	    curthread);
	VOP_UNLOCK(be_lun->vn);

	vn_finished_write(mountpoint);

	mtx_lock(&be_lun->io_lock);
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	if (error == 0)
		ctl_set_success(&io->scsiio);
	else {
		/* XXX KDM is there a better error here? */
		ctl_set_internal_failure(&io->scsiio,
					 /*sks_valid*/ 1,
					 /*retry_count*/ 0xbad1);
	}

	ctl_complete_beio(beio);
}

SDT_PROBE_DEFINE1(cbb, , read, file_start, "uint64_t");
SDT_PROBE_DEFINE1(cbb, , write, file_start, "uint64_t");
SDT_PROBE_DEFINE1(cbb, , read, file_done, "uint64_t");
SDT_PROBE_DEFINE1(cbb, , write, file_done, "uint64_t");

static void
ctl_be_block_dispatch_file(struct ctl_be_block_lun *be_lun,
			   struct ctl_be_block_io *beio)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct ctl_be_block_filedata *file_data;
	union ctl_io *io;
	struct uio xuio;
	struct iovec *xiovec;
	size_t s;
	int error, flags, i;

	DPRINTF("entered\n");

	file_data = &be_lun->backend.file;
	io = beio->io;
	flags = 0;
	if (ARGS(io)->flags & CTL_LLF_DPO)
		flags |= IO_DIRECT;
	if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA)
		flags |= IO_SYNC;

	bzero(&xuio, sizeof(xuio));
	if (beio->bio_cmd == BIO_READ) {
		SDT_PROBE0(cbb, , read, file_start);
		xuio.uio_rw = UIO_READ;
	} else {
		SDT_PROBE0(cbb, , write, file_start);
		xuio.uio_rw = UIO_WRITE;
	}
	xuio.uio_offset = beio->io_offset;
	xuio.uio_resid = beio->io_len;
	xuio.uio_segflg = UIO_SYSSPACE;
	xuio.uio_iov = beio->xiovecs;
	xuio.uio_iovcnt = beio->num_segs;
	xuio.uio_td = curthread;

	for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
		xiovec->iov_base = beio->sg_segs[i].addr;
		xiovec->iov_len = beio->sg_segs[i].len;
	}

	binuptime(&beio->ds_t0);
	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);

	if (beio->bio_cmd == BIO_READ) {
		vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);

		if (beio->beio_cont == NULL &&
		    cbe_lun->serseq == CTL_LUN_SERSEQ_SOFT)
			ctl_serseq_done(io);
		/*
		 * UFS pays attention to IO_DIRECT for reads.  If the
		 * DIRECTIO option is configured into the kernel, it calls
		 * ffs_rawread().  But that only works for single-segment
		 * uios with user space addresses.  In our case, with a
		 * kernel uio, it still reads into the buffer cache, but it
		 * will just try to release the buffer from the cache later
		 * on in ffs_read().
		 *
		 * ZFS does not pay attention to IO_DIRECT for reads.
		 *
		 * UFS does not pay attention to IO_SYNC for reads.
		 *
		 * ZFS pays attention to IO_SYNC (which translates into the
		 * Solaris define FRSYNC for zfs_read()) for reads.  It
		 * attempts to sync the file before reading.
		 */
		error = VOP_READ(be_lun->vn, &xuio, flags, file_data->cred);

		VOP_UNLOCK(be_lun->vn);
		SDT_PROBE0(cbb, , read, file_done);
		if (error == 0 && xuio.uio_resid > 0) {
			/*
			 * If we read less than requested (EOF), then
			 * we should zero the rest of the buffer.
			 */
			s = beio->io_len - xuio.uio_resid;
			for (i = 0; i < beio->num_segs; i++) {
				if (s >= beio->sg_segs[i].len) {
					s -= beio->sg_segs[i].len;
					continue;
				}
				bzero((uint8_t *)beio->sg_segs[i].addr + s,
				    beio->sg_segs[i].len - s);
				s = 0;
			}
		}
	} else {
		struct mount *mountpoint;

		(void)vn_start_write(be_lun->vn, &mountpoint, V_WAIT);
		vn_lock(be_lun->vn, vn_lktype_write(mountpoint,
		    be_lun->vn) | LK_RETRY);

		/*
		 * UFS pays attention to IO_DIRECT for writes.  The write
		 * is done asynchronously.  (Normally the write would just
		 * get put into the cache.)
		 *
		 * UFS pays attention to IO_SYNC for writes.  It will
		 * attempt to write the buffer out synchronously if that
		 * flag is set.
		 *
		 * ZFS does not pay attention to IO_DIRECT for writes.
		 *
		 * ZFS pays attention to IO_SYNC (a.k.a. FSYNC or FRSYNC)
		 * for writes.  It will flush the transaction from the
		 * cache before returning.
		 */
		error = VOP_WRITE(be_lun->vn, &xuio, flags, file_data->cred);
		VOP_UNLOCK(be_lun->vn);

		vn_finished_write(mountpoint);
		SDT_PROBE0(cbb, , write, file_done);
	}

	mtx_lock(&be_lun->io_lock);
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	/*
	 * If we got an error, set the sense data to "MEDIUM ERROR" and
	 * return the I/O to the user.
	 */
	if (error != 0) {
		if (error == ENOSPC || error == EDQUOT) {
			ctl_set_space_alloc_fail(&io->scsiio);
		} else if (error == EROFS || error == EACCES) {
			ctl_set_hw_write_protected(&io->scsiio);
		} else {
			ctl_set_medium_error(&io->scsiio,
			    beio->bio_cmd == BIO_READ);
		}
		ctl_complete_beio(beio);
		return;
	}

	/*
	 * If this is a write or a verify, we're all done.
	 * If this is a read, we can now send the data to the user.
	 */
	if ((beio->bio_cmd == BIO_WRITE) ||
	    (ARGS(io)->flags & CTL_LLF_VERIFY)) {
		ctl_set_success(&io->scsiio);
		ctl_complete_beio(beio);
	} else {
		if ((ARGS(io)->flags & CTL_LLF_READ) &&
		    beio->beio_cont == NULL) {
			ctl_set_success(&io->scsiio);
			if (cbe_lun->serseq > CTL_LUN_SERSEQ_SOFT)
				ctl_serseq_done(io);
		}
		ctl_datamove(io);
	}
}

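/*
 * GET LBA STATUS for file-backed LUNs: probe the backing vnode with
 * FIOSEEKHOLE/FIOSEEKDATA to report whether the range starting at the
 * requested LBA is mapped (status 0) or deallocated (status 1).
 */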
static void
ctl_be_block_gls_file(struct ctl_be_block_lun *be_lun,
			struct ctl_be_block_io *beio)
{
	union ctl_io *io = beio->io;
	struct ctl_lba_len_flags *lbalen = ARGS(io);
	struct scsi_get_lba_status_data *data;
	off_t roff, off;
	int error, status;

	DPRINTF("entered\n");

	off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize;
	vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
	error = VOP_IOCTL(be_lun->vn, FIOSEEKHOLE, &off,
	    0, curthread->td_ucred, curthread);
	if (error == 0 && off > roff)
		status = 0;	/* mapped up to off */
	else {
		error = VOP_IOCTL(be_lun->vn, FIOSEEKDATA, &off,
		    0, curthread->td_ucred, curthread);
		if (error == 0 && off > roff)
			status = 1;	/* deallocated up to off */
		else {
			status = 0;	/* unknown up to the end */
			off = be_lun->size_bytes;
		}
	}
	VOP_UNLOCK(be_lun->vn);

	data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
	scsi_u64to8b(lbalen->lba, data->descr[0].addr);
	scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize -
	    lbalen->lba), data->descr[0].length);
	data->descr[0].status = status;

	ctl_complete_beio(beio);
}

static uint64_t
ctl_be_block_getattr_file(struct ctl_be_block_lun *be_lun, const char *attrname)
{
	struct vattr		vattr;
	struct statfs		statfs;
	uint64_t		val;
	int			error;

	val = UINT64_MAX;
	if (be_lun->vn == NULL)
		return (val);
	vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
	if (strcmp(attrname, "blocksused") == 0) {
		error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
		if (error == 0)
			val = vattr.va_bytes / be_lun->cbe_lun.blocksize;
	}
	if (strcmp(attrname, "blocksavail") == 0 &&
	    !VN_IS_DOOMED(be_lun->vn)) {
		error = VFS_STATFS(be_lun->vn->v_mount, &statfs);
		if (error == 0)
			val = statfs.f_bavail * statfs.f_bsize /
			    be_lun->cbe_lun.blocksize;
	}
	VOP_UNLOCK(be_lun->vn);
	return (val);
}

static void
ctl_be_block_unmap_file(struct ctl_be_block_lun *be_lun,
		        struct ctl_be_block_io *beio)
{
	struct ctl_be_block_filedata *file_data;
	union ctl_io *io;
	struct ctl_ptr_len_flags *ptrlen;
	struct scsi_unmap_desc *buf, *end;
	struct mount *mp;
	off_t off, len;
	int error;

	io = beio->io;
	file_data = &be_lun->backend.file;
	mp = NULL;
	error = 0;

	binuptime(&beio->ds_t0);
	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);

	(void)vn_start_write(be_lun->vn, &mp, V_WAIT);
	vn_lock(be_lun->vn, vn_lktype_write(mp, be_lun->vn) | LK_RETRY);
	if (beio->io_offset == -1) {
		beio->io_len = 0;
		ptrlen = (struct ctl_ptr_len_flags *)
		    &io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
		buf = (struct scsi_unmap_desc *)ptrlen->ptr;
		end = buf + ptrlen->len / sizeof(*buf);
		for (; buf < end; buf++) {
			off = (off_t)scsi_8btou64(buf->lba) *
			    be_lun->cbe_lun.blocksize;
			len = (off_t)scsi_4btoul(buf->length) *
			    be_lun->cbe_lun.blocksize;
			beio->io_len += len;
			error = vn_deallocate(be_lun->vn, &off, &len,
			    0, IO_NOMACCHECK | IO_NODELOCKED, file_data->cred,
			    NOCRED);
			if (error != 0)
				break;
		}
	} else {
		/* WRITE_SAME */
		off = beio->io_offset;
		len = beio->io_len;
		error = vn_deallocate(be_lun->vn, &off, &len, 0,
		    IO_NOMACCHECK | IO_NODELOCKED, file_data->cred, NOCRED);
	}
	VOP_UNLOCK(be_lun->vn);
	vn_finished_write(mp);

	mtx_lock(&be_lun->io_lock);
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	/*
	 * If we got an error, set the sense data to "MEDIUM ERROR" and
	 * return the I/O to the user.
	 */
	switch (error) {
	case 0:
		ctl_set_success(&io->scsiio);
		break;
	case ENOSPC:
	case EDQUOT:
		ctl_set_space_alloc_fail(&io->scsiio);
		break;
	case EROFS:
	case EACCES:
		ctl_set_hw_write_protected(&io->scsiio);
		break;
	default:
		ctl_set_medium_error(&io->scsiio, false);
	}
	ctl_complete_beio(beio);
}

static void
ctl_be_block_dispatch_zvol(struct ctl_be_block_lun *be_lun,
			   struct ctl_be_block_io *beio)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	union ctl_io *io;
	struct cdevsw *csw;
	struct cdev *dev;
	struct uio xuio;
	struct iovec *xiovec;
	int error, flags, i, ref;

	DPRINTF("entered\n");

	io = beio->io;
	flags = 0;
	if (ARGS(io)->flags & CTL_LLF_DPO)
		flags |= IO_DIRECT;
	if (beio->bio_cmd == BIO_WRITE && ARGS(io)->flags & CTL_LLF_FUA)
		flags |= IO_SYNC;

	bzero(&xuio, sizeof(xuio));
	if (beio->bio_cmd == BIO_READ) {
		SDT_PROBE0(cbb, , read, file_start);
		xuio.uio_rw = UIO_READ;
	} else {
		SDT_PROBE0(cbb, , write, file_start);
		xuio.uio_rw = UIO_WRITE;
	}
	xuio.uio_offset = beio->io_offset;
	xuio.uio_resid = beio->io_len;
	xuio.uio_segflg = UIO_SYSSPACE;
	xuio.uio_iov = beio->xiovecs;
	xuio.uio_iovcnt = beio->num_segs;
	xuio.uio_td = curthread;

	for (i = 0, xiovec = xuio.uio_iov; i < xuio.uio_iovcnt; i++, xiovec++) {
		xiovec->iov_base = beio->sg_segs[i].addr;
		xiovec->iov_len = beio->sg_segs[i].len;
	}

	binuptime(&beio->ds_t0);
	devstat_start_transaction(beio->lun->disk_stats, &beio->ds_t0);

	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	if (csw) {
		if (beio->bio_cmd == BIO_READ) {
			if (beio->beio_cont == NULL &&
			    cbe_lun->serseq == CTL_LUN_SERSEQ_SOFT)
				ctl_serseq_done(io);
			error = csw->d_read(dev, &xuio, flags);
		} else
			error = csw->d_write(dev, &xuio, flags);
		dev_relthread(dev, ref);
	} else
		error = ENXIO;

	if (beio->bio_cmd == BIO_READ)
		SDT_PROBE0(cbb, , read, file_done);
	else
		SDT_PROBE0(cbb, , write, file_done);

	mtx_lock(&be_lun->io_lock);
	devstat_end_transaction(beio->lun->disk_stats, beio->io_len,
	    beio->ds_tag_type, beio->ds_trans_type,
	    /*now*/ NULL, /*then*/&beio->ds_t0);
	mtx_unlock(&be_lun->io_lock);

	/*
	 * If we got an error, set the sense data to "MEDIUM ERROR" and
	 * return the I/O to the user.
	 */
	if (error != 0) {
		if (error == ENOSPC || error == EDQUOT) {
			ctl_set_space_alloc_fail(&io->scsiio);
		} else if (error == EROFS || error == EACCES) {
			ctl_set_hw_write_protected(&io->scsiio);
		} else {
			ctl_set_medium_error(&io->scsiio,
			    beio->bio_cmd == BIO_READ);
		}
		ctl_complete_beio(beio);
		return;
	}

	/*
	 * If this is a write or a verify, we're all done.
	 * If this is a read, we can now send the data to the user.
	 */
	if ((beio->bio_cmd == BIO_WRITE) ||
	    (ARGS(io)->flags & CTL_LLF_VERIFY)) {
		ctl_set_success(&io->scsiio);
		ctl_complete_beio(beio);
	} else {
		if ((ARGS(io)->flags & CTL_LLF_READ) &&
		    beio->beio_cont == NULL) {
			ctl_set_success(&io->scsiio);
			if (cbe_lun->serseq > CTL_LUN_SERSEQ_SOFT)
				ctl_serseq_done(io);
		}
		ctl_datamove(io);
	}
}

static void
ctl_be_block_gls_zvol(struct ctl_be_block_lun *be_lun,
			struct ctl_be_block_io *beio)
{
	union ctl_io *io = beio->io;
	struct cdevsw *csw;
	struct cdev *dev;
	struct ctl_lba_len_flags *lbalen = ARGS(io);
	struct scsi_get_lba_status_data *data;
	off_t roff, off;
	int error, ref, status;

	DPRINTF("entered\n");

	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	if (csw == NULL) {
		status = 0;	/* unknown up to the end */
		off = be_lun->size_bytes;
		goto done;
	}
	off = roff = ((off_t)lbalen->lba) * be_lun->cbe_lun.blocksize;
	error = csw->d_ioctl(dev, FIOSEEKHOLE, (caddr_t)&off, FREAD,
	    curthread);
	if (error == 0 && off > roff)
		status = 0;	/* mapped up to off */
	else {
		error = csw->d_ioctl(dev, FIOSEEKDATA, (caddr_t)&off, FREAD,
		    curthread);
		if (error == 0 && off > roff)
			status = 1;	/* deallocated up to off */
		else {
			status = 0;	/* unknown up to the end */
			off = be_lun->size_bytes;
		}
	}
	dev_relthread(dev, ref);

done:
	data = (struct scsi_get_lba_status_data *)io->scsiio.kern_data_ptr;
	scsi_u64to8b(lbalen->lba, data->descr[0].addr);
	scsi_ulto4b(MIN(UINT32_MAX, off / be_lun->cbe_lun.blocksize -
	    lbalen->lba), data->descr[0].length);
	data->descr[0].status = status;

	ctl_complete_beio(beio);
}

static void
ctl_be_block_flush_dev(struct ctl_be_block_lun *be_lun,
		       struct ctl_be_block_io *beio)
{
	struct bio *bio;
	struct cdevsw *csw;
	struct cdev *dev;
	int ref;

	DPRINTF("entered\n");

	/* This can't fail, it's a blocking allocation. */
	bio = g_alloc_bio();

	bio->bio_cmd	    = BIO_FLUSH;
	bio->bio_offset	    = 0;
	bio->bio_data	    = 0;
	bio->bio_done	    = ctl_be_block_biodone;
	bio->bio_caller1    = beio;
	bio->bio_pblkno	    = 0;

	/*
	 * We don't need to acquire the LUN lock here, because we are only
	 * sending one bio, and so there is no other context to synchronize
	 * with.
	 */
	beio->num_bios_sent = 1;
	beio->send_complete = 1;

	binuptime(&beio->ds_t0);
	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);

	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	if (csw) {
		bio->bio_dev = dev;
		csw->d_strategy(bio);
		dev_relthread(dev, ref);
	} else {
		bio->bio_error = ENXIO;
		ctl_be_block_biodone(bio);
	}
}

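/*
 * Issue one or more BIO_DELETEs covering the given byte range.  The
 * range is split into chunks of at most LONG_MAX bytes, rounded down to
 * a multiple of the LUN block size so every chunk stays block aligned.
 */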
static void
ctl_be_block_unmap_dev_range(struct ctl_be_block_lun *be_lun,
		       struct ctl_be_block_io *beio,
		       uint64_t off, uint64_t len, int last)
{
	struct bio *bio;
	uint64_t maxlen;
	struct cdevsw *csw;
	struct cdev *dev;
	int ref;

	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	maxlen = LONG_MAX - (LONG_MAX % be_lun->cbe_lun.blocksize);
	while (len > 0) {
		bio = g_alloc_bio();
		bio->bio_cmd	    = BIO_DELETE;
		bio->bio_dev	    = dev;
		bio->bio_offset	    = off;
		bio->bio_length	    = MIN(len, maxlen);
		bio->bio_data	    = 0;
		bio->bio_done	    = ctl_be_block_biodone;
		bio->bio_caller1    = beio;
		bio->bio_pblkno     = off / be_lun->cbe_lun.blocksize;

		off += bio->bio_length;
		len -= bio->bio_length;

		mtx_lock(&be_lun->io_lock);
		beio->num_bios_sent++;
		if (last && len == 0)
			beio->send_complete = 1;
		mtx_unlock(&be_lun->io_lock);

		if (csw) {
			csw->d_strategy(bio);
		} else {
			bio->bio_error = ENXIO;
			ctl_be_block_biodone(bio);
		}
	}
	if (csw)
		dev_relthread(dev, ref);
}

static void
ctl_be_block_unmap_dev(struct ctl_be_block_lun *be_lun,
		       struct ctl_be_block_io *beio)
{
	union ctl_io *io;
	struct ctl_ptr_len_flags *ptrlen;
	struct scsi_unmap_desc *buf, *end;
	uint64_t len;

	io = beio->io;

	DPRINTF("entered\n");

	binuptime(&beio->ds_t0);
	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);

	if (beio->io_offset == -1) {
		beio->io_len = 0;
		ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];
		buf = (struct scsi_unmap_desc *)ptrlen->ptr;
		end = buf + ptrlen->len / sizeof(*buf);
		for (; buf < end; buf++) {
			len = (uint64_t)scsi_4btoul(buf->length) *
			    be_lun->cbe_lun.blocksize;
			beio->io_len += len;
			ctl_be_block_unmap_dev_range(be_lun, beio,
			    scsi_8btou64(buf->lba) * be_lun->cbe_lun.blocksize,
			    len, (end - buf < 2) ? TRUE : FALSE);
		}
	} else
		ctl_be_block_unmap_dev_range(be_lun, beio,
		    beio->io_offset, beio->io_len, TRUE);
}

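/*
 * Read/write dispatch for device-backed LUNs: carve the S/G list into
 * bios no longer than the device's si_iosize_max, then fire them all
 * off; completions are counted in ctl_be_block_biodone().
 */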
static void
ctl_be_block_dispatch_dev(struct ctl_be_block_lun *be_lun,
			  struct ctl_be_block_io *beio)
{
	TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue);
	struct bio *bio;
	struct cdevsw *csw;
	struct cdev *dev;
	off_t cur_offset;
	int i, max_iosize, ref;

	DPRINTF("entered\n");
	csw = devvn_refthread(be_lun->vn, &dev, &ref);

	/*
	 * We have to limit our I/O size to the maximum supported by the
	 * backend device.
	 */
	if (csw) {
		max_iosize = dev->si_iosize_max;
		if (max_iosize < PAGE_SIZE)
			max_iosize = DFLTPHYS;
	} else
		max_iosize = DFLTPHYS;

	cur_offset = beio->io_offset;
	for (i = 0; i < beio->num_segs; i++) {
		size_t cur_size;
		uint8_t *cur_ptr;

		cur_size = beio->sg_segs[i].len;
		cur_ptr = beio->sg_segs[i].addr;

		while (cur_size > 0) {
			/* This can't fail, it's a blocking allocation. */
			bio = g_alloc_bio();

			KASSERT(bio != NULL, ("g_alloc_bio() failed!\n"));

			bio->bio_cmd = beio->bio_cmd;
			bio->bio_dev = dev;
			bio->bio_caller1 = beio;
			bio->bio_length = min(cur_size, max_iosize);
			bio->bio_offset = cur_offset;
			bio->bio_data = cur_ptr;
			bio->bio_done = ctl_be_block_biodone;
			bio->bio_pblkno = cur_offset / be_lun->cbe_lun.blocksize;

			cur_offset += bio->bio_length;
			cur_ptr += bio->bio_length;
			cur_size -= bio->bio_length;

			TAILQ_INSERT_TAIL(&queue, bio, bio_queue);
			beio->num_bios_sent++;
		}
	}
	beio->send_complete = 1;
	binuptime(&beio->ds_t0);
	devstat_start_transaction(be_lun->disk_stats, &beio->ds_t0);

	/*
	 * Fire off all allocated requests!
	 */
	while ((bio = TAILQ_FIRST(&queue)) != NULL) {
		TAILQ_REMOVE(&queue, bio, bio_queue);
		if (csw)
			csw->d_strategy(bio);
		else {
			bio->bio_error = ENXIO;
			ctl_be_block_biodone(bio);
		}
	}
	if (csw)
		dev_relthread(dev, ref);
}

static uint64_t
ctl_be_block_getattr_dev(struct ctl_be_block_lun *be_lun, const char *attrname)
{
	struct diocgattr_arg	arg;
	struct cdevsw *csw;
	struct cdev *dev;
	int error, ref;

	csw = devvn_refthread(be_lun->vn, &dev, &ref);
	if (csw == NULL)
		return (UINT64_MAX);
	strlcpy(arg.name, attrname, sizeof(arg.name));
	arg.len = sizeof(arg.value.off);
	if (csw->d_ioctl) {
		error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD,
		    curthread);
	} else
		error = ENODEV;
	dev_relthread(dev, ref);
	if (error != 0)
		return (UINT64_MAX);
	return (arg.value.off);
}

static void
ctl_be_block_cw_dispatch_sync(struct ctl_be_block_lun *be_lun,
			    union ctl_io *io)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct ctl_be_block_io *beio;
	struct ctl_lba_len_flags *lbalen;

	DPRINTF("entered\n");
	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
	lbalen = (struct ctl_lba_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];

	beio->io_len = lbalen->len * cbe_lun->blocksize;
	beio->io_offset = lbalen->lba * cbe_lun->blocksize;
	beio->io_arg = (lbalen->flags & SSC_IMMED) != 0;
	beio->bio_cmd = BIO_FLUSH;
	beio->ds_trans_type = DEVSTAT_NO_DATA;
	DPRINTF("SYNC\n");
	be_lun->lun_flush(be_lun, beio);
}

static void
ctl_be_block_cw_done_ws(struct ctl_be_block_io *beio)
{
	union ctl_io *io;

	io = beio->io;
	ctl_free_beio(beio);
	if ((io->io_hdr.flags & CTL_FLAG_ABORT) ||
	    ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
	     (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) {
		ctl_config_write_done(io);
		return;
	}

	ctl_be_block_config_write(io);
}

static void
ctl_be_block_cw_dispatch_ws(struct ctl_be_block_lun *be_lun,
			    union ctl_io *io)
{
	struct ctl_be_block_softc *softc = be_lun->softc;
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct ctl_be_block_io *beio;
	struct ctl_lba_len_flags *lbalen;
	uint64_t len_left, lba;
	uint32_t pb, pbo, adj;
	int i, seglen;
	uint8_t *buf, *end;

	DPRINTF("entered\n");

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
	lbalen = ARGS(io);

	if (lbalen->flags & ~(SWS_LBDATA | SWS_UNMAP | SWS_ANCHOR | SWS_NDOB) ||
	    (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR) && be_lun->unmap == NULL)) {
		ctl_free_beio(beio);
		ctl_set_invalid_field(&io->scsiio,
				      /*sks_valid*/ 1,
				      /*command*/ 1,
				      /*field*/ 1,
				      /*bit_valid*/ 0,
				      /*bit*/ 0);
		ctl_config_write_done(io);
		return;
	}

	if (lbalen->flags & (SWS_UNMAP | SWS_ANCHOR)) {
		beio->io_offset = lbalen->lba * cbe_lun->blocksize;
		beio->io_len = (uint64_t)lbalen->len * cbe_lun->blocksize;
		beio->bio_cmd = BIO_DELETE;
		beio->ds_trans_type = DEVSTAT_FREE;

		be_lun->unmap(be_lun, beio);
		return;
	}

	beio->bio_cmd = BIO_WRITE;
	beio->ds_trans_type = DEVSTAT_WRITE;

	DPRINTF("WRITE SAME at LBA %jx len %u\n",
	       (uintmax_t)lbalen->lba, lbalen->len);

	pb = cbe_lun->blocksize << be_lun->cbe_lun.pblockexp;
	if (be_lun->cbe_lun.pblockoff > 0)
		pbo = pb - cbe_lun->blocksize * be_lun->cbe_lun.pblockoff;
	else
		pbo = 0;
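	/*
	 * pb is the physical block size in bytes and pbo is effectively
	 * the byte offset from LBA 0 to the first physical block
	 * boundary.  The segment sizing below tries to end each chunk on
	 * a physical block boundary so the generated writes stay aligned.
	 */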
	len_left = (uint64_t)lbalen->len * cbe_lun->blocksize;
	for (i = 0, lba = 0; i < CTLBLK_NUM_SEGS && len_left > 0; i++) {
		/*
		 * Setup the S/G entry for this chunk.
		 */
		seglen = MIN(CTLBLK_MAX_SEG, len_left);
		if (pb > cbe_lun->blocksize) {
			adj = ((lbalen->lba + lba) * cbe_lun->blocksize +
			    seglen - pbo) % pb;
			if (seglen > adj)
				seglen -= adj;
			else
				seglen -= seglen % cbe_lun->blocksize;
		} else
			seglen -= seglen % cbe_lun->blocksize;
		ctl_alloc_seg(softc, &beio->sg_segs[i], seglen);

		DPRINTF("segment %d addr %p len %zd\n", i,
			beio->sg_segs[i].addr, beio->sg_segs[i].len);

		beio->num_segs++;
		len_left -= seglen;

		buf = beio->sg_segs[i].addr;
		end = buf + seglen;
		for (; buf < end; buf += cbe_lun->blocksize) {
			if (lbalen->flags & SWS_NDOB) {
				memset(buf, 0, cbe_lun->blocksize);
			} else {
				memcpy(buf, io->scsiio.kern_data_ptr,
				    cbe_lun->blocksize);
			}
			if (lbalen->flags & SWS_LBDATA)
				scsi_ulto4b(lbalen->lba + lba, buf);
			lba++;
		}
	}

	beio->io_offset = lbalen->lba * cbe_lun->blocksize;
	beio->io_len = lba * cbe_lun->blocksize;

	/* We cannot do it all in one run.  Correct and schedule a rerun. */
	if (len_left > 0) {
		lbalen->lba += lba;
		lbalen->len -= lba;
		beio->beio_cont = ctl_be_block_cw_done_ws;
	}

	be_lun->dispatch(be_lun, beio);
}

static void
ctl_be_block_cw_dispatch_unmap(struct ctl_be_block_lun *be_lun,
			    union ctl_io *io)
{
	struct ctl_be_block_io *beio;
	struct ctl_ptr_len_flags *ptrlen;

	DPRINTF("entered\n");

	beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
	ptrlen = (struct ctl_ptr_len_flags *)&io->io_hdr.ctl_private[CTL_PRIV_LBA_LEN];

	if ((ptrlen->flags & ~SU_ANCHOR) != 0 || be_lun->unmap == NULL) {
		ctl_free_beio(beio);
		ctl_set_invalid_field(&io->scsiio,
				      /*sks_valid*/ 0,
				      /*command*/ 1,
				      /*field*/ 0,
				      /*bit_valid*/ 0,
				      /*bit*/ 0);
		ctl_config_write_done(io);
		return;
	}

	beio->io_len = 0;
	beio->io_offset = -1;
	beio->bio_cmd = BIO_DELETE;
	beio->ds_trans_type = DEVSTAT_FREE;
	DPRINTF("UNMAP\n");
	be_lun->unmap(be_lun, beio);
}

static void
ctl_be_block_cr_done(struct ctl_be_block_io *beio)
{
	union ctl_io *io;

	io = beio->io;
	ctl_free_beio(beio);
	ctl_config_read_done(io);
}

static void
ctl_be_block_cr_dispatch(struct ctl_be_block_lun *be_lun,
			 union ctl_io *io)
{
	struct ctl_be_block_io *beio;
	struct ctl_be_block_softc *softc;

	DPRINTF("entered\n");

	softc = be_lun->softc;
	beio = ctl_alloc_beio(softc);
	beio->io = io;
	beio->lun = be_lun;
	beio->beio_cont = ctl_be_block_cr_done;
	PRIV(io)->ptr = (void *)beio;

	switch (io->scsiio.cdb[0]) {
	case SERVICE_ACTION_IN:		/* GET LBA STATUS */
		beio->bio_cmd = -1;
		beio->ds_trans_type = DEVSTAT_NO_DATA;
		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
		beio->io_len = 0;
		if (be_lun->get_lba_status)
			be_lun->get_lba_status(be_lun, beio);
		else
			ctl_be_block_cr_done(beio);
		break;
	default:
		panic("Unhandled CDB type %#x", io->scsiio.cdb[0]);
		break;
	}
}

static void
ctl_be_block_cw_done(struct ctl_be_block_io *beio)
{
	union ctl_io *io;

	io = beio->io;
	ctl_free_beio(beio);
	ctl_config_write_done(io);
}

static void
ctl_be_block_cw_dispatch(struct ctl_be_block_lun *be_lun,
			 union ctl_io *io)
{
	struct ctl_be_block_io *beio;
	struct ctl_be_block_softc *softc;

	DPRINTF("entered\n");

	softc = be_lun->softc;
	beio = ctl_alloc_beio(softc);
	beio->io = io;
	beio->lun = be_lun;
	beio->beio_cont = ctl_be_block_cw_done;
	switch (io->scsiio.tag_type) {
	case CTL_TAG_ORDERED:
		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
		break;
	case CTL_TAG_HEAD_OF_QUEUE:
		beio->ds_tag_type = DEVSTAT_TAG_HEAD;
		break;
	case CTL_TAG_UNTAGGED:
	case CTL_TAG_SIMPLE:
	case CTL_TAG_ACA:
	default:
		beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
		break;
	}
	PRIV(io)->ptr = (void *)beio;

	switch (io->scsiio.cdb[0]) {
	case SYNCHRONIZE_CACHE:
	case SYNCHRONIZE_CACHE_16:
		ctl_be_block_cw_dispatch_sync(be_lun, io);
		break;
	case WRITE_SAME_10:
	case WRITE_SAME_16:
		ctl_be_block_cw_dispatch_ws(be_lun, io);
		break;
	case UNMAP:
		ctl_be_block_cw_dispatch_unmap(be_lun, io);
		break;
	default:
		panic("Unhandled CDB type %#x", io->scsiio.cdb[0]);
		break;
	}
}

SDT_PROBE_DEFINE1(cbb, , read, start, "uint64_t");
SDT_PROBE_DEFINE1(cbb, , write, start, "uint64_t");
SDT_PROBE_DEFINE1(cbb, , read, alloc_done, "uint64_t");
SDT_PROBE_DEFINE1(cbb, , write, alloc_done, "uint64_t");

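/*
 * Continuation for commands larger than one beio can carry: release the
 * finished beio and requeue the remainder of the command on the input
 * queue.  ctl_be_block_dispatch() sets this as beio_cont whenever
 * bptrlen->len is still short of lbalen->len.
 */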
static void
ctl_be_block_next(struct ctl_be_block_io *beio)
{
	struct ctl_be_block_lun *be_lun;
	union ctl_io *io;

	io = beio->io;
	be_lun = beio->lun;
	ctl_free_beio(beio);
	if ((io->io_hdr.flags & CTL_FLAG_ABORT) ||
	    ((io->io_hdr.status & CTL_STATUS_MASK) != CTL_STATUS_NONE &&
	     (io->io_hdr.status & CTL_STATUS_MASK) != CTL_SUCCESS)) {
		ctl_data_submit_done(io);
		return;
	}

	io->io_hdr.status &= ~CTL_STATUS_MASK;
	io->io_hdr.status |= CTL_STATUS_NONE;

	mtx_lock(&be_lun->queue_lock);
	STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
	mtx_unlock(&be_lun->queue_lock);
	taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
}

static void
ctl_be_block_dispatch(struct ctl_be_block_lun *be_lun,
			   union ctl_io *io)
{
	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
	struct ctl_be_block_io *beio;
	struct ctl_be_block_softc *softc;
	struct ctl_lba_len_flags *lbalen;
	struct ctl_ptr_len_flags *bptrlen;
	uint64_t len_left, lbas;
	int i;

	softc = be_lun->softc;

	DPRINTF("entered\n");

	lbalen = ARGS(io);
	if (lbalen->flags & CTL_LLF_WRITE) {
		SDT_PROBE0(cbb, , write, start);
	} else {
		SDT_PROBE0(cbb, , read, start);
	}

	beio = ctl_alloc_beio(softc);
	beio->io = io;
	beio->lun = be_lun;
	bptrlen = PRIV(io);
	bptrlen->ptr = (void *)beio;

	switch (io->scsiio.tag_type) {
	case CTL_TAG_ORDERED:
		beio->ds_tag_type = DEVSTAT_TAG_ORDERED;
		break;
	case CTL_TAG_HEAD_OF_QUEUE:
		beio->ds_tag_type = DEVSTAT_TAG_HEAD;
		break;
	case CTL_TAG_UNTAGGED:
	case CTL_TAG_SIMPLE:
	case CTL_TAG_ACA:
	default:
		beio->ds_tag_type = DEVSTAT_TAG_SIMPLE;
		break;
	}

	if (lbalen->flags & CTL_LLF_WRITE) {
		beio->bio_cmd = BIO_WRITE;
		beio->ds_trans_type = DEVSTAT_WRITE;
	} else {
		beio->bio_cmd = BIO_READ;
		beio->ds_trans_type = DEVSTAT_READ;
	}

	DPRINTF("%s at LBA %jx len %u @%ju\n",
	       (beio->bio_cmd == BIO_READ) ? "READ" : "WRITE",
	       (uintmax_t)lbalen->lba, lbalen->len, bptrlen->len);
	if (lbalen->flags & CTL_LLF_COMPARE) {
		beio->two_sglists = 1;
		lbas = CTLBLK_HALF_IO_SIZE;
	} else {
		lbas = CTLBLK_MAX_IO_SIZE;
	}
	lbas = MIN(lbalen->len - bptrlen->len, lbas / cbe_lun->blocksize);
	beio->io_offset = (lbalen->lba + bptrlen->len) * cbe_lun->blocksize;
	beio->io_len = lbas * cbe_lun->blocksize;
	bptrlen->len += lbas;

	for (i = 0, len_left = beio->io_len; len_left > 0; i++) {
		KASSERT(i < CTLBLK_MAX_SEGS, ("Too many segs (%d >= %d)",
		    i, CTLBLK_MAX_SEGS));

		/*
		 * Setup the S/G entry for this chunk.
		 */
		ctl_alloc_seg(softc, &beio->sg_segs[i],
		    MIN(CTLBLK_MAX_SEG, len_left));

		DPRINTF("segment %d addr %p len %zd\n", i,
			beio->sg_segs[i].addr, beio->sg_segs[i].len);

		/* Set up second segment for compare operation. */
		if (beio->two_sglists) {
			ctl_alloc_seg(softc,
			    &beio->sg_segs[i + CTLBLK_HALF_SEGS],
			    beio->sg_segs[i].len);
		}

		beio->num_segs++;
		len_left -= beio->sg_segs[i].len;
	}
	if (bptrlen->len < lbalen->len)
		beio->beio_cont = ctl_be_block_next;
	io->scsiio.be_move_done = ctl_be_block_move_done;
	/* For compare we have separate S/G lists for read and datamove. */
	if (beio->two_sglists)
		io->scsiio.kern_data_ptr = (uint8_t *)&beio->sg_segs[CTLBLK_HALF_SEGS];
	else
		io->scsiio.kern_data_ptr = (uint8_t *)beio->sg_segs;
	io->scsiio.kern_data_len = beio->io_len;
	io->scsiio.kern_sg_entries = beio->num_segs;
	io->scsiio.kern_data_ref = ctl_refcnt_beio;
	io->scsiio.kern_data_arg = beio;
	io->io_hdr.flags |= CTL_FLAG_ALLOCATED;

	/*
	 * For the read case, we need to read the data into our buffers and
	 * then we can send it back to the user.  For the write case, we
	 * need to get the data from the user first.
	 */
	if (beio->bio_cmd == BIO_READ) {
		SDT_PROBE0(cbb, , read, alloc_done);
		be_lun->dispatch(be_lun, beio);
	} else {
		SDT_PROBE0(cbb, , write, alloc_done);
		ctl_datamove(io);
	}
}

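/*
 * Taskqueue worker.  Note the drain order below: datamove completions
 * are handled first, then config writes and config reads, and new I/O
 * from the input queue only after that, which favors completing work
 * already in flight before starting new work.
 */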
1734 static void
1735 ctl_be_block_worker(void *context, int pending)
1736 {
1737 	struct ctl_be_block_lun *be_lun = (struct ctl_be_block_lun *)context;
1738 	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1739 	union ctl_io *io;
1740 	struct ctl_be_block_io *beio;
1741 
1742 	DPRINTF("entered\n");
1743 	/*
1744 	 * Fetch and process I/Os from all queues.  If we detect LUN
1745 	 * CTL_LUN_FLAG_NO_MEDIA status here -- it is result of a race,
1746 	 * so make response maximally opaque to not confuse initiator.
1747 	 */
1748 	for (;;) {
1749 		mtx_lock(&be_lun->queue_lock);
1750 		io = (union ctl_io *)STAILQ_FIRST(&be_lun->datamove_queue);
1751 		if (io != NULL) {
1752 			DPRINTF("datamove queue\n");
1753 			STAILQ_REMOVE_HEAD(&be_lun->datamove_queue, links);
1754 			mtx_unlock(&be_lun->queue_lock);
1755 			beio = (struct ctl_be_block_io *)PRIV(io)->ptr;
1756 			if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
1757 				ctl_set_busy(&io->scsiio);
1758 				ctl_complete_beio(beio);
1759 				continue;
1760 			}
1761 			be_lun->dispatch(be_lun, beio);
1762 			continue;
1763 		}
1764 		io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_write_queue);
1765 		if (io != NULL) {
1766 			DPRINTF("config write queue\n");
1767 			STAILQ_REMOVE_HEAD(&be_lun->config_write_queue, links);
1768 			mtx_unlock(&be_lun->queue_lock);
1769 			if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
1770 				ctl_set_busy(&io->scsiio);
1771 				ctl_config_write_done(io);
1772 				continue;
1773 			}
1774 			ctl_be_block_cw_dispatch(be_lun, io);
1775 			continue;
1776 		}
1777 		io = (union ctl_io *)STAILQ_FIRST(&be_lun->config_read_queue);
1778 		if (io != NULL) {
1779 			DPRINTF("config read queue\n");
1780 			STAILQ_REMOVE_HEAD(&be_lun->config_read_queue, links);
1781 			mtx_unlock(&be_lun->queue_lock);
1782 			if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
1783 				ctl_set_busy(&io->scsiio);
1784 				ctl_config_read_done(io);
1785 				continue;
1786 			}
1787 			ctl_be_block_cr_dispatch(be_lun, io);
1788 			continue;
1789 		}
1790 		io = (union ctl_io *)STAILQ_FIRST(&be_lun->input_queue);
1791 		if (io != NULL) {
1792 			DPRINTF("input queue\n");
1793 			STAILQ_REMOVE_HEAD(&be_lun->input_queue, links);
1794 			mtx_unlock(&be_lun->queue_lock);
1795 			if (cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) {
1796 				ctl_set_busy(&io->scsiio);
1797 				ctl_data_submit_done(io);
1798 				continue;
1799 			}
1800 			ctl_be_block_dispatch(be_lun, io);
1801 			continue;
1802 		}
1803 
1804 		/*
1805 		 * If we get here, there is no work left in the queues, so
1806 		 * just break out and let the task queue go to sleep.
1807 		 */
1808 		mtx_unlock(&be_lun->queue_lock);
1809 		break;
1810 	}
1811 }
1812 
1813 /*
1814  * Entry point from CTL to the backend for I/O.  We queue everything to a
1815  * work thread, so this just puts the I/O on a queue and wakes up the
1816  * thread.
1817  */
1818 static int
1819 ctl_be_block_submit(union ctl_io *io)
1820 {
1821 	struct ctl_be_block_lun *be_lun;
1822 
1823 	DPRINTF("entered\n");
1824 
1825 	be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io);
1826 
1827 	KASSERT(io->io_hdr.io_type == CTL_IO_SCSI,
1828 	    ("%s: unexpected I/O type %x", __func__, io->io_hdr.io_type));
1829 
1830 	PRIV(io)->len = 0;
1831 
1832 	mtx_lock(&be_lun->queue_lock);
1833 	STAILQ_INSERT_TAIL(&be_lun->input_queue, &io->io_hdr, links);
1834 	mtx_unlock(&be_lun->queue_lock);
1835 	taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
1836 
1837 	return (CTL_RETVAL_COMPLETE);
1838 }
1839 
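/*
 * Handle ioctls on the backend's character device.  Only CTL_LUN_REQ is
 * supported; it multiplexes LUN create, remove and modify requests.
 */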
1840 static int
1841 ctl_be_block_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
1842 			int flag, struct thread *td)
1843 {
1844 	struct ctl_be_block_softc *softc = &backend_block_softc;
1845 	int error;
1846 
1847 	error = 0;
1848 	switch (cmd) {
1849 	case CTL_LUN_REQ: {
1850 		struct ctl_lun_req *lun_req;
1851 
1852 		lun_req = (struct ctl_lun_req *)addr;
1853 
1854 		switch (lun_req->reqtype) {
1855 		case CTL_LUNREQ_CREATE:
1856 			error = ctl_be_block_create(softc, lun_req);
1857 			break;
1858 		case CTL_LUNREQ_RM:
1859 			error = ctl_be_block_rm(softc, lun_req);
1860 			break;
1861 		case CTL_LUNREQ_MODIFY:
1862 			error = ctl_be_block_modify(softc, lun_req);
1863 			break;
1864 		default:
1865 			lun_req->status = CTL_LUN_ERROR;
1866 			snprintf(lun_req->error_str, sizeof(lun_req->error_str),
1867 				 "invalid LUN request type %d",
1868 				 lun_req->reqtype);
1869 			break;
1870 		}
1871 		break;
1872 	}
1873 	default:
1874 		error = ENOTTY;
1875 		break;
1876 	}
1877 
1878 	return (error);
1879 }
1880 
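/*
 * Finish setting up a LUN backed by a regular file: install the file
 * dispatch methods, size the LUN from the vnode attributes (or from the
 * user-supplied size), choose the logical block size, and derive the
 * physical/UNMAP block geometry from the filesystem hints and any
 * user-supplied options.
 */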
1881 static int
1882 ctl_be_block_open_file(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
1883 {
1884 	struct ctl_be_lun *cbe_lun;
1885 	struct ctl_be_block_filedata *file_data;
1886 	struct ctl_lun_create_params *params;
1887 	const char		     *value;
1888 	struct vattr		      vattr;
1889 	off_t			      ps, pss, po, pos, us, uss, uo, uos;
1890 	int			      error;
1891 	long			      pconf;
1892 
1893 	cbe_lun = &be_lun->cbe_lun;
1894 	file_data = &be_lun->backend.file;
1895 	params = &be_lun->params;
1896 
1897 	be_lun->dev_type = CTL_BE_BLOCK_FILE;
1898 	be_lun->dispatch = ctl_be_block_dispatch_file;
1899 	be_lun->lun_flush = ctl_be_block_flush_file;
1900 	be_lun->get_lba_status = ctl_be_block_gls_file;
1901 	be_lun->getattr = ctl_be_block_getattr_file;
1902 	be_lun->unmap = ctl_be_block_unmap_file;
1903 	cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP;
1904 
1905 	error = VOP_GETATTR(be_lun->vn, &vattr, curthread->td_ucred);
1906 	if (error != 0) {
1907 		snprintf(req->error_str, sizeof(req->error_str),
1908 			 "error calling VOP_GETATTR() for file %s",
1909 			 be_lun->dev_path);
1910 		return (error);
1911 	}
1912 
1913 	error = VOP_PATHCONF(be_lun->vn, _PC_DEALLOC_PRESENT, &pconf);
1914 	if (error != 0) {
1915 		snprintf(req->error_str, sizeof(req->error_str),
1916 		    "error calling VOP_PATHCONF() for file %s",
1917 		    be_lun->dev_path);
1918 		return (error);
1919 	}
1920 	if (pconf == 1)
1921 		cbe_lun->flags |= CTL_LUN_FLAG_UNMAP;
1922 
1923 	file_data->cred = crhold(curthread->td_ucred);
1924 	if (params->lun_size_bytes != 0)
1925 		be_lun->size_bytes = params->lun_size_bytes;
1926 	else
1927 		be_lun->size_bytes = vattr.va_size;
1928 
1929 	/*
1930 	 * For files we can use any logical block size.  Prefer 512 bytes
1931 	 * for compatibility reasons.  If the file's vattr.va_blocksize
1932 	 * (preferred I/O block size) is larger than, and a multiple of, the
1933 	 * chosen logical block size, report it as the physical block size.
1934 	 */
1935 	if (params->blocksize_bytes != 0)
1936 		cbe_lun->blocksize = params->blocksize_bytes;
1937 	else if (cbe_lun->lun_type == T_CDROM)
1938 		cbe_lun->blocksize = 2048;
1939 	else
1940 		cbe_lun->blocksize = 512;
1941 	be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
1942 	cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
1943 	    0 : (be_lun->size_blocks - 1);
1944 
1945 	us = ps = vattr.va_blocksize;
1946 	uo = po = 0;
1947 
1948 	value = dnvlist_get_string(cbe_lun->options, "pblocksize", NULL);
1949 	if (value != NULL)
1950 		ctl_expand_number(value, &ps);
1951 	value = dnvlist_get_string(cbe_lun->options, "pblockoffset", NULL);
1952 	if (value != NULL)
1953 		ctl_expand_number(value, &po);
1954 	pss = ps / cbe_lun->blocksize;
1955 	pos = po / cbe_lun->blocksize;
1956 	if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) &&
1957 	    ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) {
1958 		cbe_lun->pblockexp = fls(pss) - 1;
1959 		cbe_lun->pblockoff = (pss - pos) % pss;
1960 	}
1961 
1962 	value = dnvlist_get_string(cbe_lun->options, "ublocksize", NULL);
1963 	if (value != NULL)
1964 		ctl_expand_number(value, &us);
1965 	value = dnvlist_get_string(cbe_lun->options, "ublockoffset", NULL);
1966 	if (value != NULL)
1967 		ctl_expand_number(value, &uo);
1968 	uss = us / cbe_lun->blocksize;
1969 	uos = uo / cbe_lun->blocksize;
1970 	if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) &&
1971 	    ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) {
1972 		cbe_lun->ublockexp = fls(uss) - 1;
1973 		cbe_lun->ublockoff = (uss - uos) % uss;
1974 	}
1975 
1976 	/*
1977 	 * Sanity check.  The media size has to be at least one
1978 	 * sector long.
1979 	 */
1980 	if (be_lun->size_bytes < cbe_lun->blocksize) {
1981 		error = EINVAL;
1982 		snprintf(req->error_str, sizeof(req->error_str),
1983 			 "file %s size %ju < block size %u", be_lun->dev_path,
1984 			 (uintmax_t)be_lun->size_bytes, cbe_lun->blocksize);
1985 	}
1986 
1987 	cbe_lun->opttxferlen = CTLBLK_MAX_IO_SIZE / cbe_lun->blocksize;
1988 	return (error);
1989 }
1990 
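/*
 * Finish setting up a LUN backed by a device node.  ZVOLs get a
 * dedicated dispatch method; other devices go through the generic GEOM
 * path.  The sector size, media size and stripe geometry are queried
 * from the device via ioctls, and UNMAP support is probed through
 * GEOM::candelete unless overridden by the "unmap" option.
 */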
1991 static int
1992 ctl_be_block_open_dev(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
1993 {
1994 	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
1995 	struct ctl_lun_create_params *params;
1996 	struct cdevsw		     *csw;
1997 	struct cdev		     *dev;
1998 	const char		     *value;
1999 	int			      error, atomic, maxio, ref, unmap, tmp;
2000 	off_t			      ps, pss, po, pos, us, uss, uo, uos, otmp;
2001 
2002 	params = &be_lun->params;
2003 
2004 	be_lun->dev_type = CTL_BE_BLOCK_DEV;
2005 	csw = devvn_refthread(be_lun->vn, &dev, &ref);
2006 	if (csw == NULL)
2007 		return (ENXIO);
2008 	if (strcmp(csw->d_name, "zvol") == 0) {
2009 		be_lun->dispatch = ctl_be_block_dispatch_zvol;
2010 		be_lun->get_lba_status = ctl_be_block_gls_zvol;
2011 		atomic = maxio = CTLBLK_MAX_IO_SIZE;
2012 	} else {
2013 		be_lun->dispatch = ctl_be_block_dispatch_dev;
2014 		be_lun->get_lba_status = NULL;
2015 		atomic = 0;
2016 		maxio = dev->si_iosize_max;
2017 		if (maxio <= 0)
2018 			maxio = DFLTPHYS;
2019 		if (maxio > CTLBLK_MAX_SEG)
2020 			maxio = CTLBLK_MAX_SEG;
2021 	}
2022 	be_lun->lun_flush = ctl_be_block_flush_dev;
2023 	be_lun->getattr = ctl_be_block_getattr_dev;
2024 	be_lun->unmap = ctl_be_block_unmap_dev;
2025 
2026 	if (!csw->d_ioctl) {
2027 		dev_relthread(dev, ref);
2028 		snprintf(req->error_str, sizeof(req->error_str),
2029 			 "no d_ioctl for device %s!", be_lun->dev_path);
2030 		return (ENODEV);
2031 	}
2032 
2033 	error = csw->d_ioctl(dev, DIOCGSECTORSIZE, (caddr_t)&tmp, FREAD,
2034 			       curthread);
2035 	if (error) {
2036 		dev_relthread(dev, ref);
2037 		snprintf(req->error_str, sizeof(req->error_str),
2038 			 "error %d returned for DIOCGSECTORSIZE ioctl "
2039 			 "on %s!", error, be_lun->dev_path);
2040 		return (error);
2041 	}
2042 
2043 	/*
2044 	 * If the user has asked for a blocksize that is greater than the
2045 	 * backing device's blocksize, we can do it only if the blocksize
2046 	 * the user is asking for is an even multiple of the underlying
2047 	 * device's blocksize.
2048 	 */
2049 	if ((params->blocksize_bytes != 0) &&
2050 	    (params->blocksize_bytes >= tmp)) {
2051 		if (params->blocksize_bytes % tmp == 0) {
2052 			cbe_lun->blocksize = params->blocksize_bytes;
2053 		} else {
2054 			dev_relthread(dev, ref);
2055 			snprintf(req->error_str, sizeof(req->error_str),
2056 				 "requested blocksize %u is not an even "
2057 				 "multiple of backing device blocksize %u",
2058 				 params->blocksize_bytes, tmp);
2059 			return (EINVAL);
2060 		}
2061 	} else if (params->blocksize_bytes != 0) {
2062 		dev_relthread(dev, ref);
2063 		snprintf(req->error_str, sizeof(req->error_str),
2064 			 "requested blocksize %u < backing device "
2065 			 "blocksize %u", params->blocksize_bytes, tmp);
2066 		return (EINVAL);
2067 	} else if (cbe_lun->lun_type == T_CDROM)
2068 		cbe_lun->blocksize = MAX(tmp, 2048);
2069 	else
2070 		cbe_lun->blocksize = tmp;
2071 
2072 	error = csw->d_ioctl(dev, DIOCGMEDIASIZE, (caddr_t)&otmp, FREAD,
2073 			     curthread);
2074 	if (error) {
2075 		dev_relthread(dev, ref);
2076 		snprintf(req->error_str, sizeof(req->error_str),
2077 			 "error %d returned for DIOCGMEDIASIZE "
2078 			 "ioctl on %s!", error,
2079 			 be_lun->dev_path);
2080 		return (error);
2081 	}
2082 
2083 	if (params->lun_size_bytes != 0) {
2084 		if (params->lun_size_bytes > otmp) {
2085 			dev_relthread(dev, ref);
2086 			snprintf(req->error_str, sizeof(req->error_str),
2087 				 "requested LUN size %ju > backing device "
2088 				 "size %ju",
2089 				 (uintmax_t)params->lun_size_bytes,
2090 				 (uintmax_t)otmp);
2091 			return (EINVAL);
2092 		}
2093 
2094 		be_lun->size_bytes = params->lun_size_bytes;
2095 	} else
2096 		be_lun->size_bytes = otmp;
2097 	be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
2098 	cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
2099 	    0 : (be_lun->size_blocks - 1);
2100 
2101 	error = csw->d_ioctl(dev, DIOCGSTRIPESIZE, (caddr_t)&ps, FREAD,
2102 	    curthread);
2103 	if (error)
2104 		ps = po = 0;
2105 	else {
2106 		error = csw->d_ioctl(dev, DIOCGSTRIPEOFFSET, (caddr_t)&po,
2107 		    FREAD, curthread);
2108 		if (error)
2109 			po = 0;
2110 	}
2111 	us = ps;
2112 	uo = po;
2113 
2114 	value = dnvlist_get_string(cbe_lun->options, "pblocksize", NULL);
2115 	if (value != NULL)
2116 		ctl_expand_number(value, &ps);
2117 	value = dnvlist_get_string(cbe_lun->options, "pblockoffset", NULL);
2118 	if (value != NULL)
2119 		ctl_expand_number(value, &po);
2120 	pss = ps / cbe_lun->blocksize;
2121 	pos = po / cbe_lun->blocksize;
2122 	if ((pss > 0) && (pss * cbe_lun->blocksize == ps) && (pss >= pos) &&
2123 	    ((pss & (pss - 1)) == 0) && (pos * cbe_lun->blocksize == po)) {
2124 		cbe_lun->pblockexp = fls(pss) - 1;
2125 		cbe_lun->pblockoff = (pss - pos) % pss;
2126 	}
2127 
2128 	value = dnvlist_get_string(cbe_lun->options, "ublocksize", NULL);
2129 	if (value != NULL)
2130 		ctl_expand_number(value, &us);
2131 	value = dnvlist_get_string(cbe_lun->options, "ublockoffset", NULL);
2132 	if (value != NULL)
2133 		ctl_expand_number(value, &uo);
2134 	uss = us / cbe_lun->blocksize;
2135 	uos = uo / cbe_lun->blocksize;
2136 	if ((uss > 0) && (uss * cbe_lun->blocksize == us) && (uss >= uos) &&
2137 	    ((uss & (uss - 1)) == 0) && (uos * cbe_lun->blocksize == uo)) {
2138 		cbe_lun->ublockexp = fls(uss) - 1;
2139 		cbe_lun->ublockoff = (uss - uos) % uss;
2140 	}
2141 
2142 	cbe_lun->atomicblock = atomic / cbe_lun->blocksize;
2143 	cbe_lun->opttxferlen = maxio / cbe_lun->blocksize;
2144 
2145 	if (be_lun->dispatch == ctl_be_block_dispatch_zvol) {
2146 		unmap = 1;
2147 	} else {
2148 		struct diocgattr_arg	arg;
2149 
2150 		strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name));
2151 		arg.len = sizeof(arg.value.i);
2152 		error = csw->d_ioctl(dev, DIOCGATTR, (caddr_t)&arg, FREAD,
2153 		    curthread);
2154 		unmap = (error == 0) ? arg.value.i : 0;
2155 	}
2156 	value = dnvlist_get_string(cbe_lun->options, "unmap", NULL);
2157 	if (value != NULL)
2158 		unmap = (strcmp(value, "on") == 0);
2159 	if (unmap)
2160 		cbe_lun->flags |= CTL_LUN_FLAG_UNMAP;
2161 	else
2162 		cbe_lun->flags &= ~CTL_LUN_FLAG_UNMAP;
2163 
2164 	dev_relthread(dev, ref);
2165 	return (0);
2166 }
2167 
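/*
 * Close the backing vnode, if any, and release per-backend resources
 * such as the credential held for file-backed LUNs.
 */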
2168 static int
2169 ctl_be_block_close(struct ctl_be_block_lun *be_lun)
2170 {
2171 	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
2172 	int flags;
2173 
2174 	if (be_lun->vn) {
2175 		flags = FREAD;
2176 		if ((cbe_lun->flags & CTL_LUN_FLAG_READONLY) == 0)
2177 			flags |= FWRITE;
2178 		(void)vn_close(be_lun->vn, flags, NOCRED, curthread);
2179 		be_lun->vn = NULL;
2180 
2181 		switch (be_lun->dev_type) {
2182 		case CTL_BE_BLOCK_DEV:
2183 			break;
2184 		case CTL_BE_BLOCK_FILE:
2185 			if (be_lun->backend.file.cred != NULL) {
2186 				crfree(be_lun->backend.file.cred);
2187 				be_lun->backend.file.cred = NULL;
2188 			}
2189 			break;
2190 		case CTL_BE_BLOCK_NONE:
2191 			break;
2192 		default:
2193 			panic("Unexpected backend type %d", be_lun->dev_type);
2194 			break;
2195 		}
2196 		be_lun->dev_type = CTL_BE_BLOCK_NONE;
2197 	}
2198 	return (0);
2199 }
2200 
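/*
 * Open the backing store named by the "file" option, retrying read-only
 * if a writable open fails, then hand off to the device- or file-
 * specific open routine.  Also sets the read-only flag and the
 * serialization policy ("serseq" option) for the LUN.
 */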
2201 static int
2202 ctl_be_block_open(struct ctl_be_block_lun *be_lun, struct ctl_lun_req *req)
2203 {
2204 	struct ctl_be_lun *cbe_lun = &be_lun->cbe_lun;
2205 	struct nameidata nd;
2206 	const char	*value;
2207 	int		 error, flags;
2208 
2209 	error = 0;
2210 	if (rootvnode == NULL) {
2211 		snprintf(req->error_str, sizeof(req->error_str),
2212 			 "Root filesystem is not mounted");
2213 		return (1);
2214 	}
2215 	pwd_ensure_dirs();
2216 
2217 	value = dnvlist_get_string(cbe_lun->options, "file", NULL);
2218 	if (value == NULL) {
2219 		snprintf(req->error_str, sizeof(req->error_str),
2220 			 "no file argument specified");
2221 		return (1);
2222 	}
2223 	free(be_lun->dev_path, M_CTLBLK);
2224 	be_lun->dev_path = strdup(value, M_CTLBLK);
2225 
2226 	flags = FREAD;
2227 	value = dnvlist_get_string(cbe_lun->options, "readonly", NULL);
2228 	if (value != NULL) {
2229 		if (strcmp(value, "on") != 0)
2230 			flags |= FWRITE;
2231 	} else if (cbe_lun->lun_type == T_DIRECT)
2232 		flags |= FWRITE;
2233 
2234 again:
2235 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, be_lun->dev_path);
2236 	error = vn_open(&nd, &flags, 0, NULL);
2237 	if ((error == EROFS || error == EACCES) && (flags & FWRITE)) {
2238 		flags &= ~FWRITE;
2239 		goto again;
2240 	}
2241 	if (error) {
2242 		/*
2243 		 * If the user didn't give us a fully qualified path,
2244 		 * prepending "/dev/" is the only reasonable guess we can
2245 		 * make.  To specify a plain file, the user must provide
2246 		 * the full path.
2247 		 */
2248 		if (be_lun->dev_path[0] != '/') {
2249 			char *dev_name;
2250 
2251 			asprintf(&dev_name, M_CTLBLK, "/dev/%s",
2252 				be_lun->dev_path);
2253 			free(be_lun->dev_path, M_CTLBLK);
2254 			be_lun->dev_path = dev_name;
2255 			goto again;
2256 		}
2257 		snprintf(req->error_str, sizeof(req->error_str),
2258 		    "error opening %s: %d", be_lun->dev_path, error);
2259 		return (error);
2260 	}
2261 	if (flags & FWRITE)
2262 		cbe_lun->flags &= ~CTL_LUN_FLAG_READONLY;
2263 	else
2264 		cbe_lun->flags |= CTL_LUN_FLAG_READONLY;
2265 
2266 	NDFREE(&nd, NDF_ONLY_PNBUF);
2267 	be_lun->vn = nd.ni_vp;
2268 
2269 	/* We only support disks and files. */
2270 	if (vn_isdisk_error(be_lun->vn, &error)) {
2271 		error = ctl_be_block_open_dev(be_lun, req);
2272 	} else if (be_lun->vn->v_type == VREG) {
2273 		error = ctl_be_block_open_file(be_lun, req);
2274 	} else {
2275 		error = EINVAL;
2276 		snprintf(req->error_str, sizeof(req->error_str),
2277 			 "%s is not a disk or plain file", be_lun->dev_path);
2278 	}
2279 	VOP_UNLOCK(be_lun->vn);
2280 
2281 	if (error != 0)
2282 		ctl_be_block_close(be_lun);
2283 	cbe_lun->serseq = CTL_LUN_SERSEQ_OFF;
2284 	if (be_lun->dispatch != ctl_be_block_dispatch_dev)
2285 		cbe_lun->serseq = CTL_LUN_SERSEQ_SOFT;
2286 	value = dnvlist_get_string(cbe_lun->options, "serseq", NULL);
2287 	if (value != NULL && strcmp(value, "on") == 0)
2288 		cbe_lun->serseq = CTL_LUN_SERSEQ_ON;
2289 	else if (value != NULL && strcmp(value, "read") == 0)
2290 		cbe_lun->serseq = CTL_LUN_SERSEQ_READ;
2291 	else if (value != NULL && strcmp(value, "soft") == 0)
2292 		cbe_lun->serseq = CTL_LUN_SERSEQ_SOFT;
2293 	else if (value != NULL && strcmp(value, "off") == 0)
2294 		cbe_lun->serseq = CTL_LUN_SERSEQ_OFF;
2295 	return (0);
2296 }
2297 
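/*
 * Create a new LUN: allocate and initialize the backend LUN structure,
 * open the backing store (for primary LUNs), set up the serial number,
 * device ID and worker taskqueue, and register the LUN with CTL.
 */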
2298 static int
2299 ctl_be_block_create(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
2300 {
2301 	struct ctl_be_lun *cbe_lun;
2302 	struct ctl_be_block_lun *be_lun;
2303 	struct ctl_lun_create_params *params;
2304 	char tmpstr[32];
2306 	const char *value;
2307 	int retval, num_threads;
2308 	int tmp_num_threads;
2309 
2310 	params = &req->reqdata.create;
2311 	retval = 0;
2312 	req->status = CTL_LUN_OK;
2313 
2314 	be_lun = malloc(sizeof(*be_lun), M_CTLBLK, M_ZERO | M_WAITOK);
2315 	cbe_lun = &be_lun->cbe_lun;
2316 	be_lun->params = req->reqdata.create;
2317 	be_lun->softc = softc;
2318 	STAILQ_INIT(&be_lun->input_queue);
2319 	STAILQ_INIT(&be_lun->config_read_queue);
2320 	STAILQ_INIT(&be_lun->config_write_queue);
2321 	STAILQ_INIT(&be_lun->datamove_queue);
2322 	mtx_init(&be_lun->io_lock, "ctlblock io", NULL, MTX_DEF);
2323 	mtx_init(&be_lun->queue_lock, "ctlblock queue", NULL, MTX_DEF);
2324 	cbe_lun->options = nvlist_clone(req->args_nvl);
2325 
2326 	if (params->flags & CTL_LUN_FLAG_DEV_TYPE)
2327 		cbe_lun->lun_type = params->device_type;
2328 	else
2329 		cbe_lun->lun_type = T_DIRECT;
2330 	be_lun->flags = 0;
2331 	cbe_lun->flags = 0;
2332 	value = dnvlist_get_string(cbe_lun->options, "ha_role", NULL);
2333 	if (value != NULL) {
2334 		if (strcmp(value, "primary") == 0)
2335 			cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
2336 	} else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF)
2337 		cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
2338 
2339 	if (cbe_lun->lun_type == T_DIRECT ||
2340 	    cbe_lun->lun_type == T_CDROM) {
2341 		be_lun->size_bytes = params->lun_size_bytes;
2342 		if (params->blocksize_bytes != 0)
2343 			cbe_lun->blocksize = params->blocksize_bytes;
2344 		else if (cbe_lun->lun_type == T_CDROM)
2345 			cbe_lun->blocksize = 2048;
2346 		else
2347 			cbe_lun->blocksize = 512;
2348 		be_lun->size_blocks = be_lun->size_bytes / cbe_lun->blocksize;
2349 		cbe_lun->maxlba = (be_lun->size_blocks == 0) ?
2350 		    0 : (be_lun->size_blocks - 1);
2351 
2352 		if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) ||
2353 		    control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) {
2354 			retval = ctl_be_block_open(be_lun, req);
2355 			if (retval != 0) {
2356 				retval = 0;
2357 				req->status = CTL_LUN_WARNING;
2358 			}
2359 		}
2360 		num_threads = cbb_num_threads;
2361 	} else {
2362 		num_threads = 1;
2363 	}
2364 
2365 	value = dnvlist_get_string(cbe_lun->options, "num_threads", NULL);
2366 	if (value != NULL) {
2367 		tmp_num_threads = strtol(value, NULL, 0);
2368 
2369 		/*
2370 		 * We don't let the user specify fewer than one thread, but
2371 		 * otherwise trust them not to ask for an absurd number of
2372 		 * threads (e.g. 1000).
2373 		 */
2374 		if (tmp_num_threads < 1) {
2375 			snprintf(req->error_str, sizeof(req->error_str),
2376 				 "invalid number of threads %s",
2377 				 value);
2378 			goto bailout_error;
2379 		}
2380 		num_threads = tmp_num_threads;
2381 	}
2382 
2383 	if (be_lun->vn == NULL)
2384 		cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
2385 	/* Tell the user the blocksize we ended up using */
2386 	params->lun_size_bytes = be_lun->size_bytes;
2387 	params->blocksize_bytes = cbe_lun->blocksize;
2388 	if (params->flags & CTL_LUN_FLAG_ID_REQ) {
2389 		cbe_lun->req_lun_id = params->req_lun_id;
2390 		cbe_lun->flags |= CTL_LUN_FLAG_ID_REQ;
2391 	} else
2392 		cbe_lun->req_lun_id = 0;
2393 
2394 	cbe_lun->lun_shutdown = ctl_be_block_lun_shutdown;
2395 	cbe_lun->be = &ctl_be_block_driver;
2396 
2397 	if ((params->flags & CTL_LUN_FLAG_SERIAL_NUM) == 0) {
2398 		snprintf(tmpstr, sizeof(tmpstr), "MYSERIAL%04d",
2399 			 softc->num_luns);
2400 		strncpy((char *)cbe_lun->serial_num, tmpstr,
2401 			MIN(sizeof(cbe_lun->serial_num), sizeof(tmpstr)));
2402 
2403 		/* Tell the user what we used for a serial number */
2404 		strncpy((char *)params->serial_num, tmpstr,
2405 			MIN(sizeof(params->serial_num), sizeof(tmpstr)));
2406 	} else {
2407 		strncpy((char *)cbe_lun->serial_num, params->serial_num,
2408 			MIN(sizeof(cbe_lun->serial_num),
2409 			sizeof(params->serial_num)));
2410 	}
2411 	if ((params->flags & CTL_LUN_FLAG_DEVID) == 0) {
2412 		snprintf(tmpstr, sizeof(tmpstr), "MYDEVID%04d", softc->num_luns);
2413 		strncpy((char *)cbe_lun->device_id, tmpstr,
2414 			MIN(sizeof(cbe_lun->device_id), sizeof(tmpstr)));
2415 
2416 		/* Tell the user what we used for a device ID */
2417 		strncpy((char *)params->device_id, tmpstr,
2418 			MIN(sizeof(params->device_id), sizeof(tmpstr)));
2419 	} else {
2420 		strncpy((char *)cbe_lun->device_id, params->device_id,
2421 			MIN(sizeof(cbe_lun->device_id),
2422 			    sizeof(params->device_id)));
2423 	}
2424 
2425 	TASK_INIT(&be_lun->io_task, /*priority*/0, ctl_be_block_worker, be_lun);
2426 
2427 	be_lun->io_taskqueue = taskqueue_create("ctlblocktq", M_WAITOK,
2428 	    taskqueue_thread_enqueue, /*context*/&be_lun->io_taskqueue);
2429 
2430 	if (be_lun->io_taskqueue == NULL) {
2431 		snprintf(req->error_str, sizeof(req->error_str),
2432 			 "unable to create taskqueue");
2433 		goto bailout_error;
2434 	}
2435 
2436 	/*
2437 	 * Note that we start the same number of threads by default for
2438 	 * both the file case and the block device case.  For the file
2439 	 * case, we need multiple threads to allow concurrency, because the
2440 	 * vnode interface is designed to be a blocking interface.  For the
2441 	 * block device case, ZFS zvols at least will block the caller's
2442 	 * context in many instances, and so we need multiple threads to
2443 	 * overcome that problem.  Other block devices don't need as many
2444 	 * threads, but they shouldn't cause too many problems.
2445 	 *
2446 	 * If the user wants just a single thread for a block device, that
2447 	 * can be specified when the LUN is created, or the tunable/sysctl
2448 	 * can be changed to alter the default number of threads.
2449 	 */
2450 	retval = taskqueue_start_threads_in_proc(&be_lun->io_taskqueue,
2451 					 /*num threads*/num_threads,
2452 					 /*priority*/PUSER,
2453 					 /*proc*/control_softc->ctl_proc,
2454 					 /*thread name*/"block");
2455 
2456 	if (retval != 0)
2457 		goto bailout_error;
2458 
2459 	be_lun->num_threads = num_threads;
2460 
2461 	retval = ctl_add_lun(&be_lun->cbe_lun);
2462 	if (retval != 0) {
2463 		snprintf(req->error_str, sizeof(req->error_str),
2464 			 "ctl_add_lun() returned error %d, see dmesg for "
2465 			 "details", retval);
2466 		retval = 0;
2467 		goto bailout_error;
2468 	}
2469 
2470 	be_lun->disk_stats = devstat_new_entry("cbb", cbe_lun->lun_id,
2471 					       cbe_lun->blocksize,
2472 					       DEVSTAT_ALL_SUPPORTED,
2473 					       cbe_lun->lun_type
2474 					       | DEVSTAT_TYPE_IF_OTHER,
2475 					       DEVSTAT_PRIORITY_OTHER);
2476 
2477 	mtx_lock(&softc->lock);
2478 	softc->num_luns++;
2479 	SLIST_INSERT_HEAD(&softc->lun_list, be_lun, links);
2480 	mtx_unlock(&softc->lock);
2481 
2482 	params->req_lun_id = cbe_lun->lun_id;
2483 
2484 	return (retval);
2485 
2486 bailout_error:
2487 	req->status = CTL_LUN_ERROR;
2488 
2489 	if (be_lun->io_taskqueue != NULL)
2490 		taskqueue_free(be_lun->io_taskqueue);
2491 	ctl_be_block_close(be_lun);
2492 	if (be_lun->dev_path != NULL)
2493 		free(be_lun->dev_path, M_CTLBLK);
2494 	nvlist_destroy(cbe_lun->options);
2495 	mtx_destroy(&be_lun->queue_lock);
2496 	mtx_destroy(&be_lun->io_lock);
2497 	free(be_lun, M_CTLBLK);
2498 
2499 	return (retval);
2500 }
2501 
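/*
 * Remove a LUN: unlink it from the backend's list, take its media
 * offline and close the backing store, then ask CTL to remove the LUN
 * and sleep until the shutdown callback reports it fully unconfigured.
 */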
2502 static int
2503 ctl_be_block_rm(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
2504 {
2505 	struct ctl_lun_rm_params *params;
2506 	struct ctl_be_block_lun *be_lun;
2507 	struct ctl_be_lun *cbe_lun;
2508 	int retval;
2509 
2510 	params = &req->reqdata.rm;
2511 
2512 	sx_xlock(&softc->modify_lock);
2513 	mtx_lock(&softc->lock);
2514 	SLIST_FOREACH(be_lun, &softc->lun_list, links) {
2515 		if (be_lun->cbe_lun.lun_id == params->lun_id) {
2516 			SLIST_REMOVE(&softc->lun_list, be_lun,
2517 			    ctl_be_block_lun, links);
2518 			softc->num_luns--;
2519 			break;
2520 		}
2521 	}
2522 	mtx_unlock(&softc->lock);
2523 	sx_xunlock(&softc->modify_lock);
2524 	if (be_lun == NULL) {
2525 		snprintf(req->error_str, sizeof(req->error_str),
2526 			 "LUN %u is not managed by the block backend",
2527 			 params->lun_id);
2528 		goto bailout_error;
2529 	}
2530 	cbe_lun = &be_lun->cbe_lun;
2531 
2532 	if (be_lun->vn != NULL) {
2533 		cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
2534 		ctl_lun_no_media(cbe_lun);
2535 		taskqueue_drain_all(be_lun->io_taskqueue);
2536 		ctl_be_block_close(be_lun);
2537 	}
2538 
2539 	mtx_lock(&softc->lock);
2540 	be_lun->flags |= CTL_BE_BLOCK_LUN_WAITING;
2541 	mtx_unlock(&softc->lock);
2542 
2543 	retval = ctl_remove_lun(cbe_lun);
2544 	if (retval != 0) {
2545 		snprintf(req->error_str, sizeof(req->error_str),
2546 			 "error %d returned from ctl_remove_lun() for "
2547 			 "LUN %d", retval, params->lun_id);
2548 		mtx_lock(&softc->lock);
2549 		be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
2550 		mtx_unlock(&softc->lock);
2551 		goto bailout_error;
2552 	}
2553 
2554 	mtx_lock(&softc->lock);
2555 	while ((be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) == 0) {
2556 		retval = msleep(be_lun, &softc->lock, PCATCH, "ctlblockrm", 0);
2557 		if (retval == EINTR)
2558 			break;
2559 	}
2560 	be_lun->flags &= ~CTL_BE_BLOCK_LUN_WAITING;
2561 	if (be_lun->flags & CTL_BE_BLOCK_LUN_UNCONFIGURED) {
2562 		mtx_unlock(&softc->lock);
2563 		free(be_lun, M_CTLBLK);
2564 	} else {
2565 		mtx_unlock(&softc->lock);
2566 		return (EINTR);
2567 	}
2568 
2569 	req->status = CTL_LUN_OK;
2570 	return (0);
2571 
2572 bailout_error:
2573 	req->status = CTL_LUN_ERROR;
2574 	return (0);
2575 }
2576 
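/*
 * Modify an existing LUN: update its size and options, switch between
 * primary and secondary HA roles if requested, and reopen or close the
 * backing store as needed, notifying CTL of capacity and media changes.
 */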
2577 static int
2578 ctl_be_block_modify(struct ctl_be_block_softc *softc, struct ctl_lun_req *req)
2579 {
2580 	struct ctl_lun_modify_params *params;
2581 	struct ctl_be_block_lun *be_lun;
2582 	struct ctl_be_lun *cbe_lun;
2583 	const char *value;
2584 	uint64_t oldsize;
2585 	int error, wasprim;
2586 
2587 	params = &req->reqdata.modify;
2588 
2589 	sx_xlock(&softc->modify_lock);
2590 	mtx_lock(&softc->lock);
2591 	SLIST_FOREACH(be_lun, &softc->lun_list, links) {
2592 		if (be_lun->cbe_lun.lun_id == params->lun_id)
2593 			break;
2594 	}
2595 	mtx_unlock(&softc->lock);
2596 	if (be_lun == NULL) {
2597 		snprintf(req->error_str, sizeof(req->error_str),
2598 			 "LUN %u is not managed by the block backend",
2599 			 params->lun_id);
2600 		goto bailout_error;
2601 	}
2602 	cbe_lun = &be_lun->cbe_lun;
2603 
2604 	if (params->lun_size_bytes != 0)
2605 		be_lun->params.lun_size_bytes = params->lun_size_bytes;
2606 
2607 	if (req->args_nvl != NULL) {
2608 		nvlist_destroy(cbe_lun->options);
2609 		cbe_lun->options = nvlist_clone(req->args_nvl);
2610 	}
2611 
2612 	wasprim = (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY);
2613 	value = dnvlist_get_string(cbe_lun->options, "ha_role", NULL);
2614 	if (value != NULL) {
2615 		if (strcmp(value, "primary") == 0)
2616 			cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
2617 		else
2618 			cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY;
2619 	} else if (control_softc->flags & CTL_FLAG_ACTIVE_SHELF)
2620 		cbe_lun->flags |= CTL_LUN_FLAG_PRIMARY;
2621 	else
2622 		cbe_lun->flags &= ~CTL_LUN_FLAG_PRIMARY;
2623 	if (wasprim != (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)) {
2624 		if (cbe_lun->flags & CTL_LUN_FLAG_PRIMARY)
2625 			ctl_lun_primary(cbe_lun);
2626 		else
2627 			ctl_lun_secondary(cbe_lun);
2628 	}
2629 
2630 	oldsize = be_lun->size_blocks;
2631 	if ((cbe_lun->flags & CTL_LUN_FLAG_PRIMARY) ||
2632 	    control_softc->ha_mode == CTL_HA_MODE_SER_ONLY) {
2633 		if (be_lun->vn == NULL)
2634 			error = ctl_be_block_open(be_lun, req);
2635 		else if (vn_isdisk_error(be_lun->vn, &error))
2636 			error = ctl_be_block_open_dev(be_lun, req);
2637 		else if (be_lun->vn->v_type == VREG) {
2638 			vn_lock(be_lun->vn, LK_SHARED | LK_RETRY);
2639 			error = ctl_be_block_open_file(be_lun, req);
2640 			VOP_UNLOCK(be_lun->vn);
2641 		} else
2642 			error = EINVAL;
2643 		if ((cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) &&
2644 		    be_lun->vn != NULL) {
2645 			cbe_lun->flags &= ~CTL_LUN_FLAG_NO_MEDIA;
2646 			ctl_lun_has_media(cbe_lun);
2647 		} else if ((cbe_lun->flags & CTL_LUN_FLAG_NO_MEDIA) == 0 &&
2648 		    be_lun->vn == NULL) {
2649 			cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
2650 			ctl_lun_no_media(cbe_lun);
2651 		}
2652 		cbe_lun->flags &= ~CTL_LUN_FLAG_EJECTED;
2653 	} else {
2654 		if (be_lun->vn != NULL) {
2655 			cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
2656 			ctl_lun_no_media(cbe_lun);
2657 			taskqueue_drain_all(be_lun->io_taskqueue);
2658 			error = ctl_be_block_close(be_lun);
2659 		} else
2660 			error = 0;
2661 	}
2662 	if (be_lun->size_blocks != oldsize)
2663 		ctl_lun_capacity_changed(cbe_lun);
2664 
2665 	/* Tell the user the exact size we ended up using */
2666 	params->lun_size_bytes = be_lun->size_bytes;
2667 
2668 	sx_xunlock(&softc->modify_lock);
2669 	req->status = error ? CTL_LUN_WARNING : CTL_LUN_OK;
2670 	return (0);
2671 
2672 bailout_error:
2673 	sx_xunlock(&softc->modify_lock);
2674 	req->status = CTL_LUN_ERROR;
2675 	return (0);
2676 }
2677 
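/*
 * Shutdown callback invoked once CTL has finished removing the LUN.
 * Tears down the taskqueue, statistics and locks, then either wakes a
 * thread waiting in ctl_be_block_rm() or frees the LUN outright.
 */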
2678 static void
2679 ctl_be_block_lun_shutdown(struct ctl_be_lun *cbe_lun)
2680 {
2681 	struct ctl_be_block_lun *be_lun = (struct ctl_be_block_lun *)cbe_lun;
2682 	struct ctl_be_block_softc *softc = be_lun->softc;
2683 
2684 	taskqueue_drain_all(be_lun->io_taskqueue);
2685 	taskqueue_free(be_lun->io_taskqueue);
2686 	if (be_lun->disk_stats != NULL)
2687 		devstat_remove_entry(be_lun->disk_stats);
2688 	nvlist_destroy(be_lun->cbe_lun.options);
2689 	free(be_lun->dev_path, M_CTLBLK);
2690 	mtx_destroy(&be_lun->queue_lock);
2691 	mtx_destroy(&be_lun->io_lock);
2692 
2693 	mtx_lock(&softc->lock);
2694 	be_lun->flags |= CTL_BE_BLOCK_LUN_UNCONFIGURED;
2695 	if (be_lun->flags & CTL_BE_BLOCK_LUN_WAITING)
2696 		wakeup(be_lun);
2697 	else
2698 		free(be_lun, M_CTLBLK);
2699 	mtx_unlock(&softc->lock);
2700 }
2701 
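/*
 * Entry point for configuration (non-data) write commands.  Commands
 * that must touch the backing store are queued to the worker thread;
 * START STOP UNIT and PREVENT ALLOW MEDIUM REMOVAL are handled inline.
 */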
2702 static int
2703 ctl_be_block_config_write(union ctl_io *io)
2704 {
2705 	struct ctl_be_block_lun *be_lun;
2706 	struct ctl_be_lun *cbe_lun;
2707 	int retval;
2708 
2709 	DPRINTF("entered\n");
2710 
2711 	cbe_lun = CTL_BACKEND_LUN(io);
2712 	be_lun = (struct ctl_be_block_lun *)cbe_lun;
2713 
2714 	retval = 0;
2715 	switch (io->scsiio.cdb[0]) {
2716 	case SYNCHRONIZE_CACHE:
2717 	case SYNCHRONIZE_CACHE_16:
2718 	case WRITE_SAME_10:
2719 	case WRITE_SAME_16:
2720 	case UNMAP:
2721 		/*
2722 		 * The upper level CTL code will filter out any CDBs with
2723 		 * the immediate bit set and return the proper error.
2724 		 *
2725 		 * We don't really need to worry about what LBA range the
2726 		 * user asked to be synced out.  When they issue a sync
2727 		 * cache command, we'll sync out the whole thing.
2728 		 */
2729 		mtx_lock(&be_lun->queue_lock);
2730 		STAILQ_INSERT_TAIL(&be_lun->config_write_queue, &io->io_hdr,
2731 				   links);
2732 		mtx_unlock(&be_lun->queue_lock);
2733 		taskqueue_enqueue(be_lun->io_taskqueue, &be_lun->io_task);
2734 		break;
2735 	case START_STOP_UNIT: {
2736 		struct scsi_start_stop_unit *cdb;
2737 		struct ctl_lun_req req;
2738 
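		/*
		 * The local "req" is passed to ctl_be_block_open() purely
		 * as scratch space for error reporting; nothing reads it
		 * afterwards.
		 */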
2739 		cdb = (struct scsi_start_stop_unit *)io->scsiio.cdb;
2740 		if ((cdb->how & SSS_PC_MASK) != 0) {
2741 			ctl_set_success(&io->scsiio);
2742 			ctl_config_write_done(io);
2743 			break;
2744 		}
2745 		if (cdb->how & SSS_START) {
2746 			if ((cdb->how & SSS_LOEJ) && be_lun->vn == NULL) {
2747 				retval = ctl_be_block_open(be_lun, &req);
2748 				cbe_lun->flags &= ~CTL_LUN_FLAG_EJECTED;
2749 				if (retval == 0) {
2750 					cbe_lun->flags &= ~CTL_LUN_FLAG_NO_MEDIA;
2751 					ctl_lun_has_media(cbe_lun);
2752 				} else {
2753 					cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
2754 					ctl_lun_no_media(cbe_lun);
2755 				}
2756 			}
2757 			ctl_start_lun(cbe_lun);
2758 		} else {
2759 			ctl_stop_lun(cbe_lun);
2760 			if (cdb->how & SSS_LOEJ) {
2761 				cbe_lun->flags |= CTL_LUN_FLAG_NO_MEDIA;
2762 				cbe_lun->flags |= CTL_LUN_FLAG_EJECTED;
2763 				ctl_lun_ejected(cbe_lun);
2764 				if (be_lun->vn != NULL)
2765 					ctl_be_block_close(be_lun);
2766 			}
2767 		}
2768 
2769 		ctl_set_success(&io->scsiio);
2770 		ctl_config_write_done(io);
2771 		break;
2772 	}
2773 	case PREVENT_ALLOW:
2774 		ctl_set_success(&io->scsiio);
2775 		ctl_config_write_done(io);
2776 		break;
2777 	default:
2778 		ctl_set_invalid_opcode(&io->scsiio);
2779 		ctl_config_write_done(io);
2780 		retval = CTL_RETVAL_COMPLETE;
2781 		break;
2782 	}
2783 
2784 	return (retval);
2785 }
2786 
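/*
 * Entry point for configuration read commands.  Only SERVICE ACTION IN
 * (GET LBA STATUS) is supported; it is queued to the worker thread
 * since serving it may require probing the backing store.
 */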
2787 static int
2788 ctl_be_block_config_read(union ctl_io *io)
2789 {
2790 	struct ctl_be_block_lun *be_lun;
2791 	int retval = 0;
2792 
2793 	DPRINTF("entered\n");
2794 
2795 	be_lun = (struct ctl_be_block_lun *)CTL_BACKEND_LUN(io);
2796 
2797 	switch (io->scsiio.cdb[0]) {
2798 	case SERVICE_ACTION_IN:
2799 		if (io->scsiio.cdb[1] == SGLS_SERVICE_ACTION) {
2800 			mtx_lock(&be_lun->queue_lock);
2801 			STAILQ_INSERT_TAIL(&be_lun->config_read_queue,
2802 			    &io->io_hdr, links);
2803 			mtx_unlock(&be_lun->queue_lock);
2804 			taskqueue_enqueue(be_lun->io_taskqueue,
2805 			    &be_lun->io_task);
2806 			retval = CTL_RETVAL_QUEUED;
2807 			break;
2808 		}
2809 		ctl_set_invalid_field(&io->scsiio,
2810 				      /*sks_valid*/ 1,
2811 				      /*command*/ 1,
2812 				      /*field*/ 1,
2813 				      /*bit_valid*/ 1,
2814 				      /*bit*/ 4);
2815 		ctl_config_read_done(io);
2816 		retval = CTL_RETVAL_COMPLETE;
2817 		break;
2818 	default:
2819 		ctl_set_invalid_opcode(&io->scsiio);
2820 		ctl_config_read_done(io);
2821 		retval = CTL_RETVAL_COMPLETE;
2822 		break;
2823 	}
2824 
2825 	return (retval);
2826 }
2827 
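/*
 * Append backend-specific status to the LUN XML: currently just the
 * number of worker threads.
 */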
2828 static int
2829 ctl_be_block_lun_info(struct ctl_be_lun *cbe_lun, struct sbuf *sb)
2830 {
2831 	struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)cbe_lun;
2832 	int retval;
2833 
2834 	retval = sbuf_printf(sb, "\t<num_threads>");
2835 	if (retval != 0)
2836 		goto bailout;
2837 	retval = sbuf_printf(sb, "%d", lun->num_threads);
2838 	if (retval != 0)
2839 		goto bailout;
2840 	retval = sbuf_printf(sb, "</num_threads>\n");
2841 
2842 bailout:
2843 	return (retval);
2844 }
2845 
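/*
 * Return a backend-specific LUN attribute, or UINT64_MAX if the backing
 * store provides no attribute method.
 */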
2846 static uint64_t
2847 ctl_be_block_lun_attr(struct ctl_be_lun *cbe_lun, const char *attrname)
2848 {
2849 	struct ctl_be_block_lun *lun = (struct ctl_be_block_lun *)cbe_lun;
2850 
2851 	if (lun->getattr == NULL)
2852 		return (UINT64_MAX);
2853 	return (lun->getattr(lun, attrname));
2854 }
2855 
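/*
 * Module initialization: set up the softc locks, create the UMA zones
 * used for I/O descriptors and data buffer segments, and initialize the
 * LUN list.
 */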
2856 static int
2857 ctl_be_block_init(void)
2858 {
2859 	struct ctl_be_block_softc *softc = &backend_block_softc;
2860 
2861 	sx_init(&softc->modify_lock, "ctlblock modify");
2862 	mtx_init(&softc->lock, "ctlblock", NULL, MTX_DEF);
2863 	softc->beio_zone = uma_zcreate("beio", sizeof(struct ctl_be_block_io),
2864 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
2865 	softc->bufmin_zone = uma_zcreate("ctlblockmin", CTLBLK_MIN_SEG,
2866 	    NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0);
2867 	if (CTLBLK_MIN_SEG < CTLBLK_MAX_SEG)
2868 		softc->bufmax_zone = uma_zcreate("ctlblockmax", CTLBLK_MAX_SEG,
2869 		    NULL, NULL, NULL, NULL, /*align*/ 0, /*flags*/0);
2870 	SLIST_INIT(&softc->lun_list);
2871 	return (0);
2872 }
2873 
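/*
 * Module shutdown: remove any remaining LUNs, then destroy the UMA
 * zones and softc locks.
 */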
2874 static int
2875 ctl_be_block_shutdown(void)
2876 {
2877 	struct ctl_be_block_softc *softc = &backend_block_softc;
2878 	struct ctl_be_block_lun *lun;
2879 
2880 	mtx_lock(&softc->lock);
2881 	while ((lun = SLIST_FIRST(&softc->lun_list)) != NULL) {
2882 		SLIST_REMOVE_HEAD(&softc->lun_list, links);
2883 		softc->num_luns--;
2884 		/*
2885 		 * Drop our lock here.  Since ctl_remove_lun() can call
2886 		 * back into us, this could potentially lead to a recursive
2887 		 * lock of the same mutex, which would cause a hang.
2888 		 */
2889 		mtx_unlock(&softc->lock);
2890 		ctl_remove_lun(&lun->cbe_lun);
2891 		mtx_lock(&softc->lock);
2892 	}
2893 	mtx_unlock(&softc->lock);
2894 	uma_zdestroy(softc->bufmin_zone);
2895 	if (CTLBLK_MIN_SEG < CTLBLK_MAX_SEG)
2896 		uma_zdestroy(softc->bufmax_zone);
2897 	uma_zdestroy(softc->beio_zone);
2898 	mtx_destroy(&softc->lock);
2899 	sx_destroy(&softc->modify_lock);
2900 	return (0);
2901 }
2902