xref: /freebsd/usr.sbin/camdd/camdd.c (revision 0957b409)
1 /*-
2  * Copyright (c) 1997-2007 Kenneth D. Merry
3  * Copyright (c) 2013, 2014, 2015 Spectra Logic Corporation
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions, and the following disclaimer,
11  *    without modification.
12  * 2. Redistributions in binary form must reproduce at minimum a disclaimer
13  *    substantially similar to the "NO WARRANTY" disclaimer below
14  *    ("Disclaimer") and any redistribution must be conditioned upon
15  *    including a substantially similar Disclaimer requirement for further
16  *    binary redistribution.
17  *
18  * NO WARRANTY
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
22  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
27  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
28  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGES.
30  *
31  * Authors: Ken Merry           (Spectra Logic Corporation)
32  */
33 
34 /*
35  * This is eventually intended to be:
36  * - A basic data transfer/copy utility
37  * - A simple benchmark utility
38  * - An example of how to use the asynchronous pass(4) driver interface.
39  */
40 #include <sys/cdefs.h>
41 __FBSDID("$FreeBSD$");
42 
43 #include <sys/ioctl.h>
44 #include <sys/stdint.h>
45 #include <sys/types.h>
46 #include <sys/endian.h>
47 #include <sys/param.h>
48 #include <sys/sbuf.h>
49 #include <sys/stat.h>
50 #include <sys/event.h>
51 #include <sys/time.h>
52 #include <sys/uio.h>
53 #include <vm/vm.h>
54 #include <machine/bus.h>
55 #include <sys/bus.h>
56 #include <sys/bus_dma.h>
57 #include <sys/mtio.h>
58 #include <sys/conf.h>
59 #include <sys/disk.h>
60 
61 #include <stdio.h>
62 #include <stdlib.h>
63 #include <semaphore.h>
64 #include <string.h>
65 #include <unistd.h>
66 #include <inttypes.h>
67 #include <limits.h>
68 #include <fcntl.h>
69 #include <ctype.h>
70 #include <err.h>
71 #include <libutil.h>
72 #include <pthread.h>
73 #include <assert.h>
74 #include <bsdxml.h>
75 
76 #include <cam/cam.h>
77 #include <cam/cam_debug.h>
78 #include <cam/cam_ccb.h>
79 #include <cam/scsi/scsi_all.h>
80 #include <cam/scsi/scsi_da.h>
81 #include <cam/scsi/scsi_pass.h>
82 #include <cam/scsi/scsi_message.h>
83 #include <cam/scsi/smp_all.h>
84 #include <camlib.h>
85 #include <mtlib.h>
86 #include <zlib.h>
87 
/*
 * Command identifiers.  Despite the "mask" in the type name these are
 * consecutive integers, not independent bits (READ == HELP | WRITE), so
 * they must be compared for equality rather than tested with '&'.
 */
typedef enum {
	CAMDD_CMD_NONE	= 0,
	CAMDD_CMD_HELP	= 1,
	CAMDD_CMD_WRITE	= 2,
	CAMDD_CMD_READ	= 3
} camdd_cmdmask;
94 
/*
 * Bit flags recording which arguments have been supplied.  Each value
 * occupies its own bit so that they can be OR'ed together (parse_btl()
 * does this for the BUS/TARGET/LUN bits).
 */
typedef enum {
	CAMDD_ARG_NONE		= 0,
	CAMDD_ARG_VERBOSE	= 1 << 0,
	CAMDD_ARG_DEVICE	= 1 << 1,
	CAMDD_ARG_BUS		= 1 << 2,
	CAMDD_ARG_TARGET	= 1 << 3,
	CAMDD_ARG_LUN		= 1 << 4,
	CAMDD_ARG_UNIT		= 1 << 5,
	CAMDD_ARG_TIMEOUT	= 1 << 6,
	CAMDD_ARG_ERR_RECOVER	= 1 << 7,
	CAMDD_ARG_RETRIES	= 1 << 8
} camdd_argmask;
107 
/*
 * Kind of endpoint a camdd_dev represents.  The value selects which
 * member of union camdd_dev_spec is in use (see camdd_free_dev()).
 */
typedef enum {
	CAMDD_DEV_NONE	= 0,
	CAMDD_DEV_PASS	= 1,
	CAMDD_DEV_FILE	= 2
} camdd_dev_type;
113 
114 struct camdd_io_opts {
115 	camdd_dev_type	dev_type;
116 	char		*dev_name;
117 	uint64_t	blocksize;
118 	uint64_t	queue_depth;
119 	uint64_t	offset;
120 	int		min_cmd_size;
121 	int		write_dev;
122 	uint64_t	debug;
123 };
124 
/*
 * Discriminator for union camdd_buf_types: a data buffer owns backing
 * store, an indirect buffer refers to part of another buffer.
 */
typedef enum {
	CAMDD_BUF_NONE		= 0,
	CAMDD_BUF_DATA		= 1,
	CAMDD_BUF_INDIRECT	= 2
} camdd_buf_type;
130 
/*
 * An indirect buffer: a window onto a region of some other buffer.
 */
struct camdd_buf_indirect {
	struct camdd_buf *src_buf;	/* buffer this window refers to */
	uint64_t	  offset;	/* byte offset into src_buf */
	uint8_t		 *start_ptr;	/* first byte of the window */
	size_t		  len;		/* window length in bytes */
};
151 
152 struct camdd_buf_data {
153 	/*
154 	 * Buffer allocated when we allocate this camdd_buf.  This should
155 	 * be the size of the blocksize for this device.
156 	 */
157 	uint8_t			*buf;
158 
159 	/*
160 	 * The amount of backing store allocated in buf.  Generally this
161 	 * will be the blocksize of the device.
162 	 */
163 	uint32_t		 alloc_len;
164 
165 	/*
166 	 * The amount of data that was put into the buffer (on reads) or
167 	 * the amount of data we have put onto the src_list so far (on
168 	 * writes).
169 	 */
170 	uint32_t		 fill_len;
171 
172 	/*
173 	 * The amount of data that was not transferred.
174 	 */
175 	uint32_t		 resid;
176 
177 	/*
178 	 * Starting byte offset on the reader.
179 	 */
180 	uint64_t		 src_start_offset;
181 
182 	/*
183 	 * CCB used for pass(4) device targets.
184 	 */
185 	union ccb		 ccb;
186 
187 	/*
188 	 * Number of scatter/gather segments.
189 	 */
190 	int			 sg_count;
191 
192 	/*
193 	 * Set if we had to tack on an extra buffer to round the transfer
194 	 * up to a sector size.
195 	 */
196 	int			 extra_buf;
197 
198 	/*
199 	 * Scatter/gather list used generally when we're the writer for a
200 	 * pass(4) device.
201 	 */
202 	bus_dma_segment_t	*segs;
203 
204 	/*
205 	 * Scatter/gather list used generally when we're the writer for a
206 	 * file or block device;
207 	 */
208 	struct iovec		*iovec;
209 };
210 
211 union camdd_buf_types {
212 	struct camdd_buf_indirect	indirect;
213 	struct camdd_buf_data		data;
214 };
215 
/*
 * Completion status recorded in camdd_buf.status.
 */
typedef enum {
	CAMDD_STATUS_NONE	= 0,
	CAMDD_STATUS_OK		= 1,
	CAMDD_STATUS_SHORT_IO	= 2,
	CAMDD_STATUS_EOF	= 3,
	CAMDD_STATUS_ERROR	= 4
} camdd_buf_status;
223 
224 struct camdd_buf {
225 	camdd_buf_type		 buf_type;
226 	union camdd_buf_types	 buf_type_spec;
227 
228 	camdd_buf_status	 status;
229 
230 	uint64_t		 lba;
231 	size_t			 len;
232 
233 	/*
234 	 * A reference count of how many indirect buffers point to this
235 	 * buffer.
236 	 */
237 	int			 refcount;
238 
239 	/*
240 	 * A link back to our parent device.
241 	 */
242 	struct camdd_dev	*dev;
243 	STAILQ_ENTRY(camdd_buf)  links;
244 	STAILQ_ENTRY(camdd_buf)  work_links;
245 
246 	/*
247 	 * A count of the buffers on the src_list.
248 	 */
249 	int			 src_count;
250 
251 	/*
252 	 * List of buffers from our partner thread that are the components
253 	 * of this buffer for the I/O.  Uses src_links.
254 	 */
255 	STAILQ_HEAD(,camdd_buf)	 src_list;
256 	STAILQ_ENTRY(camdd_buf)  src_links;
257 };
258 
/* Presumably the count of real camdd_dev_type values (PASS and FILE). */
#define	NUM_DEV_TYPES	2
260 
/*
 * State specific to a pass(4) device endpoint.
 */
struct camdd_dev_pass {
	int			 scsi_dev_type;
	int			 protocol;
	struct cam_device	*dev;		/* closed by camdd_free_dev() */
	uint64_t		 max_sector;
	uint32_t		 block_len;
	uint32_t		 cpi_maxio;
};
269 
/*
 * Classification of a file endpoint, derived from fstat(2) and the
 * FIODTYPE ioctl in camdd_probe_file().
 */
typedef enum {
	CAMDD_FILE_NONE	= 0,
	CAMDD_FILE_REG	= 1,
	CAMDD_FILE_STD	= 2,
	CAMDD_FILE_PIPE	= 3,
	CAMDD_FILE_DISK	= 4,
	CAMDD_FILE_TAPE	= 5,
	CAMDD_FILE_TTY	= 6,
	CAMDD_FILE_MEM	= 7
} camdd_file_type;
280 
/*
 * Capability flags for a file endpoint.  CAN_SEEK is set for regular
 * files by camdd_probe_file().
 */
typedef enum {
	CAMDD_FF_NONE		= 0,
	CAMDD_FF_CAN_SEEK	= 1 << 0
} camdd_file_flags;
285 
286 struct camdd_dev_file {
287 	int			 fd;
288 	struct stat		 sb;
289 	char			 filename[MAXPATHLEN + 1];
290 	camdd_file_type		 file_type;
291 	camdd_file_flags	 file_flags;
292 	uint8_t			*tmp_buf;
293 };
294 
/*
 * State specific to a block device endpoint.
 */
struct camdd_dev_block {
	int		 fd;
	uint64_t	 size_bytes;
	uint32_t	 block_len;
};
300 
301 union camdd_dev_spec {
302 	struct camdd_dev_pass	pass;
303 	struct camdd_dev_file	file;
304 	struct camdd_dev_block	block;
305 };
306 
/*
 * Per-device state bits; one bit each so they can be combined.
 */
typedef enum {
	CAMDD_DEV_FLAG_NONE		= 0,
	CAMDD_DEV_FLAG_EOF		= 1 << 0,
	CAMDD_DEV_FLAG_PEER_EOF		= 1 << 1,
	CAMDD_DEV_FLAG_ACTIVE		= 1 << 2,
	CAMDD_DEV_FLAG_EOF_SENT		= 1 << 3,
	CAMDD_DEV_FLAG_EOF_QUEUED	= 1 << 4
} camdd_dev_flags;
315 
316 struct camdd_dev {
317 	camdd_dev_type		 dev_type;
318 	union camdd_dev_spec	 dev_spec;
319 	camdd_dev_flags		 flags;
320 	char			 device_name[MAXPATHLEN+1];
321 	uint32_t		 blocksize;
322 	uint32_t		 sector_size;
323 	uint64_t		 max_sector;
324 	uint64_t		 sector_io_limit;
325 	int			 min_cmd_size;
326 	int			 write_dev;
327 	int			 retry_count;
328 	int			 io_timeout;
329 	int			 debug;
330 	uint64_t		 start_offset_bytes;
331 	uint64_t		 next_io_pos_bytes;
332 	uint64_t		 next_peer_pos_bytes;
333 	uint64_t		 next_completion_pos_bytes;
334 	uint64_t		 peer_bytes_queued;
335 	uint64_t		 bytes_transferred;
336 	uint32_t		 target_queue_depth;
337 	uint32_t		 cur_active_io;
338 	uint8_t			*extra_buf;
339 	uint32_t		 extra_buf_len;
340 	struct camdd_dev	*peer_dev;
341 	pthread_mutex_t		 mutex;
342 	pthread_cond_t		 cond;
343 	int			 kq;
344 
345 	int			 (*run)(struct camdd_dev *dev);
346 	int			 (*fetch)(struct camdd_dev *dev);
347 
348 	/*
349 	 * Buffers that are available for I/O.  Uses links.
350 	 */
351 	STAILQ_HEAD(,camdd_buf)	 free_queue;
352 
353 	/*
354 	 * Free indirect buffers.  These are used for breaking a large
355 	 * buffer into multiple pieces.
356 	 */
357 	STAILQ_HEAD(,camdd_buf)	 free_indirect_queue;
358 
359 	/*
360 	 * Buffers that have been queued to the kernel.  Uses links.
361 	 */
362 	STAILQ_HEAD(,camdd_buf)	 active_queue;
363 
364 	/*
365 	 * Will generally contain one of our buffers that is waiting for enough
366 	 * I/O from our partner thread to be able to execute.  This will
367 	 * generally happen when our per-I/O-size is larger than the
368 	 * partner thread's per-I/O-size.  Uses links.
369 	 */
370 	STAILQ_HEAD(,camdd_buf)	 pending_queue;
371 
372 	/*
373 	 * Number of buffers on the pending queue
374 	 */
375 	int			 num_pending_queue;
376 
377 	/*
378 	 * Buffers that are filled and ready to execute.  This is used when
379 	 * our partner (reader) thread sends us blocks that are larger than
380 	 * our blocksize, and so we have to split them into multiple pieces.
381 	 */
382 	STAILQ_HEAD(,camdd_buf)	 run_queue;
383 
384 	/*
385 	 * Number of buffers on the run queue.
386 	 */
387 	int			 num_run_queue;
388 
389 	STAILQ_HEAD(,camdd_buf)	 reorder_queue;
390 
391 	int			 num_reorder_queue;
392 
393 	/*
394 	 * Buffers that have been queued to us by our partner thread
395 	 * (generally the reader thread) to be written out.  Uses
396 	 * work_links.
397 	 */
398 	STAILQ_HEAD(,camdd_buf)	 work_queue;
399 
400 	/*
401 	 * Buffers that have been completed by our partner thread.  Uses
402 	 * work_links.
403 	 */
404 	STAILQ_HEAD(,camdd_buf)	 peer_done_queue;
405 
406 	/*
407 	 * Number of buffers on the peer done queue.
408 	 */
409 	uint32_t		 num_peer_done_queue;
410 
411 	/*
412 	 * A list of buffers that we have queued to our peer thread.  Uses
413 	 * links.
414 	 */
415 	STAILQ_HEAD(,camdd_buf)	 peer_work_queue;
416 
417 	/*
418 	 * Number of buffers on the peer work queue.
419 	 */
420 	uint32_t		 num_peer_work_queue;
421 };
422 
static sem_t camdd_sem;

/*
 * Process-wide request flags.  They are declared volatile sig_atomic_t
 * so that a store made asynchronously (presumably from
 * camdd_sig_handler(), which is not visible here) is not cached in a
 * register by code polling the flag.  Note that volatile does not by
 * itself provide inter-thread ordering.
 */
static volatile sig_atomic_t need_exit = 0;
static volatile sig_atomic_t error_exit = 0;
static volatile sig_atomic_t need_status = 0;
427 
/*
 * Smaller of two values.  Every argument and the whole expansion are
 * parenthesized so the macro composes with surrounding operators
 * (e.g. "2 * min(a, b)" or "min(x & m, y)").  The selected argument is
 * evaluated twice, so do not pass expressions with side effects.
 */
#ifndef min
#define	min(a, b) (((a) < (b)) ? (a) : (b))
#endif
431 
432 
/*
 * Shorthand for the first two slots of a CCB header's peripheral
 * private area, viewed either as a pointer or as an integer field.
 */
#define	ppriv_ptr0	periph_priv.entries[0].ptr
#define	ppriv_ptr1	periph_priv.entries[1].ptr
#define	ppriv_field0	periph_priv.entries[0].field
#define	ppriv_field1	periph_priv.entries[1].field

/* Slot 0 holds the buffer pointer associated with a CCB. */
#define	ccb_buf		ppriv_ptr0
440 
/*
 * Default block sizes (bytes) and queue depths for file and pass(4)
 * endpoints.
 */
#define	CAMDD_FILE_DEFAULT_BLOCK	524288
#define	CAMDD_FILE_DEFAULT_DEPTH	1
#define	CAMDD_PASS_MAX_BLOCK		1048576
#define	CAMDD_PASS_DEFAULT_DEPTH	6
/*
 * 60 * 1000.  The expression is parenthesized so that the macro expands
 * to a single value in any context, e.g. "x / CAMDD_PASS_RW_TIMEOUT".
 */
#define	CAMDD_PASS_RW_TIMEOUT		(60 * 1000)
446 
447 static int parse_btl(char *tstr, int *bus, int *target, int *lun,
448 		     camdd_argmask *arglst);
449 void camdd_free_dev(struct camdd_dev *dev);
450 struct camdd_dev *camdd_alloc_dev(camdd_dev_type dev_type,
451 				  struct kevent *new_ke, int num_ke,
452 				  int retry_count, int timeout);
453 static struct camdd_buf *camdd_alloc_buf(struct camdd_dev *dev,
454 					 camdd_buf_type buf_type);
455 void camdd_release_buf(struct camdd_buf *buf);
456 struct camdd_buf *camdd_get_buf(struct camdd_dev *dev, camdd_buf_type buf_type);
457 int camdd_buf_sg_create(struct camdd_buf *buf, int iovec,
458 			uint32_t sector_size, uint32_t *num_sectors_used,
459 			int *double_buf_needed);
460 uint32_t camdd_buf_get_len(struct camdd_buf *buf);
461 void camdd_buf_add_child(struct camdd_buf *buf, struct camdd_buf *child_buf);
462 int camdd_probe_tape(int fd, char *filename, uint64_t *max_iosize,
463 		     uint64_t *max_blk, uint64_t *min_blk, uint64_t *blk_gran);
464 int camdd_probe_pass_scsi(struct cam_device *cam_dev, union ccb *ccb,
465          camdd_argmask arglist, int probe_retry_count,
466          int probe_timeout, uint64_t *maxsector, uint32_t *block_len);
467 struct camdd_dev *camdd_probe_file(int fd, struct camdd_io_opts *io_opts,
468 				   int retry_count, int timeout);
469 struct camdd_dev *camdd_probe_pass(struct cam_device *cam_dev,
470 				   struct camdd_io_opts *io_opts,
471 				   camdd_argmask arglist, int probe_retry_count,
472 				   int probe_timeout, int io_retry_count,
473 				   int io_timeout);
474 void *camdd_file_worker(void *arg);
475 camdd_buf_status camdd_ccb_status(union ccb *ccb, int protocol);
476 int camdd_get_cgd(struct cam_device *device, struct ccb_getdev *cgd);
477 int camdd_queue_peer_buf(struct camdd_dev *dev, struct camdd_buf *buf);
478 int camdd_complete_peer_buf(struct camdd_dev *dev, struct camdd_buf *peer_buf);
479 void camdd_peer_done(struct camdd_buf *buf);
480 void camdd_complete_buf(struct camdd_dev *dev, struct camdd_buf *buf,
481 			int *error_count);
482 int camdd_pass_fetch(struct camdd_dev *dev);
483 int camdd_file_run(struct camdd_dev *dev);
484 int camdd_pass_run(struct camdd_dev *dev);
485 int camdd_get_next_lba_len(struct camdd_dev *dev, uint64_t *lba, ssize_t *len);
486 int camdd_queue(struct camdd_dev *dev, struct camdd_buf *read_buf);
487 void camdd_get_depth(struct camdd_dev *dev, uint32_t *our_depth,
488 		     uint32_t *peer_depth, uint32_t *our_bytes,
489 		     uint32_t *peer_bytes);
490 void *camdd_worker(void *arg);
491 void camdd_sig_handler(int sig);
492 void camdd_print_status(struct camdd_dev *camdd_dev,
493 			struct camdd_dev *other_dev,
494 			struct timespec *start_time);
495 int camdd_rw(struct camdd_io_opts *io_opts, int num_io_opts,
496 	     uint64_t max_io, int retry_count, int timeout);
497 int camdd_parse_io_opts(char *args, int is_write,
498 			struct camdd_io_opts *io_opts);
499 void usage(void);
500 
501 /*
502  * Parse out a bus, or a bus, target and lun in the following
503  * format:
504  * bus
505  * bus:target
506  * bus:target:lun
507  *
508  * Returns the number of parsed components, or 0.
509  */
510 static int
511 parse_btl(char *tstr, int *bus, int *target, int *lun, camdd_argmask *arglst)
512 {
513 	char *tmpstr;
514 	int convs = 0;
515 
516 	while (isspace(*tstr) && (*tstr != '\0'))
517 		tstr++;
518 
519 	tmpstr = (char *)strtok(tstr, ":");
520 	if ((tmpstr != NULL) && (*tmpstr != '\0')) {
521 		*bus = strtol(tmpstr, NULL, 0);
522 		*arglst |= CAMDD_ARG_BUS;
523 		convs++;
524 		tmpstr = (char *)strtok(NULL, ":");
525 		if ((tmpstr != NULL) && (*tmpstr != '\0')) {
526 			*target = strtol(tmpstr, NULL, 0);
527 			*arglst |= CAMDD_ARG_TARGET;
528 			convs++;
529 			tmpstr = (char *)strtok(NULL, ":");
530 			if ((tmpstr != NULL) && (*tmpstr != '\0')) {
531 				*lun = strtol(tmpstr, NULL, 0);
532 				*arglst |= CAMDD_ARG_LUN;
533 				convs++;
534 			}
535 		}
536 	}
537 
538 	return convs;
539 }
540 
541 /*
542  * XXX KDM clean up and free all of the buffers on the queue!
543  */
544 void
545 camdd_free_dev(struct camdd_dev *dev)
546 {
547 	if (dev == NULL)
548 		return;
549 
550 	switch (dev->dev_type) {
551 	case CAMDD_DEV_FILE: {
552 		struct camdd_dev_file *file_dev = &dev->dev_spec.file;
553 
554 		if (file_dev->fd != -1)
555 			close(file_dev->fd);
556 		free(file_dev->tmp_buf);
557 		break;
558 	}
559 	case CAMDD_DEV_PASS: {
560 		struct camdd_dev_pass *pass_dev = &dev->dev_spec.pass;
561 
562 		if (pass_dev->dev != NULL)
563 			cam_close_device(pass_dev->dev);
564 		break;
565 	}
566 	default:
567 		break;
568 	}
569 
570 	free(dev);
571 }
572 
573 struct camdd_dev *
574 camdd_alloc_dev(camdd_dev_type dev_type, struct kevent *new_ke, int num_ke,
575 		int retry_count, int timeout)
576 {
577 	struct camdd_dev *dev = NULL;
578 	struct kevent *ke;
579 	size_t ke_size;
580 	int retval = 0;
581 
582 	dev = calloc(1, sizeof(*dev));
583 	if (dev == NULL) {
584 		warn("%s: unable to malloc %zu bytes", __func__, sizeof(*dev));
585 		goto bailout;
586 	}
587 
588 	dev->dev_type = dev_type;
589 	dev->io_timeout = timeout;
590 	dev->retry_count = retry_count;
591 	STAILQ_INIT(&dev->free_queue);
592 	STAILQ_INIT(&dev->free_indirect_queue);
593 	STAILQ_INIT(&dev->active_queue);
594 	STAILQ_INIT(&dev->pending_queue);
595 	STAILQ_INIT(&dev->run_queue);
596 	STAILQ_INIT(&dev->reorder_queue);
597 	STAILQ_INIT(&dev->work_queue);
598 	STAILQ_INIT(&dev->peer_done_queue);
599 	STAILQ_INIT(&dev->peer_work_queue);
600 	retval = pthread_mutex_init(&dev->mutex, NULL);
601 	if (retval != 0) {
602 		warnc(retval, "%s: failed to initialize mutex", __func__);
603 		goto bailout;
604 	}
605 
606 	retval = pthread_cond_init(&dev->cond, NULL);
607 	if (retval != 0) {
608 		warnc(retval, "%s: failed to initialize condition variable",
609 		      __func__);
610 		goto bailout;
611 	}
612 
613 	dev->kq = kqueue();
614 	if (dev->kq == -1) {
615 		warn("%s: Unable to create kqueue", __func__);
616 		goto bailout;
617 	}
618 
619 	ke_size = sizeof(struct kevent) * (num_ke + 4);
620 	ke = calloc(1, ke_size);
621 	if (ke == NULL) {
622 		warn("%s: unable to malloc %zu bytes", __func__, ke_size);
623 		goto bailout;
624 	}
625 	if (num_ke > 0)
626 		bcopy(new_ke, ke, num_ke * sizeof(struct kevent));
627 
628 	EV_SET(&ke[num_ke++], (uintptr_t)&dev->work_queue, EVFILT_USER,
629 	       EV_ADD|EV_ENABLE|EV_CLEAR, 0,0, 0);
630 	EV_SET(&ke[num_ke++], (uintptr_t)&dev->peer_done_queue, EVFILT_USER,
631 	       EV_ADD|EV_ENABLE|EV_CLEAR, 0,0, 0);
632 	EV_SET(&ke[num_ke++], SIGINFO, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0,0,0);
633 	EV_SET(&ke[num_ke++], SIGINT, EVFILT_SIGNAL, EV_ADD|EV_ENABLE, 0,0,0);
634 
635 	retval = kevent(dev->kq, ke, num_ke, NULL, 0, NULL);
636 	if (retval == -1) {
637 		warn("%s: Unable to register kevents", __func__);
638 		goto bailout;
639 	}
640 
641 
642 	return (dev);
643 
644 bailout:
645 	free(dev);
646 
647 	return (NULL);
648 }
649 
650 static struct camdd_buf *
651 camdd_alloc_buf(struct camdd_dev *dev, camdd_buf_type buf_type)
652 {
653 	struct camdd_buf *buf = NULL;
654 	uint8_t *data_ptr = NULL;
655 
656 	/*
657 	 * We only need to allocate data space for data buffers.
658 	 */
659 	switch (buf_type) {
660 	case CAMDD_BUF_DATA:
661 		data_ptr = malloc(dev->blocksize);
662 		if (data_ptr == NULL) {
663 			warn("unable to allocate %u bytes", dev->blocksize);
664 			goto bailout_error;
665 		}
666 		break;
667 	default:
668 		break;
669 	}
670 
671 	buf = calloc(1, sizeof(*buf));
672 	if (buf == NULL) {
673 		warn("unable to allocate %zu bytes", sizeof(*buf));
674 		goto bailout_error;
675 	}
676 
677 	buf->buf_type = buf_type;
678 	buf->dev = dev;
679 	switch (buf_type) {
680 	case CAMDD_BUF_DATA: {
681 		struct camdd_buf_data *data;
682 
683 		data = &buf->buf_type_spec.data;
684 
685 		data->alloc_len = dev->blocksize;
686 		data->buf = data_ptr;
687 		break;
688 	}
689 	case CAMDD_BUF_INDIRECT:
690 		break;
691 	default:
692 		break;
693 	}
694 	STAILQ_INIT(&buf->src_list);
695 
696 	return (buf);
697 
698 bailout_error:
699 	free(data_ptr);
700 
701 	return (NULL);
702 }
703 
704 void
705 camdd_release_buf(struct camdd_buf *buf)
706 {
707 	struct camdd_dev *dev;
708 
709 	dev = buf->dev;
710 
711 	switch (buf->buf_type) {
712 	case CAMDD_BUF_DATA: {
713 		struct camdd_buf_data *data;
714 
715 		data = &buf->buf_type_spec.data;
716 
717 		if (data->segs != NULL) {
718 			if (data->extra_buf != 0) {
719 				void *extra_buf;
720 
721 				extra_buf = (void *)
722 				    data->segs[data->sg_count - 1].ds_addr;
723 				free(extra_buf);
724 				data->extra_buf = 0;
725 			}
726 			free(data->segs);
727 			data->segs = NULL;
728 			data->sg_count = 0;
729 		} else if (data->iovec != NULL) {
730 			if (data->extra_buf != 0) {
731 				free(data->iovec[data->sg_count - 1].iov_base);
732 				data->extra_buf = 0;
733 			}
734 			free(data->iovec);
735 			data->iovec = NULL;
736 			data->sg_count = 0;
737 		}
738 		STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
739 		break;
740 	}
741 	case CAMDD_BUF_INDIRECT:
742 		STAILQ_INSERT_TAIL(&dev->free_indirect_queue, buf, links);
743 		break;
744 	default:
745 		err(1, "%s: Invalid buffer type %d for released buffer",
746 		    __func__, buf->buf_type);
747 		break;
748 	}
749 }
750 
751 struct camdd_buf *
752 camdd_get_buf(struct camdd_dev *dev, camdd_buf_type buf_type)
753 {
754 	struct camdd_buf *buf = NULL;
755 
756 	switch (buf_type) {
757 	case CAMDD_BUF_DATA:
758 		buf = STAILQ_FIRST(&dev->free_queue);
759 		if (buf != NULL) {
760 			struct camdd_buf_data *data;
761 			uint8_t *data_ptr;
762 			uint32_t alloc_len;
763 
764 			STAILQ_REMOVE_HEAD(&dev->free_queue, links);
765 			data = &buf->buf_type_spec.data;
766 			data_ptr = data->buf;
767 			alloc_len = data->alloc_len;
768 			bzero(buf, sizeof(*buf));
769 			data->buf = data_ptr;
770 			data->alloc_len = alloc_len;
771 		}
772 		break;
773 	case CAMDD_BUF_INDIRECT:
774 		buf = STAILQ_FIRST(&dev->free_indirect_queue);
775 		if (buf != NULL) {
776 			STAILQ_REMOVE_HEAD(&dev->free_indirect_queue, links);
777 
778 			bzero(buf, sizeof(*buf));
779 		}
780 		break;
781 	default:
782 		warnx("Unknown buffer type %d requested", buf_type);
783 		break;
784 	}
785 
786 
787 	if (buf == NULL)
788 		return (camdd_alloc_buf(dev, buf_type));
789 	else {
790 		STAILQ_INIT(&buf->src_list);
791 		buf->dev = dev;
792 		buf->buf_type = buf_type;
793 
794 		return (buf);
795 	}
796 }
797 
798 int
799 camdd_buf_sg_create(struct camdd_buf *buf, int iovec, uint32_t sector_size,
800 		    uint32_t *num_sectors_used, int *double_buf_needed)
801 {
802 	struct camdd_buf *tmp_buf;
803 	struct camdd_buf_data *data;
804 	uint8_t *extra_buf = NULL;
805 	size_t extra_buf_len = 0;
806 	int extra_buf_attached = 0;
807 	int i, retval = 0;
808 
809 	data = &buf->buf_type_spec.data;
810 
811 	data->sg_count = buf->src_count;
812 	/*
813 	 * Compose a scatter/gather list from all of the buffers in the list.
814 	 * If the length of the buffer isn't a multiple of the sector size,
815 	 * we'll have to add an extra buffer.  This should only happen
816 	 * at the end of a transfer.
817 	 */
818 	if ((data->fill_len % sector_size) != 0) {
819 		extra_buf_len = sector_size - (data->fill_len % sector_size);
820 		extra_buf = calloc(extra_buf_len, 1);
821 		if (extra_buf == NULL) {
822 			warn("%s: unable to allocate %zu bytes for extra "
823 			    "buffer space", __func__, extra_buf_len);
824 			retval = 1;
825 			goto bailout;
826 		}
827 		data->extra_buf = 1;
828 		data->sg_count++;
829 	}
830 	if (iovec == 0) {
831 		data->segs = calloc(data->sg_count, sizeof(bus_dma_segment_t));
832 		if (data->segs == NULL) {
833 			warn("%s: unable to allocate %zu bytes for S/G list",
834 			    __func__, sizeof(bus_dma_segment_t) *
835 			    data->sg_count);
836 			retval = 1;
837 			goto bailout;
838 		}
839 
840 	} else {
841 		data->iovec = calloc(data->sg_count, sizeof(struct iovec));
842 		if (data->iovec == NULL) {
843 			warn("%s: unable to allocate %zu bytes for S/G list",
844 			    __func__, sizeof(struct iovec) * data->sg_count);
845 			retval = 1;
846 			goto bailout;
847 		}
848 	}
849 
850 	for (i = 0, tmp_buf = STAILQ_FIRST(&buf->src_list);
851 	     i < buf->src_count && tmp_buf != NULL; i++,
852 	     tmp_buf = STAILQ_NEXT(tmp_buf, src_links)) {
853 
854 		if (tmp_buf->buf_type == CAMDD_BUF_DATA) {
855 			struct camdd_buf_data *tmp_data;
856 
857 			tmp_data = &tmp_buf->buf_type_spec.data;
858 			if (iovec == 0) {
859 				data->segs[i].ds_addr =
860 				    (bus_addr_t) tmp_data->buf;
861 				data->segs[i].ds_len = tmp_data->fill_len -
862 				    tmp_data->resid;
863 			} else {
864 				data->iovec[i].iov_base = tmp_data->buf;
865 				data->iovec[i].iov_len = tmp_data->fill_len -
866 				    tmp_data->resid;
867 			}
868 			if (((tmp_data->fill_len - tmp_data->resid) %
869 			     sector_size) != 0)
870 				*double_buf_needed = 1;
871 		} else {
872 			struct camdd_buf_indirect *tmp_ind;
873 
874 			tmp_ind = &tmp_buf->buf_type_spec.indirect;
875 			if (iovec == 0) {
876 				data->segs[i].ds_addr =
877 				    (bus_addr_t)tmp_ind->start_ptr;
878 				data->segs[i].ds_len = tmp_ind->len;
879 			} else {
880 				data->iovec[i].iov_base = tmp_ind->start_ptr;
881 				data->iovec[i].iov_len = tmp_ind->len;
882 			}
883 			if ((tmp_ind->len % sector_size) != 0)
884 				*double_buf_needed = 1;
885 		}
886 	}
887 
888 	if (extra_buf != NULL) {
889 		if (iovec == 0) {
890 			data->segs[i].ds_addr = (bus_addr_t)extra_buf;
891 			data->segs[i].ds_len = extra_buf_len;
892 		} else {
893 			data->iovec[i].iov_base = extra_buf;
894 			data->iovec[i].iov_len = extra_buf_len;
895 		}
896 		extra_buf_attached = 1;
897 		i++;
898 	}
899 	if ((tmp_buf != NULL) || (i != data->sg_count)) {
900 		warnx("buffer source count does not match "
901 		      "number of buffers in list!");
902 		retval = 1;
903 		goto bailout;
904 	}
905 
906 bailout:
907 	if (retval == 0) {
908 		*num_sectors_used = (data->fill_len + extra_buf_len) /
909 		    sector_size;
910 	} else if (extra_buf_attached == 0) {
911 		/*
912 		 * If extra_buf isn't attached yet, we need to free it
913 		 * to avoid leaking.
914 		 */
915 		free(extra_buf);
916 		data->extra_buf = 0;
917 		data->sg_count--;
918 	}
919 	return (retval);
920 }
921 
922 uint32_t
923 camdd_buf_get_len(struct camdd_buf *buf)
924 {
925 	uint32_t len = 0;
926 
927 	if (buf->buf_type != CAMDD_BUF_DATA) {
928 		struct camdd_buf_indirect *indirect;
929 
930 		indirect = &buf->buf_type_spec.indirect;
931 		len = indirect->len;
932 	} else {
933 		struct camdd_buf_data *data;
934 
935 		data = &buf->buf_type_spec.data;
936 		len = data->fill_len;
937 	}
938 
939 	return (len);
940 }
941 
942 void
943 camdd_buf_add_child(struct camdd_buf *buf, struct camdd_buf *child_buf)
944 {
945 	struct camdd_buf_data *data;
946 
947 	assert(buf->buf_type == CAMDD_BUF_DATA);
948 
949 	data = &buf->buf_type_spec.data;
950 
951 	STAILQ_INSERT_TAIL(&buf->src_list, child_buf, src_links);
952 	buf->src_count++;
953 
954 	data->fill_len += camdd_buf_get_len(child_buf);
955 }
956 
/*
 * Indices into req_status_items[]; the order here must match the order
 * of the entries in that table.
 */
typedef enum {
	CAMDD_TS_MAX_BLK	= 0,
	CAMDD_TS_MIN_BLK	= 1,
	CAMDD_TS_BLK_GRAN	= 2,
	CAMDD_TS_EFF_IOSIZE	= 3
} camdd_status_item_index;
963 
/*
 * Tape status entries looked up by camdd_probe_tape().  Each entry
 * pointer starts out NULL and is filled in during the probe.  Entries
 * are addressed with camdd_status_item_index, so the order matters.
 */
static struct camdd_status_items {
	const char *name;
	struct mt_status_entry *entry;
} req_status_items[] = {
	{ .name = "max_blk",			.entry = NULL },
	{ .name = "min_blk",			.entry = NULL },
	{ .name = "blk_gran",			.entry = NULL },
	{ .name = "max_effective_iosize",	.entry = NULL }
};
973 
974 int
975 camdd_probe_tape(int fd, char *filename, uint64_t *max_iosize,
976 		 uint64_t *max_blk, uint64_t *min_blk, uint64_t *blk_gran)
977 {
978 	struct mt_status_data status_data;
979 	char *xml_str = NULL;
980 	unsigned int i;
981 	int retval = 0;
982 
983 	retval = mt_get_xml_str(fd, MTIOCEXTGET, &xml_str);
984 	if (retval != 0)
985 		err(1, "Couldn't get XML string from %s", filename);
986 
987 	retval = mt_get_status(xml_str, &status_data);
988 	if (retval != XML_STATUS_OK) {
989 		warn("couldn't get status for %s", filename);
990 		retval = 1;
991 		goto bailout;
992 	} else
993 		retval = 0;
994 
995 	if (status_data.error != 0) {
996 		warnx("%s", status_data.error_str);
997 		retval = 1;
998 		goto bailout;
999 	}
1000 
1001 	for (i = 0; i < nitems(req_status_items); i++) {
1002                 char *name;
1003 
1004 		name = __DECONST(char *, req_status_items[i].name);
1005 		req_status_items[i].entry = mt_status_entry_find(&status_data,
1006 		    name);
1007 		if (req_status_items[i].entry == NULL) {
1008 			errx(1, "Cannot find status entry %s",
1009 			    req_status_items[i].name);
1010 		}
1011 	}
1012 
1013 	*max_iosize = req_status_items[CAMDD_TS_EFF_IOSIZE].entry->value_unsigned;
1014 	*max_blk= req_status_items[CAMDD_TS_MAX_BLK].entry->value_unsigned;
1015 	*min_blk= req_status_items[CAMDD_TS_MIN_BLK].entry->value_unsigned;
1016 	*blk_gran = req_status_items[CAMDD_TS_BLK_GRAN].entry->value_unsigned;
1017 bailout:
1018 
1019 	free(xml_str);
1020 	mt_status_free(&status_data);
1021 
1022 	return (retval);
1023 }
1024 
1025 struct camdd_dev *
1026 camdd_probe_file(int fd, struct camdd_io_opts *io_opts, int retry_count,
1027     int timeout)
1028 {
1029 	struct camdd_dev *dev = NULL;
1030 	struct camdd_dev_file *file_dev;
1031 	uint64_t blocksize = io_opts->blocksize;
1032 
1033 	dev = camdd_alloc_dev(CAMDD_DEV_FILE, NULL, 0, retry_count, timeout);
1034 	if (dev == NULL)
1035 		goto bailout;
1036 
1037 	file_dev = &dev->dev_spec.file;
1038 	file_dev->fd = fd;
1039 	strlcpy(file_dev->filename, io_opts->dev_name,
1040 	    sizeof(file_dev->filename));
1041 	strlcpy(dev->device_name, io_opts->dev_name, sizeof(dev->device_name));
1042 	if (blocksize == 0)
1043 		dev->blocksize = CAMDD_FILE_DEFAULT_BLOCK;
1044 	else
1045 		dev->blocksize = blocksize;
1046 
1047 	if ((io_opts->queue_depth != 0)
1048 	 && (io_opts->queue_depth != 1)) {
1049 		warnx("Queue depth %ju for %s ignored, only 1 outstanding "
1050 		    "command supported", (uintmax_t)io_opts->queue_depth,
1051 		    io_opts->dev_name);
1052 	}
1053 	dev->target_queue_depth = CAMDD_FILE_DEFAULT_DEPTH;
1054 	dev->run = camdd_file_run;
1055 	dev->fetch = NULL;
1056 
1057 	/*
1058 	 * We can effectively access files on byte boundaries.  We'll reset
1059 	 * this for devices like disks that can be accessed on sector
1060 	 * boundaries.
1061 	 */
1062 	dev->sector_size = 1;
1063 
1064 	if ((fd != STDIN_FILENO)
1065 	 && (fd != STDOUT_FILENO)) {
1066 		int retval;
1067 
1068 		retval = fstat(fd, &file_dev->sb);
1069 		if (retval != 0) {
1070 			warn("Cannot stat %s", dev->device_name);
1071 			goto bailout_error;
1072 		}
1073 		if (S_ISREG(file_dev->sb.st_mode)) {
1074 			file_dev->file_type = CAMDD_FILE_REG;
1075 		} else if (S_ISCHR(file_dev->sb.st_mode)) {
1076 			int type;
1077 
1078 			if (ioctl(fd, FIODTYPE, &type) == -1)
1079 				err(1, "FIODTYPE ioctl failed on %s",
1080 				    dev->device_name);
1081 			else {
1082 				if (type & D_TAPE)
1083 					file_dev->file_type = CAMDD_FILE_TAPE;
1084 				else if (type & D_DISK)
1085 					file_dev->file_type = CAMDD_FILE_DISK;
1086 				else if (type & D_MEM)
1087 					file_dev->file_type = CAMDD_FILE_MEM;
1088 				else if (type & D_TTY)
1089 					file_dev->file_type = CAMDD_FILE_TTY;
1090 			}
1091 		} else if (S_ISDIR(file_dev->sb.st_mode)) {
1092 			errx(1, "cannot operate on directory %s",
1093 			    dev->device_name);
1094 		} else if (S_ISFIFO(file_dev->sb.st_mode)) {
1095 			file_dev->file_type = CAMDD_FILE_PIPE;
1096 		} else
1097 			errx(1, "Cannot determine file type for %s",
1098 			    dev->device_name);
1099 
1100 		switch (file_dev->file_type) {
1101 		case CAMDD_FILE_REG:
1102 			if (file_dev->sb.st_size != 0)
1103 				dev->max_sector = file_dev->sb.st_size - 1;
1104 			else
1105 				dev->max_sector = 0;
1106 			file_dev->file_flags |= CAMDD_FF_CAN_SEEK;
1107 			break;
1108 		case CAMDD_FILE_TAPE: {
1109 			uint64_t max_iosize, max_blk, min_blk, blk_gran;
1110 			/*
1111 			 * Check block limits and maximum effective iosize.
1112 			 * Make sure the blocksize is within the block
1113 			 * limits (and a multiple of the minimum blocksize)
1114 			 * and that the blocksize is <= maximum effective
1115 			 * iosize.
1116 			 */
1117 			retval = camdd_probe_tape(fd, dev->device_name,
1118 			    &max_iosize, &max_blk, &min_blk, &blk_gran);
1119 			if (retval != 0)
1120 				errx(1, "Unable to probe tape %s",
1121 				    dev->device_name);
1122 
1123 			/*
1124 			 * The blocksize needs to be <= the maximum
1125 			 * effective I/O size of the tape device.  Note
1126 			 * that this also takes into account the maximum
1127 			 * blocksize reported by READ BLOCK LIMITS.
1128 			 */
1129 			if (dev->blocksize > max_iosize) {
1130 				warnx("Blocksize %u too big for %s, limiting "
1131 				    "to %ju", dev->blocksize, dev->device_name,
1132 				    max_iosize);
1133 				dev->blocksize = max_iosize;
1134 			}
1135 
1136 			/*
1137 			 * The blocksize needs to be at least min_blk;
1138 			 */
1139 			if (dev->blocksize < min_blk) {
1140 				warnx("Blocksize %u too small for %s, "
1141 				    "increasing to %ju", dev->blocksize,
1142 				    dev->device_name, min_blk);
1143 				dev->blocksize = min_blk;
1144 			}
1145 
1146 			/*
1147 			 * And the blocksize needs to be a multiple of
1148 			 * the block granularity.
1149 			 */
1150 			if ((blk_gran != 0)
1151 			 && (dev->blocksize % (1 << blk_gran))) {
1152 				warnx("Blocksize %u for %s not a multiple of "
1153 				    "%d, adjusting to %d", dev->blocksize,
1154 				    dev->device_name, (1 << blk_gran),
1155 				    dev->blocksize & ~((1 << blk_gran) - 1));
1156 				dev->blocksize &= ~((1 << blk_gran) - 1);
1157 			}
1158 
1159 			if (dev->blocksize == 0) {
1160 				errx(1, "Unable to derive valid blocksize for "
1161 				    "%s", dev->device_name);
1162 			}
1163 
1164 			/*
1165 			 * For tape drives, set the sector size to the
1166 			 * blocksize so that we make sure not to write
1167 			 * less than the blocksize out to the drive.
1168 			 */
1169 			dev->sector_size = dev->blocksize;
1170 			break;
1171 		}
1172 		case CAMDD_FILE_DISK: {
1173 			off_t media_size;
1174 			unsigned int sector_size;
1175 
1176 			file_dev->file_flags |= CAMDD_FF_CAN_SEEK;
1177 
1178 			if (ioctl(fd, DIOCGSECTORSIZE, &sector_size) == -1) {
1179 				err(1, "DIOCGSECTORSIZE ioctl failed on %s",
1180 				    dev->device_name);
1181 			}
1182 
1183 			if (sector_size == 0) {
1184 				errx(1, "DIOCGSECTORSIZE ioctl returned "
1185 				    "invalid sector size %u for %s",
1186 				    sector_size, dev->device_name);
1187 			}
1188 
1189 			if (ioctl(fd, DIOCGMEDIASIZE, &media_size) == -1) {
1190 				err(1, "DIOCGMEDIASIZE ioctl failed on %s",
1191 				    dev->device_name);
1192 			}
1193 
1194 			if (media_size == 0) {
1195 				errx(1, "DIOCGMEDIASIZE ioctl returned "
1196 				    "invalid media size %ju for %s",
1197 				    (uintmax_t)media_size, dev->device_name);
1198 			}
1199 
1200 			if (dev->blocksize % sector_size) {
1201 				errx(1, "%s blocksize %u not a multiple of "
1202 				    "sector size %u", dev->device_name,
1203 				    dev->blocksize, sector_size);
1204 			}
1205 
1206 			dev->sector_size = sector_size;
1207 			dev->max_sector = (media_size / sector_size) - 1;
1208 			break;
1209 		}
1210 		case CAMDD_FILE_MEM:
1211 			file_dev->file_flags |= CAMDD_FF_CAN_SEEK;
1212 			break;
1213 		default:
1214 			break;
1215 		}
1216 	}
1217 
1218 	if ((io_opts->offset != 0)
1219 	 && ((file_dev->file_flags & CAMDD_FF_CAN_SEEK) == 0)) {
1220 		warnx("Offset %ju specified for %s, but we cannot seek on %s",
1221 		    io_opts->offset, io_opts->dev_name, io_opts->dev_name);
1222 		goto bailout_error;
1223 	}
1224 #if 0
1225 	else if ((io_opts->offset != 0)
1226 		&& ((io_opts->offset % dev->sector_size) != 0)) {
1227 		warnx("Offset %ju for %s is not a multiple of the "
1228 		      "sector size %u", io_opts->offset,
1229 		      io_opts->dev_name, dev->sector_size);
1230 		goto bailout_error;
1231 	} else {
1232 		dev->start_offset_bytes = io_opts->offset;
1233 	}
1234 #endif
1235 
1236 bailout:
1237 	return (dev);
1238 
1239 bailout_error:
1240 	camdd_free_dev(dev);
1241 	return (NULL);
1242 }
1243 
1244 /*
1245  * Get a get device CCB for the specified device.
1246  */
1247 int
1248 camdd_get_cgd(struct cam_device *device, struct ccb_getdev *cgd)
1249 {
1250         union ccb *ccb;
1251 	int retval = 0;
1252 
1253 	ccb = cam_getccb(device);
1254 
1255 	if (ccb == NULL) {
1256 		warnx("%s: couldn't allocate CCB", __func__);
1257 		return -1;
1258 	}
1259 
1260 	CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->cgd);
1261 
1262 	ccb->ccb_h.func_code = XPT_GDEV_TYPE;
1263 
1264 	if (cam_send_ccb(device, ccb) < 0) {
1265 		warn("%s: error sending Get Device Information CCB", __func__);
1266 			cam_error_print(device, ccb, CAM_ESF_ALL,
1267 					CAM_EPF_ALL, stderr);
1268 		retval = -1;
1269 		goto bailout;
1270 	}
1271 
1272 	if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
1273 			cam_error_print(device, ccb, CAM_ESF_ALL,
1274 					CAM_EPF_ALL, stderr);
1275 		retval = -1;
1276 		goto bailout;
1277 	}
1278 
1279 	bcopy(&ccb->cgd, cgd, sizeof(struct ccb_getdev));
1280 
1281 bailout:
1282 	cam_freeccb(ccb);
1283 
1284 	return retval;
1285 }
1286 
1287 int
1288 camdd_probe_pass_scsi(struct cam_device *cam_dev, union ccb *ccb,
1289 		 camdd_argmask arglist, int probe_retry_count,
1290 		 int probe_timeout, uint64_t *maxsector, uint32_t *block_len)
1291 {
1292 	struct scsi_read_capacity_data rcap;
1293 	struct scsi_read_capacity_data_long rcaplong;
1294 	int retval = -1;
1295 
1296 	if (ccb == NULL) {
1297 		warnx("%s: error passed ccb is NULL", __func__);
1298 		goto bailout;
1299 	}
1300 
1301 	CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->csio);
1302 
1303 	scsi_read_capacity(&ccb->csio,
1304 			   /*retries*/ probe_retry_count,
1305 			   /*cbfcnp*/ NULL,
1306 			   /*tag_action*/ MSG_SIMPLE_Q_TAG,
1307 			   &rcap,
1308 			   SSD_FULL_SIZE,
1309 			   /*timeout*/ probe_timeout ? probe_timeout : 5000);
1310 
1311 	/* Disable freezing the device queue */
1312 	ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;
1313 
1314 	if (arglist & CAMDD_ARG_ERR_RECOVER)
1315 		ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER;
1316 
1317 	if (cam_send_ccb(cam_dev, ccb) < 0) {
1318 		warn("error sending READ CAPACITY command");
1319 
1320 		cam_error_print(cam_dev, ccb, CAM_ESF_ALL,
1321 				CAM_EPF_ALL, stderr);
1322 
1323 		goto bailout;
1324 	}
1325 
1326 	if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
1327 		cam_error_print(cam_dev, ccb, CAM_ESF_ALL, CAM_EPF_ALL, stderr);
1328 		goto bailout;
1329 	}
1330 
1331 	*maxsector = scsi_4btoul(rcap.addr);
1332 	*block_len = scsi_4btoul(rcap.length);
1333 
1334 	/*
1335 	 * A last block of 2^32-1 means that the true capacity is over 2TB,
1336 	 * and we need to issue the long READ CAPACITY to get the real
1337 	 * capacity.  Otherwise, we're all set.
1338 	 */
1339 	if (*maxsector != 0xffffffff) {
1340 		retval = 0;
1341 		goto bailout;
1342 	}
1343 
1344 	scsi_read_capacity_16(&ccb->csio,
1345 			      /*retries*/ probe_retry_count,
1346 			      /*cbfcnp*/ NULL,
1347 			      /*tag_action*/ MSG_SIMPLE_Q_TAG,
1348 			      /*lba*/ 0,
1349 			      /*reladdr*/ 0,
1350 			      /*pmi*/ 0,
1351 			      (uint8_t *)&rcaplong,
1352 			      sizeof(rcaplong),
1353 			      /*sense_len*/ SSD_FULL_SIZE,
1354 			      /*timeout*/ probe_timeout ? probe_timeout : 5000);
1355 
1356 	/* Disable freezing the device queue */
1357 	ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;
1358 
1359 	if (arglist & CAMDD_ARG_ERR_RECOVER)
1360 		ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER;
1361 
1362 	if (cam_send_ccb(cam_dev, ccb) < 0) {
1363 		warn("error sending READ CAPACITY (16) command");
1364 		cam_error_print(cam_dev, ccb, CAM_ESF_ALL,
1365 				CAM_EPF_ALL, stderr);
1366 		goto bailout;
1367 	}
1368 
1369 	if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
1370 		cam_error_print(cam_dev, ccb, CAM_ESF_ALL, CAM_EPF_ALL, stderr);
1371 		goto bailout;
1372 	}
1373 
1374 	*maxsector = scsi_8btou64(rcaplong.addr);
1375 	*block_len = scsi_4btoul(rcaplong.length);
1376 
1377 	retval = 0;
1378 
1379 bailout:
1380 	return retval;
1381 }
1382 
1383 /*
1384  * Need to implement this.  Do a basic probe:
1385  * - Check the inquiry data, make sure we're talking to a device that we
1386  *   can reasonably expect to talk to -- direct, RBC, CD, WORM.
1387  * - Send a test unit ready, make sure the device is available.
1388  * - Get the capacity and block size.
1389  */
1390 struct camdd_dev *
1391 camdd_probe_pass(struct cam_device *cam_dev, struct camdd_io_opts *io_opts,
1392 		 camdd_argmask arglist, int probe_retry_count,
1393 		 int probe_timeout, int io_retry_count, int io_timeout)
1394 {
1395 	union ccb *ccb;
1396 	uint64_t maxsector = 0;
1397 	uint32_t cpi_maxio, max_iosize, pass_numblocks;
1398 	uint32_t block_len = 0;
1399 	struct camdd_dev *dev = NULL;
1400 	struct camdd_dev_pass *pass_dev;
1401 	struct kevent ke;
1402 	struct ccb_getdev cgd;
1403 	int retval;
1404 	int scsi_dev_type;
1405 
1406 	if ((retval = camdd_get_cgd(cam_dev, &cgd)) != 0) {
1407 		warnx("%s: error retrieving CGD", __func__);
1408 		return NULL;
1409 	}
1410 
1411 	ccb = cam_getccb(cam_dev);
1412 
1413 	if (ccb == NULL) {
1414 		warnx("%s: error allocating ccb", __func__);
1415 		goto bailout;
1416 	}
1417 
1418 	switch (cgd.protocol) {
1419 	case PROTO_SCSI:
1420 		scsi_dev_type = SID_TYPE(&cam_dev->inq_data);
1421 
1422 		/*
1423 		 * For devices that support READ CAPACITY, we'll attempt to get the
1424 		 * capacity.  Otherwise, we really don't support tape or other
1425 		 * devices via SCSI passthrough, so just return an error in that case.
1426 		 */
1427 		switch (scsi_dev_type) {
1428 		case T_DIRECT:
1429 		case T_WORM:
1430 		case T_CDROM:
1431 		case T_OPTICAL:
1432 		case T_RBC:
1433 		case T_ZBC_HM:
1434 			break;
1435 		default:
1436 			errx(1, "Unsupported SCSI device type %d", scsi_dev_type);
1437 			break; /*NOTREACHED*/
1438 		}
1439 
1440 		if ((retval = camdd_probe_pass_scsi(cam_dev, ccb, probe_retry_count,
1441 						arglist, probe_timeout, &maxsector,
1442 						&block_len))) {
1443 			goto bailout;
1444 		}
1445 		break;
1446 	default:
1447 		errx(1, "Unsupported PROTO type %d", cgd.protocol);
1448 		break; /*NOTREACHED*/
1449 	}
1450 
1451 	if (block_len == 0) {
1452 		warnx("Sector size for %s%u is 0, cannot continue",
1453 		    cam_dev->device_name, cam_dev->dev_unit_num);
1454 		goto bailout_error;
1455 	}
1456 
1457 	CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->cpi);
1458 
1459 	ccb->ccb_h.func_code = XPT_PATH_INQ;
1460 	ccb->ccb_h.flags = CAM_DIR_NONE;
1461 	ccb->ccb_h.retry_count = 1;
1462 
1463 	if (cam_send_ccb(cam_dev, ccb) < 0) {
1464 		warn("error sending XPT_PATH_INQ CCB");
1465 
1466 		cam_error_print(cam_dev, ccb, CAM_ESF_ALL,
1467 				CAM_EPF_ALL, stderr);
1468 		goto bailout;
1469 	}
1470 
1471 	EV_SET(&ke, cam_dev->fd, EVFILT_READ, EV_ADD|EV_ENABLE, 0, 0, 0);
1472 
1473 	dev = camdd_alloc_dev(CAMDD_DEV_PASS, &ke, 1, io_retry_count,
1474 			      io_timeout);
1475 	if (dev == NULL)
1476 		goto bailout;
1477 
1478 	pass_dev = &dev->dev_spec.pass;
1479 	pass_dev->scsi_dev_type = scsi_dev_type;
1480 	pass_dev->protocol = cgd.protocol;
1481 	pass_dev->dev = cam_dev;
1482 	pass_dev->max_sector = maxsector;
1483 	pass_dev->block_len = block_len;
1484 	pass_dev->cpi_maxio = ccb->cpi.maxio;
1485 	snprintf(dev->device_name, sizeof(dev->device_name), "%s%u",
1486 		 pass_dev->dev->device_name, pass_dev->dev->dev_unit_num);
1487 	dev->sector_size = block_len;
1488 	dev->max_sector = maxsector;
1489 
1490 
1491 	/*
1492 	 * Determine the optimal blocksize to use for this device.
1493 	 */
1494 
1495 	/*
1496 	 * If the controller has not specified a maximum I/O size,
1497 	 * just go with 128K as a somewhat conservative value.
1498 	 */
1499 	if (pass_dev->cpi_maxio == 0)
1500 		cpi_maxio = 131072;
1501 	else
1502 		cpi_maxio = pass_dev->cpi_maxio;
1503 
1504 	/*
1505 	 * If the controller has a large maximum I/O size, limit it
1506 	 * to something smaller so that the kernel doesn't have trouble
1507 	 * allocating buffers to copy data in and out for us.
1508 	 * XXX KDM this is until we have unmapped I/O support in the kernel.
1509 	 */
1510 	max_iosize = min(cpi_maxio, CAMDD_PASS_MAX_BLOCK);
1511 
1512 	/*
1513 	 * If we weren't able to get a block size for some reason,
1514 	 * default to 512 bytes.
1515 	 */
1516 	block_len = pass_dev->block_len;
1517 	if (block_len == 0)
1518 		block_len = 512;
1519 
1520 	/*
1521 	 * Figure out how many blocksize chunks will fit in the
1522 	 * maximum I/O size.
1523 	 */
1524 	pass_numblocks = max_iosize / block_len;
1525 
1526 	/*
1527 	 * And finally, multiple the number of blocks by the LBA
1528 	 * length to get our maximum block size;
1529 	 */
1530 	dev->blocksize = pass_numblocks * block_len;
1531 
1532 	if (io_opts->blocksize != 0) {
1533 		if ((io_opts->blocksize % dev->sector_size) != 0) {
1534 			warnx("Blocksize %ju for %s is not a multiple of "
1535 			      "sector size %u", (uintmax_t)io_opts->blocksize,
1536 			      dev->device_name, dev->sector_size);
1537 			goto bailout_error;
1538 		}
1539 		dev->blocksize = io_opts->blocksize;
1540 	}
1541 	dev->target_queue_depth = CAMDD_PASS_DEFAULT_DEPTH;
1542 	if (io_opts->queue_depth != 0)
1543 		dev->target_queue_depth = io_opts->queue_depth;
1544 
1545 	if (io_opts->offset != 0) {
1546 		if (io_opts->offset > (dev->max_sector * dev->sector_size)) {
1547 			warnx("Offset %ju is past the end of device %s",
1548 			    io_opts->offset, dev->device_name);
1549 			goto bailout_error;
1550 		}
1551 #if 0
1552 		else if ((io_opts->offset % dev->sector_size) != 0) {
1553 			warnx("Offset %ju for %s is not a multiple of the "
1554 			      "sector size %u", io_opts->offset,
1555 			      dev->device_name, dev->sector_size);
1556 			goto bailout_error;
1557 		}
1558 		dev->start_offset_bytes = io_opts->offset;
1559 #endif
1560 	}
1561 
1562 	dev->min_cmd_size = io_opts->min_cmd_size;
1563 
1564 	dev->run = camdd_pass_run;
1565 	dev->fetch = camdd_pass_fetch;
1566 
1567 bailout:
1568 	cam_freeccb(ccb);
1569 
1570 	return (dev);
1571 
1572 bailout_error:
1573 	cam_freeccb(ccb);
1574 
1575 	camdd_free_dev(dev);
1576 
1577 	return (NULL);
1578 }
1579 
1580 void *
1581 camdd_worker(void *arg)
1582 {
1583 	struct camdd_dev *dev = arg;
1584 	struct camdd_buf *buf;
1585 	struct timespec ts, *kq_ts;
1586 
1587 	ts.tv_sec = 0;
1588 	ts.tv_nsec = 0;
1589 
1590 	pthread_mutex_lock(&dev->mutex);
1591 
1592 	dev->flags |= CAMDD_DEV_FLAG_ACTIVE;
1593 
1594 	for (;;) {
1595 		struct kevent ke;
1596 		int retval = 0;
1597 
1598 		/*
1599 		 * XXX KDM check the reorder queue depth?
1600 		 */
1601 		if (dev->write_dev == 0) {
1602 			uint32_t our_depth, peer_depth, peer_bytes, our_bytes;
1603 			uint32_t target_depth = dev->target_queue_depth;
1604 			uint32_t peer_target_depth =
1605 			    dev->peer_dev->target_queue_depth;
1606 			uint32_t peer_blocksize = dev->peer_dev->blocksize;
1607 
1608 			camdd_get_depth(dev, &our_depth, &peer_depth,
1609 					&our_bytes, &peer_bytes);
1610 
1611 #if 0
1612 			while (((our_depth < target_depth)
1613 			     && (peer_depth < peer_target_depth))
1614 			    || ((peer_bytes + our_bytes) <
1615 				 (peer_blocksize * 2))) {
1616 #endif
1617 			while (((our_depth + peer_depth) <
1618 			        (target_depth + peer_target_depth))
1619 			    || ((peer_bytes + our_bytes) <
1620 				(peer_blocksize * 3))) {
1621 
1622 				retval = camdd_queue(dev, NULL);
1623 				if (retval == 1)
1624 					break;
1625 				else if (retval != 0) {
1626 					error_exit = 1;
1627 					goto bailout;
1628 				}
1629 
1630 				camdd_get_depth(dev, &our_depth, &peer_depth,
1631 						&our_bytes, &peer_bytes);
1632 			}
1633 		}
1634 		/*
1635 		 * See if we have any I/O that is ready to execute.
1636 		 */
1637 		buf = STAILQ_FIRST(&dev->run_queue);
1638 		if (buf != NULL) {
1639 			while (dev->target_queue_depth > dev->cur_active_io) {
1640 				retval = dev->run(dev);
1641 				if (retval == -1) {
1642 					dev->flags |= CAMDD_DEV_FLAG_EOF;
1643 					error_exit = 1;
1644 					break;
1645 				} else if (retval != 0) {
1646 					break;
1647 				}
1648 			}
1649 		}
1650 
1651 		/*
1652 		 * We've reached EOF, or our partner has reached EOF.
1653 		 */
1654 		if ((dev->flags & CAMDD_DEV_FLAG_EOF)
1655 		 || (dev->flags & CAMDD_DEV_FLAG_PEER_EOF)) {
1656 			if (dev->write_dev != 0) {
1657 			 	if ((STAILQ_EMPTY(&dev->work_queue))
1658 				 && (dev->num_run_queue == 0)
1659 				 && (dev->cur_active_io == 0)) {
1660 					goto bailout;
1661 				}
1662 			} else {
1663 				/*
1664 				 * If we're the reader, and the writer
1665 				 * got EOF, he is already done.  If we got
1666 				 * the EOF, then we need to wait until
1667 				 * everything is flushed out for the writer.
1668 				 */
1669 				if (dev->flags & CAMDD_DEV_FLAG_PEER_EOF) {
1670 					goto bailout;
1671 				} else if ((dev->num_peer_work_queue == 0)
1672 					&& (dev->num_peer_done_queue == 0)
1673 					&& (dev->cur_active_io == 0)
1674 					&& (dev->num_run_queue == 0)) {
1675 					goto bailout;
1676 				}
1677 			}
1678 			/*
1679 			 * XXX KDM need to do something about the pending
1680 			 * queue and cleanup resources.
1681 			 */
1682 		}
1683 
1684 		if ((dev->write_dev == 0)
1685 		 && (dev->cur_active_io == 0)
1686 		 && (dev->peer_bytes_queued < dev->peer_dev->blocksize))
1687 			kq_ts = &ts;
1688 		else
1689 			kq_ts = NULL;
1690 
1691 		/*
1692 		 * Run kevent to see if there are events to process.
1693 		 */
1694 		pthread_mutex_unlock(&dev->mutex);
1695 		retval = kevent(dev->kq, NULL, 0, &ke, 1, kq_ts);
1696 		pthread_mutex_lock(&dev->mutex);
1697 		if (retval == -1) {
1698 			warn("%s: error returned from kevent",__func__);
1699 			goto bailout;
1700 		} else if (retval != 0) {
1701 			switch (ke.filter) {
1702 			case EVFILT_READ:
1703 				if (dev->fetch != NULL) {
1704 					retval = dev->fetch(dev);
1705 					if (retval == -1) {
1706 						error_exit = 1;
1707 						goto bailout;
1708 					}
1709 				}
1710 				break;
1711 			case EVFILT_SIGNAL:
1712 				/*
1713 				 * We register for this so we don't get
1714 				 * an error as a result of a SIGINFO or a
1715 				 * SIGINT.  It will actually get handled
1716 				 * by the signal handler.  If we get a
1717 				 * SIGINT, bail out without printing an
1718 				 * error message.  Any other signals
1719 				 * will result in the error message above.
1720 				 */
1721 				if (ke.ident == SIGINT)
1722 					goto bailout;
1723 				break;
1724 			case EVFILT_USER:
1725 				retval = 0;
1726 				/*
1727 				 * Check to see if the other thread has
1728 				 * queued any I/O for us to do.  (In this
1729 				 * case we're the writer.)
1730 				 */
1731 				for (buf = STAILQ_FIRST(&dev->work_queue);
1732 				     buf != NULL;
1733 				     buf = STAILQ_FIRST(&dev->work_queue)) {
1734 					STAILQ_REMOVE_HEAD(&dev->work_queue,
1735 							   work_links);
1736 					retval = camdd_queue(dev, buf);
1737 					/*
1738 					 * We keep going unless we get an
1739 					 * actual error.  If we get EOF, we
1740 					 * still want to remove the buffers
1741 					 * from the queue and send the back
1742 					 * to the reader thread.
1743 					 */
1744 					if (retval == -1) {
1745 						error_exit = 1;
1746 						goto bailout;
1747 					} else
1748 						retval = 0;
1749 				}
1750 
1751 				/*
1752 				 * Next check to see if the other thread has
1753 				 * queued any completed buffers back to us.
1754 				 * (In this case we're the reader.)
1755 				 */
1756 				for (buf = STAILQ_FIRST(&dev->peer_done_queue);
1757 				     buf != NULL;
1758 				     buf = STAILQ_FIRST(&dev->peer_done_queue)){
1759 					STAILQ_REMOVE_HEAD(
1760 					    &dev->peer_done_queue, work_links);
1761 					dev->num_peer_done_queue--;
1762 					camdd_peer_done(buf);
1763 				}
1764 				break;
1765 			default:
1766 				warnx("%s: unknown kevent filter %d",
1767 				      __func__, ke.filter);
1768 				break;
1769 			}
1770 		}
1771 	}
1772 
1773 bailout:
1774 
1775 	dev->flags &= ~CAMDD_DEV_FLAG_ACTIVE;
1776 
1777 	/* XXX KDM cleanup resources here? */
1778 
1779 	pthread_mutex_unlock(&dev->mutex);
1780 
1781 	need_exit = 1;
1782 	sem_post(&camdd_sem);
1783 
1784 	return (NULL);
1785 }
1786 
1787 /*
1788  * Simplistic translation of CCB status to our local status.
1789  */
1790 camdd_buf_status
1791 camdd_ccb_status(union ccb *ccb, int protocol)
1792 {
1793 	camdd_buf_status status = CAMDD_STATUS_NONE;
1794 	cam_status ccb_status;
1795 
1796 	ccb_status = ccb->ccb_h.status & CAM_STATUS_MASK;
1797 
1798 	switch (protocol) {
1799 	case PROTO_SCSI:
1800 		switch (ccb_status) {
1801 		case CAM_REQ_CMP: {
1802 			if (ccb->csio.resid == 0) {
1803 				status = CAMDD_STATUS_OK;
1804 			} else if (ccb->csio.dxfer_len > ccb->csio.resid) {
1805 				status = CAMDD_STATUS_SHORT_IO;
1806 			} else {
1807 				status = CAMDD_STATUS_EOF;
1808 			}
1809 			break;
1810 		}
1811 		case CAM_SCSI_STATUS_ERROR: {
1812 			switch (ccb->csio.scsi_status) {
1813 			case SCSI_STATUS_OK:
1814 			case SCSI_STATUS_COND_MET:
1815 			case SCSI_STATUS_INTERMED:
1816 			case SCSI_STATUS_INTERMED_COND_MET:
1817 				status = CAMDD_STATUS_OK;
1818 				break;
1819 			case SCSI_STATUS_CMD_TERMINATED:
1820 			case SCSI_STATUS_CHECK_COND:
1821 			case SCSI_STATUS_QUEUE_FULL:
1822 			case SCSI_STATUS_BUSY:
1823 			case SCSI_STATUS_RESERV_CONFLICT:
1824 			default:
1825 				status = CAMDD_STATUS_ERROR;
1826 				break;
1827 			}
1828 			break;
1829 		}
1830 		default:
1831 			status = CAMDD_STATUS_ERROR;
1832 			break;
1833 		}
1834 		break;
1835 	default:
1836 		status = CAMDD_STATUS_ERROR;
1837 		break;
1838 	}
1839 
1840 	return (status);
1841 }
1842 
1843 /*
1844  * Queue a buffer to our peer's work thread for writing.
1845  *
1846  * Returns 0 for success, -1 for failure, 1 if the other thread exited.
1847  */
1848 int
1849 camdd_queue_peer_buf(struct camdd_dev *dev, struct camdd_buf *buf)
1850 {
1851 	struct kevent ke;
1852 	STAILQ_HEAD(, camdd_buf) local_queue;
1853 	struct camdd_buf *buf1, *buf2;
1854 	struct camdd_buf_data *data = NULL;
1855 	uint64_t peer_bytes_queued = 0;
1856 	int active = 1;
1857 	int retval = 0;
1858 
1859 	STAILQ_INIT(&local_queue);
1860 
1861 	/*
1862 	 * Since we're the reader, we need to queue our I/O to the writer
1863 	 * in sequential order in order to make sure it gets written out
1864 	 * in sequential order.
1865 	 *
1866 	 * Check the next expected I/O starting offset.  If this doesn't
1867 	 * match, put it on the reorder queue.
1868 	 */
1869 	if ((buf->lba * dev->sector_size) != dev->next_completion_pos_bytes) {
1870 
1871 		/*
1872 		 * If there is nothing on the queue, there is no sorting
1873 		 * needed.
1874 		 */
1875 		if (STAILQ_EMPTY(&dev->reorder_queue)) {
1876 			STAILQ_INSERT_TAIL(&dev->reorder_queue, buf, links);
1877 			dev->num_reorder_queue++;
1878 			goto bailout;
1879 		}
1880 
1881 		/*
1882 		 * Sort in ascending order by starting LBA.  There should
1883 		 * be no identical LBAs.
1884 		 */
1885 		for (buf1 = STAILQ_FIRST(&dev->reorder_queue); buf1 != NULL;
1886 		     buf1 = buf2) {
1887 			buf2 = STAILQ_NEXT(buf1, links);
1888 			if (buf->lba < buf1->lba) {
1889 				/*
1890 				 * If we're less than the first one, then
1891 				 * we insert at the head of the list
1892 				 * because this has to be the first element
1893 				 * on the list.
1894 				 */
1895 				STAILQ_INSERT_HEAD(&dev->reorder_queue,
1896 						   buf, links);
1897 				dev->num_reorder_queue++;
1898 				break;
1899 			} else if (buf->lba > buf1->lba) {
1900 				if (buf2 == NULL) {
1901 					STAILQ_INSERT_TAIL(&dev->reorder_queue,
1902 					    buf, links);
1903 					dev->num_reorder_queue++;
1904 					break;
1905 				} else if (buf->lba < buf2->lba) {
1906 					STAILQ_INSERT_AFTER(&dev->reorder_queue,
1907 					    buf1, buf, links);
1908 					dev->num_reorder_queue++;
1909 					break;
1910 				}
1911 			} else {
1912 				errx(1, "Found buffers with duplicate LBA %ju!",
1913 				     buf->lba);
1914 			}
1915 		}
1916 		goto bailout;
1917 	} else {
1918 
1919 		/*
1920 		 * We're the next expected I/O completion, so put ourselves
1921 		 * on the local queue to be sent to the writer.  We use
1922 		 * work_links here so that we can queue this to the
1923 		 * peer_work_queue before taking the buffer off of the
1924 		 * local_queue.
1925 		 */
1926 		dev->next_completion_pos_bytes += buf->len;
1927 		STAILQ_INSERT_TAIL(&local_queue, buf, work_links);
1928 
1929 		/*
1930 		 * Go through the reorder queue looking for more sequential
1931 		 * I/O and add it to the local queue.
1932 		 */
1933 		for (buf1 = STAILQ_FIRST(&dev->reorder_queue); buf1 != NULL;
1934 		     buf1 = STAILQ_FIRST(&dev->reorder_queue)) {
1935 			/*
1936 			 * As soon as we see an I/O that is out of sequence,
1937 			 * we're done.
1938 			 */
1939 			if ((buf1->lba * dev->sector_size) !=
1940 			     dev->next_completion_pos_bytes)
1941 				break;
1942 
1943 			STAILQ_REMOVE_HEAD(&dev->reorder_queue, links);
1944 			dev->num_reorder_queue--;
1945 			STAILQ_INSERT_TAIL(&local_queue, buf1, work_links);
1946 			dev->next_completion_pos_bytes += buf1->len;
1947 		}
1948 	}
1949 
1950 	/*
1951 	 * Setup the event to let the other thread know that it has work
1952 	 * pending.
1953 	 */
1954 	EV_SET(&ke, (uintptr_t)&dev->peer_dev->work_queue, EVFILT_USER, 0,
1955 	       NOTE_TRIGGER, 0, NULL);
1956 
1957 	/*
1958 	 * Put this on our shadow queue so that we know what we've queued
1959 	 * to the other thread.
1960 	 */
1961 	STAILQ_FOREACH_SAFE(buf1, &local_queue, work_links, buf2) {
1962 		if (buf1->buf_type != CAMDD_BUF_DATA) {
1963 			errx(1, "%s: should have a data buffer, not an "
1964 			    "indirect buffer", __func__);
1965 		}
1966 		data = &buf1->buf_type_spec.data;
1967 
1968 		/*
1969 		 * We only need to send one EOF to the writer, and don't
1970 		 * need to continue sending EOFs after that.
1971 		 */
1972 		if (buf1->status == CAMDD_STATUS_EOF) {
1973 			if (dev->flags & CAMDD_DEV_FLAG_EOF_SENT) {
1974 				STAILQ_REMOVE(&local_queue, buf1, camdd_buf,
1975 				    work_links);
1976 				camdd_release_buf(buf1);
1977 				retval = 1;
1978 				continue;
1979 			}
1980 			dev->flags |= CAMDD_DEV_FLAG_EOF_SENT;
1981 		}
1982 
1983 
1984 		STAILQ_INSERT_TAIL(&dev->peer_work_queue, buf1, links);
1985 		peer_bytes_queued += (data->fill_len - data->resid);
1986 		dev->peer_bytes_queued += (data->fill_len - data->resid);
1987 		dev->num_peer_work_queue++;
1988 	}
1989 
1990 	if (STAILQ_FIRST(&local_queue) == NULL)
1991 		goto bailout;
1992 
1993 	/*
1994 	 * Drop our mutex and pick up the other thread's mutex.  We need to
1995 	 * do this to avoid deadlocks.
1996 	 */
1997 	pthread_mutex_unlock(&dev->mutex);
1998 	pthread_mutex_lock(&dev->peer_dev->mutex);
1999 
2000 	if (dev->peer_dev->flags & CAMDD_DEV_FLAG_ACTIVE) {
2001 		/*
2002 		 * Put the buffers on the other thread's incoming work queue.
2003 		 */
2004 		for (buf1 = STAILQ_FIRST(&local_queue); buf1 != NULL;
2005 		     buf1 = STAILQ_FIRST(&local_queue)) {
2006 			STAILQ_REMOVE_HEAD(&local_queue, work_links);
2007 			STAILQ_INSERT_TAIL(&dev->peer_dev->work_queue, buf1,
2008 					   work_links);
2009 		}
2010 		/*
2011 		 * Send an event to the other thread's kqueue to let it know
2012 		 * that there is something on the work queue.
2013 		 */
2014 		retval = kevent(dev->peer_dev->kq, &ke, 1, NULL, 0, NULL);
2015 		if (retval == -1)
2016 			warn("%s: unable to add peer work_queue kevent",
2017 			     __func__);
2018 		else
2019 			retval = 0;
2020 	} else
2021 		active = 0;
2022 
2023 	pthread_mutex_unlock(&dev->peer_dev->mutex);
2024 	pthread_mutex_lock(&dev->mutex);
2025 
2026 	/*
2027 	 * If the other side isn't active, run through the queue and
2028 	 * release all of the buffers.
2029 	 */
2030 	if (active == 0) {
2031 		for (buf1 = STAILQ_FIRST(&local_queue); buf1 != NULL;
2032 		     buf1 = STAILQ_FIRST(&local_queue)) {
2033 			STAILQ_REMOVE_HEAD(&local_queue, work_links);
2034 			STAILQ_REMOVE(&dev->peer_work_queue, buf1, camdd_buf,
2035 				      links);
2036 			dev->num_peer_work_queue--;
2037 			camdd_release_buf(buf1);
2038 		}
2039 		dev->peer_bytes_queued -= peer_bytes_queued;
2040 		retval = 1;
2041 	}
2042 
2043 bailout:
2044 	return (retval);
2045 }
2046 
2047 /*
2048  * Return a buffer to the reader thread when we have completed writing it.
2049  */
2050 int
2051 camdd_complete_peer_buf(struct camdd_dev *dev, struct camdd_buf *peer_buf)
2052 {
2053 	struct kevent ke;
2054 	int retval = 0;
2055 
2056 	/*
2057 	 * Setup the event to let the other thread know that we have
2058 	 * completed a buffer.
2059 	 */
2060 	EV_SET(&ke, (uintptr_t)&dev->peer_dev->peer_done_queue, EVFILT_USER, 0,
2061 	       NOTE_TRIGGER, 0, NULL);
2062 
2063 	/*
2064 	 * Drop our lock and acquire the other thread's lock before
2065 	 * manipulating
2066 	 */
2067 	pthread_mutex_unlock(&dev->mutex);
2068 	pthread_mutex_lock(&dev->peer_dev->mutex);
2069 
2070 	/*
2071 	 * Put the buffer on the reader thread's peer done queue now that
2072 	 * we have completed it.
2073 	 */
2074 	STAILQ_INSERT_TAIL(&dev->peer_dev->peer_done_queue, peer_buf,
2075 			   work_links);
2076 	dev->peer_dev->num_peer_done_queue++;
2077 
2078 	/*
2079 	 * Send an event to the peer thread to let it know that we've added
2080 	 * something to its peer done queue.
2081 	 */
2082 	retval = kevent(dev->peer_dev->kq, &ke, 1, NULL, 0, NULL);
2083 	if (retval == -1)
2084 		warn("%s: unable to add peer_done_queue kevent", __func__);
2085 	else
2086 		retval = 0;
2087 
2088 	/*
2089 	 * Drop the other thread's lock and reacquire ours.
2090 	 */
2091 	pthread_mutex_unlock(&dev->peer_dev->mutex);
2092 	pthread_mutex_lock(&dev->mutex);
2093 
2094 	return (retval);
2095 }
2096 
2097 /*
2098  * Free a buffer that was written out by the writer thread and returned to
2099  * the reader thread.
2100  */
2101 void
2102 camdd_peer_done(struct camdd_buf *buf)
2103 {
2104 	struct camdd_dev *dev;
2105 	struct camdd_buf_data *data;
2106 
2107 	dev = buf->dev;
2108 	if (buf->buf_type != CAMDD_BUF_DATA) {
2109 		errx(1, "%s: should have a data buffer, not an "
2110 		    "indirect buffer", __func__);
2111 	}
2112 
2113 	data = &buf->buf_type_spec.data;
2114 
2115 	STAILQ_REMOVE(&dev->peer_work_queue, buf, camdd_buf, links);
2116 	dev->num_peer_work_queue--;
2117 	dev->peer_bytes_queued -= (data->fill_len - data->resid);
2118 
2119 	if (buf->status == CAMDD_STATUS_EOF)
2120 		dev->flags |= CAMDD_DEV_FLAG_PEER_EOF;
2121 
2122 	STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
2123 }
2124 
2125 /*
2126  * Assumes caller holds the lock for this device.
2127  */
2128 void
2129 camdd_complete_buf(struct camdd_dev *dev, struct camdd_buf *buf,
2130 		   int *error_count)
2131 {
2132 	int retval = 0;
2133 
2134 	/*
2135 	 * If we're the reader, we need to send the completed I/O
2136 	 * to the writer.  If we're the writer, we need to just
2137 	 * free up resources, or let the reader know if we've
2138 	 * encountered an error.
2139 	 */
2140 	if (dev->write_dev == 0) {
2141 		retval = camdd_queue_peer_buf(dev, buf);
2142 		if (retval != 0)
2143 			(*error_count)++;
2144 	} else {
2145 		struct camdd_buf *tmp_buf, *next_buf;
2146 
2147 		STAILQ_FOREACH_SAFE(tmp_buf, &buf->src_list, src_links,
2148 				    next_buf) {
2149 			struct camdd_buf *src_buf;
2150 			struct camdd_buf_indirect *indirect;
2151 
2152 			STAILQ_REMOVE(&buf->src_list, tmp_buf,
2153 				      camdd_buf, src_links);
2154 
2155 			tmp_buf->status = buf->status;
2156 
2157 			if (tmp_buf->buf_type == CAMDD_BUF_DATA) {
2158 				camdd_complete_peer_buf(dev, tmp_buf);
2159 				continue;
2160 			}
2161 
2162 			indirect = &tmp_buf->buf_type_spec.indirect;
2163 			src_buf = indirect->src_buf;
2164 			src_buf->refcount--;
2165 			/*
2166 			 * XXX KDM we probably need to account for
2167 			 * exactly how many bytes we were able to
2168 			 * write.  Allocate the residual to the
2169 			 * first N buffers?  Or just track the
2170 			 * number of bytes written?  Right now the reader
2171 			 * doesn't do anything with a residual.
2172 			 */
2173 			src_buf->status = buf->status;
2174 			if (src_buf->refcount <= 0)
2175 				camdd_complete_peer_buf(dev, src_buf);
2176 			STAILQ_INSERT_TAIL(&dev->free_indirect_queue,
2177 					   tmp_buf, links);
2178 		}
2179 
2180 		STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
2181 	}
2182 }
2183 
/*
 * Fetch all completed commands from the pass(4) device.
 *
 * The device mutex is dropped around each CAMIOGET ioctl and retaken
 * before the active queue or any device counter is touched; it is held
 * again on every return path.
 * NOTE(review): this implies the caller holds dev->mutex on entry --
 * confirm against the callers.
 *
 * Returns the number of commands received, or -1 if any of the commands
 * completed with an error.  Returns 0 if no commands are available.
 */
int
camdd_pass_fetch(struct camdd_dev *dev)
{
	struct camdd_dev_pass *pass_dev = &dev->dev_spec.pass;
	union ccb ccb;
	int retval = 0, num_fetched = 0, error_count = 0;

	pthread_mutex_unlock(&dev->mutex);
	/*
	 * Keep pulling completions until the ioctl fails.
	 *
	 * XXX KDM we don't distinguish between EFAULT and ENOENT.
	 */
	while ((retval = ioctl(pass_dev->dev->fd, CAMIOGET, &ccb)) != -1) {
		struct camdd_buf *buf;
		struct camdd_buf_data *data;
		cam_status ccb_status;
		union ccb *buf_ccb;

		/*
		 * The buffer pointer was stashed in the CCB when it was
		 * queued; use it to find the buffer this completion
		 * belongs to.
		 */
		buf = ccb.ccb_h.ccb_buf;
		data = &buf->buf_type_spec.data;
		buf_ccb = &data->ccb;

		num_fetched++;

		/*
		 * Copy the CCB back out so we get status, sense data, etc.
		 */
		bcopy(&ccb, buf_ccb, sizeof(ccb));

		pthread_mutex_lock(&dev->mutex);

		/*
		 * We're now done, so take this off the active queue.
		 */
		STAILQ_REMOVE(&dev->active_queue, buf, camdd_buf, links);
		dev->cur_active_io--;

		/* Report anything other than a clean completion. */
		ccb_status = ccb.ccb_h.status & CAM_STATUS_MASK;
		if (ccb_status != CAM_REQ_CMP) {
			cam_error_print(pass_dev->dev, &ccb, CAM_ESF_ALL,
					CAM_EPF_ALL, stderr);
		}

		/* Record the residual and count the bytes actually moved. */
		switch (pass_dev->protocol) {
		case PROTO_SCSI:
			data->resid = ccb.csio.resid;
			dev->bytes_transferred += (ccb.csio.dxfer_len - ccb.csio.resid);
			break;
		default:
			/*
			 * NOTE(review): this returns with the mutex held,
			 * which matches the normal exit, but buf has
			 * already been removed from the active queue and
			 * is neither completed nor freed on this path.
			 */
			return -1;
			break;
		}

		/* Do not overwrite a status that was set earlier. */
		if (buf->status == CAMDD_STATUS_NONE)
			buf->status = camdd_ccb_status(&ccb, pass_dev->protocol);
		if (buf->status == CAMDD_STATUS_ERROR)
			error_count++;
		else if (buf->status == CAMDD_STATUS_EOF) {
			/*
			 * Once we queue this buffer to our partner thread,
			 * he will know that we've hit EOF.
			 */
			dev->flags |= CAMDD_DEV_FLAG_EOF;
		}

		camdd_complete_buf(dev, buf, &error_count);

		/*
		 * Unlock in preparation for the ioctl call.
		 */
		pthread_mutex_unlock(&dev->mutex);
	}

	/* Return with the mutex held again. */
	pthread_mutex_lock(&dev->mutex);

	if (error_count > 0)
		return (-1);
	else
		return (num_fetched);
}
2269 
/*
 * Perform one read or write against a file device, using the buffer at
 * the head of the run queue.
 *
 * The device mutex is released around the actual I/O system call and is
 * held again by the time the buffer is completed at bailout.
 *
 * Returns -1 for error, 0 for success/continue, and 1 for resource
 * shortage/stop processing.  Note that EOF is counted in error_count,
 * so hitting EOF also produces a -1 return.
 */
int
camdd_file_run(struct camdd_dev *dev)
{
	struct camdd_dev_file *file_dev = &dev->dev_spec.file;
	struct camdd_buf_data *data;
	struct camdd_buf *buf;
	off_t io_offset;
	int retval = 0, write_dev = dev->write_dev;
	int error_count = 0, no_resources = 0, double_buf_needed = 0;
	uint32_t num_sectors = 0, db_len = 0;

	buf = STAILQ_FIRST(&dev->run_queue);
	if (buf == NULL) {
		/* Nothing to run. */
		no_resources = 1;
		goto bailout;
	} else if ((dev->write_dev == 0)
		&& (dev->flags & (CAMDD_DEV_FLAG_EOF |
				  CAMDD_DEV_FLAG_EOF_SENT))) {
		/*
		 * A reader that has already seen or sent EOF does no more
		 * I/O; the buffer is completed with EOF status instead.
		 */
		STAILQ_REMOVE(&dev->run_queue, buf, camdd_buf, links);
		dev->num_run_queue--;
		buf->status = CAMDD_STATUS_EOF;
		error_count++;
		goto bailout;
	}

	/*
	 * If we're writing, we need to go through the source buffer list
	 * and create an S/G list.
	 */
	if (write_dev != 0) {
		retval = camdd_buf_sg_create(buf, /*iovec*/ 1,
		    dev->sector_size, &num_sectors, &double_buf_needed);
		if (retval != 0) {
			/*
			 * NOTE(review): buf is still linked on run_queue
			 * here, yet bailout passes it to
			 * camdd_complete_buf(), which inserts it on
			 * free_queue through the same 'links' field.
			 * Confirm this is intended.
			 */
			no_resources = 1;
			goto bailout;
		}
	}

	STAILQ_REMOVE(&dev->run_queue, buf, camdd_buf, links);
	dev->num_run_queue--;

	data = &buf->buf_type_spec.data;

	/*
	 * pread(2) and pwrite(2) offsets are byte offsets.
	 */
	io_offset = buf->lba * dev->sector_size;

	/*
	 * Unlock the mutex while we read or write.
	 */
	pthread_mutex_unlock(&dev->mutex);

	/*
	 * Note that we don't need to double buffer if we're the reader
	 * because in that case, we have allocated a single buffer of
	 * sufficient size to do the read.  This copy is necessary on
	 * writes because if one of the components of the S/G list is not
	 * a sector size multiple, the kernel will reject the write.  This
	 * is unfortunate but not surprising.  So this will make sure that
	 * we're using a single buffer that is a multiple of the sector size.
	 */
	if ((double_buf_needed != 0)
	 && (data->sg_count > 1)
	 && (write_dev != 0)) {
		uint32_t cur_offset;
		int i;

		/* The bounce buffer is allocated once and then reused. */
		if (file_dev->tmp_buf == NULL)
			file_dev->tmp_buf = calloc(dev->blocksize, 1);
		if (file_dev->tmp_buf == NULL) {
			buf->status = CAMDD_STATUS_ERROR;
			error_count++;
			/* bailout expects the mutex to be held. */
			pthread_mutex_lock(&dev->mutex);
			goto bailout;
		}
		/*
		 * Flatten the S/G list into the bounce buffer.
		 * NOTE(review): the copy is not bounded against
		 * dev->blocksize here; presumably the S/G list never
		 * exceeds one block -- confirm.
		 */
		for (i = 0, cur_offset = 0; i < data->sg_count; i++) {
			bcopy(data->iovec[i].iov_base,
			    &file_dev->tmp_buf[cur_offset],
			    data->iovec[i].iov_len);
			cur_offset += data->iovec[i].iov_len;
		}
		db_len = cur_offset;
	}

	/*
	 * Seekable files use positioned I/O at io_offset; everything else
	 * uses plain sequential read/write.
	 *
	 * NOTE(review): the write paths below pick tmp_buf whenever
	 * double_buf_needed is set, while the copy above additionally
	 * requires sg_count > 1.  With double_buf_needed set and
	 * sg_count <= 1, db_len would still be 0 -- confirm that
	 * combination cannot occur.
	 */
	if (file_dev->file_flags & CAMDD_FF_CAN_SEEK) {
		if (write_dev == 0) {
			/*
			 * XXX KDM is there any way we would need a S/G
			 * list here?
			 */
			retval = pread(file_dev->fd, data->buf,
			    buf->len, io_offset);
		} else {
			if (double_buf_needed != 0) {
				retval = pwrite(file_dev->fd, file_dev->tmp_buf,
				    db_len, io_offset);
			} else if (data->sg_count == 0) {
				retval = pwrite(file_dev->fd, data->buf,
				    data->fill_len, io_offset);
			} else {
				retval = pwritev(file_dev->fd, data->iovec,
				    data->sg_count, io_offset);
			}
		}
	} else {
		if (write_dev == 0) {
			/*
			 * XXX KDM is there any way we would need a S/G
			 * list here?
			 */
			retval = read(file_dev->fd, data->buf, buf->len);
		} else {
			if (double_buf_needed != 0) {
				retval = write(file_dev->fd, file_dev->tmp_buf,
				    db_len);
			} else if (data->sg_count == 0) {
				retval = write(file_dev->fd, data->buf,
				    data->fill_len);
			} else {
				retval = writev(file_dev->fd, data->iovec,
				    data->sg_count);
			}
		}
	}

	/* We're done, re-acquire the lock */
	pthread_mutex_lock(&dev->mutex);

	/* Classify the result: full I/O, error, EOF, or short I/O. */
	if (retval >= (ssize_t)data->fill_len) {
		/*
		 * If the bytes transferred is more than the request size,
		 * that indicates an overrun, which should only happen at
		 * the end of a transfer if we have to round up to a sector
		 * boundary.
		 */
		if (buf->status == CAMDD_STATUS_NONE)
			buf->status = CAMDD_STATUS_OK;
		data->resid = 0;
		dev->bytes_transferred += retval;
	} else if (retval == -1) {
		warn("Error %s %s", (write_dev) ? "writing to" :
		    "reading from", file_dev->filename);

		buf->status = CAMDD_STATUS_ERROR;
		data->resid = data->fill_len;
		error_count++;

		/* The remaining output is debugging detail only. */
		if (dev->debug == 0)
			goto bailout;

		if ((double_buf_needed != 0)
		 && (write_dev != 0)) {
			fprintf(stderr, "%s: fd %d, DB buf %p, len %u lba %ju "
			    "offset %ju\n", __func__, file_dev->fd,
			    file_dev->tmp_buf, db_len, (uintmax_t)buf->lba,
			    (uintmax_t)io_offset);
		} else if (data->sg_count == 0) {
			fprintf(stderr, "%s: fd %d, buf %p, len %u, lba %ju "
			    "offset %ju\n", __func__, file_dev->fd, data->buf,
			    data->fill_len, (uintmax_t)buf->lba,
			    (uintmax_t)io_offset);
		} else {
			int i;

			fprintf(stderr, "%s: fd %d, len %u, lba %ju "
			    "offset %ju\n", __func__, file_dev->fd,
			    data->fill_len, (uintmax_t)buf->lba,
			    (uintmax_t)io_offset);

			for (i = 0; i < data->sg_count; i++) {
				fprintf(stderr, "index %d ptr %p len %zu\n",
				    i, data->iovec[i].iov_base,
				    data->iovec[i].iov_len);
			}
		}
	} else if (retval == 0) {
		/* Nothing transferred at all: treat as EOF. */
		buf->status = CAMDD_STATUS_EOF;
		if (dev->debug != 0)
			printf("%s: got EOF from %s!\n", __func__,
			    file_dev->filename);
		data->resid = data->fill_len;
		error_count++;
	} else if (retval < (ssize_t)data->fill_len) {
		/* Partial transfer: record how much is still outstanding. */
		if (buf->status == CAMDD_STATUS_NONE)
			buf->status = CAMDD_STATUS_SHORT_IO;
		data->resid = data->fill_len - retval;
		dev->bytes_transferred += retval;
	}

bailout:
	if (buf != NULL) {
		if (buf->status == CAMDD_STATUS_EOF) {
			struct camdd_buf *buf2;
			/*
			 * On EOF, flag the device and mark everything still
			 * waiting on the run queue as EOF too.
			 */
			dev->flags |= CAMDD_DEV_FLAG_EOF;
			STAILQ_FOREACH(buf2, &dev->run_queue, links)
				buf2->status = CAMDD_STATUS_EOF;
		}

		camdd_complete_buf(dev, buf, &error_count);
	}

	/* An error takes precedence over a resource shortage. */
	if (error_count != 0)
		return (-1);
	else if (no_resources != 0)
		return (1);
	else
		return (0);
}
2483 
2484 /*
2485  * Execute one command from the run queue.  Returns 0 for success, 1 for
2486  * stop processing, and -1 for error.
2487  */
2488 int
2489 camdd_pass_run(struct camdd_dev *dev)
2490 {
2491 	struct camdd_buf *buf = NULL;
2492 	struct camdd_dev_pass *pass_dev = &dev->dev_spec.pass;
2493 	struct camdd_buf_data *data;
2494 	uint32_t num_blocks, sectors_used = 0;
2495 	union ccb *ccb;
2496 	int retval = 0, is_write = dev->write_dev;
2497 	int double_buf_needed = 0;
2498 
2499 	buf = STAILQ_FIRST(&dev->run_queue);
2500 	if (buf == NULL) {
2501 		retval = 1;
2502 		goto bailout;
2503 	}
2504 
2505 	/*
2506 	 * If we're writing, we need to go through the source buffer list
2507 	 * and create an S/G list.
2508 	 */
2509 	if (is_write != 0) {
2510 		retval = camdd_buf_sg_create(buf, /*iovec*/ 0,dev->sector_size,
2511 		    &sectors_used, &double_buf_needed);
2512 		if (retval != 0) {
2513 			retval = -1;
2514 			goto bailout;
2515 		}
2516 	}
2517 
2518 	STAILQ_REMOVE(&dev->run_queue, buf, camdd_buf, links);
2519 	dev->num_run_queue--;
2520 
2521 	data = &buf->buf_type_spec.data;
2522 
2523 	/*
2524 	 * In almost every case the number of blocks should be the device
2525 	 * block size.  The exception may be at the end of an I/O stream
2526 	 * for a partial block or at the end of a device.
2527 	 */
2528 	if (is_write != 0)
2529 		num_blocks = sectors_used;
2530 	else
2531 		num_blocks = data->fill_len / pass_dev->block_len;
2532 
2533 	ccb = &data->ccb;
2534 
2535 	switch (pass_dev->protocol) {
2536 	case PROTO_SCSI:
2537 		CCB_CLEAR_ALL_EXCEPT_HDR(&ccb->csio);
2538 
2539 		scsi_read_write(&ccb->csio,
2540 				/*retries*/ dev->retry_count,
2541 				/*cbfcnp*/ NULL,
2542 				/*tag_action*/ MSG_SIMPLE_Q_TAG,
2543 				/*readop*/ (dev->write_dev == 0) ? SCSI_RW_READ :
2544 					   SCSI_RW_WRITE,
2545 				/*byte2*/ 0,
2546 				/*minimum_cmd_size*/ dev->min_cmd_size,
2547 				/*lba*/ buf->lba,
2548 				/*block_count*/ num_blocks,
2549 				/*data_ptr*/ (data->sg_count != 0) ?
2550 					     (uint8_t *)data->segs : data->buf,
2551 				/*dxfer_len*/ (num_blocks * pass_dev->block_len),
2552 				/*sense_len*/ SSD_FULL_SIZE,
2553 				/*timeout*/ dev->io_timeout);
2554 
2555 		if (data->sg_count != 0) {
2556 			ccb->csio.sglist_cnt = data->sg_count;
2557 		}
2558 		break;
2559 	default:
2560 		retval = -1;
2561 		goto bailout;
2562 	}
2563 
2564 	/* Disable freezing the device queue */
2565 	ccb->ccb_h.flags |= CAM_DEV_QFRZDIS;
2566 
2567 	if (dev->retry_count != 0)
2568 		ccb->ccb_h.flags |= CAM_PASS_ERR_RECOVER;
2569 
2570 	if (data->sg_count != 0) {
2571 		ccb->ccb_h.flags |= CAM_DATA_SG;
2572 	}
2573 
2574 	/*
2575 	 * Store a pointer to the buffer in the CCB.  The kernel will
2576 	 * restore this when we get it back, and we'll use it to identify
2577 	 * the buffer this CCB came from.
2578 	 */
2579 	ccb->ccb_h.ccb_buf = buf;
2580 
2581 	/*
2582 	 * Unlock our mutex in preparation for issuing the ioctl.
2583 	 */
2584 	pthread_mutex_unlock(&dev->mutex);
2585 	/*
2586 	 * Queue the CCB to the pass(4) driver.
2587 	 */
2588 	if (ioctl(pass_dev->dev->fd, CAMIOQUEUE, ccb) == -1) {
2589 		pthread_mutex_lock(&dev->mutex);
2590 
2591 		warn("%s: error sending CAMIOQUEUE ioctl to %s%u", __func__,
2592 		     pass_dev->dev->device_name, pass_dev->dev->dev_unit_num);
2593 		warn("%s: CCB address is %p", __func__, ccb);
2594 		retval = -1;
2595 
2596 		STAILQ_INSERT_TAIL(&dev->free_queue, buf, links);
2597 	} else {
2598 		pthread_mutex_lock(&dev->mutex);
2599 
2600 		dev->cur_active_io++;
2601 		STAILQ_INSERT_TAIL(&dev->active_queue, buf, links);
2602 	}
2603 
2604 bailout:
2605 	return (retval);
2606 }
2607 
2608 int
2609 camdd_get_next_lba_len(struct camdd_dev *dev, uint64_t *lba, ssize_t *len)
2610 {
2611 	struct camdd_dev_pass *pass_dev;
2612 	uint32_t num_blocks;
2613 	int retval = 0;
2614 
2615 	pass_dev = &dev->dev_spec.pass;
2616 
2617 	*lba = dev->next_io_pos_bytes / dev->sector_size;
2618 	*len = dev->blocksize;
2619 	num_blocks = *len / dev->sector_size;
2620 
2621 	/*
2622 	 * If max_sector is 0, then we have no set limit.  This can happen
2623 	 * if we're writing to a file in a filesystem, or reading from
2624 	 * something like /dev/zero.
2625 	 */
2626 	if ((dev->max_sector != 0)
2627 	 || (dev->sector_io_limit != 0)) {
2628 		uint64_t max_sector;
2629 
2630 		if ((dev->max_sector != 0)
2631 		 && (dev->sector_io_limit != 0))
2632 			max_sector = min(dev->sector_io_limit, dev->max_sector);
2633 		else if (dev->max_sector != 0)
2634 			max_sector = dev->max_sector;
2635 		else
2636 			max_sector = dev->sector_io_limit;
2637 
2638 
2639 		/*
2640 		 * Check to see whether we're starting off past the end of
2641 		 * the device.  If so, we need to just send an EOF
2642 		 * notification to the writer.
2643 		 */
2644 		if (*lba > max_sector) {
2645 			*len = 0;
2646 			retval = 1;
2647 		} else if (((*lba + num_blocks) > max_sector + 1)
2648 			|| ((*lba + num_blocks) < *lba)) {
2649 			/*
2650 			 * If we get here (but pass the first check), we
2651 			 * can trim the request length down to go to the
2652 			 * end of the device.
2653 			 */
2654 			num_blocks = (max_sector + 1) - *lba;
2655 			*len = num_blocks * dev->sector_size;
2656 			retval = 1;
2657 		}
2658 	}
2659 
2660 	dev->next_io_pos_bytes += *len;
2661 
2662 	return (retval);
2663 }
2664 
/*
 * Queue up work for this device.
 *
 * For a reader (dev->write_dev == 0), read_buf is not referenced: a data
 * buffer is obtained, given the next LBA and length, and placed on the
 * run queue.
 *
 * For a writer, read_buf is a buffer that came from the peer.  Its data
 * is attached to the buffer at the head of the pending queue (or to a
 * newly obtained one).  Once that buffer holds buf->len bytes, or an EOF
 * flush is needed, it is moved to the run queue.  When read_buf holds
 * more than fits, it is split over several write buffers by way of
 * indirect buffers.
 *
 * Returns 0 for success, 1 for EOF detected, and -1 for failure.  Note
 * that the writer path also returns 1 when camdd_get_buf() fails.
 */
int
camdd_queue(struct camdd_dev *dev, struct camdd_buf *read_buf)
{
	struct camdd_buf *buf = NULL;
	struct camdd_buf_data *data;
	struct camdd_dev_pass *pass_dev;
	size_t new_len;
	struct camdd_buf_data *rb_data;
	int is_write = dev->write_dev;
	int eof_flush_needed = 0;
	int retval = 0;
	int error;

	/* NOTE(review): pass_dev is assigned but never read below. */
	pass_dev = &dev->dev_spec.pass;

	/*
	 * If we've gotten EOF or our partner has, we should not continue
	 * queueing I/O.  If we're a writer, though, we should continue
	 * to write any buffers that don't have EOF status.
	 */
	if ((dev->flags & CAMDD_DEV_FLAG_EOF)
	 || ((dev->flags & CAMDD_DEV_FLAG_PEER_EOF)
	  && (is_write == 0))) {
		/*
		 * Tell the worker thread that we have seen EOF.
		 */
		retval = 1;

		/*
		 * If we're the writer, send the buffer back with EOF status.
		 */
		if (is_write) {
			read_buf->status = CAMDD_STATUS_EOF;

			/*
			 * NOTE(review): 'error' is stored here and further
			 * down but never examined.
			 */
			error = camdd_complete_peer_buf(dev, read_buf);
		}
		goto bailout;
	}

	/* Reader: set up the next read and put it on the run queue. */
	if (is_write == 0) {
		buf = camdd_get_buf(dev, CAMDD_BUF_DATA);
		if (buf == NULL) {
			retval = -1;
			goto bailout;
		}
		data = &buf->buf_type_spec.data;

		retval = camdd_get_next_lba_len(dev, &buf->lba, &buf->len);
		if (retval != 0) {
			buf->status = CAMDD_STATUS_EOF;

			/*
			 * A zero-length EOF buffer is only worth queueing
			 * if no EOF has been queued or sent already.
			 */
		 	if ((buf->len == 0)
			 && ((dev->flags & (CAMDD_DEV_FLAG_EOF_SENT |
			     CAMDD_DEV_FLAG_EOF_QUEUED)) != 0)) {
				camdd_release_buf(buf);
				goto bailout;
			}
			dev->flags |= CAMDD_DEV_FLAG_EOF_QUEUED;
		}

		data->fill_len = buf->len;
		data->src_start_offset = buf->lba * dev->sector_size;

		/*
		 * Put this on the run queue.
		 */
		STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
		dev->num_run_queue++;

		/* We're done. */
		goto bailout;
	}

	/* Everything from here on is the writer path. */

	/*
	 * Check for new EOF status from the reader.
	 */
	if ((read_buf->status == CAMDD_STATUS_EOF)
	 || (read_buf->status == CAMDD_STATUS_ERROR)) {
		dev->flags |= CAMDD_DEV_FLAG_PEER_EOF;
		/*
		 * With nothing pending and no data in this buffer there is
		 * nothing to flush; just hand the buffer back.
		 */
		if ((STAILQ_FIRST(&dev->pending_queue) == NULL)
		 && (read_buf->len == 0)) {
			camdd_complete_peer_buf(dev, read_buf);
			retval = 1;
			goto bailout;
		} else
			eof_flush_needed = 1;
	}

	/*
	 * See if we have a buffer we're composing with pieces from our
	 * partner thread.
	 */
	buf = STAILQ_FIRST(&dev->pending_queue);
	if (buf == NULL) {
		uint64_t lba;
		ssize_t len;

		retval = camdd_get_next_lba_len(dev, &lba, &len);
		if (retval != 0) {
			read_buf->status = CAMDD_STATUS_EOF;

			/* No room left on this device at all. */
			if (len == 0) {
				dev->flags |= CAMDD_DEV_FLAG_EOF;
				error = camdd_complete_peer_buf(dev, read_buf);
				goto bailout;
			}
		}

		/*
		 * If we don't have a pending buffer, we need to grab a new
		 * one from the free list or allocate another one.
		 */
		buf = camdd_get_buf(dev, CAMDD_BUF_DATA);
		if (buf == NULL) {
			retval = 1;
			goto bailout;
		}

		buf->lba = lba;
		buf->len = len;

		STAILQ_INSERT_TAIL(&dev->pending_queue, buf, links);
		dev->num_pending_queue++;
	}

	data = &buf->buf_type_spec.data;

	rb_data = &read_buf->buf_type_spec.data;

	/*
	 * The mismatch is only reported in debug mode; the expected
	 * position is resynchronized to the reader's either way.
	 */
	if ((rb_data->src_start_offset != dev->next_peer_pos_bytes)
	 && (dev->debug != 0)) {
		printf("%s: WARNING: reader offset %#jx != expected offset "
		    "%#jx\n", __func__, (uintmax_t)rb_data->src_start_offset,
		    (uintmax_t)dev->next_peer_pos_bytes);
	}
	dev->next_peer_pos_bytes = rb_data->src_start_offset +
	    (rb_data->fill_len - rb_data->resid);

	/* Bytes the write buffer would hold after adding read_buf. */
	new_len = (rb_data->fill_len - rb_data->resid) + data->fill_len;
	if (new_len < buf->len) {
		/*
		 * There are three cases here:
		 * 1. We need more data to fill up a block, so we put
		 *    this I/O on the queue and wait for more I/O.
		 * 2. We have a pending buffer in the queue that is
		 *    smaller than our blocksize, but we got an EOF.  So we
		 *    need to go ahead and flush the write out.
		 * 3. We got an error.
		 */

		/*
		 * Increment our fill length.
		 */
		data->fill_len += (rb_data->fill_len - rb_data->resid);

		/*
		 * Add the new read buffer to the list for writing.
		 */
		STAILQ_INSERT_TAIL(&buf->src_list, read_buf, src_links);

		/* Increment the count */
		buf->src_count++;

		if (eof_flush_needed == 0) {
			/*
			 * We need to exit, because we don't have enough
			 * data yet.
			 */
			goto bailout;
		} else {
			/*
			 * Take the buffer off of the pending queue.
			 */
			STAILQ_REMOVE(&dev->pending_queue, buf, camdd_buf,
				      links);
			dev->num_pending_queue--;

			/*
			 * If we need an EOF flush, but there is no data
			 * to flush, go ahead and return this buffer.
			 * A null error_count is passed; on this (writer)
			 * path camdd_complete_buf() does not touch it.
			 */
			if (data->fill_len == 0) {
				camdd_complete_buf(dev, buf, /*error_count*/0);
				retval = 1;
				goto bailout;
			}

			/*
			 * Put this on the next queue for execution.
			 */
			STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
			dev->num_run_queue++;
		}
	} else if (new_len == buf->len) {
		/*
		 * We have enough data to completely fill one block,
		 * so we're ready to issue the I/O.
		 */

		/*
		 * Take the buffer off of the pending queue.
		 */
		STAILQ_REMOVE(&dev->pending_queue, buf, camdd_buf, links);
		dev->num_pending_queue--;

		/*
		 * Add the new read buffer to the list for writing.
		 */
		STAILQ_INSERT_TAIL(&buf->src_list, read_buf, src_links);

		/* Increment the count */
		buf->src_count++;

		/*
		 * Increment our fill length.
		 */
		data->fill_len += (rb_data->fill_len - rb_data->resid);

		/*
		 * Put this on the next queue for execution.
		 */
		STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
		dev->num_run_queue++;
	} else {
		/*
		 * read_buf holds more than fits in the current write
		 * buffer, so it is carved up with indirect buffers.
		 */
		struct camdd_buf *idb;
		struct camdd_buf_indirect *indirect;
		uint32_t len_to_go, cur_offset;


		idb = camdd_get_buf(dev, CAMDD_BUF_INDIRECT);
		if (idb == NULL) {
			retval = 1;
			goto bailout;
		}
		indirect = &idb->buf_type_spec.indirect;
		indirect->src_buf = read_buf;
		/* Every indirect buffer holds a reference on read_buf. */
		read_buf->refcount++;
		indirect->offset = 0;
		indirect->start_ptr = rb_data->buf;
		/*
		 * We've already established that there is more
		 * data in read_buf than we have room for in our
		 * current write request.  So this particular chunk
		 * of the request should just be the remainder
		 * needed to fill up a block.
		 */
		indirect->len = buf->len - (data->fill_len - data->resid);

		camdd_buf_add_child(buf, idb);

		/*
		 * This buffer is ready to execute, so we can take
		 * it off the pending queue and put it on the run
		 * queue.
		 */
		STAILQ_REMOVE(&dev->pending_queue, buf, camdd_buf,
			      links);
		dev->num_pending_queue--;
		STAILQ_INSERT_TAIL(&dev->run_queue, buf, links);
		dev->num_run_queue++;

		/* Offset in read_buf of the first byte not yet assigned. */
		cur_offset = indirect->offset + indirect->len;

		/*
		 * The resulting I/O would be too large to fit in
		 * one block.  We need to split this I/O into
		 * multiple pieces.  Allocate as many buffers as needed.
		 */
		for (len_to_go = rb_data->fill_len - rb_data->resid -
		     indirect->len; len_to_go > 0;) {
			struct camdd_buf *new_buf;
			struct camdd_buf_data *new_data;
			uint64_t lba;
			ssize_t len;

			retval = camdd_get_next_lba_len(dev, &lba, &len);
			if ((retval != 0)
			 && (len == 0)) {
				/*
				 * The device has already been marked
				 * as EOF, and there is no space left.
				 */
				goto bailout;
			}

			new_buf = camdd_get_buf(dev, CAMDD_BUF_DATA);
			if (new_buf == NULL) {
				retval = 1;
				goto bailout;
			}

			new_buf->lba = lba;
			new_buf->len = len;

			idb = camdd_get_buf(dev, CAMDD_BUF_INDIRECT);
			if (idb == NULL) {
				/*
				 * NOTE(review): new_buf is on no queue at
				 * this point and is not released before
				 * bailing out -- looks like a leak.
				 */
				retval = 1;
				goto bailout;
			}

			indirect = &idb->buf_type_spec.indirect;

			indirect->src_buf = read_buf;
			read_buf->refcount++;
			indirect->offset = cur_offset;
			indirect->start_ptr = rb_data->buf + cur_offset;
			/* Take at most one write buffer's worth. */
			indirect->len = min(len_to_go, new_buf->len);
#if 0
			if (((indirect->len % dev->sector_size) != 0)
			 || ((indirect->offset % dev->sector_size) != 0)) {
				warnx("offset %ju len %ju not aligned with "
				    "sector size %u", indirect->offset,
				    (uintmax_t)indirect->len, dev->sector_size);
			}
#endif
			cur_offset += indirect->len;
			len_to_go -= indirect->len;

			camdd_buf_add_child(new_buf, idb);

			new_data = &new_buf->buf_type_spec.data;

			/*
			 * A full buffer (or any buffer, when flushing for
			 * EOF) is ready to run; a partial one waits on
			 * the pending queue for more data.
			 * NOTE(review): the pending test compares against
			 * buf->len rather than new_buf->len -- confirm.
			 */
			if ((new_data->fill_len == new_buf->len)
			 || (eof_flush_needed != 0)) {
				STAILQ_INSERT_TAIL(&dev->run_queue,
						   new_buf, links);
				dev->num_run_queue++;
			} else if (new_data->fill_len < buf->len) {
				STAILQ_INSERT_TAIL(&dev->pending_queue,
					   	new_buf, links);
				dev->num_pending_queue++;
			} else {
				warnx("%s: too much data in new "
				      "buffer!", __func__);
				retval = 1;
				goto bailout;
			}
		}
	}

bailout:
	return (retval);
}
3011 
3012 void
3013 camdd_get_depth(struct camdd_dev *dev, uint32_t *our_depth,
3014 		uint32_t *peer_depth, uint32_t *our_bytes, uint32_t *peer_bytes)
3015 {
3016 	*our_depth = dev->cur_active_io + dev->num_run_queue;
3017 	if (dev->num_peer_work_queue >
3018 	    dev->num_peer_done_queue)
3019 		*peer_depth = dev->num_peer_work_queue -
3020 			      dev->num_peer_done_queue;
3021 	else
3022 		*peer_depth = 0;
3023 	*our_bytes = *our_depth * dev->blocksize;
3024 	*peer_bytes = dev->peer_bytes_queued;
3025 }
3026 
3027 void
3028 camdd_sig_handler(int sig)
3029 {
3030 	if (sig == SIGINFO)
3031 		need_status = 1;
3032 	else {
3033 		need_exit = 1;
3034 		error_exit = 1;
3035 	}
3036 
3037 	sem_post(&camdd_sem);
3038 }
3039 
3040 void
3041 camdd_print_status(struct camdd_dev *camdd_dev, struct camdd_dev *other_dev,
3042 		   struct timespec *start_time)
3043 {
3044 	struct timespec done_time;
3045 	uint64_t total_ns;
3046 	long double mb_sec, total_sec;
3047 	int error = 0;
3048 
3049 	error = clock_gettime(CLOCK_MONOTONIC_PRECISE, &done_time);
3050 	if (error != 0) {
3051 		warn("Unable to get done time");
3052 		return;
3053 	}
3054 
3055 	timespecsub(&done_time, start_time, &done_time);
3056 
3057 	total_ns = done_time.tv_nsec + (done_time.tv_sec * 1000000000);
3058 	total_sec = total_ns;
3059 	total_sec /= 1000000000;
3060 
3061 	fprintf(stderr, "%ju bytes %s %s\n%ju bytes %s %s\n"
3062 		"%.4Lf seconds elapsed\n",
3063 		(uintmax_t)camdd_dev->bytes_transferred,
3064 		(camdd_dev->write_dev == 0) ?  "read from" : "written to",
3065 		camdd_dev->device_name,
3066 		(uintmax_t)other_dev->bytes_transferred,
3067 		(other_dev->write_dev == 0) ? "read from" : "written to",
3068 		other_dev->device_name, total_sec);
3069 
3070 	mb_sec = min(other_dev->bytes_transferred,camdd_dev->bytes_transferred);
3071 	mb_sec /= 1024 * 1024;
3072 	mb_sec *= 1000000000;
3073 	mb_sec /= total_ns;
3074 	fprintf(stderr, "%.2Lf MB/sec\n", mb_sec);
3075 }
3076 
3077 int
3078 camdd_rw(struct camdd_io_opts *io_opts, int num_io_opts, uint64_t max_io,
3079 	 int retry_count, int timeout)
3080 {
3081 	struct cam_device *new_cam_dev = NULL;
3082 	struct camdd_dev *devs[2];
3083 	struct timespec start_time;
3084 	pthread_t threads[2];
3085 	int unit = 0;
3086 	int error = 0;
3087 	int i;
3088 
3089 	if (num_io_opts != 2) {
3090 		warnx("Must have one input and one output path");
3091 		error = 1;
3092 		goto bailout;
3093 	}
3094 
3095 	bzero(devs, sizeof(devs));
3096 
3097 	for (i = 0; i < num_io_opts; i++) {
3098 		switch (io_opts[i].dev_type) {
3099 		case CAMDD_DEV_PASS: {
3100 			if (isdigit(io_opts[i].dev_name[0])) {
3101 				camdd_argmask new_arglist = CAMDD_ARG_NONE;
3102 				int bus = 0, target = 0, lun = 0;
3103 				int rv;
3104 
3105 				/* device specified as bus:target[:lun] */
3106 				rv = parse_btl(io_opts[i].dev_name, &bus,
3107 				    &target, &lun, &new_arglist);
3108 				if (rv < 2) {
3109 					warnx("numeric device specification "
3110 					     "must be either bus:target, or "
3111 					     "bus:target:lun");
3112 					error = 1;
3113 					goto bailout;
3114 				}
3115 				/* default to 0 if lun was not specified */
3116 				if ((new_arglist & CAMDD_ARG_LUN) == 0) {
3117 					lun = 0;
3118 					new_arglist |= CAMDD_ARG_LUN;
3119 				}
3120 				new_cam_dev = cam_open_btl(bus, target, lun,
3121 				    O_RDWR, NULL);
3122 			} else {
3123 				char name[30];
3124 
3125 				if (cam_get_device(io_opts[i].dev_name, name,
3126 						   sizeof name, &unit) == -1) {
3127 					warnx("%s", cam_errbuf);
3128 					error = 1;
3129 					goto bailout;
3130 				}
3131 				new_cam_dev = cam_open_spec_device(name, unit,
3132 				    O_RDWR, NULL);
3133 			}
3134 
3135 			if (new_cam_dev == NULL) {
3136 				warnx("%s", cam_errbuf);
3137 				error = 1;
3138 				goto bailout;
3139 			}
3140 
3141 			devs[i] = camdd_probe_pass(new_cam_dev,
3142 			    /*io_opts*/ &io_opts[i],
3143 			    CAMDD_ARG_ERR_RECOVER,
3144 			    /*probe_retry_count*/ 3,
3145 			    /*probe_timeout*/ 5000,
3146 			    /*io_retry_count*/ retry_count,
3147 			    /*io_timeout*/ timeout);
3148 			if (devs[i] == NULL) {
3149 				warn("Unable to probe device %s%u",
3150 				     new_cam_dev->device_name,
3151 				     new_cam_dev->dev_unit_num);
3152 				error = 1;
3153 				goto bailout;
3154 			}
3155 			break;
3156 		}
3157 		case CAMDD_DEV_FILE: {
3158 			int fd = -1;
3159 
3160 			if (io_opts[i].dev_name[0] == '-') {
3161 				if (io_opts[i].write_dev != 0)
3162 					fd = STDOUT_FILENO;
3163 				else
3164 					fd = STDIN_FILENO;
3165 			} else {
3166 				if (io_opts[i].write_dev != 0) {
3167 					fd = open(io_opts[i].dev_name,
3168 					    O_RDWR | O_CREAT, S_IWUSR |S_IRUSR);
3169 				} else {
3170 					fd = open(io_opts[i].dev_name,
3171 					    O_RDONLY);
3172 				}
3173 			}
3174 			if (fd == -1) {
3175 				warn("error opening file %s",
3176 				    io_opts[i].dev_name);
3177 				error = 1;
3178 				goto bailout;
3179 			}
3180 
3181 			devs[i] = camdd_probe_file(fd, &io_opts[i],
3182 			    retry_count, timeout);
3183 			if (devs[i] == NULL) {
3184 				error = 1;
3185 				goto bailout;
3186 			}
3187 
3188 			break;
3189 		}
3190 		default:
3191 			warnx("Unknown device type %d (%s)",
3192 			    io_opts[i].dev_type, io_opts[i].dev_name);
3193 			error = 1;
3194 			goto bailout;
3195 			break; /*NOTREACHED */
3196 		}
3197 
3198 		devs[i]->write_dev = io_opts[i].write_dev;
3199 
3200 		devs[i]->start_offset_bytes = io_opts[i].offset;
3201 
3202 		if (max_io != 0) {
3203 			devs[i]->sector_io_limit =
3204 			    (devs[i]->start_offset_bytes /
3205 			    devs[i]->sector_size) +
3206 			    (max_io / devs[i]->sector_size) - 1;
3207 		}
3208 
3209 		devs[i]->next_io_pos_bytes = devs[i]->start_offset_bytes;
3210 		devs[i]->next_completion_pos_bytes =devs[i]->start_offset_bytes;
3211 	}
3212 
3213 	devs[0]->peer_dev = devs[1];
3214 	devs[1]->peer_dev = devs[0];
3215 	devs[0]->next_peer_pos_bytes = devs[0]->peer_dev->next_io_pos_bytes;
3216 	devs[1]->next_peer_pos_bytes = devs[1]->peer_dev->next_io_pos_bytes;
3217 
3218 	sem_init(&camdd_sem, /*pshared*/ 0, 0);
3219 
3220 	signal(SIGINFO, camdd_sig_handler);
3221 	signal(SIGINT, camdd_sig_handler);
3222 
3223 	error = clock_gettime(CLOCK_MONOTONIC_PRECISE, &start_time);
3224 	if (error != 0) {
3225 		warn("Unable to get start time");
3226 		goto bailout;
3227 	}
3228 
3229 	for (i = 0; i < num_io_opts; i++) {
3230 		error = pthread_create(&threads[i], NULL, camdd_worker,
3231 				       (void *)devs[i]);
3232 		if (error != 0) {
3233 			warnc(error, "pthread_create() failed");
3234 			goto bailout;
3235 		}
3236 	}
3237 
3238 	for (;;) {
3239 		if ((sem_wait(&camdd_sem) == -1)
3240 		 || (need_exit != 0)) {
3241 			struct kevent ke;
3242 
3243 			for (i = 0; i < num_io_opts; i++) {
3244 				EV_SET(&ke, (uintptr_t)&devs[i]->work_queue,
3245 				    EVFILT_USER, 0, NOTE_TRIGGER, 0, NULL);
3246 
3247 				devs[i]->flags |= CAMDD_DEV_FLAG_EOF;
3248 
3249 				error = kevent(devs[i]->kq, &ke, 1, NULL, 0,
3250 						NULL);
3251 				if (error == -1)
3252 					warn("%s: unable to wake up thread",
3253 					    __func__);
3254 				error = 0;
3255 			}
3256 			break;
3257 		} else if (need_status != 0) {
3258 			camdd_print_status(devs[0], devs[1], &start_time);
3259 			need_status = 0;
3260 		}
3261 	}
3262 	for (i = 0; i < num_io_opts; i++) {
3263 		pthread_join(threads[i], NULL);
3264 	}
3265 
3266 	camdd_print_status(devs[0], devs[1], &start_time);
3267 
3268 bailout:
3269 
3270 	for (i = 0; i < num_io_opts; i++)
3271 		camdd_free_dev(devs[i]);
3272 
3273 	return (error + error_exit);
3274 }
3275 
/*
 * Write the command synopsis, followed by a description of each option
 * and each input/output parameter, to stderr.
 */
void
usage(void)
{
	/* The help text has no conversions, so it is emitted verbatim. */
	static const char help_text[] =
"usage:  camdd <-i|-o pass=pass0,bs=1M,offset=1M,depth=4>\n"
"              <-i|-o file=/tmp/file,bs=512K,offset=1M>\n"
"              <-i|-o file=/dev/da0,bs=512K,offset=1M>\n"
"              <-i|-o file=/dev/nsa0,bs=512K>\n"
"              [-C retry_count][-E][-m max_io_amt][-t timeout_secs][-v][-h]\n"
"Option description\n"
"-i <arg=val>  Specify input device/file and parameters\n"
"-o <arg=val>  Specify output device/file and parameters\n"
"Input and Output parameters\n"
"pass=name     Specify a pass(4) device like pass0 or /dev/pass0\n"
"file=name     Specify a file or device, /tmp/foo, /dev/da0, /dev/null\n"
"              or - for stdin/stdout\n"
"bs=blocksize  Specify blocksize in bytes, or using K, M, G, etc. suffix\n"
"offset=len    Specify starting offset in bytes or using K, M, G suffix\n"
"              NOTE: offset cannot be specified on tapes, pipes, stdin/out\n"
"depth=N       Specify a numeric queue depth.  This only applies to pass(4)\n"
"mcs=N         Specify a minimum cmd size for pass(4) read/write commands\n"
"Optional arguments\n"
"-C retry_cnt  Specify a retry count for pass(4) devices\n"
"-E            Enable CAM error recovery for pass(4) devices\n"
"-m max_io     Specify the maximum amount to be transferred in bytes or\n"
"              using K, G, M, etc. suffixes\n"
"-t timeout    Specify the I/O timeout to use with pass(4) devices\n"
"-v            Enable verbose error recovery\n"
"-h            Print this message\n";

	fputs(help_text, stderr);
}
3306 
3307 
3308 int
3309 camdd_parse_io_opts(char *args, int is_write, struct camdd_io_opts *io_opts)
3310 {
3311 	char *tmpstr, *tmpstr2;
3312 	char *orig_tmpstr = NULL;
3313 	int retval = 0;
3314 
3315 	io_opts->write_dev = is_write;
3316 
3317 	tmpstr = strdup(args);
3318 	if (tmpstr == NULL) {
3319 		warn("strdup failed");
3320 		retval = 1;
3321 		goto bailout;
3322 	}
3323 	orig_tmpstr = tmpstr;
3324 	while ((tmpstr2 = strsep(&tmpstr, ",")) != NULL) {
3325 		char *name, *value;
3326 
3327 		/*
3328 		 * If the user creates an empty parameter by putting in two
3329 		 * commas, skip over it and look for the next field.
3330 		 */
3331 		if (*tmpstr2 == '\0')
3332 			continue;
3333 
3334 		name = strsep(&tmpstr2, "=");
3335 		if (*name == '\0') {
3336 			warnx("Got empty I/O parameter name");
3337 			retval = 1;
3338 			goto bailout;
3339 		}
3340 		value = strsep(&tmpstr2, "=");
3341 		if ((value == NULL)
3342 		 || (*value == '\0')) {
3343 			warnx("Empty I/O parameter value for %s", name);
3344 			retval = 1;
3345 			goto bailout;
3346 		}
3347 		if (strncasecmp(name, "file", 4) == 0) {
3348 			io_opts->dev_type = CAMDD_DEV_FILE;
3349 			io_opts->dev_name = strdup(value);
3350 			if (io_opts->dev_name == NULL) {
3351 				warn("Error allocating memory");
3352 				retval = 1;
3353 				goto bailout;
3354 			}
3355 		} else if (strncasecmp(name, "pass", 4) == 0) {
3356 			io_opts->dev_type = CAMDD_DEV_PASS;
3357 			io_opts->dev_name = strdup(value);
3358 			if (io_opts->dev_name == NULL) {
3359 				warn("Error allocating memory");
3360 				retval = 1;
3361 				goto bailout;
3362 			}
3363 		} else if ((strncasecmp(name, "bs", 2) == 0)
3364 			|| (strncasecmp(name, "blocksize", 9) == 0)) {
3365 			retval = expand_number(value, &io_opts->blocksize);
3366 			if (retval == -1) {
3367 				warn("expand_number(3) failed on %s=%s", name,
3368 				    value);
3369 				retval = 1;
3370 				goto bailout;
3371 			}
3372 		} else if (strncasecmp(name, "depth", 5) == 0) {
3373 			char *endptr;
3374 
3375 			io_opts->queue_depth = strtoull(value, &endptr, 0);
3376 			if (*endptr != '\0') {
3377 				warnx("invalid queue depth %s", value);
3378 				retval = 1;
3379 				goto bailout;
3380 			}
3381 		} else if (strncasecmp(name, "mcs", 3) == 0) {
3382 			char *endptr;
3383 
3384 			io_opts->min_cmd_size = strtol(value, &endptr, 0);
3385 			if ((*endptr != '\0')
3386 			 || ((io_opts->min_cmd_size > 16)
3387 			  || (io_opts->min_cmd_size < 0))) {
3388 				warnx("invalid minimum cmd size %s", value);
3389 				retval = 1;
3390 				goto bailout;
3391 			}
3392 		} else if (strncasecmp(name, "offset", 6) == 0) {
3393 			retval = expand_number(value, &io_opts->offset);
3394 			if (retval == -1) {
3395 				warn("expand_number(3) failed on %s=%s", name,
3396 				    value);
3397 				retval = 1;
3398 				goto bailout;
3399 			}
3400 		} else if (strncasecmp(name, "debug", 5) == 0) {
3401 			char *endptr;
3402 
3403 			io_opts->debug = strtoull(value, &endptr, 0);
3404 			if (*endptr != '\0') {
3405 				warnx("invalid debug level %s", value);
3406 				retval = 1;
3407 				goto bailout;
3408 			}
3409 		} else {
3410 			warnx("Unrecognized parameter %s=%s", name, value);
3411 		}
3412 	}
3413 bailout:
3414 	free(orig_tmpstr);
3415 
3416 	return (retval);
3417 }
3418 
3419 int
3420 main(int argc, char **argv)
3421 {
3422 	int c;
3423 	camdd_argmask arglist = CAMDD_ARG_NONE;
3424 	int timeout = 0, retry_count = 1;
3425 	int error = 0;
3426 	uint64_t max_io = 0;
3427 	struct camdd_io_opts *opt_list = NULL;
3428 
3429 	if (argc == 1) {
3430 		usage();
3431 		exit(1);
3432 	}
3433 
3434 	opt_list = calloc(2, sizeof(struct camdd_io_opts));
3435 	if (opt_list == NULL) {
3436 		warn("Unable to allocate option list");
3437 		error = 1;
3438 		goto bailout;
3439 	}
3440 
3441 	while ((c = getopt(argc, argv, "C:Ehi:m:o:t:v")) != -1){
3442 		switch (c) {
3443 		case 'C':
3444 			retry_count = strtol(optarg, NULL, 0);
3445 			if (retry_count < 0)
3446 				errx(1, "retry count %d is < 0",
3447 				     retry_count);
3448 			arglist |= CAMDD_ARG_RETRIES;
3449 			break;
3450 		case 'E':
3451 			arglist |= CAMDD_ARG_ERR_RECOVER;
3452 			break;
3453 		case 'i':
3454 		case 'o':
3455 			if (((c == 'i')
3456 			  && (opt_list[0].dev_type != CAMDD_DEV_NONE))
3457 			 || ((c == 'o')
3458 			  && (opt_list[1].dev_type != CAMDD_DEV_NONE))) {
3459 				errx(1, "Only one input and output path "
3460 				    "allowed");
3461 			}
3462 			error = camdd_parse_io_opts(optarg, (c == 'o') ? 1 : 0,
3463 			    (c == 'o') ? &opt_list[1] : &opt_list[0]);
3464 			if (error != 0)
3465 				goto bailout;
3466 			break;
3467 		case 'm':
3468 			error = expand_number(optarg, &max_io);
3469 			if (error == -1) {
3470 				warn("invalid maximum I/O amount %s", optarg);
3471 				error = 1;
3472 				goto bailout;
3473 			}
3474 			break;
3475 		case 't':
3476 			timeout = strtol(optarg, NULL, 0);
3477 			if (timeout < 0)
3478 				errx(1, "invalid timeout %d", timeout);
3479 			/* Convert the timeout from seconds to ms */
3480 			timeout *= 1000;
3481 			arglist |= CAMDD_ARG_TIMEOUT;
3482 			break;
3483 		case 'v':
3484 			arglist |= CAMDD_ARG_VERBOSE;
3485 			break;
3486 		case 'h':
3487 		default:
3488 			usage();
3489 			exit(1);
3490 			break; /*NOTREACHED*/
3491 		}
3492 	}
3493 
3494 	if ((opt_list[0].dev_type == CAMDD_DEV_NONE)
3495 	 || (opt_list[1].dev_type == CAMDD_DEV_NONE))
3496 		errx(1, "Must specify both -i and -o");
3497 
3498 	/*
3499 	 * Set the timeout if the user hasn't specified one.
3500 	 */
3501 	if (timeout == 0)
3502 		timeout = CAMDD_PASS_RW_TIMEOUT;
3503 
3504 	error = camdd_rw(opt_list, 2, max_io, retry_count, timeout);
3505 
3506 bailout:
3507 	free(opt_list);
3508 
3509 	exit(error);
3510 }
3511