xref: /dragonfly/sys/dev/disk/ccd/ccd.c (revision 0db87cb7)
1 /*
2  * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  */
35 /*
36  * Copyright (c) 1995 Jason R. Thorpe.
37  * All rights reserved.
38  *
39  * Redistribution and use in source and binary forms, with or without
40  * modification, are permitted provided that the following conditions
41  * are met:
42  * 1. Redistributions of source code must retain the above copyright
43  *    notice, this list of conditions and the following disclaimer.
44  * 2. Redistributions in binary form must reproduce the above copyright
45  *    notice, this list of conditions and the following disclaimer in the
46  *    documentation and/or other materials provided with the distribution.
47  * 3. All advertising materials mentioning features or use of this software
48  *    must display the following acknowledgement:
49  *	This product includes software developed for the NetBSD Project
50  *	by Jason R. Thorpe.
51  * 4. The name of the author may not be used to endorse or promote products
52  *    derived from this software without specific prior written permission.
53  *
54  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
55  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
56  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
57  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
58  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
59  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
60  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
61  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
62  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64  * SUCH DAMAGE.
65  */
66 
67 /*
68  * Copyright (c) 1988 University of Utah.
69  * Copyright (c) 1990, 1993
70  *	The Regents of the University of California.  All rights reserved.
71  *
72  * This code is derived from software contributed to Berkeley by
73  * the Systems Programming Group of the University of Utah Computer
74  * Science Department.
75  *
76  * Redistribution and use in source and binary forms, with or without
77  * modification, are permitted provided that the following conditions
78  * are met:
79  * 1. Redistributions of source code must retain the above copyright
80  *    notice, this list of conditions and the following disclaimer.
81  * 2. Redistributions in binary form must reproduce the above copyright
82  *    notice, this list of conditions and the following disclaimer in the
83  *    documentation and/or other materials provided with the distribution.
84  * 3. All advertising materials mentioning features or use of this software
85  *    must display the following acknowledgement:
86  *	This product includes software developed by the University of
87  *	California, Berkeley and its contributors.
88  * 4. Neither the name of the University nor the names of its contributors
89  *    may be used to endorse or promote products derived from this software
90  *    without specific prior written permission.
91  *
92  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
93  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
95  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
96  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
97  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
98  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
99  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
100  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
101  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
102  * SUCH DAMAGE.
103  *
104  * from: Utah $Hdr: cd.c 1.6 90/11/28$
105  */
106 /*
107  * @(#)cd.c	8.2 (Berkeley) 11/16/93
108  * $FreeBSD: src/sys/dev/ccd/ccd.c,v 1.73.2.1 2001/09/11 09:49:52 kris Exp $
109  * $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $
110  */
111 
112 /*
113  * "Concatenated" disk driver.
114  *
115  * Original dynamic configuration support by:
116  *	Jason R. Thorpe <thorpej@nas.nasa.gov>
117  *	Numerical Aerodynamic Simulation Facility
118  *	Mail Stop 258-6
119  *	NASA Ames Research Center
120  *	Moffett Field, CA 94035
121  */
122 
123 #include "use_ccd.h"
124 
125 #include <sys/param.h>
126 #include <sys/systm.h>
127 #include <sys/kernel.h>
128 #include <sys/module.h>
129 #include <sys/proc.h>
130 #include <sys/buf.h>
131 #include <sys/malloc.h>
132 #include <sys/nlookup.h>
133 #include <sys/conf.h>
134 #include <sys/stat.h>
135 #include <sys/sysctl.h>
136 #include <sys/disk.h>
137 #include <sys/dtype.h>
138 #include <sys/diskslice.h>
139 #include <sys/devicestat.h>
140 #include <sys/fcntl.h>
141 #include <sys/vnode.h>
142 #include <sys/ccdvar.h>
143 
144 #include <vm/vm_zone.h>
145 
146 #include <vfs/ufs/dinode.h> 	/* XXX Used only for fs.h */
147 #include <vfs/ufs/fs.h> 	/* XXX used only to get BBSIZE and SBSIZE */
148 
149 #include <sys/thread2.h>
150 #include <sys/buf2.h>
151 
/*
 * Building with CCDDEBUG turns on DEBUG for this file unless it is
 * already defined.
 */
#if defined(CCDDEBUG) && !defined(DEBUG)
#define DEBUG
#endif

#ifdef DEBUG
/* Bits of the ccddebug mask; each debug kprintf tests one or more of them. */
#define CCDB_FOLLOW	0x01
#define CCDB_INIT	0x02
#define CCDB_IO		0x04
#define CCDB_LABEL	0x08
#define CCDB_VNODE	0x10
/* All classes are enabled by default; adjustable through the debug sysctl. */
static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL |
    CCDB_VNODE;
SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, "");
/*
 * NOTE: DEBUG is undefined again right here, so the "#ifdef DEBUG"
 * sections further down in this file are compiled out even though the
 * mask and its sysctl above are still created.
 */
#undef DEBUG
#endif

/* Unit/partition extraction simply maps onto dkunit()/dkpart(). */
#define	ccdunit(x)	dkunit(x)
#define ccdpart(x)	dkpart(x)
170 
/*
 * How mirroring works (only writes are special):
 *
 * When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s
 * linked together by the cb_mirror field.  The CCDPF_MIRROR_DONE bit
 * of cb_pflags is clear on both of them.
 *
 * When a component I/O completes and reaches ccdiodone(), it tests its
 * own CCDPF_MIRROR_DONE bit.  If the bit is clear, its partner has not
 * completed yet, so it sets the partner's bit and returns.  If the bit
 * is set, its partner has already completed, so it goes on to the
 * regular cleanup.
 */
184 
/*
 * Per-component I/O descriptor.  One (or, for mirrored writes, two linked
 * through cb_mirror) of these is built by ccdbuffer() for each piece of an
 * original request; it embeds the struct buf that is handed to the
 * component device and carries the context ccdiodone() needs.
 */
struct ccdbuf {
	struct buf	cb_buf;		/* new I/O buf */
	struct vnode	*cb_vp;		/* related vnode */
	struct bio	*cb_obio;	/* ptr. to original I/O buf */
	int		cb_unit;	/* target unit */
	int		cb_comp;	/* target component */
	int		cb_pflags;	/* mirror/parity status flag */
	struct ccdbuf	*cb_mirror;	/* mirror counterpart */
};

/* bits in cb_pflags */
#define CCDPF_MIRROR_DONE 1	/* if set, mirror counterpart is done */
197 
/* Device entry points implemented by this driver. */
static d_open_t ccdopen;
static d_close_t ccdclose;
static d_strategy_t ccdstrategy;
static d_ioctl_t ccdioctl;
static d_dump_t ccddump;

/*
 * Operations vector handed to disk_create() for every ccd unit.  Reads
 * and writes go through the generic physread/physwrite routines; all
 * other operations are implemented by this driver.
 */
static struct dev_ops ccd_ops = {
	{ "ccd", 0, D_DISK | D_MPSAFE },
	.d_open =	ccdopen,
	.d_close =	ccdclose,
	.d_read =	physread,
	.d_write =	physwrite,
	.d_ioctl =	ccdioctl,
	.d_strategy =	ccdstrategy,
	.d_dump =	ccddump
};
214 
/* module load/unload handling (see ccd_modevent()) */
static	void ccdattach (void);
static	int ccddetach (void);
static	int ccd_modevent (module_t, int, void *);

/* installed as the bio_done callback of every component bio */
static	void ccdiodone (struct bio *bio);

/* internal helpers */
static	void ccdstart (struct ccd_softc *, struct bio *);
static	void ccdinterleave (struct ccd_softc *, int);
static	void ccdintr (struct ccd_softc *, struct bio *);
static	int ccdinit (struct ccddevice *, char **, struct ucred *);
static	int ccdlookup (char *, struct vnode **);
static	void ccdbuffer (struct ccdbuf **ret, struct ccd_softc *,
		struct bio *, off_t, caddr_t, long);
static	int ccdlock (struct ccd_softc *);
static	void ccdunlock (struct ccd_softc *);

#ifdef DEBUG
static	void printiinfo (struct ccdiinfo *);
#endif

/*
 * Per-unit state arrays, allocated by ccdattach() with numccd entries
 * each.  Non-private for the benefit of libkvm.
 */
struct	ccd_softc *ccd_softc;
struct	ccddevice *ccddevs;
static	int numccd = 0;
241 
242 /*
243  * getccdbuf() -	Allocate and zero a ccd buffer.
244  */
245 static struct ccdbuf *
246 getccdbuf(void)
247 {
248 	struct ccdbuf *cbp;
249 
250 	cbp = kmalloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK | M_ZERO);
251 	initbufbio(&cbp->cb_buf);
252 
253 	/*
254 	 * independant struct buf initialization
255 	 */
256 	buf_dep_init(&cbp->cb_buf);
257 	BUF_LOCK(&cbp->cb_buf, LK_EXCLUSIVE);
258 	BUF_KERNPROC(&cbp->cb_buf);
259 	cbp->cb_buf.b_flags = B_PAGING | B_BNOCLIP;
260 
261 	return(cbp);
262 }
263 
264 /*
265  * putccdbuf() -	Free a ccd buffer.
266  */
267 static void
268 putccdbuf(struct ccdbuf *cbp)
269 {
270 	BUF_UNLOCK(&cbp->cb_buf);
271 
272 	uninitbufbio(&cbp->cb_buf);
273 	kfree(cbp, M_DEVBUF);
274 }
275 
276 /*
277  * Called by main() during pseudo-device attachment.  All we need
278  * to do is allocate enough space for devices to be configured later, and
279  * add devsw entries.
280  */
281 static void
282 ccdattach(void)
283 {
284 	struct disk_info info;
285 	struct ccd_softc *cs;
286 	int i;
287 	int num = NCCD;
288 
289 	if (num > 1)
290 		kprintf("ccd0-%d: Concatenated disk drivers\n", num-1);
291 	else
292 		kprintf("ccd0: Concatenated disk driver\n");
293 
294 	ccd_softc = kmalloc(num * sizeof(struct ccd_softc), M_DEVBUF,
295 			    M_WAITOK | M_ZERO);
296 	ccddevs = kmalloc(num * sizeof(struct ccddevice), M_DEVBUF,
297 			  M_WAITOK | M_ZERO);
298 	numccd = num;
299 
300 	/*
301 	 * With normal disk devices the open simply fails if the media
302 	 * is not present.  With CCD we have to be able to open the
303 	 * raw disk to use the ioctl's to set it up, so create a dummy
304 	 * disk info structure so dscheck() doesn't blow up.
305 	 */
306 	bzero(&info, sizeof(info));
307 	info.d_media_blksize = DEV_BSIZE;
308 
309 	for (i = 0; i < numccd; ++i) {
310 		cs = &ccd_softc[i];
311 		cs->sc_dev = disk_create(i, &cs->sc_disk, &ccd_ops);
312 		cs->sc_dev->si_drv1 = cs;
313 		cs->sc_dev->si_iosize_max = 256 * 512;	/* XXX */
314 		disk_setdiskinfo(&cs->sc_disk, &info);
315 	}
316 }
317 
318 static int
319 ccddetach(void)
320 {
321 	struct ccd_softc *cs;
322 	struct dev_ioctl_args ioctl_args;
323 	int i;
324 	int error = 0;
325 	int eval;
326 
327 	bzero(&ioctl_args, sizeof(ioctl_args));
328 
329 	for (i = 0; i < numccd; ++i) {
330 		cs = &ccd_softc[i];
331 		if (cs->sc_dev == NULL)
332 			continue;
333 		ioctl_args.a_head.a_dev = cs->sc_dev;
334 		ioctl_args.a_cmd = CCDIOCCLR;
335 		ioctl_args.a_fflag = FWRITE;
336 		eval = ccdioctl(&ioctl_args);
337 		if (eval && eval != ENXIO) {
338 			kprintf("ccd%d: In use, cannot detach\n", i);
339 			error = EBUSY;
340 		}
341 	}
342 	if (error == 0) {
343 		for (i = 0; i < numccd; ++i) {
344 			cs = &ccd_softc[i];
345 			if (cs->sc_dev == NULL)
346 				continue;
347 			disk_destroy(&cs->sc_disk);
348 			cs->sc_dev = NULL;
349 		}
350 		if (ccd_softc)
351 			kfree(ccd_softc, M_DEVBUF);
352 		if (ccddevs)
353 			kfree(ccddevs, M_DEVBUF);
354 	}
355 	return (error);
356 }
357 
358 static int
359 ccd_modevent(module_t mod, int type, void *data)
360 {
361 	int error = 0;
362 
363 	switch (type) {
364 	case MOD_LOAD:
365 		ccdattach();
366 		break;
367 
368 	case MOD_UNLOAD:
369 		error = ccddetach();
370 		break;
371 
372 	default:	/* MOD_SHUTDOWN etc */
373 		break;
374 	}
375 	return (error);
376 }
377 
378 DEV_MODULE(ccd, ccd_modevent, NULL);
379 
380 static int
381 ccdinit(struct ccddevice *ccd, char **cpaths, struct ucred *cred)
382 {
383 	struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit];
384 	struct ccdcinfo *ci = NULL;	/* XXX */
385 	int ix;
386 	struct vnode *vp;
387 	u_int64_t skip;
388 	u_int64_t size;
389 	u_int64_t minsize;
390 	int maxsecsize;
391 	struct partinfo dpart;
392 	struct ccdgeom *ccg = &cs->sc_geom;
393 	char tmppath[MAXPATHLEN];
394 	int error = 0;
395 
396 #ifdef DEBUG
397 	if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
398 		kprintf("ccdinit: unit %d\n", ccd->ccd_unit);
399 #endif
400 
401 	cs->sc_size = 0;
402 	cs->sc_ileave = ccd->ccd_interleave;
403 	cs->sc_nccdisks = ccd->ccd_ndev;
404 
405 	/* Allocate space for the component info. */
406 	cs->sc_cinfo = kmalloc(cs->sc_nccdisks * sizeof(struct ccdcinfo),
407 				M_DEVBUF, M_WAITOK);
408 	cs->sc_maxiosize = MAXPHYS;
409 
410 	lockinit(&cs->sc_lock, "ccdlck", 0, 0);
411 	ccdlock(cs);
412 
413 	/*
414 	 * Verify that each component piece exists and record
415 	 * relevant information about it.
416 	 */
417 	maxsecsize = 0;
418 	minsize = 0;
419 	for (ix = 0; ix < cs->sc_nccdisks; ix++) {
420 		vp = ccd->ccd_vpp[ix];
421 		ci = &cs->sc_cinfo[ix];
422 		ci->ci_vp = vp;
423 
424 		/*
425 		 * Copy in the pathname of the component.
426 		 */
427 		bzero(tmppath, sizeof(tmppath));	/* sanity */
428 		if ((error = copyinstr(cpaths[ix], tmppath,
429 		    MAXPATHLEN, &ci->ci_pathlen)) != 0) {
430 #ifdef DEBUG
431 			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
432 				kprintf("ccd%d: can't copy path, error = %d\n",
433 				    ccd->ccd_unit, error);
434 #endif
435 			goto fail;
436 		}
437 		ci->ci_path = kmalloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK);
438 		bcopy(tmppath, ci->ci_path, ci->ci_pathlen);
439 
440 		ci->ci_dev = vn_todev(vp);
441 		if (ci->ci_dev->si_iosize_max &&
442 		    cs->sc_maxiosize > ci->ci_dev->si_iosize_max) {
443 			cs->sc_maxiosize = ci->ci_dev->si_iosize_max;
444 		}
445 
446 		/*
447 		 * Get partition information for the component.
448 		 */
449 		error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart, FREAD,
450 				  cred, NULL);
451 		if (error) {
452 #ifdef DEBUG
453 			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
454 				 kprintf("ccd%d: %s: ioctl failed, error = %d\n",
455 				     ccd->ccd_unit, ci->ci_path, error);
456 #endif
457 			goto fail;
458 		}
459 		if (dpart.fstype != FS_CCD &&
460 		    !kuuid_is_ccd(&dpart.fstype_uuid)) {
461 			kprintf("ccd%d: %s: filesystem type must be 'ccd'\n",
462 				ccd->ccd_unit, ci->ci_path);
463 			error = EFTYPE;
464 			goto fail;
465 		}
466 		if (maxsecsize < dpart.media_blksize)
467 			maxsecsize = dpart.media_blksize;
468 
469 		/*
470 		 * Skip a certain amount of storage at the beginning of
471 		 * the component to make sure we don't infringe on any
472 		 * reserved sectors.  This is handled entirely by
473 		 * dpart.reserved_blocks but we also impose a minimum
474 		 * of 16 sectors for backwards compatibility.
475 		 */
476 		skip = 16;
477 		if (skip < dpart.reserved_blocks)
478 			skip = dpart.reserved_blocks;
479 		size = dpart.media_blocks - skip;
480 
481 		/*
482 		 * Calculate the size, truncating to an interleave
483 		 * boundary if necessary.
484 		 */
485 		if (cs->sc_ileave > 1)
486 			size -= size % cs->sc_ileave;
487 
488 		if ((int64_t)size <= 0) {
489 #ifdef DEBUG
490 			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
491 				kprintf("ccd%d: %s: size == 0\n",
492 				    ccd->ccd_unit, ci->ci_path);
493 #endif
494 			error = ENODEV;
495 			goto fail;
496 		}
497 
498 		/*
499 		 * Calculate the smallest uniform component, used
500 		 * elsewhere.
501 		 */
502 		if (minsize == 0 || minsize > size)
503 			minsize = size;
504 		ci->ci_skip = skip;
505 		ci->ci_size = size;
506 		cs->sc_size += size;
507 	}
508 	kprintf("ccd%d: max component iosize is %d total blocks %lld\n",
509 		cs->sc_unit, cs->sc_maxiosize, (long long)cs->sc_size);
510 
511 	/*
512 	 * Don't allow the interleave to be smaller than
513 	 * the biggest component sector.
514 	 */
515 	if ((cs->sc_ileave > 0) &&
516 	    (cs->sc_ileave % (maxsecsize / DEV_BSIZE))) {
517 #ifdef DEBUG
518 		if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
519 			kprintf("ccd%d: interleave must be at least %d\n",
520 			    ccd->ccd_unit, (maxsecsize / DEV_BSIZE));
521 #endif
522 		error = EINVAL;
523 		goto fail;
524 	}
525 
526 	/*
527 	 * If uniform interleave is desired set all sizes to that of
528 	 * the smallest component.  This will guarentee that a single
529 	 * interleave table is generated.
530 	 *
531 	 * Lost space must be taken into account when calculating the
532 	 * overall size.  Half the space is lost when CCDF_MIRROR is
533 	 * specified.  One disk is lost when CCDF_PARITY is specified.
534 	 */
535 	if (ccd->ccd_flags & CCDF_UNIFORM) {
536 		for (ci = cs->sc_cinfo;
537 		     ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) {
538 			ci->ci_size = minsize;
539 		}
540 		if (ccd->ccd_flags & CCDF_MIRROR) {
541 			/*
542 			 * Check to see if an even number of components
543 			 * have been specified.  The interleave must also
544 			 * be non-zero in order for us to be able to
545 			 * guarentee the topology.
546 			 */
547 			if (cs->sc_nccdisks % 2) {
548 				kprintf("ccd%d: mirroring requires an even number of disks\n", ccd->ccd_unit );
549 				error = EINVAL;
550 				goto fail;
551 			}
552 			if (cs->sc_ileave == 0) {
553 				kprintf("ccd%d: an interleave must be specified when mirroring\n", ccd->ccd_unit);
554 				error = EINVAL;
555 				goto fail;
556 			}
557 			cs->sc_size = (cs->sc_nccdisks/2) * minsize;
558 		} else if (ccd->ccd_flags & CCDF_PARITY) {
559 			cs->sc_size = (cs->sc_nccdisks-1) * minsize;
560 		} else {
561 			if (cs->sc_ileave == 0) {
562 				kprintf("ccd%d: an interleave must be specified when using parity\n", ccd->ccd_unit);
563 				error = EINVAL;
564 				goto fail;
565 			}
566 			cs->sc_size = cs->sc_nccdisks * minsize;
567 		}
568 	}
569 
570 	/*
571 	 * Construct the interleave table.
572 	 */
573 	ccdinterleave(cs, ccd->ccd_unit);
574 
575 	/*
576 	 * Create pseudo-geometry based on 1MB cylinders.  It's
577 	 * pretty close.
578 	 */
579 	ccg->ccg_secsize = maxsecsize;
580 	ccg->ccg_ntracks = 1;
581 	ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize;
582 	ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors;
583 
584 	/*
585 	 * Add an devstat entry for this device.
586 	 */
587 	devstat_add_entry(&cs->device_stats, "ccd", ccd->ccd_unit,
588 			  ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED,
589 			  DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER,
590 			  DEVSTAT_PRIORITY_ARRAY);
591 
592 	cs->sc_flags |= CCDF_INITED;
593 	cs->sc_cflags = ccd->ccd_flags;	/* So we can find out later... */
594 	cs->sc_unit = ccd->ccd_unit;
595 	return (0);
596 fail:
597 	while (ci > cs->sc_cinfo) {
598 		ci--;
599 		kfree(ci->ci_path, M_DEVBUF);
600 	}
601 	kfree(cs->sc_cinfo, M_DEVBUF);
602 	cs->sc_cinfo = NULL;
603 	return (error);
604 }
605 
606 static void
607 ccdinterleave(struct ccd_softc *cs, int unit)
608 {
609 	struct ccdcinfo *ci, *smallci;
610 	struct ccdiinfo *ii;
611 	u_int64_t bn;
612 	u_int64_t lbn;
613 	u_int64_t size;
614 	int icount;
615 	int ix;
616 
617 #ifdef DEBUG
618 	if (ccddebug & CCDB_INIT)
619 		kprintf("ccdinterleave(%x): ileave %d\n", cs, cs->sc_ileave);
620 #endif
621 
622 	/*
623 	 * Allocate an interleave table.  The worst case occurs when each
624 	 * of N disks is of a different size, resulting in N interleave
625 	 * tables.
626 	 *
627 	 * Chances are this is too big, but we don't care.
628 	 */
629 	icount = cs->sc_nccdisks + 1;
630 	cs->sc_itable = kmalloc(icount * sizeof(struct ccdiinfo),
631 				M_DEVBUF, M_WAITOK|M_ZERO);
632 
633 	/*
634 	 * Trivial case: no interleave (actually interleave of disk size).
635 	 * Each table entry represents a single component in its entirety.
636 	 *
637 	 * An interleave of 0 may not be used with a mirror or parity setup.
638 	 */
639 	if (cs->sc_ileave == 0) {
640 		bn = 0;
641 		ii = cs->sc_itable;
642 
643 		for (ix = 0; ix < cs->sc_nccdisks; ix++) {
644 			/* Allocate space for ii_index. */
645 			ii->ii_index = kmalloc(sizeof(int), M_DEVBUF, M_WAITOK);
646 			ii->ii_ndisk = 1;
647 			ii->ii_startblk = bn;
648 			ii->ii_startoff = 0;
649 			ii->ii_index[0] = ix;
650 			bn += cs->sc_cinfo[ix].ci_size;
651 			ii++;
652 		}
653 		ii->ii_ndisk = 0;
654 #ifdef DEBUG
655 		if (ccddebug & CCDB_INIT)
656 			printiinfo(cs->sc_itable);
657 #endif
658 		return;
659 	}
660 
661 	/*
662 	 * The following isn't fast or pretty; it doesn't have to be.
663 	 */
664 	size = 0;
665 	bn = lbn = 0;
666 	for (ii = cs->sc_itable; ii < &cs->sc_itable[icount]; ++ii) {
667 		/*
668 		 * Allocate space for ii_index.  We might allocate more then
669 		 * we use.
670 		 */
671 		ii->ii_index = kmalloc((sizeof(int) * cs->sc_nccdisks),
672 					M_DEVBUF, M_WAITOK);
673 
674 		/*
675 		 * Locate the smallest of the remaining components
676 		 */
677 		smallci = NULL;
678 		ci = cs->sc_cinfo;
679 		while (ci < &cs->sc_cinfo[cs->sc_nccdisks]) {
680 			if (ci->ci_size > size &&
681 			    (smallci == NULL ||
682 			     ci->ci_size < smallci->ci_size)) {
683 				smallci = ci;
684 			}
685 			++ci;
686 		}
687 
688 		/*
689 		 * Nobody left, all done
690 		 */
691 		if (smallci == NULL) {
692 			ii->ii_ndisk = 0;
693 			break;
694 		}
695 
696 		/*
697 		 * Record starting logical block using an sc_ileave blocksize.
698 		 */
699 		ii->ii_startblk = bn / cs->sc_ileave;
700 
701 		/*
702 		 * Record starting component block using an sc_ileave
703 		 * blocksize.  This value is relative to the beginning of
704 		 * a component disk.
705 		 */
706 		ii->ii_startoff = lbn;
707 
708 		/*
709 		 * Determine how many disks take part in this interleave
710 		 * and record their indices.
711 		 */
712 		ix = 0;
713 		for (ci = cs->sc_cinfo;
714 		    ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) {
715 			if (ci->ci_size >= smallci->ci_size) {
716 				ii->ii_index[ix++] = ci - cs->sc_cinfo;
717 			}
718 		}
719 		ii->ii_ndisk = ix;
720 
721 		/*
722 		 * Adjust for loop
723 		 */
724 		bn += ix * (smallci->ci_size - size);
725 		lbn = smallci->ci_size / cs->sc_ileave;
726 		size = smallci->ci_size;
727 	}
728 	if (ii == &cs->sc_itable[icount])
729 		panic("ccdinterlave software bug!  table exhausted");
730 #ifdef DEBUG
731 	if (ccddebug & CCDB_INIT)
732 		printiinfo(cs->sc_itable);
733 #endif
734 }
735 
736 /* ARGSUSED */
737 static int
738 ccdopen(struct dev_open_args *ap)
739 {
740 	cdev_t dev = ap->a_head.a_dev;
741 	int unit = ccdunit(dev);
742 	struct ccd_softc *cs;
743 	int error = 0;
744 
745 #ifdef DEBUG
746 	if (ccddebug & CCDB_FOLLOW)
747 		kprintf("ccdopen(%x, %x)\n", dev, flags);
748 #endif
749 	if (unit >= numccd)
750 		return (ENXIO);
751 	cs = &ccd_softc[unit];
752 
753 	if ((error = ccdlock(cs)) == 0) {
754 		ccdunlock(cs);
755 	}
756 	return (error);
757 }
758 
759 /* ARGSUSED */
760 static int
761 ccdclose(struct dev_close_args *ap)
762 {
763 	cdev_t dev = ap->a_head.a_dev;
764 	int unit = ccdunit(dev);
765 	struct ccd_softc *cs;
766 	int error = 0;
767 
768 #ifdef DEBUG
769 	if (ccddebug & CCDB_FOLLOW)
770 		kprintf("ccdclose(%x, %x)\n", dev, flags);
771 #endif
772 
773 	if (unit >= numccd)
774 		return (ENXIO);
775 	cs = &ccd_softc[unit];
776 	if ((error = ccdlock(cs)) == 0) {
777 		ccdunlock(cs);
778 	}
779 	return (error);
780 }
781 
782 static int
783 ccdstrategy(struct dev_strategy_args *ap)
784 {
785 	cdev_t dev = ap->a_head.a_dev;
786 	struct bio *bio = ap->a_bio;
787 	int unit = ccdunit(dev);
788 	struct bio *nbio;
789 	struct buf *bp = bio->bio_buf;
790 	struct ccd_softc *cs = &ccd_softc[unit];
791 	u_int64_t pbn;	/* in sc_secsize chunks */
792 	u_int32_t sz;	/* in sc_secsize chunks */
793 
794 #ifdef DEBUG
795 	if (ccddebug & CCDB_FOLLOW)
796 		kprintf("ccdstrategy(%x): unit %d\n", bp, unit);
797 #endif
798 	if ((cs->sc_flags & CCDF_INITED) == 0) {
799 		bp->b_error = ENXIO;
800 		goto error;
801 	}
802 
803 	/* If it's a nil transfer, wake up the top half now. */
804 	if (bp->b_bcount == 0) {
805 		bp->b_resid = 0;
806 		goto done;
807 	}
808 
809 	/*
810 	 * Do bounds checking and adjust transfer.  If there's an
811 	 * error, the bounds check will flag that for us.
812 	 */
813 
814 	pbn = bio->bio_offset / cs->sc_geom.ccg_secsize;
815 	sz = howmany(bp->b_bcount, cs->sc_geom.ccg_secsize);
816 
817 	/*
818 	 * If out of bounds return an error.  If the request goes
819 	 * past EOF, clip the request as appropriate.  If exactly
820 	 * at EOF, return success (don't clip), but with 0 bytes
821 	 * of I/O.
822 	 *
823 	 * Mark EOF B_INVAL (just like bad), indicating that the
824 	 * contents of the buffer, if any, is invalid.
825 	 */
826 	if ((int64_t)pbn < 0)
827 		goto bad;
828 	if (pbn + sz > cs->sc_size) {
829 		if (pbn > cs->sc_size || (bp->b_flags & B_BNOCLIP))
830 			goto bad;
831 		if (pbn == cs->sc_size) {
832 			bp->b_resid = bp->b_bcount;
833 			bp->b_flags |= B_INVAL;
834 			goto done;
835 		}
836 		sz = (long)(cs->sc_size - pbn);
837 		bp->b_bcount = sz * cs->sc_geom.ccg_secsize;
838 	}
839 	nbio = bio;
840 
841 	bp->b_resid = bp->b_bcount;
842 	nbio->bio_driver_info = dev;
843 
844 	/*
845 	 * "Start" the unit.
846 	 */
847 	ccdstart(cs, nbio);
848 	return(0);
849 
850 	/*
851 	 * note: bio, not nbio, is valid at the done label.
852 	 */
853 bad:
854 	bp->b_error = EINVAL;
855 error:
856 	bp->b_resid = bp->b_bcount;
857 	bp->b_flags |= B_ERROR | B_INVAL;
858 done:
859 	biodone(bio);
860 	return(0);
861 }
862 
863 static void
864 ccdstart(struct ccd_softc *cs, struct bio *bio)
865 {
866 	long bcount, rcount;
867 	struct ccdbuf *cbp[4];
868 	struct buf *bp = bio->bio_buf;
869 	/* XXX! : 2 reads and 2 writes for RAID 4/5 */
870 	caddr_t addr;
871 	off_t doffset;
872 
873 #ifdef DEBUG
874 	if (ccddebug & CCDB_FOLLOW)
875 		kprintf("ccdstart(%x, %x)\n", cs, bp);
876 #endif
877 
878 	/* Record the transaction start  */
879 	devstat_start_transaction(&cs->device_stats);
880 
881 	/*
882 	 * Allocate component buffers and fire off the requests
883 	 */
884 	doffset = bio->bio_offset;
885 	addr = bp->b_data;
886 
887 	for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) {
888 		ccdbuffer(cbp, cs, bio, doffset, addr, bcount);
889 		rcount = cbp[0]->cb_buf.b_bcount;
890 
891 		if (cs->sc_cflags & CCDF_MIRROR) {
892 			/*
893 			 * Mirroring.  Writes go to both disks, reads are
894 			 * taken from whichever disk seems most appropriate.
895 			 *
896 			 * We attempt to localize reads to the disk whos arm
897 			 * is nearest the read request.  We ignore seeks due
898 			 * to writes when making this determination and we
899 			 * also try to avoid hogging.
900 			 */
901 			if (cbp[0]->cb_buf.b_cmd != BUF_CMD_READ) {
902 				vn_strategy(cbp[0]->cb_vp,
903 					    &cbp[0]->cb_buf.b_bio1);
904 				vn_strategy(cbp[1]->cb_vp,
905 					    &cbp[1]->cb_buf.b_bio1);
906 			} else {
907 				int pick = cs->sc_pick;
908 				daddr_t range = cs->sc_size / 16 * cs->sc_geom.ccg_secsize;
909 				if (doffset < cs->sc_blk[pick] - range ||
910 				    doffset > cs->sc_blk[pick] + range
911 				) {
912 					cs->sc_pick = pick = 1 - pick;
913 				}
914 				cs->sc_blk[pick] = doffset + rcount;
915 				vn_strategy(cbp[pick]->cb_vp,
916 					    &cbp[pick]->cb_buf.b_bio1);
917 			}
918 		} else {
919 			/*
920 			 * Not mirroring
921 			 */
922 			vn_strategy(cbp[0]->cb_vp,
923 				     &cbp[0]->cb_buf.b_bio1);
924 		}
925 		doffset += rcount;
926 		addr += rcount;
927 	}
928 }
929 
930 /*
931  * Build a component buffer header.
932  */
933 static void
934 ccdbuffer(struct ccdbuf **cb, struct ccd_softc *cs, struct bio *bio,
935 	  off_t doffset, caddr_t addr, long bcount)
936 {
937 	struct ccdcinfo *ci, *ci2 = NULL;	/* XXX */
938 	struct ccdbuf *cbp;
939 	u_int64_t bn;
940 	u_int64_t cbn;
941 	u_int64_t cboff;
942 	off_t cbc;
943 
944 #ifdef DEBUG
945 	if (ccddebug & CCDB_IO)
946 		kprintf("ccdbuffer(%x, %x, %d, %x, %d)\n",
947 		       cs, bp, bn, addr, bcount);
948 #endif
949 	/*
950 	 * Determine which component bn falls in.
951 	 */
952 	bn = doffset / cs->sc_geom.ccg_secsize;
953 	cbn = bn;
954 	cboff = 0;
955 
956 	if (cs->sc_ileave == 0) {
957 		/*
958 		 * Serially concatenated and neither a mirror nor a parity
959 		 * config.  This is a special case.
960 		 */
961 		daddr_t sblk;
962 
963 		sblk = 0;
964 		for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++)
965 			sblk += ci->ci_size;
966 		cbn -= sblk;
967 	} else {
968 		struct ccdiinfo *ii;
969 		int ccdisk, off;
970 
971 		/*
972 		 * Calculate cbn, the logical superblock (sc_ileave chunks),
973 		 * and cboff, a normal block offset (DEV_BSIZE chunks) relative
974 		 * to cbn.
975 		 */
976 		cboff = cbn % cs->sc_ileave;	/* DEV_BSIZE gran */
977 		cbn = cbn / cs->sc_ileave;	/* DEV_BSIZE * ileave gran */
978 
979 		/*
980 		 * Figure out which interleave table to use.
981 		 */
982 		for (ii = cs->sc_itable; ii->ii_ndisk; ii++) {
983 			if (ii->ii_startblk > cbn)
984 				break;
985 		}
986 		ii--;
987 
988 		/*
989 		 * off is the logical superblock relative to the beginning
990 		 * of this interleave block.
991 		 */
992 		off = cbn - ii->ii_startblk;
993 
994 		/*
995 		 * We must calculate which disk component to use (ccdisk),
996 		 * and recalculate cbn to be the superblock relative to
997 		 * the beginning of the component.  This is typically done by
998 		 * adding 'off' and ii->ii_startoff together.  However, 'off'
999 		 * must typically be divided by the number of components in
1000 		 * this interleave array to be properly convert it from a
1001 		 * CCD-relative logical superblock number to a
1002 		 * component-relative superblock number.
1003 		 */
1004 		if (ii->ii_ndisk == 1) {
1005 			/*
1006 			 * When we have just one disk, it can't be a mirror
1007 			 * or a parity config.
1008 			 */
1009 			ccdisk = ii->ii_index[0];
1010 			cbn = ii->ii_startoff + off;
1011 		} else {
1012 			if (cs->sc_cflags & CCDF_MIRROR) {
1013 				/*
1014 				 * We have forced a uniform mapping, resulting
1015 				 * in a single interleave array.  We double
1016 				 * up on the first half of the available
1017 				 * components and our mirror is in the second
1018 				 * half.  This only works with a single
1019 				 * interleave array because doubling up
1020 				 * doubles the number of sectors, so there
1021 				 * cannot be another interleave array because
1022 				 * the next interleave array's calculations
1023 				 * would be off.
1024 				 */
1025 				int ndisk2 = ii->ii_ndisk / 2;
1026 				ccdisk = ii->ii_index[off % ndisk2];
1027 				cbn = ii->ii_startoff + off / ndisk2;
1028 				ci2 = &cs->sc_cinfo[ccdisk + ndisk2];
1029 			} else if (cs->sc_cflags & CCDF_PARITY) {
1030 				/*
1031 				 * XXX not implemented yet
1032 				 */
1033 				int ndisk2 = ii->ii_ndisk - 1;
1034 				ccdisk = ii->ii_index[off % ndisk2];
1035 				cbn = ii->ii_startoff + off / ndisk2;
1036 				if (cbn % ii->ii_ndisk <= ccdisk)
1037 					ccdisk++;
1038 			} else {
1039 				ccdisk = ii->ii_index[off % ii->ii_ndisk];
1040 				cbn = ii->ii_startoff + off / ii->ii_ndisk;
1041 			}
1042 		}
1043 
1044 		ci = &cs->sc_cinfo[ccdisk];
1045 
1046 		/*
1047 		 * Convert cbn from a superblock to a normal block so it
1048 		 * can be used to calculate (along with cboff) the normal
1049 		 * block index into this particular disk.
1050 		 */
1051 		cbn *= cs->sc_ileave;
1052 	}
1053 
1054 	/*
1055 	 * Fill in the component buf structure.
1056 	 *
1057 	 * NOTE: devices do not use b_bufsize, only b_bcount, but b_bcount
1058 	 * will be truncated on device EOF so we use b_bufsize to detect
1059 	 * the case.
1060 	 */
1061 	cbp = getccdbuf();
1062 	cbp->cb_buf.b_cmd = bio->bio_buf->b_cmd;
1063 	cbp->cb_buf.b_flags |= bio->bio_buf->b_flags;
1064 	cbp->cb_buf.b_data = addr;
1065 	cbp->cb_vp = ci->ci_vp;
1066 	if (cs->sc_ileave == 0)
1067 		cbc = dbtob((off_t)(ci->ci_size - cbn));
1068 	else
1069 		cbc = dbtob((off_t)(cs->sc_ileave - cboff));
1070 	if (cbc > cs->sc_maxiosize)
1071 		cbc = cs->sc_maxiosize;
1072 	cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount;
1073  	cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount;
1074 
1075 	cbp->cb_buf.b_bio1.bio_done = ccdiodone;
1076 	cbp->cb_buf.b_bio1.bio_caller_info1.ptr = cbp;
1077 	cbp->cb_buf.b_bio1.bio_offset = dbtob(cbn + cboff + ci->ci_skip);
1078 
1079 	/*
1080 	 * context for ccdiodone
1081 	 */
1082 	cbp->cb_obio = bio;
1083 	cbp->cb_unit = cs - ccd_softc;
1084 	cbp->cb_comp = ci - cs->sc_cinfo;
1085 
1086 #ifdef DEBUG
1087 	if (ccddebug & CCDB_IO)
1088 		kprintf(" dev %x(u%d): cbp %x off %lld addr %x bcnt %d\n",
1089 		       ci->ci_dev, ci-cs->sc_cinfo, cbp,
1090 		       cbp->cb_buf.b_bio1.bio_offset,
1091 		       cbp->cb_buf.b_data, cbp->cb_buf.b_bcount);
1092 #endif
1093 	cb[0] = cbp;
1094 
1095 	/*
1096 	 * Note: both I/O's setup when reading from mirror, but only one
1097 	 * will be executed.
1098 	 */
1099 	if (cs->sc_cflags & CCDF_MIRROR) {
1100 		/* mirror, setup second I/O */
1101 		cbp = getccdbuf();
1102 
1103 		cbp->cb_buf.b_cmd = bio->bio_buf->b_cmd;
1104 		cbp->cb_buf.b_flags |= bio->bio_buf->b_flags;
1105 		cbp->cb_buf.b_data = addr;
1106 		cbp->cb_vp = ci2->ci_vp;
1107 		if (cs->sc_ileave == 0)
1108 		      cbc = dbtob((off_t)(ci->ci_size - cbn));
1109 		else
1110 		      cbc = dbtob((off_t)(cs->sc_ileave - cboff));
1111 		if (cbc > cs->sc_maxiosize)
1112 			cbc = cs->sc_maxiosize;
1113 		cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount;
1114 		cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount;
1115 
1116 		cbp->cb_buf.b_bio1.bio_done = ccdiodone;
1117 		cbp->cb_buf.b_bio1.bio_caller_info1.ptr = cbp;
1118 		cbp->cb_buf.b_bio1.bio_offset = dbtob(cbn + cboff + ci2->ci_skip);
1119 
1120 		/*
1121 		 * context for ccdiodone
1122 		 */
1123 		cbp->cb_obio = bio;
1124 		cbp->cb_unit = cs - ccd_softc;
1125 		cbp->cb_comp = ci2 - cs->sc_cinfo;
1126 		cb[1] = cbp;
1127 		/* link together the ccdbuf's and clear "mirror done" flag */
1128 		cb[0]->cb_mirror = cb[1];
1129 		cb[1]->cb_mirror = cb[0];
1130 		cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE;
1131 		cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE;
1132 	}
1133 }
1134 
1135 static void
1136 ccdintr(struct ccd_softc *cs, struct bio *bio)
1137 {
1138 	struct buf *bp = bio->bio_buf;
1139 
1140 #ifdef DEBUG
1141 	if (ccddebug & CCDB_FOLLOW)
1142 		kprintf("ccdintr(%x, %x)\n", cs, bp);
1143 #endif
1144 	/*
1145 	 * Request is done for better or worse, wakeup the top half.
1146 	 */
1147 	if (bp->b_flags & B_ERROR)
1148 		bp->b_resid = bp->b_bcount;
1149 	devstat_end_transaction_buf(&cs->device_stats, bp);
1150 	biodone(bio);
1151 }
1152 
1153 /*
1154  * Called at interrupt time.
1155  *
1156  * Mark the component as done and if all components are done,
1157  * take a ccd interrupt.
1158  */
1159 static void
1160 ccdiodone(struct bio *bio)
1161 {
1162 	struct ccdbuf *cbp = bio->bio_caller_info1.ptr;
1163 	struct bio *obio = cbp->cb_obio;
1164 	struct buf *obp = obio->bio_buf;
1165 	int unit = cbp->cb_unit;
1166 	struct ccd_softc *sc = &ccd_softc[unit];
1167 	int count;
1168 
1169 	/*
1170 	 * Since we do not have exclusive access to underlying devices,
1171 	 * we can't keep cache translations around.
1172 	 */
1173 	clearbiocache(bio->bio_next);
1174 
1175 	ccdlock(sc);
1176 
1177 #ifdef DEBUG
1178 	if (ccddebug & CCDB_FOLLOW)
1179 		kprintf("ccdiodone(%x)\n", cbp);
1180 	if (ccddebug & CCDB_IO) {
1181 		kprintf("ccdiodone: bp %x bcount %d resid %d\n",
1182 		       obp, obp->b_bcount, obp->b_resid);
1183 		kprintf(" dev %x(u%d), cbp %x off %lld addr %x bcnt %d\n",
1184 		       cbp->cb_buf.b_dev, cbp->cb_comp, cbp,
1185 		       cbp->cb_buf.b_loffset, cbp->cb_buf.b_data,
1186 		       cbp->cb_buf.b_bcount);
1187 	}
1188 #endif
1189 
1190 	/*
1191 	 * If an error occured, report it.  If this is a mirrored
1192 	 * configuration and the first of two possible reads, do not
1193 	 * set the error in the bp yet because the second read may
1194 	 * succeed.
1195 	 */
1196 	if (cbp->cb_buf.b_flags & B_ERROR) {
1197 		const char *msg = "";
1198 
1199 		if ((sc->sc_cflags & CCDF_MIRROR) &&
1200 		    (cbp->cb_buf.b_cmd == BUF_CMD_READ) &&
1201 		    (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1202 			/*
1203 			 * We will try our read on the other disk down
1204 			 * below, also reverse the default pick so if we
1205 			 * are doing a scan we do not keep hitting the
1206 			 * bad disk first.
1207 			 */
1208 			msg = ", trying other disk";
1209 			sc->sc_pick = 1 - sc->sc_pick;
1210 			sc->sc_blk[sc->sc_pick] = obio->bio_offset;
1211 		} else {
1212 			obp->b_flags |= B_ERROR;
1213 			obp->b_error = cbp->cb_buf.b_error ?
1214 			    cbp->cb_buf.b_error : EIO;
1215 		}
1216 		kprintf("ccd%d: error %d on component %d "
1217 			"offset %jd (ccd offset %jd)%s\n",
1218 		        unit, obp->b_error, cbp->cb_comp,
1219 		        (intmax_t)cbp->cb_buf.b_bio2.bio_offset,
1220 		        (intmax_t)obio->bio_offset,
1221 		        msg);
1222 	}
1223 
1224 	/*
1225 	 * Process mirror.  If we are writing, I/O has been initiated on both
1226 	 * buffers and we fall through only after both are finished.
1227 	 *
1228 	 * If we are reading only one I/O is initiated at a time.  If an
1229 	 * error occurs we initiate the second I/O and return, otherwise
1230 	 * we free the second I/O without initiating it.
1231 	 */
1232 
1233 	if (sc->sc_cflags & CCDF_MIRROR) {
1234 		if (cbp->cb_buf.b_cmd != BUF_CMD_READ) {
1235 			/*
1236 			 * When writing, handshake with the second buffer
1237 			 * to determine when both are done.  If both are not
1238 			 * done, return here.
1239 			 */
1240 			if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1241 				cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE;
1242 				putccdbuf(cbp);
1243 				ccdunlock(sc);
1244 				return;
1245 			}
1246 		} else {
1247 			/*
1248 			 * When reading, either dispose of the second buffer
1249 			 * or initiate I/O on the second buffer if an error
1250 			 * occured with this one.
1251 			 */
1252 			if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1253 				if (cbp->cb_buf.b_flags & B_ERROR) {
1254 					cbp->cb_mirror->cb_pflags |=
1255 					    CCDPF_MIRROR_DONE;
1256 					vn_strategy(
1257 					    cbp->cb_mirror->cb_vp,
1258 					    &cbp->cb_mirror->cb_buf.b_bio1
1259 					);
1260 					putccdbuf(cbp);
1261 					ccdunlock(sc);
1262 					return;
1263 				} else {
1264 					putccdbuf(cbp->cb_mirror);
1265 					/* fall through */
1266 				}
1267 			}
1268 		}
1269 	}
1270 
1271 	/*
1272 	 * Use our saved b_bufsize to determine if an unexpected EOF occured.
1273 	 */
1274 	count = cbp->cb_buf.b_bufsize;
1275 	putccdbuf(cbp);
1276 
1277 	/*
1278 	 * If all done, "interrupt".
1279 	 */
1280 	obp->b_resid -= count;
1281 	if (obp->b_resid < 0)
1282 		panic("ccdiodone: count");
1283 
1284 	ccdunlock(sc);
1285 
1286 	if (obp->b_resid == 0)
1287 		ccdintr(sc, obio);
1288 }
1289 
1290 static int
1291 ccdioctl(struct dev_ioctl_args *ap)
1292 {
1293 	cdev_t dev = ap->a_head.a_dev;
1294 	int unit = ccdunit(dev);
1295 	int i, j, lookedup = 0, error = 0;
1296 	struct ccd_softc *cs;
1297 	struct ccd_ioctl *ccio = (struct ccd_ioctl *)ap->a_data;
1298 	struct ccddevice ccd;
1299 	struct disk_info info;
1300 	char **cpp;
1301 	struct vnode **vpp;
1302 
1303 	if (unit >= numccd)
1304 		return (ENXIO);
1305 	cs = &ccd_softc[unit];
1306 
1307 	bzero(&ccd, sizeof(ccd));
1308 
1309 	switch (ap->a_cmd) {
1310 	case CCDIOCSET:
1311 		if (cs->sc_flags & CCDF_INITED)
1312 			return (EBUSY);
1313 
1314 		if ((ap->a_fflag & FWRITE) == 0)
1315 			return (EBADF);
1316 
1317 		if ((error = ccdlock(cs)) != 0)
1318 			return (error);
1319 
1320 		if (ccio->ccio_ndisks > CCD_MAXNDISKS) {
1321 			ccdunlock(cs);
1322 			return (EINVAL);
1323 		}
1324 
1325 		/* Fill in some important bits. */
1326 		ccd.ccd_unit = unit;
1327 		ccd.ccd_interleave = ccio->ccio_ileave;
1328 		if (ccd.ccd_interleave == 0 &&
1329 		    ((ccio->ccio_flags & CCDF_MIRROR) ||
1330 		     (ccio->ccio_flags & CCDF_PARITY))) {
1331 			kprintf("ccd%d: disabling mirror/parity, interleave is 0\n", unit);
1332 			ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY);
1333 		}
1334 		if ((ccio->ccio_flags & CCDF_MIRROR) &&
1335 		    (ccio->ccio_flags & CCDF_PARITY)) {
1336 			kprintf("ccd%d: can't specify both mirror and parity, using mirror\n", unit);
1337 			ccio->ccio_flags &= ~CCDF_PARITY;
1338 		}
1339 		if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) &&
1340 		    !(ccio->ccio_flags & CCDF_UNIFORM)) {
1341 			kprintf("ccd%d: mirror/parity forces uniform flag\n",
1342 			       unit);
1343 			ccio->ccio_flags |= CCDF_UNIFORM;
1344 		}
1345 		ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK;
1346 
1347 		/*
1348 		 * Allocate space for and copy in the array of
1349 		 * componet pathnames and device numbers.
1350 		 */
1351 		cpp = kmalloc(ccio->ccio_ndisks * sizeof(char *),
1352 		    M_DEVBUF, M_WAITOK);
1353 		vpp = kmalloc(ccio->ccio_ndisks * sizeof(struct vnode *),
1354 		    M_DEVBUF, M_WAITOK);
1355 
1356 		error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp,
1357 				ccio->ccio_ndisks * sizeof(char **));
1358 		if (error) {
1359 			kfree(vpp, M_DEVBUF);
1360 			kfree(cpp, M_DEVBUF);
1361 			ccdunlock(cs);
1362 			return (error);
1363 		}
1364 
1365 #ifdef DEBUG
1366 		if (ccddebug & CCDB_INIT) {
1367 			for (i = 0; i < ccio->ccio_ndisks; ++i)
1368 				kprintf("ccdioctl: component %d: 0x%x\n",
1369 				    i, cpp[i]);
1370 		}
1371 #endif
1372 
1373 		for (i = 0; i < ccio->ccio_ndisks; ++i) {
1374 #ifdef DEBUG
1375 			if (ccddebug & CCDB_INIT)
1376 				kprintf("ccdioctl: lookedup = %d\n", lookedup);
1377 #endif
1378 			if ((error = ccdlookup(cpp[i], &vpp[i])) != 0) {
1379 				for (j = 0; j < lookedup; ++j)
1380 					(void)vn_close(vpp[j], FREAD|FWRITE, NULL);
1381 				kfree(vpp, M_DEVBUF);
1382 				kfree(cpp, M_DEVBUF);
1383 				ccdunlock(cs);
1384 				return (error);
1385 			}
1386 			++lookedup;
1387 		}
1388 		ccd.ccd_cpp = cpp;
1389 		ccd.ccd_vpp = vpp;
1390 		ccd.ccd_ndev = ccio->ccio_ndisks;
1391 
1392 		/*
1393 		 * Initialize the ccd.  Fills in the softc for us.
1394 		 */
1395 		if ((error = ccdinit(&ccd, cpp, ap->a_cred)) != 0) {
1396 			for (j = 0; j < lookedup; ++j)
1397 				vn_close(vpp[j], FREAD|FWRITE, NULL);
1398 			kfree(vpp, M_DEVBUF);
1399 			kfree(cpp, M_DEVBUF);
1400 			ccdunlock(cs);
1401 			return (error);
1402 		}
1403 
1404 		/*
1405 		 * The ccd has been successfully initialized, so
1406 		 * we can place it into the array and read the disklabel.
1407 		 */
1408 		bcopy(&ccd, &ccddevs[unit], sizeof(ccd));
1409 		ccio->ccio_unit = unit;
1410 		ccio->ccio_size = cs->sc_size;
1411 
1412 		bzero(&info, sizeof(info));
1413 		info.d_media_blksize = cs->sc_geom.ccg_secsize;
1414 		info.d_media_blocks  = cs->sc_size;
1415 		info.d_nheads	     = cs->sc_geom.ccg_ntracks;
1416 		info.d_secpertrack   = cs->sc_geom.ccg_nsectors;
1417 		info.d_ncylinders    = cs->sc_geom.ccg_ncylinders;
1418 		info.d_secpercyl     = info.d_nheads * info.d_secpertrack;
1419 
1420 		/*
1421 		 * For cases where a label is directly applied to the ccd,
1422 		 * without slices, DSO_COMPATMBR forces one sector be
1423 		 * reserved for backwards compatibility.
1424 		 */
1425 		info.d_dsflags	     = DSO_COMPATMBR;
1426 		disk_setdiskinfo(&cs->sc_disk, &info);
1427 
1428 		ccdunlock(cs);
1429 
1430 		break;
1431 
1432 	case CCDIOCCLR:
1433 		if ((cs->sc_flags & CCDF_INITED) == 0)
1434 			return (ENXIO);
1435 
1436 		if ((ap->a_fflag & FWRITE) == 0)
1437 			return (EBADF);
1438 
1439 		if ((error = ccdlock(cs)) != 0)
1440 			return (error);
1441 
1442 		if (dev_drefs(cs->sc_dev) > 1) {
1443 			ccdunlock(cs);
1444 			return (EBUSY);
1445 		}
1446 
1447 		/*
1448 		 * Free ccd_softc information and clear entry.
1449 		 */
1450 
1451 		/* Close the components and free their pathnames. */
1452 		for (i = 0; i < cs->sc_nccdisks; ++i) {
1453 			/*
1454 			 * XXX: this close could potentially fail and
1455 			 * cause Bad Things.  Maybe we need to force
1456 			 * the close to happen?
1457 			 */
1458 #ifdef DEBUG
1459 			if (ccddebug & CCDB_VNODE)
1460 				vprint("CCDIOCCLR: vnode info",
1461 				    cs->sc_cinfo[i].ci_vp);
1462 #endif
1463 			(void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, NULL);
1464 			kfree(cs->sc_cinfo[i].ci_path, M_DEVBUF);
1465 		}
1466 
1467 		/* Free interleave index. */
1468 		for (i = 0; cs->sc_itable[i].ii_ndisk; ++i)
1469 			kfree(cs->sc_itable[i].ii_index, M_DEVBUF);
1470 
1471 		/* Free component info and interleave table. */
1472 		kfree(cs->sc_cinfo, M_DEVBUF);
1473 		kfree(cs->sc_itable, M_DEVBUF);
1474 		cs->sc_cinfo = NULL;
1475 		cs->sc_itable = NULL;
1476 		cs->sc_flags &= ~CCDF_INITED;
1477 
1478 		/*
1479 		 * Free ccddevice information and clear entry.
1480 		 */
1481 		kfree(ccddevs[unit].ccd_cpp, M_DEVBUF);
1482 		kfree(ccddevs[unit].ccd_vpp, M_DEVBUF);
1483 		bcopy(&ccd, &ccddevs[unit], sizeof(ccd));
1484 
1485 		/*
1486 		 * And remove the devstat entry.
1487 		 */
1488 		devstat_remove_entry(&cs->device_stats);
1489 
1490 		ccdunlock(cs);
1491 
1492 		break;
1493 
1494 	default:
1495 		return (ENOTTY);
1496 	}
1497 
1498 	return (0);
1499 }
1500 
/*
 * Dump entry point.  Dumping is not implemented for ccd, so the
 * request is always refused with ENXIO and 'ap' is not examined.
 */
static int
ccddump(struct dev_dump_args *ap)
{
	(void)ap;

	return (ENXIO);
}
1507 
1508 /*
1509  * Lookup the provided name in the filesystem.  If the file exists,
1510  * is a valid block device, and isn't being used by anyone else,
1511  * set *vpp to the file's vnode.
1512  */
1513 static int
1514 ccdlookup(char *path, struct vnode **vpp)
1515 {
1516 	struct nlookupdata nd;
1517 	struct vnode *vp;
1518 	int error;
1519 
1520 	*vpp = NULL;
1521 
1522 	error = nlookup_init(&nd, path, UIO_USERSPACE, NLC_FOLLOW|NLC_LOCKVP);
1523 	if (error)
1524 		return (error);
1525 	if ((error = vn_open(&nd, NULL, FREAD|FWRITE, 0)) != 0) {
1526 #ifdef DEBUG
1527 		if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
1528 			kprintf("ccdlookup: vn_open error = %d\n", error);
1529 #endif
1530 		goto done;
1531 	}
1532 	vp = nd.nl_open_vp;
1533 
1534 	if (vp->v_opencount > 1) {
1535 		error = EBUSY;
1536 		goto done;
1537 	}
1538 
1539 	if (!vn_isdisk(vp, &error))
1540 		goto done;
1541 
1542 #ifdef DEBUG
1543 	if (ccddebug & CCDB_VNODE)
1544 		vprint("ccdlookup: vnode info", vp);
1545 #endif
1546 
1547 	vn_unlock(vp);
1548 	nd.nl_open_vp = NULL;
1549 	nlookup_done(&nd);
1550 	*vpp = vp;				/* leave ref intact  */
1551 	return (0);
1552 done:
1553 	nlookup_done(&nd);
1554 	return (error);
1555 }
1556 
1557 /*
1558  * Wait interruptibly for an exclusive lock.
1559  */
1560 static int
1561 ccdlock(struct ccd_softc *cs)
1562 {
1563 	lockmgr(&cs->sc_lock, LK_EXCLUSIVE);
1564 
1565 	return (0);
1566 }
1567 
1568 /*
1569  * Unlock and wake up any waiters.
1570  */
1571 static void
1572 ccdunlock(struct ccd_softc *cs)
1573 {
1574 	lockmgr(&cs->sc_lock, LK_RELEASE);
1575 }
1576 
#ifdef DEBUG
/*
 * Debug helper: print every entry of an interleave table.  The table
 * is walked until an entry with ii_ndisk == 0 is found; for each entry
 * the disk count, start block, start offset and the component indices
 * are printed on one line.
 */
static void
printiinfo(struct ccdiinfo *ii)
{
	int slot = 0;
	int d;

	while (ii->ii_ndisk) {
		kprintf(" itab[%d]: #dk %d sblk %d soff %d",
		       slot, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff);
		for (d = 0; d < ii->ii_ndisk; d++)
			kprintf(" %d", ii->ii_index[d]);
		kprintf("\n");
		slot++;
		ii++;
	}
}
#endif
1592