xref: /dragonfly/sys/dev/disk/ccd/ccd.c (revision 37de577a)
1 /*
2  * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  */
35 /*
36  * Copyright (c) 1995 Jason R. Thorpe.
37  * All rights reserved.
38  *
39  * Redistribution and use in source and binary forms, with or without
40  * modification, are permitted provided that the following conditions
41  * are met:
42  * 1. Redistributions of source code must retain the above copyright
43  *    notice, this list of conditions and the following disclaimer.
44  * 2. Redistributions in binary form must reproduce the above copyright
45  *    notice, this list of conditions and the following disclaimer in the
46  *    documentation and/or other materials provided with the distribution.
47  * 3. All advertising materials mentioning features or use of this software
48  *    must display the following acknowledgement:
49  *	This product includes software developed for the NetBSD Project
50  *	by Jason R. Thorpe.
51  * 4. The name of the author may not be used to endorse or promote products
52  *    derived from this software without specific prior written permission.
53  *
54  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
55  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
56  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
57  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
58  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
59  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
60  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
61  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
62  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64  * SUCH DAMAGE.
65  */
66 
67 /*
68  * Copyright (c) 1988 University of Utah.
69  * Copyright (c) 1990, 1993
70  *	The Regents of the University of California.  All rights reserved.
71  *
72  * This code is derived from software contributed to Berkeley by
73  * the Systems Programming Group of the University of Utah Computer
74  * Science Department.
75  *
76  * Redistribution and use in source and binary forms, with or without
77  * modification, are permitted provided that the following conditions
78  * are met:
79  * 1. Redistributions of source code must retain the above copyright
80  *    notice, this list of conditions and the following disclaimer.
81  * 2. Redistributions in binary form must reproduce the above copyright
82  *    notice, this list of conditions and the following disclaimer in the
83  *    documentation and/or other materials provided with the distribution.
84  * 3. All advertising materials mentioning features or use of this software
85  *    must display the following acknowledgement:
86  *	This product includes software developed by the University of
87  *	California, Berkeley and its contributors.
88  * 4. Neither the name of the University nor the names of its contributors
89  *    may be used to endorse or promote products derived from this software
90  *    without specific prior written permission.
91  *
92  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
93  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
95  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
96  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
97  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
98  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
99  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
100  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
101  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
102  * SUCH DAMAGE.
103  *
104  * from: Utah $Hdr: cd.c 1.6 90/11/28$
105  */
106 /*
107  * @(#)cd.c	8.2 (Berkeley) 11/16/93
108  * $FreeBSD: src/sys/dev/ccd/ccd.c,v 1.73.2.1 2001/09/11 09:49:52 kris Exp $
109  * $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $
110  */
111 
112 /*
113  * "Concatenated" disk driver.
114  *
115  * Original dynamic configuration support by:
116  *	Jason R. Thorpe <thorpej@nas.nasa.gov>
117  *	Numerical Aerodynamic Simulation Facility
118  *	Mail Stop 258-6
119  *	NASA Ames Research Center
120  *	Moffett Field, CA 94035
121  */
122 
123 #include "use_ccd.h"
124 
125 #include <sys/param.h>
126 #include <sys/systm.h>
127 #include <sys/kernel.h>
128 #include <sys/module.h>
129 #include <sys/proc.h>
130 #include <sys/buf.h>
131 #include <sys/malloc.h>
132 #include <sys/nlookup.h>
133 #include <sys/conf.h>
134 #include <sys/stat.h>
135 #include <sys/sysctl.h>
136 #include <sys/disk.h>
137 #include <sys/dtype.h>
138 #include <sys/diskslice.h>
139 #include <sys/devicestat.h>
140 #include <sys/fcntl.h>
141 #include <sys/vnode.h>
142 #include <sys/ccdvar.h>
143 
144 #include <vm/vm_zone.h>
145 
146 #include <vfs/ufs/dinode.h> 	/* XXX Used only for fs.h */
147 #include <vfs/ufs/fs.h> 	/* XXX used only to get BBSIZE and SBSIZE */
148 
149 #include <sys/buf2.h>
150 
/*
 * Debug configuration.  Building with CCDDEBUG defines DEBUG (unless it
 * is already defined) so that the debug flag bits and the ccddebug
 * variable below are compiled in.
 */
151 #if defined(CCDDEBUG) && !defined(DEBUG)
152 #define DEBUG
153 #endif
154 
155 #ifdef DEBUG
/* Bit flags tested against ccddebug by the debug kprintf()s in this file. */
156 #define CCDB_FOLLOW	0x01
157 #define CCDB_INIT	0x02
158 #define CCDB_IO		0x04
159 #define CCDB_LABEL	0x08
160 #define CCDB_VNODE	0x10
/* All debug categories start out enabled. */
161 static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL |
162     CCDB_VNODE;
/* Exported read/write (CTLFLAG_RW) under the _debug sysctl parent. */
163 SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, "");
/*
 * NOTE: DEBUG is undefined again right here, so the "#ifdef DEBUG"
 * sections that follow this point are compiled out unless DEBUG is
 * re-defined somewhere below.
 */
164 #undef DEBUG
165 #endif
166 
/* Unit and partition number of a ccd device; thin aliases for dkunit()/dkpart(). */
167 #define	ccdunit(x)	dkunit(x)
168 #define ccdpart(x)	dkpart(x)
169 
170 /*
171    This is how mirroring works (only writes are special):
172 
173    When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s
174    linked together by the cb_mirror field.  "cb_pflags &
175    CCDPF_MIRROR_DONE" is set to 0 on both of them.
176 
177    When a component returns to ccdiodone(), it checks if "cb_pflags &
178    CCDPF_MIRROR_DONE" is set or not.  If not, it sets the partner's
179    flag and returns.  If it is, it means its partner has already
180    returned, so it will go to the regular cleanup.
181 
182  */
183 
/*
 * Per-component I/O request.  getccdbuf() allocates one of these and
 * ccdbuffer() fills it in for each piece of an original request that is
 * issued to a component.  The embedded struct buf must stay the first
 * consumer-visible member used for the I/O: the component request is
 * started on cb_buf.b_bio1.
 */
184 struct ccdbuf {
185 	struct buf	cb_buf;		/* buf issued to the component */
186 	struct vnode	*cb_vp;		/* component vnode (ci_vp) the I/O goes to */
187 	struct bio	*cb_obio;	/* original bio this piece belongs to */
188 	int		cb_unit;	/* index of the owning unit in ccd_softc[] */
189 	int		cb_comp;	/* index of the component in sc_cinfo[] */
190 	int		cb_pflags;	/* mirror/parity status, CCDPF_* bits */
191 	struct ccdbuf	*cb_mirror;	/* mirror counterpart */
192 };
193 
194 /* bits in cb_pflags */
195 #define CCDPF_MIRROR_DONE 1	/* if set, mirror counterpart is done */
196 
/*
 * Device entry points implemented by this driver.  They are declared
 * through the d_*_t function typedefs so their signatures match what
 * struct dev_ops expects.
 */
197 static d_open_t ccdopen;
198 static d_close_t ccdclose;
199 static d_strategy_t ccdstrategy;
200 static d_ioctl_t ccdioctl;
201 static d_dump_t ccddump;
202 
/*
 * Operations vector handed to disk_create() for every ccd unit.  The
 * device is named "ccd" and flagged D_DISK | D_MPSAFE.  Reads and writes
 * are not implemented here; they go through the generic physread() and
 * physwrite() routines.
 */
203 static struct dev_ops ccd_ops = {
204 	{ "ccd", 0, D_DISK | D_MPSAFE },
205 	.d_open =	ccdopen,
206 	.d_close =	ccdclose,
207 	.d_read =	physread,
208 	.d_write =	physwrite,
209 	.d_ioctl =	ccdioctl,
210 	.d_strategy =	ccdstrategy,
211 	.d_dump =	ccddump
212 };
213 
/*
 * Module load/unload handling: ccd_modevent() calls ccdattach() on
 * MOD_LOAD and ccddetach() on MOD_UNLOAD.
 */
214 /* called during module initialization */
215 static	void ccdattach (void);
216 static	int ccddetach (void);
217 static	int ccd_modevent (module_t, int, void *);
218 
/* Installed as bio_done on every component request built by ccdbuffer(). */
219 /* called by biodone() at interrupt time */
220 static	void ccdiodone (struct bio *bio);
221 
/* Internal helpers for configuring a unit and for splitting up I/O. */
222 static	void ccdstart (struct ccd_softc *, struct bio *);
223 static	void ccdinterleave (struct ccd_softc *, int);
224 static	void ccdintr (struct ccd_softc *, struct bio *);
225 static	int ccdinit (struct ccddevice *, char **, struct ucred *);
226 static	int ccdlookup (char *, struct vnode **);
227 static	void ccdbuffer (struct ccdbuf **ret, struct ccd_softc *,
228 		struct bio *, off_t, caddr_t, long);
229 static	int ccdlock (struct ccd_softc *);
230 static	void ccdunlock (struct ccd_softc *);
231 
232 #ifdef DEBUG
233 static	void printiinfo (struct ccdiinfo *);
234 #endif
235 
/*
 * Per-unit state.  Both arrays are allocated by ccdattach() with numccd
 * entries each and are indexed by unit number.
 */
236 /* Non-private for the benefit of libkvm. */
237 struct	ccd_softc *ccd_softc;
238 struct	ccddevice *ccddevs;
/* Number of entries in ccd_softc[] and ccddevs[]; 0 until ccdattach() runs. */
239 static	int numccd = 0;
240 
241 /*
242  * getccdbuf() -	Allocate and zero a ccd buffer.
243  */
244 static struct ccdbuf *
245 getccdbuf(void)
246 {
247 	struct ccdbuf *cbp;
248 
249 	cbp = kmalloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK | M_ZERO);
250 	initbufbio(&cbp->cb_buf);
251 
252 	/*
253 	 * independant struct buf initialization
254 	 */
255 	buf_dep_init(&cbp->cb_buf);
256 	BUF_LOCK(&cbp->cb_buf, LK_EXCLUSIVE);
257 	BUF_KERNPROC(&cbp->cb_buf);
258 	cbp->cb_buf.b_flags = B_PAGING | B_BNOCLIP;
259 
260 	return(cbp);
261 }
262 
263 /*
264  * putccdbuf() -	Free a ccd buffer.
265  */
266 static void
267 putccdbuf(struct ccdbuf *cbp)
268 {
269 	BUF_UNLOCK(&cbp->cb_buf);
270 
271 	uninitbufbio(&cbp->cb_buf);
272 	kfree(cbp, M_DEVBUF);
273 }
274 
275 /*
276  * Called by main() during pseudo-device attachment.  All we need
277  * to do is allocate enough space for devices to be configured later, and
278  * add devsw entries.
279  */
280 static void
281 ccdattach(void)
282 {
283 	struct disk_info info;
284 	struct ccd_softc *cs;
285 	int i;
286 	int num = NCCD;
287 
288 	if (num > 1)
289 		kprintf("ccd0-%d: Concatenated disk drivers\n", num-1);
290 	else
291 		kprintf("ccd0: Concatenated disk driver\n");
292 
293 	ccd_softc = kmalloc(num * sizeof(struct ccd_softc), M_DEVBUF,
294 			    M_WAITOK | M_ZERO);
295 	ccddevs = kmalloc(num * sizeof(struct ccddevice), M_DEVBUF,
296 			  M_WAITOK | M_ZERO);
297 	numccd = num;
298 
299 	/*
300 	 * With normal disk devices the open simply fails if the media
301 	 * is not present.  With CCD we have to be able to open the
302 	 * raw disk to use the ioctl's to set it up, so create a dummy
303 	 * disk info structure so dscheck() doesn't blow up.
304 	 */
305 	bzero(&info, sizeof(info));
306 	info.d_media_blksize = DEV_BSIZE;
307 
308 	for (i = 0; i < numccd; ++i) {
309 		cs = &ccd_softc[i];
310 		cs->sc_dev = disk_create(i, &cs->sc_disk, &ccd_ops);
311 		cs->sc_dev->si_drv1 = cs;
312 		cs->sc_dev->si_iosize_max = 256 * 512;	/* XXX */
313 		disk_setdiskinfo(&cs->sc_disk, &info);
314 	}
315 }
316 
317 static int
318 ccddetach(void)
319 {
320 	struct ccd_softc *cs;
321 	struct dev_ioctl_args ioctl_args;
322 	int i;
323 	int error = 0;
324 	int eval;
325 
326 	bzero(&ioctl_args, sizeof(ioctl_args));
327 
328 	for (i = 0; i < numccd; ++i) {
329 		cs = &ccd_softc[i];
330 		if (cs->sc_dev == NULL)
331 			continue;
332 		ioctl_args.a_head.a_dev = cs->sc_dev;
333 		ioctl_args.a_cmd = CCDIOCCLR;
334 		ioctl_args.a_fflag = FWRITE;
335 		eval = ccdioctl(&ioctl_args);
336 		if (eval && eval != ENXIO) {
337 			kprintf("ccd%d: In use, cannot detach\n", i);
338 			error = EBUSY;
339 		}
340 	}
341 	if (error == 0) {
342 		for (i = 0; i < numccd; ++i) {
343 			cs = &ccd_softc[i];
344 			if (cs->sc_dev == NULL)
345 				continue;
346 			disk_destroy(&cs->sc_disk);
347 			cs->sc_dev = NULL;
348 		}
349 		if (ccd_softc)
350 			kfree(ccd_softc, M_DEVBUF);
351 		if (ccddevs)
352 			kfree(ccddevs, M_DEVBUF);
353 	}
354 	return (error);
355 }
356 
357 static int
358 ccd_modevent(module_t mod, int type, void *data)
359 {
360 	int error = 0;
361 
362 	switch (type) {
363 	case MOD_LOAD:
364 		ccdattach();
365 		break;
366 
367 	case MOD_UNLOAD:
368 		error = ccddetach();
369 		break;
370 
371 	default:	/* MOD_SHUTDOWN etc */
372 		break;
373 	}
374 	return (error);
375 }
376 
377 DEV_MODULE(ccd, ccd_modevent, NULL);
378 
379 static int
380 ccdinit(struct ccddevice *ccd, char **cpaths, struct ucred *cred)
381 {
382 	struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit];
383 	struct ccdcinfo *ci = NULL;	/* XXX */
384 	int ix;
385 	struct vnode *vp;
386 	u_int64_t skip;
387 	u_int64_t size;
388 	u_int64_t minsize;
389 	int maxsecsize;
390 	struct partinfo dpart;
391 	struct ccdgeom *ccg = &cs->sc_geom;
392 	char tmppath[MAXPATHLEN];
393 	int error = 0;
394 
395 #ifdef DEBUG
396 	if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
397 		kprintf("ccdinit: unit %d\n", ccd->ccd_unit);
398 #endif
399 
400 	cs->sc_size = 0;
401 	cs->sc_ileave = ccd->ccd_interleave;
402 	cs->sc_nccdisks = ccd->ccd_ndev;
403 
404 	/* Allocate space for the component info. */
405 	cs->sc_cinfo = kmalloc(cs->sc_nccdisks * sizeof(struct ccdcinfo),
406 				M_DEVBUF, M_WAITOK);
407 	cs->sc_maxiosize = MAXPHYS;
408 
409 	lockinit(&cs->sc_lock, "ccdlck", 0, 0);
410 	ccdlock(cs);
411 
412 	/*
413 	 * Verify that each component piece exists and record
414 	 * relevant information about it.
415 	 */
416 	maxsecsize = 0;
417 	minsize = 0;
418 	for (ix = 0; ix < cs->sc_nccdisks; ix++) {
419 		vp = ccd->ccd_vpp[ix];
420 		ci = &cs->sc_cinfo[ix];
421 		ci->ci_vp = vp;
422 
423 		/*
424 		 * Copy in the pathname of the component.
425 		 */
426 		bzero(tmppath, sizeof(tmppath));	/* sanity */
427 		if ((error = copyinstr(cpaths[ix], tmppath,
428 		    MAXPATHLEN, &ci->ci_pathlen)) != 0) {
429 #ifdef DEBUG
430 			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
431 				kprintf("ccd%d: can't copy path, error = %d\n",
432 				    ccd->ccd_unit, error);
433 #endif
434 			goto fail;
435 		}
436 		ci->ci_path = kmalloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK);
437 		bcopy(tmppath, ci->ci_path, ci->ci_pathlen);
438 
439 		ci->ci_dev = vn_todev(vp);
440 		if (ci->ci_dev->si_iosize_max &&
441 		    cs->sc_maxiosize > ci->ci_dev->si_iosize_max) {
442 			cs->sc_maxiosize = ci->ci_dev->si_iosize_max;
443 		}
444 
445 		/*
446 		 * Get partition information for the component.
447 		 */
448 		error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart, FREAD,
449 				  cred, NULL);
450 		if (error) {
451 #ifdef DEBUG
452 			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
453 				 kprintf("ccd%d: %s: ioctl failed, error = %d\n",
454 				     ccd->ccd_unit, ci->ci_path, error);
455 #endif
456 			goto fail;
457 		}
458 		if (dpart.fstype != FS_CCD &&
459 		    !kuuid_is_ccd(&dpart.fstype_uuid)) {
460 			kprintf("ccd%d: %s: filesystem type must be 'ccd'\n",
461 				ccd->ccd_unit, ci->ci_path);
462 			error = EFTYPE;
463 			goto fail;
464 		}
465 		if (maxsecsize < dpart.media_blksize)
466 			maxsecsize = dpart.media_blksize;
467 
468 		/*
469 		 * Skip a certain amount of storage at the beginning of
470 		 * the component to make sure we don't infringe on any
471 		 * reserved sectors.  This is handled entirely by
472 		 * dpart.reserved_blocks but we also impose a minimum
473 		 * of 16 sectors for backwards compatibility.
474 		 */
475 		skip = 16;
476 		if (skip < dpart.reserved_blocks)
477 			skip = dpart.reserved_blocks;
478 		size = dpart.media_blocks - skip;
479 
480 		/*
481 		 * Calculate the size, truncating to an interleave
482 		 * boundary if necessary.
483 		 */
484 		if (cs->sc_ileave > 1)
485 			size -= size % cs->sc_ileave;
486 
487 		if ((int64_t)size <= 0) {
488 #ifdef DEBUG
489 			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
490 				kprintf("ccd%d: %s: size == 0\n",
491 				    ccd->ccd_unit, ci->ci_path);
492 #endif
493 			error = ENODEV;
494 			goto fail;
495 		}
496 
497 		/*
498 		 * Calculate the smallest uniform component, used
499 		 * elsewhere.
500 		 */
501 		if (minsize == 0 || minsize > size)
502 			minsize = size;
503 		ci->ci_skip = skip;
504 		ci->ci_size = size;
505 		cs->sc_size += size;
506 	}
507 	kprintf("ccd%d: max component iosize is %d total blocks %lld\n",
508 		cs->sc_unit, cs->sc_maxiosize, (long long)cs->sc_size);
509 
510 	/*
511 	 * Don't allow the interleave to be smaller than
512 	 * the biggest component sector.
513 	 */
514 	if ((cs->sc_ileave > 0) &&
515 	    (cs->sc_ileave % (maxsecsize / DEV_BSIZE))) {
516 #ifdef DEBUG
517 		if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
518 			kprintf("ccd%d: interleave must be at least %d\n",
519 			    ccd->ccd_unit, (maxsecsize / DEV_BSIZE));
520 #endif
521 		error = EINVAL;
522 		goto fail;
523 	}
524 
525 	/*
526 	 * If uniform interleave is desired set all sizes to that of
527 	 * the smallest component.  This will guarentee that a single
528 	 * interleave table is generated.
529 	 *
530 	 * Lost space must be taken into account when calculating the
531 	 * overall size.  Half the space is lost when CCDF_MIRROR is
532 	 * specified.  One disk is lost when CCDF_PARITY is specified.
533 	 */
534 	if (ccd->ccd_flags & CCDF_UNIFORM) {
535 		for (ci = cs->sc_cinfo;
536 		     ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) {
537 			ci->ci_size = minsize;
538 		}
539 		if (ccd->ccd_flags & CCDF_MIRROR) {
540 			/*
541 			 * Check to see if an even number of components
542 			 * have been specified.  The interleave must also
543 			 * be non-zero in order for us to be able to
544 			 * guarentee the topology.
545 			 */
546 			if (cs->sc_nccdisks % 2) {
547 				kprintf("ccd%d: mirroring requires an even number of disks\n", ccd->ccd_unit );
548 				error = EINVAL;
549 				goto fail;
550 			}
551 			if (cs->sc_ileave == 0) {
552 				kprintf("ccd%d: an interleave must be specified when mirroring\n", ccd->ccd_unit);
553 				error = EINVAL;
554 				goto fail;
555 			}
556 			cs->sc_size = (cs->sc_nccdisks/2) * minsize;
557 		} else if (ccd->ccd_flags & CCDF_PARITY) {
558 			cs->sc_size = (cs->sc_nccdisks-1) * minsize;
559 		} else {
560 			if (cs->sc_ileave == 0) {
561 				kprintf("ccd%d: an interleave must be specified when using parity\n", ccd->ccd_unit);
562 				error = EINVAL;
563 				goto fail;
564 			}
565 			cs->sc_size = cs->sc_nccdisks * minsize;
566 		}
567 	}
568 
569 	/*
570 	 * Construct the interleave table.
571 	 */
572 	ccdinterleave(cs, ccd->ccd_unit);
573 
574 	/*
575 	 * Create pseudo-geometry based on 1MB cylinders.  It's
576 	 * pretty close.
577 	 */
578 	ccg->ccg_secsize = maxsecsize;
579 	ccg->ccg_ntracks = 1;
580 	ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize;
581 	ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors;
582 
583 	/*
584 	 * Add an devstat entry for this device.
585 	 */
586 	devstat_add_entry(&cs->device_stats, "ccd", ccd->ccd_unit,
587 			  ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED,
588 			  DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER,
589 			  DEVSTAT_PRIORITY_ARRAY);
590 
591 	cs->sc_flags |= CCDF_INITED;
592 	cs->sc_cflags = ccd->ccd_flags;	/* So we can find out later... */
593 	cs->sc_unit = ccd->ccd_unit;
594 	return (0);
595 fail:
596 	while (ci > cs->sc_cinfo) {
597 		ci--;
598 		kfree(ci->ci_path, M_DEVBUF);
599 	}
600 	kfree(cs->sc_cinfo, M_DEVBUF);
601 	cs->sc_cinfo = NULL;
602 	return (error);
603 }
604 
605 static void
606 ccdinterleave(struct ccd_softc *cs, int unit)
607 {
608 	struct ccdcinfo *ci, *smallci;
609 	struct ccdiinfo *ii;
610 	u_int64_t bn;
611 	u_int64_t lbn;
612 	u_int64_t size;
613 	int icount;
614 	int ix;
615 
616 #ifdef DEBUG
617 	if (ccddebug & CCDB_INIT)
618 		kprintf("ccdinterleave(%x): ileave %d\n", cs, cs->sc_ileave);
619 #endif
620 
621 	/*
622 	 * Allocate an interleave table.  The worst case occurs when each
623 	 * of N disks is of a different size, resulting in N interleave
624 	 * tables.
625 	 *
626 	 * Chances are this is too big, but we don't care.
627 	 */
628 	icount = cs->sc_nccdisks + 1;
629 	cs->sc_itable = kmalloc(icount * sizeof(struct ccdiinfo),
630 				M_DEVBUF, M_WAITOK|M_ZERO);
631 
632 	/*
633 	 * Trivial case: no interleave (actually interleave of disk size).
634 	 * Each table entry represents a single component in its entirety.
635 	 *
636 	 * An interleave of 0 may not be used with a mirror or parity setup.
637 	 */
638 	if (cs->sc_ileave == 0) {
639 		bn = 0;
640 		ii = cs->sc_itable;
641 
642 		for (ix = 0; ix < cs->sc_nccdisks; ix++) {
643 			/* Allocate space for ii_index. */
644 			ii->ii_index = kmalloc(sizeof(int), M_DEVBUF, M_WAITOK);
645 			ii->ii_ndisk = 1;
646 			ii->ii_startblk = bn;
647 			ii->ii_startoff = 0;
648 			ii->ii_index[0] = ix;
649 			bn += cs->sc_cinfo[ix].ci_size;
650 			ii++;
651 		}
652 		ii->ii_ndisk = 0;
653 #ifdef DEBUG
654 		if (ccddebug & CCDB_INIT)
655 			printiinfo(cs->sc_itable);
656 #endif
657 		return;
658 	}
659 
660 	/*
661 	 * The following isn't fast or pretty; it doesn't have to be.
662 	 */
663 	size = 0;
664 	bn = lbn = 0;
665 	for (ii = cs->sc_itable; ii < &cs->sc_itable[icount]; ++ii) {
666 		/*
667 		 * Allocate space for ii_index.  We might allocate more then
668 		 * we use.
669 		 */
670 		ii->ii_index = kmalloc((sizeof(int) * cs->sc_nccdisks),
671 					M_DEVBUF, M_WAITOK);
672 
673 		/*
674 		 * Locate the smallest of the remaining components
675 		 */
676 		smallci = NULL;
677 		ci = cs->sc_cinfo;
678 		while (ci < &cs->sc_cinfo[cs->sc_nccdisks]) {
679 			if (ci->ci_size > size &&
680 			    (smallci == NULL ||
681 			     ci->ci_size < smallci->ci_size)) {
682 				smallci = ci;
683 			}
684 			++ci;
685 		}
686 
687 		/*
688 		 * Nobody left, all done
689 		 */
690 		if (smallci == NULL) {
691 			ii->ii_ndisk = 0;
692 			break;
693 		}
694 
695 		/*
696 		 * Record starting logical block using an sc_ileave blocksize.
697 		 */
698 		ii->ii_startblk = bn / cs->sc_ileave;
699 
700 		/*
701 		 * Record starting component block using an sc_ileave
702 		 * blocksize.  This value is relative to the beginning of
703 		 * a component disk.
704 		 */
705 		ii->ii_startoff = lbn;
706 
707 		/*
708 		 * Determine how many disks take part in this interleave
709 		 * and record their indices.
710 		 */
711 		ix = 0;
712 		for (ci = cs->sc_cinfo;
713 		    ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) {
714 			if (ci->ci_size >= smallci->ci_size) {
715 				ii->ii_index[ix++] = ci - cs->sc_cinfo;
716 			}
717 		}
718 		ii->ii_ndisk = ix;
719 
720 		/*
721 		 * Adjust for loop
722 		 */
723 		bn += ix * (smallci->ci_size - size);
724 		lbn = smallci->ci_size / cs->sc_ileave;
725 		size = smallci->ci_size;
726 	}
727 	if (ii == &cs->sc_itable[icount])
728 		panic("ccdinterlave software bug!  table exhausted");
729 #ifdef DEBUG
730 	if (ccddebug & CCDB_INIT)
731 		printiinfo(cs->sc_itable);
732 #endif
733 }
734 
735 /* ARGSUSED */
736 static int
737 ccdopen(struct dev_open_args *ap)
738 {
739 	cdev_t dev = ap->a_head.a_dev;
740 	int unit = ccdunit(dev);
741 	struct ccd_softc *cs;
742 	int error = 0;
743 
744 #ifdef DEBUG
745 	if (ccddebug & CCDB_FOLLOW)
746 		kprintf("ccdopen(%x, %x)\n", dev, flags);
747 #endif
748 	if (unit >= numccd)
749 		return (ENXIO);
750 	cs = &ccd_softc[unit];
751 
752 	if ((error = ccdlock(cs)) == 0) {
753 		ccdunlock(cs);
754 	}
755 	return (error);
756 }
757 
758 /* ARGSUSED */
759 static int
760 ccdclose(struct dev_close_args *ap)
761 {
762 	cdev_t dev = ap->a_head.a_dev;
763 	int unit = ccdunit(dev);
764 	struct ccd_softc *cs;
765 	int error = 0;
766 
767 #ifdef DEBUG
768 	if (ccddebug & CCDB_FOLLOW)
769 		kprintf("ccdclose(%x, %x)\n", dev, flags);
770 #endif
771 
772 	if (unit >= numccd)
773 		return (ENXIO);
774 	cs = &ccd_softc[unit];
775 	if ((error = ccdlock(cs)) == 0) {
776 		ccdunlock(cs);
777 	}
778 	return (error);
779 }
780 
781 static int
782 ccdstrategy(struct dev_strategy_args *ap)
783 {
784 	cdev_t dev = ap->a_head.a_dev;
785 	struct bio *bio = ap->a_bio;
786 	int unit = ccdunit(dev);
787 	struct bio *nbio;
788 	struct buf *bp = bio->bio_buf;
789 	struct ccd_softc *cs = &ccd_softc[unit];
790 	u_int64_t pbn;	/* in sc_secsize chunks */
791 	u_int32_t sz;	/* in sc_secsize chunks */
792 
793 #ifdef DEBUG
794 	if (ccddebug & CCDB_FOLLOW)
795 		kprintf("ccdstrategy(%x): unit %d\n", bp, unit);
796 #endif
797 	if ((cs->sc_flags & CCDF_INITED) == 0) {
798 		bp->b_error = ENXIO;
799 		goto error;
800 	}
801 
802 	/* If it's a nil transfer, wake up the top half now. */
803 	if (bp->b_bcount == 0) {
804 		bp->b_resid = 0;
805 		goto done;
806 	}
807 
808 	/*
809 	 * Do bounds checking and adjust transfer.  If there's an
810 	 * error, the bounds check will flag that for us.
811 	 */
812 
813 	pbn = bio->bio_offset / cs->sc_geom.ccg_secsize;
814 	sz = howmany(bp->b_bcount, cs->sc_geom.ccg_secsize);
815 
816 	/*
817 	 * If out of bounds return an error.  If the request goes
818 	 * past EOF, clip the request as appropriate.  If exactly
819 	 * at EOF, return success (don't clip), but with 0 bytes
820 	 * of I/O.
821 	 *
822 	 * Mark EOF B_INVAL (just like bad), indicating that the
823 	 * contents of the buffer, if any, is invalid.
824 	 */
825 	if ((int64_t)pbn < 0)
826 		goto bad;
827 	if (pbn + sz > cs->sc_size) {
828 		if (pbn > cs->sc_size || (bp->b_flags & B_BNOCLIP))
829 			goto bad;
830 		if (pbn == cs->sc_size) {
831 			bp->b_resid = bp->b_bcount;
832 			bp->b_flags |= B_INVAL;
833 			goto done;
834 		}
835 		sz = (long)(cs->sc_size - pbn);
836 		bp->b_bcount = sz * cs->sc_geom.ccg_secsize;
837 	}
838 	nbio = bio;
839 
840 	bp->b_resid = bp->b_bcount;
841 	nbio->bio_driver_info = dev;
842 
843 	/*
844 	 * "Start" the unit.
845 	 */
846 	ccdstart(cs, nbio);
847 	return(0);
848 
849 	/*
850 	 * note: bio, not nbio, is valid at the done label.
851 	 */
852 bad:
853 	bp->b_error = EINVAL;
854 error:
855 	bp->b_resid = bp->b_bcount;
856 	bp->b_flags |= B_ERROR | B_INVAL;
857 done:
858 	biodone(bio);
859 	return(0);
860 }
861 
862 static void
863 ccdstart(struct ccd_softc *cs, struct bio *bio)
864 {
865 	long bcount, rcount;
866 	struct ccdbuf *cbp[4];
867 	struct buf *bp = bio->bio_buf;
868 	/* XXX! : 2 reads and 2 writes for RAID 4/5 */
869 	caddr_t addr;
870 	off_t doffset;
871 
872 #ifdef DEBUG
873 	if (ccddebug & CCDB_FOLLOW)
874 		kprintf("ccdstart(%x, %x)\n", cs, bp);
875 #endif
876 
877 	/* Record the transaction start  */
878 	devstat_start_transaction(&cs->device_stats);
879 
880 	/*
881 	 * Allocate component buffers and fire off the requests
882 	 */
883 	doffset = bio->bio_offset;
884 	addr = bp->b_data;
885 
886 	for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) {
887 		ccdbuffer(cbp, cs, bio, doffset, addr, bcount);
888 		rcount = cbp[0]->cb_buf.b_bcount;
889 
890 		if (cs->sc_cflags & CCDF_MIRROR) {
891 			/*
892 			 * Mirroring.  Writes go to both disks, reads are
893 			 * taken from whichever disk seems most appropriate.
894 			 *
895 			 * We attempt to localize reads to the disk whos arm
896 			 * is nearest the read request.  We ignore seeks due
897 			 * to writes when making this determination and we
898 			 * also try to avoid hogging.
899 			 */
900 			if (cbp[0]->cb_buf.b_cmd != BUF_CMD_READ) {
901 				vn_strategy(cbp[0]->cb_vp,
902 					    &cbp[0]->cb_buf.b_bio1);
903 				vn_strategy(cbp[1]->cb_vp,
904 					    &cbp[1]->cb_buf.b_bio1);
905 			} else {
906 				int pick = cs->sc_pick;
907 				daddr_t range = cs->sc_size / 16 * cs->sc_geom.ccg_secsize;
908 				if (doffset < cs->sc_blk[pick] - range ||
909 				    doffset > cs->sc_blk[pick] + range
910 				) {
911 					cs->sc_pick = pick = 1 - pick;
912 				}
913 				cs->sc_blk[pick] = doffset + rcount;
914 				vn_strategy(cbp[pick]->cb_vp,
915 					    &cbp[pick]->cb_buf.b_bio1);
916 			}
917 		} else {
918 			/*
919 			 * Not mirroring
920 			 */
921 			vn_strategy(cbp[0]->cb_vp,
922 				     &cbp[0]->cb_buf.b_bio1);
923 		}
924 		doffset += rcount;
925 		addr += rcount;
926 	}
927 }
928 
929 /*
930  * Build a component buffer header.
931  */
932 static void
933 ccdbuffer(struct ccdbuf **cb, struct ccd_softc *cs, struct bio *bio,
934 	  off_t doffset, caddr_t addr, long bcount)
935 {
936 	struct ccdcinfo *ci, *ci2 = NULL;	/* XXX */
937 	struct ccdbuf *cbp;
938 	u_int64_t bn;
939 	u_int64_t cbn;
940 	u_int64_t cboff;
941 	off_t cbc;
942 
943 #ifdef DEBUG
944 	if (ccddebug & CCDB_IO)
945 		kprintf("ccdbuffer(%x, %x, %d, %x, %d)\n",
946 		       cs, bp, bn, addr, bcount);
947 #endif
948 	/*
949 	 * Determine which component bn falls in.
950 	 */
951 	bn = doffset / cs->sc_geom.ccg_secsize;
952 	cbn = bn;
953 	cboff = 0;
954 
955 	if (cs->sc_ileave == 0) {
956 		/*
957 		 * Serially concatenated and neither a mirror nor a parity
958 		 * config.  This is a special case.
959 		 */
960 		daddr_t sblk;
961 
962 		sblk = 0;
963 		for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++)
964 			sblk += ci->ci_size;
965 		cbn -= sblk;
966 	} else {
967 		struct ccdiinfo *ii;
968 		int ccdisk, off;
969 
970 		/*
971 		 * Calculate cbn, the logical superblock (sc_ileave chunks),
972 		 * and cboff, a normal block offset (DEV_BSIZE chunks) relative
973 		 * to cbn.
974 		 */
975 		cboff = cbn % cs->sc_ileave;	/* DEV_BSIZE gran */
976 		cbn = cbn / cs->sc_ileave;	/* DEV_BSIZE * ileave gran */
977 
978 		/*
979 		 * Figure out which interleave table to use.
980 		 */
981 		for (ii = cs->sc_itable; ii->ii_ndisk; ii++) {
982 			if (ii->ii_startblk > cbn)
983 				break;
984 		}
985 		ii--;
986 
987 		/*
988 		 * off is the logical superblock relative to the beginning
989 		 * of this interleave block.
990 		 */
991 		off = cbn - ii->ii_startblk;
992 
993 		/*
994 		 * We must calculate which disk component to use (ccdisk),
995 		 * and recalculate cbn to be the superblock relative to
996 		 * the beginning of the component.  This is typically done by
997 		 * adding 'off' and ii->ii_startoff together.  However, 'off'
998 		 * must typically be divided by the number of components in
999 		 * this interleave array to be properly convert it from a
1000 		 * CCD-relative logical superblock number to a
1001 		 * component-relative superblock number.
1002 		 */
1003 		if (ii->ii_ndisk == 1) {
1004 			/*
1005 			 * When we have just one disk, it can't be a mirror
1006 			 * or a parity config.
1007 			 */
1008 			ccdisk = ii->ii_index[0];
1009 			cbn = ii->ii_startoff + off;
1010 		} else {
1011 			if (cs->sc_cflags & CCDF_MIRROR) {
1012 				/*
1013 				 * We have forced a uniform mapping, resulting
1014 				 * in a single interleave array.  We double
1015 				 * up on the first half of the available
1016 				 * components and our mirror is in the second
1017 				 * half.  This only works with a single
1018 				 * interleave array because doubling up
1019 				 * doubles the number of sectors, so there
1020 				 * cannot be another interleave array because
1021 				 * the next interleave array's calculations
1022 				 * would be off.
1023 				 */
1024 				int ndisk2 = ii->ii_ndisk / 2;
1025 				ccdisk = ii->ii_index[off % ndisk2];
1026 				cbn = ii->ii_startoff + off / ndisk2;
1027 				ci2 = &cs->sc_cinfo[ccdisk + ndisk2];
1028 			} else if (cs->sc_cflags & CCDF_PARITY) {
1029 				/*
1030 				 * XXX not implemented yet
1031 				 */
1032 				int ndisk2 = ii->ii_ndisk - 1;
1033 				ccdisk = ii->ii_index[off % ndisk2];
1034 				cbn = ii->ii_startoff + off / ndisk2;
1035 				if (cbn % ii->ii_ndisk <= ccdisk)
1036 					ccdisk++;
1037 			} else {
1038 				ccdisk = ii->ii_index[off % ii->ii_ndisk];
1039 				cbn = ii->ii_startoff + off / ii->ii_ndisk;
1040 			}
1041 		}
1042 
1043 		ci = &cs->sc_cinfo[ccdisk];
1044 
1045 		/*
1046 		 * Convert cbn from a superblock to a normal block so it
1047 		 * can be used to calculate (along with cboff) the normal
1048 		 * block index into this particular disk.
1049 		 */
1050 		cbn *= cs->sc_ileave;
1051 	}
1052 
1053 	/*
1054 	 * Fill in the component buf structure.
1055 	 *
1056 	 * NOTE: devices do not use b_bufsize, only b_bcount, but b_bcount
1057 	 * will be truncated on device EOF so we use b_bufsize to detect
1058 	 * the case.
1059 	 */
1060 	cbp = getccdbuf();
1061 	cbp->cb_buf.b_cmd = bio->bio_buf->b_cmd;
1062 	cbp->cb_buf.b_flags |= bio->bio_buf->b_flags;
1063 	cbp->cb_buf.b_data = addr;
1064 	cbp->cb_vp = ci->ci_vp;
1065 	if (cs->sc_ileave == 0)
1066 		cbc = dbtob((off_t)(ci->ci_size - cbn));
1067 	else
1068 		cbc = dbtob((off_t)(cs->sc_ileave - cboff));
1069 	if (cbc > cs->sc_maxiosize)
1070 		cbc = cs->sc_maxiosize;
1071 	cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount;
1072  	cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount;
1073 
1074 	cbp->cb_buf.b_bio1.bio_done = ccdiodone;
1075 	cbp->cb_buf.b_bio1.bio_caller_info1.ptr = cbp;
1076 	cbp->cb_buf.b_bio1.bio_offset = dbtob(cbn + cboff + ci->ci_skip);
1077 
1078 	/*
1079 	 * context for ccdiodone
1080 	 */
1081 	cbp->cb_obio = bio;
1082 	cbp->cb_unit = cs - ccd_softc;
1083 	cbp->cb_comp = ci - cs->sc_cinfo;
1084 
1085 #ifdef DEBUG
1086 	if (ccddebug & CCDB_IO)
1087 		kprintf(" dev %x(u%d): cbp %x off %lld addr %x bcnt %d\n",
1088 		       ci->ci_dev, ci-cs->sc_cinfo, cbp,
1089 		       cbp->cb_buf.b_bio1.bio_offset,
1090 		       cbp->cb_buf.b_data, cbp->cb_buf.b_bcount);
1091 #endif
1092 	cb[0] = cbp;
1093 
1094 	/*
1095 	 * Note: both I/O's setup when reading from mirror, but only one
1096 	 * will be executed.
1097 	 */
1098 	if (cs->sc_cflags & CCDF_MIRROR) {
1099 		/* mirror, setup second I/O */
1100 		cbp = getccdbuf();
1101 
1102 		cbp->cb_buf.b_cmd = bio->bio_buf->b_cmd;
1103 		cbp->cb_buf.b_flags |= bio->bio_buf->b_flags;
1104 		cbp->cb_buf.b_data = addr;
1105 		cbp->cb_vp = ci2->ci_vp;
1106 		if (cs->sc_ileave == 0)
1107 		      cbc = dbtob((off_t)(ci->ci_size - cbn));
1108 		else
1109 		      cbc = dbtob((off_t)(cs->sc_ileave - cboff));
1110 		if (cbc > cs->sc_maxiosize)
1111 			cbc = cs->sc_maxiosize;
1112 		cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount;
1113 		cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount;
1114 
1115 		cbp->cb_buf.b_bio1.bio_done = ccdiodone;
1116 		cbp->cb_buf.b_bio1.bio_caller_info1.ptr = cbp;
1117 		cbp->cb_buf.b_bio1.bio_offset = dbtob(cbn + cboff + ci2->ci_skip);
1118 
1119 		/*
1120 		 * context for ccdiodone
1121 		 */
1122 		cbp->cb_obio = bio;
1123 		cbp->cb_unit = cs - ccd_softc;
1124 		cbp->cb_comp = ci2 - cs->sc_cinfo;
1125 		cb[1] = cbp;
1126 		/* link together the ccdbuf's and clear "mirror done" flag */
1127 		cb[0]->cb_mirror = cb[1];
1128 		cb[1]->cb_mirror = cb[0];
1129 		cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE;
1130 		cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE;
1131 	}
1132 }
1133 
1134 static void
1135 ccdintr(struct ccd_softc *cs, struct bio *bio)
1136 {
1137 	struct buf *bp = bio->bio_buf;
1138 
1139 #ifdef DEBUG
1140 	if (ccddebug & CCDB_FOLLOW)
1141 		kprintf("ccdintr(%x, %x)\n", cs, bp);
1142 #endif
1143 	/*
1144 	 * Request is done for better or worse, wakeup the top half.
1145 	 */
1146 	if (bp->b_flags & B_ERROR)
1147 		bp->b_resid = bp->b_bcount;
1148 	devstat_end_transaction_buf(&cs->device_stats, bp);
1149 	biodone(bio);
1150 }
1151 
1152 /*
1153  * Called at interrupt time.
1154  *
1155  * Mark the component as done and if all components are done,
1156  * take a ccd interrupt.
1157  */
1158 static void
1159 ccdiodone(struct bio *bio)
1160 {
1161 	struct ccdbuf *cbp = bio->bio_caller_info1.ptr;
1162 	struct bio *obio = cbp->cb_obio;
1163 	struct buf *obp = obio->bio_buf;
1164 	int unit = cbp->cb_unit;
1165 	struct ccd_softc *sc = &ccd_softc[unit];
1166 	int count;
1167 
1168 	/*
1169 	 * Since we do not have exclusive access to underlying devices,
1170 	 * we can't keep cache translations around.
1171 	 */
1172 	clearbiocache(bio->bio_next);
1173 
1174 	ccdlock(sc);
1175 
1176 #ifdef DEBUG
1177 	if (ccddebug & CCDB_FOLLOW)
1178 		kprintf("ccdiodone(%x)\n", cbp);
1179 	if (ccddebug & CCDB_IO) {
1180 		kprintf("ccdiodone: bp %x bcount %d resid %d\n",
1181 		       obp, obp->b_bcount, obp->b_resid);
1182 		kprintf(" dev %x(u%d), cbp %x off %lld addr %x bcnt %d\n",
1183 		       cbp->cb_buf.b_dev, cbp->cb_comp, cbp,
1184 		       cbp->cb_buf.b_loffset, cbp->cb_buf.b_data,
1185 		       cbp->cb_buf.b_bcount);
1186 	}
1187 #endif
1188 
1189 	/*
1190 	 * If an error occured, report it.  If this is a mirrored
1191 	 * configuration and the first of two possible reads, do not
1192 	 * set the error in the bp yet because the second read may
1193 	 * succeed.
1194 	 */
1195 	if (cbp->cb_buf.b_flags & B_ERROR) {
1196 		const char *msg = "";
1197 
1198 		if ((sc->sc_cflags & CCDF_MIRROR) &&
1199 		    (cbp->cb_buf.b_cmd == BUF_CMD_READ) &&
1200 		    (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1201 			/*
1202 			 * We will try our read on the other disk down
1203 			 * below, also reverse the default pick so if we
1204 			 * are doing a scan we do not keep hitting the
1205 			 * bad disk first.
1206 			 */
1207 			msg = ", trying other disk";
1208 			sc->sc_pick = 1 - sc->sc_pick;
1209 			sc->sc_blk[sc->sc_pick] = obio->bio_offset;
1210 		} else {
1211 			obp->b_flags |= B_ERROR;
1212 			obp->b_error = cbp->cb_buf.b_error ?
1213 			    cbp->cb_buf.b_error : EIO;
1214 		}
1215 		kprintf("ccd%d: error %d on component %d "
1216 			"offset %jd (ccd offset %jd)%s\n",
1217 		        unit, obp->b_error, cbp->cb_comp,
1218 		        (intmax_t)cbp->cb_buf.b_bio2.bio_offset,
1219 		        (intmax_t)obio->bio_offset,
1220 		        msg);
1221 	}
1222 
1223 	/*
1224 	 * Process mirror.  If we are writing, I/O has been initiated on both
1225 	 * buffers and we fall through only after both are finished.
1226 	 *
1227 	 * If we are reading only one I/O is initiated at a time.  If an
1228 	 * error occurs we initiate the second I/O and return, otherwise
1229 	 * we free the second I/O without initiating it.
1230 	 */
1231 
1232 	if (sc->sc_cflags & CCDF_MIRROR) {
1233 		if (cbp->cb_buf.b_cmd != BUF_CMD_READ) {
1234 			/*
1235 			 * When writing, handshake with the second buffer
1236 			 * to determine when both are done.  If both are not
1237 			 * done, return here.
1238 			 */
1239 			if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1240 				cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE;
1241 				putccdbuf(cbp);
1242 				ccdunlock(sc);
1243 				return;
1244 			}
1245 		} else {
1246 			/*
1247 			 * When reading, either dispose of the second buffer
1248 			 * or initiate I/O on the second buffer if an error
1249 			 * occured with this one.
1250 			 */
1251 			if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1252 				if (cbp->cb_buf.b_flags & B_ERROR) {
1253 					cbp->cb_mirror->cb_pflags |=
1254 					    CCDPF_MIRROR_DONE;
1255 					vn_strategy(
1256 					    cbp->cb_mirror->cb_vp,
1257 					    &cbp->cb_mirror->cb_buf.b_bio1
1258 					);
1259 					putccdbuf(cbp);
1260 					ccdunlock(sc);
1261 					return;
1262 				} else {
1263 					putccdbuf(cbp->cb_mirror);
1264 					/* fall through */
1265 				}
1266 			}
1267 		}
1268 	}
1269 
1270 	/*
1271 	 * Use our saved b_bufsize to determine if an unexpected EOF occured.
1272 	 */
1273 	count = cbp->cb_buf.b_bufsize;
1274 	putccdbuf(cbp);
1275 
1276 	/*
1277 	 * If all done, "interrupt".
1278 	 */
1279 	obp->b_resid -= count;
1280 	if (obp->b_resid < 0)
1281 		panic("ccdiodone: count");
1282 
1283 	ccdunlock(sc);
1284 
1285 	if (obp->b_resid == 0)
1286 		ccdintr(sc, obio);
1287 }
1288 
1289 static int
1290 ccdioctl(struct dev_ioctl_args *ap)
1291 {
1292 	cdev_t dev = ap->a_head.a_dev;
1293 	int unit = ccdunit(dev);
1294 	int i, j, lookedup = 0, error = 0;
1295 	struct ccd_softc *cs;
1296 	struct ccd_ioctl *ccio = (struct ccd_ioctl *)ap->a_data;
1297 	struct ccddevice ccd;
1298 	struct disk_info info;
1299 	char **cpp;
1300 	struct vnode **vpp;
1301 
1302 	if (unit >= numccd)
1303 		return (ENXIO);
1304 	cs = &ccd_softc[unit];
1305 
1306 	bzero(&ccd, sizeof(ccd));
1307 
1308 	switch (ap->a_cmd) {
1309 	case CCDIOCSET:
1310 		if (cs->sc_flags & CCDF_INITED)
1311 			return (EBUSY);
1312 
1313 		if ((ap->a_fflag & FWRITE) == 0)
1314 			return (EBADF);
1315 
1316 		if ((error = ccdlock(cs)) != 0)
1317 			return (error);
1318 
1319 		if (ccio->ccio_ndisks > CCD_MAXNDISKS) {
1320 			ccdunlock(cs);
1321 			return (EINVAL);
1322 		}
1323 
1324 		/* Fill in some important bits. */
1325 		ccd.ccd_unit = unit;
1326 		ccd.ccd_interleave = ccio->ccio_ileave;
1327 		if (ccd.ccd_interleave == 0 &&
1328 		    ((ccio->ccio_flags & CCDF_MIRROR) ||
1329 		     (ccio->ccio_flags & CCDF_PARITY))) {
1330 			kprintf("ccd%d: disabling mirror/parity, interleave is 0\n", unit);
1331 			ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY);
1332 		}
1333 		if ((ccio->ccio_flags & CCDF_MIRROR) &&
1334 		    (ccio->ccio_flags & CCDF_PARITY)) {
1335 			kprintf("ccd%d: can't specify both mirror and parity, using mirror\n", unit);
1336 			ccio->ccio_flags &= ~CCDF_PARITY;
1337 		}
1338 		if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) &&
1339 		    !(ccio->ccio_flags & CCDF_UNIFORM)) {
1340 			kprintf("ccd%d: mirror/parity forces uniform flag\n",
1341 			       unit);
1342 			ccio->ccio_flags |= CCDF_UNIFORM;
1343 		}
1344 		ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK;
1345 
1346 		/*
1347 		 * Allocate space for and copy in the array of
1348 		 * componet pathnames and device numbers.
1349 		 */
1350 		cpp = kmalloc(ccio->ccio_ndisks * sizeof(char *),
1351 		    M_DEVBUF, M_WAITOK);
1352 		vpp = kmalloc(ccio->ccio_ndisks * sizeof(struct vnode *),
1353 		    M_DEVBUF, M_WAITOK);
1354 
1355 		error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp,
1356 				ccio->ccio_ndisks * sizeof(char **));
1357 		if (error) {
1358 			kfree(vpp, M_DEVBUF);
1359 			kfree(cpp, M_DEVBUF);
1360 			ccdunlock(cs);
1361 			return (error);
1362 		}
1363 
1364 #ifdef DEBUG
1365 		if (ccddebug & CCDB_INIT) {
1366 			for (i = 0; i < ccio->ccio_ndisks; ++i)
1367 				kprintf("ccdioctl: component %d: 0x%x\n",
1368 				    i, cpp[i]);
1369 		}
1370 #endif
1371 
1372 		for (i = 0; i < ccio->ccio_ndisks; ++i) {
1373 #ifdef DEBUG
1374 			if (ccddebug & CCDB_INIT)
1375 				kprintf("ccdioctl: lookedup = %d\n", lookedup);
1376 #endif
1377 			if ((error = ccdlookup(cpp[i], &vpp[i])) != 0) {
1378 				for (j = 0; j < lookedup; ++j)
1379 					(void)vn_close(vpp[j], FREAD|FWRITE, NULL);
1380 				kfree(vpp, M_DEVBUF);
1381 				kfree(cpp, M_DEVBUF);
1382 				ccdunlock(cs);
1383 				return (error);
1384 			}
1385 			++lookedup;
1386 		}
1387 		ccd.ccd_cpp = cpp;
1388 		ccd.ccd_vpp = vpp;
1389 		ccd.ccd_ndev = ccio->ccio_ndisks;
1390 
1391 		/*
1392 		 * Initialize the ccd.  Fills in the softc for us.
1393 		 */
1394 		if ((error = ccdinit(&ccd, cpp, ap->a_cred)) != 0) {
1395 			for (j = 0; j < lookedup; ++j)
1396 				vn_close(vpp[j], FREAD|FWRITE, NULL);
1397 			kfree(vpp, M_DEVBUF);
1398 			kfree(cpp, M_DEVBUF);
1399 			ccdunlock(cs);
1400 			return (error);
1401 		}
1402 
1403 		/*
1404 		 * The ccd has been successfully initialized, so
1405 		 * we can place it into the array and read the disklabel.
1406 		 */
1407 		bcopy(&ccd, &ccddevs[unit], sizeof(ccd));
1408 		ccio->ccio_unit = unit;
1409 		ccio->ccio_size = cs->sc_size;
1410 
1411 		bzero(&info, sizeof(info));
1412 		info.d_media_blksize = cs->sc_geom.ccg_secsize;
1413 		info.d_media_blocks  = cs->sc_size;
1414 		info.d_nheads	     = cs->sc_geom.ccg_ntracks;
1415 		info.d_secpertrack   = cs->sc_geom.ccg_nsectors;
1416 		info.d_ncylinders    = cs->sc_geom.ccg_ncylinders;
1417 		info.d_secpercyl     = info.d_nheads * info.d_secpertrack;
1418 
1419 		/*
1420 		 * For cases where a label is directly applied to the ccd,
1421 		 * without slices, DSO_COMPATMBR forces one sector be
1422 		 * reserved for backwards compatibility.
1423 		 */
1424 		info.d_dsflags	     = DSO_COMPATMBR;
1425 		disk_setdiskinfo(&cs->sc_disk, &info);
1426 
1427 		ccdunlock(cs);
1428 
1429 		break;
1430 
1431 	case CCDIOCCLR:
1432 		if ((cs->sc_flags & CCDF_INITED) == 0)
1433 			return (ENXIO);
1434 
1435 		if ((ap->a_fflag & FWRITE) == 0)
1436 			return (EBADF);
1437 
1438 		if ((error = ccdlock(cs)) != 0)
1439 			return (error);
1440 
1441 		if (dev_drefs(cs->sc_dev) > 1) {
1442 			ccdunlock(cs);
1443 			return (EBUSY);
1444 		}
1445 
1446 		/*
1447 		 * Free ccd_softc information and clear entry.
1448 		 */
1449 
1450 		/* Close the components and free their pathnames. */
1451 		for (i = 0; i < cs->sc_nccdisks; ++i) {
1452 			/*
1453 			 * XXX: this close could potentially fail and
1454 			 * cause Bad Things.  Maybe we need to force
1455 			 * the close to happen?
1456 			 */
1457 #ifdef DEBUG
1458 			if (ccddebug & CCDB_VNODE)
1459 				vprint("CCDIOCCLR: vnode info",
1460 				    cs->sc_cinfo[i].ci_vp);
1461 #endif
1462 			(void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, NULL);
1463 			kfree(cs->sc_cinfo[i].ci_path, M_DEVBUF);
1464 		}
1465 
1466 		/* Free interleave index. */
1467 		for (i = 0; cs->sc_itable[i].ii_ndisk; ++i)
1468 			kfree(cs->sc_itable[i].ii_index, M_DEVBUF);
1469 
1470 		/* Free component info and interleave table. */
1471 		kfree(cs->sc_cinfo, M_DEVBUF);
1472 		kfree(cs->sc_itable, M_DEVBUF);
1473 		cs->sc_cinfo = NULL;
1474 		cs->sc_itable = NULL;
1475 		cs->sc_flags &= ~CCDF_INITED;
1476 
1477 		/*
1478 		 * Free ccddevice information and clear entry.
1479 		 */
1480 		kfree(ccddevs[unit].ccd_cpp, M_DEVBUF);
1481 		kfree(ccddevs[unit].ccd_vpp, M_DEVBUF);
1482 		bcopy(&ccd, &ccddevs[unit], sizeof(ccd));
1483 
1484 		/*
1485 		 * And remove the devstat entry.
1486 		 */
1487 		devstat_remove_entry(&cs->device_stats);
1488 
1489 		ccdunlock(cs);
1490 
1491 		break;
1492 
1493 	default:
1494 		return (ENOTTY);
1495 	}
1496 
1497 	return (0);
1498 }
1499 
/*
 * Dump handler.  Dumping to a ccd is not implemented, so every
 * request fails with ENXIO; the argument is ignored.
 */
static int
ccddump(struct dev_dump_args *ap)
{
	return (ENXIO);
}
1506 
1507 /*
1508  * Lookup the provided name in the filesystem.  If the file exists,
1509  * is a valid block device, and isn't being used by anyone else,
1510  * set *vpp to the file's vnode.
1511  */
1512 static int
1513 ccdlookup(char *path, struct vnode **vpp)
1514 {
1515 	struct nlookupdata nd;
1516 	struct vnode *vp;
1517 	int error;
1518 
1519 	*vpp = NULL;
1520 
1521 	error = nlookup_init(&nd, path, UIO_USERSPACE, NLC_FOLLOW|NLC_LOCKVP);
1522 	if (error)
1523 		return (error);
1524 	if ((error = vn_open(&nd, NULL, FREAD|FWRITE, 0)) != 0) {
1525 #ifdef DEBUG
1526 		if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
1527 			kprintf("ccdlookup: vn_open error = %d\n", error);
1528 #endif
1529 		goto done;
1530 	}
1531 	vp = nd.nl_open_vp;
1532 
1533 	if (vp->v_opencount > 1) {
1534 		error = EBUSY;
1535 		goto done;
1536 	}
1537 
1538 	if (!vn_isdisk(vp, &error))
1539 		goto done;
1540 
1541 #ifdef DEBUG
1542 	if (ccddebug & CCDB_VNODE)
1543 		vprint("ccdlookup: vnode info", vp);
1544 #endif
1545 
1546 	vn_unlock(vp);
1547 	nd.nl_open_vp = NULL;
1548 	nlookup_done(&nd);
1549 	*vpp = vp;				/* leave ref intact  */
1550 	return (0);
1551 done:
1552 	nlookup_done(&nd);
1553 	return (error);
1554 }
1555 
1556 /*
1557  * Wait interruptibly for an exclusive lock.
1558  */
1559 static int
1560 ccdlock(struct ccd_softc *cs)
1561 {
1562 	lockmgr(&cs->sc_lock, LK_EXCLUSIVE);
1563 
1564 	return (0);
1565 }
1566 
1567 /*
1568  * Unlock and wake up any waiters.
1569  */
1570 static void
1571 ccdunlock(struct ccd_softc *cs)
1572 {
1573 	lockmgr(&cs->sc_lock, LK_RELEASE);
1574 }
1575 
1576 #ifdef DEBUG
1577 static void
1578 printiinfo(struct ccdiinfo *ii)
1579 {
1580 	int ix, i;
1581 
1582 	for (ix = 0; ii->ii_ndisk; ix++, ii++) {
1583 		kprintf(" itab[%d]: #dk %d sblk %d soff %d",
1584 		       ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff);
1585 		for (i = 0; i < ii->ii_ndisk; i++)
1586 			kprintf(" %d", ii->ii_index[i]);
1587 		kprintf("\n");
1588 	}
1589 }
1590 #endif
1591