xref: /dragonfly/sys/dev/disk/ccd/ccd.c (revision 678e8cc6)
1 /*
2  * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  */
35 /*
36  * Copyright (c) 1995 Jason R. Thorpe.
37  * All rights reserved.
38  *
39  * Redistribution and use in source and binary forms, with or without
40  * modification, are permitted provided that the following conditions
41  * are met:
42  * 1. Redistributions of source code must retain the above copyright
43  *    notice, this list of conditions and the following disclaimer.
44  * 2. Redistributions in binary form must reproduce the above copyright
45  *    notice, this list of conditions and the following disclaimer in the
46  *    documentation and/or other materials provided with the distribution.
47  * 3. All advertising materials mentioning features or use of this software
48  *    must display the following acknowledgement:
49  *	This product includes software developed for the NetBSD Project
50  *	by Jason R. Thorpe.
51  * 4. The name of the author may not be used to endorse or promote products
52  *    derived from this software without specific prior written permission.
53  *
54  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
55  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
56  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
57  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
58  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
59  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
60  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
61  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
62  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64  * SUCH DAMAGE.
65  */
66 
67 /*
68  * Copyright (c) 1988 University of Utah.
69  * Copyright (c) 1990, 1993
70  *	The Regents of the University of California.  All rights reserved.
71  *
72  * This code is derived from software contributed to Berkeley by
73  * the Systems Programming Group of the University of Utah Computer
74  * Science Department.
75  *
76  * Redistribution and use in source and binary forms, with or without
77  * modification, are permitted provided that the following conditions
78  * are met:
79  * 1. Redistributions of source code must retain the above copyright
80  *    notice, this list of conditions and the following disclaimer.
81  * 2. Redistributions in binary form must reproduce the above copyright
82  *    notice, this list of conditions and the following disclaimer in the
83  *    documentation and/or other materials provided with the distribution.
84  * 3. All advertising materials mentioning features or use of this software
85  *    must display the following acknowledgement:
86  *	This product includes software developed by the University of
87  *	California, Berkeley and its contributors.
88  * 4. Neither the name of the University nor the names of its contributors
89  *    may be used to endorse or promote products derived from this software
90  *    without specific prior written permission.
91  *
92  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
93  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
95  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
96  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
97  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
98  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
99  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
100  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
101  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
102  * SUCH DAMAGE.
103  *
104  * from: Utah $Hdr: cd.c 1.6 90/11/28$
105  */
106 /*
107  * @(#)cd.c	8.2 (Berkeley) 11/16/93
108  * $FreeBSD: src/sys/dev/ccd/ccd.c,v 1.73.2.1 2001/09/11 09:49:52 kris Exp $
109  * $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $
110  */
111 
112 /*
113  * "Concatenated" disk driver.
114  *
115  * Original dynamic configuration support by:
116  *	Jason R. Thorpe <thorpej@nas.nasa.gov>
117  *	Numerical Aerodynamic Simulation Facility
118  *	Mail Stop 258-6
119  *	NASA Ames Research Center
120  *	Moffett Field, CA 94035
121  */
122 
123 #include "use_ccd.h"
124 
125 #include <sys/param.h>
126 #include <sys/systm.h>
127 #include <sys/kernel.h>
128 #include <sys/module.h>
129 #include <sys/proc.h>
130 #include <sys/buf.h>
131 #include <sys/malloc.h>
132 #include <sys/nlookup.h>
133 #include <sys/conf.h>
134 #include <sys/stat.h>
135 #include <sys/sysctl.h>
136 #include <sys/disk.h>
137 #include <sys/dtype.h>
138 #include <sys/diskslice.h>
139 #include <sys/devicestat.h>
140 #include <sys/fcntl.h>
141 #include <sys/vnode.h>
142 #include <sys/ccdvar.h>
143 
144 #include <vm/vm_zone.h>
145 
146 #include <vfs/ufs/dinode.h> 	/* XXX Used only for fs.h */
147 #include <vfs/ufs/fs.h> 	/* XXX used only to get BBSIZE and SBSIZE */
148 
149 #include <sys/thread2.h>
150 #include <sys/buf2.h>
151 #include <sys/mplock2.h>
152 
/*
 * Building with CCDDEBUG implies DEBUG for the section below.
 */
#if defined(CCDDEBUG) && !defined(DEBUG)
#define DEBUG
#endif

/*
 * Debug trace categories.  They are tested as bits of the ccddebug
 * mask, which is exported read/write through SYSCTL_INT under the
 * _debug node.
 *
 * NOTE: DEBUG is #undef'd at the end of this section.  As a result
 * every later "#ifdef DEBUG" block in this file is compiled out, even
 * when the kernel is built with DEBUG or CCDDEBUG; only the mask and
 * its sysctl remain.
 */
#ifdef DEBUG
#define CCDB_FOLLOW	0x01
#define CCDB_INIT	0x02
#define CCDB_IO		0x04
#define CCDB_LABEL	0x08
#define CCDB_VNODE	0x10
static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL |
    CCDB_VNODE;
SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, "");
#undef DEBUG
#endif

/*
 * Thin aliases for dkunit()/dkpart(); in this file ccdunit() is applied
 * to a cdev_t to obtain the index into ccd_softc[].
 */
#define	ccdunit(x)	dkunit(x)
#define ccdpart(x)	dkpart(x)
171 
172 /*
173    This is how mirroring works (only writes are special):
174 
175    When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s
176    linked together by the cb_mirror field.  "cb_pflags &
177    CCDPF_MIRROR_DONE" is set to 0 on both of them.
178 
179    When a component returns to ccdiodone(), it checks if "cb_pflags &
180    CCDPF_MIRROR_DONE" is set or not.  If not, it sets the partner's
181    flag and returns.  If it is, it means its partner has already
182    returned, so it will go to the regular cleanup.
183 
184  */
185 
/*
 * Per-component I/O request.
 *
 * One ccdbuf wraps the struct buf that is handed to a component's
 * vnode via vn_strategy().  Instances are obtained from getccdbuf()
 * and returned with putccdbuf(), which caches idle ones on a singly
 * linked free list through cb_freenext.
 */
struct ccdbuf {
	struct buf	cb_buf;		/* new I/O buf */
	struct vnode	*cb_vp;		/* related vnode */
	struct bio	*cb_obio;	/* ptr. to original I/O buf */
	struct ccdbuf	*cb_freenext;	/* free list link */
	int		cb_unit;	/* target unit */
	int		cb_comp;	/* target component */
	int		cb_pflags;	/* mirror/parity status flag */
	struct ccdbuf	*cb_mirror;	/* mirror counterpart */
};

/* bits in cb_pflags */
#define CCDPF_MIRROR_DONE 1	/* if set, mirror counterpart is done */
199 
/* Device entry points referenced by ccd_ops below. */
static d_open_t ccdopen;
static d_close_t ccdclose;
static d_strategy_t ccdstrategy;
static d_ioctl_t ccdioctl;
static d_dump_t ccddump;

/*
 * Upper bound on the number of idle ccdbufs kept on the ccdfreebufs
 * free list; putccdbuf() frees buffers outright once this many are
 * already cached.
 */
#define NCCDFREEHIWAT	16

/*
 * Device operations vector passed to disk_create() for every ccd unit.
 * Character read/write go through the generic physread/physwrite.
 */
static struct dev_ops ccd_ops = {
	{ "ccd", 0, D_DISK },
	.d_open =	ccdopen,
	.d_close =	ccdclose,
	.d_read =	physread,
	.d_write =	physwrite,
	.d_ioctl =	ccdioctl,
	.d_strategy =	ccdstrategy,
	.d_dump =	ccddump
};
218 
/* called during module initialization */
static	void ccdattach (void);
static	int ccddetach (void);
static	int ccd_modevent (module_t, int, void *);

/* called by biodone() at interrupt time */
static	void ccdiodone (struct bio *bio);

/* internal helpers */
static	void ccdstart (struct ccd_softc *, struct bio *);
static	void ccdinterleave (struct ccd_softc *, int);
static	void ccdintr (struct ccd_softc *, struct bio *);
static	int ccdinit (struct ccddevice *, char **, struct ucred *);
static	int ccdlookup (char *, struct vnode **);
static	void ccdbuffer (struct ccdbuf **ret, struct ccd_softc *,
		struct bio *, off_t, caddr_t, long);
static	int ccdlock (struct ccd_softc *);
static	void ccdunlock (struct ccd_softc *);

/* Only declared when DEBUG is still defined at this point. */
#ifdef DEBUG
static	void printiinfo (struct ccdiinfo *);
#endif

/*
 * Driver-wide state.  ccd_softc and ccddevs are arrays of numccd
 * entries allocated by ccdattach(); ccdfreebufs is the head of the
 * ccdbuf free list and numccdfreebufs counts the entries on it.
 */
/* Non-private for the benefit of libkvm. */
struct	ccd_softc *ccd_softc;
struct	ccddevice *ccddevs;
struct	ccdbuf *ccdfreebufs;
static	int numccdfreebufs;
static	int numccd = 0;
247 
248 /*
249  * getccdbuf() -	Allocate and zero a ccd buffer.
250  *
251  *	This routine is called at splbio().
252  */
253 
254 static __inline
255 struct ccdbuf *
256 getccdbuf(void)
257 {
258 	struct ccdbuf *cbp;
259 
260 	/*
261 	 * Allocate from freelist or malloc as necessary
262 	 */
263 	if ((cbp = ccdfreebufs) != NULL) {
264 		ccdfreebufs = cbp->cb_freenext;
265 		--numccdfreebufs;
266 		reinitbufbio(&cbp->cb_buf);
267 	} else {
268 		cbp = kmalloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK|M_ZERO);
269 		initbufbio(&cbp->cb_buf);
270 	}
271 
272 	/*
273 	 * independant struct buf initialization
274 	 */
275 	buf_dep_init(&cbp->cb_buf);
276 	BUF_LOCK(&cbp->cb_buf, LK_EXCLUSIVE);
277 	BUF_KERNPROC(&cbp->cb_buf);
278 	cbp->cb_buf.b_flags = B_PAGING | B_BNOCLIP;
279 
280 	return(cbp);
281 }
282 
283 /*
284  * putccdbuf() -	Free a ccd buffer.
285  *
286  *	This routine is called at splbio().
287  */
288 
289 static __inline
290 void
291 putccdbuf(struct ccdbuf *cbp)
292 {
293 	BUF_UNLOCK(&cbp->cb_buf);
294 
295 	if (numccdfreebufs < NCCDFREEHIWAT) {
296 		cbp->cb_freenext = ccdfreebufs;
297 		ccdfreebufs = cbp;
298 		++numccdfreebufs;
299 	} else {
300 		uninitbufbio(&cbp->cb_buf);
301 		kfree((caddr_t)cbp, M_DEVBUF);
302 	}
303 }
304 
305 /*
306  * Called by main() during pseudo-device attachment.  All we need
307  * to do is allocate enough space for devices to be configured later, and
308  * add devsw entries.
309  */
310 static void
311 ccdattach(void)
312 {
313 	struct disk_info info;
314 	struct ccd_softc *cs;
315 	int i;
316 	int num = NCCD;
317 
318 	if (num > 1)
319 		kprintf("ccd0-%d: Concatenated disk drivers\n", num-1);
320 	else
321 		kprintf("ccd0: Concatenated disk driver\n");
322 
323 	ccd_softc = kmalloc(num * sizeof(struct ccd_softc), M_DEVBUF,
324 			    M_WAITOK | M_ZERO);
325 	ccddevs = kmalloc(num * sizeof(struct ccddevice), M_DEVBUF,
326 			    M_WAITOK | M_ZERO);
327 	numccd = num;
328 
329 	/*
330 	 * With normal disk devices the open simply fails if the media
331 	 * is not present.  With CCD we have to be able to open the
332 	 * raw disk to use the ioctl's to set it up, so create a dummy
333 	 * disk info structure so dscheck() doesn't blow up.
334 	 */
335 	bzero(&info, sizeof(info));
336 	info.d_media_blksize = DEV_BSIZE;
337 
338 	for (i = 0; i < numccd; ++i) {
339 		cs = &ccd_softc[i];
340 		cs->sc_dev = disk_create(i, &cs->sc_disk, &ccd_ops);
341 		cs->sc_dev->si_drv1 = cs;
342 		cs->sc_dev->si_iosize_max = 256 * 512;	/* XXX */
343 		disk_setdiskinfo(&cs->sc_disk, &info);
344 	}
345 }
346 
347 static int
348 ccddetach(void)
349 {
350 	struct ccd_softc *cs;
351 	struct dev_ioctl_args ioctl_args;
352 	int i;
353 	int error = 0;
354 	int eval;
355 
356 	bzero(&ioctl_args, sizeof(ioctl_args));
357 
358 	for (i = 0; i < numccd; ++i) {
359 		cs = &ccd_softc[i];
360 		if (cs->sc_dev == NULL)
361 			continue;
362 		ioctl_args.a_head.a_dev = cs->sc_dev;
363 		ioctl_args.a_cmd = CCDIOCCLR;
364 		ioctl_args.a_fflag = FWRITE;
365 		eval = ccdioctl(&ioctl_args);
366 		if (eval && eval != ENXIO) {
367 			kprintf("ccd%d: In use, cannot detach\n", i);
368 			error = EBUSY;
369 		}
370 	}
371 	if (error == 0) {
372 		for (i = 0; i < numccd; ++i) {
373 			cs = &ccd_softc[i];
374 			if (cs->sc_dev == NULL)
375 				continue;
376 			disk_destroy(&cs->sc_disk);
377 			cs->sc_dev = NULL;
378 		}
379 		if (ccd_softc)
380 			kfree(ccd_softc, M_DEVBUF);
381 		if (ccddevs)
382 			kfree(ccddevs, M_DEVBUF);
383 	}
384 	return (error);
385 }
386 
387 static int
388 ccd_modevent(module_t mod, int type, void *data)
389 {
390 	int error = 0;
391 
392 	switch (type) {
393 	case MOD_LOAD:
394 		ccdattach();
395 		break;
396 
397 	case MOD_UNLOAD:
398 		error = ccddetach();
399 		break;
400 
401 	default:	/* MOD_SHUTDOWN etc */
402 		break;
403 	}
404 	return (error);
405 }
406 
407 DEV_MODULE(ccd, ccd_modevent, NULL);
408 
409 static int
410 ccdinit(struct ccddevice *ccd, char **cpaths, struct ucred *cred)
411 {
412 	struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit];
413 	struct ccdcinfo *ci = NULL;	/* XXX */
414 	int ix;
415 	struct vnode *vp;
416 	u_int64_t skip;
417 	u_int64_t size;
418 	u_int64_t minsize;
419 	int maxsecsize;
420 	struct partinfo dpart;
421 	struct ccdgeom *ccg = &cs->sc_geom;
422 	char tmppath[MAXPATHLEN];
423 	int error = 0;
424 
425 #ifdef DEBUG
426 	if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
427 		kprintf("ccdinit: unit %d\n", ccd->ccd_unit);
428 #endif
429 
430 	cs->sc_size = 0;
431 	cs->sc_ileave = ccd->ccd_interleave;
432 	cs->sc_nccdisks = ccd->ccd_ndev;
433 
434 	/* Allocate space for the component info. */
435 	cs->sc_cinfo = kmalloc(cs->sc_nccdisks * sizeof(struct ccdcinfo),
436 				M_DEVBUF, M_WAITOK);
437 	cs->sc_maxiosize = MAXPHYS;
438 
439 	/*
440 	 * Verify that each component piece exists and record
441 	 * relevant information about it.
442 	 */
443 	maxsecsize = 0;
444 	minsize = 0;
445 	for (ix = 0; ix < cs->sc_nccdisks; ix++) {
446 		vp = ccd->ccd_vpp[ix];
447 		ci = &cs->sc_cinfo[ix];
448 		ci->ci_vp = vp;
449 
450 		/*
451 		 * Copy in the pathname of the component.
452 		 */
453 		bzero(tmppath, sizeof(tmppath));	/* sanity */
454 		if ((error = copyinstr(cpaths[ix], tmppath,
455 		    MAXPATHLEN, &ci->ci_pathlen)) != 0) {
456 #ifdef DEBUG
457 			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
458 				kprintf("ccd%d: can't copy path, error = %d\n",
459 				    ccd->ccd_unit, error);
460 #endif
461 			goto fail;
462 		}
463 		ci->ci_path = kmalloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK);
464 		bcopy(tmppath, ci->ci_path, ci->ci_pathlen);
465 
466 		ci->ci_dev = vn_todev(vp);
467 		if (ci->ci_dev->si_iosize_max &&
468 		    cs->sc_maxiosize > ci->ci_dev->si_iosize_max) {
469 			cs->sc_maxiosize = ci->ci_dev->si_iosize_max;
470 		}
471 
472 		/*
473 		 * Get partition information for the component.
474 		 */
475 		error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart, FREAD,
476 				  cred, NULL);
477 		if (error) {
478 #ifdef DEBUG
479 			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
480 				 kprintf("ccd%d: %s: ioctl failed, error = %d\n",
481 				     ccd->ccd_unit, ci->ci_path, error);
482 #endif
483 			goto fail;
484 		}
485 		if (dpart.fstype != FS_CCD &&
486 		    !kuuid_is_ccd(&dpart.fstype_uuid)) {
487 			kprintf("ccd%d: %s: filesystem type must be 'ccd'\n",
488 				ccd->ccd_unit, ci->ci_path);
489 			error = EFTYPE;
490 			goto fail;
491 		}
492 		if (maxsecsize < dpart.media_blksize)
493 			maxsecsize = dpart.media_blksize;
494 
495 		/*
496 		 * Skip a certain amount of storage at the beginning of
497 		 * the component to make sure we don't infringe on any
498 		 * reserved sectors.  This is handled entirely by
499 		 * dpart.reserved_blocks but we also impose a minimum
500 		 * of 16 sectors for backwards compatibility.
501 		 */
502 		skip = 16;
503 		if (skip < dpart.reserved_blocks)
504 			skip = dpart.reserved_blocks;
505 		size = dpart.media_blocks - skip;
506 
507 		/*
508 		 * Calculate the size, truncating to an interleave
509 		 * boundary if necessary.
510 		 */
511 		if (cs->sc_ileave > 1)
512 			size -= size % cs->sc_ileave;
513 
514 		if ((int64_t)size <= 0) {
515 #ifdef DEBUG
516 			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
517 				kprintf("ccd%d: %s: size == 0\n",
518 				    ccd->ccd_unit, ci->ci_path);
519 #endif
520 			error = ENODEV;
521 			goto fail;
522 		}
523 
524 		/*
525 		 * Calculate the smallest uniform component, used
526 		 * elsewhere.
527 		 */
528 		if (minsize == 0 || minsize > size)
529 			minsize = size;
530 		ci->ci_skip = skip;
531 		ci->ci_size = size;
532 		cs->sc_size += size;
533 	}
534 	kprintf("ccd%d: max component iosize is %d total blocks %lld\n",
535 		cs->sc_unit, cs->sc_maxiosize, (long long)cs->sc_size);
536 
537 	/*
538 	 * Don't allow the interleave to be smaller than
539 	 * the biggest component sector.
540 	 */
541 	if ((cs->sc_ileave > 0) &&
542 	    (cs->sc_ileave % (maxsecsize / DEV_BSIZE))) {
543 #ifdef DEBUG
544 		if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
545 			kprintf("ccd%d: interleave must be at least %d\n",
546 			    ccd->ccd_unit, (maxsecsize / DEV_BSIZE));
547 #endif
548 		error = EINVAL;
549 		goto fail;
550 	}
551 
552 	/*
553 	 * If uniform interleave is desired set all sizes to that of
554 	 * the smallest component.  This will guarentee that a single
555 	 * interleave table is generated.
556 	 *
557 	 * Lost space must be taken into account when calculating the
558 	 * overall size.  Half the space is lost when CCDF_MIRROR is
559 	 * specified.  One disk is lost when CCDF_PARITY is specified.
560 	 */
561 	if (ccd->ccd_flags & CCDF_UNIFORM) {
562 		for (ci = cs->sc_cinfo;
563 		     ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) {
564 			ci->ci_size = minsize;
565 		}
566 		if (ccd->ccd_flags & CCDF_MIRROR) {
567 			/*
568 			 * Check to see if an even number of components
569 			 * have been specified.  The interleave must also
570 			 * be non-zero in order for us to be able to
571 			 * guarentee the topology.
572 			 */
573 			if (cs->sc_nccdisks % 2) {
574 				kprintf("ccd%d: mirroring requires an even number of disks\n", ccd->ccd_unit );
575 				error = EINVAL;
576 				goto fail;
577 			}
578 			if (cs->sc_ileave == 0) {
579 				kprintf("ccd%d: an interleave must be specified when mirroring\n", ccd->ccd_unit);
580 				error = EINVAL;
581 				goto fail;
582 			}
583 			cs->sc_size = (cs->sc_nccdisks/2) * minsize;
584 		} else if (ccd->ccd_flags & CCDF_PARITY) {
585 			cs->sc_size = (cs->sc_nccdisks-1) * minsize;
586 		} else {
587 			if (cs->sc_ileave == 0) {
588 				kprintf("ccd%d: an interleave must be specified when using parity\n", ccd->ccd_unit);
589 				error = EINVAL;
590 				goto fail;
591 			}
592 			cs->sc_size = cs->sc_nccdisks * minsize;
593 		}
594 	}
595 
596 	/*
597 	 * Construct the interleave table.
598 	 */
599 	ccdinterleave(cs, ccd->ccd_unit);
600 
601 	/*
602 	 * Create pseudo-geometry based on 1MB cylinders.  It's
603 	 * pretty close.
604 	 */
605 	ccg->ccg_secsize = maxsecsize;
606 	ccg->ccg_ntracks = 1;
607 	ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize;
608 	ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors;
609 
610 	/*
611 	 * Add an devstat entry for this device.
612 	 */
613 	devstat_add_entry(&cs->device_stats, "ccd", ccd->ccd_unit,
614 			  ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED,
615 			  DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER,
616 			  DEVSTAT_PRIORITY_ARRAY);
617 
618 	cs->sc_flags |= CCDF_INITED;
619 	cs->sc_cflags = ccd->ccd_flags;	/* So we can find out later... */
620 	cs->sc_unit = ccd->ccd_unit;
621 	return (0);
622 fail:
623 	while (ci > cs->sc_cinfo) {
624 		ci--;
625 		kfree(ci->ci_path, M_DEVBUF);
626 	}
627 	kfree(cs->sc_cinfo, M_DEVBUF);
628 	cs->sc_cinfo = NULL;
629 	return (error);
630 }
631 
/*
 * ccdinterleave() - Build the interleave table cs->sc_itable.
 *
 * The table is an array of sc_nccdisks + 1 zeroed ccdiinfo entries,
 * terminated by an entry whose ii_ndisk is 0.  Each live entry lists,
 * in ii_index[], the components that take part in one region of the
 * ccd, together with the region's starting block (ii_startblk) and
 * the starting offset within each component (ii_startoff).
 *
 * With an interleave of 0 there is one entry per component.  Otherwise
 * one entry is produced for each distinct component size, smallest
 * first, covering the components at least that large.
 *
 * The unit argument is not used by the code below.  Panics if the
 * table fills up without room for the terminating entry.
 */
static void
ccdinterleave(struct ccd_softc *cs, int unit)
{
	struct ccdcinfo *ci, *smallci;
	struct ccdiinfo *ii;
	u_int64_t bn;
	u_int64_t lbn;
	u_int64_t size;
	int icount;
	int ix;

#ifdef DEBUG
	if (ccddebug & CCDB_INIT)
		kprintf("ccdinterleave(%x): ileave %d\n", cs, cs->sc_ileave);
#endif

	/*
	 * Allocate an interleave table.  The worst case occurs when each
	 * of N disks is of a different size, resulting in N interleave
	 * tables.
	 *
	 * Chances are this is too big, but we don't care.
	 */
	icount = cs->sc_nccdisks + 1;
	cs->sc_itable = kmalloc(icount * sizeof(struct ccdiinfo),
				M_DEVBUF, M_WAITOK|M_ZERO);

	/*
	 * Trivial case: no interleave (actually interleave of disk size).
	 * Each table entry represents a single component in its entirety.
	 *
	 * An interleave of 0 may not be used with a mirror or parity setup.
	 */
	if (cs->sc_ileave == 0) {
		bn = 0;
		ii = cs->sc_itable;

		for (ix = 0; ix < cs->sc_nccdisks; ix++) {
			/* Allocate space for ii_index. */
			ii->ii_index = kmalloc(sizeof(int), M_DEVBUF, M_WAITOK);
			ii->ii_ndisk = 1;
			ii->ii_startblk = bn;
			ii->ii_startoff = 0;
			ii->ii_index[0] = ix;
			/* Next entry starts where this component ends. */
			bn += cs->sc_cinfo[ix].ci_size;
			ii++;
		}
		/* Terminating entry. */
		ii->ii_ndisk = 0;
#ifdef DEBUG
		if (ccddebug & CCDB_INIT)
			printiinfo(cs->sc_itable);
#endif
		return;
	}

	/*
	 * The following isn't fast or pretty; it doesn't have to be.
	 *
	 * Across iterations:
	 *	size	component size already covered by earlier entries
	 *	bn	running block total covered by earlier entries
	 *	lbn	per-component start offset of the next entry, in
	 *		sc_ileave units
	 */
	size = 0;
	bn = lbn = 0;
	for (ii = cs->sc_itable; ii < &cs->sc_itable[icount]; ++ii) {
		/*
		 * Allocate space for ii_index.  We might allocate more than
		 * we use.
		 */
		ii->ii_index = kmalloc((sizeof(int) * cs->sc_nccdisks),
					M_DEVBUF, M_WAITOK);

		/*
		 * Locate the smallest of the remaining components, i.e.
		 * the smallest one strictly larger than what has already
		 * been covered.
		 */
		smallci = NULL;
		ci = cs->sc_cinfo;
		while (ci < &cs->sc_cinfo[cs->sc_nccdisks]) {
			if (ci->ci_size > size &&
			    (smallci == NULL ||
			     ci->ci_size < smallci->ci_size)) {
				smallci = ci;
			}
			++ci;
		}

		/*
		 * Nobody left, all done.  This entry becomes the
		 * terminator.
		 */
		if (smallci == NULL) {
			ii->ii_ndisk = 0;
			break;
		}

		/*
		 * Record starting logical block using an sc_ileave blocksize.
		 */
		ii->ii_startblk = bn / cs->sc_ileave;

		/*
		 * Record starting component block using an sc_ileave
		 * blocksize.  This value is relative to the beginning of
		 * a component disk.
		 */
		ii->ii_startoff = lbn;

		/*
		 * Determine how many disks take part in this interleave
		 * and record their indices.
		 */
		ix = 0;
		for (ci = cs->sc_cinfo;
		    ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) {
			if (ci->ci_size >= smallci->ci_size) {
				ii->ii_index[ix++] = ci - cs->sc_cinfo;
			}
		}
		ii->ii_ndisk = ix;

		/*
		 * Adjust for loop: each of the ix participating disks
		 * contributes the portion between the previous size and
		 * this one.
		 */
		bn += ix * (smallci->ci_size - size);
		lbn = smallci->ci_size / cs->sc_ileave;
		size = smallci->ci_size;
	}
	/* Falling off the end means no terminating entry was written. */
	if (ii == &cs->sc_itable[icount])
		panic("ccdinterlave software bug!  table exhausted");
#ifdef DEBUG
	if (ccddebug & CCDB_INIT)
		printiinfo(cs->sc_itable);
#endif
}
761 
762 /* ARGSUSED */
763 static int
764 ccdopen(struct dev_open_args *ap)
765 {
766 	cdev_t dev = ap->a_head.a_dev;
767 	int unit = ccdunit(dev);
768 	struct ccd_softc *cs;
769 	int error = 0;
770 
771 #ifdef DEBUG
772 	if (ccddebug & CCDB_FOLLOW)
773 		kprintf("ccdopen(%x, %x)\n", dev, flags);
774 #endif
775 	if (unit >= numccd)
776 		return (ENXIO);
777 	cs = &ccd_softc[unit];
778 
779 	if ((error = ccdlock(cs)) == 0) {
780 		ccdunlock(cs);
781 	}
782 	return (error);
783 }
784 
785 /* ARGSUSED */
786 static int
787 ccdclose(struct dev_close_args *ap)
788 {
789 	cdev_t dev = ap->a_head.a_dev;
790 	int unit = ccdunit(dev);
791 	struct ccd_softc *cs;
792 	int error = 0;
793 
794 #ifdef DEBUG
795 	if (ccddebug & CCDB_FOLLOW)
796 		kprintf("ccdclose(%x, %x)\n", dev, flags);
797 #endif
798 
799 	if (unit >= numccd)
800 		return (ENXIO);
801 	cs = &ccd_softc[unit];
802 	if ((error = ccdlock(cs)) == 0) {
803 		ccdunlock(cs);
804 	}
805 	return (error);
806 }
807 
808 static int
809 ccdstrategy(struct dev_strategy_args *ap)
810 {
811 	cdev_t dev = ap->a_head.a_dev;
812 	struct bio *bio = ap->a_bio;
813 	int unit = ccdunit(dev);
814 	struct bio *nbio;
815 	struct buf *bp = bio->bio_buf;
816 	struct ccd_softc *cs = &ccd_softc[unit];
817 	u_int64_t pbn;	/* in sc_secsize chunks */
818 	u_int32_t sz;	/* in sc_secsize chunks */
819 
820 #ifdef DEBUG
821 	if (ccddebug & CCDB_FOLLOW)
822 		kprintf("ccdstrategy(%x): unit %d\n", bp, unit);
823 #endif
824 	if ((cs->sc_flags & CCDF_INITED) == 0) {
825 		bp->b_error = ENXIO;
826 		goto error;
827 	}
828 
829 	/* If it's a nil transfer, wake up the top half now. */
830 	if (bp->b_bcount == 0) {
831 		bp->b_resid = 0;
832 		goto done;
833 	}
834 
835 	/*
836 	 * Do bounds checking and adjust transfer.  If there's an
837 	 * error, the bounds check will flag that for us.
838 	 */
839 
840 	pbn = bio->bio_offset / cs->sc_geom.ccg_secsize;
841 	sz = howmany(bp->b_bcount, cs->sc_geom.ccg_secsize);
842 
843 	/*
844 	 * If out of bounds return an error.  If the request goes
845 	 * past EOF, clip the request as appropriate.  If exactly
846 	 * at EOF, return success (don't clip), but with 0 bytes
847 	 * of I/O.
848 	 *
849 	 * Mark EOF B_INVAL (just like bad), indicating that the
850 	 * contents of the buffer, if any, is invalid.
851 	 */
852 	if ((int64_t)pbn < 0)
853 		goto bad;
854 	if (pbn + sz > cs->sc_size) {
855 		if (pbn > cs->sc_size || (bp->b_flags & B_BNOCLIP))
856 			goto bad;
857 		if (pbn == cs->sc_size) {
858 			bp->b_resid = bp->b_bcount;
859 			bp->b_flags |= B_INVAL;
860 			goto done;
861 		}
862 		sz = (long)(cs->sc_size - pbn);
863 		bp->b_bcount = sz * cs->sc_geom.ccg_secsize;
864 	}
865 	nbio = bio;
866 
867 	bp->b_resid = bp->b_bcount;
868 	nbio->bio_driver_info = dev;
869 
870 	/*
871 	 * "Start" the unit.
872 	 */
873 	crit_enter();
874 	ccdstart(cs, nbio);
875 	crit_exit();
876 	return(0);
877 
878 	/*
879 	 * note: bio, not nbio, is valid at the done label.
880 	 */
881 bad:
882 	bp->b_error = EINVAL;
883 error:
884 	bp->b_resid = bp->b_bcount;
885 	bp->b_flags |= B_ERROR | B_INVAL;
886 done:
887 	biodone(bio);
888 	return(0);
889 }
890 
891 static void
892 ccdstart(struct ccd_softc *cs, struct bio *bio)
893 {
894 	long bcount, rcount;
895 	struct ccdbuf *cbp[4];
896 	struct buf *bp = bio->bio_buf;
897 	/* XXX! : 2 reads and 2 writes for RAID 4/5 */
898 	caddr_t addr;
899 	off_t doffset;
900 
901 #ifdef DEBUG
902 	if (ccddebug & CCDB_FOLLOW)
903 		kprintf("ccdstart(%x, %x)\n", cs, bp);
904 #endif
905 
906 	/* Record the transaction start  */
907 	devstat_start_transaction(&cs->device_stats);
908 
909 	/*
910 	 * Allocate component buffers and fire off the requests
911 	 */
912 	doffset = bio->bio_offset;
913 	addr = bp->b_data;
914 
915 	for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) {
916 		ccdbuffer(cbp, cs, bio, doffset, addr, bcount);
917 		rcount = cbp[0]->cb_buf.b_bcount;
918 
919 		if (cs->sc_cflags & CCDF_MIRROR) {
920 			/*
921 			 * Mirroring.  Writes go to both disks, reads are
922 			 * taken from whichever disk seems most appropriate.
923 			 *
924 			 * We attempt to localize reads to the disk whos arm
925 			 * is nearest the read request.  We ignore seeks due
926 			 * to writes when making this determination and we
927 			 * also try to avoid hogging.
928 			 */
929 			if (cbp[0]->cb_buf.b_cmd != BUF_CMD_READ) {
930 				vn_strategy(cbp[0]->cb_vp,
931 					    &cbp[0]->cb_buf.b_bio1);
932 				vn_strategy(cbp[1]->cb_vp,
933 					    &cbp[1]->cb_buf.b_bio1);
934 			} else {
935 				int pick = cs->sc_pick;
936 				daddr_t range = cs->sc_size / 16 * cs->sc_geom.ccg_secsize;
937 				if (doffset < cs->sc_blk[pick] - range ||
938 				    doffset > cs->sc_blk[pick] + range
939 				) {
940 					cs->sc_pick = pick = 1 - pick;
941 				}
942 				cs->sc_blk[pick] = doffset + rcount;
943 				vn_strategy(cbp[pick]->cb_vp,
944 					    &cbp[pick]->cb_buf.b_bio1);
945 			}
946 		} else {
947 			/*
948 			 * Not mirroring
949 			 */
950 			vn_strategy(cbp[0]->cb_vp,
951 				     &cbp[0]->cb_buf.b_bio1);
952 		}
953 		doffset += rcount;
954 		addr += rcount;
955 	}
956 }
957 
958 /*
959  * Build a component buffer header.
960  */
961 static void
962 ccdbuffer(struct ccdbuf **cb, struct ccd_softc *cs, struct bio *bio,
963 	  off_t doffset, caddr_t addr, long bcount)
964 {
965 	struct ccdcinfo *ci, *ci2 = NULL;	/* XXX */
966 	struct ccdbuf *cbp;
967 	u_int64_t bn;
968 	u_int64_t cbn;
969 	u_int64_t cboff;
970 	off_t cbc;
971 
972 #ifdef DEBUG
973 	if (ccddebug & CCDB_IO)
974 		kprintf("ccdbuffer(%x, %x, %d, %x, %d)\n",
975 		       cs, bp, bn, addr, bcount);
976 #endif
977 	/*
978 	 * Determine which component bn falls in.
979 	 */
980 	bn = doffset / cs->sc_geom.ccg_secsize;
981 	cbn = bn;
982 	cboff = 0;
983 
984 	if (cs->sc_ileave == 0) {
985 		/*
986 		 * Serially concatenated and neither a mirror nor a parity
987 		 * config.  This is a special case.
988 		 */
989 		daddr_t sblk;
990 
991 		sblk = 0;
992 		for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++)
993 			sblk += ci->ci_size;
994 		cbn -= sblk;
995 	} else {
996 		struct ccdiinfo *ii;
997 		int ccdisk, off;
998 
999 		/*
1000 		 * Calculate cbn, the logical superblock (sc_ileave chunks),
1001 		 * and cboff, a normal block offset (DEV_BSIZE chunks) relative
1002 		 * to cbn.
1003 		 */
1004 		cboff = cbn % cs->sc_ileave;	/* DEV_BSIZE gran */
1005 		cbn = cbn / cs->sc_ileave;	/* DEV_BSIZE * ileave gran */
1006 
1007 		/*
1008 		 * Figure out which interleave table to use.
1009 		 */
1010 		for (ii = cs->sc_itable; ii->ii_ndisk; ii++) {
1011 			if (ii->ii_startblk > cbn)
1012 				break;
1013 		}
1014 		ii--;
1015 
1016 		/*
1017 		 * off is the logical superblock relative to the beginning
1018 		 * of this interleave block.
1019 		 */
1020 		off = cbn - ii->ii_startblk;
1021 
1022 		/*
1023 		 * We must calculate which disk component to use (ccdisk),
1024 		 * and recalculate cbn to be the superblock relative to
1025 		 * the beginning of the component.  This is typically done by
1026 		 * adding 'off' and ii->ii_startoff together.  However, 'off'
1027 		 * must typically be divided by the number of components in
1028 		 * this interleave array to be properly convert it from a
1029 		 * CCD-relative logical superblock number to a
1030 		 * component-relative superblock number.
1031 		 */
1032 		if (ii->ii_ndisk == 1) {
1033 			/*
1034 			 * When we have just one disk, it can't be a mirror
1035 			 * or a parity config.
1036 			 */
1037 			ccdisk = ii->ii_index[0];
1038 			cbn = ii->ii_startoff + off;
1039 		} else {
1040 			if (cs->sc_cflags & CCDF_MIRROR) {
1041 				/*
1042 				 * We have forced a uniform mapping, resulting
1043 				 * in a single interleave array.  We double
1044 				 * up on the first half of the available
1045 				 * components and our mirror is in the second
1046 				 * half.  This only works with a single
1047 				 * interleave array because doubling up
1048 				 * doubles the number of sectors, so there
1049 				 * cannot be another interleave array because
1050 				 * the next interleave array's calculations
1051 				 * would be off.
1052 				 */
1053 				int ndisk2 = ii->ii_ndisk / 2;
1054 				ccdisk = ii->ii_index[off % ndisk2];
1055 				cbn = ii->ii_startoff + off / ndisk2;
1056 				ci2 = &cs->sc_cinfo[ccdisk + ndisk2];
1057 			} else if (cs->sc_cflags & CCDF_PARITY) {
1058 				/*
1059 				 * XXX not implemented yet
1060 				 */
1061 				int ndisk2 = ii->ii_ndisk - 1;
1062 				ccdisk = ii->ii_index[off % ndisk2];
1063 				cbn = ii->ii_startoff + off / ndisk2;
1064 				if (cbn % ii->ii_ndisk <= ccdisk)
1065 					ccdisk++;
1066 			} else {
1067 				ccdisk = ii->ii_index[off % ii->ii_ndisk];
1068 				cbn = ii->ii_startoff + off / ii->ii_ndisk;
1069 			}
1070 		}
1071 
1072 		ci = &cs->sc_cinfo[ccdisk];
1073 
1074 		/*
1075 		 * Convert cbn from a superblock to a normal block so it
1076 		 * can be used to calculate (along with cboff) the normal
1077 		 * block index into this particular disk.
1078 		 */
1079 		cbn *= cs->sc_ileave;
1080 	}
1081 
1082 	/*
1083 	 * Fill in the component buf structure.
1084 	 *
1085 	 * NOTE: devices do not use b_bufsize, only b_bcount, but b_bcount
1086 	 * will be truncated on device EOF so we use b_bufsize to detect
1087 	 * the case.
1088 	 */
1089 	cbp = getccdbuf();
1090 	cbp->cb_buf.b_cmd = bio->bio_buf->b_cmd;
1091 	cbp->cb_buf.b_flags |= bio->bio_buf->b_flags;
1092 	cbp->cb_buf.b_data = addr;
1093 	cbp->cb_vp = ci->ci_vp;
1094 	if (cs->sc_ileave == 0)
1095 		cbc = dbtob((off_t)(ci->ci_size - cbn));
1096 	else
1097 		cbc = dbtob((off_t)(cs->sc_ileave - cboff));
1098 	if (cbc > cs->sc_maxiosize)
1099 		cbc = cs->sc_maxiosize;
1100 	cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount;
1101  	cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount;
1102 
1103 	cbp->cb_buf.b_bio1.bio_done = ccdiodone;
1104 	cbp->cb_buf.b_bio1.bio_caller_info1.ptr = cbp;
1105 	cbp->cb_buf.b_bio1.bio_offset = dbtob(cbn + cboff + ci->ci_skip);
1106 
1107 	/*
1108 	 * context for ccdiodone
1109 	 */
1110 	cbp->cb_obio = bio;
1111 	cbp->cb_unit = cs - ccd_softc;
1112 	cbp->cb_comp = ci - cs->sc_cinfo;
1113 
1114 #ifdef DEBUG
1115 	if (ccddebug & CCDB_IO)
1116 		kprintf(" dev %x(u%d): cbp %x off %lld addr %x bcnt %d\n",
1117 		       ci->ci_dev, ci-cs->sc_cinfo, cbp,
1118 		       cbp->cb_buf.b_bio1.bio_offset,
1119 		       cbp->cb_buf.b_data, cbp->cb_buf.b_bcount);
1120 #endif
1121 	cb[0] = cbp;
1122 
1123 	/*
1124 	 * Note: both I/O's setup when reading from mirror, but only one
1125 	 * will be executed.
1126 	 */
1127 	if (cs->sc_cflags & CCDF_MIRROR) {
1128 		/* mirror, setup second I/O */
1129 		cbp = getccdbuf();
1130 
1131 		cbp->cb_buf.b_cmd = bio->bio_buf->b_cmd;
1132 		cbp->cb_buf.b_flags |= bio->bio_buf->b_flags;
1133 		cbp->cb_buf.b_data = addr;
1134 		cbp->cb_vp = ci2->ci_vp;
1135 		if (cs->sc_ileave == 0)
1136 		      cbc = dbtob((off_t)(ci->ci_size - cbn));
1137 		else
1138 		      cbc = dbtob((off_t)(cs->sc_ileave - cboff));
1139 		if (cbc > cs->sc_maxiosize)
1140 			cbc = cs->sc_maxiosize;
1141 		cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount;
1142 		cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount;
1143 
1144 		cbp->cb_buf.b_bio1.bio_done = ccdiodone;
1145 		cbp->cb_buf.b_bio1.bio_caller_info1.ptr = cbp;
1146 		cbp->cb_buf.b_bio1.bio_offset = dbtob(cbn + cboff + ci2->ci_skip);
1147 
1148 		/*
1149 		 * context for ccdiodone
1150 		 */
1151 		cbp->cb_obio = bio;
1152 		cbp->cb_unit = cs - ccd_softc;
1153 		cbp->cb_comp = ci2 - cs->sc_cinfo;
1154 		cb[1] = cbp;
1155 		/* link together the ccdbuf's and clear "mirror done" flag */
1156 		cb[0]->cb_mirror = cb[1];
1157 		cb[1]->cb_mirror = cb[0];
1158 		cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE;
1159 		cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE;
1160 	}
1161 }
1162 
1163 static void
1164 ccdintr(struct ccd_softc *cs, struct bio *bio)
1165 {
1166 	struct buf *bp = bio->bio_buf;
1167 
1168 #ifdef DEBUG
1169 	if (ccddebug & CCDB_FOLLOW)
1170 		kprintf("ccdintr(%x, %x)\n", cs, bp);
1171 #endif
1172 	/*
1173 	 * Request is done for better or worse, wakeup the top half.
1174 	 */
1175 	if (bp->b_flags & B_ERROR)
1176 		bp->b_resid = bp->b_bcount;
1177 	devstat_end_transaction_buf(&cs->device_stats, bp);
1178 	biodone(bio);
1179 }
1180 
1181 /*
1182  * Called at interrupt time.
1183  *
1184  * Mark the component as done and if all components are done,
1185  * take a ccd interrupt.
1186  */
1187 static void
1188 ccdiodone(struct bio *bio)
1189 {
1190 	struct ccdbuf *cbp = bio->bio_caller_info1.ptr;
1191 	struct bio *obio = cbp->cb_obio;
1192 	struct buf *obp = obio->bio_buf;
1193 	int unit = cbp->cb_unit;
1194 	int count;
1195 
1196 	/*
1197 	 * Since we do not have exclusive access to underlying devices,
1198 	 * we can't keep cache translations around.
1199 	 */
1200 	clearbiocache(bio->bio_next);
1201 
1202 	get_mplock();
1203 	crit_enter();
1204 #ifdef DEBUG
1205 	if (ccddebug & CCDB_FOLLOW)
1206 		kprintf("ccdiodone(%x)\n", cbp);
1207 	if (ccddebug & CCDB_IO) {
1208 		kprintf("ccdiodone: bp %x bcount %d resid %d\n",
1209 		       obp, obp->b_bcount, obp->b_resid);
1210 		kprintf(" dev %x(u%d), cbp %x off %lld addr %x bcnt %d\n",
1211 		       cbp->cb_buf.b_dev, cbp->cb_comp, cbp,
1212 		       cbp->cb_buf.b_loffset, cbp->cb_buf.b_data,
1213 		       cbp->cb_buf.b_bcount);
1214 	}
1215 #endif
1216 
1217 	/*
1218 	 * If an error occured, report it.  If this is a mirrored
1219 	 * configuration and the first of two possible reads, do not
1220 	 * set the error in the bp yet because the second read may
1221 	 * succeed.
1222 	 */
1223 	if (cbp->cb_buf.b_flags & B_ERROR) {
1224 		const char *msg = "";
1225 
1226 		if ((ccd_softc[unit].sc_cflags & CCDF_MIRROR) &&
1227 		    (cbp->cb_buf.b_cmd == BUF_CMD_READ) &&
1228 		    (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1229 			/*
1230 			 * We will try our read on the other disk down
1231 			 * below, also reverse the default pick so if we
1232 			 * are doing a scan we do not keep hitting the
1233 			 * bad disk first.
1234 			 */
1235 			struct ccd_softc *cs = &ccd_softc[unit];
1236 
1237 			msg = ", trying other disk";
1238 			cs->sc_pick = 1 - cs->sc_pick;
1239 			cs->sc_blk[cs->sc_pick] = obio->bio_offset;
1240 		} else {
1241 			obp->b_flags |= B_ERROR;
1242 			obp->b_error = cbp->cb_buf.b_error ?
1243 			    cbp->cb_buf.b_error : EIO;
1244 		}
1245 		kprintf("ccd%d: error %d on component %d "
1246 			"offset %jd (ccd offset %jd)%s\n",
1247 		        unit, obp->b_error, cbp->cb_comp,
1248 		        (intmax_t)cbp->cb_buf.b_bio2.bio_offset,
1249 		        (intmax_t)obio->bio_offset,
1250 		        msg);
1251 	}
1252 
1253 	/*
1254 	 * Process mirror.  If we are writing, I/O has been initiated on both
1255 	 * buffers and we fall through only after both are finished.
1256 	 *
1257 	 * If we are reading only one I/O is initiated at a time.  If an
1258 	 * error occurs we initiate the second I/O and return, otherwise
1259 	 * we free the second I/O without initiating it.
1260 	 */
1261 
1262 	if (ccd_softc[unit].sc_cflags & CCDF_MIRROR) {
1263 		if (cbp->cb_buf.b_cmd != BUF_CMD_READ) {
1264 			/*
1265 			 * When writing, handshake with the second buffer
1266 			 * to determine when both are done.  If both are not
1267 			 * done, return here.
1268 			 */
1269 			if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1270 				cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE;
1271 				putccdbuf(cbp);
1272 				crit_exit();
1273 				rel_mplock();
1274 				return;
1275 			}
1276 		} else {
1277 			/*
1278 			 * When reading, either dispose of the second buffer
1279 			 * or initiate I/O on the second buffer if an error
1280 			 * occured with this one.
1281 			 */
1282 			if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1283 				if (cbp->cb_buf.b_flags & B_ERROR) {
1284 					cbp->cb_mirror->cb_pflags |=
1285 					    CCDPF_MIRROR_DONE;
1286 					vn_strategy(
1287 					    cbp->cb_mirror->cb_vp,
1288 					    &cbp->cb_mirror->cb_buf.b_bio1
1289 					);
1290 					putccdbuf(cbp);
1291 					crit_exit();
1292 					rel_mplock();
1293 					return;
1294 				} else {
1295 					putccdbuf(cbp->cb_mirror);
1296 					/* fall through */
1297 				}
1298 			}
1299 		}
1300 	}
1301 
1302 	/*
1303 	 * Use our saved b_bufsize to determine if an unexpected EOF occured.
1304 	 */
1305 	count = cbp->cb_buf.b_bufsize;
1306 	putccdbuf(cbp);
1307 
1308 	/*
1309 	 * If all done, "interrupt".
1310 	 */
1311 	obp->b_resid -= count;
1312 	if (obp->b_resid < 0)
1313 		panic("ccdiodone: count");
1314 	if (obp->b_resid == 0)
1315 		ccdintr(&ccd_softc[unit], obio);
1316 	crit_exit();
1317 	rel_mplock();
1318 }
1319 
1320 static int
1321 ccdioctl(struct dev_ioctl_args *ap)
1322 {
1323 	cdev_t dev = ap->a_head.a_dev;
1324 	int unit = ccdunit(dev);
1325 	int i, j, lookedup = 0, error = 0;
1326 	struct ccd_softc *cs;
1327 	struct ccd_ioctl *ccio = (struct ccd_ioctl *)ap->a_data;
1328 	struct ccddevice ccd;
1329 	struct disk_info info;
1330 	char **cpp;
1331 	struct vnode **vpp;
1332 
1333 	if (unit >= numccd)
1334 		return (ENXIO);
1335 	cs = &ccd_softc[unit];
1336 
1337 	bzero(&ccd, sizeof(ccd));
1338 
1339 	switch (ap->a_cmd) {
1340 	case CCDIOCSET:
1341 		if (cs->sc_flags & CCDF_INITED)
1342 			return (EBUSY);
1343 
1344 		if ((ap->a_fflag & FWRITE) == 0)
1345 			return (EBADF);
1346 
1347 		if ((error = ccdlock(cs)) != 0)
1348 			return (error);
1349 
1350 		if (ccio->ccio_ndisks > CCD_MAXNDISKS) {
1351 			ccdunlock(cs);
1352 			return (EINVAL);
1353 		}
1354 
1355 		/* Fill in some important bits. */
1356 		ccd.ccd_unit = unit;
1357 		ccd.ccd_interleave = ccio->ccio_ileave;
1358 		if (ccd.ccd_interleave == 0 &&
1359 		    ((ccio->ccio_flags & CCDF_MIRROR) ||
1360 		     (ccio->ccio_flags & CCDF_PARITY))) {
1361 			kprintf("ccd%d: disabling mirror/parity, interleave is 0\n", unit);
1362 			ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY);
1363 		}
1364 		if ((ccio->ccio_flags & CCDF_MIRROR) &&
1365 		    (ccio->ccio_flags & CCDF_PARITY)) {
1366 			kprintf("ccd%d: can't specify both mirror and parity, using mirror\n", unit);
1367 			ccio->ccio_flags &= ~CCDF_PARITY;
1368 		}
1369 		if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) &&
1370 		    !(ccio->ccio_flags & CCDF_UNIFORM)) {
1371 			kprintf("ccd%d: mirror/parity forces uniform flag\n",
1372 			       unit);
1373 			ccio->ccio_flags |= CCDF_UNIFORM;
1374 		}
1375 		ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK;
1376 
1377 		/*
1378 		 * Allocate space for and copy in the array of
1379 		 * componet pathnames and device numbers.
1380 		 */
1381 		cpp = kmalloc(ccio->ccio_ndisks * sizeof(char *),
1382 		    M_DEVBUF, M_WAITOK);
1383 		vpp = kmalloc(ccio->ccio_ndisks * sizeof(struct vnode *),
1384 		    M_DEVBUF, M_WAITOK);
1385 
1386 		error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp,
1387 				ccio->ccio_ndisks * sizeof(char **));
1388 		if (error) {
1389 			kfree(vpp, M_DEVBUF);
1390 			kfree(cpp, M_DEVBUF);
1391 			ccdunlock(cs);
1392 			return (error);
1393 		}
1394 
1395 #ifdef DEBUG
1396 		if (ccddebug & CCDB_INIT) {
1397 			for (i = 0; i < ccio->ccio_ndisks; ++i)
1398 				kprintf("ccdioctl: component %d: 0x%x\n",
1399 				    i, cpp[i]);
1400 		}
1401 #endif
1402 
1403 		for (i = 0; i < ccio->ccio_ndisks; ++i) {
1404 #ifdef DEBUG
1405 			if (ccddebug & CCDB_INIT)
1406 				kprintf("ccdioctl: lookedup = %d\n", lookedup);
1407 #endif
1408 			if ((error = ccdlookup(cpp[i], &vpp[i])) != 0) {
1409 				for (j = 0; j < lookedup; ++j)
1410 					(void)vn_close(vpp[j], FREAD|FWRITE);
1411 				kfree(vpp, M_DEVBUF);
1412 				kfree(cpp, M_DEVBUF);
1413 				ccdunlock(cs);
1414 				return (error);
1415 			}
1416 			++lookedup;
1417 		}
1418 		ccd.ccd_cpp = cpp;
1419 		ccd.ccd_vpp = vpp;
1420 		ccd.ccd_ndev = ccio->ccio_ndisks;
1421 
1422 		/*
1423 		 * Initialize the ccd.  Fills in the softc for us.
1424 		 */
1425 		if ((error = ccdinit(&ccd, cpp, ap->a_cred)) != 0) {
1426 			for (j = 0; j < lookedup; ++j)
1427 				(void)vn_close(vpp[j], FREAD|FWRITE);
1428 			kfree(vpp, M_DEVBUF);
1429 			kfree(cpp, M_DEVBUF);
1430 			ccdunlock(cs);
1431 			return (error);
1432 		}
1433 
1434 		/*
1435 		 * The ccd has been successfully initialized, so
1436 		 * we can place it into the array and read the disklabel.
1437 		 */
1438 		bcopy(&ccd, &ccddevs[unit], sizeof(ccd));
1439 		ccio->ccio_unit = unit;
1440 		ccio->ccio_size = cs->sc_size;
1441 
1442 		bzero(&info, sizeof(info));
1443 		info.d_media_blksize = cs->sc_geom.ccg_secsize;
1444 		info.d_media_blocks  = cs->sc_size;
1445 		info.d_nheads	     = cs->sc_geom.ccg_ntracks;
1446 		info.d_secpertrack   = cs->sc_geom.ccg_nsectors;
1447 		info.d_ncylinders    = cs->sc_geom.ccg_ncylinders;
1448 		info.d_secpercyl     = info.d_nheads * info.d_secpertrack;
1449 
1450 		/*
1451 		 * For cases where a label is directly applied to the ccd,
1452 		 * without slices, DSO_COMPATMBR forces one sector be
1453 		 * reserved for backwards compatibility.
1454 		 */
1455 		info.d_dsflags	     = DSO_COMPATMBR;
1456 		disk_setdiskinfo(&cs->sc_disk, &info);
1457 
1458 		ccdunlock(cs);
1459 
1460 		break;
1461 
1462 	case CCDIOCCLR:
1463 		if ((cs->sc_flags & CCDF_INITED) == 0)
1464 			return (ENXIO);
1465 
1466 		if ((ap->a_fflag & FWRITE) == 0)
1467 			return (EBADF);
1468 
1469 		if ((error = ccdlock(cs)) != 0)
1470 			return (error);
1471 
1472 		if (dev_drefs(cs->sc_dev) > 1) {
1473 			ccdunlock(cs);
1474 			return (EBUSY);
1475 		}
1476 
1477 		/*
1478 		 * Free ccd_softc information and clear entry.
1479 		 */
1480 
1481 		/* Close the components and free their pathnames. */
1482 		for (i = 0; i < cs->sc_nccdisks; ++i) {
1483 			/*
1484 			 * XXX: this close could potentially fail and
1485 			 * cause Bad Things.  Maybe we need to force
1486 			 * the close to happen?
1487 			 */
1488 #ifdef DEBUG
1489 			if (ccddebug & CCDB_VNODE)
1490 				vprint("CCDIOCCLR: vnode info",
1491 				    cs->sc_cinfo[i].ci_vp);
1492 #endif
1493 			(void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE);
1494 			kfree(cs->sc_cinfo[i].ci_path, M_DEVBUF);
1495 		}
1496 
1497 		/* Free interleave index. */
1498 		for (i = 0; cs->sc_itable[i].ii_ndisk; ++i)
1499 			kfree(cs->sc_itable[i].ii_index, M_DEVBUF);
1500 
1501 		/* Free component info and interleave table. */
1502 		kfree(cs->sc_cinfo, M_DEVBUF);
1503 		kfree(cs->sc_itable, M_DEVBUF);
1504 		cs->sc_cinfo = NULL;
1505 		cs->sc_itable = NULL;
1506 		cs->sc_flags &= ~CCDF_INITED;
1507 
1508 		/*
1509 		 * Free ccddevice information and clear entry.
1510 		 */
1511 		kfree(ccddevs[unit].ccd_cpp, M_DEVBUF);
1512 		kfree(ccddevs[unit].ccd_vpp, M_DEVBUF);
1513 		bcopy(&ccd, &ccddevs[unit], sizeof(ccd));
1514 
1515 		/*
1516 		 * And remove the devstat entry.
1517 		 */
1518 		devstat_remove_entry(&cs->device_stats);
1519 
1520 		/* This must be atomic. */
1521 		crit_enter();
1522 		ccdunlock(cs);
1523 		crit_exit();
1524 
1525 		break;
1526 
1527 	default:
1528 		return (ENOTTY);
1529 	}
1530 
1531 	return (0);
1532 }
1533 
/*
 * Crash-dump entry point.  Dumping to a ccd is not implemented, so the
 * request is always refused with ENXIO; the argument is not examined.
 */
static int
ccddump(struct dev_dump_args *ap)
{
	(void)ap;

	return (ENXIO);
}
1540 
1541 /*
1542  * Lookup the provided name in the filesystem.  If the file exists,
1543  * is a valid block device, and isn't being used by anyone else,
1544  * set *vpp to the file's vnode.
1545  */
1546 static int
1547 ccdlookup(char *path, struct vnode **vpp)
1548 {
1549 	struct nlookupdata nd;
1550 	struct vnode *vp;
1551 	int error;
1552 
1553 	*vpp = NULL;
1554 
1555 	error = nlookup_init(&nd, path, UIO_USERSPACE, NLC_FOLLOW|NLC_LOCKVP);
1556 	if (error)
1557 		return (error);
1558 	if ((error = vn_open(&nd, NULL, FREAD|FWRITE, 0)) != 0) {
1559 #ifdef DEBUG
1560 		if (ccddebug & CCDB_FOLLOW|CCDB_INIT)
1561 			kprintf("ccdlookup: vn_open error = %d\n", error);
1562 #endif
1563 		goto done;
1564 	}
1565 	vp = nd.nl_open_vp;
1566 
1567 	if (vp->v_opencount > 1) {
1568 		error = EBUSY;
1569 		goto done;
1570 	}
1571 
1572 	if (!vn_isdisk(vp, &error))
1573 		goto done;
1574 
1575 #ifdef DEBUG
1576 	if (ccddebug & CCDB_VNODE)
1577 		vprint("ccdlookup: vnode info", vp);
1578 #endif
1579 
1580 	vn_unlock(vp);
1581 	nd.nl_open_vp = NULL;
1582 	nlookup_done(&nd);
1583 	*vpp = vp;				/* leave ref intact  */
1584 	return (0);
1585 done:
1586 	nlookup_done(&nd);
1587 	return (error);
1588 }
1589 
1590 /*
1591  * Wait interruptibly for an exclusive lock.
1592  *
1593  * XXX
1594  * Several drivers do this; it should be abstracted and made MP-safe.
1595  */
1596 static int
1597 ccdlock(struct ccd_softc *cs)
1598 {
1599 	int error;
1600 
1601 	while ((cs->sc_flags & CCDF_LOCKED) != 0) {
1602 		cs->sc_flags |= CCDF_WANTED;
1603 		if ((error = tsleep(cs, PCATCH, "ccdlck", 0)) != 0)
1604 			return (error);
1605 	}
1606 	cs->sc_flags |= CCDF_LOCKED;
1607 	return (0);
1608 }
1609 
1610 /*
1611  * Unlock and wake up any waiters.
1612  */
1613 static void
1614 ccdunlock(struct ccd_softc *cs)
1615 {
1616 
1617 	cs->sc_flags &= ~CCDF_LOCKED;
1618 	if ((cs->sc_flags & CCDF_WANTED) != 0) {
1619 		cs->sc_flags &= ~CCDF_WANTED;
1620 		wakeup(cs);
1621 	}
1622 }
1623 
#ifdef DEBUG
/*
 * Debug helper: print every entry of an interleave table.  The table is
 * walked from 'ii' until an entry with ii_ndisk == 0 is reached; for each
 * entry the disk count, start block, start offset and the component
 * indices are printed on one line.
 */
static void
printiinfo(struct ccdiinfo *ii)
{
	int entry = 0;
	int slot;

	while (ii->ii_ndisk) {
		kprintf(" itab[%d]: #dk %d sblk %d soff %d",
		       entry, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff);
		for (slot = 0; slot < ii->ii_ndisk; slot++)
			kprintf(" %d", ii->ii_index[slot]);
		kprintf("\n");

		entry++;
		ii++;
	}
}
#endif
1639 
1640 
1641 /* Local Variables: */
1642 /* c-argdecl-indent: 8 */
1643 /* c-continued-statement-offset: 8 */
1644 /* c-indent-level: 8 */
1645 /* End: */
1646