xref: /dragonfly/sys/dev/disk/ccd/ccd.c (revision 17b61719)
1 /* $FreeBSD: src/sys/dev/ccd/ccd.c,v 1.73.2.1 2001/09/11 09:49:52 kris Exp $ */
2 /* $DragonFly: src/sys/dev/disk/ccd/ccd.c,v 1.16 2004/05/19 22:52:41 dillon Exp $ */
3 
4 /*	$NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $	*/
5 
6 /*
7  * Copyright (c) 1995 Jason R. Thorpe.
8  * All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed for the NetBSD Project
21  *	by Jason R. Thorpe.
22  * 4. The name of the author may not be used to endorse or promote products
23  *    derived from this software without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
26  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
27  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
28  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
29  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
30  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
31  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
32  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
33  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  */
37 
38 /*
39  * Copyright (c) 1988 University of Utah.
40  * Copyright (c) 1990, 1993
41  *	The Regents of the University of California.  All rights reserved.
42  *
43  * This code is derived from software contributed to Berkeley by
44  * the Systems Programming Group of the University of Utah Computer
45  * Science Department.
46  *
47  * Redistribution and use in source and binary forms, with or without
48  * modification, are permitted provided that the following conditions
49  * are met:
50  * 1. Redistributions of source code must retain the above copyright
51  *    notice, this list of conditions and the following disclaimer.
52  * 2. Redistributions in binary form must reproduce the above copyright
53  *    notice, this list of conditions and the following disclaimer in the
54  *    documentation and/or other materials provided with the distribution.
55  * 3. All advertising materials mentioning features or use of this software
56  *    must display the following acknowledgement:
57  *	This product includes software developed by the University of
58  *	California, Berkeley and its contributors.
59  * 4. Neither the name of the University nor the names of its contributors
60  *    may be used to endorse or promote products derived from this software
61  *    without specific prior written permission.
62  *
63  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73  * SUCH DAMAGE.
74  *
75  * from: Utah $Hdr: cd.c 1.6 90/11/28$
76  *
77  *	@(#)cd.c	8.2 (Berkeley) 11/16/93
78  */
79 
80 /*
81  * "Concatenated" disk driver.
82  *
83  * Dynamic configuration and disklabel support by:
84  *	Jason R. Thorpe <thorpej@nas.nasa.gov>
85  *	Numerical Aerodynamic Simulation Facility
86  *	Mail Stop 258-6
87  *	NASA Ames Research Center
88  *	Moffett Field, CA 94035
89  */
90 
91 #include "use_ccd.h"
92 
93 #include <sys/param.h>
94 #include <sys/systm.h>
95 #include <sys/kernel.h>
96 #include <sys/module.h>
97 #include <sys/proc.h>
98 #include <sys/buf.h>
99 #include <sys/malloc.h>
100 #include <sys/namei.h>
101 #include <sys/conf.h>
102 #include <sys/stat.h>
103 #include <sys/sysctl.h>
104 #include <sys/disklabel.h>
105 #include <vfs/ufs/fs.h>
106 #include <sys/devicestat.h>
107 #include <sys/fcntl.h>
108 #include <sys/vnode.h>
109 #include <sys/buf2.h>
110 
111 #include <sys/ccdvar.h>
112 
113 #include <vm/vm_zone.h>
114 
/*
 * Debug support.  Building with CCDDEBUG implies DEBUG for this file.
 */
#if defined(CCDDEBUG) && !defined(DEBUG)
#define DEBUG
#endif

#ifdef DEBUG
/* Bits in ccddebug selecting which classes of messages are printed. */
#define CCDB_FOLLOW	0x01
#define CCDB_INIT	0x02
#define CCDB_IO		0x04
#define CCDB_LABEL	0x08
#define CCDB_VNODE	0x10
/* All message classes are enabled by default; tunable via debug.ccddebug. */
static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL |
    CCDB_VNODE;
SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, "");
/*
 * NOTE: DEBUG is undefined again right here, so the "#ifdef DEBUG"
 * sections further down in this file are not compiled even when
 * CCDDEBUG/DEBUG was requested.  Only the sysctl variable above survives.
 * NOTE(review): presumably done because the debug printfs no longer
 * build cleanly -- confirm before removing the #undef.
 */
#undef DEBUG
#endif
130 
131 #define	ccdunit(x)	dkunit(x)
132 #define ccdpart(x)	dkpart(x)
133 
134 /*
135    This is how mirroring works (only writes are special):
136 
137    When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s
138    linked together by the cb_mirror field.  "cb_pflags &
139    CCDPF_MIRROR_DONE" is set to 0 on both of them.
140 
141    When a component returns to ccdiodone(), it checks if "cb_pflags &
142    CCDPF_MIRROR_DONE" is set or not.  If not, it sets the partner's
143    flag and returns.  If it is, it means its partner has already
144    returned, so it will go to the regular cleanup.
145 
146  */
147 
/*
 * Per-component I/O descriptor.  One ccdbuf is built by ccdbuffer() for
 * every piece of an original request that is sent to a component (two for
 * mirrored configurations).
 *
 * cb_buf must remain the first member: ccdiodone() is installed as the
 * b_iodone callback through a cast from a function taking a struct buf *,
 * so the struct buf pointer handed back is reinterpreted as a ccdbuf.
 */
struct ccdbuf {
	struct buf	cb_buf;		/* new I/O buf */
	struct buf	*cb_obp;	/* ptr. to original I/O buf */
	struct ccdbuf	*cb_freenext;	/* free list link */
	int		cb_unit;	/* target unit */
	int		cb_comp;	/* target component */
	int		cb_pflags;	/* mirror/parity status flag */
	struct ccdbuf	*cb_mirror;	/* mirror counterpart */
};
157 
158 /* bits in cb_pflags */
159 #define CCDPF_MIRROR_DONE 1	/* if set, mirror counterpart is done */
160 
161 #define CCDLABELDEV(dev)	\
162 	(make_sub_dev(dev, dkmakeminor(ccdunit((dev)), 0, RAW_PART)))
163 
164 static d_open_t ccdopen;
165 static d_close_t ccdclose;
166 static d_strategy_t ccdstrategy;
167 static d_ioctl_t ccdioctl;
168 static d_dump_t ccddump;
169 static d_psize_t ccdsize;
170 
171 #define NCCDFREEHIWAT	16
172 
173 #define CDEV_MAJOR 74
174 
/*
 * Device switch for the ccd driver.  The initializer is positional; the
 * comment in front of each entry names the cdevsw slot it fills.  Raw
 * read/write are routed through physread/physwrite; poll and mmap use the
 * "no" stubs.
 */
static struct cdevsw ccd_cdevsw = {
	/* name */	"ccd",
	/* maj */	CDEV_MAJOR,
	/* flags */	D_DISK,
	/* port */      NULL,
	/* clone */	NULL,

	/* open */	ccdopen,
	/* close */	ccdclose,
	/* read */	physread,
	/* write */	physwrite,
	/* ioctl */	ccdioctl,
	/* poll */	nopoll,
	/* mmap */	nommap,
	/* strategy */	ccdstrategy,
	/* dump */	ccddump,
	/* psize */	ccdsize
};
193 
194 /* called during module initialization */
195 static	void ccdattach (void);
196 static	int ccd_modevent (module_t, int, void *);
197 
198 /* called by biodone() at interrupt time */
199 static	void ccdiodone (struct ccdbuf *cbp);
200 
201 static	void ccdstart (struct ccd_softc *, struct buf *);
202 static	void ccdinterleave (struct ccd_softc *, int);
203 static	void ccdintr (struct ccd_softc *, struct buf *);
204 static	int ccdinit (struct ccddevice *, char **, struct thread *);
205 static	int ccdlookup (char *, struct thread *td, struct vnode **);
206 static	void ccdbuffer (struct ccdbuf **ret, struct ccd_softc *,
207 		struct buf *, daddr_t, caddr_t, long);
208 static	void ccdgetdisklabel (dev_t);
209 static	void ccdmakedisklabel (struct ccd_softc *);
210 static	int ccdlock (struct ccd_softc *);
211 static	void ccdunlock (struct ccd_softc *);
212 
213 #ifdef DEBUG
214 static	void printiinfo (struct ccdiinfo *);
215 #endif
216 
217 /* Non-private for the benefit of libkvm. */
struct	ccd_softc *ccd_softc;	/* per-unit state, numccd entries (ccdattach) */
struct	ccddevice *ccddevs;	/* per-unit config, numccd entries (ccdattach) */
struct	ccdbuf *ccdfreebufs;	/* head of the cached ccdbuf free list */
static	int numccdfreebufs;	/* entries on ccdfreebufs, < NCCDFREEHIWAT+1 */
static	int numccd = 0;		/* number of units, set by ccdattach() */
223 
224 /*
225  * getccdbuf() -	Allocate and zero a ccd buffer.
226  *
227  *	This routine is called at splbio().
228  */
229 
230 static __inline
231 struct ccdbuf *
232 getccdbuf(struct ccdbuf *cpy)
233 {
234 	struct ccdbuf *cbp;
235 
236 	/*
237 	 * Allocate from freelist or malloc as necessary
238 	 */
239 	if ((cbp = ccdfreebufs) != NULL) {
240 		ccdfreebufs = cbp->cb_freenext;
241 		--numccdfreebufs;
242 	} else {
243 		cbp = malloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK);
244 	}
245 
246 	/*
247 	 * Used by mirroring code
248 	 */
249 	if (cpy)
250 		bcopy(cpy, cbp, sizeof(struct ccdbuf));
251 	else
252 		bzero(cbp, sizeof(struct ccdbuf));
253 
254 	/*
255 	 * independant struct buf initialization
256 	 */
257 	LIST_INIT(&cbp->cb_buf.b_dep);
258 	BUF_LOCKINIT(&cbp->cb_buf);
259 	BUF_LOCK(&cbp->cb_buf, LK_EXCLUSIVE);
260 	BUF_KERNPROC(&cbp->cb_buf);
261 
262 	return(cbp);
263 }
264 
265 /*
266  * putccdbuf() -	Free a ccd buffer.
267  *
268  *	This routine is called at splbio().
269  */
270 
271 static __inline
272 void
273 putccdbuf(struct ccdbuf *cbp)
274 {
275 	BUF_UNLOCK(&cbp->cb_buf);
276 	BUF_LOCKFREE(&cbp->cb_buf);
277 
278 	if (numccdfreebufs < NCCDFREEHIWAT) {
279 		cbp->cb_freenext = ccdfreebufs;
280 		ccdfreebufs = cbp;
281 		++numccdfreebufs;
282 	} else {
283 		free((caddr_t)cbp, M_DEVBUF);
284 	}
285 }
286 
287 
/*
 * Number of blocks to leave untouched in front of a component partition.
 * This is to avoid violating its disklabel area when it starts at the
 * beginning of the slice.
 */
293 #if !defined(CCD_OFFSET)
294 #define CCD_OFFSET 16
295 #endif
296 
297 /*
298  * Called by main() during pseudo-device attachment.  All we need
299  * to do is allocate enough space for devices to be configured later, and
300  * add devsw entries.
301  */
302 static void
303 ccdattach()
304 {
305 	int i;
306 	int num = NCCD;
307 
308 	if (num > 1)
309 		printf("ccd0-%d: Concatenated disk drivers\n", num-1);
310 	else
311 		printf("ccd0: Concatenated disk driver\n");
312 
313 	ccd_softc = malloc(num * sizeof(struct ccd_softc), M_DEVBUF,
314 			    M_WAITOK | M_ZERO);
315 	ccddevs = malloc(num * sizeof(struct ccddevice), M_DEVBUF,
316 			    M_WAITOK | M_ZERO);
317 	numccd = num;
318 
319 	cdevsw_add(&ccd_cdevsw, 0, 0);
320 	/* XXX: is this necessary? */
321 	for (i = 0; i < numccd; ++i)
322 		ccddevs[i].ccd_dk = -1;
323 }
324 
325 static int
326 ccd_modevent(mod, type, data)
327 	module_t mod;
328 	int type;
329 	void *data;
330 {
331 	int error = 0;
332 
333 	switch (type) {
334 	case MOD_LOAD:
335 		ccdattach();
336 		break;
337 
338 	case MOD_UNLOAD:
339 		printf("ccd0: Unload not supported!\n");
340 		error = EOPNOTSUPP;
341 		break;
342 
343 	default:	/* MOD_SHUTDOWN etc */
344 		break;
345 	}
346 	return (error);
347 }
348 
349 DEV_MODULE(ccd, ccd_modevent, NULL);
350 
351 static int
352 ccdinit(struct ccddevice *ccd, char **cpaths, struct thread *td)
353 {
354 	struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit];
355 	struct ccdcinfo *ci = NULL;	/* XXX */
356 	size_t size;
357 	int ix;
358 	struct vnode *vp;
359 	size_t minsize;
360 	int maxsecsize;
361 	struct partinfo dpart;
362 	struct ccdgeom *ccg = &cs->sc_geom;
363 	char tmppath[MAXPATHLEN];
364 	int error = 0;
365 	struct ucred *cred;
366 
367 	KKASSERT(td->td_proc);
368 	cred = td->td_proc->p_ucred;
369 
370 #ifdef DEBUG
371 	if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
372 		printf("ccdinit: unit %d\n", ccd->ccd_unit);
373 #endif
374 
375 	cs->sc_size = 0;
376 	cs->sc_ileave = ccd->ccd_interleave;
377 	cs->sc_nccdisks = ccd->ccd_ndev;
378 
379 	/* Allocate space for the component info. */
380 	cs->sc_cinfo = malloc(cs->sc_nccdisks * sizeof(struct ccdcinfo),
381 	    M_DEVBUF, M_WAITOK);
382 
383 	/*
384 	 * Verify that each component piece exists and record
385 	 * relevant information about it.
386 	 */
387 	maxsecsize = 0;
388 	minsize = 0;
389 	for (ix = 0; ix < cs->sc_nccdisks; ix++) {
390 		vp = ccd->ccd_vpp[ix];
391 		ci = &cs->sc_cinfo[ix];
392 		ci->ci_vp = vp;
393 
394 		/*
395 		 * Copy in the pathname of the component.
396 		 */
397 		bzero(tmppath, sizeof(tmppath));	/* sanity */
398 		if ((error = copyinstr(cpaths[ix], tmppath,
399 		    MAXPATHLEN, &ci->ci_pathlen)) != 0) {
400 #ifdef DEBUG
401 			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
402 				printf("ccd%d: can't copy path, error = %d\n",
403 				    ccd->ccd_unit, error);
404 #endif
405 			goto fail;
406 		}
407 		ci->ci_path = malloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK);
408 		bcopy(tmppath, ci->ci_path, ci->ci_pathlen);
409 
410 		ci->ci_dev = vn_todev(vp);
411 
412 		/*
413 		 * Get partition information for the component.
414 		 */
415 		if ((error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart,
416 		    FREAD, cred, td)) != 0) {
417 #ifdef DEBUG
418 			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
419 				 printf("ccd%d: %s: ioctl failed, error = %d\n",
420 				     ccd->ccd_unit, ci->ci_path, error);
421 #endif
422 			goto fail;
423 		}
424 		if (dpart.part->p_fstype == FS_BSDFFS) {
425 			maxsecsize =
426 			    ((dpart.disklab->d_secsize > maxsecsize) ?
427 			    dpart.disklab->d_secsize : maxsecsize);
428 			size = dpart.part->p_size - CCD_OFFSET;
429 		} else {
430 #ifdef DEBUG
431 			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
432 				printf("ccd%d: %s: incorrect partition type\n",
433 				    ccd->ccd_unit, ci->ci_path);
434 #endif
435 			error = EFTYPE;
436 			goto fail;
437 		}
438 
439 		/*
440 		 * Calculate the size, truncating to an interleave
441 		 * boundary if necessary.
442 		 */
443 
444 		if (cs->sc_ileave > 1)
445 			size -= size % cs->sc_ileave;
446 
447 		if (size == 0) {
448 #ifdef DEBUG
449 			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
450 				printf("ccd%d: %s: size == 0\n",
451 				    ccd->ccd_unit, ci->ci_path);
452 #endif
453 			error = ENODEV;
454 			goto fail;
455 		}
456 
457 		if (minsize == 0 || size < minsize)
458 			minsize = size;
459 		ci->ci_size = size;
460 		cs->sc_size += size;
461 	}
462 
463 	/*
464 	 * Don't allow the interleave to be smaller than
465 	 * the biggest component sector.
466 	 */
467 	if ((cs->sc_ileave > 0) &&
468 	    (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) {
469 #ifdef DEBUG
470 		if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
471 			printf("ccd%d: interleave must be at least %d\n",
472 			    ccd->ccd_unit, (maxsecsize / DEV_BSIZE));
473 #endif
474 		error = EINVAL;
475 		goto fail;
476 	}
477 
478 	/*
479 	 * If uniform interleave is desired set all sizes to that of
480 	 * the smallest component.  This will guarentee that a single
481 	 * interleave table is generated.
482 	 *
483 	 * Lost space must be taken into account when calculating the
484 	 * overall size.  Half the space is lost when CCDF_MIRROR is
485 	 * specified.  One disk is lost when CCDF_PARITY is specified.
486 	 */
487 	if (ccd->ccd_flags & CCDF_UNIFORM) {
488 		for (ci = cs->sc_cinfo;
489 		     ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) {
490 			ci->ci_size = minsize;
491 		}
492 		if (ccd->ccd_flags & CCDF_MIRROR) {
493 			/*
494 			 * Check to see if an even number of components
495 			 * have been specified.  The interleave must also
496 			 * be non-zero in order for us to be able to
497 			 * guarentee the topology.
498 			 */
499 			if (cs->sc_nccdisks % 2) {
500 				printf("ccd%d: mirroring requires an even number of disks\n", ccd->ccd_unit );
501 				error = EINVAL;
502 				goto fail;
503 			}
504 			if (cs->sc_ileave == 0) {
505 				printf("ccd%d: an interleave must be specified when mirroring\n", ccd->ccd_unit);
506 				error = EINVAL;
507 				goto fail;
508 			}
509 			cs->sc_size = (cs->sc_nccdisks/2) * minsize;
510 		} else if (ccd->ccd_flags & CCDF_PARITY) {
511 			cs->sc_size = (cs->sc_nccdisks-1) * minsize;
512 		} else {
513 			if (cs->sc_ileave == 0) {
514 				printf("ccd%d: an interleave must be specified when using parity\n", ccd->ccd_unit);
515 				error = EINVAL;
516 				goto fail;
517 			}
518 			cs->sc_size = cs->sc_nccdisks * minsize;
519 		}
520 	}
521 
522 	/*
523 	 * Construct the interleave table.
524 	 */
525 	ccdinterleave(cs, ccd->ccd_unit);
526 
527 	/*
528 	 * Create pseudo-geometry based on 1MB cylinders.  It's
529 	 * pretty close.
530 	 */
531 	ccg->ccg_secsize = maxsecsize;
532 	ccg->ccg_ntracks = 1;
533 	ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize;
534 	ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors;
535 
536 	/*
537 	 * Add an devstat entry for this device.
538 	 */
539 	devstat_add_entry(&cs->device_stats, "ccd", ccd->ccd_unit,
540 			  ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED,
541 			  DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER,
542 			  DEVSTAT_PRIORITY_ARRAY);
543 
544 	cs->sc_flags |= CCDF_INITED;
545 	cs->sc_cflags = ccd->ccd_flags;	/* So we can find out later... */
546 	cs->sc_unit = ccd->ccd_unit;
547 	return (0);
548 fail:
549 	while (ci > cs->sc_cinfo) {
550 		ci--;
551 		free(ci->ci_path, M_DEVBUF);
552 	}
553 	free(cs->sc_cinfo, M_DEVBUF);
554 	return (error);
555 }
556 
557 static void
558 ccdinterleave(cs, unit)
559 	struct ccd_softc *cs;
560 	int unit;
561 {
562 	struct ccdcinfo *ci, *smallci;
563 	struct ccdiinfo *ii;
564 	daddr_t bn, lbn;
565 	int ix;
566 	u_long size;
567 
568 #ifdef DEBUG
569 	if (ccddebug & CCDB_INIT)
570 		printf("ccdinterleave(%x): ileave %d\n", cs, cs->sc_ileave);
571 #endif
572 
573 	/*
574 	 * Allocate an interleave table.  The worst case occurs when each
575 	 * of N disks is of a different size, resulting in N interleave
576 	 * tables.
577 	 *
578 	 * Chances are this is too big, but we don't care.
579 	 */
580 	size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo);
581 	cs->sc_itable = (struct ccdiinfo *)malloc(size, M_DEVBUF, M_WAITOK);
582 	bzero((caddr_t)cs->sc_itable, size);
583 
584 	/*
585 	 * Trivial case: no interleave (actually interleave of disk size).
586 	 * Each table entry represents a single component in its entirety.
587 	 *
588 	 * An interleave of 0 may not be used with a mirror or parity setup.
589 	 */
590 	if (cs->sc_ileave == 0) {
591 		bn = 0;
592 		ii = cs->sc_itable;
593 
594 		for (ix = 0; ix < cs->sc_nccdisks; ix++) {
595 			/* Allocate space for ii_index. */
596 			ii->ii_index = malloc(sizeof(int), M_DEVBUF, M_WAITOK);
597 			ii->ii_ndisk = 1;
598 			ii->ii_startblk = bn;
599 			ii->ii_startoff = 0;
600 			ii->ii_index[0] = ix;
601 			bn += cs->sc_cinfo[ix].ci_size;
602 			ii++;
603 		}
604 		ii->ii_ndisk = 0;
605 #ifdef DEBUG
606 		if (ccddebug & CCDB_INIT)
607 			printiinfo(cs->sc_itable);
608 #endif
609 		return;
610 	}
611 
612 	/*
613 	 * The following isn't fast or pretty; it doesn't have to be.
614 	 */
615 	size = 0;
616 	bn = lbn = 0;
617 	for (ii = cs->sc_itable; ; ii++) {
618 		/*
619 		 * Allocate space for ii_index.  We might allocate more then
620 		 * we use.
621 		 */
622 		ii->ii_index = malloc((sizeof(int) * cs->sc_nccdisks),
623 		    M_DEVBUF, M_WAITOK);
624 
625 		/*
626 		 * Locate the smallest of the remaining components
627 		 */
628 		smallci = NULL;
629 		for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_nccdisks];
630 		    ci++) {
631 			if (ci->ci_size > size &&
632 			    (smallci == NULL ||
633 			     ci->ci_size < smallci->ci_size)) {
634 				smallci = ci;
635 			}
636 		}
637 
638 		/*
639 		 * Nobody left, all done
640 		 */
641 		if (smallci == NULL) {
642 			ii->ii_ndisk = 0;
643 			break;
644 		}
645 
646 		/*
647 		 * Record starting logical block using an sc_ileave blocksize.
648 		 */
649 		ii->ii_startblk = bn / cs->sc_ileave;
650 
651 		/*
652 		 * Record starting comopnent block using an sc_ileave
653 		 * blocksize.  This value is relative to the beginning of
654 		 * a component disk.
655 		 */
656 		ii->ii_startoff = lbn;
657 
658 		/*
659 		 * Determine how many disks take part in this interleave
660 		 * and record their indices.
661 		 */
662 		ix = 0;
663 		for (ci = cs->sc_cinfo;
664 		    ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) {
665 			if (ci->ci_size >= smallci->ci_size) {
666 				ii->ii_index[ix++] = ci - cs->sc_cinfo;
667 			}
668 		}
669 		ii->ii_ndisk = ix;
670 		bn += ix * (smallci->ci_size - size);
671 		lbn = smallci->ci_size / cs->sc_ileave;
672 		size = smallci->ci_size;
673 	}
674 #ifdef DEBUG
675 	if (ccddebug & CCDB_INIT)
676 		printiinfo(cs->sc_itable);
677 #endif
678 }
679 
680 /* ARGSUSED */
681 static int
682 ccdopen(dev_t dev, int flags, int fmt, d_thread_t *td)
683 {
684 	int unit = ccdunit(dev);
685 	struct ccd_softc *cs;
686 	struct disklabel *lp;
687 	int error = 0, part, pmask;
688 
689 #ifdef DEBUG
690 	if (ccddebug & CCDB_FOLLOW)
691 		printf("ccdopen(%x, %x)\n", dev, flags);
692 #endif
693 	if (unit >= numccd)
694 		return (ENXIO);
695 	cs = &ccd_softc[unit];
696 
697 	if ((error = ccdlock(cs)) != 0)
698 		return (error);
699 
700 	lp = &cs->sc_label;
701 
702 	part = ccdpart(dev);
703 	pmask = (1 << part);
704 
705 	/*
706 	 * If we're initialized, check to see if there are any other
707 	 * open partitions.  If not, then it's safe to update
708 	 * the in-core disklabel.
709 	 */
710 	if ((cs->sc_flags & CCDF_INITED) && (cs->sc_openmask == 0))
711 		ccdgetdisklabel(dev);
712 
713 	/* Check that the partition exists. */
714 	if (part != RAW_PART && ((part >= lp->d_npartitions) ||
715 	    (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
716 		error = ENXIO;
717 		goto done;
718 	}
719 
720 	cs->sc_openmask |= pmask;
721  done:
722 	ccdunlock(cs);
723 	return (0);
724 }
725 
726 /* ARGSUSED */
727 static int
728 ccdclose(dev_t dev, int flags, int fmt, d_thread_t *td)
729 {
730 	int unit = ccdunit(dev);
731 	struct ccd_softc *cs;
732 	int error = 0, part;
733 
734 #ifdef DEBUG
735 	if (ccddebug & CCDB_FOLLOW)
736 		printf("ccdclose(%x, %x)\n", dev, flags);
737 #endif
738 
739 	if (unit >= numccd)
740 		return (ENXIO);
741 	cs = &ccd_softc[unit];
742 
743 	if ((error = ccdlock(cs)) != 0)
744 		return (error);
745 
746 	part = ccdpart(dev);
747 
748 	/* ...that much closer to allowing unconfiguration... */
749 	cs->sc_openmask &= ~(1 << part);
750 	ccdunlock(cs);
751 	return (0);
752 }
753 
754 static void
755 ccdstrategy(bp)
756 	struct buf *bp;
757 {
758 	int unit = ccdunit(bp->b_dev);
759 	struct ccd_softc *cs = &ccd_softc[unit];
760 	int s;
761 	int wlabel;
762 	struct disklabel *lp;
763 
764 #ifdef DEBUG
765 	if (ccddebug & CCDB_FOLLOW)
766 		printf("ccdstrategy(%x): unit %d\n", bp, unit);
767 #endif
768 	if ((cs->sc_flags & CCDF_INITED) == 0) {
769 		bp->b_error = ENXIO;
770 		bp->b_flags |= B_ERROR;
771 		goto done;
772 	}
773 
774 	/* If it's a nil transfer, wake up the top half now. */
775 	if (bp->b_bcount == 0)
776 		goto done;
777 
778 	lp = &cs->sc_label;
779 
780 	/*
781 	 * Do bounds checking and adjust transfer.  If there's an
782 	 * error, the bounds check will flag that for us.
783 	 */
784 	wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING);
785 	if (ccdpart(bp->b_dev) != RAW_PART) {
786 		if (bounds_check_with_label(bp, lp, wlabel) <= 0)
787 			goto done;
788 	} else {
789 		int pbn;        /* in sc_secsize chunks */
790 		long sz;        /* in sc_secsize chunks */
791 
792 		pbn = bp->b_blkno / (cs->sc_geom.ccg_secsize / DEV_BSIZE);
793 		sz = howmany(bp->b_bcount, cs->sc_geom.ccg_secsize);
794 
795 		/*
796 		 * If out of bounds return an error. If at the EOF point,
797 		 * simply read or write less.
798 		 */
799 
800 		if (pbn < 0 || pbn >= cs->sc_size) {
801 			bp->b_resid = bp->b_bcount;
802 			if (pbn != cs->sc_size) {
803 				bp->b_error = EINVAL;
804 				bp->b_flags |= B_ERROR | B_INVAL;
805 			}
806 			goto done;
807 		}
808 
809 		/*
810 		 * If the request crosses EOF, truncate the request.
811 		 */
812 		if (pbn + sz > cs->sc_size) {
813 			bp->b_bcount = (cs->sc_size - pbn) *
814 			    cs->sc_geom.ccg_secsize;
815 		}
816 	}
817 
818 	bp->b_resid = bp->b_bcount;
819 
820 	/*
821 	 * "Start" the unit.
822 	 */
823 	s = splbio();
824 	ccdstart(cs, bp);
825 	splx(s);
826 	return;
827 done:
828 	biodone(bp);
829 }
830 
831 static void
832 ccdstart(cs, bp)
833 	struct ccd_softc *cs;
834 	struct buf *bp;
835 {
836 	long bcount, rcount;
837 	struct ccdbuf *cbp[4];
838 	/* XXX! : 2 reads and 2 writes for RAID 4/5 */
839 	caddr_t addr;
840 	daddr_t bn;
841 	struct partition *pp;
842 
843 #ifdef DEBUG
844 	if (ccddebug & CCDB_FOLLOW)
845 		printf("ccdstart(%x, %x)\n", cs, bp);
846 #endif
847 
848 	/* Record the transaction start  */
849 	devstat_start_transaction(&cs->device_stats);
850 
851 	/*
852 	 * Translate the partition-relative block number to an absolute.
853 	 */
854 	bn = bp->b_blkno;
855 	if (ccdpart(bp->b_dev) != RAW_PART) {
856 		pp = &cs->sc_label.d_partitions[ccdpart(bp->b_dev)];
857 		bn += pp->p_offset;
858 	}
859 
860 	/*
861 	 * Allocate component buffers and fire off the requests
862 	 */
863 	addr = bp->b_data;
864 	for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) {
865 		ccdbuffer(cbp, cs, bp, bn, addr, bcount);
866 		rcount = cbp[0]->cb_buf.b_bcount;
867 
868 		if (cs->sc_cflags & CCDF_MIRROR) {
869 			/*
870 			 * Mirroring.  Writes go to both disks, reads are
871 			 * taken from whichever disk seems most appropriate.
872 			 *
873 			 * We attempt to localize reads to the disk whos arm
874 			 * is nearest the read request.  We ignore seeks due
875 			 * to writes when making this determination and we
876 			 * also try to avoid hogging.
877 			 */
878 			if ((cbp[0]->cb_buf.b_flags & B_READ) == 0) {
879 				cbp[0]->cb_buf.b_vp->v_numoutput++;
880 				cbp[1]->cb_buf.b_vp->v_numoutput++;
881 				VOP_STRATEGY(cbp[0]->cb_buf.b_vp,
882 				    &cbp[0]->cb_buf);
883 				VOP_STRATEGY(cbp[1]->cb_buf.b_vp,
884 				    &cbp[1]->cb_buf);
885 			} else {
886 				int pick = cs->sc_pick;
887 				daddr_t range = cs->sc_size / 16;
888 
889 				if (bn < cs->sc_blk[pick] - range ||
890 				    bn > cs->sc_blk[pick] + range
891 				) {
892 					cs->sc_pick = pick = 1 - pick;
893 				}
894 				cs->sc_blk[pick] = bn + btodb(rcount);
895 				VOP_STRATEGY(cbp[pick]->cb_buf.b_vp,
896 				    &cbp[pick]->cb_buf);
897 			}
898 		} else {
899 			/*
900 			 * Not mirroring
901 			 */
902 			if ((cbp[0]->cb_buf.b_flags & B_READ) == 0)
903 				cbp[0]->cb_buf.b_vp->v_numoutput++;
904 			VOP_STRATEGY(cbp[0]->cb_buf.b_vp, &cbp[0]->cb_buf);
905 		}
906 		bn += btodb(rcount);
907 		addr += rcount;
908 	}
909 }
910 
911 /*
912  * Build a component buffer header.
913  */
914 static void
915 ccdbuffer(cb, cs, bp, bn, addr, bcount)
916 	struct ccdbuf **cb;
917 	struct ccd_softc *cs;
918 	struct buf *bp;
919 	daddr_t bn;
920 	caddr_t addr;
921 	long bcount;
922 {
923 	struct ccdcinfo *ci, *ci2 = NULL;	/* XXX */
924 	struct ccdbuf *cbp;
925 	daddr_t cbn, cboff;
926 	off_t cbc;
927 
928 #ifdef DEBUG
929 	if (ccddebug & CCDB_IO)
930 		printf("ccdbuffer(%x, %x, %d, %x, %d)\n",
931 		       cs, bp, bn, addr, bcount);
932 #endif
933 	/*
934 	 * Determine which component bn falls in.
935 	 */
936 	cbn = bn;
937 	cboff = 0;
938 
939 	if (cs->sc_ileave == 0) {
940 		/*
941 		 * Serially concatenated and neither a mirror nor a parity
942 		 * config.  This is a special case.
943 		 */
944 		daddr_t sblk;
945 
946 		sblk = 0;
947 		for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++)
948 			sblk += ci->ci_size;
949 		cbn -= sblk;
950 	} else {
951 		struct ccdiinfo *ii;
952 		int ccdisk, off;
953 
954 		/*
955 		 * Calculate cbn, the logical superblock (sc_ileave chunks),
956 		 * and cboff, a normal block offset (DEV_BSIZE chunks) relative
957 		 * to cbn.
958 		 */
959 		cboff = cbn % cs->sc_ileave;	/* DEV_BSIZE gran */
960 		cbn = cbn / cs->sc_ileave;	/* DEV_BSIZE * ileave gran */
961 
962 		/*
963 		 * Figure out which interleave table to use.
964 		 */
965 		for (ii = cs->sc_itable; ii->ii_ndisk; ii++) {
966 			if (ii->ii_startblk > cbn)
967 				break;
968 		}
969 		ii--;
970 
971 		/*
972 		 * off is the logical superblock relative to the beginning
973 		 * of this interleave block.
974 		 */
975 		off = cbn - ii->ii_startblk;
976 
977 		/*
978 		 * We must calculate which disk component to use (ccdisk),
979 		 * and recalculate cbn to be the superblock relative to
980 		 * the beginning of the component.  This is typically done by
981 		 * adding 'off' and ii->ii_startoff together.  However, 'off'
982 		 * must typically be divided by the number of components in
983 		 * this interleave array to be properly convert it from a
984 		 * CCD-relative logical superblock number to a
985 		 * component-relative superblock number.
986 		 */
987 		if (ii->ii_ndisk == 1) {
988 			/*
989 			 * When we have just one disk, it can't be a mirror
990 			 * or a parity config.
991 			 */
992 			ccdisk = ii->ii_index[0];
993 			cbn = ii->ii_startoff + off;
994 		} else {
995 			if (cs->sc_cflags & CCDF_MIRROR) {
996 				/*
997 				 * We have forced a uniform mapping, resulting
998 				 * in a single interleave array.  We double
999 				 * up on the first half of the available
1000 				 * components and our mirror is in the second
1001 				 * half.  This only works with a single
1002 				 * interleave array because doubling up
1003 				 * doubles the number of sectors, so there
1004 				 * cannot be another interleave array because
1005 				 * the next interleave array's calculations
1006 				 * would be off.
1007 				 */
1008 				int ndisk2 = ii->ii_ndisk / 2;
1009 				ccdisk = ii->ii_index[off % ndisk2];
1010 				cbn = ii->ii_startoff + off / ndisk2;
1011 				ci2 = &cs->sc_cinfo[ccdisk + ndisk2];
1012 			} else if (cs->sc_cflags & CCDF_PARITY) {
1013 				/*
1014 				 * XXX not implemented yet
1015 				 */
1016 				int ndisk2 = ii->ii_ndisk - 1;
1017 				ccdisk = ii->ii_index[off % ndisk2];
1018 				cbn = ii->ii_startoff + off / ndisk2;
1019 				if (cbn % ii->ii_ndisk <= ccdisk)
1020 					ccdisk++;
1021 			} else {
1022 				ccdisk = ii->ii_index[off % ii->ii_ndisk];
1023 				cbn = ii->ii_startoff + off / ii->ii_ndisk;
1024 			}
1025 		}
1026 
1027 		ci = &cs->sc_cinfo[ccdisk];
1028 
1029 		/*
1030 		 * Convert cbn from a superblock to a normal block so it
1031 		 * can be used to calculate (along with cboff) the normal
1032 		 * block index into this particular disk.
1033 		 */
1034 		cbn *= cs->sc_ileave;
1035 	}
1036 
1037 	/*
1038 	 * Fill in the component buf structure.
1039 	 */
1040 	cbp = getccdbuf(NULL);
1041 	cbp->cb_buf.b_flags = bp->b_flags | B_CALL;
1042 	cbp->cb_buf.b_iodone = (void (*)(struct buf *))ccdiodone;
1043 	cbp->cb_buf.b_dev = ci->ci_dev;		/* XXX */
1044 	cbp->cb_buf.b_blkno = cbn + cboff + CCD_OFFSET;
1045 	cbp->cb_buf.b_offset = dbtob(cbn + cboff + CCD_OFFSET);
1046 	cbp->cb_buf.b_data = addr;
1047 	cbp->cb_buf.b_vp = ci->ci_vp;
1048 	if (cs->sc_ileave == 0)
1049               cbc = dbtob((off_t)(ci->ci_size - cbn));
1050 	else
1051               cbc = dbtob((off_t)(cs->sc_ileave - cboff));
1052 	cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount;
1053  	cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount;
1054 
1055 	/*
1056 	 * context for ccdiodone
1057 	 */
1058 	cbp->cb_obp = bp;
1059 	cbp->cb_unit = cs - ccd_softc;
1060 	cbp->cb_comp = ci - cs->sc_cinfo;
1061 
1062 #ifdef DEBUG
1063 	if (ccddebug & CCDB_IO)
1064 		printf(" dev %x(u%d): cbp %x bn %d addr %x bcnt %d\n",
1065 		       ci->ci_dev, ci-cs->sc_cinfo, cbp, cbp->cb_buf.b_blkno,
1066 		       cbp->cb_buf.b_data, cbp->cb_buf.b_bcount);
1067 #endif
1068 	cb[0] = cbp;
1069 
1070 	/*
1071 	 * Note: both I/O's setup when reading from mirror, but only one
1072 	 * will be executed.
1073 	 */
1074 	if (cs->sc_cflags & CCDF_MIRROR) {
1075 		/* mirror, setup second I/O */
1076 		cbp = getccdbuf(cb[0]);
1077 		cbp->cb_buf.b_dev = ci2->ci_dev;
1078 		cbp->cb_buf.b_vp = ci2->ci_vp;
1079 		cbp->cb_comp = ci2 - cs->sc_cinfo;
1080 		cb[1] = cbp;
1081 		/* link together the ccdbuf's and clear "mirror done" flag */
1082 		cb[0]->cb_mirror = cb[1];
1083 		cb[1]->cb_mirror = cb[0];
1084 		cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE;
1085 		cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE;
1086 	}
1087 }
1088 
1089 static void
1090 ccdintr(cs, bp)
1091 	struct ccd_softc *cs;
1092 	struct buf *bp;
1093 {
1094 #ifdef DEBUG
1095 	if (ccddebug & CCDB_FOLLOW)
1096 		printf("ccdintr(%x, %x)\n", cs, bp);
1097 #endif
1098 	/*
1099 	 * Request is done for better or worse, wakeup the top half.
1100 	 */
1101 	if (bp->b_flags & B_ERROR)
1102 		bp->b_resid = bp->b_bcount;
1103 	devstat_end_transaction_buf(&cs->device_stats, bp);
1104 	biodone(bp);
1105 }
1106 
1107 /*
1108  * Called at interrupt time.
1109  * Mark the component as done and if all components are done,
1110  * take a ccd interrupt.
1111  */
1112 static void
1113 ccdiodone(cbp)
1114 	struct ccdbuf *cbp;
1115 {
1116 	struct buf *bp = cbp->cb_obp;
1117 	int unit = cbp->cb_unit;
1118 	int count, s;
1119 
1120 	s = splbio();
1121 #ifdef DEBUG
1122 	if (ccddebug & CCDB_FOLLOW)
1123 		printf("ccdiodone(%x)\n", cbp);
1124 	if (ccddebug & CCDB_IO) {
1125 		printf("ccdiodone: bp %x bcount %d resid %d\n",
1126 		       bp, bp->b_bcount, bp->b_resid);
1127 		printf(" dev %x(u%d), cbp %x bn %d addr %x bcnt %d\n",
1128 		       cbp->cb_buf.b_dev, cbp->cb_comp, cbp,
1129 		       cbp->cb_buf.b_blkno, cbp->cb_buf.b_data,
1130 		       cbp->cb_buf.b_bcount);
1131 	}
1132 #endif
1133 	/*
1134 	 * If an error occured, report it.  If this is a mirrored
1135 	 * configuration and the first of two possible reads, do not
1136 	 * set the error in the bp yet because the second read may
1137 	 * succeed.
1138 	 */
1139 
1140 	if (cbp->cb_buf.b_flags & B_ERROR) {
1141 		const char *msg = "";
1142 
1143 		if ((ccd_softc[unit].sc_cflags & CCDF_MIRROR) &&
1144 		    (cbp->cb_buf.b_flags & B_READ) &&
1145 		    (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1146 			/*
1147 			 * We will try our read on the other disk down
1148 			 * below, also reverse the default pick so if we
1149 			 * are doing a scan we do not keep hitting the
1150 			 * bad disk first.
1151 			 */
1152 			struct ccd_softc *cs = &ccd_softc[unit];
1153 
1154 			msg = ", trying other disk";
1155 			cs->sc_pick = 1 - cs->sc_pick;
1156 			cs->sc_blk[cs->sc_pick] = bp->b_blkno;
1157 		} else {
1158 			bp->b_flags |= B_ERROR;
1159 			bp->b_error = cbp->cb_buf.b_error ?
1160 			    cbp->cb_buf.b_error : EIO;
1161 		}
1162 		printf("ccd%d: error %d on component %d block %d (ccd block %d)%s\n",
1163 		       unit, bp->b_error, cbp->cb_comp,
1164 		       (int)cbp->cb_buf.b_blkno, bp->b_blkno, msg);
1165 	}
1166 
1167 	/*
1168 	 * Process mirror.  If we are writing, I/O has been initiated on both
1169 	 * buffers and we fall through only after both are finished.
1170 	 *
1171 	 * If we are reading only one I/O is initiated at a time.  If an
1172 	 * error occurs we initiate the second I/O and return, otherwise
1173 	 * we free the second I/O without initiating it.
1174 	 */
1175 
1176 	if (ccd_softc[unit].sc_cflags & CCDF_MIRROR) {
1177 		if ((cbp->cb_buf.b_flags & B_READ) == 0) {
1178 			/*
1179 			 * When writing, handshake with the second buffer
1180 			 * to determine when both are done.  If both are not
1181 			 * done, return here.
1182 			 */
1183 			if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1184 				cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE;
1185 				putccdbuf(cbp);
1186 				splx(s);
1187 				return;
1188 			}
1189 		} else {
1190 			/*
1191 			 * When reading, either dispose of the second buffer
1192 			 * or initiate I/O on the second buffer if an error
1193 			 * occured with this one.
1194 			 */
1195 			if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1196 				if (cbp->cb_buf.b_flags & B_ERROR) {
1197 					cbp->cb_mirror->cb_pflags |=
1198 					    CCDPF_MIRROR_DONE;
1199 					VOP_STRATEGY(
1200 					    cbp->cb_mirror->cb_buf.b_vp,
1201 					    &cbp->cb_mirror->cb_buf
1202 					);
1203 					putccdbuf(cbp);
1204 					splx(s);
1205 					return;
1206 				} else {
1207 					putccdbuf(cbp->cb_mirror);
1208 					/* fall through */
1209 				}
1210 			}
1211 		}
1212 	}
1213 
1214 	/*
1215 	 * use b_bufsize to determine how big the original request was rather
1216 	 * then b_bcount, because b_bcount may have been truncated for EOF.
1217 	 *
1218 	 * XXX We check for an error, but we do not test the resid for an
1219 	 * aligned EOF condition.  This may result in character & block
1220 	 * device access not recognizing EOF properly when read or written
1221 	 * sequentially, but will not effect filesystems.
1222 	 */
1223 	count = cbp->cb_buf.b_bufsize;
1224 	putccdbuf(cbp);
1225 
1226 	/*
1227 	 * If all done, "interrupt".
1228 	 */
1229 	bp->b_resid -= count;
1230 	if (bp->b_resid < 0)
1231 		panic("ccdiodone: count");
1232 	if (bp->b_resid == 0)
1233 		ccdintr(&ccd_softc[unit], bp);
1234 	splx(s);
1235 }
1236 
1237 static int
1238 ccdioctl(dev_t dev, u_long cmd, caddr_t data, int flag, d_thread_t *td)
1239 {
1240 	int unit = ccdunit(dev);
1241 	int i, j, lookedup = 0, error = 0;
1242 	int part, pmask, s;
1243 	struct ccd_softc *cs;
1244 	struct ccd_ioctl *ccio = (struct ccd_ioctl *)data;
1245 	struct ccddevice ccd;
1246 	char **cpp;
1247 	struct vnode **vpp;
1248 	struct ucred *cred;
1249 
1250 	KKASSERT(td->td_proc != NULL);
1251 	cred = td->td_proc->p_ucred;
1252 
1253 	if (unit >= numccd)
1254 		return (ENXIO);
1255 	cs = &ccd_softc[unit];
1256 
1257 	bzero(&ccd, sizeof(ccd));
1258 
1259 	switch (cmd) {
1260 	case CCDIOCSET:
1261 		if (cs->sc_flags & CCDF_INITED)
1262 			return (EBUSY);
1263 
1264 		if ((flag & FWRITE) == 0)
1265 			return (EBADF);
1266 
1267 		if ((error = ccdlock(cs)) != 0)
1268 			return (error);
1269 
1270 		if (ccio->ccio_ndisks > CCD_MAXNDISKS)
1271 			return (EINVAL);
1272 
1273 		/* Fill in some important bits. */
1274 		ccd.ccd_unit = unit;
1275 		ccd.ccd_interleave = ccio->ccio_ileave;
1276 		if (ccd.ccd_interleave == 0 &&
1277 		    ((ccio->ccio_flags & CCDF_MIRROR) ||
1278 		     (ccio->ccio_flags & CCDF_PARITY))) {
1279 			printf("ccd%d: disabling mirror/parity, interleave is 0\n", unit);
1280 			ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY);
1281 		}
1282 		if ((ccio->ccio_flags & CCDF_MIRROR) &&
1283 		    (ccio->ccio_flags & CCDF_PARITY)) {
1284 			printf("ccd%d: can't specify both mirror and parity, using mirror\n", unit);
1285 			ccio->ccio_flags &= ~CCDF_PARITY;
1286 		}
1287 		if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) &&
1288 		    !(ccio->ccio_flags & CCDF_UNIFORM)) {
1289 			printf("ccd%d: mirror/parity forces uniform flag\n",
1290 			       unit);
1291 			ccio->ccio_flags |= CCDF_UNIFORM;
1292 		}
1293 		ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK;
1294 
1295 		/*
1296 		 * Allocate space for and copy in the array of
1297 		 * componet pathnames and device numbers.
1298 		 */
1299 		cpp = malloc(ccio->ccio_ndisks * sizeof(char *),
1300 		    M_DEVBUF, M_WAITOK);
1301 		vpp = malloc(ccio->ccio_ndisks * sizeof(struct vnode *),
1302 		    M_DEVBUF, M_WAITOK);
1303 
1304 		error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp,
1305 		    ccio->ccio_ndisks * sizeof(char **));
1306 		if (error) {
1307 			free(vpp, M_DEVBUF);
1308 			free(cpp, M_DEVBUF);
1309 			ccdunlock(cs);
1310 			return (error);
1311 		}
1312 
1313 #ifdef DEBUG
1314 		if (ccddebug & CCDB_INIT)
1315 			for (i = 0; i < ccio->ccio_ndisks; ++i)
1316 				printf("ccdioctl: component %d: 0x%x\n",
1317 				    i, cpp[i]);
1318 #endif
1319 
1320 		for (i = 0; i < ccio->ccio_ndisks; ++i) {
1321 #ifdef DEBUG
1322 			if (ccddebug & CCDB_INIT)
1323 				printf("ccdioctl: lookedup = %d\n", lookedup);
1324 #endif
1325 			if ((error = ccdlookup(cpp[i], td, &vpp[i])) != 0) {
1326 				for (j = 0; j < lookedup; ++j)
1327 					(void)vn_close(vpp[j], FREAD|FWRITE, td);
1328 				free(vpp, M_DEVBUF);
1329 				free(cpp, M_DEVBUF);
1330 				ccdunlock(cs);
1331 				return (error);
1332 			}
1333 			++lookedup;
1334 		}
1335 		ccd.ccd_cpp = cpp;
1336 		ccd.ccd_vpp = vpp;
1337 		ccd.ccd_ndev = ccio->ccio_ndisks;
1338 
1339 		/*
1340 		 * Initialize the ccd.  Fills in the softc for us.
1341 		 */
1342 		if ((error = ccdinit(&ccd, cpp, td)) != 0) {
1343 			for (j = 0; j < lookedup; ++j)
1344 				(void)vn_close(vpp[j], FREAD|FWRITE, td);
1345 			bzero(&ccd_softc[unit], sizeof(struct ccd_softc));
1346 			free(vpp, M_DEVBUF);
1347 			free(cpp, M_DEVBUF);
1348 			ccdunlock(cs);
1349 			return (error);
1350 		}
1351 
1352 		/*
1353 		 * The ccd has been successfully initialized, so
1354 		 * we can place it into the array and read the disklabel.
1355 		 */
1356 		bcopy(&ccd, &ccddevs[unit], sizeof(ccd));
1357 		ccio->ccio_unit = unit;
1358 		ccio->ccio_size = cs->sc_size;
1359 		ccdgetdisklabel(dev);
1360 
1361 		ccdunlock(cs);
1362 
1363 		break;
1364 
1365 	case CCDIOCCLR:
1366 		if ((cs->sc_flags & CCDF_INITED) == 0)
1367 			return (ENXIO);
1368 
1369 		if ((flag & FWRITE) == 0)
1370 			return (EBADF);
1371 
1372 		if ((error = ccdlock(cs)) != 0)
1373 			return (error);
1374 
1375 		/* Don't unconfigure if any other partitions are open */
1376 		part = ccdpart(dev);
1377 		pmask = (1 << part);
1378 		if ((cs->sc_openmask & ~pmask)) {
1379 			ccdunlock(cs);
1380 			return (EBUSY);
1381 		}
1382 
1383 		/*
1384 		 * Free ccd_softc information and clear entry.
1385 		 */
1386 
1387 		/* Close the components and free their pathnames. */
1388 		for (i = 0; i < cs->sc_nccdisks; ++i) {
1389 			/*
1390 			 * XXX: this close could potentially fail and
1391 			 * cause Bad Things.  Maybe we need to force
1392 			 * the close to happen?
1393 			 */
1394 #ifdef DEBUG
1395 			if (ccddebug & CCDB_VNODE)
1396 				vprint("CCDIOCCLR: vnode info",
1397 				    cs->sc_cinfo[i].ci_vp);
1398 #endif
1399 			(void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, td);
1400 			free(cs->sc_cinfo[i].ci_path, M_DEVBUF);
1401 		}
1402 
1403 		/* Free interleave index. */
1404 		for (i = 0; cs->sc_itable[i].ii_ndisk; ++i)
1405 			free(cs->sc_itable[i].ii_index, M_DEVBUF);
1406 
1407 		/* Free component info and interleave table. */
1408 		free(cs->sc_cinfo, M_DEVBUF);
1409 		free(cs->sc_itable, M_DEVBUF);
1410 		cs->sc_flags &= ~CCDF_INITED;
1411 
1412 		/*
1413 		 * Free ccddevice information and clear entry.
1414 		 */
1415 		free(ccddevs[unit].ccd_cpp, M_DEVBUF);
1416 		free(ccddevs[unit].ccd_vpp, M_DEVBUF);
1417 		ccd.ccd_dk = -1;
1418 		bcopy(&ccd, &ccddevs[unit], sizeof(ccd));
1419 
1420 		/*
1421 		 * And remove the devstat entry.
1422 		 */
1423 		devstat_remove_entry(&cs->device_stats);
1424 
1425 		/* This must be atomic. */
1426 		s = splhigh();
1427 		ccdunlock(cs);
1428 		bzero(cs, sizeof(struct ccd_softc));
1429 		splx(s);
1430 
1431 		break;
1432 
1433 	case DIOCGDINFO:
1434 		if ((cs->sc_flags & CCDF_INITED) == 0)
1435 			return (ENXIO);
1436 
1437 		*(struct disklabel *)data = cs->sc_label;
1438 		break;
1439 
1440 	case DIOCGPART:
1441 		if ((cs->sc_flags & CCDF_INITED) == 0)
1442 			return (ENXIO);
1443 
1444 		((struct partinfo *)data)->disklab = &cs->sc_label;
1445 		((struct partinfo *)data)->part =
1446 		    &cs->sc_label.d_partitions[ccdpart(dev)];
1447 		break;
1448 
1449 	case DIOCWDINFO:
1450 	case DIOCSDINFO:
1451 		if ((cs->sc_flags & CCDF_INITED) == 0)
1452 			return (ENXIO);
1453 
1454 		if ((flag & FWRITE) == 0)
1455 			return (EBADF);
1456 
1457 		if ((error = ccdlock(cs)) != 0)
1458 			return (error);
1459 
1460 		cs->sc_flags |= CCDF_LABELLING;
1461 
1462 		error = setdisklabel(&cs->sc_label,
1463 		    (struct disklabel *)data, 0);
1464 		if (error == 0) {
1465 			if (cmd == DIOCWDINFO) {
1466 				dev_t cdev = CCDLABELDEV(dev);
1467 				error = writedisklabel(cdev, &cs->sc_label);
1468 			}
1469 		}
1470 
1471 		cs->sc_flags &= ~CCDF_LABELLING;
1472 
1473 		ccdunlock(cs);
1474 
1475 		if (error)
1476 			return (error);
1477 		break;
1478 
1479 	case DIOCWLABEL:
1480 		if ((cs->sc_flags & CCDF_INITED) == 0)
1481 			return (ENXIO);
1482 
1483 		if ((flag & FWRITE) == 0)
1484 			return (EBADF);
1485 		if (*(int *)data != 0)
1486 			cs->sc_flags |= CCDF_WLABEL;
1487 		else
1488 			cs->sc_flags &= ~CCDF_WLABEL;
1489 		break;
1490 
1491 	default:
1492 		return (ENOTTY);
1493 	}
1494 
1495 	return (0);
1496 }
1497 
1498 static int
1499 ccdsize(dev_t dev)
1500 {
1501 	struct ccd_softc *cs;
1502 	int part, size;
1503 
1504 	if (ccdopen(dev, 0, S_IFCHR, curthread))
1505 		return (-1);
1506 
1507 	cs = &ccd_softc[ccdunit(dev)];
1508 	part = ccdpart(dev);
1509 
1510 	if ((cs->sc_flags & CCDF_INITED) == 0)
1511 		return (-1);
1512 
1513 	if (cs->sc_label.d_partitions[part].p_fstype != FS_SWAP)
1514 		size = -1;
1515 	else
1516 		size = cs->sc_label.d_partitions[part].p_size;
1517 
1518 	if (ccdclose(dev, 0, S_IFCHR, curthread))
1519 		return (-1);
1520 
1521 	return (size);
1522 }
1523 
1524 static int
1525 ccddump(dev_t dev, u_int count, u_int blkno, u_int secsize)
1526 {
1527 	/* Not implemented. */
1528 	return ENXIO;
1529 }
1530 
1531 /*
1532  * Lookup the provided name in the filesystem.  If the file exists,
1533  * is a valid block device, and isn't being used by anyone else,
1534  * set *vpp to the file's vnode.
1535  */
1536 static int
1537 ccdlookup(char *path, struct thread *td, struct vnode **vpp)
1538 {
1539 	struct nameidata nd;
1540 	struct vnode *vp;
1541 	int error;
1542 	struct ucred *cred;
1543 
1544 	KKASSERT(td->td_proc);
1545 	cred = td->td_proc->p_ucred;
1546 
1547 	NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW, UIO_USERSPACE, path, td);
1548 	if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) {
1549 #ifdef DEBUG
1550 		if (ccddebug & CCDB_FOLLOW|CCDB_INIT)
1551 			printf("ccdlookup: vn_open error = %d\n", error);
1552 #endif
1553 		return (error);
1554 	}
1555 	vp = nd.ni_vp;
1556 
1557 	if (vp->v_usecount > 1) {
1558 		error = EBUSY;
1559 		goto bad;
1560 	}
1561 
1562 	if (!vn_isdisk(vp, &error))
1563 		goto bad;
1564 
1565 #ifdef DEBUG
1566 	if (ccddebug & CCDB_VNODE)
1567 		vprint("ccdlookup: vnode info", vp);
1568 #endif
1569 
1570 	VOP_UNLOCK(vp, NULL, 0, td);
1571 	NDFREE(&nd, NDF_ONLY_PNBUF);
1572 	*vpp = vp;
1573 	return (0);
1574 bad:
1575 	VOP_UNLOCK(vp, NULL, 0, td);
1576 	NDFREE(&nd, NDF_ONLY_PNBUF);
1577 	/* vn_close does vrele() for vp */
1578 	(void)vn_close(vp, FREAD|FWRITE, td);
1579 	return (error);
1580 }
1581 
1582 /*
1583  * Read the disklabel from the ccd.  If one is not present, fake one
1584  * up.
1585  */
1586 static void
1587 ccdgetdisklabel(dev)
1588 	dev_t dev;
1589 {
1590 	int unit = ccdunit(dev);
1591 	struct ccd_softc *cs = &ccd_softc[unit];
1592 	char *errstring;
1593 	struct disklabel *lp = &cs->sc_label;
1594 	struct ccdgeom *ccg = &cs->sc_geom;
1595 	dev_t cdev;
1596 
1597 	bzero(lp, sizeof(*lp));
1598 
1599 	lp->d_secperunit = cs->sc_size;
1600 	lp->d_secsize = ccg->ccg_secsize;
1601 	lp->d_nsectors = ccg->ccg_nsectors;
1602 	lp->d_ntracks = ccg->ccg_ntracks;
1603 	lp->d_ncylinders = ccg->ccg_ncylinders;
1604 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1605 
1606 	strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename));
1607 	lp->d_type = DTYPE_CCD;
1608 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1609 	lp->d_rpm = 3600;
1610 	lp->d_interleave = 1;
1611 	lp->d_flags = 0;
1612 
1613 	lp->d_partitions[RAW_PART].p_offset = 0;
1614 	lp->d_partitions[RAW_PART].p_size = cs->sc_size;
1615 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1616 	lp->d_npartitions = RAW_PART + 1;
1617 
1618 	lp->d_bbsize = BBSIZE;				/* XXX */
1619 	lp->d_sbsize = SBSIZE;				/* XXX */
1620 
1621 	lp->d_magic = DISKMAGIC;
1622 	lp->d_magic2 = DISKMAGIC;
1623 	lp->d_checksum = dkcksum(&cs->sc_label);
1624 
1625 	/*
1626 	 * Call the generic disklabel extraction routine.
1627 	 */
1628 	cdev = CCDLABELDEV(dev);
1629 	errstring = readdisklabel(cdev, &cs->sc_label);
1630 	if (errstring != NULL)
1631 		ccdmakedisklabel(cs);
1632 
1633 #ifdef DEBUG
1634 	/* It's actually extremely common to have unlabeled ccds. */
1635 	if (ccddebug & CCDB_LABEL)
1636 		if (errstring != NULL)
1637 			printf("ccd%d: %s\n", unit, errstring);
1638 #endif
1639 }
1640 
1641 /*
1642  * Take care of things one might want to take care of in the event
1643  * that a disklabel isn't present.
1644  */
1645 static void
1646 ccdmakedisklabel(cs)
1647 	struct ccd_softc *cs;
1648 {
1649 	struct disklabel *lp = &cs->sc_label;
1650 
1651 	/*
1652 	 * For historical reasons, if there's no disklabel present
1653 	 * the raw partition must be marked FS_BSDFFS.
1654 	 */
1655 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1656 
1657 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1658 }
1659 
1660 /*
1661  * Wait interruptibly for an exclusive lock.
1662  *
1663  * XXX
1664  * Several drivers do this; it should be abstracted and made MP-safe.
1665  */
1666 static int
1667 ccdlock(cs)
1668 	struct ccd_softc *cs;
1669 {
1670 	int error;
1671 
1672 	while ((cs->sc_flags & CCDF_LOCKED) != 0) {
1673 		cs->sc_flags |= CCDF_WANTED;
1674 		if ((error = tsleep(cs, PCATCH, "ccdlck", 0)) != 0)
1675 			return (error);
1676 	}
1677 	cs->sc_flags |= CCDF_LOCKED;
1678 	return (0);
1679 }
1680 
1681 /*
1682  * Unlock and wake up any waiters.
1683  */
1684 static void
1685 ccdunlock(cs)
1686 	struct ccd_softc *cs;
1687 {
1688 
1689 	cs->sc_flags &= ~CCDF_LOCKED;
1690 	if ((cs->sc_flags & CCDF_WANTED) != 0) {
1691 		cs->sc_flags &= ~CCDF_WANTED;
1692 		wakeup(cs);
1693 	}
1694 }
1695 
#ifdef DEBUG
/*
 * Debug helper: print every entry of an interleave table, one line per
 * entry, followed by that entry's component indices.  The table ends
 * at the first entry whose ii_ndisk is zero.
 */
static void
printiinfo(struct ccdiinfo *ii)
{
	int entry = 0;

	while (ii->ii_ndisk) {
		int k;

		printf(" itab[%d]: #dk %d sblk %d soff %d",
		       entry, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff);
		for (k = 0; k < ii->ii_ndisk; k++)
			printf(" %d", ii->ii_index[k]);
		printf("\n");

		entry++;
		ii++;
	}
}
#endif
1712 
1713 
1714 /* Local Variables: */
1715 /* c-argdecl-indent: 8 */
1716 /* c-continued-statement-offset: 8 */
1717 /* c-indent-level: 8 */
1718 /* End: */
1719