xref: /dragonfly/sys/dev/disk/ccd/ccd.c (revision 2e3ed54d)
1 /* $FreeBSD: src/sys/dev/ccd/ccd.c,v 1.73.2.1 2001/09/11 09:49:52 kris Exp $ */
2 /* $DragonFly: src/sys/dev/disk/ccd/ccd.c,v 1.20 2005/08/03 16:36:33 hmp Exp $ */
3 
4 /*	$NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $	*/
5 
6 /*
7  * Copyright (c) 1995 Jason R. Thorpe.
8  * All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed for the NetBSD Project
21  *	by Jason R. Thorpe.
22  * 4. The name of the author may not be used to endorse or promote products
23  *    derived from this software without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
26  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
27  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
28  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
29  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
30  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
31  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
32  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
33  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  */
37 
38 /*
39  * Copyright (c) 1988 University of Utah.
40  * Copyright (c) 1990, 1993
41  *	The Regents of the University of California.  All rights reserved.
42  *
43  * This code is derived from software contributed to Berkeley by
44  * the Systems Programming Group of the University of Utah Computer
45  * Science Department.
46  *
47  * Redistribution and use in source and binary forms, with or without
48  * modification, are permitted provided that the following conditions
49  * are met:
50  * 1. Redistributions of source code must retain the above copyright
51  *    notice, this list of conditions and the following disclaimer.
52  * 2. Redistributions in binary form must reproduce the above copyright
53  *    notice, this list of conditions and the following disclaimer in the
54  *    documentation and/or other materials provided with the distribution.
55  * 3. All advertising materials mentioning features or use of this software
56  *    must display the following acknowledgement:
57  *	This product includes software developed by the University of
58  *	California, Berkeley and its contributors.
59  * 4. Neither the name of the University nor the names of its contributors
60  *    may be used to endorse or promote products derived from this software
61  *    without specific prior written permission.
62  *
63  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73  * SUCH DAMAGE.
74  *
75  * from: Utah $Hdr: cd.c 1.6 90/11/28$
76  *
77  *	@(#)cd.c	8.2 (Berkeley) 11/16/93
78  */
79 
80 /*
81  * "Concatenated" disk driver.
82  *
83  * Dynamic configuration and disklabel support by:
84  *	Jason R. Thorpe <thorpej@nas.nasa.gov>
85  *	Numerical Aerodynamic Simulation Facility
86  *	Mail Stop 258-6
87  *	NASA Ames Research Center
88  *	Moffett Field, CA 94035
89  */
90 
91 #include "use_ccd.h"
92 
93 #include <sys/param.h>
94 #include <sys/systm.h>
95 #include <sys/kernel.h>
96 #include <sys/module.h>
97 #include <sys/proc.h>
98 #include <sys/buf.h>
99 #include <sys/malloc.h>
100 #include <sys/nlookup.h>
101 #include <sys/conf.h>
102 #include <sys/stat.h>
103 #include <sys/sysctl.h>
104 #include <sys/disklabel.h>
105 #include <vfs/ufs/fs.h>
106 #include <sys/devicestat.h>
107 #include <sys/fcntl.h>
108 #include <sys/vnode.h>
109 #include <sys/buf2.h>
110 
111 #include <sys/ccdvar.h>
112 
113 #include <sys/thread2.h>
114 
115 #include <vm/vm_zone.h>
116 
117 #if defined(CCDDEBUG) && !defined(DEBUG)
118 #define DEBUG
119 #endif
120 
121 #ifdef DEBUG
122 #define CCDB_FOLLOW	0x01
123 #define CCDB_INIT	0x02
124 #define CCDB_IO		0x04
125 #define CCDB_LABEL	0x08
126 #define CCDB_VNODE	0x10
127 static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL |
128     CCDB_VNODE;
129 SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, "");
130 #undef DEBUG
131 #endif
132 
133 #define	ccdunit(x)	dkunit(x)
134 #define ccdpart(x)	dkpart(x)
135 
136 /*
137    This is how mirroring works (only writes are special):
138 
139    When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s
140    linked together by the cb_mirror field.  "cb_pflags &
141    CCDPF_MIRROR_DONE" is set to 0 on both of them.
142 
143    When a component returns to ccdiodone(), it checks if "cb_pflags &
144    CCDPF_MIRROR_DONE" is set or not.  If not, it sets the partner's
145    flag and returns.  If it is, it means its partner has already
146    returned, so it will go to the regular cleanup.
147 
148  */
149 
/*
 * Per-component I/O descriptor.  One of these is set up by ccdbuffer()
 * for every piece of an original request that is sent to a component
 * (two for a mirrored configuration, linked through cb_mirror).
 *
 * NOTE(review): cb_buf looks like it has to remain the first member.
 * ccdbuffer() installs ccdiodone(), which takes a struct ccdbuf *, as the
 * b_iodone callback through a cast to a struct buf * handler, so the
 * callback presumably receives the address of cb_buf -- confirm before
 * reordering.
 */
struct ccdbuf {
	struct buf	cb_buf;		/* new I/O buf */
	struct buf	*cb_obp;	/* ptr. to original I/O buf */
	struct ccdbuf	*cb_freenext;	/* free list link (see ccdfreebufs) */
	int		cb_unit;	/* target unit (index into ccd_softc) */
	int		cb_comp;	/* target component (index into sc_cinfo) */
	int		cb_pflags;	/* mirror/parity status flag */
	struct ccdbuf	*cb_mirror;	/* mirror counterpart */
};
159 
160 /* bits in cb_pflags */
161 #define CCDPF_MIRROR_DONE 1	/* if set, mirror counterpart is done */
162 
163 #define CCDLABELDEV(dev)	\
164 	(make_sub_dev(dev, dkmakeminor(ccdunit((dev)), 0, RAW_PART)))
165 
166 static d_open_t ccdopen;
167 static d_close_t ccdclose;
168 static d_strategy_t ccdstrategy;
169 static d_ioctl_t ccdioctl;
170 static d_dump_t ccddump;
171 static d_psize_t ccdsize;
172 
173 #define NCCDFREEHIWAT	16
174 
175 #define CDEV_MAJOR 74
176 
/*
 * Device switch for the ccd driver, registered by ccdattach() through
 * cdevsw_add().  The initializer is positional; the comment in front of
 * each value names the slot it fills.
 */
static struct cdevsw ccd_cdevsw = {
	/* name */	"ccd",
	/* maj */	CDEV_MAJOR,
	/* flags */	D_DISK,
	/* port */      NULL,
	/* clone */	NULL,

	/* open */	ccdopen,
	/* close */	ccdclose,
	/* read */	physread,
	/* write */	physwrite,
	/* ioctl */	ccdioctl,
	/* poll */	nopoll,
	/* mmap */	nommap,
	/* strategy */	ccdstrategy,
	/* dump */	ccddump,
	/* psize */	ccdsize
};
195 
196 /* called during module initialization */
197 static	void ccdattach (void);
198 static	int ccd_modevent (module_t, int, void *);
199 
200 /* called by biodone() at interrupt time */
201 static	void ccdiodone (struct ccdbuf *cbp);
202 
203 static	void ccdstart (struct ccd_softc *, struct buf *);
204 static	void ccdinterleave (struct ccd_softc *, int);
205 static	void ccdintr (struct ccd_softc *, struct buf *);
206 static	int ccdinit (struct ccddevice *, char **, struct thread *);
207 static	int ccdlookup (char *, struct thread *td, struct vnode **);
208 static	void ccdbuffer (struct ccdbuf **ret, struct ccd_softc *,
209 		struct buf *, daddr_t, caddr_t, long);
210 static	void ccdgetdisklabel (dev_t);
211 static	void ccdmakedisklabel (struct ccd_softc *);
212 static	int ccdlock (struct ccd_softc *);
213 static	void ccdunlock (struct ccd_softc *);
214 
215 #ifdef DEBUG
216 static	void printiinfo (struct ccdiinfo *);
217 #endif
218 
219 /* Non-private for the benefit of libkvm. */
220 struct	ccd_softc *ccd_softc;
221 struct	ccddevice *ccddevs;
222 struct	ccdbuf *ccdfreebufs;
223 static	int numccdfreebufs;
224 static	int numccd = 0;
225 
226 /*
227  * getccdbuf() -	Allocate and zero a ccd buffer.
228  *
229  *	This routine is called at splbio().
230  */
231 
232 static __inline
233 struct ccdbuf *
234 getccdbuf(struct ccdbuf *cpy)
235 {
236 	struct ccdbuf *cbp;
237 
238 	/*
239 	 * Allocate from freelist or malloc as necessary
240 	 */
241 	if ((cbp = ccdfreebufs) != NULL) {
242 		ccdfreebufs = cbp->cb_freenext;
243 		--numccdfreebufs;
244 	} else {
245 		cbp = malloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK);
246 	}
247 
248 	/*
249 	 * Used by mirroring code
250 	 */
251 	if (cpy)
252 		bcopy(cpy, cbp, sizeof(struct ccdbuf));
253 	else
254 		bzero(cbp, sizeof(struct ccdbuf));
255 
256 	/*
257 	 * independant struct buf initialization
258 	 */
259 	LIST_INIT(&cbp->cb_buf.b_dep);
260 	BUF_LOCKINIT(&cbp->cb_buf);
261 	BUF_LOCK(&cbp->cb_buf, LK_EXCLUSIVE);
262 	BUF_KERNPROC(&cbp->cb_buf);
263 
264 	return(cbp);
265 }
266 
267 /*
268  * putccdbuf() -	Free a ccd buffer.
269  *
270  *	This routine is called at splbio().
271  */
272 
273 static __inline
274 void
275 putccdbuf(struct ccdbuf *cbp)
276 {
277 	BUF_UNLOCK(&cbp->cb_buf);
278 	BUF_LOCKFREE(&cbp->cb_buf);
279 
280 	if (numccdfreebufs < NCCDFREEHIWAT) {
281 		cbp->cb_freenext = ccdfreebufs;
282 		ccdfreebufs = cbp;
283 		++numccdfreebufs;
284 	} else {
285 		free((caddr_t)cbp, M_DEVBUF);
286 	}
287 }
288 
289 
/*
 * Number of blocks to leave untouched in front of a component partition.
 * This is to avoid violating its disklabel area when it starts at the
 * beginning of the slice.
 */
295 #if !defined(CCD_OFFSET)
296 #define CCD_OFFSET 16
297 #endif
298 
299 /*
300  * Called by main() during pseudo-device attachment.  All we need
301  * to do is allocate enough space for devices to be configured later, and
302  * add devsw entries.
303  */
304 static void
305 ccdattach()
306 {
307 	int i;
308 	int num = NCCD;
309 
310 	if (num > 1)
311 		printf("ccd0-%d: Concatenated disk drivers\n", num-1);
312 	else
313 		printf("ccd0: Concatenated disk driver\n");
314 
315 	ccd_softc = malloc(num * sizeof(struct ccd_softc), M_DEVBUF,
316 			    M_WAITOK | M_ZERO);
317 	ccddevs = malloc(num * sizeof(struct ccddevice), M_DEVBUF,
318 			    M_WAITOK | M_ZERO);
319 	numccd = num;
320 
321 	cdevsw_add(&ccd_cdevsw, 0, 0);
322 	/* XXX: is this necessary? */
323 	for (i = 0; i < numccd; ++i)
324 		ccddevs[i].ccd_dk = -1;
325 }
326 
327 static int
328 ccd_modevent(mod, type, data)
329 	module_t mod;
330 	int type;
331 	void *data;
332 {
333 	int error = 0;
334 
335 	switch (type) {
336 	case MOD_LOAD:
337 		ccdattach();
338 		break;
339 
340 	case MOD_UNLOAD:
341 		printf("ccd0: Unload not supported!\n");
342 		error = EOPNOTSUPP;
343 		break;
344 
345 	default:	/* MOD_SHUTDOWN etc */
346 		break;
347 	}
348 	return (error);
349 }
350 
351 DEV_MODULE(ccd, ccd_modevent, NULL);
352 
353 static int
354 ccdinit(struct ccddevice *ccd, char **cpaths, struct thread *td)
355 {
356 	struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit];
357 	struct ccdcinfo *ci = NULL;	/* XXX */
358 	size_t size;
359 	int ix;
360 	struct vnode *vp;
361 	size_t minsize;
362 	int maxsecsize;
363 	struct partinfo dpart;
364 	struct ccdgeom *ccg = &cs->sc_geom;
365 	char tmppath[MAXPATHLEN];
366 	int error = 0;
367 	struct ucred *cred;
368 
369 	KKASSERT(td->td_proc);
370 	cred = td->td_proc->p_ucred;
371 
372 #ifdef DEBUG
373 	if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
374 		printf("ccdinit: unit %d\n", ccd->ccd_unit);
375 #endif
376 
377 	cs->sc_size = 0;
378 	cs->sc_ileave = ccd->ccd_interleave;
379 	cs->sc_nccdisks = ccd->ccd_ndev;
380 
381 	/* Allocate space for the component info. */
382 	cs->sc_cinfo = malloc(cs->sc_nccdisks * sizeof(struct ccdcinfo),
383 	    M_DEVBUF, M_WAITOK);
384 
385 	/*
386 	 * Verify that each component piece exists and record
387 	 * relevant information about it.
388 	 */
389 	maxsecsize = 0;
390 	minsize = 0;
391 	for (ix = 0; ix < cs->sc_nccdisks; ix++) {
392 		vp = ccd->ccd_vpp[ix];
393 		ci = &cs->sc_cinfo[ix];
394 		ci->ci_vp = vp;
395 
396 		/*
397 		 * Copy in the pathname of the component.
398 		 */
399 		bzero(tmppath, sizeof(tmppath));	/* sanity */
400 		if ((error = copyinstr(cpaths[ix], tmppath,
401 		    MAXPATHLEN, &ci->ci_pathlen)) != 0) {
402 #ifdef DEBUG
403 			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
404 				printf("ccd%d: can't copy path, error = %d\n",
405 				    ccd->ccd_unit, error);
406 #endif
407 			goto fail;
408 		}
409 		ci->ci_path = malloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK);
410 		bcopy(tmppath, ci->ci_path, ci->ci_pathlen);
411 
412 		ci->ci_dev = vn_todev(vp);
413 
414 		/*
415 		 * Get partition information for the component.
416 		 */
417 		if ((error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart,
418 		    FREAD, cred, td)) != 0) {
419 #ifdef DEBUG
420 			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
421 				 printf("ccd%d: %s: ioctl failed, error = %d\n",
422 				     ccd->ccd_unit, ci->ci_path, error);
423 #endif
424 			goto fail;
425 		}
426 		if (dpart.part->p_fstype == FS_BSDFFS) {
427 			maxsecsize =
428 			    ((dpart.disklab->d_secsize > maxsecsize) ?
429 			    dpart.disklab->d_secsize : maxsecsize);
430 			size = dpart.part->p_size - CCD_OFFSET;
431 		} else {
432 #ifdef DEBUG
433 			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
434 				printf("ccd%d: %s: incorrect partition type\n",
435 				    ccd->ccd_unit, ci->ci_path);
436 #endif
437 			error = EFTYPE;
438 			goto fail;
439 		}
440 
441 		/*
442 		 * Calculate the size, truncating to an interleave
443 		 * boundary if necessary.
444 		 */
445 
446 		if (cs->sc_ileave > 1)
447 			size -= size % cs->sc_ileave;
448 
449 		if (size == 0) {
450 #ifdef DEBUG
451 			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
452 				printf("ccd%d: %s: size == 0\n",
453 				    ccd->ccd_unit, ci->ci_path);
454 #endif
455 			error = ENODEV;
456 			goto fail;
457 		}
458 
459 		if (minsize == 0 || size < minsize)
460 			minsize = size;
461 		ci->ci_size = size;
462 		cs->sc_size += size;
463 	}
464 
465 	/*
466 	 * Don't allow the interleave to be smaller than
467 	 * the biggest component sector.
468 	 */
469 	if ((cs->sc_ileave > 0) &&
470 	    (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) {
471 #ifdef DEBUG
472 		if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
473 			printf("ccd%d: interleave must be at least %d\n",
474 			    ccd->ccd_unit, (maxsecsize / DEV_BSIZE));
475 #endif
476 		error = EINVAL;
477 		goto fail;
478 	}
479 
480 	/*
481 	 * If uniform interleave is desired set all sizes to that of
482 	 * the smallest component.  This will guarentee that a single
483 	 * interleave table is generated.
484 	 *
485 	 * Lost space must be taken into account when calculating the
486 	 * overall size.  Half the space is lost when CCDF_MIRROR is
487 	 * specified.  One disk is lost when CCDF_PARITY is specified.
488 	 */
489 	if (ccd->ccd_flags & CCDF_UNIFORM) {
490 		for (ci = cs->sc_cinfo;
491 		     ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) {
492 			ci->ci_size = minsize;
493 		}
494 		if (ccd->ccd_flags & CCDF_MIRROR) {
495 			/*
496 			 * Check to see if an even number of components
497 			 * have been specified.  The interleave must also
498 			 * be non-zero in order for us to be able to
499 			 * guarentee the topology.
500 			 */
501 			if (cs->sc_nccdisks % 2) {
502 				printf("ccd%d: mirroring requires an even number of disks\n", ccd->ccd_unit );
503 				error = EINVAL;
504 				goto fail;
505 			}
506 			if (cs->sc_ileave == 0) {
507 				printf("ccd%d: an interleave must be specified when mirroring\n", ccd->ccd_unit);
508 				error = EINVAL;
509 				goto fail;
510 			}
511 			cs->sc_size = (cs->sc_nccdisks/2) * minsize;
512 		} else if (ccd->ccd_flags & CCDF_PARITY) {
513 			cs->sc_size = (cs->sc_nccdisks-1) * minsize;
514 		} else {
515 			if (cs->sc_ileave == 0) {
516 				printf("ccd%d: an interleave must be specified when using parity\n", ccd->ccd_unit);
517 				error = EINVAL;
518 				goto fail;
519 			}
520 			cs->sc_size = cs->sc_nccdisks * minsize;
521 		}
522 	}
523 
524 	/*
525 	 * Construct the interleave table.
526 	 */
527 	ccdinterleave(cs, ccd->ccd_unit);
528 
529 	/*
530 	 * Create pseudo-geometry based on 1MB cylinders.  It's
531 	 * pretty close.
532 	 */
533 	ccg->ccg_secsize = maxsecsize;
534 	ccg->ccg_ntracks = 1;
535 	ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize;
536 	ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors;
537 
538 	/*
539 	 * Add an devstat entry for this device.
540 	 */
541 	devstat_add_entry(&cs->device_stats, "ccd", ccd->ccd_unit,
542 			  ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED,
543 			  DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER,
544 			  DEVSTAT_PRIORITY_ARRAY);
545 
546 	cs->sc_flags |= CCDF_INITED;
547 	cs->sc_cflags = ccd->ccd_flags;	/* So we can find out later... */
548 	cs->sc_unit = ccd->ccd_unit;
549 	return (0);
550 fail:
551 	while (ci > cs->sc_cinfo) {
552 		ci--;
553 		free(ci->ci_path, M_DEVBUF);
554 	}
555 	free(cs->sc_cinfo, M_DEVBUF);
556 	return (error);
557 }
558 
559 static void
560 ccdinterleave(cs, unit)
561 	struct ccd_softc *cs;
562 	int unit;
563 {
564 	struct ccdcinfo *ci, *smallci;
565 	struct ccdiinfo *ii;
566 	daddr_t bn, lbn;
567 	int ix;
568 	u_long size;
569 
570 #ifdef DEBUG
571 	if (ccddebug & CCDB_INIT)
572 		printf("ccdinterleave(%x): ileave %d\n", cs, cs->sc_ileave);
573 #endif
574 
575 	/*
576 	 * Allocate an interleave table.  The worst case occurs when each
577 	 * of N disks is of a different size, resulting in N interleave
578 	 * tables.
579 	 *
580 	 * Chances are this is too big, but we don't care.
581 	 */
582 	size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo);
583 	cs->sc_itable = (struct ccdiinfo *)malloc(size, M_DEVBUF, M_WAITOK);
584 	bzero((caddr_t)cs->sc_itable, size);
585 
586 	/*
587 	 * Trivial case: no interleave (actually interleave of disk size).
588 	 * Each table entry represents a single component in its entirety.
589 	 *
590 	 * An interleave of 0 may not be used with a mirror or parity setup.
591 	 */
592 	if (cs->sc_ileave == 0) {
593 		bn = 0;
594 		ii = cs->sc_itable;
595 
596 		for (ix = 0; ix < cs->sc_nccdisks; ix++) {
597 			/* Allocate space for ii_index. */
598 			ii->ii_index = malloc(sizeof(int), M_DEVBUF, M_WAITOK);
599 			ii->ii_ndisk = 1;
600 			ii->ii_startblk = bn;
601 			ii->ii_startoff = 0;
602 			ii->ii_index[0] = ix;
603 			bn += cs->sc_cinfo[ix].ci_size;
604 			ii++;
605 		}
606 		ii->ii_ndisk = 0;
607 #ifdef DEBUG
608 		if (ccddebug & CCDB_INIT)
609 			printiinfo(cs->sc_itable);
610 #endif
611 		return;
612 	}
613 
614 	/*
615 	 * The following isn't fast or pretty; it doesn't have to be.
616 	 */
617 	size = 0;
618 	bn = lbn = 0;
619 	for (ii = cs->sc_itable; ; ii++) {
620 		/*
621 		 * Allocate space for ii_index.  We might allocate more then
622 		 * we use.
623 		 */
624 		ii->ii_index = malloc((sizeof(int) * cs->sc_nccdisks),
625 		    M_DEVBUF, M_WAITOK);
626 
627 		/*
628 		 * Locate the smallest of the remaining components
629 		 */
630 		smallci = NULL;
631 		for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_nccdisks];
632 		    ci++) {
633 			if (ci->ci_size > size &&
634 			    (smallci == NULL ||
635 			     ci->ci_size < smallci->ci_size)) {
636 				smallci = ci;
637 			}
638 		}
639 
640 		/*
641 		 * Nobody left, all done
642 		 */
643 		if (smallci == NULL) {
644 			ii->ii_ndisk = 0;
645 			break;
646 		}
647 
648 		/*
649 		 * Record starting logical block using an sc_ileave blocksize.
650 		 */
651 		ii->ii_startblk = bn / cs->sc_ileave;
652 
653 		/*
654 		 * Record starting comopnent block using an sc_ileave
655 		 * blocksize.  This value is relative to the beginning of
656 		 * a component disk.
657 		 */
658 		ii->ii_startoff = lbn;
659 
660 		/*
661 		 * Determine how many disks take part in this interleave
662 		 * and record their indices.
663 		 */
664 		ix = 0;
665 		for (ci = cs->sc_cinfo;
666 		    ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) {
667 			if (ci->ci_size >= smallci->ci_size) {
668 				ii->ii_index[ix++] = ci - cs->sc_cinfo;
669 			}
670 		}
671 		ii->ii_ndisk = ix;
672 		bn += ix * (smallci->ci_size - size);
673 		lbn = smallci->ci_size / cs->sc_ileave;
674 		size = smallci->ci_size;
675 	}
676 #ifdef DEBUG
677 	if (ccddebug & CCDB_INIT)
678 		printiinfo(cs->sc_itable);
679 #endif
680 }
681 
682 /* ARGSUSED */
683 static int
684 ccdopen(dev_t dev, int flags, int fmt, d_thread_t *td)
685 {
686 	int unit = ccdunit(dev);
687 	struct ccd_softc *cs;
688 	struct disklabel *lp;
689 	int error = 0, part, pmask;
690 
691 #ifdef DEBUG
692 	if (ccddebug & CCDB_FOLLOW)
693 		printf("ccdopen(%x, %x)\n", dev, flags);
694 #endif
695 	if (unit >= numccd)
696 		return (ENXIO);
697 	cs = &ccd_softc[unit];
698 
699 	if ((error = ccdlock(cs)) != 0)
700 		return (error);
701 
702 	lp = &cs->sc_label;
703 
704 	part = ccdpart(dev);
705 	pmask = (1 << part);
706 
707 	/*
708 	 * If we're initialized, check to see if there are any other
709 	 * open partitions.  If not, then it's safe to update
710 	 * the in-core disklabel.
711 	 */
712 	if ((cs->sc_flags & CCDF_INITED) && (cs->sc_openmask == 0))
713 		ccdgetdisklabel(dev);
714 
715 	/* Check that the partition exists. */
716 	if (part != RAW_PART && ((part >= lp->d_npartitions) ||
717 	    (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
718 		error = ENXIO;
719 		goto done;
720 	}
721 
722 	cs->sc_openmask |= pmask;
723  done:
724 	ccdunlock(cs);
725 	return (0);
726 }
727 
728 /* ARGSUSED */
729 static int
730 ccdclose(dev_t dev, int flags, int fmt, d_thread_t *td)
731 {
732 	int unit = ccdunit(dev);
733 	struct ccd_softc *cs;
734 	int error = 0, part;
735 
736 #ifdef DEBUG
737 	if (ccddebug & CCDB_FOLLOW)
738 		printf("ccdclose(%x, %x)\n", dev, flags);
739 #endif
740 
741 	if (unit >= numccd)
742 		return (ENXIO);
743 	cs = &ccd_softc[unit];
744 
745 	if ((error = ccdlock(cs)) != 0)
746 		return (error);
747 
748 	part = ccdpart(dev);
749 
750 	/* ...that much closer to allowing unconfiguration... */
751 	cs->sc_openmask &= ~(1 << part);
752 	ccdunlock(cs);
753 	return (0);
754 }
755 
756 static void
757 ccdstrategy(bp)
758 	struct buf *bp;
759 {
760 	int unit = ccdunit(bp->b_dev);
761 	struct ccd_softc *cs = &ccd_softc[unit];
762 	int wlabel;
763 	struct disklabel *lp;
764 
765 #ifdef DEBUG
766 	if (ccddebug & CCDB_FOLLOW)
767 		printf("ccdstrategy(%x): unit %d\n", bp, unit);
768 #endif
769 	if ((cs->sc_flags & CCDF_INITED) == 0) {
770 		bp->b_error = ENXIO;
771 		bp->b_flags |= B_ERROR;
772 		goto done;
773 	}
774 
775 	/* If it's a nil transfer, wake up the top half now. */
776 	if (bp->b_bcount == 0)
777 		goto done;
778 
779 	lp = &cs->sc_label;
780 
781 	/*
782 	 * Do bounds checking and adjust transfer.  If there's an
783 	 * error, the bounds check will flag that for us.
784 	 */
785 	wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING);
786 	if (ccdpart(bp->b_dev) != RAW_PART) {
787 		if (bounds_check_with_label(bp, lp, wlabel) <= 0)
788 			goto done;
789 	} else {
790 		int pbn;        /* in sc_secsize chunks */
791 		long sz;        /* in sc_secsize chunks */
792 
793 		pbn = bp->b_blkno / (cs->sc_geom.ccg_secsize / DEV_BSIZE);
794 		sz = howmany(bp->b_bcount, cs->sc_geom.ccg_secsize);
795 
796 		/*
797 		 * If out of bounds return an error. If at the EOF point,
798 		 * simply read or write less.
799 		 */
800 
801 		if (pbn < 0 || pbn >= cs->sc_size) {
802 			bp->b_resid = bp->b_bcount;
803 			if (pbn != cs->sc_size) {
804 				bp->b_error = EINVAL;
805 				bp->b_flags |= B_ERROR | B_INVAL;
806 			}
807 			goto done;
808 		}
809 
810 		/*
811 		 * If the request crosses EOF, truncate the request.
812 		 */
813 		if (pbn + sz > cs->sc_size) {
814 			bp->b_bcount = (cs->sc_size - pbn) *
815 			    cs->sc_geom.ccg_secsize;
816 		}
817 	}
818 
819 	bp->b_resid = bp->b_bcount;
820 
821 	/*
822 	 * "Start" the unit.
823 	 */
824 	crit_enter();
825 	ccdstart(cs, bp);
826 	crit_exit();
827 	return;
828 done:
829 	biodone(bp);
830 }
831 
832 static void
833 ccdstart(cs, bp)
834 	struct ccd_softc *cs;
835 	struct buf *bp;
836 {
837 	long bcount, rcount;
838 	struct ccdbuf *cbp[4];
839 	/* XXX! : 2 reads and 2 writes for RAID 4/5 */
840 	caddr_t addr;
841 	daddr_t bn;
842 	struct partition *pp;
843 
844 #ifdef DEBUG
845 	if (ccddebug & CCDB_FOLLOW)
846 		printf("ccdstart(%x, %x)\n", cs, bp);
847 #endif
848 
849 	/* Record the transaction start  */
850 	devstat_start_transaction(&cs->device_stats);
851 
852 	/*
853 	 * Translate the partition-relative block number to an absolute.
854 	 */
855 	bn = bp->b_blkno;
856 	if (ccdpart(bp->b_dev) != RAW_PART) {
857 		pp = &cs->sc_label.d_partitions[ccdpart(bp->b_dev)];
858 		bn += pp->p_offset;
859 	}
860 
861 	/*
862 	 * Allocate component buffers and fire off the requests
863 	 */
864 	addr = bp->b_data;
865 	for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) {
866 		ccdbuffer(cbp, cs, bp, bn, addr, bcount);
867 		rcount = cbp[0]->cb_buf.b_bcount;
868 
869 		if (cs->sc_cflags & CCDF_MIRROR) {
870 			/*
871 			 * Mirroring.  Writes go to both disks, reads are
872 			 * taken from whichever disk seems most appropriate.
873 			 *
874 			 * We attempt to localize reads to the disk whos arm
875 			 * is nearest the read request.  We ignore seeks due
876 			 * to writes when making this determination and we
877 			 * also try to avoid hogging.
878 			 */
879 			if ((cbp[0]->cb_buf.b_flags & B_READ) == 0) {
880 				cbp[0]->cb_buf.b_vp->v_numoutput++;
881 				cbp[1]->cb_buf.b_vp->v_numoutput++;
882 				VOP_STRATEGY(cbp[0]->cb_buf.b_vp,
883 				    &cbp[0]->cb_buf);
884 				VOP_STRATEGY(cbp[1]->cb_buf.b_vp,
885 				    &cbp[1]->cb_buf);
886 			} else {
887 				int pick = cs->sc_pick;
888 				daddr_t range = cs->sc_size / 16;
889 
890 				if (bn < cs->sc_blk[pick] - range ||
891 				    bn > cs->sc_blk[pick] + range
892 				) {
893 					cs->sc_pick = pick = 1 - pick;
894 				}
895 				cs->sc_blk[pick] = bn + btodb(rcount);
896 				VOP_STRATEGY(cbp[pick]->cb_buf.b_vp,
897 				    &cbp[pick]->cb_buf);
898 			}
899 		} else {
900 			/*
901 			 * Not mirroring
902 			 */
903 			if ((cbp[0]->cb_buf.b_flags & B_READ) == 0)
904 				cbp[0]->cb_buf.b_vp->v_numoutput++;
905 			VOP_STRATEGY(cbp[0]->cb_buf.b_vp, &cbp[0]->cb_buf);
906 		}
907 		bn += btodb(rcount);
908 		addr += rcount;
909 	}
910 }
911 
912 /*
913  * Build a component buffer header.
914  */
915 static void
916 ccdbuffer(cb, cs, bp, bn, addr, bcount)
917 	struct ccdbuf **cb;
918 	struct ccd_softc *cs;
919 	struct buf *bp;
920 	daddr_t bn;
921 	caddr_t addr;
922 	long bcount;
923 {
924 	struct ccdcinfo *ci, *ci2 = NULL;	/* XXX */
925 	struct ccdbuf *cbp;
926 	daddr_t cbn, cboff;
927 	off_t cbc;
928 
929 #ifdef DEBUG
930 	if (ccddebug & CCDB_IO)
931 		printf("ccdbuffer(%x, %x, %d, %x, %d)\n",
932 		       cs, bp, bn, addr, bcount);
933 #endif
934 	/*
935 	 * Determine which component bn falls in.
936 	 */
937 	cbn = bn;
938 	cboff = 0;
939 
940 	if (cs->sc_ileave == 0) {
941 		/*
942 		 * Serially concatenated and neither a mirror nor a parity
943 		 * config.  This is a special case.
944 		 */
945 		daddr_t sblk;
946 
947 		sblk = 0;
948 		for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++)
949 			sblk += ci->ci_size;
950 		cbn -= sblk;
951 	} else {
952 		struct ccdiinfo *ii;
953 		int ccdisk, off;
954 
955 		/*
956 		 * Calculate cbn, the logical superblock (sc_ileave chunks),
957 		 * and cboff, a normal block offset (DEV_BSIZE chunks) relative
958 		 * to cbn.
959 		 */
960 		cboff = cbn % cs->sc_ileave;	/* DEV_BSIZE gran */
961 		cbn = cbn / cs->sc_ileave;	/* DEV_BSIZE * ileave gran */
962 
963 		/*
964 		 * Figure out which interleave table to use.
965 		 */
966 		for (ii = cs->sc_itable; ii->ii_ndisk; ii++) {
967 			if (ii->ii_startblk > cbn)
968 				break;
969 		}
970 		ii--;
971 
972 		/*
973 		 * off is the logical superblock relative to the beginning
974 		 * of this interleave block.
975 		 */
976 		off = cbn - ii->ii_startblk;
977 
978 		/*
979 		 * We must calculate which disk component to use (ccdisk),
980 		 * and recalculate cbn to be the superblock relative to
981 		 * the beginning of the component.  This is typically done by
982 		 * adding 'off' and ii->ii_startoff together.  However, 'off'
983 		 * must typically be divided by the number of components in
984 		 * this interleave array to be properly convert it from a
985 		 * CCD-relative logical superblock number to a
986 		 * component-relative superblock number.
987 		 */
988 		if (ii->ii_ndisk == 1) {
989 			/*
990 			 * When we have just one disk, it can't be a mirror
991 			 * or a parity config.
992 			 */
993 			ccdisk = ii->ii_index[0];
994 			cbn = ii->ii_startoff + off;
995 		} else {
996 			if (cs->sc_cflags & CCDF_MIRROR) {
997 				/*
998 				 * We have forced a uniform mapping, resulting
999 				 * in a single interleave array.  We double
1000 				 * up on the first half of the available
1001 				 * components and our mirror is in the second
1002 				 * half.  This only works with a single
1003 				 * interleave array because doubling up
1004 				 * doubles the number of sectors, so there
1005 				 * cannot be another interleave array because
1006 				 * the next interleave array's calculations
1007 				 * would be off.
1008 				 */
1009 				int ndisk2 = ii->ii_ndisk / 2;
1010 				ccdisk = ii->ii_index[off % ndisk2];
1011 				cbn = ii->ii_startoff + off / ndisk2;
1012 				ci2 = &cs->sc_cinfo[ccdisk + ndisk2];
1013 			} else if (cs->sc_cflags & CCDF_PARITY) {
1014 				/*
1015 				 * XXX not implemented yet
1016 				 */
1017 				int ndisk2 = ii->ii_ndisk - 1;
1018 				ccdisk = ii->ii_index[off % ndisk2];
1019 				cbn = ii->ii_startoff + off / ndisk2;
1020 				if (cbn % ii->ii_ndisk <= ccdisk)
1021 					ccdisk++;
1022 			} else {
1023 				ccdisk = ii->ii_index[off % ii->ii_ndisk];
1024 				cbn = ii->ii_startoff + off / ii->ii_ndisk;
1025 			}
1026 		}
1027 
1028 		ci = &cs->sc_cinfo[ccdisk];
1029 
1030 		/*
1031 		 * Convert cbn from a superblock to a normal block so it
1032 		 * can be used to calculate (along with cboff) the normal
1033 		 * block index into this particular disk.
1034 		 */
1035 		cbn *= cs->sc_ileave;
1036 	}
1037 
1038 	/*
1039 	 * Fill in the component buf structure.
1040 	 */
1041 	cbp = getccdbuf(NULL);
1042 	cbp->cb_buf.b_flags = bp->b_flags;
1043 	cbp->cb_buf.b_iodone = (void (*)(struct buf *))ccdiodone;
1044 	cbp->cb_buf.b_dev = ci->ci_dev;		/* XXX */
1045 	cbp->cb_buf.b_blkno = cbn + cboff + CCD_OFFSET;
1046 	cbp->cb_buf.b_offset = dbtob(cbn + cboff + CCD_OFFSET);
1047 	cbp->cb_buf.b_data = addr;
1048 	cbp->cb_buf.b_vp = ci->ci_vp;
1049 	if (cs->sc_ileave == 0)
1050               cbc = dbtob((off_t)(ci->ci_size - cbn));
1051 	else
1052               cbc = dbtob((off_t)(cs->sc_ileave - cboff));
1053 	cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount;
1054  	cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount;
1055 
1056 	/*
1057 	 * context for ccdiodone
1058 	 */
1059 	cbp->cb_obp = bp;
1060 	cbp->cb_unit = cs - ccd_softc;
1061 	cbp->cb_comp = ci - cs->sc_cinfo;
1062 
1063 #ifdef DEBUG
1064 	if (ccddebug & CCDB_IO)
1065 		printf(" dev %x(u%d): cbp %x bn %d addr %x bcnt %d\n",
1066 		       ci->ci_dev, ci-cs->sc_cinfo, cbp, cbp->cb_buf.b_blkno,
1067 		       cbp->cb_buf.b_data, cbp->cb_buf.b_bcount);
1068 #endif
1069 	cb[0] = cbp;
1070 
1071 	/*
1072 	 * Note: both I/O's setup when reading from mirror, but only one
1073 	 * will be executed.
1074 	 */
1075 	if (cs->sc_cflags & CCDF_MIRROR) {
1076 		/* mirror, setup second I/O */
1077 		cbp = getccdbuf(cb[0]);
1078 		cbp->cb_buf.b_dev = ci2->ci_dev;
1079 		cbp->cb_buf.b_vp = ci2->ci_vp;
1080 		cbp->cb_comp = ci2 - cs->sc_cinfo;
1081 		cb[1] = cbp;
1082 		/* link together the ccdbuf's and clear "mirror done" flag */
1083 		cb[0]->cb_mirror = cb[1];
1084 		cb[1]->cb_mirror = cb[0];
1085 		cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE;
1086 		cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE;
1087 	}
1088 }
1089 
1090 static void
1091 ccdintr(cs, bp)
1092 	struct ccd_softc *cs;
1093 	struct buf *bp;
1094 {
1095 #ifdef DEBUG
1096 	if (ccddebug & CCDB_FOLLOW)
1097 		printf("ccdintr(%x, %x)\n", cs, bp);
1098 #endif
1099 	/*
1100 	 * Request is done for better or worse, wakeup the top half.
1101 	 */
1102 	if (bp->b_flags & B_ERROR)
1103 		bp->b_resid = bp->b_bcount;
1104 	devstat_end_transaction_buf(&cs->device_stats, bp);
1105 	biodone(bp);
1106 }
1107 
1108 /*
1109  * Called at interrupt time.
1110  * Mark the component as done and if all components are done,
1111  * take a ccd interrupt.
1112  */
1113 static void
1114 ccdiodone(cbp)
1115 	struct ccdbuf *cbp;
1116 {
1117 	struct buf *bp = cbp->cb_obp;
1118 	int unit = cbp->cb_unit;
1119 	int count;
1120 
1121 	crit_enter();
1122 #ifdef DEBUG
1123 	if (ccddebug & CCDB_FOLLOW)
1124 		printf("ccdiodone(%x)\n", cbp);
1125 	if (ccddebug & CCDB_IO) {
1126 		printf("ccdiodone: bp %x bcount %d resid %d\n",
1127 		       bp, bp->b_bcount, bp->b_resid);
1128 		printf(" dev %x(u%d), cbp %x bn %d addr %x bcnt %d\n",
1129 		       cbp->cb_buf.b_dev, cbp->cb_comp, cbp,
1130 		       cbp->cb_buf.b_blkno, cbp->cb_buf.b_data,
1131 		       cbp->cb_buf.b_bcount);
1132 	}
1133 #endif
1134 	/*
1135 	 * If an error occured, report it.  If this is a mirrored
1136 	 * configuration and the first of two possible reads, do not
1137 	 * set the error in the bp yet because the second read may
1138 	 * succeed.
1139 	 */
1140 
1141 	if (cbp->cb_buf.b_flags & B_ERROR) {
1142 		const char *msg = "";
1143 
1144 		if ((ccd_softc[unit].sc_cflags & CCDF_MIRROR) &&
1145 		    (cbp->cb_buf.b_flags & B_READ) &&
1146 		    (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1147 			/*
1148 			 * We will try our read on the other disk down
1149 			 * below, also reverse the default pick so if we
1150 			 * are doing a scan we do not keep hitting the
1151 			 * bad disk first.
1152 			 */
1153 			struct ccd_softc *cs = &ccd_softc[unit];
1154 
1155 			msg = ", trying other disk";
1156 			cs->sc_pick = 1 - cs->sc_pick;
1157 			cs->sc_blk[cs->sc_pick] = bp->b_blkno;
1158 		} else {
1159 			bp->b_flags |= B_ERROR;
1160 			bp->b_error = cbp->cb_buf.b_error ?
1161 			    cbp->cb_buf.b_error : EIO;
1162 		}
1163 		printf("ccd%d: error %d on component %d block %d (ccd block %d)%s\n",
1164 		       unit, bp->b_error, cbp->cb_comp,
1165 		       (int)cbp->cb_buf.b_blkno, bp->b_blkno, msg);
1166 	}
1167 
1168 	/*
1169 	 * Process mirror.  If we are writing, I/O has been initiated on both
1170 	 * buffers and we fall through only after both are finished.
1171 	 *
1172 	 * If we are reading only one I/O is initiated at a time.  If an
1173 	 * error occurs we initiate the second I/O and return, otherwise
1174 	 * we free the second I/O without initiating it.
1175 	 */
1176 
1177 	if (ccd_softc[unit].sc_cflags & CCDF_MIRROR) {
1178 		if ((cbp->cb_buf.b_flags & B_READ) == 0) {
1179 			/*
1180 			 * When writing, handshake with the second buffer
1181 			 * to determine when both are done.  If both are not
1182 			 * done, return here.
1183 			 */
1184 			if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1185 				cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE;
1186 				putccdbuf(cbp);
1187 				crit_exit();
1188 				return;
1189 			}
1190 		} else {
1191 			/*
1192 			 * When reading, either dispose of the second buffer
1193 			 * or initiate I/O on the second buffer if an error
1194 			 * occured with this one.
1195 			 */
1196 			if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1197 				if (cbp->cb_buf.b_flags & B_ERROR) {
1198 					cbp->cb_mirror->cb_pflags |=
1199 					    CCDPF_MIRROR_DONE;
1200 					VOP_STRATEGY(
1201 					    cbp->cb_mirror->cb_buf.b_vp,
1202 					    &cbp->cb_mirror->cb_buf
1203 					);
1204 					putccdbuf(cbp);
1205 					crit_exit();
1206 					return;
1207 				} else {
1208 					putccdbuf(cbp->cb_mirror);
1209 					/* fall through */
1210 				}
1211 			}
1212 		}
1213 	}
1214 
1215 	/*
1216 	 * use b_bufsize to determine how big the original request was rather
1217 	 * then b_bcount, because b_bcount may have been truncated for EOF.
1218 	 *
1219 	 * XXX We check for an error, but we do not test the resid for an
1220 	 * aligned EOF condition.  This may result in character & block
1221 	 * device access not recognizing EOF properly when read or written
1222 	 * sequentially, but will not effect filesystems.
1223 	 */
1224 	count = cbp->cb_buf.b_bufsize;
1225 	putccdbuf(cbp);
1226 
1227 	/*
1228 	 * If all done, "interrupt".
1229 	 */
1230 	bp->b_resid -= count;
1231 	if (bp->b_resid < 0)
1232 		panic("ccdiodone: count");
1233 	if (bp->b_resid == 0)
1234 		ccdintr(&ccd_softc[unit], bp);
1235 	crit_exit();
1236 }
1237 
1238 static int
1239 ccdioctl(dev_t dev, u_long cmd, caddr_t data, int flag, d_thread_t *td)
1240 {
1241 	int unit = ccdunit(dev);
1242 	int i, j, lookedup = 0, error = 0;
1243 	int part, pmask;
1244 	struct ccd_softc *cs;
1245 	struct ccd_ioctl *ccio = (struct ccd_ioctl *)data;
1246 	struct ccddevice ccd;
1247 	char **cpp;
1248 	struct vnode **vpp;
1249 	struct ucred *cred;
1250 
1251 	KKASSERT(td->td_proc != NULL);
1252 	cred = td->td_proc->p_ucred;
1253 
1254 	if (unit >= numccd)
1255 		return (ENXIO);
1256 	cs = &ccd_softc[unit];
1257 
1258 	bzero(&ccd, sizeof(ccd));
1259 
1260 	switch (cmd) {
1261 	case CCDIOCSET:
1262 		if (cs->sc_flags & CCDF_INITED)
1263 			return (EBUSY);
1264 
1265 		if ((flag & FWRITE) == 0)
1266 			return (EBADF);
1267 
1268 		if ((error = ccdlock(cs)) != 0)
1269 			return (error);
1270 
1271 		if (ccio->ccio_ndisks > CCD_MAXNDISKS)
1272 			return (EINVAL);
1273 
1274 		/* Fill in some important bits. */
1275 		ccd.ccd_unit = unit;
1276 		ccd.ccd_interleave = ccio->ccio_ileave;
1277 		if (ccd.ccd_interleave == 0 &&
1278 		    ((ccio->ccio_flags & CCDF_MIRROR) ||
1279 		     (ccio->ccio_flags & CCDF_PARITY))) {
1280 			printf("ccd%d: disabling mirror/parity, interleave is 0\n", unit);
1281 			ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY);
1282 		}
1283 		if ((ccio->ccio_flags & CCDF_MIRROR) &&
1284 		    (ccio->ccio_flags & CCDF_PARITY)) {
1285 			printf("ccd%d: can't specify both mirror and parity, using mirror\n", unit);
1286 			ccio->ccio_flags &= ~CCDF_PARITY;
1287 		}
1288 		if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) &&
1289 		    !(ccio->ccio_flags & CCDF_UNIFORM)) {
1290 			printf("ccd%d: mirror/parity forces uniform flag\n",
1291 			       unit);
1292 			ccio->ccio_flags |= CCDF_UNIFORM;
1293 		}
1294 		ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK;
1295 
1296 		/*
1297 		 * Allocate space for and copy in the array of
1298 		 * componet pathnames and device numbers.
1299 		 */
1300 		cpp = malloc(ccio->ccio_ndisks * sizeof(char *),
1301 		    M_DEVBUF, M_WAITOK);
1302 		vpp = malloc(ccio->ccio_ndisks * sizeof(struct vnode *),
1303 		    M_DEVBUF, M_WAITOK);
1304 
1305 		error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp,
1306 		    ccio->ccio_ndisks * sizeof(char **));
1307 		if (error) {
1308 			free(vpp, M_DEVBUF);
1309 			free(cpp, M_DEVBUF);
1310 			ccdunlock(cs);
1311 			return (error);
1312 		}
1313 
1314 #ifdef DEBUG
1315 		if (ccddebug & CCDB_INIT)
1316 			for (i = 0; i < ccio->ccio_ndisks; ++i)
1317 				printf("ccdioctl: component %d: 0x%x\n",
1318 				    i, cpp[i]);
1319 #endif
1320 
1321 		for (i = 0; i < ccio->ccio_ndisks; ++i) {
1322 #ifdef DEBUG
1323 			if (ccddebug & CCDB_INIT)
1324 				printf("ccdioctl: lookedup = %d\n", lookedup);
1325 #endif
1326 			if ((error = ccdlookup(cpp[i], td, &vpp[i])) != 0) {
1327 				for (j = 0; j < lookedup; ++j)
1328 					(void)vn_close(vpp[j], FREAD|FWRITE, td);
1329 				free(vpp, M_DEVBUF);
1330 				free(cpp, M_DEVBUF);
1331 				ccdunlock(cs);
1332 				return (error);
1333 			}
1334 			++lookedup;
1335 		}
1336 		ccd.ccd_cpp = cpp;
1337 		ccd.ccd_vpp = vpp;
1338 		ccd.ccd_ndev = ccio->ccio_ndisks;
1339 
1340 		/*
1341 		 * Initialize the ccd.  Fills in the softc for us.
1342 		 */
1343 		if ((error = ccdinit(&ccd, cpp, td)) != 0) {
1344 			for (j = 0; j < lookedup; ++j)
1345 				(void)vn_close(vpp[j], FREAD|FWRITE, td);
1346 			bzero(&ccd_softc[unit], sizeof(struct ccd_softc));
1347 			free(vpp, M_DEVBUF);
1348 			free(cpp, M_DEVBUF);
1349 			ccdunlock(cs);
1350 			return (error);
1351 		}
1352 
1353 		/*
1354 		 * The ccd has been successfully initialized, so
1355 		 * we can place it into the array and read the disklabel.
1356 		 */
1357 		bcopy(&ccd, &ccddevs[unit], sizeof(ccd));
1358 		ccio->ccio_unit = unit;
1359 		ccio->ccio_size = cs->sc_size;
1360 		ccdgetdisklabel(dev);
1361 
1362 		ccdunlock(cs);
1363 
1364 		break;
1365 
1366 	case CCDIOCCLR:
1367 		if ((cs->sc_flags & CCDF_INITED) == 0)
1368 			return (ENXIO);
1369 
1370 		if ((flag & FWRITE) == 0)
1371 			return (EBADF);
1372 
1373 		if ((error = ccdlock(cs)) != 0)
1374 			return (error);
1375 
1376 		/* Don't unconfigure if any other partitions are open */
1377 		part = ccdpart(dev);
1378 		pmask = (1 << part);
1379 		if ((cs->sc_openmask & ~pmask)) {
1380 			ccdunlock(cs);
1381 			return (EBUSY);
1382 		}
1383 
1384 		/*
1385 		 * Free ccd_softc information and clear entry.
1386 		 */
1387 
1388 		/* Close the components and free their pathnames. */
1389 		for (i = 0; i < cs->sc_nccdisks; ++i) {
1390 			/*
1391 			 * XXX: this close could potentially fail and
1392 			 * cause Bad Things.  Maybe we need to force
1393 			 * the close to happen?
1394 			 */
1395 #ifdef DEBUG
1396 			if (ccddebug & CCDB_VNODE)
1397 				vprint("CCDIOCCLR: vnode info",
1398 				    cs->sc_cinfo[i].ci_vp);
1399 #endif
1400 			(void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, td);
1401 			free(cs->sc_cinfo[i].ci_path, M_DEVBUF);
1402 		}
1403 
1404 		/* Free interleave index. */
1405 		for (i = 0; cs->sc_itable[i].ii_ndisk; ++i)
1406 			free(cs->sc_itable[i].ii_index, M_DEVBUF);
1407 
1408 		/* Free component info and interleave table. */
1409 		free(cs->sc_cinfo, M_DEVBUF);
1410 		free(cs->sc_itable, M_DEVBUF);
1411 		cs->sc_flags &= ~CCDF_INITED;
1412 
1413 		/*
1414 		 * Free ccddevice information and clear entry.
1415 		 */
1416 		free(ccddevs[unit].ccd_cpp, M_DEVBUF);
1417 		free(ccddevs[unit].ccd_vpp, M_DEVBUF);
1418 		ccd.ccd_dk = -1;
1419 		bcopy(&ccd, &ccddevs[unit], sizeof(ccd));
1420 
1421 		/*
1422 		 * And remove the devstat entry.
1423 		 */
1424 		devstat_remove_entry(&cs->device_stats);
1425 
1426 		/* This must be atomic. */
1427 		crit_enter();
1428 		ccdunlock(cs);
1429 		bzero(cs, sizeof(struct ccd_softc));
1430 		crit_exit();
1431 
1432 		break;
1433 
1434 	case DIOCGDINFO:
1435 		if ((cs->sc_flags & CCDF_INITED) == 0)
1436 			return (ENXIO);
1437 
1438 		*(struct disklabel *)data = cs->sc_label;
1439 		break;
1440 
1441 	case DIOCGPART:
1442 		if ((cs->sc_flags & CCDF_INITED) == 0)
1443 			return (ENXIO);
1444 
1445 		((struct partinfo *)data)->disklab = &cs->sc_label;
1446 		((struct partinfo *)data)->part =
1447 		    &cs->sc_label.d_partitions[ccdpart(dev)];
1448 		break;
1449 
1450 	case DIOCWDINFO:
1451 	case DIOCSDINFO:
1452 		if ((cs->sc_flags & CCDF_INITED) == 0)
1453 			return (ENXIO);
1454 
1455 		if ((flag & FWRITE) == 0)
1456 			return (EBADF);
1457 
1458 		if ((error = ccdlock(cs)) != 0)
1459 			return (error);
1460 
1461 		cs->sc_flags |= CCDF_LABELLING;
1462 
1463 		error = setdisklabel(&cs->sc_label,
1464 		    (struct disklabel *)data, 0);
1465 		if (error == 0) {
1466 			if (cmd == DIOCWDINFO) {
1467 				dev_t cdev = CCDLABELDEV(dev);
1468 				error = writedisklabel(cdev, &cs->sc_label);
1469 			}
1470 		}
1471 
1472 		cs->sc_flags &= ~CCDF_LABELLING;
1473 
1474 		ccdunlock(cs);
1475 
1476 		if (error)
1477 			return (error);
1478 		break;
1479 
1480 	case DIOCWLABEL:
1481 		if ((cs->sc_flags & CCDF_INITED) == 0)
1482 			return (ENXIO);
1483 
1484 		if ((flag & FWRITE) == 0)
1485 			return (EBADF);
1486 		if (*(int *)data != 0)
1487 			cs->sc_flags |= CCDF_WLABEL;
1488 		else
1489 			cs->sc_flags &= ~CCDF_WLABEL;
1490 		break;
1491 
1492 	default:
1493 		return (ENOTTY);
1494 	}
1495 
1496 	return (0);
1497 }
1498 
1499 static int
1500 ccdsize(dev_t dev)
1501 {
1502 	struct ccd_softc *cs;
1503 	int part, size;
1504 
1505 	if (ccdopen(dev, 0, S_IFCHR, curthread))
1506 		return (-1);
1507 
1508 	cs = &ccd_softc[ccdunit(dev)];
1509 	part = ccdpart(dev);
1510 
1511 	if ((cs->sc_flags & CCDF_INITED) == 0)
1512 		return (-1);
1513 
1514 	if (cs->sc_label.d_partitions[part].p_fstype != FS_SWAP)
1515 		size = -1;
1516 	else
1517 		size = cs->sc_label.d_partitions[part].p_size;
1518 
1519 	if (ccdclose(dev, 0, S_IFCHR, curthread))
1520 		return (-1);
1521 
1522 	return (size);
1523 }
1524 
/*
 * Dump entry point.  Dumping is not implemented for ccd devices: the
 * arguments are ignored and ENXIO is always returned.
 */
static int
ccddump(dev_t dev, u_int count, u_int blkno, u_int secsize)
{
	(void)dev;
	(void)count;
	(void)blkno;
	(void)secsize;

	return (ENXIO);
}
1531 
1532 /*
1533  * Lookup the provided name in the filesystem.  If the file exists,
1534  * is a valid block device, and isn't being used by anyone else,
1535  * set *vpp to the file's vnode.
1536  */
1537 static int
1538 ccdlookup(char *path, struct thread *td, struct vnode **vpp)
1539 {
1540 	struct nlookupdata nd;
1541 	struct ucred *cred;
1542 	struct vnode *vp;
1543 	int error;
1544 
1545 	KKASSERT(td->td_proc);
1546 	cred = td->td_proc->p_ucred;
1547 	*vpp = NULL;
1548 
1549 	error = nlookup_init(&nd, path, UIO_USERSPACE, NLC_FOLLOW|NLC_LOCKVP);
1550 	if (error)
1551 		return (error);
1552 	if ((error = vn_open(&nd, NULL, FREAD|FWRITE, 0)) != 0) {
1553 #ifdef DEBUG
1554 		if (ccddebug & CCDB_FOLLOW|CCDB_INIT)
1555 			printf("ccdlookup: vn_open error = %d\n", error);
1556 #endif
1557 		goto done;
1558 	}
1559 	vp = nd.nl_open_vp;
1560 
1561 	if (vp->v_usecount > 1) {
1562 		error = EBUSY;
1563 		goto done;
1564 	}
1565 
1566 	if (!vn_isdisk(vp, &error))
1567 		goto done;
1568 
1569 #ifdef DEBUG
1570 	if (ccddebug & CCDB_VNODE)
1571 		vprint("ccdlookup: vnode info", vp);
1572 #endif
1573 
1574 	VOP_UNLOCK(vp, 0, td);
1575 	nd.nl_open_vp = NULL;
1576 	nlookup_done(&nd);
1577 	*vpp = vp;				/* leave ref intact  */
1578 	return (0);
1579 done:
1580 	nlookup_done(&nd);
1581 	return (error);
1582 }
1583 
1584 /*
1585  * Read the disklabel from the ccd.  If one is not present, fake one
1586  * up.
1587  */
1588 static void
1589 ccdgetdisklabel(dev)
1590 	dev_t dev;
1591 {
1592 	int unit = ccdunit(dev);
1593 	struct ccd_softc *cs = &ccd_softc[unit];
1594 	char *errstring;
1595 	struct disklabel *lp = &cs->sc_label;
1596 	struct ccdgeom *ccg = &cs->sc_geom;
1597 	dev_t cdev;
1598 
1599 	bzero(lp, sizeof(*lp));
1600 
1601 	lp->d_secperunit = cs->sc_size;
1602 	lp->d_secsize = ccg->ccg_secsize;
1603 	lp->d_nsectors = ccg->ccg_nsectors;
1604 	lp->d_ntracks = ccg->ccg_ntracks;
1605 	lp->d_ncylinders = ccg->ccg_ncylinders;
1606 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1607 
1608 	strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename));
1609 	lp->d_type = DTYPE_CCD;
1610 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1611 	lp->d_rpm = 3600;
1612 	lp->d_interleave = 1;
1613 	lp->d_flags = 0;
1614 
1615 	lp->d_partitions[RAW_PART].p_offset = 0;
1616 	lp->d_partitions[RAW_PART].p_size = cs->sc_size;
1617 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1618 	lp->d_npartitions = RAW_PART + 1;
1619 
1620 	lp->d_bbsize = BBSIZE;				/* XXX */
1621 	lp->d_sbsize = SBSIZE;				/* XXX */
1622 
1623 	lp->d_magic = DISKMAGIC;
1624 	lp->d_magic2 = DISKMAGIC;
1625 	lp->d_checksum = dkcksum(&cs->sc_label);
1626 
1627 	/*
1628 	 * Call the generic disklabel extraction routine.
1629 	 */
1630 	cdev = CCDLABELDEV(dev);
1631 	errstring = readdisklabel(cdev, &cs->sc_label);
1632 	if (errstring != NULL)
1633 		ccdmakedisklabel(cs);
1634 
1635 #ifdef DEBUG
1636 	/* It's actually extremely common to have unlabeled ccds. */
1637 	if (ccddebug & CCDB_LABEL)
1638 		if (errstring != NULL)
1639 			printf("ccd%d: %s\n", unit, errstring);
1640 #endif
1641 }
1642 
1643 /*
1644  * Take care of things one might want to take care of in the event
1645  * that a disklabel isn't present.
1646  */
1647 static void
1648 ccdmakedisklabel(cs)
1649 	struct ccd_softc *cs;
1650 {
1651 	struct disklabel *lp = &cs->sc_label;
1652 
1653 	/*
1654 	 * For historical reasons, if there's no disklabel present
1655 	 * the raw partition must be marked FS_BSDFFS.
1656 	 */
1657 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1658 
1659 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1660 }
1661 
1662 /*
1663  * Wait interruptibly for an exclusive lock.
1664  *
1665  * XXX
1666  * Several drivers do this; it should be abstracted and made MP-safe.
1667  */
1668 static int
1669 ccdlock(cs)
1670 	struct ccd_softc *cs;
1671 {
1672 	int error;
1673 
1674 	while ((cs->sc_flags & CCDF_LOCKED) != 0) {
1675 		cs->sc_flags |= CCDF_WANTED;
1676 		if ((error = tsleep(cs, PCATCH, "ccdlck", 0)) != 0)
1677 			return (error);
1678 	}
1679 	cs->sc_flags |= CCDF_LOCKED;
1680 	return (0);
1681 }
1682 
1683 /*
1684  * Unlock and wake up any waiters.
1685  */
1686 static void
1687 ccdunlock(cs)
1688 	struct ccd_softc *cs;
1689 {
1690 
1691 	cs->sc_flags &= ~CCDF_LOCKED;
1692 	if ((cs->sc_flags & CCDF_WANTED) != 0) {
1693 		cs->sc_flags &= ~CCDF_WANTED;
1694 		wakeup(cs);
1695 	}
1696 }
1697 
#ifdef DEBUG
/*
 * Debug helper: print every entry of the interleave table starting at
 * 'ii'.  The table is terminated by an entry whose ii_ndisk is zero.
 * Each line shows the entry number, disk count, start block, start
 * offset and the ii_index values.
 */
static void
printiinfo(struct ccdiinfo *ii)
{
	struct ccdiinfo *entry = ii;
	int slot = 0;

	while (entry->ii_ndisk) {
		int d = 0;

		printf(" itab[%d]: #dk %d sblk %d soff %d",
		       slot, entry->ii_ndisk, entry->ii_startblk,
		       entry->ii_startoff);
		while (d < entry->ii_ndisk) {
			printf(" %d", entry->ii_index[d]);
			d++;
		}
		printf("\n");

		slot++;
		entry++;
	}
}
#endif
1714 
1715 
1716 /* Local Variables: */
1717 /* c-argdecl-indent: 8 */
1718 /* c-continued-statement-offset: 8 */
1719 /* c-indent-level: 8 */
1720 /* End: */
1721