xref: /dragonfly/sys/dev/disk/ccd/ccd.c (revision af79c6e5)
1 /* $FreeBSD: src/sys/dev/ccd/ccd.c,v 1.73.2.1 2001/09/11 09:49:52 kris Exp $ */
2 /* $DragonFly: src/sys/dev/disk/ccd/ccd.c,v 1.12 2003/09/23 05:03:40 dillon Exp $ */
3 
4 /*	$NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $	*/
5 
6 /*
7  * Copyright (c) 1995 Jason R. Thorpe.
8  * All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed for the NetBSD Project
21  *	by Jason R. Thorpe.
22  * 4. The name of the author may not be used to endorse or promote products
23  *    derived from this software without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
26  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
27  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
28  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
29  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
30  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
31  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
32  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
33  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  */
37 
38 /*
39  * Copyright (c) 1988 University of Utah.
40  * Copyright (c) 1990, 1993
41  *	The Regents of the University of California.  All rights reserved.
42  *
43  * This code is derived from software contributed to Berkeley by
44  * the Systems Programming Group of the University of Utah Computer
45  * Science Department.
46  *
47  * Redistribution and use in source and binary forms, with or without
48  * modification, are permitted provided that the following conditions
49  * are met:
50  * 1. Redistributions of source code must retain the above copyright
51  *    notice, this list of conditions and the following disclaimer.
52  * 2. Redistributions in binary form must reproduce the above copyright
53  *    notice, this list of conditions and the following disclaimer in the
54  *    documentation and/or other materials provided with the distribution.
55  * 3. All advertising materials mentioning features or use of this software
56  *    must display the following acknowledgement:
57  *	This product includes software developed by the University of
58  *	California, Berkeley and its contributors.
59  * 4. Neither the name of the University nor the names of its contributors
60  *    may be used to endorse or promote products derived from this software
61  *    without specific prior written permission.
62  *
63  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
64  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
65  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
66  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
67  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
68  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
69  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
70  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
71  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
72  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
73  * SUCH DAMAGE.
74  *
75  * from: Utah $Hdr: cd.c 1.6 90/11/28$
76  *
77  *	@(#)cd.c	8.2 (Berkeley) 11/16/93
78  */
79 
80 /*
81  * "Concatenated" disk driver.
82  *
83  * Dynamic configuration and disklabel support by:
84  *	Jason R. Thorpe <thorpej@nas.nasa.gov>
85  *	Numerical Aerodynamic Simulation Facility
86  *	Mail Stop 258-6
87  *	NASA Ames Research Center
88  *	Moffett Field, CA 94035
89  */
90 
91 #include "use_ccd.h"
92 
93 #include <sys/param.h>
94 #include <sys/systm.h>
95 #include <sys/kernel.h>
96 #include <sys/module.h>
97 #include <sys/proc.h>
98 #include <sys/buf.h>
99 #include <sys/malloc.h>
100 #include <sys/namei.h>
101 #include <sys/conf.h>
102 #include <sys/stat.h>
103 #include <sys/sysctl.h>
104 #include <sys/disklabel.h>
105 #include <vfs/ufs/fs.h>
106 #include <sys/devicestat.h>
107 #include <sys/fcntl.h>
108 #include <sys/vnode.h>
109 #include <sys/buf2.h>
110 
111 #include <sys/ccdvar.h>
112 
113 #include <vm/vm_zone.h>
114 
/* Building with CCDDEBUG defined turns on DEBUG for the setup below. */
#if defined(CCDDEBUG) && !defined(DEBUG)
#define DEBUG
#endif

#ifdef DEBUG
/* Flag bits that are OR'ed together to form the ccddebug mask. */
#define CCDB_FOLLOW	0x01
#define CCDB_INIT	0x02
#define CCDB_IO		0x04
#define CCDB_LABEL	0x08
#define CCDB_VNODE	0x10
/* Debug mask; all flags are on by default, adjustable via debug.ccddebug. */
static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL |
    CCDB_VNODE;
SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, "");
/*
 * NOTE: DEBUG is undefined again right here, so the "#ifdef DEBUG"
 * sections further down in this file are compiled out unless something
 * re-defines DEBUG after this point.
 */
#undef DEBUG
#endif

/* Unit and partition number of a ccd dev_t, via the generic disk macros. */
#define	ccdunit(x)	dkunit(x)
#define ccdpart(x)	dkpart(x)
133 
134 /*
135    This is how mirroring works (only writes are special):
136 
137    When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s
138    linked together by the cb_mirror field.  "cb_pflags &
139    CCDPF_MIRROR_DONE" is set to 0 on both of them.
140 
141    When a component returns to ccdiodone(), it checks if "cb_pflags &
142    CCDPF_MIRROR_DONE" is set or not.  If not, it sets the partner's
143    flag and returns.  If it is, it means its partner has already
144    returned, so it will go to the regular cleanup.
145 
146  */
147 
/*
 * Per-component I/O request.  One (or, for mirrors, two) of these is built
 * by ccdbuffer() for every piece of an original request.
 *
 * cb_buf is the first member: ccdbuffer() installs ccdiodone() as the
 * b_iodone callback through a function-pointer cast, so the callback is
 * handed the address of cb_buf and treats it as a struct ccdbuf pointer.
 */
struct ccdbuf {
	struct buf	cb_buf;		/* new I/O buf */
	struct buf	*cb_obp;	/* ptr. to original I/O buf */
	struct ccdbuf	*cb_freenext;	/* free list link */
	int		cb_unit;	/* target unit */
	int		cb_comp;	/* target component */
	int		cb_pflags;	/* mirror/parity status flag */
	struct ccdbuf	*cb_mirror;	/* mirror counterpart */
};

/* bits in cb_pflags */
#define CCDPF_MIRROR_DONE 1	/* if set, mirror counterpart is done */

/* dev_t of the raw partition (slice 0) of the ccd unit that 'dev' is on. */
#define CCDLABELDEV(dev)	\
	(makedev(major((dev)), dkmakeminor(ccdunit((dev)), 0, RAW_PART)))
163 
/* Device switch entry points implemented by this driver. */
static d_open_t ccdopen;
static d_close_t ccdclose;
static d_strategy_t ccdstrategy;
static d_ioctl_t ccdioctl;
static d_dump_t ccddump;
static d_psize_t ccdsize;

/* putccdbuf() keeps at most this many ccdbufs on the free list. */
#define NCCDFREEHIWAT	16

#define CDEV_MAJOR 74

/* Device switch table registered by ccdattach(). */
static struct cdevsw ccd_cdevsw = {
	/* name */	"ccd",
	/* maj */	CDEV_MAJOR,
	/* flags */	D_DISK,
	/* port */      NULL,
	/* autoq */	0,

	/* open */	ccdopen,
	/* close */	ccdclose,
	/* read */	physread,
	/* write */	physwrite,
	/* ioctl */	ccdioctl,
	/* poll */	nopoll,
	/* mmap */	nommap,
	/* strategy */	ccdstrategy,
	/* dump */	ccddump,
	/* psize */	ccdsize
};

/* called during module initialization */
static	void ccdattach (void);
static	int ccd_modevent (module_t, int, void *);

/* called by biodone() at interrupt time */
static	void ccdiodone (struct ccdbuf *cbp);

static	void ccdstart (struct ccd_softc *, struct buf *);
static	void ccdinterleave (struct ccd_softc *, int);
static	void ccdintr (struct ccd_softc *, struct buf *);
static	int ccdinit (struct ccddevice *, char **, struct thread *);
static	int ccdlookup (char *, struct thread *td, struct vnode **);
static	void ccdbuffer (struct ccdbuf **ret, struct ccd_softc *,
		struct buf *, daddr_t, caddr_t, long);
static	void ccdgetdisklabel (dev_t);
static	void ccdmakedisklabel (struct ccd_softc *);
static	int ccdlock (struct ccd_softc *);
static	void ccdunlock (struct ccd_softc *);

#ifdef DEBUG
static	void printiinfo (struct ccdiinfo *);
#endif

/* Non-private for the benefit of libkvm. */
struct	ccd_softc *ccd_softc;	/* per-unit state, allocated by ccdattach() */
struct	ccddevice *ccddevs;	/* per-unit config, allocated by ccdattach() */
struct	ccdbuf *ccdfreebufs;	/* free list head, linked via cb_freenext */
static	int numccdfreebufs;	/* number of ccdbufs on ccdfreebufs */
static	int numccd = 0;		/* units allocated; set by ccdattach() */
223 
224 /*
225  * getccdbuf() -	Allocate and zero a ccd buffer.
226  *
227  *	This routine is called at splbio().
228  */
229 
230 static __inline
231 struct ccdbuf *
232 getccdbuf(struct ccdbuf *cpy)
233 {
234 	struct ccdbuf *cbp;
235 
236 	/*
237 	 * Allocate from freelist or malloc as necessary
238 	 */
239 	if ((cbp = ccdfreebufs) != NULL) {
240 		ccdfreebufs = cbp->cb_freenext;
241 		--numccdfreebufs;
242 	} else {
243 		cbp = malloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK);
244 	}
245 
246 	/*
247 	 * Used by mirroring code
248 	 */
249 	if (cpy)
250 		bcopy(cpy, cbp, sizeof(struct ccdbuf));
251 	else
252 		bzero(cbp, sizeof(struct ccdbuf));
253 
254 	/*
255 	 * independant struct buf initialization
256 	 */
257 	LIST_INIT(&cbp->cb_buf.b_dep);
258 	BUF_LOCKINIT(&cbp->cb_buf);
259 	BUF_LOCK(&cbp->cb_buf, LK_EXCLUSIVE);
260 	BUF_KERNPROC(&cbp->cb_buf);
261 
262 	return(cbp);
263 }
264 
265 /*
266  * putccdbuf() -	Free a ccd buffer.
267  *
268  *	This routine is called at splbio().
269  */
270 
271 static __inline
272 void
273 putccdbuf(struct ccdbuf *cbp)
274 {
275 	BUF_UNLOCK(&cbp->cb_buf);
276 	BUF_LOCKFREE(&cbp->cb_buf);
277 
278 	if (numccdfreebufs < NCCDFREEHIWAT) {
279 		cbp->cb_freenext = ccdfreebufs;
280 		ccdfreebufs = cbp;
281 		++numccdfreebufs;
282 	} else {
283 		free((caddr_t)cbp, M_DEVBUF);
284 	}
285 }
286 
287 
/*
 * Number of blocks to leave untouched at the front of a component
 * partition.  This is to avoid violating its disklabel area when it
 * starts at the beginning of the slice.  May be overridden at build time
 * by defining CCD_OFFSET beforehand.
 */
#if !defined(CCD_OFFSET)
#define CCD_OFFSET 16
#endif
296 
297 /*
298  * Called by main() during pseudo-device attachment.  All we need
299  * to do is allocate enough space for devices to be configured later, and
300  * add devsw entries.
301  */
302 static void
303 ccdattach()
304 {
305 	int i;
306 	int num = NCCD;
307 
308 	if (num > 1)
309 		printf("ccd0-%d: Concatenated disk drivers\n", num-1);
310 	else
311 		printf("ccd0: Concatenated disk driver\n");
312 
313 	ccd_softc = (struct ccd_softc *)malloc(num * sizeof(struct ccd_softc),
314 	    M_DEVBUF, M_NOWAIT);
315 	ccddevs = (struct ccddevice *)malloc(num * sizeof(struct ccddevice),
316 	    M_DEVBUF, M_NOWAIT);
317 	if ((ccd_softc == NULL) || (ccddevs == NULL)) {
318 		printf("WARNING: no memory for concatenated disks\n");
319 		if (ccd_softc != NULL)
320 			free(ccd_softc, M_DEVBUF);
321 		if (ccddevs != NULL)
322 			free(ccddevs, M_DEVBUF);
323 		return;
324 	}
325 	numccd = num;
326 	bzero(ccd_softc, num * sizeof(struct ccd_softc));
327 	bzero(ccddevs, num * sizeof(struct ccddevice));
328 
329 	cdevsw_add(&ccd_cdevsw);
330 	/* XXX: is this necessary? */
331 	for (i = 0; i < numccd; ++i)
332 		ccddevs[i].ccd_dk = -1;
333 }
334 
335 static int
336 ccd_modevent(mod, type, data)
337 	module_t mod;
338 	int type;
339 	void *data;
340 {
341 	int error = 0;
342 
343 	switch (type) {
344 	case MOD_LOAD:
345 		ccdattach();
346 		break;
347 
348 	case MOD_UNLOAD:
349 		printf("ccd0: Unload not supported!\n");
350 		error = EOPNOTSUPP;
351 		break;
352 
353 	default:	/* MOD_SHUTDOWN etc */
354 		break;
355 	}
356 	return (error);
357 }
358 
359 DEV_MODULE(ccd, ccd_modevent, NULL);
360 
361 static int
362 ccdinit(struct ccddevice *ccd, char **cpaths, struct thread *td)
363 {
364 	struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit];
365 	struct ccdcinfo *ci = NULL;	/* XXX */
366 	size_t size;
367 	int ix;
368 	struct vnode *vp;
369 	size_t minsize;
370 	int maxsecsize;
371 	struct partinfo dpart;
372 	struct ccdgeom *ccg = &cs->sc_geom;
373 	char tmppath[MAXPATHLEN];
374 	int error = 0;
375 	struct ucred *cred;
376 
377 	KKASSERT(td->td_proc);
378 	cred = td->td_proc->p_ucred;
379 
380 #ifdef DEBUG
381 	if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
382 		printf("ccdinit: unit %d\n", ccd->ccd_unit);
383 #endif
384 
385 	cs->sc_size = 0;
386 	cs->sc_ileave = ccd->ccd_interleave;
387 	cs->sc_nccdisks = ccd->ccd_ndev;
388 
389 	/* Allocate space for the component info. */
390 	cs->sc_cinfo = malloc(cs->sc_nccdisks * sizeof(struct ccdcinfo),
391 	    M_DEVBUF, M_WAITOK);
392 
393 	/*
394 	 * Verify that each component piece exists and record
395 	 * relevant information about it.
396 	 */
397 	maxsecsize = 0;
398 	minsize = 0;
399 	for (ix = 0; ix < cs->sc_nccdisks; ix++) {
400 		vp = ccd->ccd_vpp[ix];
401 		ci = &cs->sc_cinfo[ix];
402 		ci->ci_vp = vp;
403 
404 		/*
405 		 * Copy in the pathname of the component.
406 		 */
407 		bzero(tmppath, sizeof(tmppath));	/* sanity */
408 		if ((error = copyinstr(cpaths[ix], tmppath,
409 		    MAXPATHLEN, &ci->ci_pathlen)) != 0) {
410 #ifdef DEBUG
411 			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
412 				printf("ccd%d: can't copy path, error = %d\n",
413 				    ccd->ccd_unit, error);
414 #endif
415 			goto fail;
416 		}
417 		ci->ci_path = malloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK);
418 		bcopy(tmppath, ci->ci_path, ci->ci_pathlen);
419 
420 		ci->ci_dev = vn_todev(vp);
421 
422 		/*
423 		 * Get partition information for the component.
424 		 */
425 		if ((error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart,
426 		    FREAD, cred, td)) != 0) {
427 #ifdef DEBUG
428 			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
429 				 printf("ccd%d: %s: ioctl failed, error = %d\n",
430 				     ccd->ccd_unit, ci->ci_path, error);
431 #endif
432 			goto fail;
433 		}
434 		if (dpart.part->p_fstype == FS_BSDFFS) {
435 			maxsecsize =
436 			    ((dpart.disklab->d_secsize > maxsecsize) ?
437 			    dpart.disklab->d_secsize : maxsecsize);
438 			size = dpart.part->p_size - CCD_OFFSET;
439 		} else {
440 #ifdef DEBUG
441 			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
442 				printf("ccd%d: %s: incorrect partition type\n",
443 				    ccd->ccd_unit, ci->ci_path);
444 #endif
445 			error = EFTYPE;
446 			goto fail;
447 		}
448 
449 		/*
450 		 * Calculate the size, truncating to an interleave
451 		 * boundary if necessary.
452 		 */
453 
454 		if (cs->sc_ileave > 1)
455 			size -= size % cs->sc_ileave;
456 
457 		if (size == 0) {
458 #ifdef DEBUG
459 			if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
460 				printf("ccd%d: %s: size == 0\n",
461 				    ccd->ccd_unit, ci->ci_path);
462 #endif
463 			error = ENODEV;
464 			goto fail;
465 		}
466 
467 		if (minsize == 0 || size < minsize)
468 			minsize = size;
469 		ci->ci_size = size;
470 		cs->sc_size += size;
471 	}
472 
473 	/*
474 	 * Don't allow the interleave to be smaller than
475 	 * the biggest component sector.
476 	 */
477 	if ((cs->sc_ileave > 0) &&
478 	    (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) {
479 #ifdef DEBUG
480 		if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
481 			printf("ccd%d: interleave must be at least %d\n",
482 			    ccd->ccd_unit, (maxsecsize / DEV_BSIZE));
483 #endif
484 		error = EINVAL;
485 		goto fail;
486 	}
487 
488 	/*
489 	 * If uniform interleave is desired set all sizes to that of
490 	 * the smallest component.  This will guarentee that a single
491 	 * interleave table is generated.
492 	 *
493 	 * Lost space must be taken into account when calculating the
494 	 * overall size.  Half the space is lost when CCDF_MIRROR is
495 	 * specified.  One disk is lost when CCDF_PARITY is specified.
496 	 */
497 	if (ccd->ccd_flags & CCDF_UNIFORM) {
498 		for (ci = cs->sc_cinfo;
499 		     ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) {
500 			ci->ci_size = minsize;
501 		}
502 		if (ccd->ccd_flags & CCDF_MIRROR) {
503 			/*
504 			 * Check to see if an even number of components
505 			 * have been specified.  The interleave must also
506 			 * be non-zero in order for us to be able to
507 			 * guarentee the topology.
508 			 */
509 			if (cs->sc_nccdisks % 2) {
510 				printf("ccd%d: mirroring requires an even number of disks\n", ccd->ccd_unit );
511 				error = EINVAL;
512 				goto fail;
513 			}
514 			if (cs->sc_ileave == 0) {
515 				printf("ccd%d: an interleave must be specified when mirroring\n", ccd->ccd_unit);
516 				error = EINVAL;
517 				goto fail;
518 			}
519 			cs->sc_size = (cs->sc_nccdisks/2) * minsize;
520 		} else if (ccd->ccd_flags & CCDF_PARITY) {
521 			cs->sc_size = (cs->sc_nccdisks-1) * minsize;
522 		} else {
523 			if (cs->sc_ileave == 0) {
524 				printf("ccd%d: an interleave must be specified when using parity\n", ccd->ccd_unit);
525 				error = EINVAL;
526 				goto fail;
527 			}
528 			cs->sc_size = cs->sc_nccdisks * minsize;
529 		}
530 	}
531 
532 	/*
533 	 * Construct the interleave table.
534 	 */
535 	ccdinterleave(cs, ccd->ccd_unit);
536 
537 	/*
538 	 * Create pseudo-geometry based on 1MB cylinders.  It's
539 	 * pretty close.
540 	 */
541 	ccg->ccg_secsize = maxsecsize;
542 	ccg->ccg_ntracks = 1;
543 	ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize;
544 	ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors;
545 
546 	/*
547 	 * Add an devstat entry for this device.
548 	 */
549 	devstat_add_entry(&cs->device_stats, "ccd", ccd->ccd_unit,
550 			  ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED,
551 			  DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER,
552 			  DEVSTAT_PRIORITY_ARRAY);
553 
554 	cs->sc_flags |= CCDF_INITED;
555 	cs->sc_cflags = ccd->ccd_flags;	/* So we can find out later... */
556 	cs->sc_unit = ccd->ccd_unit;
557 	return (0);
558 fail:
559 	while (ci > cs->sc_cinfo) {
560 		ci--;
561 		free(ci->ci_path, M_DEVBUF);
562 	}
563 	free(cs->sc_cinfo, M_DEVBUF);
564 	return (error);
565 }
566 
567 static void
568 ccdinterleave(cs, unit)
569 	struct ccd_softc *cs;
570 	int unit;
571 {
572 	struct ccdcinfo *ci, *smallci;
573 	struct ccdiinfo *ii;
574 	daddr_t bn, lbn;
575 	int ix;
576 	u_long size;
577 
578 #ifdef DEBUG
579 	if (ccddebug & CCDB_INIT)
580 		printf("ccdinterleave(%x): ileave %d\n", cs, cs->sc_ileave);
581 #endif
582 
583 	/*
584 	 * Allocate an interleave table.  The worst case occurs when each
585 	 * of N disks is of a different size, resulting in N interleave
586 	 * tables.
587 	 *
588 	 * Chances are this is too big, but we don't care.
589 	 */
590 	size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo);
591 	cs->sc_itable = (struct ccdiinfo *)malloc(size, M_DEVBUF, M_WAITOK);
592 	bzero((caddr_t)cs->sc_itable, size);
593 
594 	/*
595 	 * Trivial case: no interleave (actually interleave of disk size).
596 	 * Each table entry represents a single component in its entirety.
597 	 *
598 	 * An interleave of 0 may not be used with a mirror or parity setup.
599 	 */
600 	if (cs->sc_ileave == 0) {
601 		bn = 0;
602 		ii = cs->sc_itable;
603 
604 		for (ix = 0; ix < cs->sc_nccdisks; ix++) {
605 			/* Allocate space for ii_index. */
606 			ii->ii_index = malloc(sizeof(int), M_DEVBUF, M_WAITOK);
607 			ii->ii_ndisk = 1;
608 			ii->ii_startblk = bn;
609 			ii->ii_startoff = 0;
610 			ii->ii_index[0] = ix;
611 			bn += cs->sc_cinfo[ix].ci_size;
612 			ii++;
613 		}
614 		ii->ii_ndisk = 0;
615 #ifdef DEBUG
616 		if (ccddebug & CCDB_INIT)
617 			printiinfo(cs->sc_itable);
618 #endif
619 		return;
620 	}
621 
622 	/*
623 	 * The following isn't fast or pretty; it doesn't have to be.
624 	 */
625 	size = 0;
626 	bn = lbn = 0;
627 	for (ii = cs->sc_itable; ; ii++) {
628 		/*
629 		 * Allocate space for ii_index.  We might allocate more then
630 		 * we use.
631 		 */
632 		ii->ii_index = malloc((sizeof(int) * cs->sc_nccdisks),
633 		    M_DEVBUF, M_WAITOK);
634 
635 		/*
636 		 * Locate the smallest of the remaining components
637 		 */
638 		smallci = NULL;
639 		for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_nccdisks];
640 		    ci++) {
641 			if (ci->ci_size > size &&
642 			    (smallci == NULL ||
643 			     ci->ci_size < smallci->ci_size)) {
644 				smallci = ci;
645 			}
646 		}
647 
648 		/*
649 		 * Nobody left, all done
650 		 */
651 		if (smallci == NULL) {
652 			ii->ii_ndisk = 0;
653 			break;
654 		}
655 
656 		/*
657 		 * Record starting logical block using an sc_ileave blocksize.
658 		 */
659 		ii->ii_startblk = bn / cs->sc_ileave;
660 
661 		/*
662 		 * Record starting comopnent block using an sc_ileave
663 		 * blocksize.  This value is relative to the beginning of
664 		 * a component disk.
665 		 */
666 		ii->ii_startoff = lbn;
667 
668 		/*
669 		 * Determine how many disks take part in this interleave
670 		 * and record their indices.
671 		 */
672 		ix = 0;
673 		for (ci = cs->sc_cinfo;
674 		    ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) {
675 			if (ci->ci_size >= smallci->ci_size) {
676 				ii->ii_index[ix++] = ci - cs->sc_cinfo;
677 			}
678 		}
679 		ii->ii_ndisk = ix;
680 		bn += ix * (smallci->ci_size - size);
681 		lbn = smallci->ci_size / cs->sc_ileave;
682 		size = smallci->ci_size;
683 	}
684 #ifdef DEBUG
685 	if (ccddebug & CCDB_INIT)
686 		printiinfo(cs->sc_itable);
687 #endif
688 }
689 
690 /* ARGSUSED */
691 static int
692 ccdopen(dev_t dev, int flags, int fmt, d_thread_t *td)
693 {
694 	int unit = ccdunit(dev);
695 	struct ccd_softc *cs;
696 	struct disklabel *lp;
697 	int error = 0, part, pmask;
698 
699 #ifdef DEBUG
700 	if (ccddebug & CCDB_FOLLOW)
701 		printf("ccdopen(%x, %x)\n", dev, flags);
702 #endif
703 	if (unit >= numccd)
704 		return (ENXIO);
705 	cs = &ccd_softc[unit];
706 
707 	if ((error = ccdlock(cs)) != 0)
708 		return (error);
709 
710 	lp = &cs->sc_label;
711 
712 	part = ccdpart(dev);
713 	pmask = (1 << part);
714 
715 	/*
716 	 * If we're initialized, check to see if there are any other
717 	 * open partitions.  If not, then it's safe to update
718 	 * the in-core disklabel.
719 	 */
720 	if ((cs->sc_flags & CCDF_INITED) && (cs->sc_openmask == 0))
721 		ccdgetdisklabel(dev);
722 
723 	/* Check that the partition exists. */
724 	if (part != RAW_PART && ((part >= lp->d_npartitions) ||
725 	    (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
726 		error = ENXIO;
727 		goto done;
728 	}
729 
730 	cs->sc_openmask |= pmask;
731  done:
732 	ccdunlock(cs);
733 	return (0);
734 }
735 
736 /* ARGSUSED */
737 static int
738 ccdclose(dev_t dev, int flags, int fmt, d_thread_t *td)
739 {
740 	int unit = ccdunit(dev);
741 	struct ccd_softc *cs;
742 	int error = 0, part;
743 
744 #ifdef DEBUG
745 	if (ccddebug & CCDB_FOLLOW)
746 		printf("ccdclose(%x, %x)\n", dev, flags);
747 #endif
748 
749 	if (unit >= numccd)
750 		return (ENXIO);
751 	cs = &ccd_softc[unit];
752 
753 	if ((error = ccdlock(cs)) != 0)
754 		return (error);
755 
756 	part = ccdpart(dev);
757 
758 	/* ...that much closer to allowing unconfiguration... */
759 	cs->sc_openmask &= ~(1 << part);
760 	ccdunlock(cs);
761 	return (0);
762 }
763 
764 static void
765 ccdstrategy(bp)
766 	struct buf *bp;
767 {
768 	int unit = ccdunit(bp->b_dev);
769 	struct ccd_softc *cs = &ccd_softc[unit];
770 	int s;
771 	int wlabel;
772 	struct disklabel *lp;
773 
774 #ifdef DEBUG
775 	if (ccddebug & CCDB_FOLLOW)
776 		printf("ccdstrategy(%x): unit %d\n", bp, unit);
777 #endif
778 	if ((cs->sc_flags & CCDF_INITED) == 0) {
779 		bp->b_error = ENXIO;
780 		bp->b_flags |= B_ERROR;
781 		goto done;
782 	}
783 
784 	/* If it's a nil transfer, wake up the top half now. */
785 	if (bp->b_bcount == 0)
786 		goto done;
787 
788 	lp = &cs->sc_label;
789 
790 	/*
791 	 * Do bounds checking and adjust transfer.  If there's an
792 	 * error, the bounds check will flag that for us.
793 	 */
794 	wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING);
795 	if (ccdpart(bp->b_dev) != RAW_PART) {
796 		if (bounds_check_with_label(bp, lp, wlabel) <= 0)
797 			goto done;
798 	} else {
799 		int pbn;        /* in sc_secsize chunks */
800 		long sz;        /* in sc_secsize chunks */
801 
802 		pbn = bp->b_blkno / (cs->sc_geom.ccg_secsize / DEV_BSIZE);
803 		sz = howmany(bp->b_bcount, cs->sc_geom.ccg_secsize);
804 
805 		/*
806 		 * If out of bounds return an error. If at the EOF point,
807 		 * simply read or write less.
808 		 */
809 
810 		if (pbn < 0 || pbn >= cs->sc_size) {
811 			bp->b_resid = bp->b_bcount;
812 			if (pbn != cs->sc_size) {
813 				bp->b_error = EINVAL;
814 				bp->b_flags |= B_ERROR | B_INVAL;
815 			}
816 			goto done;
817 		}
818 
819 		/*
820 		 * If the request crosses EOF, truncate the request.
821 		 */
822 		if (pbn + sz > cs->sc_size) {
823 			bp->b_bcount = (cs->sc_size - pbn) *
824 			    cs->sc_geom.ccg_secsize;
825 		}
826 	}
827 
828 	bp->b_resid = bp->b_bcount;
829 
830 	/*
831 	 * "Start" the unit.
832 	 */
833 	s = splbio();
834 	ccdstart(cs, bp);
835 	splx(s);
836 	return;
837 done:
838 	biodone(bp);
839 }
840 
841 static void
842 ccdstart(cs, bp)
843 	struct ccd_softc *cs;
844 	struct buf *bp;
845 {
846 	long bcount, rcount;
847 	struct ccdbuf *cbp[4];
848 	/* XXX! : 2 reads and 2 writes for RAID 4/5 */
849 	caddr_t addr;
850 	daddr_t bn;
851 	struct partition *pp;
852 
853 #ifdef DEBUG
854 	if (ccddebug & CCDB_FOLLOW)
855 		printf("ccdstart(%x, %x)\n", cs, bp);
856 #endif
857 
858 	/* Record the transaction start  */
859 	devstat_start_transaction(&cs->device_stats);
860 
861 	/*
862 	 * Translate the partition-relative block number to an absolute.
863 	 */
864 	bn = bp->b_blkno;
865 	if (ccdpart(bp->b_dev) != RAW_PART) {
866 		pp = &cs->sc_label.d_partitions[ccdpart(bp->b_dev)];
867 		bn += pp->p_offset;
868 	}
869 
870 	/*
871 	 * Allocate component buffers and fire off the requests
872 	 */
873 	addr = bp->b_data;
874 	for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) {
875 		ccdbuffer(cbp, cs, bp, bn, addr, bcount);
876 		rcount = cbp[0]->cb_buf.b_bcount;
877 
878 		if (cs->sc_cflags & CCDF_MIRROR) {
879 			/*
880 			 * Mirroring.  Writes go to both disks, reads are
881 			 * taken from whichever disk seems most appropriate.
882 			 *
883 			 * We attempt to localize reads to the disk whos arm
884 			 * is nearest the read request.  We ignore seeks due
885 			 * to writes when making this determination and we
886 			 * also try to avoid hogging.
887 			 */
888 			if ((cbp[0]->cb_buf.b_flags & B_READ) == 0) {
889 				cbp[0]->cb_buf.b_vp->v_numoutput++;
890 				cbp[1]->cb_buf.b_vp->v_numoutput++;
891 				VOP_STRATEGY(cbp[0]->cb_buf.b_vp,
892 				    &cbp[0]->cb_buf);
893 				VOP_STRATEGY(cbp[1]->cb_buf.b_vp,
894 				    &cbp[1]->cb_buf);
895 			} else {
896 				int pick = cs->sc_pick;
897 				daddr_t range = cs->sc_size / 16;
898 
899 				if (bn < cs->sc_blk[pick] - range ||
900 				    bn > cs->sc_blk[pick] + range
901 				) {
902 					cs->sc_pick = pick = 1 - pick;
903 				}
904 				cs->sc_blk[pick] = bn + btodb(rcount);
905 				VOP_STRATEGY(cbp[pick]->cb_buf.b_vp,
906 				    &cbp[pick]->cb_buf);
907 			}
908 		} else {
909 			/*
910 			 * Not mirroring
911 			 */
912 			if ((cbp[0]->cb_buf.b_flags & B_READ) == 0)
913 				cbp[0]->cb_buf.b_vp->v_numoutput++;
914 			VOP_STRATEGY(cbp[0]->cb_buf.b_vp, &cbp[0]->cb_buf);
915 		}
916 		bn += btodb(rcount);
917 		addr += rcount;
918 	}
919 }
920 
921 /*
922  * Build a component buffer header.
923  */
924 static void
925 ccdbuffer(cb, cs, bp, bn, addr, bcount)
926 	struct ccdbuf **cb;
927 	struct ccd_softc *cs;
928 	struct buf *bp;
929 	daddr_t bn;
930 	caddr_t addr;
931 	long bcount;
932 {
933 	struct ccdcinfo *ci, *ci2 = NULL;	/* XXX */
934 	struct ccdbuf *cbp;
935 	daddr_t cbn, cboff;
936 	off_t cbc;
937 
938 #ifdef DEBUG
939 	if (ccddebug & CCDB_IO)
940 		printf("ccdbuffer(%x, %x, %d, %x, %d)\n",
941 		       cs, bp, bn, addr, bcount);
942 #endif
943 	/*
944 	 * Determine which component bn falls in.
945 	 */
946 	cbn = bn;
947 	cboff = 0;
948 
949 	if (cs->sc_ileave == 0) {
950 		/*
951 		 * Serially concatenated and neither a mirror nor a parity
952 		 * config.  This is a special case.
953 		 */
954 		daddr_t sblk;
955 
956 		sblk = 0;
957 		for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++)
958 			sblk += ci->ci_size;
959 		cbn -= sblk;
960 	} else {
961 		struct ccdiinfo *ii;
962 		int ccdisk, off;
963 
964 		/*
965 		 * Calculate cbn, the logical superblock (sc_ileave chunks),
966 		 * and cboff, a normal block offset (DEV_BSIZE chunks) relative
967 		 * to cbn.
968 		 */
969 		cboff = cbn % cs->sc_ileave;	/* DEV_BSIZE gran */
970 		cbn = cbn / cs->sc_ileave;	/* DEV_BSIZE * ileave gran */
971 
972 		/*
973 		 * Figure out which interleave table to use.
974 		 */
975 		for (ii = cs->sc_itable; ii->ii_ndisk; ii++) {
976 			if (ii->ii_startblk > cbn)
977 				break;
978 		}
979 		ii--;
980 
981 		/*
982 		 * off is the logical superblock relative to the beginning
983 		 * of this interleave block.
984 		 */
985 		off = cbn - ii->ii_startblk;
986 
987 		/*
988 		 * We must calculate which disk component to use (ccdisk),
989 		 * and recalculate cbn to be the superblock relative to
990 		 * the beginning of the component.  This is typically done by
991 		 * adding 'off' and ii->ii_startoff together.  However, 'off'
992 		 * must typically be divided by the number of components in
993 		 * this interleave array to be properly convert it from a
994 		 * CCD-relative logical superblock number to a
995 		 * component-relative superblock number.
996 		 */
997 		if (ii->ii_ndisk == 1) {
998 			/*
999 			 * When we have just one disk, it can't be a mirror
1000 			 * or a parity config.
1001 			 */
1002 			ccdisk = ii->ii_index[0];
1003 			cbn = ii->ii_startoff + off;
1004 		} else {
1005 			if (cs->sc_cflags & CCDF_MIRROR) {
1006 				/*
1007 				 * We have forced a uniform mapping, resulting
1008 				 * in a single interleave array.  We double
1009 				 * up on the first half of the available
1010 				 * components and our mirror is in the second
1011 				 * half.  This only works with a single
1012 				 * interleave array because doubling up
1013 				 * doubles the number of sectors, so there
1014 				 * cannot be another interleave array because
1015 				 * the next interleave array's calculations
1016 				 * would be off.
1017 				 */
1018 				int ndisk2 = ii->ii_ndisk / 2;
1019 				ccdisk = ii->ii_index[off % ndisk2];
1020 				cbn = ii->ii_startoff + off / ndisk2;
1021 				ci2 = &cs->sc_cinfo[ccdisk + ndisk2];
1022 			} else if (cs->sc_cflags & CCDF_PARITY) {
1023 				/*
1024 				 * XXX not implemented yet
1025 				 */
1026 				int ndisk2 = ii->ii_ndisk - 1;
1027 				ccdisk = ii->ii_index[off % ndisk2];
1028 				cbn = ii->ii_startoff + off / ndisk2;
1029 				if (cbn % ii->ii_ndisk <= ccdisk)
1030 					ccdisk++;
1031 			} else {
1032 				ccdisk = ii->ii_index[off % ii->ii_ndisk];
1033 				cbn = ii->ii_startoff + off / ii->ii_ndisk;
1034 			}
1035 		}
1036 
1037 		ci = &cs->sc_cinfo[ccdisk];
1038 
1039 		/*
1040 		 * Convert cbn from a superblock to a normal block so it
1041 		 * can be used to calculate (along with cboff) the normal
1042 		 * block index into this particular disk.
1043 		 */
1044 		cbn *= cs->sc_ileave;
1045 	}
1046 
1047 	/*
1048 	 * Fill in the component buf structure.
1049 	 */
1050 	cbp = getccdbuf(NULL);
1051 	cbp->cb_buf.b_flags = bp->b_flags | B_CALL;
1052 	cbp->cb_buf.b_iodone = (void (*)(struct buf *))ccdiodone;
1053 	cbp->cb_buf.b_dev = ci->ci_dev;		/* XXX */
1054 	cbp->cb_buf.b_blkno = cbn + cboff + CCD_OFFSET;
1055 	cbp->cb_buf.b_offset = dbtob(cbn + cboff + CCD_OFFSET);
1056 	cbp->cb_buf.b_data = addr;
1057 	cbp->cb_buf.b_vp = ci->ci_vp;
1058 	if (cs->sc_ileave == 0)
1059               cbc = dbtob((off_t)(ci->ci_size - cbn));
1060 	else
1061               cbc = dbtob((off_t)(cs->sc_ileave - cboff));
1062 	cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount;
1063  	cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount;
1064 
1065 	/*
1066 	 * context for ccdiodone
1067 	 */
1068 	cbp->cb_obp = bp;
1069 	cbp->cb_unit = cs - ccd_softc;
1070 	cbp->cb_comp = ci - cs->sc_cinfo;
1071 
1072 #ifdef DEBUG
1073 	if (ccddebug & CCDB_IO)
1074 		printf(" dev %x(u%d): cbp %x bn %d addr %x bcnt %d\n",
1075 		       ci->ci_dev, ci-cs->sc_cinfo, cbp, cbp->cb_buf.b_blkno,
1076 		       cbp->cb_buf.b_data, cbp->cb_buf.b_bcount);
1077 #endif
1078 	cb[0] = cbp;
1079 
1080 	/*
1081 	 * Note: both I/O's setup when reading from mirror, but only one
1082 	 * will be executed.
1083 	 */
1084 	if (cs->sc_cflags & CCDF_MIRROR) {
1085 		/* mirror, setup second I/O */
1086 		cbp = getccdbuf(cb[0]);
1087 		cbp->cb_buf.b_dev = ci2->ci_dev;
1088 		cbp->cb_buf.b_vp = ci2->ci_vp;
1089 		cbp->cb_comp = ci2 - cs->sc_cinfo;
1090 		cb[1] = cbp;
1091 		/* link together the ccdbuf's and clear "mirror done" flag */
1092 		cb[0]->cb_mirror = cb[1];
1093 		cb[1]->cb_mirror = cb[0];
1094 		cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE;
1095 		cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE;
1096 	}
1097 }
1098 
1099 static void
1100 ccdintr(cs, bp)
1101 	struct ccd_softc *cs;
1102 	struct buf *bp;
1103 {
1104 #ifdef DEBUG
1105 	if (ccddebug & CCDB_FOLLOW)
1106 		printf("ccdintr(%x, %x)\n", cs, bp);
1107 #endif
1108 	/*
1109 	 * Request is done for better or worse, wakeup the top half.
1110 	 */
1111 	if (bp->b_flags & B_ERROR)
1112 		bp->b_resid = bp->b_bcount;
1113 	devstat_end_transaction_buf(&cs->device_stats, bp);
1114 	biodone(bp);
1115 }
1116 
1117 /*
1118  * Called at interrupt time.
1119  * Mark the component as done and if all components are done,
1120  * take a ccd interrupt.
1121  */
1122 static void
1123 ccdiodone(cbp)
1124 	struct ccdbuf *cbp;
1125 {
1126 	struct buf *bp = cbp->cb_obp;
1127 	int unit = cbp->cb_unit;
1128 	int count, s;
1129 
1130 	s = splbio();
1131 #ifdef DEBUG
1132 	if (ccddebug & CCDB_FOLLOW)
1133 		printf("ccdiodone(%x)\n", cbp);
1134 	if (ccddebug & CCDB_IO) {
1135 		printf("ccdiodone: bp %x bcount %d resid %d\n",
1136 		       bp, bp->b_bcount, bp->b_resid);
1137 		printf(" dev %x(u%d), cbp %x bn %d addr %x bcnt %d\n",
1138 		       cbp->cb_buf.b_dev, cbp->cb_comp, cbp,
1139 		       cbp->cb_buf.b_blkno, cbp->cb_buf.b_data,
1140 		       cbp->cb_buf.b_bcount);
1141 	}
1142 #endif
1143 	/*
1144 	 * If an error occured, report it.  If this is a mirrored
1145 	 * configuration and the first of two possible reads, do not
1146 	 * set the error in the bp yet because the second read may
1147 	 * succeed.
1148 	 */
1149 
1150 	if (cbp->cb_buf.b_flags & B_ERROR) {
1151 		const char *msg = "";
1152 
1153 		if ((ccd_softc[unit].sc_cflags & CCDF_MIRROR) &&
1154 		    (cbp->cb_buf.b_flags & B_READ) &&
1155 		    (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1156 			/*
1157 			 * We will try our read on the other disk down
1158 			 * below, also reverse the default pick so if we
1159 			 * are doing a scan we do not keep hitting the
1160 			 * bad disk first.
1161 			 */
1162 			struct ccd_softc *cs = &ccd_softc[unit];
1163 
1164 			msg = ", trying other disk";
1165 			cs->sc_pick = 1 - cs->sc_pick;
1166 			cs->sc_blk[cs->sc_pick] = bp->b_blkno;
1167 		} else {
1168 			bp->b_flags |= B_ERROR;
1169 			bp->b_error = cbp->cb_buf.b_error ?
1170 			    cbp->cb_buf.b_error : EIO;
1171 		}
1172 		printf("ccd%d: error %d on component %d block %d (ccd block %d)%s\n",
1173 		       unit, bp->b_error, cbp->cb_comp,
1174 		       (int)cbp->cb_buf.b_blkno, bp->b_blkno, msg);
1175 	}
1176 
1177 	/*
1178 	 * Process mirror.  If we are writing, I/O has been initiated on both
1179 	 * buffers and we fall through only after both are finished.
1180 	 *
1181 	 * If we are reading only one I/O is initiated at a time.  If an
1182 	 * error occurs we initiate the second I/O and return, otherwise
1183 	 * we free the second I/O without initiating it.
1184 	 */
1185 
1186 	if (ccd_softc[unit].sc_cflags & CCDF_MIRROR) {
1187 		if ((cbp->cb_buf.b_flags & B_READ) == 0) {
1188 			/*
1189 			 * When writing, handshake with the second buffer
1190 			 * to determine when both are done.  If both are not
1191 			 * done, return here.
1192 			 */
1193 			if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1194 				cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE;
1195 				putccdbuf(cbp);
1196 				splx(s);
1197 				return;
1198 			}
1199 		} else {
1200 			/*
1201 			 * When reading, either dispose of the second buffer
1202 			 * or initiate I/O on the second buffer if an error
1203 			 * occured with this one.
1204 			 */
1205 			if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1206 				if (cbp->cb_buf.b_flags & B_ERROR) {
1207 					cbp->cb_mirror->cb_pflags |=
1208 					    CCDPF_MIRROR_DONE;
1209 					VOP_STRATEGY(
1210 					    cbp->cb_mirror->cb_buf.b_vp,
1211 					    &cbp->cb_mirror->cb_buf
1212 					);
1213 					putccdbuf(cbp);
1214 					splx(s);
1215 					return;
1216 				} else {
1217 					putccdbuf(cbp->cb_mirror);
1218 					/* fall through */
1219 				}
1220 			}
1221 		}
1222 	}
1223 
1224 	/*
1225 	 * use b_bufsize to determine how big the original request was rather
1226 	 * then b_bcount, because b_bcount may have been truncated for EOF.
1227 	 *
1228 	 * XXX We check for an error, but we do not test the resid for an
1229 	 * aligned EOF condition.  This may result in character & block
1230 	 * device access not recognizing EOF properly when read or written
1231 	 * sequentially, but will not effect filesystems.
1232 	 */
1233 	count = cbp->cb_buf.b_bufsize;
1234 	putccdbuf(cbp);
1235 
1236 	/*
1237 	 * If all done, "interrupt".
1238 	 */
1239 	bp->b_resid -= count;
1240 	if (bp->b_resid < 0)
1241 		panic("ccdiodone: count");
1242 	if (bp->b_resid == 0)
1243 		ccdintr(&ccd_softc[unit], bp);
1244 	splx(s);
1245 }
1246 
1247 static int
1248 ccdioctl(dev_t dev, u_long cmd, caddr_t data, int flag, d_thread_t *td)
1249 {
1250 	int unit = ccdunit(dev);
1251 	int i, j, lookedup = 0, error = 0;
1252 	int part, pmask, s;
1253 	struct ccd_softc *cs;
1254 	struct ccd_ioctl *ccio = (struct ccd_ioctl *)data;
1255 	struct ccddevice ccd;
1256 	char **cpp;
1257 	struct vnode **vpp;
1258 	struct ucred *cred;
1259 
1260 	KKASSERT(td->td_proc != NULL);
1261 	cred = td->td_proc->p_ucred;
1262 
1263 	if (unit >= numccd)
1264 		return (ENXIO);
1265 	cs = &ccd_softc[unit];
1266 
1267 	bzero(&ccd, sizeof(ccd));
1268 
1269 	switch (cmd) {
1270 	case CCDIOCSET:
1271 		if (cs->sc_flags & CCDF_INITED)
1272 			return (EBUSY);
1273 
1274 		if ((flag & FWRITE) == 0)
1275 			return (EBADF);
1276 
1277 		if ((error = ccdlock(cs)) != 0)
1278 			return (error);
1279 
1280 		if (ccio->ccio_ndisks > CCD_MAXNDISKS)
1281 			return (EINVAL);
1282 
1283 		/* Fill in some important bits. */
1284 		ccd.ccd_unit = unit;
1285 		ccd.ccd_interleave = ccio->ccio_ileave;
1286 		if (ccd.ccd_interleave == 0 &&
1287 		    ((ccio->ccio_flags & CCDF_MIRROR) ||
1288 		     (ccio->ccio_flags & CCDF_PARITY))) {
1289 			printf("ccd%d: disabling mirror/parity, interleave is 0\n", unit);
1290 			ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY);
1291 		}
1292 		if ((ccio->ccio_flags & CCDF_MIRROR) &&
1293 		    (ccio->ccio_flags & CCDF_PARITY)) {
1294 			printf("ccd%d: can't specify both mirror and parity, using mirror\n", unit);
1295 			ccio->ccio_flags &= ~CCDF_PARITY;
1296 		}
1297 		if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) &&
1298 		    !(ccio->ccio_flags & CCDF_UNIFORM)) {
1299 			printf("ccd%d: mirror/parity forces uniform flag\n",
1300 			       unit);
1301 			ccio->ccio_flags |= CCDF_UNIFORM;
1302 		}
1303 		ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK;
1304 
1305 		/*
1306 		 * Allocate space for and copy in the array of
1307 		 * componet pathnames and device numbers.
1308 		 */
1309 		cpp = malloc(ccio->ccio_ndisks * sizeof(char *),
1310 		    M_DEVBUF, M_WAITOK);
1311 		vpp = malloc(ccio->ccio_ndisks * sizeof(struct vnode *),
1312 		    M_DEVBUF, M_WAITOK);
1313 
1314 		error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp,
1315 		    ccio->ccio_ndisks * sizeof(char **));
1316 		if (error) {
1317 			free(vpp, M_DEVBUF);
1318 			free(cpp, M_DEVBUF);
1319 			ccdunlock(cs);
1320 			return (error);
1321 		}
1322 
1323 #ifdef DEBUG
1324 		if (ccddebug & CCDB_INIT)
1325 			for (i = 0; i < ccio->ccio_ndisks; ++i)
1326 				printf("ccdioctl: component %d: 0x%x\n",
1327 				    i, cpp[i]);
1328 #endif
1329 
1330 		for (i = 0; i < ccio->ccio_ndisks; ++i) {
1331 #ifdef DEBUG
1332 			if (ccddebug & CCDB_INIT)
1333 				printf("ccdioctl: lookedup = %d\n", lookedup);
1334 #endif
1335 			if ((error = ccdlookup(cpp[i], td, &vpp[i])) != 0) {
1336 				for (j = 0; j < lookedup; ++j)
1337 					(void)vn_close(vpp[j], FREAD|FWRITE, td);
1338 				free(vpp, M_DEVBUF);
1339 				free(cpp, M_DEVBUF);
1340 				ccdunlock(cs);
1341 				return (error);
1342 			}
1343 			++lookedup;
1344 		}
1345 		ccd.ccd_cpp = cpp;
1346 		ccd.ccd_vpp = vpp;
1347 		ccd.ccd_ndev = ccio->ccio_ndisks;
1348 
1349 		/*
1350 		 * Initialize the ccd.  Fills in the softc for us.
1351 		 */
1352 		if ((error = ccdinit(&ccd, cpp, td)) != 0) {
1353 			for (j = 0; j < lookedup; ++j)
1354 				(void)vn_close(vpp[j], FREAD|FWRITE, td);
1355 			bzero(&ccd_softc[unit], sizeof(struct ccd_softc));
1356 			free(vpp, M_DEVBUF);
1357 			free(cpp, M_DEVBUF);
1358 			ccdunlock(cs);
1359 			return (error);
1360 		}
1361 
1362 		/*
1363 		 * The ccd has been successfully initialized, so
1364 		 * we can place it into the array and read the disklabel.
1365 		 */
1366 		bcopy(&ccd, &ccddevs[unit], sizeof(ccd));
1367 		ccio->ccio_unit = unit;
1368 		ccio->ccio_size = cs->sc_size;
1369 		ccdgetdisklabel(dev);
1370 
1371 		ccdunlock(cs);
1372 
1373 		break;
1374 
1375 	case CCDIOCCLR:
1376 		if ((cs->sc_flags & CCDF_INITED) == 0)
1377 			return (ENXIO);
1378 
1379 		if ((flag & FWRITE) == 0)
1380 			return (EBADF);
1381 
1382 		if ((error = ccdlock(cs)) != 0)
1383 			return (error);
1384 
1385 		/* Don't unconfigure if any other partitions are open */
1386 		part = ccdpart(dev);
1387 		pmask = (1 << part);
1388 		if ((cs->sc_openmask & ~pmask)) {
1389 			ccdunlock(cs);
1390 			return (EBUSY);
1391 		}
1392 
1393 		/*
1394 		 * Free ccd_softc information and clear entry.
1395 		 */
1396 
1397 		/* Close the components and free their pathnames. */
1398 		for (i = 0; i < cs->sc_nccdisks; ++i) {
1399 			/*
1400 			 * XXX: this close could potentially fail and
1401 			 * cause Bad Things.  Maybe we need to force
1402 			 * the close to happen?
1403 			 */
1404 #ifdef DEBUG
1405 			if (ccddebug & CCDB_VNODE)
1406 				vprint("CCDIOCCLR: vnode info",
1407 				    cs->sc_cinfo[i].ci_vp);
1408 #endif
1409 			(void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, td);
1410 			free(cs->sc_cinfo[i].ci_path, M_DEVBUF);
1411 		}
1412 
1413 		/* Free interleave index. */
1414 		for (i = 0; cs->sc_itable[i].ii_ndisk; ++i)
1415 			free(cs->sc_itable[i].ii_index, M_DEVBUF);
1416 
1417 		/* Free component info and interleave table. */
1418 		free(cs->sc_cinfo, M_DEVBUF);
1419 		free(cs->sc_itable, M_DEVBUF);
1420 		cs->sc_flags &= ~CCDF_INITED;
1421 
1422 		/*
1423 		 * Free ccddevice information and clear entry.
1424 		 */
1425 		free(ccddevs[unit].ccd_cpp, M_DEVBUF);
1426 		free(ccddevs[unit].ccd_vpp, M_DEVBUF);
1427 		ccd.ccd_dk = -1;
1428 		bcopy(&ccd, &ccddevs[unit], sizeof(ccd));
1429 
1430 		/*
1431 		 * And remove the devstat entry.
1432 		 */
1433 		devstat_remove_entry(&cs->device_stats);
1434 
1435 		/* This must be atomic. */
1436 		s = splhigh();
1437 		ccdunlock(cs);
1438 		bzero(cs, sizeof(struct ccd_softc));
1439 		splx(s);
1440 
1441 		break;
1442 
1443 	case DIOCGDINFO:
1444 		if ((cs->sc_flags & CCDF_INITED) == 0)
1445 			return (ENXIO);
1446 
1447 		*(struct disklabel *)data = cs->sc_label;
1448 		break;
1449 
1450 	case DIOCGPART:
1451 		if ((cs->sc_flags & CCDF_INITED) == 0)
1452 			return (ENXIO);
1453 
1454 		((struct partinfo *)data)->disklab = &cs->sc_label;
1455 		((struct partinfo *)data)->part =
1456 		    &cs->sc_label.d_partitions[ccdpart(dev)];
1457 		break;
1458 
1459 	case DIOCWDINFO:
1460 	case DIOCSDINFO:
1461 		if ((cs->sc_flags & CCDF_INITED) == 0)
1462 			return (ENXIO);
1463 
1464 		if ((flag & FWRITE) == 0)
1465 			return (EBADF);
1466 
1467 		if ((error = ccdlock(cs)) != 0)
1468 			return (error);
1469 
1470 		cs->sc_flags |= CCDF_LABELLING;
1471 
1472 		error = setdisklabel(&cs->sc_label,
1473 		    (struct disklabel *)data, 0);
1474 		if (error == 0) {
1475 			if (cmd == DIOCWDINFO)
1476 				error = writedisklabel(CCDLABELDEV(dev),
1477 				    &cs->sc_label);
1478 		}
1479 
1480 		cs->sc_flags &= ~CCDF_LABELLING;
1481 
1482 		ccdunlock(cs);
1483 
1484 		if (error)
1485 			return (error);
1486 		break;
1487 
1488 	case DIOCWLABEL:
1489 		if ((cs->sc_flags & CCDF_INITED) == 0)
1490 			return (ENXIO);
1491 
1492 		if ((flag & FWRITE) == 0)
1493 			return (EBADF);
1494 		if (*(int *)data != 0)
1495 			cs->sc_flags |= CCDF_WLABEL;
1496 		else
1497 			cs->sc_flags &= ~CCDF_WLABEL;
1498 		break;
1499 
1500 	default:
1501 		return (ENOTTY);
1502 	}
1503 
1504 	return (0);
1505 }
1506 
1507 static int
1508 ccdsize(dev_t dev)
1509 {
1510 	struct ccd_softc *cs;
1511 	int part, size;
1512 
1513 	if (ccdopen(dev, 0, S_IFCHR, curthread))
1514 		return (-1);
1515 
1516 	cs = &ccd_softc[ccdunit(dev)];
1517 	part = ccdpart(dev);
1518 
1519 	if ((cs->sc_flags & CCDF_INITED) == 0)
1520 		return (-1);
1521 
1522 	if (cs->sc_label.d_partitions[part].p_fstype != FS_SWAP)
1523 		size = -1;
1524 	else
1525 		size = cs->sc_label.d_partitions[part].p_size;
1526 
1527 	if (ccdclose(dev, 0, S_IFCHR, curthread))
1528 		return (-1);
1529 
1530 	return (size);
1531 }
1532 
/*
 * Dump entry point.  Not implemented for ccd: every call fails with
 * ENXIO and 'dev' is not examined.
 */
static int
ccddump(dev_t dev)
{
	return (ENXIO);
}
1541 
1542 /*
1543  * Lookup the provided name in the filesystem.  If the file exists,
1544  * is a valid block device, and isn't being used by anyone else,
1545  * set *vpp to the file's vnode.
1546  */
1547 static int
1548 ccdlookup(char *path, struct thread *td, struct vnode **vpp)
1549 {
1550 	struct nameidata nd;
1551 	struct vnode *vp;
1552 	int error;
1553 	struct ucred *cred;
1554 
1555 	KKASSERT(td->td_proc);
1556 	cred = td->td_proc->p_ucred;
1557 
1558 	NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW, UIO_USERSPACE, path, td);
1559 	if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) {
1560 #ifdef DEBUG
1561 		if (ccddebug & CCDB_FOLLOW|CCDB_INIT)
1562 			printf("ccdlookup: vn_open error = %d\n", error);
1563 #endif
1564 		return (error);
1565 	}
1566 	vp = nd.ni_vp;
1567 
1568 	if (vp->v_usecount > 1) {
1569 		error = EBUSY;
1570 		goto bad;
1571 	}
1572 
1573 	if (!vn_isdisk(vp, &error))
1574 		goto bad;
1575 
1576 #ifdef DEBUG
1577 	if (ccddebug & CCDB_VNODE)
1578 		vprint("ccdlookup: vnode info", vp);
1579 #endif
1580 
1581 	VOP_UNLOCK(vp, 0, td);
1582 	NDFREE(&nd, NDF_ONLY_PNBUF);
1583 	*vpp = vp;
1584 	return (0);
1585 bad:
1586 	VOP_UNLOCK(vp, 0, td);
1587 	NDFREE(&nd, NDF_ONLY_PNBUF);
1588 	/* vn_close does vrele() for vp */
1589 	(void)vn_close(vp, FREAD|FWRITE, td);
1590 	return (error);
1591 }
1592 
1593 /*
1594  * Read the disklabel from the ccd.  If one is not present, fake one
1595  * up.
1596  */
1597 static void
1598 ccdgetdisklabel(dev)
1599 	dev_t dev;
1600 {
1601 	int unit = ccdunit(dev);
1602 	struct ccd_softc *cs = &ccd_softc[unit];
1603 	char *errstring;
1604 	struct disklabel *lp = &cs->sc_label;
1605 	struct ccdgeom *ccg = &cs->sc_geom;
1606 
1607 	bzero(lp, sizeof(*lp));
1608 
1609 	lp->d_secperunit = cs->sc_size;
1610 	lp->d_secsize = ccg->ccg_secsize;
1611 	lp->d_nsectors = ccg->ccg_nsectors;
1612 	lp->d_ntracks = ccg->ccg_ntracks;
1613 	lp->d_ncylinders = ccg->ccg_ncylinders;
1614 	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
1615 
1616 	strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename));
1617 	lp->d_type = DTYPE_CCD;
1618 	strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
1619 	lp->d_rpm = 3600;
1620 	lp->d_interleave = 1;
1621 	lp->d_flags = 0;
1622 
1623 	lp->d_partitions[RAW_PART].p_offset = 0;
1624 	lp->d_partitions[RAW_PART].p_size = cs->sc_size;
1625 	lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED;
1626 	lp->d_npartitions = RAW_PART + 1;
1627 
1628 	lp->d_bbsize = BBSIZE;				/* XXX */
1629 	lp->d_sbsize = SBSIZE;				/* XXX */
1630 
1631 	lp->d_magic = DISKMAGIC;
1632 	lp->d_magic2 = DISKMAGIC;
1633 	lp->d_checksum = dkcksum(&cs->sc_label);
1634 
1635 	/*
1636 	 * Call the generic disklabel extraction routine.
1637 	 */
1638 	errstring = readdisklabel(CCDLABELDEV(dev), &cs->sc_label);
1639 	if (errstring != NULL)
1640 		ccdmakedisklabel(cs);
1641 
1642 #ifdef DEBUG
1643 	/* It's actually extremely common to have unlabeled ccds. */
1644 	if (ccddebug & CCDB_LABEL)
1645 		if (errstring != NULL)
1646 			printf("ccd%d: %s\n", unit, errstring);
1647 #endif
1648 }
1649 
1650 /*
1651  * Take care of things one might want to take care of in the event
1652  * that a disklabel isn't present.
1653  */
1654 static void
1655 ccdmakedisklabel(cs)
1656 	struct ccd_softc *cs;
1657 {
1658 	struct disklabel *lp = &cs->sc_label;
1659 
1660 	/*
1661 	 * For historical reasons, if there's no disklabel present
1662 	 * the raw partition must be marked FS_BSDFFS.
1663 	 */
1664 	lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS;
1665 
1666 	strncpy(lp->d_packname, "default label", sizeof(lp->d_packname));
1667 }
1668 
1669 /*
1670  * Wait interruptibly for an exclusive lock.
1671  *
1672  * XXX
1673  * Several drivers do this; it should be abstracted and made MP-safe.
1674  */
1675 static int
1676 ccdlock(cs)
1677 	struct ccd_softc *cs;
1678 {
1679 	int error;
1680 
1681 	while ((cs->sc_flags & CCDF_LOCKED) != 0) {
1682 		cs->sc_flags |= CCDF_WANTED;
1683 		if ((error = tsleep(cs, PCATCH, "ccdlck", 0)) != 0)
1684 			return (error);
1685 	}
1686 	cs->sc_flags |= CCDF_LOCKED;
1687 	return (0);
1688 }
1689 
1690 /*
1691  * Unlock and wake up any waiters.
1692  */
1693 static void
1694 ccdunlock(cs)
1695 	struct ccd_softc *cs;
1696 {
1697 
1698 	cs->sc_flags &= ~CCDF_LOCKED;
1699 	if ((cs->sc_flags & CCDF_WANTED) != 0) {
1700 		cs->sc_flags &= ~CCDF_WANTED;
1701 		wakeup(cs);
1702 	}
1703 }
1704 
#ifdef DEBUG
/*
 * Debug helper: print each entry of the interleave table starting at
 * 'ii', followed by its index list.  The walk stops at the first entry
 * whose ii_ndisk is zero.
 */
static void
printiinfo(struct ccdiinfo *ii)
{
	int entry = 0;

	while (ii->ii_ndisk) {
		int slot;

		printf(" itab[%d]: #dk %d sblk %d soff %d",
		       entry, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff);
		for (slot = 0; slot < ii->ii_ndisk; slot++)
			printf(" %d", ii->ii_index[slot]);
		printf("\n");

		entry++;
		ii++;
	}
}
#endif
1721 
1722 
1723 /* Local Variables: */
1724 /* c-argdecl-indent: 8 */
1725 /* c-continued-statement-offset: 8 */
1726 /* c-indent-level: 8 */
1727 /* End: */
1728