1 /*
2 * Copyright (c) 2007 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 */
35 /*
36 * Copyright (c) 1995 Jason R. Thorpe.
37 * All rights reserved.
38 *
39 * Redistribution and use in source and binary forms, with or without
40 * modification, are permitted provided that the following conditions
41 * are met:
42 * 1. Redistributions of source code must retain the above copyright
43 * notice, this list of conditions and the following disclaimer.
44 * 2. Redistributions in binary form must reproduce the above copyright
45 * notice, this list of conditions and the following disclaimer in the
46 * documentation and/or other materials provided with the distribution.
47 * 3. All advertising materials mentioning features or use of this software
48 * must display the following acknowledgement:
49 * This product includes software developed for the NetBSD Project
50 * by Jason R. Thorpe.
51 * 4. The name of the author may not be used to endorse or promote products
52 * derived from this software without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
55 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
56 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
57 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
58 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
59 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
60 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
61 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
62 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 */
66
67 /*
68 * Copyright (c) 1988 University of Utah.
69 * Copyright (c) 1990, 1993
70 * The Regents of the University of California. All rights reserved.
71 *
72 * This code is derived from software contributed to Berkeley by
73 * the Systems Programming Group of the University of Utah Computer
74 * Science Department.
75 *
76 * Redistribution and use in source and binary forms, with or without
77 * modification, are permitted provided that the following conditions
78 * are met:
79 * 1. Redistributions of source code must retain the above copyright
80 * notice, this list of conditions and the following disclaimer.
81 * 2. Redistributions in binary form must reproduce the above copyright
82 * notice, this list of conditions and the following disclaimer in the
83 * documentation and/or other materials provided with the distribution.
84 * 3. All advertising materials mentioning features or use of this software
85 * must display the following acknowledgement:
86 * This product includes software developed by the University of
87 * California, Berkeley and its contributors.
88 * 4. Neither the name of the University nor the names of its contributors
89 * may be used to endorse or promote products derived from this software
90 * without specific prior written permission.
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
93 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
95 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
96 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
97 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
98 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
99 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
100 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
101 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
102 * SUCH DAMAGE.
103 *
104 * from: Utah $Hdr: cd.c 1.6 90/11/28$
105 */
106 /*
107 * @(#)cd.c 8.2 (Berkeley) 11/16/93
108 * $FreeBSD: src/sys/dev/ccd/ccd.c,v 1.73.2.1 2001/09/11 09:49:52 kris Exp $
109 * $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $
110 */
111
112 /*
113 * "Concatenated" disk driver.
114 *
115 * Original dynamic configuration support by:
116 * Jason R. Thorpe <thorpej@nas.nasa.gov>
117 * Numerical Aerodynamic Simulation Facility
118 * Mail Stop 258-6
119 * NASA Ames Research Center
120 * Moffett Field, CA 94035
121 */
122
123 #include "use_ccd.h"
124
125 #include <sys/param.h>
126 #include <sys/systm.h>
127 #include <sys/kernel.h>
128 #include <sys/module.h>
129 #include <sys/proc.h>
130 #include <sys/buf.h>
131 #include <sys/malloc.h>
132 #include <sys/nlookup.h>
133 #include <sys/conf.h>
134 #include <sys/stat.h>
135 #include <sys/sysctl.h>
136 #include <sys/disk.h>
137 #include <sys/dtype.h>
138 #include <sys/diskslice.h>
139 #include <sys/devicestat.h>
140 #include <sys/fcntl.h>
141 #include <sys/vnode.h>
142 #include <sys/ccdvar.h>
143
144 #include <vm/vm_zone.h>
145
146 #include <vfs/ufs/dinode.h> /* XXX Used only for fs.h */
147 #include <vfs/ufs/fs.h> /* XXX used only to get BBSIZE and SBSIZE */
148
149 #include <sys/buf2.h>
150
151 #if defined(CCDDEBUG) && !defined(DEBUG)
152 #define DEBUG
153 #endif
154
#ifdef DEBUG
/*
 * Debug categories.  They are OR-ed together in ccddebug, which the
 * DEBUG-only kprintf() calls in this file test before printing.
 */
#define CCDB_FOLLOW	0x01
#define CCDB_INIT	0x02
#define CCDB_IO		0x04
#define CCDB_LABEL	0x08
#define CCDB_VNODE	0x10
static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL |
    CCDB_VNODE;
/* Exported read/write as a sysctl under the _debug node. */
SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, "");
/*
 * DEBUG is undefined again right here, so every later "#ifdef DEBUG"
 * section is compiled out unless DEBUG is redefined further down.
 * NOTE(review): several of those sections reference names that are not
 * declared in their function (e.g. 'flags' in ccdopen()), so they
 * presumably would not build if re-enabled - confirm before removing
 * this #undef.
 */
#undef DEBUG
#endif
166
167 #define ccdunit(x) dkunit(x)
168 #define ccdpart(x) dkpart(x)
169
170 /*
171 This is how mirroring works (only writes are special):
172
173 When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s
174 linked together by the cb_mirror field. "cb_pflags &
175 CCDPF_MIRROR_DONE" is set to 0 on both of them.
176
177 When a component returns to ccdiodone(), it checks if "cb_pflags &
178 CCDPF_MIRROR_DONE" is set or not. If not, it sets the partner's
179 flag and returns. If it is, it means its partner has already
180 returned, so it will go to the regular cleanup.
181
182 */
183
/*
 * Per-component I/O request.  The embedded struct buf is what gets handed
 * to the component's vnode via vn_strategy(); the remaining fields are
 * filled in by ccdbuffer() as context for the completion handler.
 */
struct ccdbuf {
	struct buf	cb_buf;		/* new I/O buf */
	struct vnode	*cb_vp;		/* related vnode */
	struct bio	*cb_obio;	/* ptr. to original I/O buf */
	int		cb_unit;	/* target unit */
	int		cb_comp;	/* target component */
	int		cb_pflags;	/* mirror/parity status flag */
	struct ccdbuf	*cb_mirror;	/* mirror counterpart */
};
193
194 /* bits in cb_pflags */
195 #define CCDPF_MIRROR_DONE 1 /* if set, mirror counterpart is done */
196
197 static d_open_t ccdopen;
198 static d_close_t ccdclose;
199 static d_strategy_t ccdstrategy;
200 static d_ioctl_t ccdioctl;
201 static d_dump_t ccddump;
202
/*
 * Device operations vector for the ccd disk devices.  open, close, ioctl,
 * strategy and dump are the static handlers declared above; read and write
 * use physread/physwrite.
 */
static struct dev_ops ccd_ops = {
	{ "ccd", 0, D_DISK | D_MPSAFE },
	.d_open =	ccdopen,
	.d_close =	ccdclose,
	.d_read =	physread,
	.d_write =	physwrite,
	.d_ioctl =	ccdioctl,
	.d_strategy =	ccdstrategy,
	.d_dump =	ccddump
};
213
214 /* called during module initialization */
215 static void ccdattach (void);
216 static int ccddetach (void);
217 static int ccd_modevent (module_t, int, void *);
218
219 /* called by biodone() at interrupt time */
220 static void ccdiodone (struct bio *bio);
221
222 static void ccdstart (struct ccd_softc *, struct bio *);
223 static void ccdinterleave (struct ccd_softc *, int);
224 static void ccdintr (struct ccd_softc *, struct bio *);
225 static int ccdinit (struct ccddevice *, char **, struct ucred *);
226 static int ccdlookup (char *, struct vnode **);
227 static void ccdbuffer (struct ccdbuf **ret, struct ccd_softc *,
228 struct bio *, off_t, caddr_t, long);
229 static int ccdlock (struct ccd_softc *);
230 static void ccdunlock (struct ccd_softc *);
231
232 #ifdef DEBUG
233 static void printiinfo (struct ccdiinfo *);
234 #endif
235
236 /* Non-private for the benefit of libkvm. */
237 struct ccd_softc *ccd_softc;
238 struct ccddevice *ccddevs;
239 static int numccd = 0;
240
241 /*
242 * getccdbuf() - Allocate and zero a ccd buffer.
243 */
244 static struct ccdbuf *
getccdbuf(void)245 getccdbuf(void)
246 {
247 struct ccdbuf *cbp;
248
249 cbp = kmalloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK | M_ZERO);
250 initbufbio(&cbp->cb_buf);
251
252 /*
253 * independant struct buf initialization
254 */
255 buf_dep_init(&cbp->cb_buf);
256 BUF_LOCK(&cbp->cb_buf, LK_EXCLUSIVE);
257 BUF_KERNPROC(&cbp->cb_buf);
258 cbp->cb_buf.b_flags = B_PAGING | B_BNOCLIP;
259
260 return(cbp);
261 }
262
263 /*
264 * putccdbuf() - Free a ccd buffer.
265 */
266 static void
putccdbuf(struct ccdbuf * cbp)267 putccdbuf(struct ccdbuf *cbp)
268 {
269 BUF_UNLOCK(&cbp->cb_buf);
270
271 uninitbufbio(&cbp->cb_buf);
272 kfree(cbp, M_DEVBUF);
273 }
274
275 /*
276 * Called by main() during pseudo-device attachment. All we need
277 * to do is allocate enough space for devices to be configured later, and
278 * add devsw entries.
279 */
280 static void
ccdattach(void)281 ccdattach(void)
282 {
283 struct disk_info info;
284 struct ccd_softc *cs;
285 int i;
286 int num = NCCD;
287
288 if (num > 1)
289 kprintf("ccd0-%d: Concatenated disk drivers\n", num-1);
290 else
291 kprintf("ccd0: Concatenated disk driver\n");
292
293 ccd_softc = kmalloc(num * sizeof(struct ccd_softc), M_DEVBUF,
294 M_WAITOK | M_ZERO);
295 ccddevs = kmalloc(num * sizeof(struct ccddevice), M_DEVBUF,
296 M_WAITOK | M_ZERO);
297 numccd = num;
298
299 /*
300 * With normal disk devices the open simply fails if the media
301 * is not present. With CCD we have to be able to open the
302 * raw disk to use the ioctl's to set it up, so create a dummy
303 * disk info structure so dscheck() doesn't blow up.
304 */
305 bzero(&info, sizeof(info));
306 info.d_media_blksize = DEV_BSIZE;
307
308 for (i = 0; i < numccd; ++i) {
309 cs = &ccd_softc[i];
310 cs->sc_dev = disk_create(i, &cs->sc_disk, &ccd_ops);
311 cs->sc_dev->si_drv1 = cs;
312 cs->sc_dev->si_iosize_max = 256 * 512; /* XXX */
313 disk_setdiskinfo(&cs->sc_disk, &info);
314 }
315 }
316
317 static int
ccddetach(void)318 ccddetach(void)
319 {
320 struct ccd_softc *cs;
321 struct dev_ioctl_args ioctl_args;
322 int i;
323 int error = 0;
324 int eval;
325
326 bzero(&ioctl_args, sizeof(ioctl_args));
327
328 for (i = 0; i < numccd; ++i) {
329 cs = &ccd_softc[i];
330 if (cs->sc_dev == NULL)
331 continue;
332 ioctl_args.a_head.a_dev = cs->sc_dev;
333 ioctl_args.a_cmd = CCDIOCCLR;
334 ioctl_args.a_fflag = FWRITE;
335 eval = ccdioctl(&ioctl_args);
336 if (eval && eval != ENXIO) {
337 kprintf("ccd%d: In use, cannot detach\n", i);
338 error = EBUSY;
339 }
340 }
341 if (error == 0) {
342 for (i = 0; i < numccd; ++i) {
343 cs = &ccd_softc[i];
344 if (cs->sc_dev == NULL)
345 continue;
346 disk_destroy(&cs->sc_disk);
347 cs->sc_dev = NULL;
348 }
349 if (ccd_softc)
350 kfree(ccd_softc, M_DEVBUF);
351 if (ccddevs)
352 kfree(ccddevs, M_DEVBUF);
353 }
354 return (error);
355 }
356
357 static int
ccd_modevent(module_t mod,int type,void * data)358 ccd_modevent(module_t mod, int type, void *data)
359 {
360 int error = 0;
361
362 switch (type) {
363 case MOD_LOAD:
364 ccdattach();
365 break;
366
367 case MOD_UNLOAD:
368 error = ccddetach();
369 break;
370
371 default: /* MOD_SHUTDOWN etc */
372 break;
373 }
374 return (error);
375 }
376
377 DEV_MODULE(ccd, ccd_modevent, NULL);
378
379 static int
ccdinit(struct ccddevice * ccd,char ** cpaths,struct ucred * cred)380 ccdinit(struct ccddevice *ccd, char **cpaths, struct ucred *cred)
381 {
382 struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit];
383 struct ccdcinfo *ci = NULL; /* XXX */
384 int ix;
385 struct vnode *vp;
386 u_int64_t skip;
387 u_int64_t size;
388 u_int64_t minsize;
389 int maxsecsize;
390 struct partinfo dpart;
391 struct ccdgeom *ccg = &cs->sc_geom;
392 char tmppath[MAXPATHLEN];
393 int error = 0;
394
395 #ifdef DEBUG
396 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
397 kprintf("ccdinit: unit %d\n", ccd->ccd_unit);
398 #endif
399
400 cs->sc_size = 0;
401 cs->sc_ileave = ccd->ccd_interleave;
402 cs->sc_nccdisks = ccd->ccd_ndev;
403
404 /* Allocate space for the component info. */
405 cs->sc_cinfo = kmalloc(cs->sc_nccdisks * sizeof(struct ccdcinfo),
406 M_DEVBUF, M_WAITOK);
407 cs->sc_maxiosize = MAXPHYS;
408
409 lockinit(&cs->sc_lock, "ccdlck", 0, 0);
410 ccdlock(cs);
411
412 /*
413 * Verify that each component piece exists and record
414 * relevant information about it.
415 */
416 maxsecsize = 0;
417 minsize = 0;
418 for (ix = 0; ix < cs->sc_nccdisks; ix++) {
419 vp = ccd->ccd_vpp[ix];
420 ci = &cs->sc_cinfo[ix];
421 ci->ci_vp = vp;
422
423 /*
424 * Copy in the pathname of the component.
425 */
426 bzero(tmppath, sizeof(tmppath)); /* sanity */
427 if ((error = copyinstr(cpaths[ix], tmppath,
428 MAXPATHLEN, &ci->ci_pathlen)) != 0) {
429 #ifdef DEBUG
430 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
431 kprintf("ccd%d: can't copy path, error = %d\n",
432 ccd->ccd_unit, error);
433 #endif
434 goto fail;
435 }
436 ci->ci_path = kmalloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK);
437 bcopy(tmppath, ci->ci_path, ci->ci_pathlen);
438
439 ci->ci_dev = vn_todev(vp);
440 if (ci->ci_dev->si_iosize_max &&
441 cs->sc_maxiosize > ci->ci_dev->si_iosize_max) {
442 cs->sc_maxiosize = ci->ci_dev->si_iosize_max;
443 }
444
445 /*
446 * Get partition information for the component.
447 */
448 error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart, FREAD,
449 cred, NULL);
450 if (error) {
451 #ifdef DEBUG
452 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
453 kprintf("ccd%d: %s: ioctl failed, error = %d\n",
454 ccd->ccd_unit, ci->ci_path, error);
455 #endif
456 goto fail;
457 }
458 if (dpart.fstype != FS_CCD &&
459 !kuuid_is_ccd(&dpart.fstype_uuid)) {
460 kprintf("ccd%d: %s: filesystem type must be 'ccd'\n",
461 ccd->ccd_unit, ci->ci_path);
462 error = EFTYPE;
463 goto fail;
464 }
465 if (maxsecsize < dpart.media_blksize)
466 maxsecsize = dpart.media_blksize;
467
468 /*
469 * Skip a certain amount of storage at the beginning of
470 * the component to make sure we don't infringe on any
471 * reserved sectors. This is handled entirely by
472 * dpart.reserved_blocks but we also impose a minimum
473 * of 16 sectors for backwards compatibility.
474 */
475 skip = 16;
476 if (skip < dpart.reserved_blocks)
477 skip = dpart.reserved_blocks;
478 size = dpart.media_blocks - skip;
479
480 /*
481 * Calculate the size, truncating to an interleave
482 * boundary if necessary.
483 */
484 if (cs->sc_ileave > 1)
485 size -= size % cs->sc_ileave;
486
487 if ((int64_t)size <= 0) {
488 #ifdef DEBUG
489 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
490 kprintf("ccd%d: %s: size == 0\n",
491 ccd->ccd_unit, ci->ci_path);
492 #endif
493 error = ENODEV;
494 goto fail;
495 }
496
497 /*
498 * Calculate the smallest uniform component, used
499 * elsewhere.
500 */
501 if (minsize == 0 || minsize > size)
502 minsize = size;
503 ci->ci_skip = skip;
504 ci->ci_size = size;
505 cs->sc_size += size;
506 }
507 kprintf("ccd%d: max component iosize is %d total blocks %lld\n",
508 cs->sc_unit, cs->sc_maxiosize, (long long)cs->sc_size);
509
510 /*
511 * Don't allow the interleave to be smaller than
512 * the biggest component sector.
513 */
514 if ((cs->sc_ileave > 0) &&
515 (cs->sc_ileave % (maxsecsize / DEV_BSIZE))) {
516 #ifdef DEBUG
517 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
518 kprintf("ccd%d: interleave must be at least %d\n",
519 ccd->ccd_unit, (maxsecsize / DEV_BSIZE));
520 #endif
521 error = EINVAL;
522 goto fail;
523 }
524
525 /*
526 * If uniform interleave is desired set all sizes to that of
527 * the smallest component. This will guarentee that a single
528 * interleave table is generated.
529 *
530 * Lost space must be taken into account when calculating the
531 * overall size. Half the space is lost when CCDF_MIRROR is
532 * specified. One disk is lost when CCDF_PARITY is specified.
533 */
534 if (ccd->ccd_flags & CCDF_UNIFORM) {
535 for (ci = cs->sc_cinfo;
536 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) {
537 ci->ci_size = minsize;
538 }
539 if (ccd->ccd_flags & CCDF_MIRROR) {
540 /*
541 * Check to see if an even number of components
542 * have been specified. The interleave must also
543 * be non-zero in order for us to be able to
544 * guarentee the topology.
545 */
546 if (cs->sc_nccdisks % 2) {
547 kprintf("ccd%d: mirroring requires an even number of disks\n", ccd->ccd_unit );
548 error = EINVAL;
549 goto fail;
550 }
551 if (cs->sc_ileave == 0) {
552 kprintf("ccd%d: an interleave must be specified when mirroring\n", ccd->ccd_unit);
553 error = EINVAL;
554 goto fail;
555 }
556 cs->sc_size = (cs->sc_nccdisks/2) * minsize;
557 } else if (ccd->ccd_flags & CCDF_PARITY) {
558 cs->sc_size = (cs->sc_nccdisks-1) * minsize;
559 } else {
560 if (cs->sc_ileave == 0) {
561 kprintf("ccd%d: an interleave must be specified when using parity\n", ccd->ccd_unit);
562 error = EINVAL;
563 goto fail;
564 }
565 cs->sc_size = cs->sc_nccdisks * minsize;
566 }
567 }
568
569 /*
570 * Construct the interleave table.
571 */
572 ccdinterleave(cs, ccd->ccd_unit);
573
574 /*
575 * Create pseudo-geometry based on 1MB cylinders. It's
576 * pretty close.
577 */
578 ccg->ccg_secsize = maxsecsize;
579 ccg->ccg_ntracks = 1;
580 ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize;
581 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors;
582
583 /*
584 * Add an devstat entry for this device.
585 */
586 devstat_add_entry(&cs->device_stats, "ccd", ccd->ccd_unit,
587 ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED,
588 DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER,
589 DEVSTAT_PRIORITY_ARRAY);
590
591 cs->sc_flags |= CCDF_INITED;
592 cs->sc_cflags = ccd->ccd_flags; /* So we can find out later... */
593 cs->sc_unit = ccd->ccd_unit;
594 return (0);
595 fail:
596 while (ci > cs->sc_cinfo) {
597 ci--;
598 kfree(ci->ci_path, M_DEVBUF);
599 }
600 kfree(cs->sc_cinfo, M_DEVBUF);
601 cs->sc_cinfo = NULL;
602 return (error);
603 }
604
605 static void
ccdinterleave(struct ccd_softc * cs,int unit)606 ccdinterleave(struct ccd_softc *cs, int unit)
607 {
608 struct ccdcinfo *ci, *smallci;
609 struct ccdiinfo *ii;
610 u_int64_t bn;
611 u_int64_t lbn;
612 u_int64_t size;
613 int icount;
614 int ix;
615
616 #ifdef DEBUG
617 if (ccddebug & CCDB_INIT)
618 kprintf("ccdinterleave(%x): ileave %d\n", cs, cs->sc_ileave);
619 #endif
620
621 /*
622 * Allocate an interleave table. The worst case occurs when each
623 * of N disks is of a different size, resulting in N interleave
624 * tables.
625 *
626 * Chances are this is too big, but we don't care.
627 */
628 icount = cs->sc_nccdisks + 1;
629 cs->sc_itable = kmalloc(icount * sizeof(struct ccdiinfo),
630 M_DEVBUF, M_WAITOK|M_ZERO);
631
632 /*
633 * Trivial case: no interleave (actually interleave of disk size).
634 * Each table entry represents a single component in its entirety.
635 *
636 * An interleave of 0 may not be used with a mirror or parity setup.
637 */
638 if (cs->sc_ileave == 0) {
639 bn = 0;
640 ii = cs->sc_itable;
641
642 for (ix = 0; ix < cs->sc_nccdisks; ix++) {
643 /* Allocate space for ii_index. */
644 ii->ii_index = kmalloc(sizeof(int), M_DEVBUF, M_WAITOK);
645 ii->ii_ndisk = 1;
646 ii->ii_startblk = bn;
647 ii->ii_startoff = 0;
648 ii->ii_index[0] = ix;
649 bn += cs->sc_cinfo[ix].ci_size;
650 ii++;
651 }
652 ii->ii_ndisk = 0;
653 #ifdef DEBUG
654 if (ccddebug & CCDB_INIT)
655 printiinfo(cs->sc_itable);
656 #endif
657 return;
658 }
659
660 /*
661 * The following isn't fast or pretty; it doesn't have to be.
662 */
663 size = 0;
664 bn = lbn = 0;
665 for (ii = cs->sc_itable; ii < &cs->sc_itable[icount]; ++ii) {
666 /*
667 * Allocate space for ii_index. We might allocate more then
668 * we use.
669 */
670 ii->ii_index = kmalloc((sizeof(int) * cs->sc_nccdisks),
671 M_DEVBUF, M_WAITOK);
672
673 /*
674 * Locate the smallest of the remaining components
675 */
676 smallci = NULL;
677 ci = cs->sc_cinfo;
678 while (ci < &cs->sc_cinfo[cs->sc_nccdisks]) {
679 if (ci->ci_size > size &&
680 (smallci == NULL ||
681 ci->ci_size < smallci->ci_size)) {
682 smallci = ci;
683 }
684 ++ci;
685 }
686
687 /*
688 * Nobody left, all done
689 */
690 if (smallci == NULL) {
691 ii->ii_ndisk = 0;
692 break;
693 }
694
695 /*
696 * Record starting logical block using an sc_ileave blocksize.
697 */
698 ii->ii_startblk = bn / cs->sc_ileave;
699
700 /*
701 * Record starting component block using an sc_ileave
702 * blocksize. This value is relative to the beginning of
703 * a component disk.
704 */
705 ii->ii_startoff = lbn;
706
707 /*
708 * Determine how many disks take part in this interleave
709 * and record their indices.
710 */
711 ix = 0;
712 for (ci = cs->sc_cinfo;
713 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) {
714 if (ci->ci_size >= smallci->ci_size) {
715 ii->ii_index[ix++] = ci - cs->sc_cinfo;
716 }
717 }
718 ii->ii_ndisk = ix;
719
720 /*
721 * Adjust for loop
722 */
723 bn += ix * (smallci->ci_size - size);
724 lbn = smallci->ci_size / cs->sc_ileave;
725 size = smallci->ci_size;
726 }
727 if (ii == &cs->sc_itable[icount])
728 panic("ccdinterlave software bug! table exhausted");
729 #ifdef DEBUG
730 if (ccddebug & CCDB_INIT)
731 printiinfo(cs->sc_itable);
732 #endif
733 }
734
735 /* ARGSUSED */
736 static int
ccdopen(struct dev_open_args * ap)737 ccdopen(struct dev_open_args *ap)
738 {
739 cdev_t dev = ap->a_head.a_dev;
740 int unit = ccdunit(dev);
741 struct ccd_softc *cs;
742 int error = 0;
743
744 #ifdef DEBUG
745 if (ccddebug & CCDB_FOLLOW)
746 kprintf("ccdopen(%x, %x)\n", dev, flags);
747 #endif
748 if (unit >= numccd)
749 return (ENXIO);
750 cs = &ccd_softc[unit];
751
752 if ((error = ccdlock(cs)) == 0) {
753 ccdunlock(cs);
754 }
755 return (error);
756 }
757
758 /* ARGSUSED */
759 static int
ccdclose(struct dev_close_args * ap)760 ccdclose(struct dev_close_args *ap)
761 {
762 cdev_t dev = ap->a_head.a_dev;
763 int unit = ccdunit(dev);
764 struct ccd_softc *cs;
765 int error = 0;
766
767 #ifdef DEBUG
768 if (ccddebug & CCDB_FOLLOW)
769 kprintf("ccdclose(%x, %x)\n", dev, flags);
770 #endif
771
772 if (unit >= numccd)
773 return (ENXIO);
774 cs = &ccd_softc[unit];
775 if ((error = ccdlock(cs)) == 0) {
776 ccdunlock(cs);
777 }
778 return (error);
779 }
780
781 static int
ccdstrategy(struct dev_strategy_args * ap)782 ccdstrategy(struct dev_strategy_args *ap)
783 {
784 cdev_t dev = ap->a_head.a_dev;
785 struct bio *bio = ap->a_bio;
786 int unit = ccdunit(dev);
787 struct bio *nbio;
788 struct buf *bp = bio->bio_buf;
789 struct ccd_softc *cs = &ccd_softc[unit];
790 u_int64_t pbn; /* in sc_secsize chunks */
791 u_int32_t sz; /* in sc_secsize chunks */
792
793 #ifdef DEBUG
794 if (ccddebug & CCDB_FOLLOW)
795 kprintf("ccdstrategy(%x): unit %d\n", bp, unit);
796 #endif
797 if ((cs->sc_flags & CCDF_INITED) == 0) {
798 bp->b_error = ENXIO;
799 goto error;
800 }
801
802 /* If it's a nil transfer, wake up the top half now. */
803 if (bp->b_bcount == 0) {
804 bp->b_resid = 0;
805 goto done;
806 }
807
808 /*
809 * Do bounds checking and adjust transfer. If there's an
810 * error, the bounds check will flag that for us.
811 */
812
813 pbn = bio->bio_offset / cs->sc_geom.ccg_secsize;
814 sz = howmany(bp->b_bcount, cs->sc_geom.ccg_secsize);
815
816 /*
817 * If out of bounds return an error. If the request goes
818 * past EOF, clip the request as appropriate. If exactly
819 * at EOF, return success (don't clip), but with 0 bytes
820 * of I/O.
821 *
822 * Mark EOF B_INVAL (just like bad), indicating that the
823 * contents of the buffer, if any, is invalid.
824 */
825 if ((int64_t)pbn < 0)
826 goto bad;
827 if (pbn + sz > cs->sc_size) {
828 if (pbn > cs->sc_size || (bp->b_flags & B_BNOCLIP))
829 goto bad;
830 if (pbn == cs->sc_size) {
831 bp->b_resid = bp->b_bcount;
832 bp->b_flags |= B_INVAL;
833 goto done;
834 }
835 sz = (long)(cs->sc_size - pbn);
836 bp->b_bcount = sz * cs->sc_geom.ccg_secsize;
837 }
838 nbio = bio;
839
840 bp->b_resid = bp->b_bcount;
841 nbio->bio_driver_info = dev;
842
843 /*
844 * "Start" the unit.
845 */
846 ccdstart(cs, nbio);
847 return(0);
848
849 /*
850 * note: bio, not nbio, is valid at the done label.
851 */
852 bad:
853 bp->b_error = EINVAL;
854 error:
855 bp->b_resid = bp->b_bcount;
856 bp->b_flags |= B_ERROR | B_INVAL;
857 done:
858 biodone(bio);
859 return(0);
860 }
861
862 static void
ccdstart(struct ccd_softc * cs,struct bio * bio)863 ccdstart(struct ccd_softc *cs, struct bio *bio)
864 {
865 long bcount, rcount;
866 struct ccdbuf *cbp[4];
867 struct buf *bp = bio->bio_buf;
868 /* XXX! : 2 reads and 2 writes for RAID 4/5 */
869 caddr_t addr;
870 off_t doffset;
871
872 #ifdef DEBUG
873 if (ccddebug & CCDB_FOLLOW)
874 kprintf("ccdstart(%x, %x)\n", cs, bp);
875 #endif
876
877 /* Record the transaction start */
878 devstat_start_transaction(&cs->device_stats);
879
880 /*
881 * Allocate component buffers and fire off the requests
882 */
883 doffset = bio->bio_offset;
884 addr = bp->b_data;
885
886 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) {
887 ccdbuffer(cbp, cs, bio, doffset, addr, bcount);
888 rcount = cbp[0]->cb_buf.b_bcount;
889
890 if (cs->sc_cflags & CCDF_MIRROR) {
891 /*
892 * Mirroring. Writes go to both disks, reads are
893 * taken from whichever disk seems most appropriate.
894 *
895 * We attempt to localize reads to the disk whos arm
896 * is nearest the read request. We ignore seeks due
897 * to writes when making this determination and we
898 * also try to avoid hogging.
899 */
900 if (cbp[0]->cb_buf.b_cmd != BUF_CMD_READ) {
901 vn_strategy(cbp[0]->cb_vp,
902 &cbp[0]->cb_buf.b_bio1);
903 vn_strategy(cbp[1]->cb_vp,
904 &cbp[1]->cb_buf.b_bio1);
905 } else {
906 int pick = cs->sc_pick;
907 daddr_t range = cs->sc_size / 16 * cs->sc_geom.ccg_secsize;
908 if (doffset < cs->sc_blk[pick] - range ||
909 doffset > cs->sc_blk[pick] + range
910 ) {
911 cs->sc_pick = pick = 1 - pick;
912 }
913 cs->sc_blk[pick] = doffset + rcount;
914 vn_strategy(cbp[pick]->cb_vp,
915 &cbp[pick]->cb_buf.b_bio1);
916 }
917 } else {
918 /*
919 * Not mirroring
920 */
921 vn_strategy(cbp[0]->cb_vp,
922 &cbp[0]->cb_buf.b_bio1);
923 }
924 doffset += rcount;
925 addr += rcount;
926 }
927 }
928
/*
 * Build a component buffer header.
 *
 * cb:      output array; cb[0] always receives the component buffer and,
 *          when the unit is a mirror, cb[1] receives the buffer for the
 *          mirror component.
 * cs:      unit the request is for.
 * bio:     original request; its buf supplies the command and flags.
 * doffset: byte offset of this piece within the ccd.
 * addr:    data address for this piece.
 * bcount:  number of bytes still to be transferred.
 *
 * The resulting b_bcount is bcount limited by what is left in the
 * component (interleave 0) or in the current interleave stripe, and by
 * sc_maxiosize.  The caller reads it back from cb[0] to advance.
 */
static void
ccdbuffer(struct ccdbuf **cb, struct ccd_softc *cs, struct bio *bio,
    off_t doffset, caddr_t addr, long bcount)
{
	struct ccdcinfo *ci, *ci2 = NULL;	/* XXX */
	struct ccdbuf *cbp;
	u_int64_t bn;
	u_int64_t cbn;
	u_int64_t cboff;
	off_t cbc;

#ifdef DEBUG
	if (ccddebug & CCDB_IO)
		kprintf("ccdbuffer(%x, %x, %d, %x, %d)\n",
		    cs, bp, bn, addr, bcount);
#endif
	/*
	 * Determine which component bn falls in.  bn is the request
	 * offset expressed in sectors of ccg_secsize bytes.
	 */
	bn = doffset / cs->sc_geom.ccg_secsize;
	cbn = bn;
	cboff = 0;

	if (cs->sc_ileave == 0) {
		/*
		 * Serially concatenated and neither a mirror nor a parity
		 * config.  This is a special case.
		 *
		 * Walk the components, accumulating their sizes, until
		 * the one containing cbn is found; cbn then becomes the
		 * block number relative to that component.
		 */
		daddr_t sblk;

		sblk = 0;
		for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++)
			sblk += ci->ci_size;
		cbn -= sblk;
	} else {
		struct ccdiinfo *ii;
		int ccdisk, off;

		/*
		 * Calculate cbn, the logical superblock (sc_ileave chunks),
		 * and cboff, a normal block offset (DEV_BSIZE chunks) relative
		 * to cbn.
		 */
		cboff = cbn % cs->sc_ileave;	/* DEV_BSIZE gran */
		cbn = cbn / cs->sc_ileave;	/* DEV_BSIZE * ileave gran */

		/*
		 * Figure out which interleave table to use: stop at the
		 * first entry that starts beyond cbn (or at the
		 * terminator) and step back one entry.
		 */
		for (ii = cs->sc_itable; ii->ii_ndisk; ii++) {
			if (ii->ii_startblk > cbn)
				break;
		}
		ii--;

		/*
		 * off is the logical superblock relative to the beginning
		 * of this interleave block.
		 */
		off = cbn - ii->ii_startblk;

		/*
		 * We must calculate which disk component to use (ccdisk),
		 * and recalculate cbn to be the superblock relative to
		 * the beginning of the component.  This is typically done by
		 * adding 'off' and ii->ii_startoff together.  However, 'off'
		 * must typically be divided by the number of components in
		 * this interleave array to properly convert it from a
		 * CCD-relative logical superblock number to a
		 * component-relative superblock number.
		 */
		if (ii->ii_ndisk == 1) {
			/*
			 * When we have just one disk, it can't be a mirror
			 * or a parity config.
			 */
			ccdisk = ii->ii_index[0];
			cbn = ii->ii_startoff + off;
		} else {
			if (cs->sc_cflags & CCDF_MIRROR) {
				/*
				 * We have forced a uniform mapping, resulting
				 * in a single interleave array.  We double
				 * up on the first half of the available
				 * components and our mirror is in the second
				 * half.  This only works with a single
				 * interleave array because doubling up
				 * doubles the number of sectors, so there
				 * cannot be another interleave array because
				 * the next interleave array's calculations
				 * would be off.
				 */
				int ndisk2 = ii->ii_ndisk / 2;
				ccdisk = ii->ii_index[off % ndisk2];
				cbn = ii->ii_startoff + off / ndisk2;
				ci2 = &cs->sc_cinfo[ccdisk + ndisk2];
			} else if (cs->sc_cflags & CCDF_PARITY) {
				/*
				 * XXX not implemented yet
				 */
				int ndisk2 = ii->ii_ndisk - 1;
				ccdisk = ii->ii_index[off % ndisk2];
				cbn = ii->ii_startoff + off / ndisk2;
				if (cbn % ii->ii_ndisk <= ccdisk)
					ccdisk++;
			} else {
				ccdisk = ii->ii_index[off % ii->ii_ndisk];
				cbn = ii->ii_startoff + off / ii->ii_ndisk;
			}
		}

		ci = &cs->sc_cinfo[ccdisk];

		/*
		 * Convert cbn from a superblock to a normal block so it
		 * can be used to calculate (along with cboff) the normal
		 * block index into this particular disk.
		 */
		cbn *= cs->sc_ileave;
	}

	/*
	 * Fill in the component buf structure.
	 *
	 * NOTE: devices do not use b_bufsize, only b_bcount, but b_bcount
	 *	 will be truncated on device EOF so we use b_bufsize to detect
	 *	 the case.
	 */
	cbp = getccdbuf();
	cbp->cb_buf.b_cmd = bio->bio_buf->b_cmd;
	cbp->cb_buf.b_flags |= bio->bio_buf->b_flags;
	cbp->cb_buf.b_data = addr;
	cbp->cb_vp = ci->ci_vp;
	/*
	 * cbc is the largest byte count this piece may have: the rest of
	 * the component or of the interleave stripe, capped by the unit's
	 * maximum I/O size.
	 */
	if (cs->sc_ileave == 0)
		cbc = dbtob((off_t)(ci->ci_size - cbn));
	else
		cbc = dbtob((off_t)(cs->sc_ileave - cboff));
	if (cbc > cs->sc_maxiosize)
		cbc = cs->sc_maxiosize;
	cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount;
	cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount;

	/* ci_skip offsets the I/O past the reserved start of the component */
	cbp->cb_buf.b_bio1.bio_done = ccdiodone;
	cbp->cb_buf.b_bio1.bio_caller_info1.ptr = cbp;
	cbp->cb_buf.b_bio1.bio_offset = dbtob(cbn + cboff + ci->ci_skip);

	/*
	 * context for ccdiodone
	 */
	cbp->cb_obio = bio;
	cbp->cb_unit = cs - ccd_softc;
	cbp->cb_comp = ci - cs->sc_cinfo;

#ifdef DEBUG
	if (ccddebug & CCDB_IO)
		kprintf(" dev %x(u%d): cbp %x off %lld addr %x bcnt %d\n",
		    ci->ci_dev, ci-cs->sc_cinfo, cbp,
		    cbp->cb_buf.b_bio1.bio_offset,
		    cbp->cb_buf.b_data, cbp->cb_buf.b_bcount);
#endif
	cb[0] = cbp;

	/*
	 * Note: both I/O's setup when reading from mirror, but only one
	 * will be executed.
	 */
	if (cs->sc_cflags & CCDF_MIRROR) {
		/*
		 * mirror, setup second I/O: same data, size and block
		 * position as the first one, but aimed at ci2
		 */
		cbp = getccdbuf();

		cbp->cb_buf.b_cmd = bio->bio_buf->b_cmd;
		cbp->cb_buf.b_flags |= bio->bio_buf->b_flags;
		cbp->cb_buf.b_data = addr;
		cbp->cb_vp = ci2->ci_vp;
		if (cs->sc_ileave == 0)
			cbc = dbtob((off_t)(ci->ci_size - cbn));
		else
			cbc = dbtob((off_t)(cs->sc_ileave - cboff));
		if (cbc > cs->sc_maxiosize)
			cbc = cs->sc_maxiosize;
		cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount;
		cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount;

		cbp->cb_buf.b_bio1.bio_done = ccdiodone;
		cbp->cb_buf.b_bio1.bio_caller_info1.ptr = cbp;
		cbp->cb_buf.b_bio1.bio_offset = dbtob(cbn + cboff + ci2->ci_skip);

		/*
		 * context for ccdiodone
		 */
		cbp->cb_obio = bio;
		cbp->cb_unit = cs - ccd_softc;
		cbp->cb_comp = ci2 - cs->sc_cinfo;
		cb[1] = cbp;
		/* link together the ccdbuf's and clear "mirror done" flag */
		cb[0]->cb_mirror = cb[1];
		cb[1]->cb_mirror = cb[0];
		cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE;
		cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE;
	}
}
1133
1134 static void
ccdintr(struct ccd_softc * cs,struct bio * bio)1135 ccdintr(struct ccd_softc *cs, struct bio *bio)
1136 {
1137 struct buf *bp = bio->bio_buf;
1138
1139 #ifdef DEBUG
1140 if (ccddebug & CCDB_FOLLOW)
1141 kprintf("ccdintr(%x, %x)\n", cs, bp);
1142 #endif
1143 /*
1144 * Request is done for better or worse, wakeup the top half.
1145 */
1146 if (bp->b_flags & B_ERROR)
1147 bp->b_resid = bp->b_bcount;
1148 devstat_end_transaction_buf(&cs->device_stats, bp);
1149 biodone(bio);
1150 }
1151
1152 /*
1153 * Called at interrupt time.
1154 *
1155 * Mark the component as done and if all components are done,
1156 * take a ccd interrupt.
1157 */
1158 static void
ccdiodone(struct bio * bio)1159 ccdiodone(struct bio *bio)
1160 {
1161 struct ccdbuf *cbp = bio->bio_caller_info1.ptr;
1162 struct bio *obio = cbp->cb_obio;
1163 struct buf *obp = obio->bio_buf;
1164 int unit = cbp->cb_unit;
1165 struct ccd_softc *sc = &ccd_softc[unit];
1166 int count;
1167
1168 /*
1169 * Since we do not have exclusive access to underlying devices,
1170 * we can't keep cache translations around.
1171 */
1172 clearbiocache(bio->bio_next);
1173
1174 ccdlock(sc);
1175
1176 #ifdef DEBUG
1177 if (ccddebug & CCDB_FOLLOW)
1178 kprintf("ccdiodone(%x)\n", cbp);
1179 if (ccddebug & CCDB_IO) {
1180 kprintf("ccdiodone: bp %x bcount %d resid %d\n",
1181 obp, obp->b_bcount, obp->b_resid);
1182 kprintf(" dev %x(u%d), cbp %x off %lld addr %x bcnt %d\n",
1183 cbp->cb_buf.b_dev, cbp->cb_comp, cbp,
1184 cbp->cb_buf.b_loffset, cbp->cb_buf.b_data,
1185 cbp->cb_buf.b_bcount);
1186 }
1187 #endif
1188
1189 /*
1190 * If an error occured, report it. If this is a mirrored
1191 * configuration and the first of two possible reads, do not
1192 * set the error in the bp yet because the second read may
1193 * succeed.
1194 */
1195 if (cbp->cb_buf.b_flags & B_ERROR) {
1196 const char *msg = "";
1197
1198 if ((sc->sc_cflags & CCDF_MIRROR) &&
1199 (cbp->cb_buf.b_cmd == BUF_CMD_READ) &&
1200 (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1201 /*
1202 * We will try our read on the other disk down
1203 * below, also reverse the default pick so if we
1204 * are doing a scan we do not keep hitting the
1205 * bad disk first.
1206 */
1207 msg = ", trying other disk";
1208 sc->sc_pick = 1 - sc->sc_pick;
1209 sc->sc_blk[sc->sc_pick] = obio->bio_offset;
1210 } else {
1211 obp->b_flags |= B_ERROR;
1212 obp->b_error = cbp->cb_buf.b_error ?
1213 cbp->cb_buf.b_error : EIO;
1214 }
1215 kprintf("ccd%d: error %d on component %d "
1216 "offset %jd (ccd offset %jd)%s\n",
1217 unit, obp->b_error, cbp->cb_comp,
1218 (intmax_t)cbp->cb_buf.b_bio2.bio_offset,
1219 (intmax_t)obio->bio_offset,
1220 msg);
1221 }
1222
1223 /*
1224 * Process mirror. If we are writing, I/O has been initiated on both
1225 * buffers and we fall through only after both are finished.
1226 *
1227 * If we are reading only one I/O is initiated at a time. If an
1228 * error occurs we initiate the second I/O and return, otherwise
1229 * we free the second I/O without initiating it.
1230 */
1231
1232 if (sc->sc_cflags & CCDF_MIRROR) {
1233 if (cbp->cb_buf.b_cmd != BUF_CMD_READ) {
1234 /*
1235 * When writing, handshake with the second buffer
1236 * to determine when both are done. If both are not
1237 * done, return here.
1238 */
1239 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1240 cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE;
1241 putccdbuf(cbp);
1242 ccdunlock(sc);
1243 return;
1244 }
1245 } else {
1246 /*
1247 * When reading, either dispose of the second buffer
1248 * or initiate I/O on the second buffer if an error
1249 * occured with this one.
1250 */
1251 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
1252 if (cbp->cb_buf.b_flags & B_ERROR) {
1253 cbp->cb_mirror->cb_pflags |=
1254 CCDPF_MIRROR_DONE;
1255 vn_strategy(
1256 cbp->cb_mirror->cb_vp,
1257 &cbp->cb_mirror->cb_buf.b_bio1
1258 );
1259 putccdbuf(cbp);
1260 ccdunlock(sc);
1261 return;
1262 } else {
1263 putccdbuf(cbp->cb_mirror);
1264 /* fall through */
1265 }
1266 }
1267 }
1268 }
1269
1270 /*
1271 * Use our saved b_bufsize to determine if an unexpected EOF occured.
1272 */
1273 count = cbp->cb_buf.b_bufsize;
1274 putccdbuf(cbp);
1275
1276 /*
1277 * If all done, "interrupt".
1278 */
1279 obp->b_resid -= count;
1280 if (obp->b_resid < 0)
1281 panic("ccdiodone: count");
1282
1283 ccdunlock(sc);
1284
1285 if (obp->b_resid == 0)
1286 ccdintr(sc, obio);
1287 }
1288
1289 static int
ccdioctl(struct dev_ioctl_args * ap)1290 ccdioctl(struct dev_ioctl_args *ap)
1291 {
1292 cdev_t dev = ap->a_head.a_dev;
1293 int unit = ccdunit(dev);
1294 int i, j, lookedup = 0, error = 0;
1295 struct ccd_softc *cs;
1296 struct ccd_ioctl *ccio = (struct ccd_ioctl *)ap->a_data;
1297 struct ccddevice ccd;
1298 struct disk_info info;
1299 char **cpp;
1300 struct vnode **vpp;
1301
1302 if (unit >= numccd)
1303 return (ENXIO);
1304 cs = &ccd_softc[unit];
1305
1306 bzero(&ccd, sizeof(ccd));
1307
1308 switch (ap->a_cmd) {
1309 case CCDIOCSET:
1310 if (cs->sc_flags & CCDF_INITED)
1311 return (EBUSY);
1312
1313 if ((ap->a_fflag & FWRITE) == 0)
1314 return (EBADF);
1315
1316 if ((error = ccdlock(cs)) != 0)
1317 return (error);
1318
1319 if (ccio->ccio_ndisks > CCD_MAXNDISKS) {
1320 ccdunlock(cs);
1321 return (EINVAL);
1322 }
1323
1324 /* Fill in some important bits. */
1325 ccd.ccd_unit = unit;
1326 ccd.ccd_interleave = ccio->ccio_ileave;
1327 if (ccd.ccd_interleave == 0 &&
1328 ((ccio->ccio_flags & CCDF_MIRROR) ||
1329 (ccio->ccio_flags & CCDF_PARITY))) {
1330 kprintf("ccd%d: disabling mirror/parity, interleave is 0\n", unit);
1331 ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY);
1332 }
1333 if ((ccio->ccio_flags & CCDF_MIRROR) &&
1334 (ccio->ccio_flags & CCDF_PARITY)) {
1335 kprintf("ccd%d: can't specify both mirror and parity, using mirror\n", unit);
1336 ccio->ccio_flags &= ~CCDF_PARITY;
1337 }
1338 if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) &&
1339 !(ccio->ccio_flags & CCDF_UNIFORM)) {
1340 kprintf("ccd%d: mirror/parity forces uniform flag\n",
1341 unit);
1342 ccio->ccio_flags |= CCDF_UNIFORM;
1343 }
1344 ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK;
1345
1346 /*
1347 * Allocate space for and copy in the array of
1348 * componet pathnames and device numbers.
1349 */
1350 cpp = kmalloc(ccio->ccio_ndisks * sizeof(char *),
1351 M_DEVBUF, M_WAITOK);
1352 vpp = kmalloc(ccio->ccio_ndisks * sizeof(struct vnode *),
1353 M_DEVBUF, M_WAITOK);
1354
1355 error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp,
1356 ccio->ccio_ndisks * sizeof(char **));
1357 if (error) {
1358 kfree(vpp, M_DEVBUF);
1359 kfree(cpp, M_DEVBUF);
1360 ccdunlock(cs);
1361 return (error);
1362 }
1363
1364 #ifdef DEBUG
1365 if (ccddebug & CCDB_INIT) {
1366 for (i = 0; i < ccio->ccio_ndisks; ++i)
1367 kprintf("ccdioctl: component %d: 0x%x\n",
1368 i, cpp[i]);
1369 }
1370 #endif
1371
1372 for (i = 0; i < ccio->ccio_ndisks; ++i) {
1373 #ifdef DEBUG
1374 if (ccddebug & CCDB_INIT)
1375 kprintf("ccdioctl: lookedup = %d\n", lookedup);
1376 #endif
1377 if ((error = ccdlookup(cpp[i], &vpp[i])) != 0) {
1378 for (j = 0; j < lookedup; ++j)
1379 (void)vn_close(vpp[j], FREAD|FWRITE, NULL);
1380 kfree(vpp, M_DEVBUF);
1381 kfree(cpp, M_DEVBUF);
1382 ccdunlock(cs);
1383 return (error);
1384 }
1385 ++lookedup;
1386 }
1387 ccd.ccd_cpp = cpp;
1388 ccd.ccd_vpp = vpp;
1389 ccd.ccd_ndev = ccio->ccio_ndisks;
1390
1391 /*
1392 * Initialize the ccd. Fills in the softc for us.
1393 */
1394 if ((error = ccdinit(&ccd, cpp, ap->a_cred)) != 0) {
1395 for (j = 0; j < lookedup; ++j)
1396 vn_close(vpp[j], FREAD|FWRITE, NULL);
1397 kfree(vpp, M_DEVBUF);
1398 kfree(cpp, M_DEVBUF);
1399 ccdunlock(cs);
1400 return (error);
1401 }
1402
1403 /*
1404 * The ccd has been successfully initialized, so
1405 * we can place it into the array and read the disklabel.
1406 */
1407 bcopy(&ccd, &ccddevs[unit], sizeof(ccd));
1408 ccio->ccio_unit = unit;
1409 ccio->ccio_size = cs->sc_size;
1410
1411 bzero(&info, sizeof(info));
1412 info.d_media_blksize = cs->sc_geom.ccg_secsize;
1413 info.d_media_blocks = cs->sc_size;
1414 info.d_nheads = cs->sc_geom.ccg_ntracks;
1415 info.d_secpertrack = cs->sc_geom.ccg_nsectors;
1416 info.d_ncylinders = cs->sc_geom.ccg_ncylinders;
1417 info.d_secpercyl = info.d_nheads * info.d_secpertrack;
1418
1419 /*
1420 * For cases where a label is directly applied to the ccd,
1421 * without slices, DSO_COMPATMBR forces one sector be
1422 * reserved for backwards compatibility.
1423 */
1424 info.d_dsflags = DSO_COMPATMBR;
1425 disk_setdiskinfo(&cs->sc_disk, &info);
1426
1427 ccdunlock(cs);
1428
1429 break;
1430
1431 case CCDIOCCLR:
1432 if ((cs->sc_flags & CCDF_INITED) == 0)
1433 return (ENXIO);
1434
1435 if ((ap->a_fflag & FWRITE) == 0)
1436 return (EBADF);
1437
1438 if ((error = ccdlock(cs)) != 0)
1439 return (error);
1440
1441 if (dev_drefs(cs->sc_dev) > 1) {
1442 ccdunlock(cs);
1443 return (EBUSY);
1444 }
1445
1446 /*
1447 * Free ccd_softc information and clear entry.
1448 */
1449
1450 /* Close the components and free their pathnames. */
1451 for (i = 0; i < cs->sc_nccdisks; ++i) {
1452 /*
1453 * XXX: this close could potentially fail and
1454 * cause Bad Things. Maybe we need to force
1455 * the close to happen?
1456 */
1457 #ifdef DEBUG
1458 if (ccddebug & CCDB_VNODE)
1459 vprint("CCDIOCCLR: vnode info",
1460 cs->sc_cinfo[i].ci_vp);
1461 #endif
1462 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, NULL);
1463 kfree(cs->sc_cinfo[i].ci_path, M_DEVBUF);
1464 }
1465
1466 /* Free interleave index. */
1467 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i)
1468 kfree(cs->sc_itable[i].ii_index, M_DEVBUF);
1469
1470 /* Free component info and interleave table. */
1471 kfree(cs->sc_cinfo, M_DEVBUF);
1472 kfree(cs->sc_itable, M_DEVBUF);
1473 cs->sc_cinfo = NULL;
1474 cs->sc_itable = NULL;
1475 cs->sc_flags &= ~CCDF_INITED;
1476
1477 /*
1478 * Free ccddevice information and clear entry.
1479 */
1480 kfree(ccddevs[unit].ccd_cpp, M_DEVBUF);
1481 kfree(ccddevs[unit].ccd_vpp, M_DEVBUF);
1482 bcopy(&ccd, &ccddevs[unit], sizeof(ccd));
1483
1484 /*
1485 * And remove the devstat entry.
1486 */
1487 devstat_remove_entry(&cs->device_stats);
1488
1489 ccdunlock(cs);
1490
1491 break;
1492
1493 default:
1494 return (ENOTTY);
1495 }
1496
1497 return (0);
1498 }
1499
/*
 * Dump entry point.  Dumping to a ccd is not implemented: the arguments
 * are never examined and every call fails with ENXIO.
 */
static int
ccddump(struct dev_dump_args *ap)
{
	return (ENXIO);
}
1506
1507 /*
1508 * Lookup the provided name in the filesystem. If the file exists,
1509 * is a valid block device, and isn't being used by anyone else,
1510 * set *vpp to the file's vnode.
1511 */
1512 static int
ccdlookup(char * path,struct vnode ** vpp)1513 ccdlookup(char *path, struct vnode **vpp)
1514 {
1515 struct nlookupdata nd;
1516 struct vnode *vp;
1517 int error;
1518
1519 *vpp = NULL;
1520
1521 error = nlookup_init(&nd, path, UIO_USERSPACE, NLC_FOLLOW|NLC_LOCKVP);
1522 if (error)
1523 return (error);
1524 if ((error = vn_open(&nd, NULL, FREAD|FWRITE, 0)) != 0) {
1525 #ifdef DEBUG
1526 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT))
1527 kprintf("ccdlookup: vn_open error = %d\n", error);
1528 #endif
1529 goto done;
1530 }
1531 vp = nd.nl_open_vp;
1532
1533 if (vp->v_opencount > 1) {
1534 error = EBUSY;
1535 goto done;
1536 }
1537
1538 if (!vn_isdisk(vp, &error))
1539 goto done;
1540
1541 #ifdef DEBUG
1542 if (ccddebug & CCDB_VNODE)
1543 vprint("ccdlookup: vnode info", vp);
1544 #endif
1545
1546 vn_unlock(vp);
1547 nd.nl_open_vp = NULL;
1548 nlookup_done(&nd);
1549 *vpp = vp; /* leave ref intact */
1550 return (0);
1551 done:
1552 nlookup_done(&nd);
1553 return (error);
1554 }
1555
1556 /*
1557 * Wait interruptibly for an exclusive lock.
1558 */
1559 static int
ccdlock(struct ccd_softc * cs)1560 ccdlock(struct ccd_softc *cs)
1561 {
1562 lockmgr(&cs->sc_lock, LK_EXCLUSIVE);
1563
1564 return (0);
1565 }
1566
1567 /*
1568 * Unlock and wake up any waiters.
1569 */
1570 static void
ccdunlock(struct ccd_softc * cs)1571 ccdunlock(struct ccd_softc *cs)
1572 {
1573 lockmgr(&cs->sc_lock, LK_RELEASE);
1574 }
1575
1576 #ifdef DEBUG
1577 static void
printiinfo(struct ccdiinfo * ii)1578 printiinfo(struct ccdiinfo *ii)
1579 {
1580 int ix, i;
1581
1582 for (ix = 0; ii->ii_ndisk; ix++, ii++) {
1583 kprintf(" itab[%d]: #dk %d sblk %d soff %d",
1584 ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff);
1585 for (i = 0; i < ii->ii_ndisk; i++)
1586 kprintf(" %d", ii->ii_index[i]);
1587 kprintf("\n");
1588 }
1589 }
1590 #endif
1591