1 /* $FreeBSD: src/sys/dev/ccd/ccd.c,v 1.73.2.1 2001/09/11 09:49:52 kris Exp $ */ 2 /* $DragonFly: src/sys/dev/disk/ccd/ccd.c,v 1.38 2006/12/22 23:26:16 swildner Exp $ */ 3 4 /* $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ */ 5 6 /* 7 * Copyright (c) 1995 Jason R. Thorpe. 8 * All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed for the NetBSD Project 21 * by Jason R. Thorpe. 22 * 4. The name of the author may not be used to endorse or promote products 23 * derived from this software without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 26 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 27 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 28 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 29 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 30 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 31 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 32 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 33 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 */ 37 38 /* 39 * Copyright (c) 1988 University of Utah. 40 * Copyright (c) 1990, 1993 41 * The Regents of the University of California. All rights reserved. 42 * 43 * This code is derived from software contributed to Berkeley by 44 * the Systems Programming Group of the University of Utah Computer 45 * Science Department. 46 * 47 * Redistribution and use in source and binary forms, with or without 48 * modification, are permitted provided that the following conditions 49 * are met: 50 * 1. Redistributions of source code must retain the above copyright 51 * notice, this list of conditions and the following disclaimer. 52 * 2. Redistributions in binary form must reproduce the above copyright 53 * notice, this list of conditions and the following disclaimer in the 54 * documentation and/or other materials provided with the distribution. 55 * 3. All advertising materials mentioning features or use of this software 56 * must display the following acknowledgement: 57 * This product includes software developed by the University of 58 * California, Berkeley and its contributors. 59 * 4. Neither the name of the University nor the names of its contributors 60 * may be used to endorse or promote products derived from this software 61 * without specific prior written permission. 62 * 63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 73 * SUCH DAMAGE. 74 * 75 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 76 * 77 * @(#)cd.c 8.2 (Berkeley) 11/16/93 78 */ 79 80 /* 81 * "Concatenated" disk driver. 82 * 83 * Dynamic configuration and disklabel support by: 84 * Jason R. Thorpe <thorpej@nas.nasa.gov> 85 * Numerical Aerodynamic Simulation Facility 86 * Mail Stop 258-6 87 * NASA Ames Research Center 88 * Moffett Field, CA 94035 89 */ 90 91 #include "use_ccd.h" 92 93 #include <sys/param.h> 94 #include <sys/systm.h> 95 #include <sys/kernel.h> 96 #include <sys/module.h> 97 #include <sys/proc.h> 98 #include <sys/buf.h> 99 #include <sys/malloc.h> 100 #include <sys/nlookup.h> 101 #include <sys/conf.h> 102 #include <sys/stat.h> 103 #include <sys/sysctl.h> 104 #include <sys/disklabel.h> 105 #include <sys/devicestat.h> 106 #include <sys/fcntl.h> 107 #include <sys/vnode.h> 108 #include <sys/buf2.h> 109 #include <sys/ccdvar.h> 110 111 #include <vm/vm_zone.h> 112 113 #include <vfs/ufs/dinode.h> /* XXX Used only for fs.h */ 114 #include <vfs/ufs/fs.h> /* XXX used only to get BBSIZE and SBSIZE */ 115 116 #include <sys/thread2.h> 117 118 #if defined(CCDDEBUG) && !defined(DEBUG) 119 #define DEBUG 120 #endif 121 122 #ifdef DEBUG 123 #define CCDB_FOLLOW 0x01 124 #define CCDB_INIT 0x02 125 #define CCDB_IO 0x04 126 #define CCDB_LABEL 0x08 127 #define CCDB_VNODE 0x10 128 static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL | 129 CCDB_VNODE; 130 SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, ""); 131 #undef DEBUG 132 #endif 133 134 #define ccdunit(x) dkunit(x) 135 #define ccdpart(x) dkpart(x) 136 137 /* 138 This is how mirroring works (only writes are special): 139 140 When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s 141 linked together by the cb_mirror field. "cb_pflags & 142 CCDPF_MIRROR_DONE" is set to 0 on both of them. 143 144 When a component returns to ccdiodone(), it checks if "cb_pflags & 145 CCDPF_MIRROR_DONE" is set or not. If not, it sets the partner's 146 flag and returns. If it is, it means its partner has already 147 returned, so it will go to the regular cleanup. 148 149 */ 150 151 struct ccdbuf { 152 struct buf cb_buf; /* new I/O buf */ 153 struct vnode *cb_vp; /* related vnode */ 154 struct bio *cb_obio; /* ptr. to original I/O buf */ 155 struct ccdbuf *cb_freenext; /* free list link */ 156 int cb_unit; /* target unit */ 157 int cb_comp; /* target component */ 158 int cb_pflags; /* mirror/parity status flag */ 159 struct ccdbuf *cb_mirror; /* mirror counterpart */ 160 }; 161 162 /* bits in cb_pflags */ 163 #define CCDPF_MIRROR_DONE 1 /* if set, mirror counterpart is done */ 164 165 #define CCDLABELDEV(dev) \ 166 (make_sub_dev(dev, dkmakeminor(ccdunit((dev)), 0, RAW_PART))) 167 168 static d_open_t ccdopen; 169 static d_close_t ccdclose; 170 static d_strategy_t ccdstrategy; 171 static d_ioctl_t ccdioctl; 172 static d_dump_t ccddump; 173 static d_psize_t ccdsize; 174 175 #define NCCDFREEHIWAT 16 176 177 #define CDEV_MAJOR 74 178 179 static struct dev_ops ccd_ops = { 180 { "ccd", CDEV_MAJOR, D_DISK }, 181 .d_open = ccdopen, 182 .d_close = ccdclose, 183 .d_read = physread, 184 .d_write = physwrite, 185 .d_ioctl = ccdioctl, 186 .d_strategy = ccdstrategy, 187 .d_dump = ccddump, 188 .d_psize = ccdsize 189 }; 190 191 /* called during module initialization */ 192 static void ccdattach (void); 193 static int ccd_modevent (module_t, int, void *); 194 195 /* called by biodone() at interrupt time */ 196 static void ccdiodone (struct bio *bio); 197 198 static void ccdstart (struct ccd_softc *, struct bio *); 199 static void ccdinterleave (struct ccd_softc *, int); 200 static void ccdintr (struct ccd_softc *, struct bio *); 201 static int ccdinit (struct ccddevice *, char **, struct ucred *); 202 static int ccdlookup (char *, struct vnode **); 203 static void ccdbuffer (struct ccdbuf **ret, struct ccd_softc *, 204 struct bio *, off_t, caddr_t, long); 205 static void ccdgetdisklabel (cdev_t); 206 static void ccdmakedisklabel (struct ccd_softc *); 207 static int ccdlock (struct ccd_softc *); 208 static void ccdunlock (struct ccd_softc *); 209 210 #ifdef DEBUG 211 static void printiinfo (struct ccdiinfo *); 212 #endif 213 214 /* Non-private for the benefit of libkvm. */ 215 struct ccd_softc *ccd_softc; 216 struct ccddevice *ccddevs; 217 struct ccdbuf *ccdfreebufs; 218 static int numccdfreebufs; 219 static int numccd = 0; 220 221 /* 222 * getccdbuf() - Allocate and zero a ccd buffer. 223 * 224 * This routine is called at splbio(). 225 */ 226 227 static __inline 228 struct ccdbuf * 229 getccdbuf(void) 230 { 231 struct ccdbuf *cbp; 232 233 /* 234 * Allocate from freelist or malloc as necessary 235 */ 236 if ((cbp = ccdfreebufs) != NULL) { 237 ccdfreebufs = cbp->cb_freenext; 238 --numccdfreebufs; 239 reinitbufbio(&cbp->cb_buf); 240 } else { 241 cbp = kmalloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK|M_ZERO); 242 initbufbio(&cbp->cb_buf); 243 } 244 245 /* 246 * independant struct buf initialization 247 */ 248 LIST_INIT(&cbp->cb_buf.b_dep); 249 BUF_LOCKINIT(&cbp->cb_buf); 250 BUF_LOCK(&cbp->cb_buf, LK_EXCLUSIVE); 251 BUF_KERNPROC(&cbp->cb_buf); 252 cbp->cb_buf.b_flags = B_PAGING | B_BNOCLIP; 253 254 return(cbp); 255 } 256 257 /* 258 * putccdbuf() - Free a ccd buffer. 259 * 260 * This routine is called at splbio(). 261 */ 262 263 static __inline 264 void 265 putccdbuf(struct ccdbuf *cbp) 266 { 267 BUF_UNLOCK(&cbp->cb_buf); 268 BUF_LOCKFREE(&cbp->cb_buf); 269 270 if (numccdfreebufs < NCCDFREEHIWAT) { 271 cbp->cb_freenext = ccdfreebufs; 272 ccdfreebufs = cbp; 273 ++numccdfreebufs; 274 } else { 275 kfree((caddr_t)cbp, M_DEVBUF); 276 } 277 } 278 279 280 /* 281 * Number of blocks to untouched in front of a component partition. 282 * This is to avoid violating its disklabel area when it starts at the 283 * beginning of the slice. 284 */ 285 #if !defined(CCD_OFFSET) 286 #define CCD_OFFSET 16 287 #endif 288 289 /* 290 * Called by main() during pseudo-device attachment. All we need 291 * to do is allocate enough space for devices to be configured later, and 292 * add devsw entries. 293 */ 294 static void 295 ccdattach(void) 296 { 297 int i; 298 int num = NCCD; 299 300 if (num > 1) 301 kprintf("ccd0-%d: Concatenated disk drivers\n", num-1); 302 else 303 kprintf("ccd0: Concatenated disk driver\n"); 304 305 ccd_softc = kmalloc(num * sizeof(struct ccd_softc), M_DEVBUF, 306 M_WAITOK | M_ZERO); 307 ccddevs = kmalloc(num * sizeof(struct ccddevice), M_DEVBUF, 308 M_WAITOK | M_ZERO); 309 numccd = num; 310 311 dev_ops_add(&ccd_ops, 0, 0); 312 /* XXX: is this necessary? */ 313 for (i = 0; i < numccd; ++i) 314 ccddevs[i].ccd_dk = -1; 315 } 316 317 static int 318 ccd_modevent(module_t mod, int type, void *data) 319 { 320 int error = 0; 321 322 switch (type) { 323 case MOD_LOAD: 324 ccdattach(); 325 break; 326 327 case MOD_UNLOAD: 328 kprintf("ccd0: Unload not supported!\n"); 329 error = EOPNOTSUPP; 330 break; 331 332 default: /* MOD_SHUTDOWN etc */ 333 break; 334 } 335 return (error); 336 } 337 338 DEV_MODULE(ccd, ccd_modevent, NULL); 339 340 static int 341 ccdinit(struct ccddevice *ccd, char **cpaths, struct ucred *cred) 342 { 343 struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit]; 344 struct ccdcinfo *ci = NULL; /* XXX */ 345 size_t size; 346 int ix; 347 struct vnode *vp; 348 size_t minsize; 349 int maxsecsize; 350 struct partinfo dpart; 351 struct ccdgeom *ccg = &cs->sc_geom; 352 char tmppath[MAXPATHLEN]; 353 int error = 0; 354 355 #ifdef DEBUG 356 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 357 kprintf("ccdinit: unit %d\n", ccd->ccd_unit); 358 #endif 359 360 cs->sc_size = 0; 361 cs->sc_ileave = ccd->ccd_interleave; 362 cs->sc_nccdisks = ccd->ccd_ndev; 363 364 /* Allocate space for the component info. */ 365 cs->sc_cinfo = kmalloc(cs->sc_nccdisks * sizeof(struct ccdcinfo), 366 M_DEVBUF, M_WAITOK); 367 368 /* 369 * Verify that each component piece exists and record 370 * relevant information about it. 371 */ 372 maxsecsize = 0; 373 minsize = 0; 374 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 375 vp = ccd->ccd_vpp[ix]; 376 ci = &cs->sc_cinfo[ix]; 377 ci->ci_vp = vp; 378 379 /* 380 * Copy in the pathname of the component. 381 */ 382 bzero(tmppath, sizeof(tmppath)); /* sanity */ 383 if ((error = copyinstr(cpaths[ix], tmppath, 384 MAXPATHLEN, &ci->ci_pathlen)) != 0) { 385 #ifdef DEBUG 386 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 387 kprintf("ccd%d: can't copy path, error = %d\n", 388 ccd->ccd_unit, error); 389 #endif 390 goto fail; 391 } 392 ci->ci_path = kmalloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK); 393 bcopy(tmppath, ci->ci_path, ci->ci_pathlen); 394 395 ci->ci_dev = vn_todev(vp); 396 397 /* 398 * Get partition information for the component. 399 */ 400 if ((error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart, 401 FREAD, cred)) != 0) { 402 #ifdef DEBUG 403 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 404 kprintf("ccd%d: %s: ioctl failed, error = %d\n", 405 ccd->ccd_unit, ci->ci_path, error); 406 #endif 407 goto fail; 408 } 409 if (dpart.part->p_fstype == FS_BSDFFS) { 410 maxsecsize = 411 ((dpart.disklab->d_secsize > maxsecsize) ? 412 dpart.disklab->d_secsize : maxsecsize); 413 size = dpart.part->p_size - CCD_OFFSET; 414 } else { 415 #ifdef DEBUG 416 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 417 kprintf("ccd%d: %s: incorrect partition type\n", 418 ccd->ccd_unit, ci->ci_path); 419 #endif 420 error = EFTYPE; 421 goto fail; 422 } 423 424 /* 425 * Calculate the size, truncating to an interleave 426 * boundary if necessary. 427 */ 428 429 if (cs->sc_ileave > 1) 430 size -= size % cs->sc_ileave; 431 432 if (size == 0) { 433 #ifdef DEBUG 434 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 435 kprintf("ccd%d: %s: size == 0\n", 436 ccd->ccd_unit, ci->ci_path); 437 #endif 438 error = ENODEV; 439 goto fail; 440 } 441 442 if (minsize == 0 || size < minsize) 443 minsize = size; 444 ci->ci_size = size; 445 cs->sc_size += size; 446 } 447 448 /* 449 * Don't allow the interleave to be smaller than 450 * the biggest component sector. 451 */ 452 if ((cs->sc_ileave > 0) && 453 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) { 454 #ifdef DEBUG 455 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 456 kprintf("ccd%d: interleave must be at least %d\n", 457 ccd->ccd_unit, (maxsecsize / DEV_BSIZE)); 458 #endif 459 error = EINVAL; 460 goto fail; 461 } 462 463 /* 464 * If uniform interleave is desired set all sizes to that of 465 * the smallest component. This will guarentee that a single 466 * interleave table is generated. 467 * 468 * Lost space must be taken into account when calculating the 469 * overall size. Half the space is lost when CCDF_MIRROR is 470 * specified. One disk is lost when CCDF_PARITY is specified. 471 */ 472 if (ccd->ccd_flags & CCDF_UNIFORM) { 473 for (ci = cs->sc_cinfo; 474 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 475 ci->ci_size = minsize; 476 } 477 if (ccd->ccd_flags & CCDF_MIRROR) { 478 /* 479 * Check to see if an even number of components 480 * have been specified. The interleave must also 481 * be non-zero in order for us to be able to 482 * guarentee the topology. 483 */ 484 if (cs->sc_nccdisks % 2) { 485 kprintf("ccd%d: mirroring requires an even number of disks\n", ccd->ccd_unit ); 486 error = EINVAL; 487 goto fail; 488 } 489 if (cs->sc_ileave == 0) { 490 kprintf("ccd%d: an interleave must be specified when mirroring\n", ccd->ccd_unit); 491 error = EINVAL; 492 goto fail; 493 } 494 cs->sc_size = (cs->sc_nccdisks/2) * minsize; 495 } else if (ccd->ccd_flags & CCDF_PARITY) { 496 cs->sc_size = (cs->sc_nccdisks-1) * minsize; 497 } else { 498 if (cs->sc_ileave == 0) { 499 kprintf("ccd%d: an interleave must be specified when using parity\n", ccd->ccd_unit); 500 error = EINVAL; 501 goto fail; 502 } 503 cs->sc_size = cs->sc_nccdisks * minsize; 504 } 505 } 506 507 /* 508 * Construct the interleave table. 509 */ 510 ccdinterleave(cs, ccd->ccd_unit); 511 512 /* 513 * Create pseudo-geometry based on 1MB cylinders. It's 514 * pretty close. 515 */ 516 ccg->ccg_secsize = maxsecsize; 517 ccg->ccg_ntracks = 1; 518 ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize; 519 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; 520 521 /* 522 * Add an devstat entry for this device. 523 */ 524 devstat_add_entry(&cs->device_stats, "ccd", ccd->ccd_unit, 525 ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED, 526 DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER, 527 DEVSTAT_PRIORITY_ARRAY); 528 529 cs->sc_flags |= CCDF_INITED; 530 cs->sc_cflags = ccd->ccd_flags; /* So we can find out later... */ 531 cs->sc_unit = ccd->ccd_unit; 532 return (0); 533 fail: 534 while (ci > cs->sc_cinfo) { 535 ci--; 536 kfree(ci->ci_path, M_DEVBUF); 537 } 538 kfree(cs->sc_cinfo, M_DEVBUF); 539 return (error); 540 } 541 542 static void 543 ccdinterleave(struct ccd_softc *cs, int unit) 544 { 545 struct ccdcinfo *ci, *smallci; 546 struct ccdiinfo *ii; 547 daddr_t bn, lbn; 548 int ix; 549 u_long size; 550 551 #ifdef DEBUG 552 if (ccddebug & CCDB_INIT) 553 kprintf("ccdinterleave(%x): ileave %d\n", cs, cs->sc_ileave); 554 #endif 555 556 /* 557 * Allocate an interleave table. The worst case occurs when each 558 * of N disks is of a different size, resulting in N interleave 559 * tables. 560 * 561 * Chances are this is too big, but we don't care. 562 */ 563 size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo); 564 cs->sc_itable = (struct ccdiinfo *)kmalloc(size, M_DEVBUF, M_WAITOK); 565 bzero((caddr_t)cs->sc_itable, size); 566 567 /* 568 * Trivial case: no interleave (actually interleave of disk size). 569 * Each table entry represents a single component in its entirety. 570 * 571 * An interleave of 0 may not be used with a mirror or parity setup. 572 */ 573 if (cs->sc_ileave == 0) { 574 bn = 0; 575 ii = cs->sc_itable; 576 577 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 578 /* Allocate space for ii_index. */ 579 ii->ii_index = kmalloc(sizeof(int), M_DEVBUF, M_WAITOK); 580 ii->ii_ndisk = 1; 581 ii->ii_startblk = bn; 582 ii->ii_startoff = 0; 583 ii->ii_index[0] = ix; 584 bn += cs->sc_cinfo[ix].ci_size; 585 ii++; 586 } 587 ii->ii_ndisk = 0; 588 #ifdef DEBUG 589 if (ccddebug & CCDB_INIT) 590 printiinfo(cs->sc_itable); 591 #endif 592 return; 593 } 594 595 /* 596 * The following isn't fast or pretty; it doesn't have to be. 597 */ 598 size = 0; 599 bn = lbn = 0; 600 for (ii = cs->sc_itable; ; ii++) { 601 /* 602 * Allocate space for ii_index. We might allocate more then 603 * we use. 604 */ 605 ii->ii_index = kmalloc((sizeof(int) * cs->sc_nccdisks), 606 M_DEVBUF, M_WAITOK); 607 608 /* 609 * Locate the smallest of the remaining components 610 */ 611 smallci = NULL; 612 for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_nccdisks]; 613 ci++) { 614 if (ci->ci_size > size && 615 (smallci == NULL || 616 ci->ci_size < smallci->ci_size)) { 617 smallci = ci; 618 } 619 } 620 621 /* 622 * Nobody left, all done 623 */ 624 if (smallci == NULL) { 625 ii->ii_ndisk = 0; 626 break; 627 } 628 629 /* 630 * Record starting logical block using an sc_ileave blocksize. 631 */ 632 ii->ii_startblk = bn / cs->sc_ileave; 633 634 /* 635 * Record starting comopnent block using an sc_ileave 636 * blocksize. This value is relative to the beginning of 637 * a component disk. 638 */ 639 ii->ii_startoff = lbn; 640 641 /* 642 * Determine how many disks take part in this interleave 643 * and record their indices. 644 */ 645 ix = 0; 646 for (ci = cs->sc_cinfo; 647 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 648 if (ci->ci_size >= smallci->ci_size) { 649 ii->ii_index[ix++] = ci - cs->sc_cinfo; 650 } 651 } 652 ii->ii_ndisk = ix; 653 bn += ix * (smallci->ci_size - size); 654 lbn = smallci->ci_size / cs->sc_ileave; 655 size = smallci->ci_size; 656 } 657 #ifdef DEBUG 658 if (ccddebug & CCDB_INIT) 659 printiinfo(cs->sc_itable); 660 #endif 661 } 662 663 /* ARGSUSED */ 664 static int 665 ccdopen(struct dev_open_args *ap) 666 { 667 cdev_t dev = ap->a_head.a_dev; 668 int unit = ccdunit(dev); 669 struct ccd_softc *cs; 670 struct disklabel *lp; 671 int error = 0, part, pmask; 672 673 #ifdef DEBUG 674 if (ccddebug & CCDB_FOLLOW) 675 kprintf("ccdopen(%x, %x)\n", dev, flags); 676 #endif 677 if (unit >= numccd) 678 return (ENXIO); 679 cs = &ccd_softc[unit]; 680 681 if ((error = ccdlock(cs)) != 0) 682 return (error); 683 684 lp = &cs->sc_label; 685 686 part = ccdpart(dev); 687 pmask = (1 << part); 688 689 /* 690 * If we're initialized, check to see if there are any other 691 * open partitions. If not, then it's safe to update 692 * the in-core disklabel. 693 */ 694 if ((cs->sc_flags & CCDF_INITED) && (cs->sc_openmask == 0)) 695 ccdgetdisklabel(dev); 696 697 /* Check that the partition exists. */ 698 if (part != RAW_PART && ((part >= lp->d_npartitions) || 699 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 700 error = ENXIO; 701 goto done; 702 } 703 704 cs->sc_openmask |= pmask; 705 done: 706 ccdunlock(cs); 707 return (0); 708 } 709 710 /* ARGSUSED */ 711 static int 712 ccdclose(struct dev_close_args *ap) 713 { 714 cdev_t dev = ap->a_head.a_dev; 715 int unit = ccdunit(dev); 716 struct ccd_softc *cs; 717 int error = 0, part; 718 719 #ifdef DEBUG 720 if (ccddebug & CCDB_FOLLOW) 721 kprintf("ccdclose(%x, %x)\n", dev, flags); 722 #endif 723 724 if (unit >= numccd) 725 return (ENXIO); 726 cs = &ccd_softc[unit]; 727 728 if ((error = ccdlock(cs)) != 0) 729 return (error); 730 731 part = ccdpart(dev); 732 733 /* ...that much closer to allowing unconfiguration... */ 734 cs->sc_openmask &= ~(1 << part); 735 ccdunlock(cs); 736 return (0); 737 } 738 739 static int 740 ccdstrategy(struct dev_strategy_args *ap) 741 { 742 cdev_t dev = ap->a_head.a_dev; 743 struct bio *bio = ap->a_bio; 744 int unit = ccdunit(dev); 745 struct bio *nbio; 746 struct buf *bp = bio->bio_buf; 747 struct ccd_softc *cs = &ccd_softc[unit]; 748 int wlabel; 749 struct disklabel *lp; 750 751 #ifdef DEBUG 752 if (ccddebug & CCDB_FOLLOW) 753 kprintf("ccdstrategy(%x): unit %d\n", bp, unit); 754 #endif 755 if ((cs->sc_flags & CCDF_INITED) == 0) { 756 bp->b_error = ENXIO; 757 goto error; 758 } 759 760 /* If it's a nil transfer, wake up the top half now. */ 761 if (bp->b_bcount == 0) { 762 bp->b_resid = 0; 763 goto done; 764 } 765 766 lp = &cs->sc_label; 767 768 /* 769 * Do bounds checking and adjust transfer. If there's an 770 * error, the bounds check will flag that for us. 771 */ 772 wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING); 773 if (ccdpart(dev) != RAW_PART) { 774 nbio = bounds_check_with_label(dev, bio, lp, wlabel); 775 if (nbio == NULL) 776 goto done; 777 } else { 778 int pbn; /* in sc_secsize chunks */ 779 long sz; /* in sc_secsize chunks */ 780 781 pbn = (int)(bio->bio_offset / cs->sc_geom.ccg_secsize); 782 sz = howmany(bp->b_bcount, cs->sc_geom.ccg_secsize); 783 784 /* 785 * If out of bounds return an error. If the request goes 786 * past EOF, clip the request as appropriate. If exactly 787 * at EOF, return success (don't clip), but with 0 bytes 788 * of I/O. 789 * 790 * Mark EOF B_INVAL (just like bad), indicating that the 791 * contents of the buffer, if any, is invalid. 792 */ 793 if (pbn < 0) 794 goto bad; 795 if (pbn + sz > cs->sc_size) { 796 if (pbn > cs->sc_size || (bp->b_flags & B_BNOCLIP)) 797 goto bad; 798 if (pbn == cs->sc_size) { 799 bp->b_resid = bp->b_bcount; 800 bp->b_flags |= B_INVAL; 801 goto done; 802 } 803 sz = cs->sc_size - pbn; 804 bp->b_bcount = sz * cs->sc_geom.ccg_secsize; 805 } 806 nbio = bio; 807 } 808 809 bp->b_resid = bp->b_bcount; 810 nbio->bio_driver_info = dev; 811 812 /* 813 * "Start" the unit. 814 */ 815 crit_enter(); 816 ccdstart(cs, nbio); 817 crit_exit(); 818 return(0); 819 820 /* 821 * note: bio, not nbio, is valid at the done label. 822 */ 823 bad: 824 bp->b_error = EINVAL; 825 error: 826 bp->b_resid = bp->b_bcount; 827 bp->b_flags |= B_ERROR | B_INVAL; 828 done: 829 biodone(bio); 830 return(0); 831 } 832 833 static void 834 ccdstart(struct ccd_softc *cs, struct bio *bio) 835 { 836 long bcount, rcount; 837 struct ccdbuf *cbp[4]; 838 struct buf *bp = bio->bio_buf; 839 cdev_t dev = bio->bio_driver_info; 840 /* XXX! : 2 reads and 2 writes for RAID 4/5 */ 841 caddr_t addr; 842 off_t doffset; 843 struct partition *pp; 844 845 #ifdef DEBUG 846 if (ccddebug & CCDB_FOLLOW) 847 kprintf("ccdstart(%x, %x)\n", cs, bp); 848 #endif 849 850 /* Record the transaction start */ 851 devstat_start_transaction(&cs->device_stats); 852 853 /* 854 * Translate the partition-relative block number to an absolute. 855 */ 856 doffset = bio->bio_offset; 857 if (ccdpart(dev) != RAW_PART) { 858 pp = &cs->sc_label.d_partitions[ccdpart(dev)]; 859 doffset += pp->p_offset * cs->sc_label.d_secsize; 860 } 861 862 /* 863 * Allocate component buffers and fire off the requests 864 */ 865 addr = bp->b_data; 866 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) { 867 ccdbuffer(cbp, cs, bio, doffset, addr, bcount); 868 rcount = cbp[0]->cb_buf.b_bcount; 869 870 if (cs->sc_cflags & CCDF_MIRROR) { 871 /* 872 * Mirroring. Writes go to both disks, reads are 873 * taken from whichever disk seems most appropriate. 874 * 875 * We attempt to localize reads to the disk whos arm 876 * is nearest the read request. We ignore seeks due 877 * to writes when making this determination and we 878 * also try to avoid hogging. 879 */ 880 if (cbp[0]->cb_buf.b_cmd != BUF_CMD_READ) { 881 vn_strategy(cbp[0]->cb_vp, 882 &cbp[0]->cb_buf.b_bio1); 883 vn_strategy(cbp[1]->cb_vp, 884 &cbp[1]->cb_buf.b_bio1); 885 } else { 886 int pick = cs->sc_pick; 887 daddr_t range = cs->sc_size / 16 * cs->sc_label.d_secsize; 888 889 if (doffset < cs->sc_blk[pick] - range || 890 doffset > cs->sc_blk[pick] + range 891 ) { 892 cs->sc_pick = pick = 1 - pick; 893 } 894 cs->sc_blk[pick] = doffset + rcount; 895 vn_strategy(cbp[pick]->cb_vp, 896 &cbp[pick]->cb_buf.b_bio1); 897 } 898 } else { 899 /* 900 * Not mirroring 901 */ 902 vn_strategy(cbp[0]->cb_vp, 903 &cbp[0]->cb_buf.b_bio1); 904 } 905 doffset += rcount; 906 addr += rcount; 907 } 908 } 909 910 /* 911 * Build a component buffer header. 912 */ 913 static void 914 ccdbuffer(struct ccdbuf **cb, struct ccd_softc *cs, struct bio *bio, 915 off_t doffset, caddr_t addr, long bcount) 916 { 917 struct ccdcinfo *ci, *ci2 = NULL; /* XXX */ 918 struct ccdbuf *cbp; 919 daddr_t bn, cbn, cboff; 920 off_t cbc; 921 922 #ifdef DEBUG 923 if (ccddebug & CCDB_IO) 924 kprintf("ccdbuffer(%x, %x, %d, %x, %d)\n", 925 cs, bp, bn, addr, bcount); 926 #endif 927 /* 928 * Determine which component bn falls in. 929 */ 930 bn = (daddr_t)(doffset / cs->sc_geom.ccg_secsize); 931 cbn = bn; 932 cboff = 0; 933 934 if (cs->sc_ileave == 0) { 935 /* 936 * Serially concatenated and neither a mirror nor a parity 937 * config. This is a special case. 938 */ 939 daddr_t sblk; 940 941 sblk = 0; 942 for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++) 943 sblk += ci->ci_size; 944 cbn -= sblk; 945 } else { 946 struct ccdiinfo *ii; 947 int ccdisk, off; 948 949 /* 950 * Calculate cbn, the logical superblock (sc_ileave chunks), 951 * and cboff, a normal block offset (DEV_BSIZE chunks) relative 952 * to cbn. 953 */ 954 cboff = cbn % cs->sc_ileave; /* DEV_BSIZE gran */ 955 cbn = cbn / cs->sc_ileave; /* DEV_BSIZE * ileave gran */ 956 957 /* 958 * Figure out which interleave table to use. 959 */ 960 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) { 961 if (ii->ii_startblk > cbn) 962 break; 963 } 964 ii--; 965 966 /* 967 * off is the logical superblock relative to the beginning 968 * of this interleave block. 969 */ 970 off = cbn - ii->ii_startblk; 971 972 /* 973 * We must calculate which disk component to use (ccdisk), 974 * and recalculate cbn to be the superblock relative to 975 * the beginning of the component. This is typically done by 976 * adding 'off' and ii->ii_startoff together. However, 'off' 977 * must typically be divided by the number of components in 978 * this interleave array to be properly convert it from a 979 * CCD-relative logical superblock number to a 980 * component-relative superblock number. 981 */ 982 if (ii->ii_ndisk == 1) { 983 /* 984 * When we have just one disk, it can't be a mirror 985 * or a parity config. 986 */ 987 ccdisk = ii->ii_index[0]; 988 cbn = ii->ii_startoff + off; 989 } else { 990 if (cs->sc_cflags & CCDF_MIRROR) { 991 /* 992 * We have forced a uniform mapping, resulting 993 * in a single interleave array. We double 994 * up on the first half of the available 995 * components and our mirror is in the second 996 * half. This only works with a single 997 * interleave array because doubling up 998 * doubles the number of sectors, so there 999 * cannot be another interleave array because 1000 * the next interleave array's calculations 1001 * would be off. 1002 */ 1003 int ndisk2 = ii->ii_ndisk / 2; 1004 ccdisk = ii->ii_index[off % ndisk2]; 1005 cbn = ii->ii_startoff + off / ndisk2; 1006 ci2 = &cs->sc_cinfo[ccdisk + ndisk2]; 1007 } else if (cs->sc_cflags & CCDF_PARITY) { 1008 /* 1009 * XXX not implemented yet 1010 */ 1011 int ndisk2 = ii->ii_ndisk - 1; 1012 ccdisk = ii->ii_index[off % ndisk2]; 1013 cbn = ii->ii_startoff + off / ndisk2; 1014 if (cbn % ii->ii_ndisk <= ccdisk) 1015 ccdisk++; 1016 } else { 1017 ccdisk = ii->ii_index[off % ii->ii_ndisk]; 1018 cbn = ii->ii_startoff + off / ii->ii_ndisk; 1019 } 1020 } 1021 1022 ci = &cs->sc_cinfo[ccdisk]; 1023 1024 /* 1025 * Convert cbn from a superblock to a normal block so it 1026 * can be used to calculate (along with cboff) the normal 1027 * block index into this particular disk. 1028 */ 1029 cbn *= cs->sc_ileave; 1030 } 1031 1032 /* 1033 * Fill in the component buf structure. 1034 * 1035 * NOTE: devices do not use b_bufsize, only b_bcount, but b_bcount 1036 * will be truncated on device EOF so we use b_bufsize to detect 1037 * the case. 1038 */ 1039 cbp = getccdbuf(); 1040 cbp->cb_buf.b_cmd = bio->bio_buf->b_cmd; 1041 cbp->cb_buf.b_flags |= bio->bio_buf->b_flags; 1042 cbp->cb_buf.b_data = addr; 1043 cbp->cb_vp = ci->ci_vp; 1044 if (cs->sc_ileave == 0) 1045 cbc = dbtob((off_t)(ci->ci_size - cbn)); 1046 else 1047 cbc = dbtob((off_t)(cs->sc_ileave - cboff)); 1048 cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount; 1049 cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount; 1050 1051 cbp->cb_buf.b_bio1.bio_done = ccdiodone; 1052 cbp->cb_buf.b_bio1.bio_caller_info1.ptr = cbp; 1053 cbp->cb_buf.b_bio1.bio_offset = dbtob(cbn + cboff + CCD_OFFSET); 1054 1055 /* 1056 * context for ccdiodone 1057 */ 1058 cbp->cb_obio = bio; 1059 cbp->cb_unit = cs - ccd_softc; 1060 cbp->cb_comp = ci - cs->sc_cinfo; 1061 1062 #ifdef DEBUG 1063 if (ccddebug & CCDB_IO) 1064 kprintf(" dev %x(u%d): cbp %x off %lld addr %x bcnt %d\n", 1065 ci->ci_dev, ci-cs->sc_cinfo, cbp, 1066 cbp->cb_buf.b_bio1.bio_offset, 1067 cbp->cb_buf.b_data, cbp->cb_buf.b_bcount); 1068 #endif 1069 cb[0] = cbp; 1070 1071 /* 1072 * Note: both I/O's setup when reading from mirror, but only one 1073 * will be executed. 1074 */ 1075 if (cs->sc_cflags & CCDF_MIRROR) { 1076 /* mirror, setup second I/O */ 1077 cbp = getccdbuf(); 1078 1079 cbp->cb_buf.b_cmd = bio->bio_buf->b_cmd; 1080 cbp->cb_buf.b_flags |= bio->bio_buf->b_flags; 1081 cbp->cb_buf.b_data = addr; 1082 cbp->cb_vp = ci2->ci_vp; 1083 if (cs->sc_ileave == 0) 1084 cbc = dbtob((off_t)(ci->ci_size - cbn)); 1085 else 1086 cbc = dbtob((off_t)(cs->sc_ileave - cboff)); 1087 cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount; 1088 cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount; 1089 1090 cbp->cb_buf.b_bio1.bio_done = ccdiodone; 1091 cbp->cb_buf.b_bio1.bio_caller_info1.ptr = cbp; 1092 cbp->cb_buf.b_bio1.bio_offset = dbtob(cbn + cboff + CCD_OFFSET); 1093 1094 /* 1095 * context for ccdiodone 1096 */ 1097 cbp->cb_obio = bio; 1098 cbp->cb_unit = cs - ccd_softc; 1099 cbp->cb_comp = ci2 - cs->sc_cinfo; 1100 cb[1] = cbp; 1101 /* link together the ccdbuf's and clear "mirror done" flag */ 1102 cb[0]->cb_mirror = cb[1]; 1103 cb[1]->cb_mirror = cb[0]; 1104 cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1105 cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1106 } 1107 } 1108 1109 static void 1110 ccdintr(struct ccd_softc *cs, struct bio *bio) 1111 { 1112 struct buf *bp = bio->bio_buf; 1113 1114 #ifdef DEBUG 1115 if (ccddebug & CCDB_FOLLOW) 1116 kprintf("ccdintr(%x, %x)\n", cs, bp); 1117 #endif 1118 /* 1119 * Request is done for better or worse, wakeup the top half. 1120 */ 1121 if (bp->b_flags & B_ERROR) 1122 bp->b_resid = bp->b_bcount; 1123 devstat_end_transaction_buf(&cs->device_stats, bp); 1124 biodone(bio); 1125 } 1126 1127 /* 1128 * Called at interrupt time. 1129 * Mark the component as done and if all components are done, 1130 * take a ccd interrupt. 1131 */ 1132 static void 1133 ccdiodone(struct bio *bio) 1134 { 1135 struct ccdbuf *cbp = bio->bio_caller_info1.ptr; 1136 struct bio *obio = cbp->cb_obio; 1137 struct buf *obp = obio->bio_buf; 1138 int unit = cbp->cb_unit; 1139 int count; 1140 1141 /* 1142 * Since we do not have exclusive access to underlying devices, 1143 * we can't keep cache translations around. 1144 */ 1145 clearbiocache(bio->bio_next); 1146 1147 crit_enter(); 1148 #ifdef DEBUG 1149 if (ccddebug & CCDB_FOLLOW) 1150 kprintf("ccdiodone(%x)\n", cbp); 1151 if (ccddebug & CCDB_IO) { 1152 kprintf("ccdiodone: bp %x bcount %d resid %d\n", 1153 obp, obp->b_bcount, obp->b_resid); 1154 kprintf(" dev %x(u%d), cbp %x off %lld addr %x bcnt %d\n", 1155 cbp->cb_buf.b_dev, cbp->cb_comp, cbp, 1156 cbp->cb_buf.b_loffset, cbp->cb_buf.b_data, 1157 cbp->cb_buf.b_bcount); 1158 } 1159 #endif 1160 1161 /* 1162 * If an error occured, report it. If this is a mirrored 1163 * configuration and the first of two possible reads, do not 1164 * set the error in the bp yet because the second read may 1165 * succeed. 1166 */ 1167 if (cbp->cb_buf.b_flags & B_ERROR) { 1168 const char *msg = ""; 1169 1170 if ((ccd_softc[unit].sc_cflags & CCDF_MIRROR) && 1171 (cbp->cb_buf.b_cmd == BUF_CMD_READ) && 1172 (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1173 /* 1174 * We will try our read on the other disk down 1175 * below, also reverse the default pick so if we 1176 * are doing a scan we do not keep hitting the 1177 * bad disk first. 1178 */ 1179 struct ccd_softc *cs = &ccd_softc[unit]; 1180 1181 msg = ", trying other disk"; 1182 cs->sc_pick = 1 - cs->sc_pick; 1183 cs->sc_blk[cs->sc_pick] = obio->bio_offset; 1184 } else { 1185 obp->b_flags |= B_ERROR; 1186 obp->b_error = cbp->cb_buf.b_error ? 1187 cbp->cb_buf.b_error : EIO; 1188 } 1189 kprintf("ccd%d: error %d on component %d offset %lld (ccd offset %lld)%s\n", 1190 unit, obp->b_error, cbp->cb_comp, 1191 cbp->cb_buf.b_bio2.bio_offset, 1192 obio->bio_offset, msg); 1193 } 1194 1195 /* 1196 * Process mirror. If we are writing, I/O has been initiated on both 1197 * buffers and we fall through only after both are finished. 1198 * 1199 * If we are reading only one I/O is initiated at a time. If an 1200 * error occurs we initiate the second I/O and return, otherwise 1201 * we free the second I/O without initiating it. 1202 */ 1203 1204 if (ccd_softc[unit].sc_cflags & CCDF_MIRROR) { 1205 if (cbp->cb_buf.b_cmd != BUF_CMD_READ) { 1206 /* 1207 * When writing, handshake with the second buffer 1208 * to determine when both are done. If both are not 1209 * done, return here. 1210 */ 1211 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1212 cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE; 1213 putccdbuf(cbp); 1214 crit_exit(); 1215 return; 1216 } 1217 } else { 1218 /* 1219 * When reading, either dispose of the second buffer 1220 * or initiate I/O on the second buffer if an error 1221 * occured with this one. 1222 */ 1223 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1224 if (cbp->cb_buf.b_flags & B_ERROR) { 1225 cbp->cb_mirror->cb_pflags |= 1226 CCDPF_MIRROR_DONE; 1227 vn_strategy( 1228 cbp->cb_mirror->cb_vp, 1229 &cbp->cb_mirror->cb_buf.b_bio1 1230 ); 1231 putccdbuf(cbp); 1232 crit_exit(); 1233 return; 1234 } else { 1235 putccdbuf(cbp->cb_mirror); 1236 /* fall through */ 1237 } 1238 } 1239 } 1240 } 1241 1242 /* 1243 * Use our saved b_bufsize to determine if an unexpected EOF occured. 1244 */ 1245 count = cbp->cb_buf.b_bufsize; 1246 putccdbuf(cbp); 1247 1248 /* 1249 * If all done, "interrupt". 1250 */ 1251 obp->b_resid -= count; 1252 if (obp->b_resid < 0) 1253 panic("ccdiodone: count"); 1254 if (obp->b_resid == 0) 1255 ccdintr(&ccd_softc[unit], obio); 1256 crit_exit(); 1257 } 1258 1259 static int 1260 ccdioctl(struct dev_ioctl_args *ap) 1261 { 1262 cdev_t dev = ap->a_head.a_dev; 1263 int unit = ccdunit(dev); 1264 int i, j, lookedup = 0, error = 0; 1265 int part, pmask; 1266 struct ccd_softc *cs; 1267 struct ccd_ioctl *ccio = (struct ccd_ioctl *)ap->a_data; 1268 struct ccddevice ccd; 1269 char **cpp; 1270 struct vnode **vpp; 1271 1272 if (unit >= numccd) 1273 return (ENXIO); 1274 cs = &ccd_softc[unit]; 1275 1276 bzero(&ccd, sizeof(ccd)); 1277 1278 switch (ap->a_cmd) { 1279 case CCDIOCSET: 1280 if (cs->sc_flags & CCDF_INITED) 1281 return (EBUSY); 1282 1283 if ((ap->a_fflag & FWRITE) == 0) 1284 return (EBADF); 1285 1286 if ((error = ccdlock(cs)) != 0) 1287 return (error); 1288 1289 if (ccio->ccio_ndisks > CCD_MAXNDISKS) 1290 return (EINVAL); 1291 1292 /* Fill in some important bits. */ 1293 ccd.ccd_unit = unit; 1294 ccd.ccd_interleave = ccio->ccio_ileave; 1295 if (ccd.ccd_interleave == 0 && 1296 ((ccio->ccio_flags & CCDF_MIRROR) || 1297 (ccio->ccio_flags & CCDF_PARITY))) { 1298 kprintf("ccd%d: disabling mirror/parity, interleave is 0\n", unit); 1299 ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY); 1300 } 1301 if ((ccio->ccio_flags & CCDF_MIRROR) && 1302 (ccio->ccio_flags & CCDF_PARITY)) { 1303 kprintf("ccd%d: can't specify both mirror and parity, using mirror\n", unit); 1304 ccio->ccio_flags &= ~CCDF_PARITY; 1305 } 1306 if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) && 1307 !(ccio->ccio_flags & CCDF_UNIFORM)) { 1308 kprintf("ccd%d: mirror/parity forces uniform flag\n", 1309 unit); 1310 ccio->ccio_flags |= CCDF_UNIFORM; 1311 } 1312 ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK; 1313 1314 /* 1315 * Allocate space for and copy in the array of 1316 * componet pathnames and device numbers. 1317 */ 1318 cpp = kmalloc(ccio->ccio_ndisks * sizeof(char *), 1319 M_DEVBUF, M_WAITOK); 1320 vpp = kmalloc(ccio->ccio_ndisks * sizeof(struct vnode *), 1321 M_DEVBUF, M_WAITOK); 1322 1323 error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp, 1324 ccio->ccio_ndisks * sizeof(char **)); 1325 if (error) { 1326 kfree(vpp, M_DEVBUF); 1327 kfree(cpp, M_DEVBUF); 1328 ccdunlock(cs); 1329 return (error); 1330 } 1331 1332 #ifdef DEBUG 1333 if (ccddebug & CCDB_INIT) 1334 for (i = 0; i < ccio->ccio_ndisks; ++i) 1335 kprintf("ccdioctl: component %d: 0x%x\n", 1336 i, cpp[i]); 1337 #endif 1338 1339 for (i = 0; i < ccio->ccio_ndisks; ++i) { 1340 #ifdef DEBUG 1341 if (ccddebug & CCDB_INIT) 1342 kprintf("ccdioctl: lookedup = %d\n", lookedup); 1343 #endif 1344 if ((error = ccdlookup(cpp[i], &vpp[i])) != 0) { 1345 for (j = 0; j < lookedup; ++j) 1346 (void)vn_close(vpp[j], FREAD|FWRITE); 1347 kfree(vpp, M_DEVBUF); 1348 kfree(cpp, M_DEVBUF); 1349 ccdunlock(cs); 1350 return (error); 1351 } 1352 ++lookedup; 1353 } 1354 ccd.ccd_cpp = cpp; 1355 ccd.ccd_vpp = vpp; 1356 ccd.ccd_ndev = ccio->ccio_ndisks; 1357 1358 /* 1359 * Initialize the ccd. Fills in the softc for us. 1360 */ 1361 if ((error = ccdinit(&ccd, cpp, ap->a_cred)) != 0) { 1362 for (j = 0; j < lookedup; ++j) 1363 (void)vn_close(vpp[j], FREAD|FWRITE); 1364 bzero(&ccd_softc[unit], sizeof(struct ccd_softc)); 1365 kfree(vpp, M_DEVBUF); 1366 kfree(cpp, M_DEVBUF); 1367 ccdunlock(cs); 1368 return (error); 1369 } 1370 1371 /* 1372 * The ccd has been successfully initialized, so 1373 * we can place it into the array and read the disklabel. 1374 */ 1375 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1376 ccio->ccio_unit = unit; 1377 ccio->ccio_size = cs->sc_size; 1378 ccdgetdisklabel(dev); 1379 1380 ccdunlock(cs); 1381 1382 break; 1383 1384 case CCDIOCCLR: 1385 if ((cs->sc_flags & CCDF_INITED) == 0) 1386 return (ENXIO); 1387 1388 if ((ap->a_fflag & FWRITE) == 0) 1389 return (EBADF); 1390 1391 if ((error = ccdlock(cs)) != 0) 1392 return (error); 1393 1394 /* Don't unconfigure if any other partitions are open */ 1395 part = ccdpart(dev); 1396 pmask = (1 << part); 1397 if ((cs->sc_openmask & ~pmask)) { 1398 ccdunlock(cs); 1399 return (EBUSY); 1400 } 1401 1402 /* 1403 * Free ccd_softc information and clear entry. 1404 */ 1405 1406 /* Close the components and free their pathnames. */ 1407 for (i = 0; i < cs->sc_nccdisks; ++i) { 1408 /* 1409 * XXX: this close could potentially fail and 1410 * cause Bad Things. Maybe we need to force 1411 * the close to happen? 1412 */ 1413 #ifdef DEBUG 1414 if (ccddebug & CCDB_VNODE) 1415 vprint("CCDIOCCLR: vnode info", 1416 cs->sc_cinfo[i].ci_vp); 1417 #endif 1418 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE); 1419 kfree(cs->sc_cinfo[i].ci_path, M_DEVBUF); 1420 } 1421 1422 /* Free interleave index. */ 1423 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) 1424 kfree(cs->sc_itable[i].ii_index, M_DEVBUF); 1425 1426 /* Free component info and interleave table. */ 1427 kfree(cs->sc_cinfo, M_DEVBUF); 1428 kfree(cs->sc_itable, M_DEVBUF); 1429 cs->sc_flags &= ~CCDF_INITED; 1430 1431 /* 1432 * Free ccddevice information and clear entry. 1433 */ 1434 kfree(ccddevs[unit].ccd_cpp, M_DEVBUF); 1435 kfree(ccddevs[unit].ccd_vpp, M_DEVBUF); 1436 ccd.ccd_dk = -1; 1437 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1438 1439 /* 1440 * And remove the devstat entry. 1441 */ 1442 devstat_remove_entry(&cs->device_stats); 1443 1444 /* This must be atomic. */ 1445 crit_enter(); 1446 ccdunlock(cs); 1447 bzero(cs, sizeof(struct ccd_softc)); 1448 crit_exit(); 1449 1450 break; 1451 1452 case DIOCGDINFO: 1453 if ((cs->sc_flags & CCDF_INITED) == 0) 1454 return (ENXIO); 1455 1456 *(struct disklabel *)ap->a_data = cs->sc_label; 1457 break; 1458 1459 case DIOCGPART: 1460 if ((cs->sc_flags & CCDF_INITED) == 0) 1461 return (ENXIO); 1462 1463 ((struct partinfo *)ap->a_data)->disklab = &cs->sc_label; 1464 ((struct partinfo *)ap->a_data)->part = 1465 &cs->sc_label.d_partitions[ccdpart(dev)]; 1466 break; 1467 1468 case DIOCWDINFO: 1469 case DIOCSDINFO: 1470 if ((cs->sc_flags & CCDF_INITED) == 0) 1471 return (ENXIO); 1472 1473 if ((ap->a_fflag & FWRITE) == 0) 1474 return (EBADF); 1475 1476 if ((error = ccdlock(cs)) != 0) 1477 return (error); 1478 1479 cs->sc_flags |= CCDF_LABELLING; 1480 1481 error = setdisklabel(&cs->sc_label, 1482 (struct disklabel *)ap->a_data, 0); 1483 if (error == 0) { 1484 if (ap->a_cmd == DIOCWDINFO) { 1485 cdev_t cdev = CCDLABELDEV(dev); 1486 error = writedisklabel(cdev, &cs->sc_label); 1487 } 1488 } 1489 1490 cs->sc_flags &= ~CCDF_LABELLING; 1491 1492 ccdunlock(cs); 1493 1494 if (error) 1495 return (error); 1496 break; 1497 1498 case DIOCWLABEL: 1499 if ((cs->sc_flags & CCDF_INITED) == 0) 1500 return (ENXIO); 1501 1502 if ((ap->a_fflag & FWRITE) == 0) 1503 return (EBADF); 1504 if (*(int *)ap->a_data != 0) 1505 cs->sc_flags |= CCDF_WLABEL; 1506 else 1507 cs->sc_flags &= ~CCDF_WLABEL; 1508 break; 1509 1510 default: 1511 return (ENOTTY); 1512 } 1513 1514 return (0); 1515 } 1516 1517 static int 1518 ccdsize(struct dev_psize_args *ap) 1519 { 1520 cdev_t dev = ap->a_head.a_dev; 1521 struct ccd_softc *cs; 1522 int part, size; 1523 1524 if (dev_dopen(dev, 0, S_IFCHR, proc0.p_ucred)) 1525 return (-1); 1526 1527 cs = &ccd_softc[ccdunit(dev)]; 1528 part = ccdpart(dev); 1529 1530 if ((cs->sc_flags & CCDF_INITED) == 0) 1531 return (-1); 1532 1533 if (cs->sc_label.d_partitions[part].p_fstype != FS_SWAP) 1534 size = -1; 1535 else 1536 size = cs->sc_label.d_partitions[part].p_size; 1537 1538 if (dev_dclose(dev, 0, S_IFCHR)) 1539 return (-1); 1540 1541 ap->a_result = size; 1542 return(0); 1543 } 1544 1545 static int 1546 ccddump(struct dev_dump_args *ap) 1547 { 1548 /* Not implemented. */ 1549 return ENXIO; 1550 } 1551 1552 /* 1553 * Lookup the provided name in the filesystem. If the file exists, 1554 * is a valid block device, and isn't being used by anyone else, 1555 * set *vpp to the file's vnode. 1556 */ 1557 static int 1558 ccdlookup(char *path, struct vnode **vpp) 1559 { 1560 struct nlookupdata nd; 1561 struct vnode *vp; 1562 int error; 1563 1564 *vpp = NULL; 1565 1566 error = nlookup_init(&nd, path, UIO_USERSPACE, NLC_FOLLOW|NLC_LOCKVP); 1567 if (error) 1568 return (error); 1569 if ((error = vn_open(&nd, NULL, FREAD|FWRITE, 0)) != 0) { 1570 #ifdef DEBUG 1571 if (ccddebug & CCDB_FOLLOW|CCDB_INIT) 1572 kprintf("ccdlookup: vn_open error = %d\n", error); 1573 #endif 1574 goto done; 1575 } 1576 vp = nd.nl_open_vp; 1577 1578 if (vp->v_usecount > 1) { 1579 error = EBUSY; 1580 goto done; 1581 } 1582 1583 if (!vn_isdisk(vp, &error)) 1584 goto done; 1585 1586 #ifdef DEBUG 1587 if (ccddebug & CCDB_VNODE) 1588 vprint("ccdlookup: vnode info", vp); 1589 #endif 1590 1591 vn_unlock(vp); 1592 nd.nl_open_vp = NULL; 1593 nlookup_done(&nd); 1594 *vpp = vp; /* leave ref intact */ 1595 return (0); 1596 done: 1597 nlookup_done(&nd); 1598 return (error); 1599 } 1600 1601 /* 1602 * Read the disklabel from the ccd. If one is not present, fake one 1603 * up. 1604 */ 1605 static void 1606 ccdgetdisklabel(cdev_t dev) 1607 { 1608 int unit = ccdunit(dev); 1609 struct ccd_softc *cs = &ccd_softc[unit]; 1610 char *errstring; 1611 struct disklabel *lp = &cs->sc_label; 1612 struct ccdgeom *ccg = &cs->sc_geom; 1613 cdev_t cdev; 1614 1615 bzero(lp, sizeof(*lp)); 1616 1617 lp->d_secperunit = cs->sc_size; 1618 lp->d_secsize = ccg->ccg_secsize; 1619 lp->d_nsectors = ccg->ccg_nsectors; 1620 lp->d_ntracks = ccg->ccg_ntracks; 1621 lp->d_ncylinders = ccg->ccg_ncylinders; 1622 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1623 1624 strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename)); 1625 lp->d_type = DTYPE_CCD; 1626 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1627 lp->d_rpm = 3600; 1628 lp->d_interleave = 1; 1629 lp->d_flags = 0; 1630 1631 lp->d_partitions[RAW_PART].p_offset = 0; 1632 lp->d_partitions[RAW_PART].p_size = cs->sc_size; 1633 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 1634 lp->d_npartitions = RAW_PART + 1; 1635 1636 lp->d_bbsize = BBSIZE; /* XXX */ 1637 lp->d_sbsize = SBSIZE; /* XXX */ 1638 1639 lp->d_magic = DISKMAGIC; 1640 lp->d_magic2 = DISKMAGIC; 1641 lp->d_checksum = dkcksum(&cs->sc_label); 1642 1643 /* 1644 * Call the generic disklabel extraction routine. 1645 */ 1646 cdev = CCDLABELDEV(dev); 1647 errstring = readdisklabel(cdev, &cs->sc_label); 1648 if (errstring != NULL) 1649 ccdmakedisklabel(cs); 1650 1651 #ifdef DEBUG 1652 /* It's actually extremely common to have unlabeled ccds. */ 1653 if (ccddebug & CCDB_LABEL) 1654 if (errstring != NULL) 1655 kprintf("ccd%d: %s\n", unit, errstring); 1656 #endif 1657 } 1658 1659 /* 1660 * Take care of things one might want to take care of in the event 1661 * that a disklabel isn't present. 1662 */ 1663 static void 1664 ccdmakedisklabel(struct ccd_softc *cs) 1665 { 1666 struct disklabel *lp = &cs->sc_label; 1667 1668 /* 1669 * For historical reasons, if there's no disklabel present 1670 * the raw partition must be marked FS_BSDFFS. 1671 */ 1672 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 1673 1674 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 1675 } 1676 1677 /* 1678 * Wait interruptibly for an exclusive lock. 1679 * 1680 * XXX 1681 * Several drivers do this; it should be abstracted and made MP-safe. 1682 */ 1683 static int 1684 ccdlock(struct ccd_softc *cs) 1685 { 1686 int error; 1687 1688 while ((cs->sc_flags & CCDF_LOCKED) != 0) { 1689 cs->sc_flags |= CCDF_WANTED; 1690 if ((error = tsleep(cs, PCATCH, "ccdlck", 0)) != 0) 1691 return (error); 1692 } 1693 cs->sc_flags |= CCDF_LOCKED; 1694 return (0); 1695 } 1696 1697 /* 1698 * Unlock and wake up any waiters. 1699 */ 1700 static void 1701 ccdunlock(struct ccd_softc *cs) 1702 { 1703 1704 cs->sc_flags &= ~CCDF_LOCKED; 1705 if ((cs->sc_flags & CCDF_WANTED) != 0) { 1706 cs->sc_flags &= ~CCDF_WANTED; 1707 wakeup(cs); 1708 } 1709 } 1710 1711 #ifdef DEBUG 1712 static void 1713 printiinfo(struct ccdiinfo *ii) 1714 { 1715 int ix, i; 1716 1717 for (ix = 0; ii->ii_ndisk; ix++, ii++) { 1718 kprintf(" itab[%d]: #dk %d sblk %d soff %d", 1719 ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff); 1720 for (i = 0; i < ii->ii_ndisk; i++) 1721 kprintf(" %d", ii->ii_index[i]); 1722 kprintf("\n"); 1723 } 1724 } 1725 #endif 1726 1727 1728 /* Local Variables: */ 1729 /* c-argdecl-indent: 8 */ 1730 /* c-continued-statement-offset: 8 */ 1731 /* c-indent-level: 8 */ 1732 /* End: */ 1733